mirror of
https://github.com/harivansh-afk/evaluclaude-harness.git
synced 2026-04-15 07:04:47 +00:00
analyze
This commit is contained in:
parent
4b24606d0e
commit
9297f0b1ee
13 changed files with 1292 additions and 16 deletions
318
package-lock.json
generated
318
package-lock.json
generated
|
|
@ -9,6 +9,7 @@
|
|||
"version": "0.1.0",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.2.4",
|
||||
"commander": "^12.1.0",
|
||||
"glob": "^10.4.0",
|
||||
"inquirer": "^9.2.0",
|
||||
|
|
@ -30,6 +31,28 @@
|
|||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/claude-agent-sdk": {
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/claude-agent-sdk/-/claude-agent-sdk-0.2.4.tgz",
|
||||
"integrity": "sha512-5RpMO8aLEwuAd8h7/QHMCKzdVSihZCtHGnouPp+Isvc7zPzQXKb6GvUitkbs3wIBgIbXA/vXQmIi126uw9qo0A==",
|
||||
"license": "SEE LICENSE IN README.md",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-darwin-arm64": "^0.33.5",
|
||||
"@img/sharp-darwin-x64": "^0.33.5",
|
||||
"@img/sharp-linux-arm": "^0.33.5",
|
||||
"@img/sharp-linux-arm64": "^0.33.5",
|
||||
"@img/sharp-linux-x64": "^0.33.5",
|
||||
"@img/sharp-linuxmusl-arm64": "^0.33.5",
|
||||
"@img/sharp-linuxmusl-x64": "^0.33.5",
|
||||
"@img/sharp-win32-x64": "^0.33.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"zod": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.21.5",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz",
|
||||
|
|
@ -522,6 +545,291 @@
|
|||
"dev": true,
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@img/sharp-darwin-arm64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz",
|
||||
"integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-arm64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-darwin-x64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz",
|
||||
"integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-x64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-darwin-arm64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz",
|
||||
"integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-darwin-x64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz",
|
||||
"integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-arm": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz",
|
||||
"integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-arm64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz",
|
||||
"integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-x64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz",
|
||||
"integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linuxmusl-arm64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz",
|
||||
"integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linuxmusl-x64": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz",
|
||||
"integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-arm": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz",
|
||||
"integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm": "1.0.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-arm64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz",
|
||||
"integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-x64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz",
|
||||
"integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-x64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linuxmusl-arm64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz",
|
||||
"integrity": "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linuxmusl-x64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz",
|
||||
"integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-win32-x64": {
|
||||
"version": "0.33.5",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz",
|
||||
"integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "Apache-2.0 AND LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@inquirer/external-editor": {
|
||||
"version": "1.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@inquirer/external-editor/-/external-editor-1.0.3.tgz",
|
||||
|
|
@ -3818,6 +4126,16 @@
|
|||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/zod": {
|
||||
"version": "4.3.5",
|
||||
"resolved": "https://registry.npmjs.org/zod/-/zod-4.3.5.tgz",
|
||||
"integrity": "sha512-k7Nwx6vuWx1IJ9Bjuf4Zt1PEllcwe7cls3VNzm4CQ1/hgtFUK2bRNG3rvnpPUhFjmqJKAKtjV576KnUkHocg/g==",
|
||||
"license": "MIT",
|
||||
"peer": true,
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/colinhacks"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.2.4",
|
||||
"commander": "^12.1.0",
|
||||
"glob": "^10.4.0",
|
||||
"inquirer": "^9.2.0",
|
||||
|
|
|
|||
72
prompts/analyzer-developer.md
Normal file
72
prompts/analyzer-developer.md
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
# EvalSpec Schema Reference
|
||||
|
||||
## Assertion Types
|
||||
|
||||
### Deterministic Assertions (for pure functions, exact outputs)
|
||||
|
||||
| Type | Properties | Use Case |
|
||||
|------|------------|----------|
|
||||
| `equals` | `expected`, `path?` | Exact value match |
|
||||
| `contains` | `value`, `path?` | Substring or array element |
|
||||
| `throws` | `errorType?`, `messageContains?` | Exception expected |
|
||||
| `typeof` | `expected`, `path?` | Type checking |
|
||||
| `matches` | `pattern`, `path?` | Regex pattern match |
|
||||
| `truthy`/`falsy` | `path?` | Boolean coercion |
|
||||
| `custom` | `description`, `check` | Complex validation |
|
||||
|
||||
### LLM Rubric Assertions (for subjective quality, UI, user experience)
|
||||
|
||||
| Type | Properties | Use Case |
|
||||
|------|------------|----------|
|
||||
| `llm-rubric` | `rubric`, `criteria[]`, `passingThreshold?` | Quality evaluation by Claude |
|
||||
|
||||
**When to use LLM rubrics:**
|
||||
- Error message quality (is it helpful? actionable?)
|
||||
- UI component output (does it render correctly? accessible?)
|
||||
- API response format (well-structured? consistent?)
|
||||
- Generated content quality (documentation, code suggestions)
|
||||
|
||||
**Example:**
|
||||
```json
|
||||
{
|
||||
"type": "llm-rubric",
|
||||
"rubric": "error-message-quality",
|
||||
"criteria": ["clarity", "actionability", "includes context"],
|
||||
"passingThreshold": 0.7,
|
||||
"description": "Error message should clearly explain what went wrong and how to fix it"
|
||||
}
|
||||
```
|
||||
|
||||
## Formatting Rules
|
||||
|
||||
- **Scenario IDs**: kebab-case, descriptive (e.g., `user-auth-invalid-token`)
|
||||
- **Module paths**: Match source file paths exactly (e.g., `src/auth/login.py`)
|
||||
- **Function names**: Match source exactly, including case
|
||||
- **Tags**: lowercase, categorize by domain (`auth`, `api`, `database`, etc.)
|
||||
|
||||
## Priority Guidelines
|
||||
|
||||
| Priority | When to Use |
|
||||
|----------|-------------|
|
||||
| `critical` | Core business logic, security-sensitive, payment flows |
|
||||
| `high` | Public API, user-facing, data integrity |
|
||||
| `medium` | Internal utilities, helper functions |
|
||||
| `low` | Convenience methods, formatting, logging |
|
||||
|
||||
## Mock Specification
|
||||
|
||||
When specifying mocks:
|
||||
```json
|
||||
{
|
||||
"target": "module.external_api.fetch",
|
||||
"returnValue": {"status": "ok"},
|
||||
"sideEffect": "raises ConnectionError"
|
||||
}
|
||||
```
|
||||
|
||||
## Input Generation
|
||||
|
||||
Generate realistic inputs based on:
|
||||
1. Parameter types from signatures
|
||||
2. Docstring examples
|
||||
3. Domain semantics (emails, UUIDs, timestamps)
|
||||
81
prompts/analyzer-system.md
Normal file
81
prompts/analyzer-system.md
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
# Codebase Analyzer Agent
|
||||
|
||||
You are an expert test engineer analyzing codebases to generate comprehensive evaluation specifications. Your goal is to identify testable functions and generate structured EvalSpec JSON that will drive automated test generation.
|
||||
|
||||
## Important Context
|
||||
|
||||
You will receive a **RepoSummary JSON** containing structured codebase information:
|
||||
- Module paths and their exports (functions, classes, types)
|
||||
- Function signatures and docstrings
|
||||
- Import dependencies
|
||||
- Git activity (most active files)
|
||||
- Project configuration (test framework, etc.)
|
||||
|
||||
### Interactive Mode (Deep Analysis)
|
||||
|
||||
In interactive mode, perform **thorough codebase exploration**:
|
||||
|
||||
1. **Phase 1 - Understand**: Read key files to understand the architecture
|
||||
- Check entry points, main handlers, API routes
|
||||
- Read complex functions (high git activity, many imports)
|
||||
- Understand the data flow and dependencies
|
||||
|
||||
2. **Phase 2 - Identify Test Targets**: Use Grep to find:
|
||||
- Error handling patterns (try/catch, error boundaries)
|
||||
- Validation logic (input validation, schema checks)
|
||||
- External integrations (API calls, database queries)
|
||||
- State management (reducers, context, stores)
|
||||
|
||||
3. **Phase 3 - Ask User**: Use AskUserQuestion to confirm:
|
||||
- Priority modules (what matters most?)
|
||||
- Known edge cases or bugs
|
||||
- External services to mock
|
||||
|
||||
4. **Phase 4 - Generate**: Create comprehensive EvalSpec with:
|
||||
- Deterministic tests for pure functions
|
||||
- Rubric-based tests for subjective quality (UI, error messages)
|
||||
- Integration tests for critical flows
|
||||
|
||||
### Non-Interactive Mode (Fast Analysis)
|
||||
|
||||
Work only with the RepoSummary data. Generate tests based on signatures and docstrings.
|
||||
|
||||
## Core Principles
|
||||
|
||||
1. **Functional Tests Only**: Every test scenario must invoke actual code. No syntax checks, no type-only tests.
|
||||
|
||||
2. **Ask, Don't Assume**: In interactive mode, use AskUserQuestion to understand the user's priorities BEFORE generating the EvalSpec:
|
||||
- "Which module or feature is most critical to test?"
|
||||
- "Are there specific edge cases or error conditions you care about?"
|
||||
- "What external services should be mocked (databases, APIs, etc.)?"
|
||||
|
||||
Ask 1-2 focused questions to tailor the test scenarios to the user's needs.
|
||||
|
||||
3. **Prioritize by Impact**: Focus on:
|
||||
- Public API functions (exported, not prefixed with _)
|
||||
- Functions with complex logic (high complexity score)
|
||||
- Functions with error handling (try/catch, raises)
|
||||
- Entry points and main handlers
|
||||
|
||||
4. **Coverage Categories**:
|
||||
- `unit`: Pure function tests, isolated logic
|
||||
- `integration`: Tests crossing module boundaries
|
||||
- `edge-case`: Boundary conditions, empty inputs, nulls
|
||||
- `negative`: Error paths, invalid inputs, exceptions
|
||||
|
||||
## Constraints
|
||||
|
||||
- Generate 5-15 scenarios per analysis (adjustable via user input)
|
||||
- Each scenario MUST have at least one assertion
|
||||
- Use kebab-case for scenario IDs (e.g., "auth-login-success")
|
||||
- Use snake_case for function/module references matching source
|
||||
- Docstrings provide intent hints—use them for assertion design
|
||||
- Signatures reveal parameter types—use for input generation
|
||||
|
||||
## Output Format
|
||||
|
||||
You MUST return ONLY valid EvalSpec JSON matching the provided schema.
|
||||
- Do NOT include any explanatory text before or after the JSON
|
||||
- Do NOT wrap the JSON in markdown code blocks
|
||||
- Start your response with `{` and end with `}`
|
||||
- The JSON must be valid and parseable
|
||||
35
prompts/analyzer-user.md
Normal file
35
prompts/analyzer-user.md
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
# Codebase Analysis Request
|
||||
|
||||
## Repository Summary
|
||||
|
||||
```json
|
||||
{{REPO_SUMMARY}}
|
||||
```
|
||||
|
||||
## Analysis Focus
|
||||
|
||||
{{FOCUS_INSTRUCTIONS}}
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Review the module exports, focusing on:
|
||||
- Functions with signatures (testable interfaces)
|
||||
- Classes with methods
|
||||
- High-complexity modules
|
||||
|
||||
2. For each testable target, generate scenarios covering:
|
||||
- Happy path (normal operation)
|
||||
- Edge cases (empty, null, boundary values)
|
||||
- Error paths (exceptions, invalid inputs)
|
||||
|
||||
3. Use AskUserQuestion if you need clarification about:
|
||||
- Business logic requirements
|
||||
- Expected behavior for ambiguous cases
|
||||
- Priority of specific modules
|
||||
|
||||
4. Generate the EvalSpec JSON with {{MAX_SCENARIOS}} scenarios maximum.
|
||||
|
||||
5. Prioritize:
|
||||
- Most active files (by git history)
|
||||
- Functions with docstrings (clearer intent)
|
||||
- Public exports over internal helpers
|
||||
12
src/analyzer/index.ts
Normal file
12
src/analyzer/index.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
export { generateEvalSpec, generateEvalSpecInteractive, generateEvalSpecNonInteractive } from './spec-generator.js';
|
||||
export type { GenerateResult, GenerateOptions } from './spec-generator.js';
|
||||
export type {
|
||||
EvalSpec,
|
||||
EvalScenario,
|
||||
Assertion,
|
||||
MockSpec,
|
||||
DeterministicGrade,
|
||||
RubricGrade,
|
||||
} from './types.js';
|
||||
export { EVAL_SPEC_JSON_SCHEMA } from './types.js';
|
||||
export { buildSystemPrompt, buildUserPrompt, optimizeForPrompt } from './prompt-builder.js';
|
||||
112
src/analyzer/prompt-builder.ts
Normal file
112
src/analyzer/prompt-builder.ts
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { RepoSummary } from '../introspector/types.js';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const PROMPTS_DIR = path.join(__dirname, '../../prompts');
|
||||
|
||||
export interface PromptConfig {
|
||||
repoSummary: RepoSummary;
|
||||
focus?: string[];
|
||||
maxScenarios?: number;
|
||||
}
|
||||
|
||||
export async function loadPrompt(name: string): Promise<string> {
|
||||
const filePath = path.join(PROMPTS_DIR, `${name}.md`);
|
||||
return fs.readFile(filePath, 'utf-8');
|
||||
}
|
||||
|
||||
export async function buildSystemPrompt(): Promise<string> {
|
||||
const system = await loadPrompt('analyzer-system');
|
||||
const developer = await loadPrompt('analyzer-developer');
|
||||
return `${system}\n\n${developer}`;
|
||||
}
|
||||
|
||||
export async function buildUserPrompt(config: PromptConfig): Promise<string> {
|
||||
const template = await loadPrompt('analyzer-user');
|
||||
|
||||
const optimizedSummary = optimizeForPrompt(config.repoSummary);
|
||||
const summaryJson = JSON.stringify(optimizedSummary, null, 2);
|
||||
|
||||
const focusInstructions = config.focus?.length
|
||||
? `Focus specifically on these modules/functions: ${config.focus.join(', ')}`
|
||||
: 'Analyze the entire codebase and identify the most important testable functions.';
|
||||
|
||||
const maxScenarios = config.maxScenarios ?? 10;
|
||||
|
||||
return template
|
||||
.replace('{{REPO_SUMMARY}}', summaryJson)
|
||||
.replace('{{FOCUS_INSTRUCTIONS}}', focusInstructions)
|
||||
.replace('{{MAX_SCENARIOS}}', String(maxScenarios));
|
||||
}
|
||||
|
||||
export function optimizeForPrompt(summary: RepoSummary): OptimizedRepoSummary {
|
||||
return {
|
||||
name: path.basename(summary.root),
|
||||
languages: summary.languages,
|
||||
analyzedAt: summary.analyzedAt,
|
||||
|
||||
modules: summary.modules.map(m => ({
|
||||
path: m.path,
|
||||
complexity: m.complexity,
|
||||
exports: m.exports.map(e => ({
|
||||
name: e.name,
|
||||
kind: e.kind,
|
||||
signature: e.signature,
|
||||
docstring: e.docstring,
|
||||
line: e.lineNumber,
|
||||
async: e.isAsync,
|
||||
})).filter(e => !e.name.startsWith('_')),
|
||||
imports: m.imports.slice(0, 10),
|
||||
})).filter(m => m.exports.length > 0),
|
||||
|
||||
config: {
|
||||
python: summary.config.python ? {
|
||||
testFramework: summary.config.python.testFramework,
|
||||
hasTyping: summary.config.python.hasTyping,
|
||||
} : undefined,
|
||||
typescript: summary.config.typescript ? {
|
||||
testFramework: summary.config.typescript.testFramework,
|
||||
hasTypes: summary.config.typescript.hasTypes,
|
||||
} : undefined,
|
||||
},
|
||||
|
||||
git: summary.git ? {
|
||||
branch: summary.git.branch,
|
||||
activeFiles: summary.git.fileHistory
|
||||
?.sort((a, b) => b.commitCount - a.commitCount)
|
||||
.slice(0, 10)
|
||||
.map(f => ({ path: f.path, commits: f.commitCount })),
|
||||
} : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
export interface OptimizedRepoSummary {
|
||||
name: string;
|
||||
languages: string[];
|
||||
analyzedAt: string;
|
||||
modules: OptimizedModule[];
|
||||
config: {
|
||||
python?: { testFramework: string; hasTyping: boolean };
|
||||
typescript?: { testFramework: string; hasTypes: boolean };
|
||||
};
|
||||
git?: {
|
||||
branch: string;
|
||||
activeFiles?: { path: string; commits: number }[];
|
||||
};
|
||||
}
|
||||
|
||||
interface OptimizedModule {
|
||||
path: string;
|
||||
complexity: string;
|
||||
exports: {
|
||||
name: string;
|
||||
kind: string;
|
||||
signature?: string;
|
||||
docstring?: string;
|
||||
line: number;
|
||||
async?: boolean;
|
||||
}[];
|
||||
imports: string[];
|
||||
}
|
||||
174
src/analyzer/spec-generator.ts
Normal file
174
src/analyzer/spec-generator.ts
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
import { query, type SDKMessage, type Options, type CanUseTool, type PermissionResult } from '@anthropic-ai/claude-agent-sdk';
|
||||
import type { RepoSummary } from '../introspector/types.js';
|
||||
import type { EvalSpec } from './types.js';
|
||||
import { buildSystemPrompt, buildUserPrompt } from './prompt-builder.js';
|
||||
import { EVAL_SPEC_JSON_SCHEMA } from './types.js';
|
||||
|
||||
export interface GenerateOptions {
|
||||
interactive?: boolean;
|
||||
onQuestion?: (question: string) => Promise<string>;
|
||||
focus?: string[];
|
||||
maxScenarios?: number;
|
||||
}
|
||||
|
||||
export interface GenerateResult {
|
||||
spec: EvalSpec;
|
||||
tokensUsed: number;
|
||||
questionsAsked: number;
|
||||
}
|
||||
|
||||
export async function generateEvalSpec(
|
||||
repoSummary: RepoSummary,
|
||||
options: GenerateOptions = {}
|
||||
): Promise<GenerateResult> {
|
||||
const { interactive = false, onQuestion, focus, maxScenarios = 10 } = options;
|
||||
|
||||
const systemPrompt = await buildSystemPrompt();
|
||||
const userPrompt = await buildUserPrompt({
|
||||
repoSummary,
|
||||
focus,
|
||||
maxScenarios,
|
||||
});
|
||||
|
||||
let tokensUsed = 0;
|
||||
let questionsAsked = 0;
|
||||
let spec: EvalSpec | null = null;
|
||||
|
||||
const canUseTool: CanUseTool = async (toolName, input): Promise<PermissionResult> => {
|
||||
if (toolName === 'AskUserQuestion' && interactive && onQuestion) {
|
||||
// Extract question from various possible field names
|
||||
const inputObj = input as Record<string, unknown>;
|
||||
const question = String(
|
||||
inputObj.question ||
|
||||
inputObj.text ||
|
||||
inputObj.message ||
|
||||
inputObj.prompt ||
|
||||
JSON.stringify(input)
|
||||
);
|
||||
|
||||
const answer = await onQuestion(question);
|
||||
questionsAsked++;
|
||||
return {
|
||||
behavior: 'allow',
|
||||
updatedInput: { ...input, answer },
|
||||
};
|
||||
}
|
||||
// Allow all other tools in interactive mode
|
||||
return { behavior: 'allow' };
|
||||
};
|
||||
|
||||
const queryOptions: Options = {
|
||||
// In interactive mode, allow all tools; in non-interactive, restrict to none
|
||||
tools: interactive
|
||||
? { type: 'preset', preset: 'claude_code' }
|
||||
: [],
|
||||
permissionMode: 'bypassPermissions',
|
||||
allowDangerouslySkipPermissions: true,
|
||||
outputFormat: {
|
||||
type: 'json_schema',
|
||||
schema: EVAL_SPEC_JSON_SCHEMA,
|
||||
},
|
||||
canUseTool: interactive ? canUseTool : undefined,
|
||||
};
|
||||
|
||||
const fullPrompt = `${systemPrompt}\n\n---\n\n${userPrompt}`;
|
||||
|
||||
for await (const message of query({ prompt: fullPrompt, options: queryOptions })) {
|
||||
handleMessage(message);
|
||||
|
||||
if (message.type === 'result') {
|
||||
if (message.subtype === 'success') {
|
||||
// SDK returns parsed JSON in structured_output when outputFormat is set
|
||||
const structuredOutput = (message as { structured_output?: unknown }).structured_output;
|
||||
const resultData = structuredOutput ?? message.result;
|
||||
spec = parseResult(resultData);
|
||||
tokensUsed = (message.usage?.input_tokens ?? 0) + (message.usage?.output_tokens ?? 0);
|
||||
} else {
|
||||
throw new Error(`Generation failed: ${message.subtype}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!spec) {
|
||||
throw new Error('Failed to generate EvalSpec: no result received');
|
||||
}
|
||||
|
||||
spec.metadata = {
|
||||
...spec.metadata,
|
||||
generatedBy: 'evaluclaude-harness',
|
||||
totalTokens: tokensUsed,
|
||||
questionsAsked,
|
||||
};
|
||||
|
||||
return { spec, tokensUsed, questionsAsked };
|
||||
}
|
||||
|
||||
function parseResult(result: unknown): EvalSpec {
|
||||
if (typeof result === 'string') {
|
||||
let jsonStr = result.trim();
|
||||
|
||||
// Try to extract JSON from markdown code blocks
|
||||
const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
if (jsonMatch) {
|
||||
jsonStr = jsonMatch[1].trim();
|
||||
}
|
||||
|
||||
// Try to find JSON object in the string
|
||||
const startIdx = jsonStr.indexOf('{');
|
||||
const endIdx = jsonStr.lastIndexOf('}');
|
||||
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
||||
jsonStr = jsonStr.slice(startIdx, endIdx + 1);
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(jsonStr) as EvalSpec;
|
||||
} catch (e) {
|
||||
console.error('Raw result:', result);
|
||||
throw new Error(`Failed to parse result as JSON: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (result && typeof result === 'object') {
|
||||
return result as EvalSpec;
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected result type: ${typeof result}`);
|
||||
}
|
||||
|
||||
function handleMessage(message: SDKMessage): void {
|
||||
switch (message.type) {
|
||||
case 'assistant':
|
||||
if (message.message?.content) {
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'text') {
|
||||
process.stderr.write(`\n${block.text}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'result':
|
||||
if (message.subtype !== 'success') {
|
||||
console.error('Error:', message.subtype);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
export async function generateEvalSpecNonInteractive(
|
||||
repoSummary: RepoSummary,
|
||||
options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
|
||||
): Promise<GenerateResult> {
|
||||
return generateEvalSpec(repoSummary, { ...options, interactive: false });
|
||||
}
|
||||
|
||||
export async function generateEvalSpecInteractive(
|
||||
repoSummary: RepoSummary,
|
||||
questionHandler: (question: string) => Promise<string>,
|
||||
options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
|
||||
): Promise<GenerateResult> {
|
||||
return generateEvalSpec(repoSummary, {
|
||||
...options,
|
||||
interactive: true,
|
||||
onQuestion: questionHandler,
|
||||
});
|
||||
}
|
||||
263
src/analyzer/types.ts
Normal file
263
src/analyzer/types.ts
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
export interface EvalSpec {
|
||||
version: '1.0';
|
||||
repo: {
|
||||
name: string;
|
||||
languages: string[];
|
||||
analyzedAt: string;
|
||||
};
|
||||
scenarios: EvalScenario[];
|
||||
grading: {
|
||||
deterministic: DeterministicGrade[];
|
||||
rubrics: RubricGrade[];
|
||||
};
|
||||
metadata: {
|
||||
generatedBy: string;
|
||||
totalTokens: number;
|
||||
questionsAsked: number;
|
||||
confidence: 'low' | 'medium' | 'high';
|
||||
};
|
||||
}
|
||||
|
||||
export interface EvalScenario {
|
||||
id: string;
|
||||
name: string;
|
||||
description: string;
|
||||
target: {
|
||||
module: string;
|
||||
function: string;
|
||||
type: 'function' | 'method' | 'class';
|
||||
};
|
||||
category: 'unit' | 'integration' | 'edge-case' | 'negative';
|
||||
priority: 'critical' | 'high' | 'medium' | 'low';
|
||||
setup?: {
|
||||
fixtures: string[];
|
||||
mocks: MockSpec[];
|
||||
};
|
||||
input: {
|
||||
args: Record<string, unknown>;
|
||||
kwargs?: Record<string, unknown>;
|
||||
};
|
||||
assertions: Assertion[];
|
||||
tags: string[];
|
||||
}
|
||||
|
||||
/**
 * Specification for one mock to install during scenario setup.
 */
export interface MockSpec {
  /** Dotted/path-like identifier of the symbol to replace. */
  target: string;
  /** Value the mock should return, if any. */
  returnValue?: unknown;
  // Free-form description of a side effect (e.g. raising); string-encoded,
  // interpretation is up to the test runner.
  sideEffect?: string;
}
|
||||
|
||||
/**
 * Discriminated union of all assertion kinds, keyed on the `type` field
 * of each member interface.
 */
export type Assertion =
  | EqualsAssertion
  | ContainsAssertion
  | ThrowsAssertion
  | TypeAssertion
  | MatchesAssertion
  | TruthyAssertion
  | CustomAssertion
  | LLMRubricAssertion;
|
||||
|
||||
/**
 * Assertion graded by an LLM against a textual rubric rather than a
 * deterministic check.
 */
export interface LLMRubricAssertion extends BaseAssertion {
  type: 'llm-rubric';
  /** Rubric text the grader evaluates the output against. */
  rubric: string;
  /** Individual criteria making up the rubric. */
  criteria: string[];
  // Minimum score to pass; semantics/scale not defined here — TODO
  // confirm against the grading implementation.
  passingThreshold?: number;
}
|
||||
|
||||
/**
 * Common base for assertion variants: an optional human-readable
 * description of what the assertion checks.
 */
export interface BaseAssertion {
  description?: string;
}
|
||||
|
||||
/** Asserts that a value equals `expected`. */
export interface EqualsAssertion extends BaseAssertion {
  type: 'equals';
  expected: unknown;
  // Optional path selecting a sub-value of the result to compare;
  // path syntax is defined by the runner, not here.
  path?: string;
}
|
||||
|
||||
/** Asserts that a value contains `value` (membership/substring per runner). */
export interface ContainsAssertion extends BaseAssertion {
  type: 'contains';
  value: unknown;
  /** Optional path selecting a sub-value of the result to check. */
  path?: string;
}
|
||||
|
||||
/** Asserts that the call throws, optionally matching type and message. */
export interface ThrowsAssertion extends BaseAssertion {
  type: 'throws';
  /** Expected error type name, if constrained. */
  errorType?: string;
  /** Substring the error message must contain, if constrained. */
  messageContains?: string;
}
|
||||
|
||||
/** Asserts the runtime type of a value. */
export interface TypeAssertion extends BaseAssertion {
  type: 'typeof';
  // Note 'array' and 'null' are listed separately even though JS `typeof`
  // reports both as 'object' — the runner presumably special-cases them.
  expected: 'string' | 'number' | 'boolean' | 'object' | 'array' | 'null' | 'undefined';
  /** Optional path selecting a sub-value of the result to check. */
  path?: string;
}
|
||||
|
||||
/** Asserts that a string value matches a regular-expression pattern. */
export interface MatchesAssertion extends BaseAssertion {
  type: 'matches';
  /** Regex source string (no delimiters/flags encoded in this type). */
  pattern: string;
  /** Optional path selecting a sub-value of the result to check. */
  path?: string;
}
|
||||
|
||||
/** Asserts that a value is truthy or falsy, per the `type` discriminant. */
export interface TruthyAssertion extends BaseAssertion {
  type: 'truthy' | 'falsy';
  /** Optional path selecting a sub-value of the result to check. */
  path?: string;
}
|
||||
|
||||
/**
 * Free-form assertion described in prose plus a string-encoded check.
 *
 * NOTE(review): unlike the other assertion variants this does not extend
 * BaseAssertion, and `description` is required rather than optional —
 * confirm whether that asymmetry is intentional.
 */
export interface CustomAssertion {
  type: 'custom';
  description: string;
  // String-encoded check expression; how it is evaluated is up to the
  // runner (not defined in this file).
  check: string;
}
|
||||
|
||||
/** Deterministic grading entry: score awarded for a scenario outcome. */
export interface DeterministicGrade {
  /** References EvalScenario.id. */
  scenarioId: string;
  /** Outcome this entry applies to. */
  check: 'pass' | 'fail' | 'error';
  // Numeric score; scale/range is not constrained by this type.
  score: number;
}
|
||||
|
||||
/** Rubric-based grading entry for a scenario. */
export interface RubricGrade {
  /** References EvalScenario.id. */
  scenarioId: string;
  /** Rubric criteria text. */
  criteria: string;
  /** Maximum score attainable for this rubric. */
  maxScore: number;
}
|
||||
|
||||
/**
 * JSON Schema mirroring the EvalSpec interface above, for validating
 * specs produced as raw JSON (e.g. by an LLM) before use.
 *
 * NOTE(review): this schema must be kept in sync with the TypeScript
 * types by hand — there is no generation step visible in this file.
 * The `assertions` items schema is a flattened superset of all
 * Assertion variants (only `type` is required), so it is looser than
 * the discriminated union it mirrors.
 */
export const EVAL_SPEC_JSON_SCHEMA = {
  type: 'object',
  properties: {
    version: { type: 'string', const: '1.0' },
    // Mirrors EvalSpec['repo'].
    repo: {
      type: 'object',
      properties: {
        name: { type: 'string' },
        languages: { type: 'array', items: { type: 'string' } },
        analyzedAt: { type: 'string' },
      },
      required: ['name', 'languages', 'analyzedAt'],
      additionalProperties: false,
    },
    // Mirrors EvalScenario[].
    scenarios: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          id: { type: 'string' },
          name: { type: 'string' },
          description: { type: 'string' },
          target: {
            type: 'object',
            properties: {
              module: { type: 'string' },
              function: { type: 'string' },
              type: { type: 'string', enum: ['function', 'method', 'class'] },
            },
            required: ['module', 'function', 'type'],
            additionalProperties: false,
          },
          category: { type: 'string', enum: ['unit', 'integration', 'edge-case', 'negative'] },
          priority: { type: 'string', enum: ['critical', 'high', 'medium', 'low'] },
          // Mirrors EvalScenario['setup'] (optional: not in `required` below).
          setup: {
            type: 'object',
            properties: {
              fixtures: { type: 'array', items: { type: 'string' } },
              mocks: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    target: { type: 'string' },
                    // Empty schema: any JSON value is accepted.
                    returnValue: {},
                    sideEffect: { type: 'string' },
                  },
                  required: ['target'],
                  additionalProperties: false,
                },
              },
            },
            required: ['fixtures', 'mocks'],
            additionalProperties: false,
          },
          input: {
            type: 'object',
            properties: {
              args: { type: 'object' },
              kwargs: { type: 'object' },
            },
            required: ['args'],
            additionalProperties: false,
          },
          // Flattened union of all Assertion variants; only `type` required.
          assertions: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                type: { type: 'string' },
                expected: {},
                value: {},
                path: { type: 'string' },
                errorType: { type: 'string' },
                messageContains: { type: 'string' },
                pattern: { type: 'string' },
                description: { type: 'string' },
                check: { type: 'string' },
                rubric: { type: 'string' },
                criteria: { type: 'array', items: { type: 'string' } },
                passingThreshold: { type: 'number' },
              },
              required: ['type'],
              additionalProperties: false,
            },
          },
          tags: { type: 'array', items: { type: 'string' } },
        },
        required: ['id', 'name', 'description', 'target', 'category', 'priority', 'input', 'assertions', 'tags'],
        additionalProperties: false,
      },
    },
    // Mirrors EvalSpec['grading'].
    grading: {
      type: 'object',
      properties: {
        deterministic: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              scenarioId: { type: 'string' },
              check: { type: 'string', enum: ['pass', 'fail', 'error'] },
              score: { type: 'number' },
            },
            required: ['scenarioId', 'check', 'score'],
            additionalProperties: false,
          },
        },
        rubrics: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              scenarioId: { type: 'string' },
              criteria: { type: 'string' },
              maxScore: { type: 'number' },
            },
            required: ['scenarioId', 'criteria', 'maxScore'],
            additionalProperties: false,
          },
        },
      },
      required: ['deterministic', 'rubrics'],
      additionalProperties: false,
    },
    // Mirrors EvalSpec['metadata'].
    metadata: {
      type: 'object',
      properties: {
        generatedBy: { type: 'string' },
        totalTokens: { type: 'number' },
        questionsAsked: { type: 'number' },
        confidence: { type: 'string', enum: ['low', 'medium', 'high'] },
      },
      required: ['generatedBy', 'totalTokens', 'questionsAsked', 'confidence'],
      additionalProperties: false,
    },
  },
  required: ['version', 'repo', 'scenarios', 'grading', 'metadata'],
  additionalProperties: false,
} as const;
|
||||
145
src/cli/commands/analyze.ts
Normal file
145
src/cli/commands/analyze.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
import { Command } from 'commander';
|
||||
import * as path from 'node:path';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import { analyze } from '../../introspector/index.js';
|
||||
import { generateEvalSpec, generateEvalSpecInteractive } from '../../analyzer/index.js';
|
||||
|
||||
/**
 * Shape of a structured question payload that Claude may emit as JSON.
 * handleQuestion tries to parse incoming question text into this shape
 * and falls back to plain-text prompting when parsing fails.
 */
interface StructuredQuestion {
  questions: {
    /** The question text shown to the user. */
    question: string;
    /** Optional heading printed above the prompt. */
    header?: string;
    /** When present and non-empty, rendered as a selection list. */
    options?: {
      label: string;
      description?: string;
    }[];
    /** When true, the selection allows multiple choices (checkbox). */
    multiSelect?: boolean;
  }[];
}
|
||||
|
||||
async function handleQuestion(questionData: string): Promise<string> {
|
||||
const { default: inquirer } = await import('inquirer');
|
||||
|
||||
// Try to parse as structured question
|
||||
let parsed: StructuredQuestion | null = null;
|
||||
try {
|
||||
parsed = JSON.parse(questionData);
|
||||
} catch {
|
||||
// Not JSON, treat as plain text
|
||||
}
|
||||
|
||||
if (parsed?.questions && Array.isArray(parsed.questions)) {
|
||||
const answers: string[] = [];
|
||||
|
||||
for (const q of parsed.questions) {
|
||||
console.log(`\n🤖 ${q.header || 'Question'}:\n`);
|
||||
|
||||
if (q.options && q.options.length > 0) {
|
||||
// Render as selection
|
||||
const choices = q.options.map(opt => ({
|
||||
name: opt.description ? `${opt.label} - ${opt.description}` : opt.label,
|
||||
value: opt.label,
|
||||
}));
|
||||
|
||||
const { selection } = await inquirer.prompt([{
|
||||
type: q.multiSelect ? 'checkbox' : 'list',
|
||||
name: 'selection',
|
||||
message: q.question,
|
||||
choices,
|
||||
}]);
|
||||
|
||||
answers.push(Array.isArray(selection) ? selection.join(', ') : selection);
|
||||
} else {
|
||||
// Plain text input
|
||||
const { answer } = await inquirer.prompt([{
|
||||
type: 'input',
|
||||
name: 'answer',
|
||||
message: q.question,
|
||||
}]);
|
||||
answers.push(answer);
|
||||
}
|
||||
}
|
||||
|
||||
return answers.join('\n');
|
||||
}
|
||||
|
||||
// Fallback: plain text question
|
||||
const { answer } = await inquirer.prompt([{
|
||||
type: 'input',
|
||||
name: 'answer',
|
||||
message: `🤖 Claude asks: ${questionData}`,
|
||||
}]);
|
||||
|
||||
return answer;
|
||||
}
|
||||
|
||||
export const analyzeCommand = new Command('analyze')
|
||||
.description('Analyze a codebase and generate EvalSpec using Claude')
|
||||
.argument('[path]', 'Path to the repository to analyze', '.')
|
||||
.option('-o, --output <file>', 'Output file for the EvalSpec JSON')
|
||||
.option('-i, --interactive', 'Enable interactive mode with clarifying questions')
|
||||
.option('--focus <modules>', 'Comma-separated list of modules/functions to focus on')
|
||||
.option('--max-scenarios <n>', 'Maximum number of test scenarios to generate', '10')
|
||||
.option('--quiet', 'Suppress progress messages')
|
||||
.action(async (repoPath: string, options: AnalyzeOptions) => {
|
||||
const absolutePath = path.resolve(repoPath);
|
||||
const log = options.quiet ? () => {} : console.log;
|
||||
|
||||
log(`\n🔬 Analyzing codebase: ${absolutePath}\n`);
|
||||
|
||||
try {
|
||||
log('Step 1: Running tree-sitter introspection...');
|
||||
const repoSummary = await analyze({
|
||||
root: absolutePath,
|
||||
onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`),
|
||||
});
|
||||
|
||||
log(`\nStep 2: Generating EvalSpec with Claude...\n`);
|
||||
|
||||
const focus = options.focus?.split(',').map(s => s.trim());
|
||||
const maxScenarios = parseInt(options.maxScenarios, 10);
|
||||
|
||||
let result;
|
||||
|
||||
if (options.interactive) {
|
||||
result = await generateEvalSpecInteractive(
|
||||
repoSummary,
|
||||
handleQuestion,
|
||||
{ focus, maxScenarios }
|
||||
);
|
||||
} else {
|
||||
result = await generateEvalSpec(repoSummary, {
|
||||
interactive: false,
|
||||
focus,
|
||||
maxScenarios,
|
||||
});
|
||||
}
|
||||
|
||||
const { spec, tokensUsed, questionsAsked } = result;
|
||||
|
||||
log('\n✅ EvalSpec generated successfully!');
|
||||
log(` Scenarios: ${spec.scenarios.length}`);
|
||||
log(` Tokens used: ${tokensUsed}`);
|
||||
log(` Questions asked: ${questionsAsked}`);
|
||||
log(` Confidence: ${spec.metadata.confidence}`);
|
||||
|
||||
const json = JSON.stringify(spec, null, 2);
|
||||
|
||||
if (options.output) {
|
||||
await fs.writeFile(options.output, json);
|
||||
log(`\n📄 Written to: ${options.output}`);
|
||||
} else {
|
||||
console.log('\n' + json);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('\n❌ Error:', error instanceof Error ? error.message : error);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Parsed commander options for the `analyze` command. Field names match
 * the camelCased long-option names declared on analyzeCommand.
 */
interface AnalyzeOptions {
  /** Output file path for the EvalSpec JSON; stdout when omitted. */
  output?: string;
  /** Enable interactive mode with clarifying questions. */
  interactive?: boolean;
  /** Comma-separated modules/functions to focus on (raw, unsplit). */
  focus?: string;
  // Always a string: commander supplies the declared default '10' when
  // the flag is omitted; parsed with parseInt in the action handler.
  maxScenarios: string;
  /** Suppress progress messages. */
  quiet?: boolean;
}
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import { Command } from 'commander';
|
||||
import { introCommand } from './commands/intro.js';
|
||||
import { analyzeCommand } from './commands/analyze.js';
|
||||
|
||||
const program = new Command();
|
||||
|
||||
|
|
@ -11,5 +12,6 @@ program
|
|||
.version('0.1.0');
|
||||
|
||||
program.addCommand(introCommand);
|
||||
program.addCommand(analyzeCommand);
|
||||
|
||||
program.parse(process.argv);
|
||||
|
|
|
|||
|
|
@ -1 +1,2 @@
|
|||
export * from './introspector/index.js';
|
||||
export * from './analyzer/index.js';
|
||||
|
|
|
|||
|
|
@ -3,26 +3,56 @@ import TypeScriptLang from 'tree-sitter-typescript';
|
|||
import { BaseParser } from './base.js';
|
||||
import type { ModuleInfo, ExportInfo } from '../types.js';
|
||||
|
||||
const { typescript: TypeScript } = TypeScriptLang;
|
||||
const { typescript: TypeScript, tsx: TSX } = TypeScriptLang;
|
||||
|
||||
export class TypeScriptParser extends BaseParser {
|
||||
readonly language = 'typescript';
|
||||
private parser: Parser;
|
||||
|
||||
private tsParser: Parser;
|
||||
private tsxParser: Parser;
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.parser = new Parser();
|
||||
this.parser.setLanguage(TypeScript);
|
||||
this.tsParser = new Parser();
|
||||
this.tsParser.setLanguage(TypeScript);
|
||||
|
||||
this.tsxParser = new Parser();
|
||||
this.tsxParser.setLanguage(TSX);
|
||||
}
|
||||
|
||||
parse(source: string, filePath: string): ModuleInfo {
|
||||
const tree = this.parser.parse(source);
|
||||
const isTsx = filePath.endsWith('.tsx') || filePath.endsWith('.jsx');
|
||||
const parser = isTsx ? this.tsxParser : this.tsParser;
|
||||
|
||||
let tree: Parser.Tree;
|
||||
try {
|
||||
tree = parser.parse(source);
|
||||
} catch (error) {
|
||||
return this.createEmptyModule(filePath, `Parse error: ${error}`);
|
||||
}
|
||||
|
||||
const rootNode = tree.rootNode;
|
||||
|
||||
if (rootNode.hasError) {
|
||||
const errorCount = this.countErrors(rootNode);
|
||||
if (errorCount > 10) {
|
||||
return this.createEmptyModule(filePath, `Too many syntax errors (${errorCount})`);
|
||||
}
|
||||
}
|
||||
|
||||
const exports: ExportInfo[] = [];
|
||||
const imports: string[] = [];
|
||||
|
||||
this.walkNode(rootNode, source, exports, imports, false);
|
||||
try {
|
||||
this.walkNode(rootNode, source, exports, imports, false, 0);
|
||||
} catch (error) {
|
||||
return {
|
||||
path: filePath,
|
||||
exports,
|
||||
imports: [...new Set(imports)],
|
||||
complexity: this.calculateComplexity(exports.length),
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
path: filePath,
|
||||
|
|
@ -32,13 +62,34 @@ export class TypeScriptParser extends BaseParser {
|
|||
};
|
||||
}
|
||||
|
||||
private createEmptyModule(path: string, reason: string): ModuleInfo {
|
||||
return {
|
||||
path,
|
||||
exports: [],
|
||||
imports: [],
|
||||
complexity: 'low',
|
||||
};
|
||||
}
|
||||
|
||||
private countErrors(node: Parser.SyntaxNode): number {
|
||||
let count = node.type === 'ERROR' ? 1 : 0;
|
||||
for (const child of node.children) {
|
||||
count += this.countErrors(child);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
private walkNode(
|
||||
node: Parser.SyntaxNode,
|
||||
source: string,
|
||||
exports: ExportInfo[],
|
||||
imports: string[],
|
||||
isExported: boolean
|
||||
isExported: boolean,
|
||||
depth: number
|
||||
): void {
|
||||
if (depth > 50) return;
|
||||
if (node.type === 'ERROR') return;
|
||||
|
||||
switch (node.type) {
|
||||
case 'function_declaration':
|
||||
exports.push(this.extractFunction(node, source, isExported));
|
||||
|
|
@ -59,9 +110,8 @@ export class TypeScriptParser extends BaseParser {
|
|||
break;
|
||||
|
||||
case 'export_statement':
|
||||
// Recurse with isExported = true
|
||||
for (const child of node.children) {
|
||||
this.walkNode(child, source, exports, imports, true);
|
||||
this.walkNode(child, source, exports, imports, true, depth + 1);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
@ -70,9 +120,24 @@ export class TypeScriptParser extends BaseParser {
|
|||
break;
|
||||
|
||||
case 'program':
|
||||
// Recurse into top-level statements
|
||||
for (const child of node.children) {
|
||||
this.walkNode(child, source, exports, imports, false);
|
||||
this.walkNode(child, source, exports, imports, false, depth + 1);
|
||||
}
|
||||
break;
|
||||
|
||||
case 'export_clause':
|
||||
for (const child of node.children) {
|
||||
if (child.type === 'export_specifier') {
|
||||
const nameNode = child.childForFieldName('name') || child.firstChild;
|
||||
if (nameNode && nameNode.type === 'identifier') {
|
||||
exports.push({
|
||||
name: this.getText(source, nameNode.startIndex, nameNode.endIndex),
|
||||
kind: 'constant',
|
||||
lineNumber: child.startPosition.row + 1,
|
||||
isExported: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
@ -85,7 +150,6 @@ export class TypeScriptParser extends BaseParser {
|
|||
|
||||
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
|
||||
|
||||
// Build signature
|
||||
let signature = '';
|
||||
if (paramsNode) {
|
||||
signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
|
||||
|
|
@ -94,7 +158,6 @@ export class TypeScriptParser extends BaseParser {
|
|||
signature += `: ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
|
||||
}
|
||||
|
||||
// Check for async
|
||||
const isAsync = node.children.some(c => c.type === 'async');
|
||||
|
||||
return {
|
||||
|
|
@ -111,7 +174,6 @@ export class TypeScriptParser extends BaseParser {
|
|||
const nameNode = node.childForFieldName('name');
|
||||
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
|
||||
|
||||
// Get heritage clause for extends/implements
|
||||
let signature: string | undefined;
|
||||
const heritageNode = node.children.find(c => c.type === 'class_heritage');
|
||||
if (heritageNode) {
|
||||
|
|
@ -138,7 +200,6 @@ export class TypeScriptParser extends BaseParser {
|
|||
if (nameNode) {
|
||||
const name = this.getText(source, nameNode.startIndex, nameNode.endIndex);
|
||||
|
||||
// Check if it's a function expression or arrow function
|
||||
const isFunction = valueNode && (
|
||||
valueNode.type === 'arrow_function' ||
|
||||
valueNode.type === 'function_expression' ||
|
||||
|
|
@ -176,7 +237,6 @@ export class TypeScriptParser extends BaseParser {
|
|||
|
||||
for (const child of node.children) {
|
||||
if (child.type === 'string') {
|
||||
// Remove quotes from the import path
|
||||
const importPath = this.getText(source, child.startIndex, child.endIndex)
|
||||
.replace(/^["']|["']$/g, '');
|
||||
imports.push(importPath);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue