mirror of
https://github.com/harivansh-afk/evaluclaude-harness.git
synced 2026-04-15 05:02:09 +00:00
iteration 0
This commit is contained in:
commit
4b24606d0e
25 changed files with 7843 additions and 0 deletions
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@@ -0,0 +1,2 @@
|
|||
/node_modules
|
||||
/dist
|
||||
52
AGENTS.md
Normal file
52
AGENTS.md
Normal file
|
|
@@ -0,0 +1,52 @@
|
|||
# Evaluclaude Harness - Agent Instructions
|
||||
|
||||
## Project Overview
|
||||
|
||||
This is a CLI tool for generating evaluation tests for codebases using Claude. The core philosophy is "Zero-to-evals in one command."
|
||||
|
||||
## Commands
|
||||
|
||||
```bash
|
||||
# Build the project
|
||||
npm run build
|
||||
|
||||
# Run typecheck
|
||||
npm run typecheck
|
||||
|
||||
# Run tests
|
||||
npm test
|
||||
|
||||
# Run the CLI
|
||||
npm start -- intro <path>
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── cli/ # Commander.js CLI
|
||||
├── introspector/ # Tree-sitter codebase parsing (NO LLM)
|
||||
│ ├── parsers/ # Language-specific parsers
|
||||
│ ├── scanner.ts # File discovery
|
||||
│ ├── git.ts # Git integration
|
||||
│ └── summarizer.ts # Main analysis logic
|
||||
└── index.ts # Main exports
|
||||
```
|
||||
|
||||
## Key Principles
|
||||
|
||||
1. **Tree-sitter for introspection**: Never send raw code to Claude for structure extraction
|
||||
2. **Claude generates specs, not code**: EvalSpec JSON is generated by Claude, test code is rendered deterministically
|
||||
3. **Git-aware incremental**: Only re-analyze changed files
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `tree-sitter`: Native AST parsing
|
||||
- `tree-sitter-python`: Python grammar
|
||||
- `tree-sitter-typescript`: TypeScript grammar
|
||||
- `commander`: CLI framework
|
||||
- `glob`: File pattern matching
|
||||
|
||||
## Testing
|
||||
|
||||
Use vitest for testing. Test files go in `tests/` directory.
|
||||
133
docs/00-tree-sitter-introspector.md
Normal file
133
docs/00-tree-sitter-introspector.md
Normal file
|
|
@@ -0,0 +1,133 @@
|
|||
# 0. Tree-Sitter Introspector - System Design
|
||||
|
||||
> **Priority**: 🔴 FOUNDATIONAL — Build this first
|
||||
> **Complexity**: Medium
|
||||
> **Effort Estimate**: 6-10 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The Tree-Sitter Introspector parses Python and TypeScript codebases locally using tree-sitter AST parsing, extracting structured metadata (functions, classes, imports) **without** sending raw code to Claude. This saves tokens, is faster, and produces reliable structured data.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Introspector Module │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ File Scanner │───▶│ Tree-Sitter │───▶│ Summarizer │ │
|
||||
│ │ (glob/git) │ │ Parsers │ │ │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │ │ │
|
||||
│ ▼ ▼ ▼ │
|
||||
│ File list + Per-file AST RepoSummary │
|
||||
│ metadata extracts JSON │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface RepoSummary {
|
||||
languages: ('python' | 'typescript')[];
|
||||
root: string;
|
||||
analyzedAt: string;
|
||||
files: FileInfo[];
|
||||
modules: ModuleInfo[];
|
||||
config: ConfigInfo;
|
||||
git?: GitInfo;
|
||||
}
|
||||
|
||||
interface ModuleInfo {
|
||||
path: string;
|
||||
exports: ExportInfo[];
|
||||
imports: string[];
|
||||
complexity: 'low' | 'medium' | 'high';
|
||||
}
|
||||
|
||||
interface ExportInfo {
|
||||
name: string;
|
||||
kind: 'function' | 'class' | 'constant' | 'type';
|
||||
signature?: string;
|
||||
docstring?: string;
|
||||
lineNumber: number;
|
||||
isAsync?: boolean;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Implementation Details
|
||||
|
||||
### Tree-Sitter Queries (Python)
|
||||
|
||||
```typescript
|
||||
const FUNCTION_QUERY = `
|
||||
(function_definition
|
||||
name: (identifier) @name
|
||||
parameters: (parameters) @params
|
||||
return_type: (type)? @return_type
|
||||
) @func
|
||||
`;
|
||||
|
||||
const CLASS_QUERY = `
|
||||
(class_definition
|
||||
name: (identifier) @name
|
||||
body: (block) @body
|
||||
) @class
|
||||
`;
|
||||
```
|
||||
|
||||
### Git-Aware Incremental
|
||||
|
||||
```typescript
|
||||
async function getChangedFiles(since: string): Promise<string[]> {
|
||||
const { stdout } = await exec(`git diff --name-only ${since}`);
|
||||
return stdout.split('\n').filter(f => /\.(py|ts|tsx)$/.test(f));
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/introspector/
|
||||
├── index.ts # Main entry point
|
||||
├── types.ts # TypeScript interfaces
|
||||
├── scanner.ts # File discovery
|
||||
├── parsers/
|
||||
│ ├── python.ts # Python tree-sitter queries
|
||||
│ └── typescript.ts # TS tree-sitter queries
|
||||
├── git.ts # Git integration
|
||||
└── summarizer.ts # Combine into RepoSummary
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"tree-sitter": "^0.21.0",
|
||||
"tree-sitter-python": "^0.21.0",
|
||||
"tree-sitter-typescript": "^0.21.0",
|
||||
"glob": "^10.3.0"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Parses Python files (functions, classes, imports)
|
||||
- [ ] Parses TypeScript files (functions, classes, imports)
|
||||
- [ ] Handles 1000+ file repos in <10 seconds
|
||||
- [ ] Incremental mode only parses changed files
|
||||
- [ ] Gracefully handles syntax errors
|
||||
142
docs/01-codebase-analyzer-prompt.md
Normal file
142
docs/01-codebase-analyzer-prompt.md
Normal file
|
|
@@ -0,0 +1,142 @@
|
|||
# 1. Codebase Analyzer Prompt - System Design
|
||||
|
||||
> **Priority**: 🟡 HIGH — Core LLM logic
|
||||
> **Complexity**: High (prompt engineering)
|
||||
> **Effort Estimate**: 8-12 hours (iterative refinement)
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The Codebase Analyzer takes structured `RepoSummary` from the introspector and generates `EvalSpec` JSON defining what tests to create. Key insight: **Claude generates specs, not code**. Test code is deterministically rendered from specs.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Codebase Analyzer Agent │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ RepoSummary │───▶│ Claude Agent │───▶│ EvalSpec │ │
|
||||
│ │ JSON │ │ SDK │ │ JSON │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────┐ │
|
||||
│ │AskUserQuestion│ │
|
||||
│ │ (optional) │ │
|
||||
│ └──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface EvalSpec {
|
||||
version: '1.0';
|
||||
repo: { name: string; languages: string[]; analyzedAt: string };
|
||||
scenarios: EvalScenario[];
|
||||
grading: {
|
||||
deterministic: DeterministicGrade[];
|
||||
rubrics: RubricGrade[];
|
||||
};
|
||||
metadata: {
|
||||
generatedBy: string;
|
||||
totalTokens: number;
|
||||
questionsAsked: number;
|
||||
confidence: 'low' | 'medium' | 'high';
|
||||
};
|
||||
}
|
||||
|
||||
interface EvalScenario {
|
||||
id: string; // "auth-login-success"
|
||||
name: string;
|
||||
description: string;
|
||||
target: {
|
||||
module: string;
|
||||
function: string;
|
||||
type: 'function' | 'method' | 'class';
|
||||
};
|
||||
category: 'unit' | 'integration' | 'edge-case' | 'negative';
|
||||
priority: 'critical' | 'high' | 'medium' | 'low';
|
||||
setup?: { fixtures: string[]; mocks: MockSpec[] };
|
||||
input: { args: Record<string, any>; kwargs?: Record<string, any> };
|
||||
assertions: Assertion[];
|
||||
tags: string[];
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Prompt Architecture (Three-Part)
|
||||
|
||||
### 1. System Prompt
|
||||
- Defines Claude's identity as codebase analyzer
|
||||
- Constraints: functional tests only, no syntax checks, ask don't assume
|
||||
|
||||
### 2. Developer Prompt
|
||||
- Contains EvalSpec JSON schema
|
||||
- Formatting rules (snake_case, kebab-case IDs)
|
||||
- Assertion type reference
|
||||
|
||||
### 3. User Prompt (Template)
|
||||
- Injects RepoSummary JSON
|
||||
- User context about what to evaluate
|
||||
- Instructions for output format
|
||||
|
||||
---
|
||||
|
||||
## Key Implementation
|
||||
|
||||
```typescript
|
||||
async function generateEvalSpec(options: GenerateOptions): Promise<EvalSpec> {
|
||||
const agentOptions: ClaudeAgentOptions = {
|
||||
systemPrompt: await loadPrompt('analyzer-system.md'),
|
||||
permissionMode: options.interactive ? 'default' : 'dontAsk',
|
||||
canUseTool: async ({ toolName, input }) => {
|
||||
if (toolName === 'AskUserQuestion' && options.onQuestion) {
|
||||
const answer = await options.onQuestion(input);
|
||||
return { behavior: 'allow', updatedInput: { ...input, answers: { [input.question]: answer } } };
|
||||
}
|
||||
return { behavior: 'deny' };
|
||||
},
|
||||
outputFormat: { type: 'json_schema', json_schema: { name: 'EvalSpec', schema: EVAL_SPEC_SCHEMA } },
|
||||
};
|
||||
|
||||
for await (const msg of query(prompt, agentOptions)) {
|
||||
if (msg.type === 'result') return msg.output as EvalSpec;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/analyzer/
|
||||
├── index.ts # Main entry point
|
||||
├── types.ts # EvalSpec types
|
||||
├── spec-generator.ts # Claude Agent SDK integration
|
||||
├── validator.ts # JSON schema validation
|
||||
└── prompt-builder.ts # Builds prompts from templates
|
||||
|
||||
prompts/
|
||||
├── analyzer-system.md
|
||||
├── analyzer-developer.md
|
||||
└── analyzer-user.md
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Generates valid EvalSpec JSON for Python repos
|
||||
- [ ] Generates valid EvalSpec JSON for TypeScript repos
|
||||
- [ ] Asks 2-3 clarifying questions on complex repos
|
||||
- [ ] <10k tokens per analysis
|
||||
- [ ] 100% assertion coverage (every scenario has assertions)
|
||||
159
docs/02-synchronous-claude-session.md
Normal file
159
docs/02-synchronous-claude-session.md
Normal file
|
|
@@ -0,0 +1,159 @@
|
|||
# 2. Synchronous Claude Session with Questions - System Design
|
||||
|
||||
> **Priority**: 🟡 HIGH — Interactive UX
|
||||
> **Complexity**: Medium
|
||||
> **Effort Estimate**: 4-6 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Handles **interactive communication** between Claude and the user during eval generation. When Claude calls `AskUserQuestion`, we display it in CLI, collect the answer, and return it to Claude.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Claude Session Manager │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Claude Agent │◀──────────────────▶│ Question │ │
|
||||
│ │ SDK │ AskUserQuestion │ Handler │ │
|
||||
│ └──────────────┘ └──────────────┘ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ Result CLI/stdin │
|
||||
│ (EvalSpec) (inquirer) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Session Modes
|
||||
|
||||
| Mode | Usage | Behavior |
|
||||
|------|-------|----------|
|
||||
| `interactive` | Local dev | Full CLI prompts via inquirer |
|
||||
| `non-interactive` | CI/CD | Deny questions, use defaults |
|
||||
| `auto-answer` | Scripted | Use provided default answers |
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface Question {
|
||||
header: string;
|
||||
question: string;
|
||||
options?: QuestionOption[];
|
||||
multiSelect?: boolean;
|
||||
freeText?: boolean;
|
||||
defaultValue?: string;
|
||||
}
|
||||
|
||||
interface SessionOptions {
|
||||
interactive: boolean;
|
||||
defaultAnswers?: Record<string, string>;
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
type SessionMode = 'interactive' | 'non-interactive' | 'auto-answer';
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Implementation
|
||||
|
||||
```typescript
|
||||
class ClaudeSession {
|
||||
async run<T>(systemPrompt: string, userPrompt: string, outputSchema?: object): Promise<T> {
|
||||
const agentOptions: ClaudeAgentOptions = {
|
||||
systemPrompt,
|
||||
permissionMode: this.getPermissionMode(),
|
||||
canUseTool: this.createToolHandler(),
|
||||
outputFormat: outputSchema ? { type: 'json_schema', json_schema: { name: 'Output', schema: outputSchema } } : undefined,
|
||||
};
|
||||
|
||||
for await (const msg of query(userPrompt, agentOptions)) {
|
||||
if (msg.type === 'result') return msg.output as T;
|
||||
}
|
||||
}
|
||||
|
||||
private async handleAskUserQuestion(input: any) {
|
||||
if (this.mode === 'non-interactive') {
|
||||
return { behavior: 'deny', message: 'Interactive questions not allowed in CI' };
|
||||
}
|
||||
|
||||
const answers: Record<string, string> = {};
|
||||
for (const question of input.questions) {
|
||||
answers[question.question] = await promptCLI(question);
|
||||
}
|
||||
return { behavior: 'allow', updatedInput: { questions: input.questions, answers } };
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CLI Adapter (inquirer)
|
||||
|
||||
```typescript
|
||||
async function promptSelect(question: Question): Promise<string> {
|
||||
const { answer } = await inquirer.prompt([{
|
||||
type: 'list',
|
||||
name: 'answer',
|
||||
message: question.question,
|
||||
choices: question.options!.map(opt => ({ name: `${opt.label} - ${opt.description}`, value: opt.label })),
|
||||
}]);
|
||||
return answer;
|
||||
}
|
||||
```
|
||||
|
||||
**User sees:**
|
||||
```
|
||||
┌─ Priority ────────────────────────
|
||||
│ I found 47 utility functions. Which should I prioritize?
|
||||
|
||||
? Select an option:
|
||||
❯ all - Test all 47 functions
|
||||
top-10 - Focus on 10 most-used
|
||||
critical - Only critical path functions
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/session/
|
||||
├── index.ts # Main exports
|
||||
├── types.ts # TypeScript interfaces
|
||||
├── client.ts # Claude SDK wrapper
|
||||
├── question-handler.ts # AskUserQuestion logic
|
||||
├── cli-adapter.ts # Terminal UI (inquirer)
|
||||
├── modes.ts # Mode detection
|
||||
└── persistence.ts # Save/resume session
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
|
||||
"inquirer": "^9.2.0"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Interactive mode works in terminal
|
||||
- [ ] Non-interactive mode works in CI
|
||||
- [ ] Auto-answer mode uses provided defaults
|
||||
- [ ] Session state can be saved and resumed
|
||||
- [ ] Ctrl+C exits cleanly
|
||||
157
docs/03-test-renderers.md
Normal file
157
docs/03-test-renderers.md
Normal file
|
|
@@ -0,0 +1,157 @@
|
|||
# 3. Test Renderers - System Design
|
||||
|
||||
> **Priority**: 🟢 MEDIUM — Deterministic layer
|
||||
> **Complexity**: Medium
|
||||
> **Effort Estimate**: 8-12 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Test Renderers **deterministically transform** `EvalSpec` JSON into runnable test files. Key insight:
|
||||
- **Claude generates specs** (what to test, inputs, assertions)
|
||||
- **Renderers generate code** (deterministic, templated, no LLM)
|
||||
|
||||
This makes tests reliable, debuggable, and version-controllable.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Renderer Pipeline │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ EvalSpec │───▶│ Renderer │───▶│ Test Files │ │
|
||||
│ │ JSON │ │ (per-lang) │ │ (.py/.ts) │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │
|
||||
│ Supported: pytest (Python) | vitest (TS) | jest (TS) │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface RenderOptions {
|
||||
outputDir: string;
|
||||
framework: 'pytest' | 'vitest' | 'jest';
|
||||
includeFixtures: boolean;
|
||||
generateMocks: boolean;
|
||||
}
|
||||
|
||||
interface RenderResult {
|
||||
files: GeneratedFile[];
|
||||
stats: { scenarioCount: number; fileCount: number; assertionCount: number };
|
||||
}
|
||||
|
||||
interface GeneratedFile {
|
||||
path: string;
|
||||
content: string;
|
||||
scenarios: string[]; // Which scenario IDs
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Assertion Mapping
|
||||
|
||||
| EvalSpec Type | Python (pytest) | TypeScript (vitest) |
|
||||
|---------------|-----------------|---------------------|
|
||||
| `equals` | `assert result == expected` | `expect(result).toBe(expected)` |
|
||||
| `contains` | `assert key in result` | `expect(result).toContain(key)` |
|
||||
| `matches` | `assert re.match(pattern, result)` | `expect(result).toMatch(pattern)` |
|
||||
| `throws` | `pytest.raises(ExceptionType)` | `expect(() => fn()).toThrow()` |
|
||||
| `type` | `assert isinstance(result, Type)` | `expect(typeof result).toBe('type')` |
|
||||
|
||||
---
|
||||
|
||||
## Example Transformation
|
||||
|
||||
**EvalSpec scenario:**
|
||||
```json
|
||||
{
|
||||
"id": "auth-login-success",
|
||||
"target": { "module": "src/auth/login.py", "function": "login" },
|
||||
"input": { "args": { "username": "test", "password": "valid" } },
|
||||
"assertions": [
|
||||
{ "type": "type", "target": "return", "expected": "dict" },
|
||||
{ "type": "contains", "target": "return", "expected": "token" }
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Generated pytest:**
|
||||
```python
|
||||
def test_auth_login_success():
|
||||
"""Verify login returns JWT on valid credentials"""
|
||||
result = login("test", "valid")
|
||||
assert isinstance(result, dict)
|
||||
assert "token" in result
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/renderers/
|
||||
├── index.ts # Registry + main export
|
||||
├── types.ts # Interfaces
|
||||
├── base.ts # Abstract base renderer
|
||||
├── python/
|
||||
│ ├── pytest-renderer.ts
|
||||
│ ├── assertions.ts
|
||||
│ └── templates/
|
||||
│ └── test-file.py.hbs
|
||||
├── typescript/
|
||||
│ ├── vitest-renderer.ts
|
||||
│ ├── jest-renderer.ts
|
||||
│ └── assertions.ts
|
||||
└── utils/
|
||||
└── template-engine.ts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Incremental Rendering
|
||||
|
||||
```typescript
|
||||
async function renderIncremental(
|
||||
spec: EvalSpec,
|
||||
options: RenderOptions,
|
||||
changedFiles: string[]
|
||||
): Promise<RenderResult> {
|
||||
const filteredSpec = {
|
||||
...spec,
|
||||
scenarios: spec.scenarios.filter(s =>
|
||||
changedFiles.some(f => s.target.module.includes(f))
|
||||
),
|
||||
};
|
||||
return renderSpec(filteredSpec, options);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"handlebars": "^4.7.8"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Pytest renderer generates valid Python test files
|
||||
- [ ] Vitest renderer generates valid TypeScript test files
|
||||
- [ ] Generated tests pass linting
|
||||
- [ ] All assertion types are supported
|
||||
- [ ] Mocks and fixtures correctly generated
|
||||
- [ ] Incremental rendering works
|
||||
269
docs/04-functional-test-execution.md
Normal file
269
docs/04-functional-test-execution.md
Normal file
|
|
@@ -0,0 +1,269 @@
|
|||
# 4. Functional Test Execution & Grading - System Design
|
||||
|
||||
> **Priority**: 🟢 MEDIUM — Runtime layer
|
||||
> **Complexity**: Medium-High
|
||||
> **Effort Estimate**: 6-10 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Executes generated tests in a **sandboxed environment** and produces structured results. Tests run in isolation to prevent accidental side effects. Results feed into Promptfoo for aggregation and UI.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Test Execution Pipeline │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Test Files │───▶│ Sandbox │───▶│ Results │ │
|
||||
│ │ (.py/.ts) │ │ Runner │ │ JSON │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │ │
|
||||
│ ▼ ▼ │
|
||||
│ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ pytest/ │ │ Promptfoo │ │
|
||||
│ │ vitest │ │ Integration │ │
|
||||
│ └──────────────┘ └──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface ExecutionOptions {
|
||||
framework: 'pytest' | 'vitest' | 'jest';
|
||||
sandbox: boolean;
|
||||
timeout: number; // ms per test
|
||||
parallel: boolean;
|
||||
filter?: string[]; // Run specific test IDs
|
||||
}
|
||||
|
||||
interface ExecutionResult {
|
||||
summary: {
|
||||
total: number;
|
||||
passed: number;
|
||||
failed: number;
|
||||
skipped: number;
|
||||
duration: number;
|
||||
};
|
||||
tests: TestResult[];
|
||||
errors: string[];
|
||||
}
|
||||
|
||||
interface TestResult {
|
||||
id: string; // Maps to EvalScenario.id
|
||||
name: string;
|
||||
status: 'passed' | 'failed' | 'skipped' | 'error';
|
||||
duration: number;
|
||||
assertions: {
|
||||
passed: number;
|
||||
failed: number;
|
||||
details: AssertionResult[];
|
||||
};
|
||||
error?: { message: string; stack?: string };
|
||||
stdout?: string;
|
||||
stderr?: string;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Sandbox Configuration
|
||||
|
||||
```typescript
|
||||
const SANDBOX_CONFIG = {
|
||||
enabled: true,
|
||||
autoAllowBashIfSandboxed: true,
|
||||
network: {
|
||||
allowLocalBinding: true,
|
||||
allowOutbound: false, // No external network
|
||||
},
|
||||
filesystem: {
|
||||
readOnly: ['/'],
|
||||
writable: ['/tmp', './test-output'],
|
||||
},
|
||||
env: {
|
||||
inherit: ['PATH', 'HOME'],
|
||||
set: { CI: 'true', NODE_ENV: 'test' },
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Runner Implementations
|
||||
|
||||
### Pytest Runner
|
||||
|
||||
```typescript
|
||||
async function runPytest(testDir: string, options: ExecutionOptions): Promise<ExecutionResult> {
|
||||
const args = [
|
||||
'-v',
|
||||
'--tb=short',
|
||||
'--json-report',
|
||||
'--json-report-file=results.json',
|
||||
options.parallel ? '-n auto' : '',
|
||||
options.filter?.map(f => `-k ${f}`).join(' ') || '',
|
||||
].filter(Boolean);
|
||||
|
||||
const { exitCode, stdout, stderr } = await exec(
|
||||
`pytest ${args.join(' ')} ${testDir}`,
|
||||
{ timeout: options.timeout, cwd: testDir }
|
||||
);
|
||||
|
||||
const report = JSON.parse(await fs.readFile('results.json', 'utf-8'));
|
||||
return parseJsonReport(report);
|
||||
}
|
||||
```
|
||||
|
||||
### Vitest Runner
|
||||
|
||||
```typescript
|
||||
async function runVitest(testDir: string, options: ExecutionOptions): Promise<ExecutionResult> {
|
||||
const args = [
|
||||
'run',
|
||||
'--reporter=json',
|
||||
'--outputFile=results.json',
|
||||
options.filter?.length ? `--testNamePattern="${options.filter.join('|')}"` : '',
|
||||
].filter(Boolean);
|
||||
|
||||
const { exitCode } = await exec(
|
||||
`npx vitest ${args.join(' ')}`,
|
||||
{ timeout: options.timeout, cwd: testDir }
|
||||
);
|
||||
|
||||
const report = JSON.parse(await fs.readFile('results.json', 'utf-8'));
|
||||
return parseVitestReport(report);
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Promptfoo Integration
|
||||
|
||||
### Custom Provider (`providers/test-runner.py`)
|
||||
|
||||
```python
|
||||
def get_provider_response(prompt: str, options: dict, context: dict) -> dict:
|
||||
"""Runs tests and returns structured results."""
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
test_dir = options.get('test_dir', './tests')
|
||||
framework = options.get('framework', 'pytest')
|
||||
|
||||
if framework == 'pytest':
|
||||
result = subprocess.run(
|
||||
['pytest', '--json-report', '--json-report-file=/tmp/results.json', test_dir],
|
||||
capture_output=True, text=True, timeout=300
|
||||
)
|
||||
with open('/tmp/results.json') as f:
|
||||
report = json.load(f)
|
||||
|
||||
return {
|
||||
'output': json.dumps({
|
||||
'passed': report['summary']['passed'],
|
||||
'failed': report['summary']['failed'],
|
||||
'tests': report['tests'],
|
||||
}),
|
||||
'error': None,
|
||||
}
|
||||
```
|
||||
|
||||
### Promptfoo Config
|
||||
|
||||
```yaml
|
||||
providers:
|
||||
- id: file://providers/test-runner.py
|
||||
label: functional-tests
|
||||
config:
|
||||
test_dir: .evaluclaude/tests
|
||||
framework: pytest
|
||||
timeout: 300
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
scenario_id: auth-login-success
|
||||
assert:
|
||||
- type: python
|
||||
value: |
|
||||
import json
|
||||
result = json.loads(output)
|
||||
result['passed'] > 0 and result['failed'] == 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/runners/
|
||||
├── index.ts # Main entry + registry
|
||||
├── types.ts # Interfaces
|
||||
├── sandbox.ts # Isolation wrapper
|
||||
├── pytest-runner.ts # Python test execution
|
||||
├── vitest-runner.ts # Vitest execution
|
||||
├── jest-runner.ts # Jest execution
|
||||
└── result-parser.ts # Normalize results
|
||||
|
||||
providers/
|
||||
└── test-runner.py # Promptfoo provider
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Result Parsing
|
||||
|
||||
```typescript
|
||||
function parseJsonReport(report: any): ExecutionResult {
|
||||
return {
|
||||
summary: {
|
||||
total: report.summary.total,
|
||||
passed: report.summary.passed,
|
||||
failed: report.summary.failed,
|
||||
skipped: report.summary.skipped || 0,
|
||||
duration: report.duration,
|
||||
},
|
||||
tests: report.tests.map((t: any) => ({
|
||||
id: extractScenarioId(t.nodeid),
|
||||
name: t.nodeid,
|
||||
status: t.outcome,
|
||||
duration: t.call?.duration || 0,
|
||||
assertions: { passed: 0, failed: 0, details: [] },
|
||||
error: t.call?.crash ? { message: t.call.crash.message } : undefined,
|
||||
})),
|
||||
errors: [],
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"dependencies": {}
|
||||
}
|
||||
```
|
||||
|
||||
**Test framework deps (installed in target repo):**
|
||||
- `pytest`, `pytest-json-report`, `pytest-xdist` (Python)
|
||||
- `vitest` (TypeScript)
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Pytest tests run and produce JSON results
|
||||
- [ ] Vitest tests run and produce JSON results
|
||||
- [ ] Sandbox prevents network/filesystem escape
|
||||
- [ ] Results map back to EvalScenario IDs
|
||||
- [ ] Promptfoo integration works
|
||||
- [ ] Parallel execution supported
|
||||
305
docs/05-llm-rubric-graders.md
Normal file
305
docs/05-llm-rubric-graders.md
Normal file
|
|
@@ -0,0 +1,305 @@
|
|||
# 5. LLM Rubric Graders - System Design
|
||||
|
||||
> **Priority**: 🟢 MEDIUM — Subjective quality layer
|
||||
> **Complexity**: Medium
|
||||
> **Effort Estimate**: 4-6 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
LLM Rubric Graders use Claude to evaluate **subjective quality** that deterministic tests can't measure:
|
||||
- Code readability
|
||||
- Error message helpfulness
|
||||
- Documentation quality
|
||||
- API design consistency
|
||||
|
||||
These complement functional tests with human-like judgment.
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ LLM Grading Pipeline │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Output │───▶│ Rubric │───▶│ Grading │ │
|
||||
│ │ (code/ │ │ + Claude │ │ Result │ │
|
||||
│ │ text) │ │ │ │ │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │
|
||||
│ Uses Promptfoo │
|
||||
│ llm-rubric assertion │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface Rubric {
|
||||
name: string;
|
||||
description: string;
|
||||
criteria: RubricCriterion[];
|
||||
passingThreshold: number; // 0-1
|
||||
}
|
||||
|
||||
interface RubricCriterion {
|
||||
name: string;
|
||||
description: string;
|
||||
weight: number; // Relative weight
|
||||
examples?: {
|
||||
good: string;
|
||||
bad: string;
|
||||
};
|
||||
}
|
||||
|
||||
interface RubricGradingResult {
|
||||
pass: boolean;
|
||||
score: number; // 0-1
|
||||
reason: string;
|
||||
criterionScores: {
|
||||
name: string;
|
||||
score: number;
|
||||
feedback: string;
|
||||
}[];
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Rubric Examples
|
||||
|
||||
### Code Quality Rubric (`rubrics/code-quality.yaml`)
|
||||
|
||||
```yaml
|
||||
name: code-quality
|
||||
description: Evaluates generated code for quality and maintainability
|
||||
passingThreshold: 0.7
|
||||
|
||||
criteria:
|
||||
- name: readability
|
||||
weight: 0.3
|
||||
description: Code is easy to read and understand
|
||||
examples:
|
||||
good: "Clear variable names, logical flow, proper indentation"
|
||||
bad: "Single-letter variables, deeply nested logic, inconsistent style"
|
||||
|
||||
- name: correctness
|
||||
weight: 0.4
|
||||
description: Code correctly implements the intended behavior
|
||||
examples:
|
||||
good: "Handles edge cases, correct algorithm, proper error handling"
|
||||
bad: "Missing edge cases, off-by-one errors, swallowed exceptions"
|
||||
|
||||
- name: efficiency
|
||||
weight: 0.2
|
||||
description: Code uses appropriate data structures and algorithms
|
||||
examples:
|
||||
good: "O(n) where O(n) is optimal, avoids unnecessary allocations"
|
||||
bad: "O(n²) when O(n) is possible, creates objects in tight loops"
|
||||
|
||||
- name: maintainability
|
||||
weight: 0.1
|
||||
description: Code is easy to modify and extend
|
||||
examples:
|
||||
good: "Single responsibility, low coupling, clear interfaces"
|
||||
bad: "God functions, tight coupling, magic numbers"
|
||||
```
|
||||
|
||||
### Error Messages Rubric (`rubrics/error-messages.yaml`)
|
||||
|
||||
```yaml
|
||||
name: error-messages
|
||||
description: Evaluates quality of error messages
|
||||
passingThreshold: 0.6
|
||||
|
||||
criteria:
|
||||
- name: clarity
|
||||
weight: 0.4
|
||||
description: Error message clearly explains what went wrong
|
||||
|
||||
- name: actionability
|
||||
weight: 0.4
|
||||
description: Error message suggests how to fix the problem
|
||||
|
||||
- name: context
|
||||
weight: 0.2
|
||||
description: Error message includes relevant context (file, line, values)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Promptfoo Integration
|
||||
|
||||
### Using `llm-rubric` Assertion
|
||||
|
||||
```yaml
|
||||
# promptfooconfig.yaml
|
||||
tests:
|
||||
- vars:
|
||||
code_output: "{{generated_code}}"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: |
|
||||
Evaluate this code for quality:
|
||||
|
||||
{{code_output}}
|
||||
|
||||
Score on:
|
||||
1. Readability (0-10)
|
||||
2. Correctness (0-10)
|
||||
3. Efficiency (0-10)
|
||||
4. Maintainability (0-10)
|
||||
|
||||
Provide overall score and specific feedback.
|
||||
threshold: 0.7
|
||||
```
|
||||
|
||||
### Custom Python Grader
|
||||
|
||||
```python
|
||||
# graders/rubric_grader.py
|
||||
import json
|
||||
from anthropic import Anthropic
|
||||
|
||||
def get_assert(output: str, context: dict) -> dict:
|
||||
"""Grade output using LLM rubric."""
|
||||
rubric = context.get('config', {}).get('rubric', 'code-quality')
|
||||
rubric_def = load_rubric(rubric)
|
||||
|
||||
client = Anthropic()
|
||||
|
||||
prompt = f"""
|
||||
You are evaluating code quality against this rubric:
|
||||
|
||||
{json.dumps(rubric_def, indent=2)}
|
||||
|
||||
Code to evaluate:
|
||||
```
|
||||
{output}
|
||||
```
|
||||
|
||||
For each criterion, provide:
|
||||
1. Score (0-1)
|
||||
2. Brief feedback
|
||||
|
||||
Return JSON:
|
||||
{{
|
||||
"scores": {{"criterion_name": {{"score": 0.8, "feedback": "..."}}}},
|
||||
"overall": 0.75,
|
||||
"summary": "..."
|
||||
}}
|
||||
"""
|
||||
|
||||
response = client.messages.create(
|
||||
model="claude-sonnet-4-20250514",
|
||||
max_tokens=1024,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
)
|
||||
|
||||
result = json.loads(response.content[0].text)
|
||||
|
||||
return {
|
||||
"pass": result["overall"] >= rubric_def["passingThreshold"],
|
||||
"score": result["overall"],
|
||||
"reason": result["summary"],
|
||||
"namedScores": {k: v["score"] for k, v in result["scores"].items()},
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Calibration
|
||||
|
||||
LLM graders need calibration to ensure consistency:
|
||||
|
||||
```typescript
|
||||
interface CalibrationSet {
|
||||
rubric: string;
|
||||
examples: CalibrationExample[];
|
||||
}
|
||||
|
||||
interface CalibrationExample {
|
||||
input: string;
|
||||
expectedScore: number;
|
||||
expectedFeedback: string[];
|
||||
}
|
||||
|
||||
async function calibrate(rubric: Rubric, examples: CalibrationExample[]): Promise<CalibrationResult> {
|
||||
const results = await Promise.all(
|
||||
examples.map(ex => gradeWithRubric(ex.input, rubric))
|
||||
);
|
||||
|
||||
const agreement = results.filter((r, i) =>
|
||||
Math.abs(r.score - examples[i].expectedScore) < 0.1
|
||||
).length / results.length;
|
||||
|
||||
return {
|
||||
agreement,
|
||||
drift: results.map((r, i) => r.score - examples[i].expectedScore),
|
||||
needsAdjustment: agreement < 0.8,
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/graders/
|
||||
├── llm/
|
||||
│ ├── index.ts # Main entry
|
||||
│ ├── provider.ts # Promptfoo custom provider
|
||||
│ ├── rubric-loader.ts # Load YAML rubrics
|
||||
│ └── grader.ts # Core grading logic
|
||||
└── calibration/
|
||||
├── calibrator.ts # Calibration runner
|
||||
└── examples/ # Calibration datasets
|
||||
|
||||
rubrics/
|
||||
├── code-quality.yaml
|
||||
├── error-messages.yaml
|
||||
├── documentation.yaml
|
||||
└── api-design.yaml
|
||||
|
||||
graders/
|
||||
└── rubric_grader.py # Python grader for Promptfoo
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## When to Use LLM vs Deterministic
|
||||
|
||||
| Use LLM Graders | Use Deterministic |
|
||||
|-----------------|-------------------|
|
||||
| Subjective quality | Pass/fail assertions |
|
||||
| Style/readability | Type checking |
|
||||
| Helpfulness | Value equality |
|
||||
| Consistency | Error presence |
|
||||
| User experience | Performance thresholds |
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"js-yaml": "^4.1.0"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Rubrics load from YAML files
|
||||
- [ ] LLM grader produces consistent scores
|
||||
- [ ] Calibration detects drift
|
||||
- [ ] Integrates with Promptfoo `llm-rubric`
|
||||
- [ ] Custom Python grader works
|
||||
- [ ] >80% agreement with human judgment
|
||||
364
docs/06-observability-tracing.md
Normal file
364
docs/06-observability-tracing.md
Normal file
|
|
@ -0,0 +1,364 @@
|
|||
# 6. Observability & Tracing - System Design
|
||||
|
||||
> **Priority**: 🟡 HIGH — Debugging is critical
|
||||
> **Complexity**: Medium
|
||||
> **Effort Estimate**: 4-6 hours
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
Every eval run produces a **trace** capturing what Claude did and why. No black boxes. When a test fails, you can see:
|
||||
- What files Claude analyzed
|
||||
- What questions it asked
|
||||
- What specs it generated
|
||||
- The reasoning behind each decision
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ Observability Pipeline │
|
||||
├─────────────────────────────────────────────────────────────────┤
|
||||
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
|
||||
│ │ Claude Agent │───▶│ Tracer │───▶│ Trace Store │ │
|
||||
│ │ Hooks │ │ (collector) │ │ (.json) │ │
|
||||
│ └──────────────┘ └──────────────┘ └──────────────┘ │
|
||||
│ │ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────┐ │
|
||||
│ │ Trace Viewer │ │
|
||||
│ │ (Promptfoo) │ │
|
||||
│ └──────────────┘ │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Core Types
|
||||
|
||||
```typescript
|
||||
interface EvalTrace {
|
||||
id: string; // UUID
|
||||
evalId: string; // Links to EvalSpec
|
||||
startedAt: string;
|
||||
completedAt: string;
|
||||
duration: number; // ms
|
||||
|
||||
status: 'success' | 'partial' | 'failed';
|
||||
|
||||
introspection: {
|
||||
filesAnalyzed: string[];
|
||||
totalFunctions: number;
|
||||
totalClasses: number;
|
||||
duration: number;
|
||||
};
|
||||
|
||||
analysis: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
toolCalls: ToolCall[];
|
||||
questionsAsked: Question[];
|
||||
decisions: Decision[];
|
||||
};
|
||||
|
||||
generation: {
|
||||
scenariosGenerated: number;
|
||||
filesWritten: string[];
|
||||
};
|
||||
|
||||
execution: {
|
||||
testsPassed: number;
|
||||
testsFailed: number;
|
||||
testsSkipped: number;
|
||||
failures: TestFailure[];
|
||||
};
|
||||
|
||||
errors: TraceError[];
|
||||
}
|
||||
|
||||
interface ToolCall {
|
||||
timestamp: string;
|
||||
tool: string;
|
||||
input: any;
|
||||
output: any;
|
||||
duration: number;
|
||||
}
|
||||
|
||||
interface Decision {
|
||||
timestamp: string;
|
||||
type: 'include' | 'exclude' | 'prioritize' | 'question';
|
||||
subject: string; // What was decided about
|
||||
reasoning: string; // Why
|
||||
confidence: number; // 0-1
|
||||
}
|
||||
|
||||
interface TestFailure {
|
||||
scenarioId: string;
|
||||
error: string;
|
||||
stack?: string;
|
||||
expected?: any;
|
||||
actual?: any;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Hook-Based Collection
|
||||
|
||||
Use Claude Agent SDK hooks to capture everything:
|
||||
|
||||
```typescript
|
||||
import { ClaudeAgentOptions } from '@anthropic-ai/claude-agent-sdk';
|
||||
import { Tracer } from './tracer';
|
||||
|
||||
function createTracedOptions(tracer: Tracer): Partial<ClaudeAgentOptions> {
|
||||
return {
|
||||
hooks: {
|
||||
preToolUse: [{
|
||||
hooks: [async (input) => {
|
||||
tracer.recordToolStart(input.tool_name, input.tool_input);
|
||||
return { continue_: true };
|
||||
}]
|
||||
}],
|
||||
postToolUse: [{
|
||||
hooks: [async (input) => {
|
||||
tracer.recordToolEnd(input.tool_name, input.tool_output);
|
||||
return {};
|
||||
}]
|
||||
}],
|
||||
},
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Tracer Implementation
|
||||
|
||||
```typescript
|
||||
class Tracer {
|
||||
private trace: EvalTrace;
|
||||
private currentToolCall?: { name: string; input: any; startTime: number };
|
||||
|
||||
constructor(evalId: string) {
|
||||
this.trace = {
|
||||
id: crypto.randomUUID(),
|
||||
evalId,
|
||||
startedAt: new Date().toISOString(),
|
||||
completedAt: '',
|
||||
duration: 0,
|
||||
status: 'success',
|
||||
introspection: { filesAnalyzed: [], totalFunctions: 0, totalClasses: 0, duration: 0 },
|
||||
analysis: { promptTokens: 0, completionTokens: 0, toolCalls: [], questionsAsked: [], decisions: [] },
|
||||
generation: { scenariosGenerated: 0, filesWritten: [] },
|
||||
execution: { testsPassed: 0, testsFailed: 0, testsSkipped: 0, failures: [] },
|
||||
errors: [],
|
||||
};
|
||||
}
|
||||
|
||||
recordToolStart(name: string, input: any): void {
|
||||
this.currentToolCall = { name, input, startTime: Date.now() };
|
||||
}
|
||||
|
||||
recordToolEnd(name: string, output: any): void {
|
||||
if (this.currentToolCall?.name === name) {
|
||||
this.trace.analysis.toolCalls.push({
|
||||
timestamp: new Date().toISOString(),
|
||||
tool: name,
|
||||
input: this.currentToolCall.input,
|
||||
output,
|
||||
duration: Date.now() - this.currentToolCall.startTime,
|
||||
});
|
||||
this.currentToolCall = undefined;
|
||||
}
|
||||
}
|
||||
|
||||
recordQuestion(question: any, answer: string): void {
|
||||
this.trace.analysis.questionsAsked.push({
|
||||
...question,
|
||||
answer,
|
||||
timestamp: new Date().toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
recordDecision(type: Decision['type'], subject: string, reasoning: string, confidence: number): void {
|
||||
this.trace.analysis.decisions.push({
|
||||
timestamp: new Date().toISOString(),
|
||||
type,
|
||||
subject,
|
||||
reasoning,
|
||||
confidence,
|
||||
});
|
||||
}
|
||||
|
||||
recordError(error: Error, context?: string): void {
|
||||
this.trace.errors.push({
|
||||
timestamp: new Date().toISOString(),
|
||||
message: error.message,
|
||||
stack: error.stack,
|
||||
context,
|
||||
});
|
||||
this.trace.status = 'failed';
|
||||
}
|
||||
|
||||
finalize(): EvalTrace {
|
||||
this.trace.completedAt = new Date().toISOString();
|
||||
this.trace.duration = new Date(this.trace.completedAt).getTime() - new Date(this.trace.startedAt).getTime();
|
||||
return this.trace;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trace Storage
|
||||
|
||||
```typescript
|
||||
const TRACES_DIR = '.evaluclaude/traces';
|
||||
|
||||
async function saveTrace(trace: EvalTrace): Promise<string> {
|
||||
await fs.mkdir(TRACES_DIR, { recursive: true });
|
||||
const filePath = path.join(TRACES_DIR, `${trace.id}.json`);
|
||||
await fs.writeFile(filePath, JSON.stringify(trace, null, 2));
|
||||
return filePath;
|
||||
}
|
||||
|
||||
async function loadTrace(traceId: string): Promise<EvalTrace> {
|
||||
const filePath = path.join(TRACES_DIR, `${traceId}.json`);
|
||||
const content = await fs.readFile(filePath, 'utf-8');
|
||||
return JSON.parse(content);
|
||||
}
|
||||
|
||||
async function listTraces(evalId?: string): Promise<EvalTrace[]> {
|
||||
const files = await fs.readdir(TRACES_DIR);
|
||||
const traces = await Promise.all(
|
||||
files.filter(f => f.endsWith('.json')).map(f => loadTrace(f.replace('.json', '')))
|
||||
);
|
||||
return evalId ? traces.filter(t => t.evalId === evalId) : traces;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Promptfoo Integration
|
||||
|
||||
Link traces to test results:
|
||||
|
||||
```yaml
|
||||
# promptfooconfig.yaml
|
||||
defaultTest:
|
||||
metadata:
|
||||
traceFile: .evaluclaude/traces/{{evalId}}.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trace Viewer CLI
|
||||
|
||||
```typescript
|
||||
// src/cli/commands/view.ts
|
||||
import { Command } from 'commander';
|
||||
import { loadTrace, listTraces } from '../observability/trace-store';
|
||||
|
||||
export const viewCommand = new Command('view')
|
||||
.description('View eval trace')
|
||||
.argument('[trace-id]', 'Specific trace ID')
|
||||
.option('--last', 'View most recent trace')
|
||||
.option('--json', 'Output raw JSON')
|
||||
.action(async (traceId, options) => {
|
||||
let trace: EvalTrace;
|
||||
|
||||
if (options.last) {
|
||||
const traces = await listTraces();
|
||||
trace = traces.sort((a, b) =>
|
||||
new Date(b.startedAt).getTime() - new Date(a.startedAt).getTime()
|
||||
)[0];
|
||||
} else {
|
||||
trace = await loadTrace(traceId);
|
||||
}
|
||||
|
||||
if (options.json) {
|
||||
console.log(JSON.stringify(trace, null, 2));
|
||||
} else {
|
||||
displayTrace(trace);
|
||||
}
|
||||
});
|
||||
|
||||
function displayTrace(trace: EvalTrace): void {
|
||||
console.log(`\n📊 Trace: ${trace.id}`);
|
||||
console.log(` Status: ${trace.status}`);
|
||||
console.log(` Duration: ${trace.duration}ms`);
|
||||
console.log(`\n📂 Introspection:`);
|
||||
console.log(` Files: ${trace.introspection.filesAnalyzed.length}`);
|
||||
console.log(` Functions: ${trace.introspection.totalFunctions}`);
|
||||
console.log(`\n🤖 Analysis:`);
|
||||
console.log(` Tool calls: ${trace.analysis.toolCalls.length}`);
|
||||
console.log(` Questions: ${trace.analysis.questionsAsked.length}`);
|
||||
console.log(` Decisions: ${trace.analysis.decisions.length}`);
|
||||
console.log(`\n🧪 Execution:`);
|
||||
console.log(` ✅ Passed: ${trace.execution.testsPassed}`);
|
||||
console.log(` ❌ Failed: ${trace.execution.testsFailed}`);
|
||||
|
||||
if (trace.execution.failures.length > 0) {
|
||||
console.log(`\n❌ Failures:`);
|
||||
trace.execution.failures.forEach(f => {
|
||||
console.log(` - ${f.scenarioId}: ${f.error}`);
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
```
|
||||
src/observability/
|
||||
├── index.ts # Main exports
|
||||
├── tracer.ts # Hook-based collection
|
||||
├── trace-store.ts # Persist to filesystem
|
||||
├── trace-viewer.ts # Format for display
|
||||
└── types.ts # EvalTrace interface
|
||||
|
||||
.evaluclaude/
|
||||
└── traces/
|
||||
├── abc123.json
|
||||
├── def456.json
|
||||
└── ...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## What Gets Traced
|
||||
|
||||
| Phase | Captured |
|
||||
|-------|----------|
|
||||
| Introspection | Files parsed, functions/classes found, duration |
|
||||
| Analysis | Every tool call, questions asked, decisions made |
|
||||
| Generation | Scenarios created, files written |
|
||||
| Execution | Test results, failures with context |
|
||||
| Errors | Any exceptions with stack traces |
|
||||
|
||||
---
|
||||
|
||||
## Dependencies
|
||||
|
||||
```json
|
||||
{
|
||||
"dependencies": {}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- [ ] Every eval run produces a trace
|
||||
- [ ] Traces capture all tool calls
|
||||
- [ ] Questions and answers are recorded
|
||||
- [ ] Test failures link to trace
|
||||
- [ ] CLI viewer displays traces clearly
|
||||
- [ ] Traces stored efficiently (<1MB each)
|
||||
3823
package-lock.json
generated
Normal file
3823
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
46
package.json
Normal file
46
package.json
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
{
|
||||
"name": "evaluclaude-harness",
|
||||
"version": "0.1.0",
|
||||
"description": "Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.",
|
||||
"type": "module",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"bin": {
|
||||
"evaluclaude": "./dist/cli/index.js"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"dev": "tsc --watch",
|
||||
"start": "node dist/cli/index.js",
|
||||
"test": "vitest",
|
||||
"lint": "eslint src --ext .ts",
|
||||
"typecheck": "tsc --noEmit"
|
||||
},
|
||||
"keywords": [
|
||||
"eval",
|
||||
"claude",
|
||||
"testing",
|
||||
"ai",
|
||||
"code-analysis"
|
||||
],
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"commander": "^12.1.0",
|
||||
"glob": "^10.4.0",
|
||||
"inquirer": "^9.2.0",
|
||||
"tree-sitter": "^0.21.1",
|
||||
"tree-sitter-python": "^0.21.0",
|
||||
"tree-sitter-typescript": "^0.21.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/inquirer": "^9.0.7",
|
||||
"@types/node": "^20.14.0",
|
||||
"eslint": "^8.57.0",
|
||||
"typescript": "^5.4.5",
|
||||
"vitest": "^1.6.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
}
|
||||
118
src/cli/commands/intro.ts
Normal file
118
src/cli/commands/intro.ts
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
import { Command } from 'commander';
|
||||
import * as path from 'node:path';
|
||||
import { analyze, treeToString } from '../../introspector/index.js';
|
||||
|
||||
export const introCommand = new Command('intro')
|
||||
.description('Introspect a codebase and output its structure (tree-sitter analysis)')
|
||||
.argument('[path]', 'Path to the repository to analyze', '.')
|
||||
.option('-o, --output <file>', 'Output file for the RepoSummary JSON')
|
||||
.option('--json', 'Output as JSON (default)')
|
||||
.option('--summary', 'Output a human-readable summary instead of JSON')
|
||||
.option('--tree', 'Show file tree structure')
|
||||
.action(async (repoPath: string, options: { output?: string; json?: boolean; summary?: boolean; tree?: boolean }) => {
|
||||
const absolutePath = path.resolve(repoPath);
|
||||
|
||||
console.log(`\n🔍 Analyzing: ${absolutePath}\n`);
|
||||
|
||||
try {
|
||||
const summary = await analyze({
|
||||
root: absolutePath,
|
||||
onProgress: (msg) => console.log(` ${msg}`),
|
||||
});
|
||||
|
||||
console.log('');
|
||||
|
||||
if (options.tree && summary.tree) {
|
||||
console.log('📁 File Tree:\n');
|
||||
console.log(treeToString(summary.tree));
|
||||
console.log('');
|
||||
} else if (options.summary) {
|
||||
printHumanSummary(summary);
|
||||
} else {
|
||||
const json = JSON.stringify(summary, null, 2);
|
||||
|
||||
if (options.output) {
|
||||
const fs = await import('node:fs/promises');
|
||||
await fs.writeFile(options.output, json);
|
||||
console.log(`📄 Written to: ${options.output}`);
|
||||
} else {
|
||||
console.log(json);
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('❌ Error analyzing repository:', error);
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
function printHumanSummary(summary: import('../../introspector/types.js').RepoSummary): void {
|
||||
console.log('📊 Repository Summary');
|
||||
console.log('─'.repeat(50));
|
||||
console.log(`📁 Root: ${summary.root}`);
|
||||
console.log(`🗓️ Analyzed: ${summary.analyzedAt}`);
|
||||
console.log(`🔤 Languages: ${summary.languages.join(', ') || 'none detected'}`);
|
||||
|
||||
console.log('\n📂 Files:');
|
||||
console.log(` Total: ${summary.files.length}`);
|
||||
console.log(` Source: ${summary.files.filter(f => f.role === 'source').length}`);
|
||||
console.log(` Test: ${summary.files.filter(f => f.role === 'test').length}`);
|
||||
console.log(` Config: ${summary.files.filter(f => f.role === 'config').length}`);
|
||||
|
||||
console.log('\n📦 Modules:');
|
||||
console.log(` Total: ${summary.modules.length}`);
|
||||
|
||||
const totalExports = summary.modules.reduce((sum, m) => sum + m.exports.length, 0);
|
||||
const functions = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'function'));
|
||||
const classes = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'class'));
|
||||
|
||||
console.log(` Functions: ${functions.length}`);
|
||||
console.log(` Classes: ${classes.length}`);
|
||||
console.log(` Total exports: ${totalExports}`);
|
||||
|
||||
if (summary.config.python) {
|
||||
console.log('\n🐍 Python:');
|
||||
console.log(` Test framework: ${summary.config.python.testFramework}`);
|
||||
console.log(` pyproject.toml: ${summary.config.python.pyprojectToml ? '✓' : '✗'}`);
|
||||
console.log(` setup.py: ${summary.config.python.setupPy ? '✓' : '✗'}`);
|
||||
}
|
||||
|
||||
if (summary.config.typescript) {
|
||||
console.log('\n📘 TypeScript:');
|
||||
console.log(` Test framework: ${summary.config.typescript.testFramework}`);
|
||||
console.log(` package.json: ${summary.config.typescript.packageJson ? '✓' : '✗'}`);
|
||||
console.log(` tsconfig.json: ${summary.config.typescript.tsconfig ? '✓' : '✗'}`);
|
||||
}
|
||||
|
||||
if (summary.git) {
|
||||
console.log('\n📌 Git:');
|
||||
console.log(` Branch: ${summary.git.branch}`);
|
||||
console.log(` Commit: ${summary.git.currentCommit.slice(0, 8)}`);
|
||||
|
||||
if (summary.git.recentCommits && summary.git.recentCommits.length > 0) {
|
||||
console.log('\n📜 Recent Commits:');
|
||||
for (const commit of summary.git.recentCommits.slice(0, 5)) {
|
||||
const date = new Date(commit.date).toLocaleDateString();
|
||||
console.log(` ${commit.shortHash} ${date} - ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (summary.git.fileHistory && summary.git.fileHistory.length > 0) {
|
||||
console.log('\n🔥 Most Active Files (by commit count):');
|
||||
for (const file of summary.git.fileHistory.slice(0, 5)) {
|
||||
console.log(` ${file.path} (${file.commitCount} commits)`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Show top modules by export count
|
||||
const topModules = [...summary.modules]
|
||||
.sort((a, b) => b.exports.length - a.exports.length)
|
||||
.slice(0, 5);
|
||||
|
||||
if (topModules.length > 0) {
|
||||
console.log('\n🏆 Top modules by exports:');
|
||||
for (const mod of topModules) {
|
||||
console.log(` ${mod.path}: ${mod.exports.length} exports`);
|
||||
}
|
||||
}
|
||||
}
|
||||
15
src/cli/index.ts
Normal file
15
src/cli/index.ts
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
import { Command } from 'commander';
|
||||
import { introCommand } from './commands/intro.js';
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name('evaluclaude')
|
||||
.description('Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.')
|
||||
.version('0.1.0');
|
||||
|
||||
program.addCommand(introCommand);
|
||||
|
||||
program.parse(process.argv);
|
||||
1
src/index.ts
Normal file
1
src/index.ts
Normal file
|
|
@ -0,0 +1 @@
|
|||
export * from './introspector/index.js';
|
||||
199
src/introspector/git.ts
Normal file
199
src/introspector/git.ts
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
import { exec } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import type { GitInfo, CommitInfo, FileHistoryInfo } from './types.js';
|
||||
|
||||
const execAsync = promisify(exec);
|
||||
const MAX_COMMITS = 20;
|
||||
const MAX_FILE_HISTORY = 50;
|
||||
|
||||
export async function getGitInfo(root: string, lastCommit?: string): Promise<GitInfo | undefined> {
|
||||
try {
|
||||
// Check if it's a git repo
|
||||
await execAsync('git rev-parse --git-dir', { cwd: root });
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
try {
|
||||
const [currentCommitResult, branchResult] = await Promise.all([
|
||||
execAsync('git rev-parse HEAD', { cwd: root }),
|
||||
execAsync('git branch --show-current', { cwd: root }),
|
||||
]);
|
||||
|
||||
const currentCommit = currentCommitResult.stdout.trim();
|
||||
const branch = branchResult.stdout.trim() || 'HEAD';
|
||||
|
||||
let changedSince: string[] = [];
|
||||
if (lastCommit && lastCommit !== currentCommit) {
|
||||
changedSince = await getChangedFiles(root, lastCommit);
|
||||
}
|
||||
|
||||
// Fetch recent commits
|
||||
const recentCommits = await getRecentCommits(root);
|
||||
|
||||
// Fetch file history (most frequently changed files)
|
||||
const fileHistory = await getFileHistory(root);
|
||||
|
||||
return {
|
||||
currentCommit,
|
||||
lastAnalyzedCommit: lastCommit || currentCommit,
|
||||
changedSince,
|
||||
branch,
|
||||
recentCommits,
|
||||
fileHistory,
|
||||
};
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export async function getChangedFiles(root: string, since: string): Promise<string[]> {
|
||||
try {
|
||||
const { stdout } = await execAsync(`git diff --name-only ${since}`, { cwd: root });
|
||||
return stdout
|
||||
.split('\n')
|
||||
.filter(f => f && isSourceFile(f));
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
export async function getCurrentCommit(root: string): Promise<string | undefined> {
|
||||
try {
|
||||
const { stdout } = await execAsync('git rev-parse HEAD', { cwd: root });
|
||||
return stdout.trim();
|
||||
} catch {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
export async function isGitRepo(root: string): Promise<boolean> {
|
||||
try {
|
||||
await execAsync('git rev-parse --git-dir', { cwd: root });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function isSourceFile(filePath: string): boolean {
|
||||
return /\.(py|ts|tsx|js|jsx)$/.test(filePath);
|
||||
}
|
||||
|
||||
export async function getRecentCommits(root: string, limit: number = MAX_COMMITS): Promise<CommitInfo[]> {
|
||||
try {
|
||||
// Format: hash|short|author|date|message|filesChanged
|
||||
const { stdout } = await execAsync(
|
||||
`git log -${limit} --pretty=format:"%H|%h|%an|%aI|%s" --shortstat`,
|
||||
{ cwd: root, maxBuffer: 1024 * 1024 }
|
||||
);
|
||||
|
||||
const commits: CommitInfo[] = [];
|
||||
const lines = stdout.split('\n');
|
||||
|
||||
let i = 0;
|
||||
while (i < lines.length) {
|
||||
const line = lines[i]?.trim();
|
||||
if (!line) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const parts = line.split('|');
|
||||
if (parts.length >= 5) {
|
||||
// Parse the commit line
|
||||
const [hash, shortHash, author, date, ...messageParts] = parts;
|
||||
const message = messageParts.join('|'); // In case message contains |
|
||||
|
||||
// Look for stats line (next non-empty line)
|
||||
let filesChanged = 0;
|
||||
if (i + 1 < lines.length) {
|
||||
const statsLine = lines[i + 1]?.trim();
|
||||
if (statsLine) {
|
||||
const match = statsLine.match(/(\d+) files? changed/);
|
||||
if (match) {
|
||||
filesChanged = parseInt(match[1], 10);
|
||||
i++; // Skip stats line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
commits.push({
|
||||
hash,
|
||||
shortHash,
|
||||
author,
|
||||
date,
|
||||
message,
|
||||
filesChanged,
|
||||
});
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
return commits;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
export async function getFileHistory(root: string, limit: number = MAX_FILE_HISTORY): Promise<FileHistoryInfo[]> {
|
||||
try {
|
||||
// Get the most frequently modified source files
|
||||
const { stdout } = await execAsync(
|
||||
`git log --pretty=format: --name-only | grep -E '\\.(py|ts|tsx|js|jsx)$' | sort | uniq -c | sort -rn | head -${limit}`,
|
||||
{ cwd: root, maxBuffer: 1024 * 1024, shell: '/bin/bash' }
|
||||
);
|
||||
|
||||
const files: FileHistoryInfo[] = [];
|
||||
|
||||
for (const line of stdout.split('\n')) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) continue;
|
||||
|
||||
const match = trimmed.match(/^\s*(\d+)\s+(.+)$/);
|
||||
if (match) {
|
||||
const commitCount = parseInt(match[1], 10);
|
||||
const filePath = match[2];
|
||||
|
||||
// Get contributors for this file
|
||||
const contributors = await getFileContributors(root, filePath);
|
||||
const lastModified = await getFileLastModified(root, filePath);
|
||||
|
||||
files.push({
|
||||
path: filePath,
|
||||
commitCount,
|
||||
lastModified,
|
||||
contributors,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return files;
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async function getFileContributors(root: string, filePath: string): Promise<string[]> {
|
||||
try {
|
||||
const { stdout } = await execAsync(
|
||||
`git log --pretty=format:"%an" -- "${filePath}" | sort -u | head -5`,
|
||||
{ cwd: root, shell: '/bin/bash' }
|
||||
);
|
||||
return stdout.split('\n').filter(s => s.trim()).slice(0, 5);
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
async function getFileLastModified(root: string, filePath: string): Promise<string> {
|
||||
try {
|
||||
const { stdout } = await execAsync(
|
||||
`git log -1 --pretty=format:"%aI" -- "${filePath}"`,
|
||||
{ cwd: root }
|
||||
);
|
||||
return stdout.trim();
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
25
src/introspector/index.ts
Normal file
25
src/introspector/index.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
export { analyze, analyzeIncremental } from './summarizer.js';
|
||||
export { scanDirectory, detectConfig } from './scanner.js';
|
||||
export { getGitInfo, getChangedFiles, getCurrentCommit, isGitRepo, getRecentCommits, getFileHistory } from './git.js';
|
||||
export { buildFileTree, treeToString, getTreeStats } from './tree.js';
|
||||
export { PythonParser } from './parsers/python.js';
|
||||
export { TypeScriptParser } from './parsers/typescript.js';
|
||||
|
||||
export type {
|
||||
RepoSummary,
|
||||
FileInfo,
|
||||
ModuleInfo,
|
||||
ExportInfo,
|
||||
ConfigInfo,
|
||||
GitInfo,
|
||||
CommitInfo,
|
||||
FileHistoryInfo,
|
||||
FileTreeNode,
|
||||
Language,
|
||||
} from './types.js';
|
||||
|
||||
import { analyze as analyzeRepo } from './summarizer.js';
|
||||
|
||||
export async function introspect(repoPath: string): Promise<import('./types.js').RepoSummary> {
|
||||
return analyzeRepo({ root: repoPath });
|
||||
}
|
||||
29
src/introspector/parsers/base.ts
Normal file
29
src/introspector/parsers/base.ts
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import type { ModuleInfo, ExportInfo } from '../types.js';
|
||||
|
||||
/**
 * Raw result produced by a language parser for one source file: the
 * exported symbols found plus the module specifiers it imports.
 *
 * NOTE(review): not referenced by BaseParser in this file — presumably
 * consumed by concrete parser implementations; confirm before removing.
 */
export interface ParserResult {
  exports: ExportInfo[];   // symbols the module exposes
  imports: string[];       // imported module specifiers
}
|
||||
|
||||
export abstract class BaseParser {
|
||||
abstract readonly language: string;
|
||||
|
||||
abstract parse(source: string, filePath: string): ModuleInfo;
|
||||
|
||||
protected getText(source: string, startIndex: number, endIndex: number): string {
|
||||
return source.slice(startIndex, endIndex);
|
||||
}
|
||||
|
||||
protected calculateComplexity(exportCount: number): ModuleInfo['complexity'] {
|
||||
if (exportCount <= 5) return 'low';
|
||||
if (exportCount <= 15) return 'medium';
|
||||
return 'high';
|
||||
}
|
||||
|
||||
protected extractFirstLineOfDocstring(docstring: string | undefined): string | undefined {
|
||||
if (!docstring) return undefined;
|
||||
const trimmed = docstring.trim();
|
||||
const firstLine = trimmed.split('\n')[0];
|
||||
return firstLine.replace(/^["']{1,3}|["']{1,3}$/g, '').trim() || undefined;
|
||||
}
|
||||
}
|
||||
167
src/introspector/parsers/python.ts
Normal file
167
src/introspector/parsers/python.ts
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
import Parser from 'tree-sitter';
|
||||
import Python from 'tree-sitter-python';
|
||||
import { BaseParser } from './base.js';
|
||||
import type { ModuleInfo, ExportInfo } from '../types.js';
|
||||
|
||||
export class PythonParser extends BaseParser {
|
||||
readonly language = 'python';
|
||||
private parser: Parser;
|
||||
|
||||
  // Create a tree-sitter parser instance pre-configured with the
  // Python grammar; reused for every file this parser handles.
  constructor() {
    super();
    this.parser = new Parser();
    this.parser.setLanguage(Python);
  }
|
||||
|
||||
parse(source: string, filePath: string): ModuleInfo {
|
||||
const tree = this.parser.parse(source);
|
||||
const rootNode = tree.rootNode;
|
||||
|
||||
const exports: ExportInfo[] = [];
|
||||
const imports: string[] = [];
|
||||
|
||||
// Walk the tree to extract functions, classes, and imports
|
||||
this.walkNode(rootNode, source, exports, imports);
|
||||
|
||||
return {
|
||||
path: filePath,
|
||||
exports,
|
||||
imports: [...new Set(imports)],
|
||||
complexity: this.calculateComplexity(exports.length),
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Dispatch on a syntax node's type, accumulating exports and imports.
   *
   * Recursion only descends into `module` and `decorated_definition`
   * nodes, so only top-level functions/classes are recorded; methods
   * inside a class body are not emitted as separate exports.
   */
  private walkNode(
    node: Parser.SyntaxNode,
    source: string,
    exports: ExportInfo[],
    imports: string[]
  ): void {
    switch (node.type) {
      case 'function_definition':
        exports.push(this.extractFunction(node, source));
        break;

      case 'class_definition':
        exports.push(this.extractClass(node, source));
        break;

      case 'import_statement':
        imports.push(...this.extractImport(node, source));
        break;

      case 'import_from_statement':
        imports.push(...this.extractFromImport(node, source));
        break;

      default:
        // Recurse into children for top-level nodes
        if (node.type === 'module' || node.type === 'decorated_definition') {
          for (const child of node.children) {
            this.walkNode(child, source, exports, imports);
          }
        }
    }
  }
|
||||
|
||||
private extractFunction(node: Parser.SyntaxNode, source: string): ExportInfo {
|
||||
const nameNode = node.childForFieldName('name');
|
||||
const paramsNode = node.childForFieldName('parameters');
|
||||
const returnTypeNode = node.childForFieldName('return_type');
|
||||
const bodyNode = node.childForFieldName('body');
|
||||
|
||||
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
|
||||
|
||||
// Build signature
|
||||
let signature = '';
|
||||
if (paramsNode) {
|
||||
signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
|
||||
}
|
||||
if (returnTypeNode) {
|
||||
signature += ` -> ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
|
||||
}
|
||||
|
||||
// Check for async
|
||||
const isAsync = node.children.some(c => c.type === 'async');
|
||||
|
||||
// Try to extract docstring
|
||||
let docstring: string | undefined;
|
||||
if (bodyNode && bodyNode.firstChild?.type === 'expression_statement') {
|
||||
const exprStmt = bodyNode.firstChild;
|
||||
const strNode = exprStmt.firstChild;
|
||||
if (strNode?.type === 'string') {
|
||||
docstring = this.extractFirstLineOfDocstring(
|
||||
this.getText(source, strNode.startIndex, strNode.endIndex)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
name,
|
||||
kind: 'function',
|
||||
signature: signature || undefined,
|
||||
docstring,
|
||||
lineNumber: node.startPosition.row + 1,
|
||||
isAsync,
|
||||
};
|
||||
}
|
||||
|
||||
private extractClass(node: Parser.SyntaxNode, source: string): ExportInfo {
|
||||
const nameNode = node.childForFieldName('name');
|
||||
const bodyNode = node.childForFieldName('body');
|
||||
|
||||
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
|
||||
|
||||
// Try to extract docstring
|
||||
let docstring: string | undefined;
|
||||
if (bodyNode && bodyNode.firstChild?.type === 'expression_statement') {
|
||||
const exprStmt = bodyNode.firstChild;
|
||||
const strNode = exprStmt.firstChild;
|
||||
if (strNode?.type === 'string') {
|
||||
docstring = this.extractFirstLineOfDocstring(
|
||||
this.getText(source, strNode.startIndex, strNode.endIndex)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Build a basic signature showing inheritance
|
||||
let signature: string | undefined;
|
||||
const superclassNode = node.childForFieldName('superclasses');
|
||||
if (superclassNode) {
|
||||
signature = this.getText(source, superclassNode.startIndex, superclassNode.endIndex);
|
||||
}
|
||||
|
||||
return {
|
||||
name,
|
||||
kind: 'class',
|
||||
signature,
|
||||
docstring,
|
||||
lineNumber: node.startPosition.row + 1,
|
||||
};
|
||||
}
|
||||
|
||||
private extractImport(node: Parser.SyntaxNode, source: string): string[] {
|
||||
const imports: string[] = [];
|
||||
|
||||
for (const child of node.children) {
|
||||
if (child.type === 'dotted_name') {
|
||||
imports.push(this.getText(source, child.startIndex, child.endIndex));
|
||||
} else if (child.type === 'aliased_import') {
|
||||
const nameNode = child.childForFieldName('name');
|
||||
if (nameNode) {
|
||||
imports.push(this.getText(source, nameNode.startIndex, nameNode.endIndex));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return imports;
|
||||
}
|
||||
|
||||
private extractFromImport(node: Parser.SyntaxNode, source: string): string[] {
|
||||
const moduleNode = node.childForFieldName('module_name');
|
||||
if (moduleNode) {
|
||||
return [this.getText(source, moduleNode.startIndex, moduleNode.endIndex)];
|
||||
}
|
||||
return [];
|
||||
}
|
||||
}
|
||||
188
src/introspector/parsers/typescript.ts
Normal file
188
src/introspector/parsers/typescript.ts
Normal file
|
|
@ -0,0 +1,188 @@
|
|||
import Parser from 'tree-sitter';
|
||||
import TypeScriptLang from 'tree-sitter-typescript';
|
||||
import { BaseParser } from './base.js';
|
||||
import type { ModuleInfo, ExportInfo } from '../types.js';
|
||||
|
||||
const { typescript: TypeScript } = TypeScriptLang;
|
||||
|
||||
export class TypeScriptParser extends BaseParser {
  readonly language = 'typescript';
  // Tree-sitter parser instance, configured once with the TypeScript grammar.
  private parser: Parser;

  constructor() {
    super();
    this.parser = new Parser();
    this.parser.setLanguage(TypeScript);
  }

  /**
   * Parse TypeScript source into a ModuleInfo summary.
   *
   * Collects top-level functions, classes, variables, type aliases and
   * interfaces (tracking whether each is exported), plus imported module
   * specifiers, by walking the tree-sitter AST; no code is executed.
   *
   * @param source - Raw TypeScript source text.
   * @param filePath - Path stored verbatim on the returned ModuleInfo.
   */
  parse(source: string, filePath: string): ModuleInfo {
    const tree = this.parser.parse(source);
    const rootNode = tree.rootNode;

    const exports: ExportInfo[] = [];
    const imports: string[] = [];

    this.walkNode(rootNode, source, exports, imports, false);

    return {
      path: filePath,
      exports,
      // De-duplicate: the same specifier may be imported more than once.
      imports: [...new Set(imports)],
      complexity: this.calculateComplexity(exports.length),
    };
  }

  /**
   * Dispatch on node type, accumulating results into `exports`/`imports`.
   * `isExported` is set when descending through an `export_statement`
   * wrapper so the wrapped declaration is flagged as exported. Only
   * 'program' and 'export_statement' nodes are recursed into, so nested
   * declarations are intentionally not reported.
   */
  private walkNode(
    node: Parser.SyntaxNode,
    source: string,
    exports: ExportInfo[],
    imports: string[],
    isExported: boolean
  ): void {
    switch (node.type) {
      case 'function_declaration':
        exports.push(this.extractFunction(node, source, isExported));
        break;

      case 'class_declaration':
        exports.push(this.extractClass(node, source, isExported));
        break;

      case 'lexical_declaration':
      case 'variable_declaration':
        exports.push(...this.extractVariables(node, source, isExported));
        break;

      case 'type_alias_declaration':
      case 'interface_declaration':
        exports.push(this.extractTypeDefinition(node, source, isExported));
        break;

      case 'export_statement':
        // Recurse with isExported = true
        for (const child of node.children) {
          this.walkNode(child, source, exports, imports, true);
        }
        break;

      case 'import_statement':
        imports.push(...this.extractImport(node, source));
        break;

      case 'program':
        // Recurse into top-level statements
        for (const child of node.children) {
          this.walkNode(child, source, exports, imports, false);
        }
        break;
    }
  }

  /**
   * Build an ExportInfo for a `function_declaration`: name, a
   * "(params): ret" signature, async flag, and export status.
   */
  private extractFunction(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
    const nameNode = node.childForFieldName('name');
    const paramsNode = node.childForFieldName('parameters');
    const returnTypeNode = node.childForFieldName('return_type');

    const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';

    // Build signature
    let signature = '';
    if (paramsNode) {
      signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
    }
    if (returnTypeNode) {
      signature += `: ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
    }

    // Check for async: look for an 'async' token among the direct children
    const isAsync = node.children.some(c => c.type === 'async');

    return {
      name,
      kind: 'function',
      // Empty string means "no signature": normalize to undefined.
      signature: signature || undefined,
      lineNumber: node.startPosition.row + 1, // tree-sitter rows are 0-based
      isAsync,
      isExported,
    };
  }

  /**
   * Build an ExportInfo for a `class_declaration`: name, the heritage
   * clause (extends/implements) as the signature, and export status.
   */
  private extractClass(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
    const nameNode = node.childForFieldName('name');
    const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';

    // Get heritage clause for extends/implements
    let signature: string | undefined;
    const heritageNode = node.children.find(c => c.type === 'class_heritage');
    if (heritageNode) {
      signature = this.getText(source, heritageNode.startIndex, heritageNode.endIndex);
    }

    return {
      name,
      kind: 'class',
      signature,
      lineNumber: node.startPosition.row + 1, // tree-sitter rows are 0-based
      isExported,
    };
  }

  /**
   * Extract one ExportInfo per declarator in a `const`/`let`/`var`
   * statement. Declarators initialized with a function or arrow function
   * are reported as functions; everything else as constants.
   */
  private extractVariables(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo[] {
    const exports: ExportInfo[] = [];

    for (const child of node.children) {
      if (child.type === 'variable_declarator') {
        const nameNode = child.childForFieldName('name');
        const valueNode = child.childForFieldName('value');

        if (nameNode) {
          const name = this.getText(source, nameNode.startIndex, nameNode.endIndex);

          // Check if it's a function expression or arrow function
          // ('function' covers older grammar versions' node name)
          const isFunction = valueNode && (
            valueNode.type === 'arrow_function' ||
            valueNode.type === 'function_expression' ||
            valueNode.type === 'function'
          );

          exports.push({
            name,
            kind: isFunction ? 'function' : 'constant',
            lineNumber: child.startPosition.row + 1, // declarator's own line
            isExported,
            // async arrow/function expressions carry an 'async' child token
            isAsync: valueNode?.children.some(c => c.type === 'async'),
          });
        }
      }
    }

    return exports;
  }

  /**
   * Build an ExportInfo for a `type` alias or `interface` declaration.
   */
  private extractTypeDefinition(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
    const nameNode = node.childForFieldName('name');
    const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';

    return {
      name,
      kind: 'type',
      lineNumber: node.startPosition.row + 1, // tree-sitter rows are 0-based
      isExported,
    };
  }

  /**
   * Extract the module specifier (quotes stripped) from an import
   * statement, e.g. `import x from './y.js'` yields `./y.js`.
   */
  private extractImport(node: Parser.SyntaxNode, source: string): string[] {
    const imports: string[] = [];

    for (const child of node.children) {
      if (child.type === 'string') {
        // Remove quotes from the import path
        const importPath = this.getText(source, child.startIndex, child.endIndex)
          .replace(/^["']|["']$/g, '');
        imports.push(importPath);
      }
    }

    return imports;
  }
}
|
||||
213
src/introspector/scanner.ts
Normal file
213
src/introspector/scanner.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
import { glob } from 'glob';
|
||||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import type { FileInfo } from './types.js';
|
||||
|
||||
// Glob patterns excluded from scanning: dependency caches, build output,
// Python virtual environments, VCS metadata, and coverage artifacts.
const IGNORE_PATTERNS = [
  'node_modules/**',
  '.git/**',
  '__pycache__/**',
  '*.pyc',
  'dist/**',
  'build/**',
  '.venv/**',
  'venv/**',
  '.env/**',
  'env/**',
  'coverage/**',
  '.next/**',
  '.nuxt/**',
];
|
||||
|
||||
export async function scanDirectory(root: string): Promise<FileInfo[]> {
|
||||
const patterns = ['**/*.py', '**/*.ts', '**/*.tsx', '**/*.js', '**/*.jsx'];
|
||||
|
||||
const files: string[] = [];
|
||||
for (const pattern of patterns) {
|
||||
const matches = await glob(pattern, {
|
||||
cwd: root,
|
||||
ignore: IGNORE_PATTERNS,
|
||||
nodir: true,
|
||||
});
|
||||
files.push(...matches);
|
||||
}
|
||||
|
||||
const uniqueFiles = [...new Set(files)];
|
||||
|
||||
const fileInfos = await Promise.all(
|
||||
uniqueFiles.map(async (relativePath) => {
|
||||
const fullPath = path.join(root, relativePath);
|
||||
try {
|
||||
const stats = await fs.stat(fullPath);
|
||||
return {
|
||||
path: relativePath,
|
||||
lang: detectLanguage(relativePath),
|
||||
role: detectRole(relativePath),
|
||||
size: stats.size,
|
||||
lastModified: stats.mtime.toISOString(),
|
||||
} satisfies FileInfo;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
})
|
||||
);
|
||||
|
||||
return fileInfos.filter((f): f is FileInfo => f !== null);
|
||||
}
|
||||
|
||||
function detectLanguage(filePath: string): FileInfo['lang'] {
|
||||
const ext = path.extname(filePath).toLowerCase();
|
||||
switch (ext) {
|
||||
case '.py':
|
||||
return 'python';
|
||||
case '.ts':
|
||||
case '.tsx':
|
||||
return 'typescript';
|
||||
case '.js':
|
||||
case '.jsx':
|
||||
return 'typescript'; // Treat JS as TS for parsing
|
||||
default:
|
||||
return 'other';
|
||||
}
|
||||
}
|
||||
|
||||
function detectRole(filePath: string): FileInfo['role'] {
|
||||
const lowerPath = filePath.toLowerCase();
|
||||
const fileName = lowerPath.split('/').pop() || '';
|
||||
|
||||
// Test files - be more specific to avoid false positives
|
||||
if (
|
||||
lowerPath.includes('__tests__') ||
|
||||
lowerPath.includes('/tests/') ||
|
||||
lowerPath.includes('/test/') ||
|
||||
fileName.endsWith('_test.py') ||
|
||||
fileName.endsWith('.test.ts') ||
|
||||
fileName.endsWith('.test.tsx') ||
|
||||
fileName.endsWith('.test.js') ||
|
||||
fileName.endsWith('.spec.ts') ||
|
||||
fileName.endsWith('.spec.tsx') ||
|
||||
fileName.endsWith('.spec.js') ||
|
||||
fileName.startsWith('test_')
|
||||
) {
|
||||
return 'test';
|
||||
}
|
||||
|
||||
// Config files
|
||||
if (
|
||||
lowerPath.includes('config') ||
|
||||
lowerPath.includes('settings') ||
|
||||
lowerPath.includes('.env') ||
|
||||
lowerPath.endsWith('conftest.py') ||
|
||||
lowerPath.endsWith('setup.py') ||
|
||||
lowerPath.endsWith('pyproject.toml')
|
||||
) {
|
||||
return 'config';
|
||||
}
|
||||
|
||||
// Documentation
|
||||
if (
|
||||
lowerPath.includes('docs') ||
|
||||
lowerPath.includes('doc') ||
|
||||
lowerPath.includes('readme')
|
||||
) {
|
||||
return 'docs';
|
||||
}
|
||||
|
||||
return 'source';
|
||||
}
|
||||
|
||||
export async function detectConfig(root: string): Promise<{
|
||||
python?: {
|
||||
entryPoints: string[];
|
||||
testFramework: 'pytest' | 'unittest' | 'none';
|
||||
hasTyping: boolean;
|
||||
pyprojectToml: boolean;
|
||||
setupPy: boolean;
|
||||
};
|
||||
typescript?: {
|
||||
entryPoints: string[];
|
||||
testFramework: 'vitest' | 'jest' | 'none';
|
||||
hasTypes: boolean;
|
||||
packageJson: boolean;
|
||||
tsconfig: boolean;
|
||||
};
|
||||
}> {
|
||||
const config: ReturnType<typeof detectConfig> extends Promise<infer T> ? T : never = {};
|
||||
|
||||
// Check for Python project
|
||||
const hasPyprojectToml = await fileExists(path.join(root, 'pyproject.toml'));
|
||||
const hasSetupPy = await fileExists(path.join(root, 'setup.py'));
|
||||
const hasRequirementsTxt = await fileExists(path.join(root, 'requirements.txt'));
|
||||
|
||||
if (hasPyprojectToml || hasSetupPy || hasRequirementsTxt) {
|
||||
let testFramework: 'pytest' | 'unittest' | 'none' = 'none';
|
||||
|
||||
// Check for pytest
|
||||
if (hasPyprojectToml) {
|
||||
try {
|
||||
const content = await fs.readFile(path.join(root, 'pyproject.toml'), 'utf-8');
|
||||
if (content.includes('pytest')) {
|
||||
testFramework = 'pytest';
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
if (testFramework === 'none' && hasRequirementsTxt) {
|
||||
try {
|
||||
const content = await fs.readFile(path.join(root, 'requirements.txt'), 'utf-8');
|
||||
if (content.includes('pytest')) {
|
||||
testFramework = 'pytest';
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
config.python = {
|
||||
entryPoints: [],
|
||||
testFramework,
|
||||
hasTyping: false,
|
||||
pyprojectToml: hasPyprojectToml,
|
||||
setupPy: hasSetupPy,
|
||||
};
|
||||
}
|
||||
|
||||
// Check for TypeScript/JavaScript project
|
||||
const hasPackageJson = await fileExists(path.join(root, 'package.json'));
|
||||
const hasTsconfig = await fileExists(path.join(root, 'tsconfig.json'));
|
||||
|
||||
if (hasPackageJson || hasTsconfig) {
|
||||
let testFramework: 'vitest' | 'jest' | 'none' = 'none';
|
||||
|
||||
if (hasPackageJson) {
|
||||
try {
|
||||
const content = await fs.readFile(path.join(root, 'package.json'), 'utf-8');
|
||||
const pkg = JSON.parse(content);
|
||||
const allDeps = { ...pkg.dependencies, ...pkg.devDependencies };
|
||||
|
||||
if ('vitest' in allDeps) {
|
||||
testFramework = 'vitest';
|
||||
} else if ('jest' in allDeps) {
|
||||
testFramework = 'jest';
|
||||
}
|
||||
} catch {}
|
||||
}
|
||||
|
||||
config.typescript = {
|
||||
entryPoints: [],
|
||||
testFramework,
|
||||
hasTypes: hasTsconfig,
|
||||
packageJson: hasPackageJson,
|
||||
tsconfig: hasTsconfig,
|
||||
};
|
||||
}
|
||||
|
||||
return config;
|
||||
}
|
||||
|
||||
async function fileExists(filePath: string): Promise<boolean> {
|
||||
try {
|
||||
await fs.access(filePath);
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
134
src/introspector/summarizer.ts
Normal file
134
src/introspector/summarizer.ts
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { scanDirectory, detectConfig } from './scanner.js';
|
||||
import { PythonParser } from './parsers/python.js';
|
||||
import { TypeScriptParser } from './parsers/typescript.js';
|
||||
import { getGitInfo, getChangedFiles } from './git.js';
|
||||
import { buildFileTree } from './tree.js';
|
||||
import type { RepoSummary, ModuleInfo, FileInfo, Language } from './types.js';
|
||||
|
||||
/** Options controlling a repository analysis run. */
export interface AnalyzeOptions {
  /** Path to the repository root to analyze. */
  root: string;
  /** When true, this run is part of an incremental re-analysis. */
  incremental?: boolean;
  /** Forwarded to git info collection as the previous-analysis commit. */
  lastCommit?: string;
  /** Restrict analysis to these repo-relative paths (e.g. changed files). */
  onlyFiles?: string[];
  /** Callback for human-readable progress messages. */
  onProgress?: (message: string) => void;
}
|
||||
|
||||
export async function analyze(options: AnalyzeOptions): Promise<RepoSummary> {
|
||||
const { root, incremental, lastCommit, onlyFiles, onProgress } = options;
|
||||
|
||||
onProgress?.('Scanning directory...');
|
||||
let files = await scanDirectory(root);
|
||||
|
||||
// Filter for incremental analysis
|
||||
if (onlyFiles && onlyFiles.length > 0) {
|
||||
files = files.filter(f => onlyFiles.includes(f.path));
|
||||
onProgress?.(`Filtered to ${files.length} changed files`);
|
||||
}
|
||||
|
||||
onProgress?.(`Found ${files.length} source files`);
|
||||
|
||||
// Initialize parsers
|
||||
const pythonParser = new PythonParser();
|
||||
const tsParser = new TypeScriptParser();
|
||||
|
||||
const modules: ModuleInfo[] = [];
|
||||
const sourceFiles = files.filter(f => f.role === 'source' && f.lang !== 'other');
|
||||
|
||||
onProgress?.(`Parsing ${sourceFiles.length} modules...`);
|
||||
|
||||
for (const file of sourceFiles) {
|
||||
const fullPath = path.join(root, file.path);
|
||||
|
||||
try {
|
||||
const source = await fs.readFile(fullPath, 'utf-8');
|
||||
|
||||
let moduleInfo: ModuleInfo;
|
||||
if (file.lang === 'python') {
|
||||
moduleInfo = pythonParser.parse(source, file.path);
|
||||
} else if (file.lang === 'typescript') {
|
||||
moduleInfo = tsParser.parse(source, file.path);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
modules.push(moduleInfo);
|
||||
} catch (error) {
|
||||
// Skip files that can't be parsed
|
||||
onProgress?.(`Warning: Could not parse ${file.path}`);
|
||||
}
|
||||
}
|
||||
|
||||
onProgress?.('Detecting project configuration...');
|
||||
const config = await detectConfig(root);
|
||||
|
||||
onProgress?.('Getting git info...');
|
||||
const git = await getGitInfo(root, lastCommit);
|
||||
|
||||
onProgress?.('Building file tree...');
|
||||
const tree = buildFileTree(files, path.basename(root));
|
||||
|
||||
// Detect languages used
|
||||
const languages = detectLanguages(files);
|
||||
|
||||
onProgress?.('Analysis complete');
|
||||
|
||||
return {
|
||||
languages,
|
||||
root,
|
||||
analyzedAt: new Date().toISOString(),
|
||||
files,
|
||||
modules,
|
||||
config,
|
||||
git,
|
||||
tree,
|
||||
};
|
||||
}
|
||||
|
||||
export async function analyzeIncremental(
|
||||
root: string,
|
||||
lastCommit: string,
|
||||
onProgress?: (message: string) => void
|
||||
): Promise<RepoSummary> {
|
||||
onProgress?.('Getting changed files since last commit...');
|
||||
const changedFiles = await getChangedFiles(root, lastCommit);
|
||||
|
||||
if (changedFiles.length === 0) {
|
||||
onProgress?.('No files changed');
|
||||
// Return minimal summary
|
||||
return {
|
||||
languages: [],
|
||||
root,
|
||||
analyzedAt: new Date().toISOString(),
|
||||
files: [],
|
||||
modules: [],
|
||||
config: {},
|
||||
git: await getGitInfo(root, lastCommit),
|
||||
};
|
||||
}
|
||||
|
||||
onProgress?.(`Found ${changedFiles.length} changed files`);
|
||||
|
||||
return analyze({
|
||||
root,
|
||||
incremental: true,
|
||||
lastCommit,
|
||||
onlyFiles: changedFiles,
|
||||
onProgress,
|
||||
});
|
||||
}
|
||||
|
||||
function detectLanguages(files: FileInfo[]): Language[] {
|
||||
const languages = new Set<Language>();
|
||||
|
||||
for (const file of files) {
|
||||
if (file.lang === 'python') {
|
||||
languages.add('python');
|
||||
} else if (file.lang === 'typescript') {
|
||||
languages.add('typescript');
|
||||
}
|
||||
}
|
||||
|
||||
return [...languages];
|
||||
}
|
||||
157
src/introspector/tree.ts
Normal file
157
src/introspector/tree.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import type { FileInfo, FileTreeNode } from './types.js';
|
||||
|
||||
export function buildFileTree(files: FileInfo[], rootName: string = '.'): FileTreeNode {
|
||||
const root: FileTreeNode = {
|
||||
name: rootName,
|
||||
path: '',
|
||||
type: 'directory',
|
||||
children: [],
|
||||
};
|
||||
|
||||
// Build a map for quick lookup
|
||||
const nodeMap = new Map<string, FileTreeNode>();
|
||||
nodeMap.set('', root);
|
||||
|
||||
// Sort files to ensure parents are created before children
|
||||
const sortedFiles = [...files].sort((a, b) => a.path.localeCompare(b.path));
|
||||
|
||||
for (const file of sortedFiles) {
|
||||
const parts = file.path.split('/');
|
||||
let currentPath = '';
|
||||
|
||||
// Create all parent directories
|
||||
for (let i = 0; i < parts.length - 1; i++) {
|
||||
const parentPath = currentPath;
|
||||
currentPath = currentPath ? `${currentPath}/${parts[i]}` : parts[i];
|
||||
|
||||
if (!nodeMap.has(currentPath)) {
|
||||
const dirNode: FileTreeNode = {
|
||||
name: parts[i],
|
||||
path: currentPath,
|
||||
type: 'directory',
|
||||
children: [],
|
||||
};
|
||||
nodeMap.set(currentPath, dirNode);
|
||||
|
||||
// Add to parent
|
||||
const parent = nodeMap.get(parentPath);
|
||||
if (parent && parent.children) {
|
||||
parent.children.push(dirNode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Create the file node
|
||||
const fileName = parts[parts.length - 1];
|
||||
const fileNode: FileTreeNode = {
|
||||
name: fileName,
|
||||
path: file.path,
|
||||
type: 'file',
|
||||
lang: file.lang,
|
||||
role: file.role,
|
||||
};
|
||||
|
||||
// Add to parent directory
|
||||
const parentPath = parts.slice(0, -1).join('/');
|
||||
const parent = nodeMap.get(parentPath);
|
||||
if (parent && parent.children) {
|
||||
parent.children.push(fileNode);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort children alphabetically (directories first)
|
||||
sortTreeRecursive(root);
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
function sortTreeRecursive(node: FileTreeNode): void {
|
||||
if (node.children) {
|
||||
node.children.sort((a, b) => {
|
||||
// Directories first
|
||||
if (a.type !== b.type) {
|
||||
return a.type === 'directory' ? -1 : 1;
|
||||
}
|
||||
return a.name.localeCompare(b.name);
|
||||
});
|
||||
|
||||
for (const child of node.children) {
|
||||
sortTreeRecursive(child);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export function treeToString(node: FileTreeNode, prefix: string = '', isLast: boolean = true): string {
|
||||
const lines: string[] = [];
|
||||
|
||||
const connector = isLast ? '└── ' : '├── ';
|
||||
const extension = isLast ? ' ' : '│ ';
|
||||
|
||||
if (node.path === '') {
|
||||
// Root node
|
||||
lines.push(node.name);
|
||||
} else {
|
||||
const icon = node.type === 'directory' ? '📁' : getFileIcon(node.lang, node.role);
|
||||
lines.push(`${prefix}${connector}${icon} ${node.name}`);
|
||||
}
|
||||
|
||||
if (node.children) {
|
||||
const children = node.children;
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const child = children[i];
|
||||
const childIsLast = i === children.length - 1;
|
||||
const newPrefix = node.path === '' ? '' : prefix + extension;
|
||||
lines.push(treeToString(child, newPrefix, childIsLast));
|
||||
}
|
||||
}
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function getFileIcon(lang?: string, role?: string): string {
|
||||
if (role === 'test') return '🧪';
|
||||
if (role === 'config') return '⚙️';
|
||||
if (role === 'docs') return '📄';
|
||||
|
||||
switch (lang) {
|
||||
case 'python': return '🐍';
|
||||
case 'typescript': return '📘';
|
||||
default: return '📄';
|
||||
}
|
||||
}
|
||||
|
||||
export function getTreeStats(node: FileTreeNode): {
|
||||
directories: number;
|
||||
files: number;
|
||||
byLang: Record<string, number>;
|
||||
byRole: Record<string, number>;
|
||||
} {
|
||||
const stats = {
|
||||
directories: 0,
|
||||
files: 0,
|
||||
byLang: {} as Record<string, number>,
|
||||
byRole: {} as Record<string, number>,
|
||||
};
|
||||
|
||||
function traverse(n: FileTreeNode): void {
|
||||
if (n.type === 'directory') {
|
||||
stats.directories++;
|
||||
if (n.children) {
|
||||
for (const child of n.children) {
|
||||
traverse(child);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
stats.files++;
|
||||
if (n.lang) {
|
||||
stats.byLang[n.lang] = (stats.byLang[n.lang] || 0) + 1;
|
||||
}
|
||||
if (n.role) {
|
||||
stats.byRole[n.role] = (stats.byRole[n.role] || 0) + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
traverse(node);
|
||||
return stats;
|
||||
}
|
||||
88
src/introspector/types.ts
Normal file
88
src/introspector/types.ts
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/** Complete result of analyzing a repository. */
export interface RepoSummary {
  /** Languages detected among the scanned files. */
  languages: ('python' | 'typescript')[];
  /** Repository root path the analysis ran against. */
  root: string;
  /** ISO-8601 timestamp of when the summary was produced. */
  analyzedAt: string;
  /** Every discovered source/test/config/docs file. */
  files: FileInfo[];
  /** Per-module parse results for Python/TypeScript source files. */
  modules: ModuleInfo[];
  /** Detected project configuration, per language. */
  config: ConfigInfo;
  /** Git metadata; absent when unavailable. */
  git?: GitInfo;
  /** Nested directory tree; absent in minimal (no-change) summaries. */
  tree?: FileTreeNode;
}

/** A single file discovered during the scan. */
export interface FileInfo {
  /** Path relative to the repository root. */
  path: string;
  /** Parser language ('typescript' also covers JS); 'other' is unparsed. */
  lang: 'python' | 'typescript' | 'other';
  /** Heuristic classification of the file's purpose. */
  role: 'source' | 'test' | 'config' | 'docs';
  /** File size in bytes. */
  size: number;
  /** ISO-8601 mtime recorded at scan time. */
  lastModified: string;
}

/** Parse summary for one source module. */
export interface ModuleInfo {
  /** Path relative to the repository root. */
  path: string;
  /** Top-level definitions found in the module. */
  exports: ExportInfo[];
  /** De-duplicated imported module names/specifiers. */
  imports: string[];
  /** Rating derived from the number of extracted definitions. */
  complexity: 'low' | 'medium' | 'high';
}

/** One top-level definition extracted from a module. */
export interface ExportInfo {
  /** Identifier of the definition. */
  name: string;
  /** What kind of definition this is. */
  kind: 'function' | 'class' | 'constant' | 'type';
  /** Params/return for functions; inheritance clause for classes. */
  signature?: string;
  /** First line of the docstring (populated by the Python parser). */
  docstring?: string;
  /** 1-based line where the definition starts. */
  lineNumber: number;
  /** True for async functions; unset when not applicable. */
  isAsync?: boolean;
  /** Export status (tracked by the TypeScript parser only). */
  isExported?: boolean;
}

/** Detected per-language project configuration. */
export interface ConfigInfo {
  python?: {
    /** Candidate entry-point files (currently always left empty). */
    entryPoints: string[];
    /** Test framework inferred from dependency declarations. */
    testFramework: 'pytest' | 'unittest' | 'none';
    /** Whether the project uses typing (currently always false). */
    hasTyping: boolean;
    /** pyproject.toml present at the root. */
    pyprojectToml: boolean;
    /** setup.py present at the root. */
    setupPy: boolean;
  };
  typescript?: {
    /** Candidate entry-point files (currently always left empty). */
    entryPoints: string[];
    /** Test framework inferred from package.json dependencies. */
    testFramework: 'vitest' | 'jest' | 'none';
    /** Whether the project is typed (derived from tsconfig presence). */
    hasTypes: boolean;
    /** package.json present at the root. */
    packageJson: boolean;
    /** tsconfig.json present at the root. */
    tsconfig: boolean;
  };
}

/**
 * Git metadata captured during analysis.
 * NOTE(review): field semantics inferred from usage in summarizer.ts;
 * confirm against the git.ts implementation (not shown here).
 */
export interface GitInfo {
  /** Commit the previous analysis was taken at. */
  lastAnalyzedCommit: string;
  /** Commit currently checked out. */
  currentCommit: string;
  /** Paths changed since the last analyzed commit. */
  changedSince: string[];
  /** Current branch name. */
  branch: string;
  /** Recent commit log entries, when collected. */
  recentCommits?: CommitInfo[];
  /** Per-file change statistics, when collected. */
  fileHistory?: FileHistoryInfo[];
}

/** One entry in the recent commit log. */
export interface CommitInfo {
  /** Full commit hash. */
  hash: string;
  /** Abbreviated commit hash. */
  shortHash: string;
  /** Commit author. */
  author: string;
  /** Commit date. */
  date: string;
  /** Commit message/subject. */
  message: string;
  /** Number of files touched by the commit. */
  filesChanged: number;
}

/** Change statistics for a single file. */
export interface FileHistoryInfo {
  /** Path relative to the repository root. */
  path: string;
  /** Number of commits that touched this file. */
  commitCount: number;
  /** Date of the most recent change. */
  lastModified: string;
  /** Authors who have touched this file. */
  contributors: string[];
}

/** Node in the rendered file tree. */
export interface FileTreeNode {
  /** Basename of the file or directory. */
  name: string;
  /** Path relative to the root ('' for the root node). */
  path: string;
  type: 'file' | 'directory';
  /** Present on directory nodes only. */
  children?: FileTreeNode[];
  /** Copied from FileInfo for file nodes. */
  lang?: 'python' | 'typescript' | 'other';
  /** Copied from FileInfo for file nodes. */
  role?: 'source' | 'test' | 'config' | 'docs';
}

/** Languages the introspector can parse. */
export type Language = 'python' | 'typescript';
|
||||
21
tsconfig.json
Normal file
21
tsconfig.json
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "NodeNext",
|
||||
"moduleResolution": "NodeNext",
|
||||
"lib": ["ES2022"],
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"strict": true,
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"resolveJsonModule": true,
|
||||
"allowSyntheticDefaultImports": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "tests"]
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue