iteration 0

Harivansh Rathi 2026-01-11 16:58:40 -05:00
commit 4b24606d0e
25 changed files with 7843 additions and 0 deletions

.gitignore (new file):
/node_modules
/dist

AGENTS.md (new file):
# Evaluclaude Harness - Agent Instructions
## Project Overview
This is a CLI tool for generating evaluation tests for codebases using Claude. The core philosophy is "Zero-to-evals in one command."
## Commands
```bash
# Build the project
npm run build
# Run typecheck
npm run typecheck
# Run tests
npm test
# Run the CLI
npm start -- intro <path>
```
## Project Structure
```
src/
├── cli/ # Commander.js CLI
├── introspector/ # Tree-sitter codebase parsing (NO LLM)
│ ├── parsers/ # Language-specific parsers
│ ├── scanner.ts # File discovery
│ ├── git.ts # Git integration
│ └── summarizer.ts # Main analysis logic
└── index.ts # Main exports
```
## Key Principles
1. **Tree-sitter for introspection**: Never send raw code to Claude for structure extraction
2. **Claude generates specs, not code**: EvalSpec JSON is generated by Claude, test code is rendered deterministically
3. **Git-aware incremental**: Only re-analyze changed files
## Dependencies
- `tree-sitter`: Native AST parsing
- `tree-sitter-python`: Python grammar
- `tree-sitter-typescript`: TypeScript grammar
- `commander`: CLI framework
- `glob`: File pattern matching
## Testing
Use vitest for testing. Test files go in `tests/` directory.

PLAN.md (new file): diff suppressed because it is too large.
# 0. Tree-Sitter Introspector - System Design
> **Priority**: 🔴 FOUNDATIONAL — Build this first
> **Complexity**: Medium
> **Effort Estimate**: 6-10 hours
---
## Overview
The Tree-Sitter Introspector parses Python and TypeScript codebases locally using tree-sitter AST parsing, extracting structured metadata (functions, classes, imports) **without** sending raw code to Claude. This saves tokens, is faster, and produces reliable structured data.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Introspector Module │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ File Scanner │───▶│ Tree-Sitter │───▶│ Summarizer │ │
│ │ (glob/git) │ │ Parsers │ │ │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ File list + Per-file AST RepoSummary │
│ metadata extracts JSON │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface RepoSummary {
languages: ('python' | 'typescript')[];
root: string;
analyzedAt: string;
files: FileInfo[];
modules: ModuleInfo[];
config: ConfigInfo;
git?: GitInfo;
}
interface ModuleInfo {
path: string;
exports: ExportInfo[];
imports: string[];
complexity: 'low' | 'medium' | 'high';
}
interface ExportInfo {
name: string;
kind: 'function' | 'class' | 'constant' | 'type';
signature?: string;
docstring?: string;
lineNumber: number;
isAsync?: boolean;
}
```
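`ModuleInfo.complexity` is declared but the design does not say how it is derived. One plausible heuristic for `summarizer.ts` is to bucket modules by export count; the thresholds below are illustrative assumptions, not part of the spec:

```typescript
// Hypothetical heuristic for ModuleInfo.complexity. The field exists in the
// spec; the export-count buckets (5, 15) are invented for illustration.
type Complexity = 'low' | 'medium' | 'high';

function classifyComplexity(exportCount: number): Complexity {
  if (exportCount <= 5) return 'low';
  if (exportCount <= 15) return 'medium';
  return 'high';
}
```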
---
## Key Implementation Details
### Tree-Sitter Queries (Python)
```typescript
const FUNCTION_QUERY = `
(function_definition
name: (identifier) @name
parameters: (parameters) @params
return_type: (type)? @return_type
) @func
`;
const CLASS_QUERY = `
(class_definition
name: (identifier) @name
body: (block) @body
) @class
`;
```
### Git-Aware Incremental
```typescript
import { exec as execCb } from 'node:child_process';
import { promisify } from 'node:util';
const exec = promisify(execCb);

async function getChangedFiles(since: string): Promise<string[]> {
  const { stdout } = await exec(`git diff --name-only ${since}`);
  // Drop the empty string left by the trailing newline before matching
  return stdout.split('\n').filter(f => f && /\.(py|ts|tsx)$/.test(f));
}
```
---
## File Structure
```
src/introspector/
├── index.ts # Main entry point
├── types.ts # TypeScript interfaces
├── scanner.ts # File discovery
├── parsers/
│ ├── python.ts # Python tree-sitter queries
│ └── typescript.ts # TS tree-sitter queries
├── git.ts # Git integration
└── summarizer.ts # Combine into RepoSummary
```
---
## Dependencies
```json
{
"tree-sitter": "^0.21.0",
"tree-sitter-python": "^0.21.0",
"tree-sitter-typescript": "^0.21.0",
"glob": "^10.3.0"
}
```
---
## Success Criteria
- [ ] Parses Python files (functions, classes, imports)
- [ ] Parses TypeScript files (functions, classes, imports)
- [ ] Handles 1000+ file repos in <10 seconds
- [ ] Incremental mode only parses changed files
- [ ] Gracefully handles syntax errors

# 1. Codebase Analyzer Prompt - System Design
> **Priority**: 🟡 HIGH — Core LLM logic
> **Complexity**: High (prompt engineering)
> **Effort Estimate**: 8-12 hours (iterative refinement)
---
## Overview
The Codebase Analyzer takes structured `RepoSummary` from the introspector and generates `EvalSpec` JSON defining what tests to create. Key insight: **Claude generates specs, not code**. Test code is deterministically rendered from specs.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Codebase Analyzer Agent │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ RepoSummary │───▶│ Claude Agent │───▶│ EvalSpec │ │
│ │ JSON │ │ SDK │ │ JSON │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │AskUserQuestion│ │
│ │ (optional) │ │
│ └──────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface EvalSpec {
version: '1.0';
repo: { name: string; languages: string[]; analyzedAt: string };
scenarios: EvalScenario[];
grading: {
deterministic: DeterministicGrade[];
rubrics: RubricGrade[];
};
metadata: {
generatedBy: string;
totalTokens: number;
questionsAsked: number;
confidence: 'low' | 'medium' | 'high';
};
}
interface EvalScenario {
id: string; // "auth-login-success"
name: string;
description: string;
target: {
module: string;
function: string;
type: 'function' | 'method' | 'class';
};
category: 'unit' | 'integration' | 'edge-case' | 'negative';
priority: 'critical' | 'high' | 'medium' | 'low';
setup?: { fixtures: string[]; mocks: MockSpec[] };
input: { args: Record<string, any>; kwargs?: Record<string, any> };
assertions: Assertion[];
tags: string[];
}
```
---
## Prompt Architecture (Three-Part)
### 1. System Prompt
- Defines Claude's identity as codebase analyzer
- Constraints: functional tests only, no syntax checks; ask, don't assume
### 2. Developer Prompt
- Contains EvalSpec JSON schema
- Formatting rules (snake_case, kebab-case IDs)
- Assertion type reference
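The kebab-case ID rule can be enforced in `validator.ts` before accepting a spec. A minimal check, assuming the usual kebab-case regex (the spec only names the convention):

```typescript
// Validates scenario IDs against the kebab-case rule from the developer
// prompt (e.g. "auth-login-success"). The exact regex is an assumption.
const KEBAB_CASE = /^[a-z0-9]+(-[a-z0-9]+)*$/;

function isValidScenarioId(id: string): boolean {
  return KEBAB_CASE.test(id);
}
```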
### 3. User Prompt (Template)
- Injects RepoSummary JSON
- User context about what to evaluate
- Instructions for output format
---
## Key Implementation
```typescript
async function generateEvalSpec(options: GenerateOptions): Promise<EvalSpec> {
const agentOptions: ClaudeAgentOptions = {
systemPrompt: await loadPrompt('analyzer-system.md'),
permissionMode: options.interactive ? 'default' : 'dontAsk',
canUseTool: async ({ toolName, input }) => {
if (toolName === 'AskUserQuestion' && options.onQuestion) {
const answer = await options.onQuestion(input);
return { behavior: 'allow', updatedInput: { ...input, answers: { [input.question]: answer } } };
}
return { behavior: 'deny' };
},
outputFormat: { type: 'json_schema', json_schema: { name: 'EvalSpec', schema: EVAL_SPEC_SCHEMA } },
};
  for await (const msg of query(prompt, agentOptions)) {
    if (msg.type === 'result') return msg.output as EvalSpec;
  }
  throw new Error('query() ended without a result message');
}
```
---
## File Structure
```
src/analyzer/
├── index.ts # Main entry point
├── types.ts # EvalSpec types
├── spec-generator.ts # Claude Agent SDK integration
├── validator.ts # JSON schema validation
└── prompt-builder.ts # Builds prompts from templates
prompts/
├── analyzer-system.md
├── analyzer-developer.md
└── analyzer-user.md
```
---
## Success Criteria
- [ ] Generates valid EvalSpec JSON for Python repos
- [ ] Generates valid EvalSpec JSON for TypeScript repos
- [ ] Asks 2-3 clarifying questions on complex repos
- [ ] <10k tokens per analysis
- [ ] 100% assertion coverage (every scenario has assertions)

# 2. Synchronous Claude Session with Questions - System Design
> **Priority**: 🟡 HIGH — Interactive UX
> **Complexity**: Medium
> **Effort Estimate**: 4-6 hours
---
## Overview
Handles **interactive communication** between Claude and the user during eval generation. When Claude calls `AskUserQuestion`, we display the question in the CLI, collect the answer, and return it to Claude.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Claude Session Manager │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ │
│ │ Claude Agent │◀──────────────────▶│ Question │ │
│ │ SDK │ AskUserQuestion │ Handler │ │
│ └──────────────┘ └──────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ Result CLI/stdin │
│ (EvalSpec) (inquirer) │
└─────────────────────────────────────────────────────────────────┘
```
---
## Session Modes
| Mode | Usage | Behavior |
|------|-------|----------|
| `interactive` | Local dev | Full CLI prompts via inquirer |
| `non-interactive` | CI/CD | Deny questions, use defaults |
| `auto-answer` | Scripted | Use provided default answers |
---
## Core Types
```typescript
interface Question {
header: string;
question: string;
options?: QuestionOption[];
multiSelect?: boolean;
freeText?: boolean;
defaultValue?: string;
}
interface SessionOptions {
interactive: boolean;
defaultAnswers?: Record<string, string>;
timeout?: number;
}
type SessionMode = 'interactive' | 'non-interactive' | 'auto-answer';
```
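The mode table above can be sketched as a small detector for `modes.ts`. Treating a set `CI` environment variable as non-interactive is an assumption; the design only lists the three modes:

```typescript
// Derive a SessionMode from SessionOptions plus the environment, following
// the modes table. The CI env-var check is an assumption.
type SessionMode = 'interactive' | 'non-interactive' | 'auto-answer';

interface SessionOptions {
  interactive: boolean;
  defaultAnswers?: Record<string, string>;
}

function detectMode(opts: SessionOptions, env: Record<string, string | undefined> = process.env): SessionMode {
  if (opts.defaultAnswers && Object.keys(opts.defaultAnswers).length > 0) {
    return 'auto-answer'; // scripted runs supply answers up front
  }
  if (!opts.interactive || env.CI) return 'non-interactive';
  return 'interactive';
}
```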
---
## Key Implementation
```typescript
class ClaudeSession {
async run<T>(systemPrompt: string, userPrompt: string, outputSchema?: object): Promise<T> {
const agentOptions: ClaudeAgentOptions = {
systemPrompt,
permissionMode: this.getPermissionMode(),
canUseTool: this.createToolHandler(),
outputFormat: outputSchema ? { type: 'json_schema', json_schema: { name: 'Output', schema: outputSchema } } : undefined,
};
    for await (const msg of query(userPrompt, agentOptions)) {
      if (msg.type === 'result') return msg.output as T;
    }
    throw new Error('Session ended without a result message');
  }
private async handleAskUserQuestion(input: any) {
if (this.mode === 'non-interactive') {
return { behavior: 'deny', message: 'Interactive questions not allowed in CI' };
}
const answers: Record<string, string> = {};
for (const question of input.questions) {
answers[question.question] = await promptCLI(question);
}
return { behavior: 'allow', updatedInput: { questions: input.questions, answers } };
}
}
```
---
## CLI Adapter (inquirer)
```typescript
async function promptSelect(question: Question): Promise<string> {
const { answer } = await inquirer.prompt([{
type: 'list',
name: 'answer',
message: question.question,
choices: question.options!.map(opt => ({ name: `${opt.label} - ${opt.description}`, value: opt.label })),
}]);
return answer;
}
```
**User sees:**
```
┌─ Priority ────────────────────────
│ I found 47 utility functions. Which should I prioritize?
? Select an option:
all - Test all 47 functions
top-10 - Focus on 10 most-used
critical - Only critical path functions
```
---
## File Structure
```
src/session/
├── index.ts # Main exports
├── types.ts # TypeScript interfaces
├── client.ts # Claude SDK wrapper
├── question-handler.ts # AskUserQuestion logic
├── cli-adapter.ts # Terminal UI (inquirer)
├── modes.ts # Mode detection
└── persistence.ts # Save/resume session
```
---
## Dependencies
```json
{
"@anthropic-ai/claude-agent-sdk": "^0.1.0",
"inquirer": "^9.2.0"
}
```
---
## Success Criteria
- [ ] Interactive mode works in terminal
- [ ] Non-interactive mode works in CI
- [ ] Auto-answer mode uses provided defaults
- [ ] Session state can be saved and resumed
- [ ] Ctrl+C exits cleanly

docs/03-test-renderers.md (new file):
# 3. Test Renderers - System Design
> **Priority**: 🟢 MEDIUM — Deterministic layer
> **Complexity**: Medium
> **Effort Estimate**: 8-12 hours
---
## Overview
Test Renderers **deterministically transform** `EvalSpec` JSON into runnable test files. Key insight:
- **Claude generates specs** (what to test, inputs, assertions)
- **Renderers generate code** (deterministic, templated, no LLM)
This makes tests reliable, debuggable, and version-controllable.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Renderer Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ EvalSpec │───▶│ Renderer │───▶│ Test Files │ │
│ │ JSON │ │ (per-lang) │ │ (.py/.ts) │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │
│ Supported: pytest (Python) | vitest (TS) | jest (TS) │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface RenderOptions {
outputDir: string;
framework: 'pytest' | 'vitest' | 'jest';
includeFixtures: boolean;
generateMocks: boolean;
}
interface RenderResult {
files: GeneratedFile[];
stats: { scenarioCount: number; fileCount: number; assertionCount: number };
}
interface GeneratedFile {
path: string;
content: string;
scenarios: string[]; // Which scenario IDs
}
```
---
## Assertion Mapping
| EvalSpec Type | Python (pytest) | TypeScript (vitest) |
|---------------|-----------------|---------------------|
| `equals` | `assert result == expected` | `expect(result).toBe(expected)` |
| `contains` | `assert key in result` | `expect(result).toContain(key)` |
| `matches` | `assert re.match(pattern, result)` | `expect(result).toMatch(pattern)` |
| `throws` | `pytest.raises(ExceptionType)` | `expect(() => fn()).toThrow()` |
| `type` | `assert isinstance(result, Type)` | `expect(typeof result).toBe('type')` |
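The pytest column of this table maps directly to a rendering function in `python/assertions.ts`. A minimal sketch, with `Assertion` pared down to the fields the table uses:

```typescript
// Renders the pytest side of the assertion mapping table. Assertion here is
// a simplified stand-in for the real EvalSpec type; `expected` is assumed to
// arrive already formatted as Python source text.
interface Assertion {
  type: 'equals' | 'contains' | 'matches' | 'type' | 'throws';
  expected: string;
}

function renderPytestAssertion(a: Assertion): string {
  switch (a.type) {
    case 'equals':   return `assert result == ${a.expected}`;
    case 'contains': return `assert ${a.expected} in result`;
    case 'matches':  return `assert re.match(${a.expected}, result)`;
    case 'type':     return `assert isinstance(result, ${a.expected})`;
    case 'throws':   return `pytest.raises(${a.expected})`;
  }
}
```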
---
## Example Transformation
**EvalSpec scenario:**
```json
{
"id": "auth-login-success",
"target": { "module": "src/auth/login.py", "function": "login" },
"input": { "args": { "username": "test", "password": "valid" } },
"assertions": [
{ "type": "type", "target": "return", "expected": "dict" },
{ "type": "contains", "target": "return", "expected": "token" }
]
}
```
**Generated pytest:**
```python
def test_auth_login_success():
"""Verify login returns JWT on valid credentials"""
result = login("test", "valid")
assert isinstance(result, dict)
assert "token" in result
```
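The generated test name is derived from the scenario ID (`auth-login-success` becomes `test_auth_login_success`). A plausible helper for the pytest renderer; the naming rule is inferred from this example rather than stated in the spec:

```typescript
// Convert a kebab-case scenario id into a pytest function name.
// Rule inferred from the example transformation above.
function toPytestName(scenarioId: string): string {
  return `test_${scenarioId.replace(/-/g, '_')}`;
}
```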
---
## File Structure
```
src/renderers/
├── index.ts # Registry + main export
├── types.ts # Interfaces
├── base.ts # Abstract base renderer
├── python/
│ ├── pytest-renderer.ts
│ ├── assertions.ts
│ └── templates/
│ └── test-file.py.hbs
├── typescript/
│ ├── vitest-renderer.ts
│ ├── jest-renderer.ts
│ └── assertions.ts
└── utils/
└── template-engine.ts
```
---
## Incremental Rendering
```typescript
async function renderIncremental(
spec: EvalSpec,
options: RenderOptions,
changedFiles: string[]
): Promise<RenderResult> {
const filteredSpec = {
...spec,
scenarios: spec.scenarios.filter(s =>
changedFiles.some(f => s.target.module.includes(f))
),
};
return renderSpec(filteredSpec, options);
}
```
---
## Dependencies
```json
{
"handlebars": "^4.7.8"
}
```
---
## Success Criteria
- [ ] Pytest renderer generates valid Python test files
- [ ] Vitest renderer generates valid TypeScript test files
- [ ] Generated tests pass linting
- [ ] All assertion types are supported
- [ ] Mocks and fixtures correctly generated
- [ ] Incremental rendering works

# 4. Functional Test Execution & Grading - System Design
> **Priority**: 🟢 MEDIUM — Runtime layer
> **Complexity**: Medium-High
> **Effort Estimate**: 6-10 hours
---
## Overview
Executes generated tests in a **sandboxed environment** and produces structured results. Tests run in isolation to prevent accidental side effects. Results feed into Promptfoo for aggregation and UI.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Test Execution Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Test Files │───▶│ Sandbox │───▶│ Results │ │
│ │ (.py/.ts) │ │ Runner │ │ JSON │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │ │
│ ▼ ▼ │
│ ┌──────────────┐ ┌──────────────┐ │
│ │ pytest/ │ │ Promptfoo │ │
│ │ vitest │ │ Integration │ │
│ └──────────────┘ └──────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface ExecutionOptions {
framework: 'pytest' | 'vitest' | 'jest';
sandbox: boolean;
timeout: number; // ms per test
parallel: boolean;
filter?: string[]; // Run specific test IDs
}
interface ExecutionResult {
summary: {
total: number;
passed: number;
failed: number;
skipped: number;
duration: number;
};
tests: TestResult[];
errors: string[];
}
interface TestResult {
id: string; // Maps to EvalScenario.id
name: string;
status: 'passed' | 'failed' | 'skipped' | 'error';
duration: number;
assertions: {
passed: number;
failed: number;
details: AssertionResult[];
};
error?: { message: string; stack?: string };
stdout?: string;
stderr?: string;
}
```
---
## Sandbox Configuration
```typescript
const SANDBOX_CONFIG = {
enabled: true,
autoAllowBashIfSandboxed: true,
network: {
allowLocalBinding: true,
allowOutbound: false, // No external network
},
filesystem: {
readOnly: ['/'],
writable: ['/tmp', './test-output'],
},
env: {
inherit: ['PATH', 'HOME'],
set: { CI: 'true', NODE_ENV: 'test' },
},
};
```
---
## Runner Implementations
### Pytest Runner
```typescript
async function runPytest(testDir: string, options: ExecutionOptions): Promise<ExecutionResult> {
const args = [
'-v',
'--tb=short',
'--json-report',
'--json-report-file=results.json',
options.parallel ? '-n auto' : '',
    options.filter?.length ? `-k "${options.filter.join(' or ')}"` : '',
].filter(Boolean);
const { exitCode, stdout, stderr } = await exec(
`pytest ${args.join(' ')} ${testDir}`,
{ timeout: options.timeout, cwd: testDir }
);
  const report = JSON.parse(await fs.readFile(path.join(testDir, 'results.json'), 'utf-8'));
return parseJsonReport(report);
}
```
### Vitest Runner
```typescript
async function runVitest(testDir: string, options: ExecutionOptions): Promise<ExecutionResult> {
const args = [
'run',
'--reporter=json',
'--outputFile=results.json',
options.filter?.length ? `--testNamePattern="${options.filter.join('|')}"` : '',
].filter(Boolean);
const { exitCode } = await exec(
`npx vitest ${args.join(' ')}`,
{ timeout: options.timeout, cwd: testDir }
);
  const report = JSON.parse(await fs.readFile(path.join(testDir, 'results.json'), 'utf-8'));
return parseVitestReport(report);
}
```
---
## Promptfoo Integration
### Custom Provider (`providers/test-runner.py`)
```python
def get_provider_response(prompt: str, options: dict, context: dict) -> dict:
"""Runs tests and returns structured results."""
import subprocess
import json
test_dir = options.get('test_dir', './tests')
framework = options.get('framework', 'pytest')
    if framework == 'pytest':
        result = subprocess.run(
            ['pytest', '--json-report', '--json-report-file=/tmp/results.json', test_dir],
            capture_output=True, text=True, timeout=300
        )
    else:
        raise ValueError(f'Unsupported framework: {framework}')
    with open('/tmp/results.json') as f:
        report = json.load(f)
return {
'output': json.dumps({
'passed': report['summary']['passed'],
'failed': report['summary']['failed'],
'tests': report['tests'],
}),
'error': None,
}
```
### Promptfoo Config
```yaml
providers:
- id: file://providers/test-runner.py
label: functional-tests
config:
test_dir: .evaluclaude/tests
framework: pytest
timeout: 300
tests:
- vars:
scenario_id: auth-login-success
assert:
- type: python
value: |
import json
result = json.loads(output)
result['passed'] > 0 and result['failed'] == 0
```
---
## File Structure
```
src/runners/
├── index.ts # Main entry + registry
├── types.ts # Interfaces
├── sandbox.ts # Isolation wrapper
├── pytest-runner.ts # Python test execution
├── vitest-runner.ts # Vitest execution
├── jest-runner.ts # Jest execution
└── result-parser.ts # Normalize results
providers/
└── test-runner.py # Promptfoo provider
```
---
## Result Parsing
```typescript
function parseJsonReport(report: any): ExecutionResult {
return {
summary: {
total: report.summary.total,
passed: report.summary.passed,
failed: report.summary.failed,
skipped: report.summary.skipped || 0,
duration: report.duration,
},
tests: report.tests.map((t: any) => ({
id: extractScenarioId(t.nodeid),
name: t.nodeid,
status: t.outcome,
duration: t.call?.duration || 0,
assertions: { passed: 0, failed: 0, details: [] },
error: t.call?.crash ? { message: t.call.crash.message } : undefined,
})),
errors: [],
};
}
```
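`parseJsonReport` calls an `extractScenarioId` helper that is not shown. A sketch, assuming the renderer embeds the scenario ID in the test name (`auth-login-success` rendered as `test_auth_login_success`, per doc 3); the nodeid format and the inverse mapping are assumptions:

```typescript
// Recover an EvalScenario id from a pytest nodeid like
// "tests/test_auth.py::test_auth_login_success". Assumes the renderer's
// test_<snake_case_id> naming convention; both are assumptions here.
function extractScenarioId(nodeid: string): string {
  const testName = nodeid.split('::').pop() ?? '';
  return testName.replace(/^test_/, '').replace(/_/g, '-');
}
```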
---
## Dependencies
```json
{
"dependencies": {}
}
```
**Test framework deps (installed in target repo):**
- `pytest`, `pytest-json-report`, `pytest-xdist` (Python)
- `vitest` (TypeScript)
---
## Success Criteria
- [ ] Pytest tests run and produce JSON results
- [ ] Vitest tests run and produce JSON results
- [ ] Sandbox prevents network/filesystem escape
- [ ] Results map back to EvalScenario IDs
- [ ] Promptfoo integration works
- [ ] Parallel execution supported

# 5. LLM Rubric Graders - System Design
> **Priority**: 🟢 MEDIUM — Subjective quality layer
> **Complexity**: Medium
> **Effort Estimate**: 4-6 hours
---
## Overview
LLM Rubric Graders use Claude to evaluate **subjective quality** that deterministic tests can't measure:
- Code readability
- Error message helpfulness
- Documentation quality
- API design consistency
These complement functional tests with human-like judgment.
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ LLM Grading Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Output │───▶│ Rubric │───▶│ Grading │ │
│ │ (code/ │ │ + Claude │ │ Result │ │
│ │ text) │ │ │ │ │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │
│ Uses Promptfoo │
│ llm-rubric assertion │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface Rubric {
name: string;
description: string;
criteria: RubricCriterion[];
passingThreshold: number; // 0-1
}
interface RubricCriterion {
name: string;
description: string;
weight: number; // Relative weight
examples?: {
good: string;
bad: string;
};
}
interface RubricGradingResult {
pass: boolean;
score: number; // 0-1
reason: string;
criterionScores: {
name: string;
score: number;
feedback: string;
}[];
}
```
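How these fields fit together: per-criterion scores are combined by weight into the overall `score`, then compared against `passingThreshold`. Normalizing by total weight is an assumption; the spec only says weights are relative:

```typescript
// Weighted aggregation of criterion scores into RubricGradingResult.score.
// Dividing by total weight (so weights need not sum to 1) is an assumption.
interface CriterionScore { name: string; score: number; weight: number }

function overallScore(scores: CriterionScore[]): number {
  const totalWeight = scores.reduce((sum, c) => sum + c.weight, 0);
  return scores.reduce((sum, c) => sum + c.score * c.weight, 0) / totalWeight;
}

function passes(scores: CriterionScore[], passingThreshold: number): boolean {
  return overallScore(scores) >= passingThreshold;
}
```

With the code-quality weights above (0.3/0.4/0.2/0.1), a module scoring 0.8/1.0/0.5/1.0 averages 0.84 and clears the 0.7 threshold.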
---
## Rubric Examples
### Code Quality Rubric (`rubrics/code-quality.yaml`)
```yaml
name: code-quality
description: Evaluates generated code for quality and maintainability
passingThreshold: 0.7
criteria:
- name: readability
weight: 0.3
description: Code is easy to read and understand
examples:
good: "Clear variable names, logical flow, proper indentation"
bad: "Single-letter variables, deeply nested logic, inconsistent style"
- name: correctness
weight: 0.4
description: Code correctly implements the intended behavior
examples:
good: "Handles edge cases, correct algorithm, proper error handling"
bad: "Missing edge cases, off-by-one errors, swallowed exceptions"
- name: efficiency
weight: 0.2
description: Code uses appropriate data structures and algorithms
examples:
good: "O(n) where O(n) is optimal, avoids unnecessary allocations"
bad: "O(n²) when O(n) is possible, creates objects in tight loops"
- name: maintainability
weight: 0.1
description: Code is easy to modify and extend
examples:
good: "Single responsibility, low coupling, clear interfaces"
bad: "God functions, tight coupling, magic numbers"
```
### Error Messages Rubric (`rubrics/error-messages.yaml`)
```yaml
name: error-messages
description: Evaluates quality of error messages
passingThreshold: 0.6
criteria:
- name: clarity
weight: 0.4
description: Error message clearly explains what went wrong
- name: actionability
weight: 0.4
description: Error message suggests how to fix the problem
- name: context
weight: 0.2
description: Error message includes relevant context (file, line, values)
```
---
## Promptfoo Integration
### Using `llm-rubric` Assertion
```yaml
# promptfooconfig.yaml
tests:
- vars:
code_output: "{{generated_code}}"
assert:
- type: llm-rubric
value: |
Evaluate this code for quality:
{{code_output}}
Score on:
1. Readability (0-10)
2. Correctness (0-10)
3. Efficiency (0-10)
4. Maintainability (0-10)
Provide overall score and specific feedback.
threshold: 0.7
```
### Custom Python Grader
```python
# graders/rubric_grader.py
import json
from anthropic import Anthropic
def get_assert(output: str, context: dict) -> dict:
"""Grade output using LLM rubric."""
rubric = context.get('config', {}).get('rubric', 'code-quality')
rubric_def = load_rubric(rubric)
client = Anthropic()
prompt = f"""
You are evaluating code quality against this rubric:
{json.dumps(rubric_def, indent=2)}
Code to evaluate:
```
{output}
```
For each criterion, provide:
1. Score (0-1)
2. Brief feedback
Return JSON:
{{
"scores": {{"criterion_name": {{"score": 0.8, "feedback": "..."}}}},
"overall": 0.75,
"summary": "..."
}}
"""
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1024,
messages=[{"role": "user", "content": prompt}]
)
result = json.loads(response.content[0].text)
return {
"pass": result["overall"] >= rubric_def["passingThreshold"],
"score": result["overall"],
"reason": result["summary"],
"namedScores": {k: v["score"] for k, v in result["scores"].items()},
}
```
---
## Calibration
LLM graders need calibration to ensure consistency:
```typescript
interface CalibrationSet {
rubric: string;
examples: CalibrationExample[];
}
interface CalibrationExample {
input: string;
expectedScore: number;
expectedFeedback: string[];
}
async function calibrate(rubric: Rubric, examples: CalibrationExample[]): Promise<CalibrationResult> {
const results = await Promise.all(
examples.map(ex => gradeWithRubric(ex.input, rubric))
);
const agreement = results.filter((r, i) =>
Math.abs(r.score - examples[i].expectedScore) < 0.1
).length / results.length;
return {
agreement,
drift: results.map((r, i) => r.score - examples[i].expectedScore),
needsAdjustment: agreement < 0.8,
};
}
```
---
## File Structure
```
src/graders/
├── llm/
│ ├── index.ts # Main entry
│ ├── provider.ts # Promptfoo custom provider
│ ├── rubric-loader.ts # Load YAML rubrics
│ └── grader.ts # Core grading logic
└── calibration/
├── calibrator.ts # Calibration runner
└── examples/ # Calibration datasets
rubrics/
├── code-quality.yaml
├── error-messages.yaml
├── documentation.yaml
└── api-design.yaml
graders/
└── rubric_grader.py # Python grader for Promptfoo
```
---
## When to Use LLM vs Deterministic
| Use LLM Graders | Use Deterministic |
|-----------------|-------------------|
| Subjective quality | Pass/fail assertions |
| Style/readability | Type checking |
| Helpfulness | Value equality |
| Consistency | Error presence |
| User experience | Performance thresholds |
---
## Dependencies
```json
{
"js-yaml": "^4.1.0"
}
```
---
## Success Criteria
- [ ] Rubrics load from YAML files
- [ ] LLM grader produces consistent scores
- [ ] Calibration detects drift
- [ ] Integrates with Promptfoo `llm-rubric`
- [ ] Custom Python grader works
- [ ] >80% agreement with human judgment

# 6. Observability & Tracing - System Design
> **Priority**: 🟡 HIGH — Debugging is critical
> **Complexity**: Medium
> **Effort Estimate**: 4-6 hours
---
## Overview
Every eval run produces a **trace** capturing what Claude did and why. No black boxes. When a test fails, you can see:
- What files Claude analyzed
- What questions it asked
- What specs it generated
- The reasoning behind each decision
---
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ Observability Pipeline │
├─────────────────────────────────────────────────────────────────┤
│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │
│ │ Claude Agent │───▶│ Tracer │───▶│ Trace Store │ │
│ │ Hooks │ │ (collector) │ │ (.json) │ │
│ └──────────────┘ └──────────────┘ └──────────────┘ │
│ │ │
│ ▼ │
│ ┌──────────────┐ │
│ │ Trace Viewer │ │
│ │ (Promptfoo) │ │
│ └──────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
---
## Core Types
```typescript
interface EvalTrace {
id: string; // UUID
evalId: string; // Links to EvalSpec
startedAt: string;
completedAt: string;
duration: number; // ms
status: 'success' | 'partial' | 'failed';
introspection: {
filesAnalyzed: string[];
totalFunctions: number;
totalClasses: number;
duration: number;
};
analysis: {
promptTokens: number;
completionTokens: number;
toolCalls: ToolCall[];
questionsAsked: Question[];
decisions: Decision[];
};
generation: {
scenariosGenerated: number;
filesWritten: string[];
};
execution: {
testsPassed: number;
testsFailed: number;
testsSkipped: number;
failures: TestFailure[];
};
errors: TraceError[];
}
interface ToolCall {
timestamp: string;
tool: string;
input: any;
output: any;
duration: number;
}
interface Decision {
timestamp: string;
type: 'include' | 'exclude' | 'prioritize' | 'question';
subject: string; // What was decided about
reasoning: string; // Why
confidence: number; // 0-1
}
interface TestFailure {
scenarioId: string;
error: string;
stack?: string;
expected?: any;
actual?: any;
}
```
---
## Hook-Based Collection
Use Claude Agent SDK hooks to capture everything:
```typescript
import { ClaudeAgentOptions } from '@anthropic-ai/claude-agent-sdk';
import { Tracer } from './tracer';
function createTracedOptions(tracer: Tracer): Partial<ClaudeAgentOptions> {
return {
hooks: {
preToolUse: [{
hooks: [async (input) => {
tracer.recordToolStart(input.tool_name, input.tool_input);
          return { continue: true };
}]
}],
postToolUse: [{
hooks: [async (input) => {
tracer.recordToolEnd(input.tool_name, input.tool_output);
return {};
}]
}],
},
};
}
```
---
## Tracer Implementation
```typescript
class Tracer {
private trace: EvalTrace;
private currentToolCall?: { name: string; input: any; startTime: number };
constructor(evalId: string) {
this.trace = {
id: crypto.randomUUID(),
evalId,
startedAt: new Date().toISOString(),
completedAt: '',
duration: 0,
status: 'success',
introspection: { filesAnalyzed: [], totalFunctions: 0, totalClasses: 0, duration: 0 },
analysis: { promptTokens: 0, completionTokens: 0, toolCalls: [], questionsAsked: [], decisions: [] },
generation: { scenariosGenerated: 0, filesWritten: [] },
execution: { testsPassed: 0, testsFailed: 0, testsSkipped: 0, failures: [] },
errors: [],
};
}
recordToolStart(name: string, input: any): void {
this.currentToolCall = { name, input, startTime: Date.now() };
}
recordToolEnd(name: string, output: any): void {
if (this.currentToolCall?.name === name) {
this.trace.analysis.toolCalls.push({
timestamp: new Date().toISOString(),
tool: name,
input: this.currentToolCall.input,
output,
duration: Date.now() - this.currentToolCall.startTime,
});
this.currentToolCall = undefined;
}
}
recordQuestion(question: any, answer: string): void {
this.trace.analysis.questionsAsked.push({
...question,
answer,
timestamp: new Date().toISOString(),
});
}
recordDecision(type: Decision['type'], subject: string, reasoning: string, confidence: number): void {
this.trace.analysis.decisions.push({
timestamp: new Date().toISOString(),
type,
subject,
reasoning,
confidence,
});
}
recordError(error: Error, context?: string): void {
this.trace.errors.push({
timestamp: new Date().toISOString(),
message: error.message,
stack: error.stack,
context,
});
this.trace.status = 'failed';
}
finalize(): EvalTrace {
this.trace.completedAt = new Date().toISOString();
this.trace.duration = new Date(this.trace.completedAt).getTime() - new Date(this.trace.startedAt).getTime();
return this.trace;
}
}
```
---
## Trace Storage
```typescript
const TRACES_DIR = '.evaluclaude/traces';
async function saveTrace(trace: EvalTrace): Promise<string> {
await fs.mkdir(TRACES_DIR, { recursive: true });
const filePath = path.join(TRACES_DIR, `${trace.id}.json`);
await fs.writeFile(filePath, JSON.stringify(trace, null, 2));
return filePath;
}
async function loadTrace(traceId: string): Promise<EvalTrace> {
const filePath = path.join(TRACES_DIR, `${traceId}.json`);
const content = await fs.readFile(filePath, 'utf-8');
return JSON.parse(content);
}
async function listTraces(evalId?: string): Promise<EvalTrace[]> {
let files: string[];
try {
files = await fs.readdir(TRACES_DIR);
} catch {
return []; // No traces recorded yet
}
const traces = await Promise.all(
files.filter(f => f.endsWith('.json')).map(f => loadTrace(f.replace(/\.json$/, '')))
);
return evalId ? traces.filter(t => t.evalId === evalId) : traces;
}
```
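A quick round-trip of the same naming scheme, pointed at a temp directory instead of `.evaluclaude/traces` (sketch only; sync `fs` APIs used for brevity):

```typescript
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';

// Round-trip a trace-shaped object through the `${id}.json` naming scheme
// used by saveTrace/loadTrace above. Values are illustrative.
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'traces-'));
const trace = { id: 'abc123', evalId: 'eval-1', status: 'success' };

fs.writeFileSync(path.join(dir, `${trace.id}.json`), JSON.stringify(trace, null, 2));
const loaded = JSON.parse(fs.readFileSync(path.join(dir, `${trace.id}.json`), 'utf-8'));
```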
---
## Promptfoo Integration
Link traces to test results:
```yaml
# promptfooconfig.yaml
defaultTest:
metadata:
traceFile: .evaluclaude/traces/{{evalId}}.json
```
---
## Trace Viewer CLI
```typescript
// src/cli/commands/view.ts
import { Command } from 'commander';
import { loadTrace, listTraces } from '../observability/trace-store';
export const viewCommand = new Command('view')
.description('View eval trace')
.argument('[trace-id]', 'Specific trace ID')
.option('--last', 'View most recent trace')
.option('--json', 'Output raw JSON')
.action(async (traceId, options) => {
let trace: EvalTrace;
if (options.last) {
const traces = await listTraces();
if (traces.length === 0) {
console.error('No traces found.');
process.exit(1);
}
trace = traces.sort((a, b) =>
new Date(b.startedAt).getTime() - new Date(a.startedAt).getTime()
)[0];
} else if (traceId) {
trace = await loadTrace(traceId);
} else {
console.error('Provide a trace ID or pass --last.');
process.exit(1);
}
if (options.json) {
console.log(JSON.stringify(trace, null, 2));
} else {
displayTrace(trace);
}
});
function displayTrace(trace: EvalTrace): void {
console.log(`\n📊 Trace: ${trace.id}`);
console.log(` Status: ${trace.status}`);
console.log(` Duration: ${trace.duration}ms`);
console.log(`\n📂 Introspection:`);
console.log(` Files: ${trace.introspection.filesAnalyzed.length}`);
console.log(` Functions: ${trace.introspection.totalFunctions}`);
console.log(`\n🤖 Analysis:`);
console.log(` Tool calls: ${trace.analysis.toolCalls.length}`);
console.log(` Questions: ${trace.analysis.questionsAsked.length}`);
console.log(` Decisions: ${trace.analysis.decisions.length}`);
console.log(`\n🧪 Execution:`);
console.log(` ✅ Passed: ${trace.execution.testsPassed}`);
console.log(` ❌ Failed: ${trace.execution.testsFailed}`);
if (trace.execution.failures.length > 0) {
console.log(`\n❌ Failures:`);
trace.execution.failures.forEach(f => {
console.log(` - ${f.scenarioId}: ${f.error}`);
});
}
}
```
---
## File Structure
```
src/observability/
├── index.ts # Main exports
├── tracer.ts # Hook-based collection
├── trace-store.ts # Persist to filesystem
├── trace-viewer.ts # Format for display
└── types.ts # EvalTrace interface
.evaluclaude/
└── traces/
├── abc123.json
├── def456.json
└── ...
```
---
## What Gets Traced
| Phase | Captured |
|-------|----------|
| Introspection | Files parsed, functions/classes found, duration |
| Analysis | Every tool call, questions asked, decisions made |
| Generation | Scenarios created, files written |
| Execution | Test results, failures with context |
| Errors | Any exceptions with stack traces |
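Put together, a heavily trimmed trace covering these phases might look like this (all values illustrative):

```json
{
  "id": "abc123",
  "evalId": "eval-1",
  "status": "success",
  "duration": 41250,
  "introspection": { "filesAnalyzed": ["src/index.ts"], "totalFunctions": 12, "totalClasses": 2, "duration": 310 },
  "analysis": { "promptTokens": 0, "completionTokens": 0, "toolCalls": [{ "tool": "read_file", "duration": 85 }], "questionsAsked": [], "decisions": [] },
  "generation": { "scenariosGenerated": 4, "filesWritten": ["tests/generated.test.ts"] },
  "execution": { "testsPassed": 3, "testsFailed": 1, "testsSkipped": 0, "failures": [] },
  "errors": []
}
```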
---
## Dependencies
None — the trace layer uses only Node built-ins (`node:crypto`, `node:fs/promises`, `node:path`).
```json
{
"dependencies": {}
}
```
---
## Success Criteria
- [ ] Every eval run produces a trace
- [ ] Traces capture all tool calls
- [ ] Questions and answers are recorded
- [ ] Test failures link to trace
- [ ] CLI viewer displays traces clearly
- [ ] Traces stored efficiently (<1MB each)

3823
package-lock.json generated Normal file

File diff suppressed because it is too large

46
package.json Normal file

@ -0,0 +1,46 @@
{
"name": "evaluclaude-harness",
"version": "0.1.0",
"description": "Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.",
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"bin": {
"evaluclaude": "./dist/cli/index.js"
},
"scripts": {
"build": "tsc",
"dev": "tsc --watch",
"start": "node dist/cli/index.js",
"test": "vitest",
"lint": "eslint src --ext .ts",
"typecheck": "tsc --noEmit"
},
"keywords": [
"eval",
"claude",
"testing",
"ai",
"code-analysis"
],
"author": "",
"license": "MIT",
"dependencies": {
"commander": "^12.1.0",
"glob": "^10.4.0",
"inquirer": "^9.2.0",
"tree-sitter": "^0.21.1",
"tree-sitter-python": "^0.21.0",
"tree-sitter-typescript": "^0.21.0"
},
"devDependencies": {
"@types/inquirer": "^9.0.7",
"@types/node": "^20.14.0",
"eslint": "^8.57.0",
"typescript": "^5.4.5",
"vitest": "^1.6.0"
},
"engines": {
"node": ">=18.0.0"
}
}

118
src/cli/commands/intro.ts Normal file

@ -0,0 +1,118 @@
import { Command } from 'commander';
import * as path from 'node:path';
import { analyze, treeToString } from '../../introspector/index.js';
export const introCommand = new Command('intro')
.description('Introspect a codebase and output its structure (tree-sitter analysis)')
.argument('[path]', 'Path to the repository to analyze', '.')
.option('-o, --output <file>', 'Output file for the RepoSummary JSON')
.option('--json', 'Output as JSON (default)')
.option('--summary', 'Output a human-readable summary instead of JSON')
.option('--tree', 'Show file tree structure')
.action(async (repoPath: string, options: { output?: string; json?: boolean; summary?: boolean; tree?: boolean }) => {
const absolutePath = path.resolve(repoPath);
console.log(`\n🔍 Analyzing: ${absolutePath}\n`);
try {
const summary = await analyze({
root: absolutePath,
onProgress: (msg) => console.log(` ${msg}`),
});
console.log('');
if (options.tree && summary.tree) {
console.log('📁 File Tree:\n');
console.log(treeToString(summary.tree));
console.log('');
} else if (options.summary) {
printHumanSummary(summary);
} else {
const json = JSON.stringify(summary, null, 2);
if (options.output) {
const fs = await import('node:fs/promises');
await fs.writeFile(options.output, json);
console.log(`📄 Written to: ${options.output}`);
} else {
console.log(json);
}
}
} catch (error) {
console.error('❌ Error analyzing repository:', error);
process.exit(1);
}
});
function printHumanSummary(summary: import('../../introspector/types.js').RepoSummary): void {
console.log('📊 Repository Summary');
console.log('─'.repeat(50));
console.log(`📁 Root: ${summary.root}`);
console.log(`🗓️ Analyzed: ${summary.analyzedAt}`);
console.log(`🔤 Languages: ${summary.languages.join(', ') || 'none detected'}`);
console.log('\n📂 Files:');
console.log(` Total: ${summary.files.length}`);
console.log(` Source: ${summary.files.filter(f => f.role === 'source').length}`);
console.log(` Test: ${summary.files.filter(f => f.role === 'test').length}`);
console.log(` Config: ${summary.files.filter(f => f.role === 'config').length}`);
console.log('\n📦 Modules:');
console.log(` Total: ${summary.modules.length}`);
const totalExports = summary.modules.reduce((sum, m) => sum + m.exports.length, 0);
const functions = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'function'));
const classes = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'class'));
console.log(` Functions: ${functions.length}`);
console.log(` Classes: ${classes.length}`);
console.log(` Total exports: ${totalExports}`);
if (summary.config.python) {
console.log('\n🐍 Python:');
console.log(` Test framework: ${summary.config.python.testFramework}`);
console.log(` pyproject.toml: ${summary.config.python.pyprojectToml ? '✓' : '✗'}`);
console.log(` setup.py: ${summary.config.python.setupPy ? '✓' : '✗'}`);
}
if (summary.config.typescript) {
console.log('\n📘 TypeScript:');
console.log(` Test framework: ${summary.config.typescript.testFramework}`);
console.log(` package.json: ${summary.config.typescript.packageJson ? '✓' : '✗'}`);
console.log(` tsconfig.json: ${summary.config.typescript.tsconfig ? '✓' : '✗'}`);
}
if (summary.git) {
console.log('\n📌 Git:');
console.log(` Branch: ${summary.git.branch}`);
console.log(` Commit: ${summary.git.currentCommit.slice(0, 8)}`);
if (summary.git.recentCommits && summary.git.recentCommits.length > 0) {
console.log('\n📜 Recent Commits:');
for (const commit of summary.git.recentCommits.slice(0, 5)) {
const date = new Date(commit.date).toLocaleDateString();
console.log(` ${commit.shortHash} ${date} - ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`);
}
}
if (summary.git.fileHistory && summary.git.fileHistory.length > 0) {
console.log('\n🔥 Most Active Files (by commit count):');
for (const file of summary.git.fileHistory.slice(0, 5)) {
console.log(` ${file.path} (${file.commitCount} commits)`);
}
}
}
// Show top modules by export count
const topModules = [...summary.modules]
.sort((a, b) => b.exports.length - a.exports.length)
.slice(0, 5);
if (topModules.length > 0) {
console.log('\n🏆 Top modules by exports:');
for (const mod of topModules) {
console.log(` ${mod.path}: ${mod.exports.length} exports`);
}
}
}

15
src/cli/index.ts Normal file

@ -0,0 +1,15 @@
#!/usr/bin/env node
import { Command } from 'commander';
import { introCommand } from './commands/intro.js';
const program = new Command();
program
.name('evaluclaude')
.description('Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.')
.version('0.1.0');
program.addCommand(introCommand);
program.parse(process.argv);

1
src/index.ts Normal file

@ -0,0 +1 @@
export * from './introspector/index.js';

199
src/introspector/git.ts Normal file

@ -0,0 +1,199 @@
import { exec } from 'node:child_process';
import { promisify } from 'node:util';
import type { GitInfo, CommitInfo, FileHistoryInfo } from './types.js';
const execAsync = promisify(exec);
const MAX_COMMITS = 20;
const MAX_FILE_HISTORY = 50;
export async function getGitInfo(root: string, lastCommit?: string): Promise<GitInfo | undefined> {
try {
// Check if it's a git repo
await execAsync('git rev-parse --git-dir', { cwd: root });
} catch {
return undefined;
}
try {
const [currentCommitResult, branchResult] = await Promise.all([
execAsync('git rev-parse HEAD', { cwd: root }),
execAsync('git branch --show-current', { cwd: root }),
]);
const currentCommit = currentCommitResult.stdout.trim();
const branch = branchResult.stdout.trim() || 'HEAD';
let changedSince: string[] = [];
if (lastCommit && lastCommit !== currentCommit) {
changedSince = await getChangedFiles(root, lastCommit);
}
// Fetch recent commits
const recentCommits = await getRecentCommits(root);
// Fetch file history (most frequently changed files)
const fileHistory = await getFileHistory(root);
return {
currentCommit,
lastAnalyzedCommit: lastCommit || currentCommit,
changedSince,
branch,
recentCommits,
fileHistory,
};
} catch {
return undefined;
}
}
export async function getChangedFiles(root: string, since: string): Promise<string[]> {
try {
const { stdout } = await execAsync(`git diff --name-only ${since}`, { cwd: root });
return stdout
.split('\n')
.filter(f => f && isSourceFile(f));
} catch {
return [];
}
}
export async function getCurrentCommit(root: string): Promise<string | undefined> {
try {
const { stdout } = await execAsync('git rev-parse HEAD', { cwd: root });
return stdout.trim();
} catch {
return undefined;
}
}
export async function isGitRepo(root: string): Promise<boolean> {
try {
await execAsync('git rev-parse --git-dir', { cwd: root });
return true;
} catch {
return false;
}
}
function isSourceFile(filePath: string): boolean {
return /\.(py|ts|tsx|js|jsx)$/.test(filePath);
}
export async function getRecentCommits(root: string, limit: number = MAX_COMMITS): Promise<CommitInfo[]> {
try {
// Format: hash|short|author|date|message; filesChanged parsed from --shortstat lines
const { stdout } = await execAsync(
`git log -${limit} --pretty=format:"%H|%h|%an|%aI|%s" --shortstat`,
{ cwd: root, maxBuffer: 1024 * 1024 }
);
const commits: CommitInfo[] = [];
const lines = stdout.split('\n');
let i = 0;
while (i < lines.length) {
const line = lines[i]?.trim();
if (!line) {
i++;
continue;
}
const parts = line.split('|');
if (parts.length >= 5) {
// Parse the commit line
const [hash, shortHash, author, date, ...messageParts] = parts;
const message = messageParts.join('|'); // In case message contains |
// Look for stats line (next non-empty line)
let filesChanged = 0;
if (i + 1 < lines.length) {
const statsLine = lines[i + 1]?.trim();
if (statsLine) {
const match = statsLine.match(/(\d+) files? changed/);
if (match) {
filesChanged = parseInt(match[1], 10);
i++; // Skip stats line
}
}
}
commits.push({
hash,
shortHash,
author,
date,
message,
filesChanged,
});
}
i++;
}
return commits;
} catch {
return [];
}
}
export async function getFileHistory(root: string, limit: number = MAX_FILE_HISTORY): Promise<FileHistoryInfo[]> {
try {
// Get the most frequently modified source files
const { stdout } = await execAsync(
`git log --pretty=format: --name-only | grep -E '\\.(py|ts|tsx|js|jsx)$' | sort | uniq -c | sort -rn | head -${limit}`,
{ cwd: root, maxBuffer: 1024 * 1024, shell: '/bin/bash' }
);
const files: FileHistoryInfo[] = [];
for (const line of stdout.split('\n')) {
const trimmed = line.trim();
if (!trimmed) continue;
const match = trimmed.match(/^\s*(\d+)\s+(.+)$/);
if (match) {
const commitCount = parseInt(match[1], 10);
const filePath = match[2];
// Get contributors for this file
const contributors = await getFileContributors(root, filePath);
const lastModified = await getFileLastModified(root, filePath);
files.push({
path: filePath,
commitCount,
lastModified,
contributors,
});
}
}
return files;
} catch {
return [];
}
}
async function getFileContributors(root: string, filePath: string): Promise<string[]> {
try {
const { stdout } = await execAsync(
`git log --pretty=format:"%an" -- "${filePath}" | sort -u | head -5`,
{ cwd: root, shell: '/bin/bash' }
);
return stdout.split('\n').filter(s => s.trim()).slice(0, 5);
} catch {
return [];
}
}
async function getFileLastModified(root: string, filePath: string): Promise<string> {
try {
const { stdout } = await execAsync(
`git log -1 --pretty=format:"%aI" -- "${filePath}"`,
{ cwd: root }
);
return stdout.trim();
} catch {
return '';
}
}

25
src/introspector/index.ts Normal file

@ -0,0 +1,25 @@
export { analyze, analyzeIncremental } from './summarizer.js';
export { scanDirectory, detectConfig } from './scanner.js';
export { getGitInfo, getChangedFiles, getCurrentCommit, isGitRepo, getRecentCommits, getFileHistory } from './git.js';
export { buildFileTree, treeToString, getTreeStats } from './tree.js';
export { PythonParser } from './parsers/python.js';
export { TypeScriptParser } from './parsers/typescript.js';
export type {
RepoSummary,
FileInfo,
ModuleInfo,
ExportInfo,
ConfigInfo,
GitInfo,
CommitInfo,
FileHistoryInfo,
FileTreeNode,
Language,
} from './types.js';
import { analyze as analyzeRepo } from './summarizer.js';
export async function introspect(repoPath: string): Promise<import('./types.js').RepoSummary> {
return analyzeRepo({ root: repoPath });
}

29
src/introspector/parsers/base.ts Normal file

@ -0,0 +1,29 @@
import type { ModuleInfo, ExportInfo } from '../types.js';
export interface ParserResult {
exports: ExportInfo[];
imports: string[];
}
export abstract class BaseParser {
abstract readonly language: string;
abstract parse(source: string, filePath: string): ModuleInfo;
protected getText(source: string, startIndex: number, endIndex: number): string {
return source.slice(startIndex, endIndex);
}
protected calculateComplexity(exportCount: number): ModuleInfo['complexity'] {
if (exportCount <= 5) return 'low';
if (exportCount <= 15) return 'medium';
return 'high';
}
protected extractFirstLineOfDocstring(docstring: string | undefined): string | undefined {
if (!docstring) return undefined;
const trimmed = docstring.trim();
const firstLine = trimmed.split('\n')[0];
return firstLine.replace(/^["']{1,3}|["']{1,3}$/g, '').trim() || undefined;
}
}

167
src/introspector/parsers/python.ts Normal file

@ -0,0 +1,167 @@
import Parser from 'tree-sitter';
import Python from 'tree-sitter-python';
import { BaseParser } from './base.js';
import type { ModuleInfo, ExportInfo } from '../types.js';
export class PythonParser extends BaseParser {
readonly language = 'python';
private parser: Parser;
constructor() {
super();
this.parser = new Parser();
this.parser.setLanguage(Python);
}
parse(source: string, filePath: string): ModuleInfo {
const tree = this.parser.parse(source);
const rootNode = tree.rootNode;
const exports: ExportInfo[] = [];
const imports: string[] = [];
// Walk the tree to extract functions, classes, and imports
this.walkNode(rootNode, source, exports, imports);
return {
path: filePath,
exports,
imports: [...new Set(imports)],
complexity: this.calculateComplexity(exports.length),
};
}
private walkNode(
node: Parser.SyntaxNode,
source: string,
exports: ExportInfo[],
imports: string[]
): void {
switch (node.type) {
case 'function_definition':
exports.push(this.extractFunction(node, source));
break;
case 'class_definition':
exports.push(this.extractClass(node, source));
break;
case 'import_statement':
imports.push(...this.extractImport(node, source));
break;
case 'import_from_statement':
imports.push(...this.extractFromImport(node, source));
break;
default:
// Recurse into children for top-level nodes
if (node.type === 'module' || node.type === 'decorated_definition') {
for (const child of node.children) {
this.walkNode(child, source, exports, imports);
}
}
}
}
private extractFunction(node: Parser.SyntaxNode, source: string): ExportInfo {
const nameNode = node.childForFieldName('name');
const paramsNode = node.childForFieldName('parameters');
const returnTypeNode = node.childForFieldName('return_type');
const bodyNode = node.childForFieldName('body');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Build signature
let signature = '';
if (paramsNode) {
signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
}
if (returnTypeNode) {
signature += ` -> ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
}
// Check for async
const isAsync = node.children.some(c => c.type === 'async');
// Try to extract docstring
let docstring: string | undefined;
if (bodyNode && bodyNode.firstChild?.type === 'expression_statement') {
const exprStmt = bodyNode.firstChild;
const strNode = exprStmt.firstChild;
if (strNode?.type === 'string') {
docstring = this.extractFirstLineOfDocstring(
this.getText(source, strNode.startIndex, strNode.endIndex)
);
}
}
return {
name,
kind: 'function',
signature: signature || undefined,
docstring,
lineNumber: node.startPosition.row + 1,
isAsync,
};
}
private extractClass(node: Parser.SyntaxNode, source: string): ExportInfo {
const nameNode = node.childForFieldName('name');
const bodyNode = node.childForFieldName('body');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Try to extract docstring
let docstring: string | undefined;
if (bodyNode && bodyNode.firstChild?.type === 'expression_statement') {
const exprStmt = bodyNode.firstChild;
const strNode = exprStmt.firstChild;
if (strNode?.type === 'string') {
docstring = this.extractFirstLineOfDocstring(
this.getText(source, strNode.startIndex, strNode.endIndex)
);
}
}
// Build a basic signature showing inheritance
let signature: string | undefined;
const superclassNode = node.childForFieldName('superclasses');
if (superclassNode) {
signature = this.getText(source, superclassNode.startIndex, superclassNode.endIndex);
}
return {
name,
kind: 'class',
signature,
docstring,
lineNumber: node.startPosition.row + 1,
};
}
private extractImport(node: Parser.SyntaxNode, source: string): string[] {
const imports: string[] = [];
for (const child of node.children) {
if (child.type === 'dotted_name') {
imports.push(this.getText(source, child.startIndex, child.endIndex));
} else if (child.type === 'aliased_import') {
const nameNode = child.childForFieldName('name');
if (nameNode) {
imports.push(this.getText(source, nameNode.startIndex, nameNode.endIndex));
}
}
}
return imports;
}
private extractFromImport(node: Parser.SyntaxNode, source: string): string[] {
const moduleNode = node.childForFieldName('module_name');
if (moduleNode) {
return [this.getText(source, moduleNode.startIndex, moduleNode.endIndex)];
}
return [];
}
}

188
src/introspector/parsers/typescript.ts Normal file

@ -0,0 +1,188 @@
import Parser from 'tree-sitter';
import TypeScriptLang from 'tree-sitter-typescript';
import { BaseParser } from './base.js';
import type { ModuleInfo, ExportInfo } from '../types.js';
const { typescript: TypeScript } = TypeScriptLang;
export class TypeScriptParser extends BaseParser {
readonly language = 'typescript';
private parser: Parser;
constructor() {
super();
this.parser = new Parser();
this.parser.setLanguage(TypeScript);
}
parse(source: string, filePath: string): ModuleInfo {
const tree = this.parser.parse(source);
const rootNode = tree.rootNode;
const exports: ExportInfo[] = [];
const imports: string[] = [];
this.walkNode(rootNode, source, exports, imports, false);
return {
path: filePath,
exports,
imports: [...new Set(imports)],
complexity: this.calculateComplexity(exports.length),
};
}
private walkNode(
node: Parser.SyntaxNode,
source: string,
exports: ExportInfo[],
imports: string[],
isExported: boolean
): void {
switch (node.type) {
case 'function_declaration':
exports.push(this.extractFunction(node, source, isExported));
break;
case 'class_declaration':
exports.push(this.extractClass(node, source, isExported));
break;
case 'lexical_declaration':
case 'variable_declaration':
exports.push(...this.extractVariables(node, source, isExported));
break;
case 'type_alias_declaration':
case 'interface_declaration':
exports.push(this.extractTypeDefinition(node, source, isExported));
break;
case 'export_statement':
// Recurse with isExported = true
for (const child of node.children) {
this.walkNode(child, source, exports, imports, true);
}
break;
case 'import_statement':
imports.push(...this.extractImport(node, source));
break;
case 'program':
// Recurse into top-level statements
for (const child of node.children) {
this.walkNode(child, source, exports, imports, false);
}
break;
}
}
private extractFunction(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
const nameNode = node.childForFieldName('name');
const paramsNode = node.childForFieldName('parameters');
const returnTypeNode = node.childForFieldName('return_type');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Build signature
let signature = '';
if (paramsNode) {
signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
}
if (returnTypeNode) {
signature += `: ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
}
// Check for async
const isAsync = node.children.some(c => c.type === 'async');
return {
name,
kind: 'function',
signature: signature || undefined,
lineNumber: node.startPosition.row + 1,
isAsync,
isExported,
};
}
private extractClass(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
const nameNode = node.childForFieldName('name');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Get heritage clause for extends/implements
let signature: string | undefined;
const heritageNode = node.children.find(c => c.type === 'class_heritage');
if (heritageNode) {
signature = this.getText(source, heritageNode.startIndex, heritageNode.endIndex);
}
return {
name,
kind: 'class',
signature,
lineNumber: node.startPosition.row + 1,
isExported,
};
}
private extractVariables(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo[] {
const exports: ExportInfo[] = [];
for (const child of node.children) {
if (child.type === 'variable_declarator') {
const nameNode = child.childForFieldName('name');
const valueNode = child.childForFieldName('value');
if (nameNode) {
const name = this.getText(source, nameNode.startIndex, nameNode.endIndex);
// Check if it's a function expression or arrow function
const isFunction = valueNode && (
valueNode.type === 'arrow_function' ||
valueNode.type === 'function_expression' ||
valueNode.type === 'function'
);
exports.push({
name,
kind: isFunction ? 'function' : 'constant',
lineNumber: child.startPosition.row + 1,
isExported,
isAsync: valueNode?.children.some(c => c.type === 'async'),
});
}
}
}
return exports;
}
private extractTypeDefinition(node: Parser.SyntaxNode, source: string, isExported: boolean): ExportInfo {
const nameNode = node.childForFieldName('name');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
return {
name,
kind: 'type',
lineNumber: node.startPosition.row + 1,
isExported,
};
}
private extractImport(node: Parser.SyntaxNode, source: string): string[] {
const imports: string[] = [];
for (const child of node.children) {
if (child.type === 'string') {
// Remove quotes from the import path
const importPath = this.getText(source, child.startIndex, child.endIndex)
.replace(/^["']|["']$/g, '');
imports.push(importPath);
}
}
return imports;
}
}

213
src/introspector/scanner.ts Normal file

@ -0,0 +1,213 @@
import { glob } from 'glob';
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import type { FileInfo } from './types.js';
const IGNORE_PATTERNS = [
'node_modules/**',
'.git/**',
'__pycache__/**',
'*.pyc',
'dist/**',
'build/**',
'.venv/**',
'venv/**',
'.env/**',
'env/**',
'coverage/**',
'.next/**',
'.nuxt/**',
];
export async function scanDirectory(root: string): Promise<FileInfo[]> {
const patterns = ['**/*.py', '**/*.ts', '**/*.tsx', '**/*.js', '**/*.jsx'];
const files: string[] = [];
for (const pattern of patterns) {
const matches = await glob(pattern, {
cwd: root,
ignore: IGNORE_PATTERNS,
nodir: true,
});
files.push(...matches);
}
const uniqueFiles = [...new Set(files)];
const fileInfos = await Promise.all(
uniqueFiles.map(async (relativePath) => {
const fullPath = path.join(root, relativePath);
try {
const stats = await fs.stat(fullPath);
return {
path: relativePath,
lang: detectLanguage(relativePath),
role: detectRole(relativePath),
size: stats.size,
lastModified: stats.mtime.toISOString(),
} satisfies FileInfo;
} catch {
return null;
}
})
);
return fileInfos.filter((f): f is FileInfo => f !== null);
}
function detectLanguage(filePath: string): FileInfo['lang'] {
const ext = path.extname(filePath).toLowerCase();
switch (ext) {
case '.py':
return 'python';
case '.ts':
case '.tsx':
return 'typescript';
case '.js':
case '.jsx':
return 'typescript'; // Treat JS as TS for parsing
default:
return 'other';
}
}
function detectRole(filePath: string): FileInfo['role'] {
const lowerPath = filePath.toLowerCase();
const fileName = lowerPath.split('/').pop() || '';
// Test files - be more specific to avoid false positives
if (
lowerPath.includes('__tests__') ||
lowerPath.includes('/tests/') ||
lowerPath.includes('/test/') ||
fileName.endsWith('_test.py') ||
fileName.endsWith('.test.ts') ||
fileName.endsWith('.test.tsx') ||
fileName.endsWith('.test.js') ||
fileName.endsWith('.spec.ts') ||
fileName.endsWith('.spec.tsx') ||
fileName.endsWith('.spec.js') ||
fileName.startsWith('test_')
) {
return 'test';
}
// Config files
if (
lowerPath.includes('config') ||
lowerPath.includes('settings') ||
lowerPath.includes('.env') ||
lowerPath.endsWith('conftest.py') ||
lowerPath.endsWith('setup.py') ||
lowerPath.endsWith('pyproject.toml')
) {
return 'config';
}
// Documentation
if (
lowerPath.includes('doc') ||
lowerPath.includes('readme')
) {
return 'docs';
}
return 'source';
}
export async function detectConfig(root: string): Promise<{
python?: {
entryPoints: string[];
testFramework: 'pytest' | 'unittest' | 'none';
hasTyping: boolean;
pyprojectToml: boolean;
setupPy: boolean;
};
typescript?: {
entryPoints: string[];
testFramework: 'vitest' | 'jest' | 'none';
hasTypes: boolean;
packageJson: boolean;
tsconfig: boolean;
};
}> {
const config: Awaited<ReturnType<typeof detectConfig>> = {};
// Check for Python project
const hasPyprojectToml = await fileExists(path.join(root, 'pyproject.toml'));
const hasSetupPy = await fileExists(path.join(root, 'setup.py'));
const hasRequirementsTxt = await fileExists(path.join(root, 'requirements.txt'));
if (hasPyprojectToml || hasSetupPy || hasRequirementsTxt) {
let testFramework: 'pytest' | 'unittest' | 'none' = 'none';
// Check for pytest
if (hasPyprojectToml) {
try {
const content = await fs.readFile(path.join(root, 'pyproject.toml'), 'utf-8');
if (content.includes('pytest')) {
testFramework = 'pytest';
}
} catch {}
}
if (testFramework === 'none' && hasRequirementsTxt) {
try {
const content = await fs.readFile(path.join(root, 'requirements.txt'), 'utf-8');
if (content.includes('pytest')) {
testFramework = 'pytest';
}
} catch {}
}
config.python = {
entryPoints: [],
testFramework,
hasTyping: false,
pyprojectToml: hasPyprojectToml,
setupPy: hasSetupPy,
};
}
// Check for TypeScript/JavaScript project
const hasPackageJson = await fileExists(path.join(root, 'package.json'));
const hasTsconfig = await fileExists(path.join(root, 'tsconfig.json'));
if (hasPackageJson || hasTsconfig) {
let testFramework: 'vitest' | 'jest' | 'none' = 'none';
if (hasPackageJson) {
try {
const content = await fs.readFile(path.join(root, 'package.json'), 'utf-8');
const pkg = JSON.parse(content);
const allDeps = { ...pkg.dependencies, ...pkg.devDependencies };
if ('vitest' in allDeps) {
testFramework = 'vitest';
} else if ('jest' in allDeps) {
testFramework = 'jest';
}
} catch {}
}
config.typescript = {
entryPoints: [],
testFramework,
hasTypes: hasTsconfig,
packageJson: hasPackageJson,
tsconfig: hasTsconfig,
};
}
return config;
}
async function fileExists(filePath: string): Promise<boolean> {
try {
await fs.access(filePath);
return true;
} catch {
return false;
}
}

134
src/introspector/summarizer.ts Normal file

@ -0,0 +1,134 @@
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { scanDirectory, detectConfig } from './scanner.js';
import { PythonParser } from './parsers/python.js';
import { TypeScriptParser } from './parsers/typescript.js';
import { getGitInfo, getChangedFiles } from './git.js';
import { buildFileTree } from './tree.js';
import type { RepoSummary, ModuleInfo, FileInfo, Language } from './types.js';
export interface AnalyzeOptions {
root: string;
incremental?: boolean;
lastCommit?: string;
onlyFiles?: string[];
onProgress?: (message: string) => void;
}
export async function analyze(options: AnalyzeOptions): Promise<RepoSummary> {
const { root, incremental, lastCommit, onlyFiles, onProgress } = options;
onProgress?.('Scanning directory...');
let files = await scanDirectory(root);
// Filter for incremental analysis
if (onlyFiles && onlyFiles.length > 0) {
files = files.filter(f => onlyFiles.includes(f.path));
onProgress?.(`Filtered to ${files.length} changed files`);
}
onProgress?.(`Found ${files.length} source files`);
// Initialize parsers
const pythonParser = new PythonParser();
const tsParser = new TypeScriptParser();
const modules: ModuleInfo[] = [];
const sourceFiles = files.filter(f => f.role === 'source' && f.lang !== 'other');
onProgress?.(`Parsing ${sourceFiles.length} modules...`);
for (const file of sourceFiles) {
const fullPath = path.join(root, file.path);
try {
const source = await fs.readFile(fullPath, 'utf-8');
let moduleInfo: ModuleInfo;
if (file.lang === 'python') {
moduleInfo = pythonParser.parse(source, file.path);
} else if (file.lang === 'typescript') {
moduleInfo = tsParser.parse(source, file.path);
} else {
continue;
}
modules.push(moduleInfo);
} catch {
// Skip files that can't be parsed
onProgress?.(`Warning: Could not parse ${file.path}`);
}
}
onProgress?.('Detecting project configuration...');
const config = await detectConfig(root);
onProgress?.('Getting git info...');
const git = await getGitInfo(root, lastCommit);
onProgress?.('Building file tree...');
const tree = buildFileTree(files, path.basename(root));
// Detect languages used
const languages = detectLanguages(files);
onProgress?.('Analysis complete');
return {
languages,
root,
analyzedAt: new Date().toISOString(),
files,
modules,
config,
git,
tree,
};
}
export async function analyzeIncremental(
root: string,
lastCommit: string,
onProgress?: (message: string) => void
): Promise<RepoSummary> {
onProgress?.('Getting changed files since last commit...');
const changedFiles = await getChangedFiles(root, lastCommit);
if (changedFiles.length === 0) {
onProgress?.('No files changed');
// Nothing to re-analyze; return a minimal summary (no files, modules, or tree)
return {
languages: [],
root,
analyzedAt: new Date().toISOString(),
files: [],
modules: [],
config: {},
git: await getGitInfo(root, lastCommit),
};
}
onProgress?.(`Found ${changedFiles.length} changed files`);
return analyze({
root,
incremental: true,
lastCommit,
onlyFiles: changedFiles,
onProgress,
});
}
function detectLanguages(files: FileInfo[]): Language[] {
const languages = new Set<Language>();
for (const file of files) {
if (file.lang === 'python' || file.lang === 'typescript') {
languages.add(file.lang);
}
}
return [...languages];
}
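One small note on the incremental path above: `files.filter(f => onlyFiles.includes(f.path))` is O(n·m) in the number of files and changed paths. A Set-based variant keeps each lookup constant-time — a standalone sketch with a hypothetical helper name (`filterToChanged` is not part of the module):

```typescript
// Sketch: constant-time membership test for the incremental file filter.
function filterToChanged<T extends { path: string }>(files: T[], onlyFiles: string[]): T[] {
  const wanted = new Set(onlyFiles);
  return files.filter(f => wanted.has(f.path));
}

const files = [{ path: 'a.ts' }, { path: 'b.ts' }, { path: 'c.ts' }];
console.log(filterToChanged(files, ['b.ts', 'c.ts']).map(f => f.path).join(',')); // → b.ts,c.ts
```

For the typical change set sizes here the difference is negligible, but it matters if `analyze` is ever pointed at a monorepo-scale file list.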

src/introspector/tree.ts Normal file

@ -0,0 +1,157 @@
import type { FileInfo, FileTreeNode } from './types.js';
/** Build a nested directory tree from flat, slash-separated file paths. */
export function buildFileTree(files: FileInfo[], rootName: string = '.'): FileTreeNode {
const root: FileTreeNode = {
name: rootName,
path: '',
type: 'directory',
children: [],
};
// Build a map for quick lookup
const nodeMap = new Map<string, FileTreeNode>();
nodeMap.set('', root);
// Sort files to ensure parents are created before children
const sortedFiles = [...files].sort((a, b) => a.path.localeCompare(b.path));
for (const file of sortedFiles) {
const parts = file.path.split('/');
let currentPath = '';
// Create all parent directories
for (let i = 0; i < parts.length - 1; i++) {
const parentPath = currentPath;
currentPath = currentPath ? `${currentPath}/${parts[i]}` : parts[i];
if (!nodeMap.has(currentPath)) {
const dirNode: FileTreeNode = {
name: parts[i],
path: currentPath,
type: 'directory',
children: [],
};
nodeMap.set(currentPath, dirNode);
// Add to parent
const parent = nodeMap.get(parentPath);
if (parent && parent.children) {
parent.children.push(dirNode);
}
}
}
// Create the file node
const fileName = parts[parts.length - 1];
const fileNode: FileTreeNode = {
name: fileName,
path: file.path,
type: 'file',
lang: file.lang,
role: file.role,
};
// Add to parent directory
const parentPath = parts.slice(0, -1).join('/');
const parent = nodeMap.get(parentPath);
if (parent && parent.children) {
parent.children.push(fileNode);
}
}
// Sort children alphabetically (directories first)
sortTreeRecursive(root);
return root;
}
function sortTreeRecursive(node: FileTreeNode): void {
if (node.children) {
node.children.sort((a, b) => {
// Directories first
if (a.type !== b.type) {
return a.type === 'directory' ? -1 : 1;
}
return a.name.localeCompare(b.name);
});
for (const child of node.children) {
sortTreeRecursive(child);
}
}
}
/** Render the tree as an indented ASCII listing with per-file icons. */
export function treeToString(node: FileTreeNode, prefix: string = '', isLast: boolean = true): string {
const lines: string[] = [];
const connector = isLast ? '└── ' : '├── ';
const extension = isLast ? ' ' : '│ ';
if (node.path === '') {
// Root node
lines.push(node.name);
} else {
const icon = node.type === 'directory' ? '📁' : getFileIcon(node.lang, node.role);
lines.push(`${prefix}${connector}${icon} ${node.name}`);
}
if (node.children) {
const children = node.children;
for (let i = 0; i < children.length; i++) {
const child = children[i];
const childIsLast = i === children.length - 1;
const newPrefix = node.path === '' ? '' : prefix + extension;
lines.push(treeToString(child, newPrefix, childIsLast));
}
}
return lines.join('\n');
}
function getFileIcon(lang?: string, role?: string): string {
if (role === 'test') return '🧪';
if (role === 'config') return '⚙️';
if (role === 'docs') return '📄';
switch (lang) {
case 'python': return '🐍';
case 'typescript': return '📘';
default: return '📄';
}
}
/** Count directories and files, bucketed by language and role. */
export function getTreeStats(node: FileTreeNode): {
directories: number;
files: number;
byLang: Record<string, number>;
byRole: Record<string, number>;
} {
const stats = {
directories: 0,
files: 0,
byLang: {} as Record<string, number>,
byRole: {} as Record<string, number>,
};
function traverse(n: FileTreeNode): void {
if (n.type === 'directory') {
stats.directories++;
if (n.children) {
for (const child of n.children) {
traverse(child);
}
}
} else {
stats.files++;
if (n.lang) {
stats.byLang[n.lang] = (stats.byLang[n.lang] || 0) + 1;
}
if (n.role) {
stats.byRole[n.role] = (stats.byRole[n.role] || 0) + 1;
}
}
}
traverse(node);
return stats;
}
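The interplay of `buildFileTree`, `sortTreeRecursive`, and `treeToString` can be condensed into a standalone sketch. The names here (`SketchNode`, `insertPath`, `renderTree`) are hypothetical, not part of tree.ts, but the path-splitting and directories-first ordering mirror the code above:

```typescript
// Standalone sketch: build a nested tree from slash-separated paths and
// render it with the same directories-first, then alphabetical ordering
// rule as sortTreeRecursive.
type SketchNode = { name: string; type: 'file' | 'dir'; children: SketchNode[] };

function insertPath(root: SketchNode, filePath: string): void {
  const parts = filePath.split('/');
  let cur = root;
  parts.forEach((part, i) => {
    let next = cur.children.find(c => c.name === part);
    if (!next) {
      // The last path segment is the file; everything before it is a directory.
      next = { name: part, type: i === parts.length - 1 ? 'file' : 'dir', children: [] };
      cur.children.push(next);
    }
    cur = next;
  });
}

function renderTree(node: SketchNode, prefix = ''): string[] {
  const sorted = [...node.children].sort((a, b) =>
    a.type !== b.type ? (a.type === 'dir' ? -1 : 1) : a.name.localeCompare(b.name)
  );
  const lines: string[] = [];
  sorted.forEach((child, i) => {
    const last = i === sorted.length - 1;
    lines.push(`${prefix}${last ? '└── ' : '├── '}${child.name}`);
    lines.push(...renderTree(child, prefix + (last ? '    ' : '│   ')));
  });
  return lines;
}

const root: SketchNode = { name: '.', type: 'dir', children: [] };
for (const p of ['src/cli/index.ts', 'src/index.ts', 'README.md']) insertPath(root, p);
console.log(['.', ...renderTree(root)].join('\n'));
```

On that input the `src` directory sorts ahead of `README.md` even though "R" precedes "s" alphabetically, which is exactly the behavior the directories-first comparator buys.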

src/introspector/types.ts Normal file

@ -0,0 +1,88 @@
export interface RepoSummary {
languages: ('python' | 'typescript')[];
root: string;
analyzedAt: string;
files: FileInfo[];
modules: ModuleInfo[];
config: ConfigInfo;
git?: GitInfo;
tree?: FileTreeNode;
}
export interface FileInfo {
path: string;
lang: 'python' | 'typescript' | 'other';
role: 'source' | 'test' | 'config' | 'docs';
size: number;
lastModified: string;
}
export interface ModuleInfo {
path: string;
exports: ExportInfo[];
imports: string[];
complexity: 'low' | 'medium' | 'high';
}
export interface ExportInfo {
name: string;
kind: 'function' | 'class' | 'constant' | 'type';
signature?: string;
docstring?: string;
lineNumber: number;
isAsync?: boolean;
isExported?: boolean;
}
export interface ConfigInfo {
python?: {
entryPoints: string[];
testFramework: 'pytest' | 'unittest' | 'none';
hasTyping: boolean;
pyprojectToml: boolean;
setupPy: boolean;
};
typescript?: {
entryPoints: string[];
testFramework: 'vitest' | 'jest' | 'none';
hasTypes: boolean;
packageJson: boolean;
tsconfig: boolean;
};
}
export interface GitInfo {
lastAnalyzedCommit: string;
currentCommit: string;
changedSince: string[];
branch: string;
recentCommits?: CommitInfo[];
fileHistory?: FileHistoryInfo[];
}
export interface CommitInfo {
hash: string;
shortHash: string;
author: string;
date: string;
message: string;
filesChanged: number;
}
export interface FileHistoryInfo {
path: string;
commitCount: number;
lastModified: string;
contributors: string[];
}
export interface FileTreeNode {
name: string;
path: string;
type: 'file' | 'directory';
children?: FileTreeNode[];
lang?: 'python' | 'typescript' | 'other';
role?: 'source' | 'test' | 'config' | 'docs';
}
export type Language = 'python' | 'typescript';
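As a sketch of consuming these types, here is a recursive walk over a `FileTreeNode` that counts files tagged `role: 'source'`. The shape is inlined (and trimmed to the fields used) so the snippet stands alone; `countSourceFiles` is a hypothetical helper, not part of the package:

```typescript
// Trimmed copy of the FileTreeNode shape above, limited to the fields used here.
type FileTreeNode = {
  name: string;
  path: string;
  type: 'file' | 'directory';
  children?: FileTreeNode[];
  role?: 'source' | 'test' | 'config' | 'docs';
};

// Recursively count files whose role is 'source'.
function countSourceFiles(node: FileTreeNode): number {
  if (node.type === 'file') return node.role === 'source' ? 1 : 0;
  return (node.children ?? []).reduce((sum, c) => sum + countSourceFiles(c), 0);
}

const tree: FileTreeNode = {
  name: '.', path: '', type: 'directory', children: [
    { name: 'a.ts', path: 'a.ts', type: 'file', role: 'source' },
    { name: 'a.test.ts', path: 'a.test.ts', type: 'file', role: 'test' },
  ],
};
console.log(countSourceFiles(tree)); // → 1
```

Because `children` is optional on file nodes, the `?? []` fallback keeps the recursion total without a separate null check.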

tsconfig.json Normal file

@ -0,0 +1,21 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"lib": ["ES2022"],
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"forceConsistentCasingInFileNames": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"resolveJsonModule": true,
"allowSyntheticDefaultImports": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "tests"]
}