This commit is contained in:
Harivansh Rathi 2026-01-11 17:50:26 -05:00
parent 4b24606d0e
commit 9297f0b1ee
13 changed files with 1292 additions and 16 deletions

12
src/analyzer/index.ts Normal file
View file

@ -0,0 +1,12 @@
export { generateEvalSpec, generateEvalSpecInteractive, generateEvalSpecNonInteractive } from './spec-generator.js';
export type { GenerateResult, GenerateOptions } from './spec-generator.js';
export type {
EvalSpec,
EvalScenario,
Assertion,
MockSpec,
DeterministicGrade,
RubricGrade,
} from './types.js';
export { EVAL_SPEC_JSON_SCHEMA } from './types.js';
export { buildSystemPrompt, buildUserPrompt, optimizeForPrompt } from './prompt-builder.js';

View file

@ -0,0 +1,112 @@
import * as fs from 'node:fs/promises';
import * as path from 'node:path';
import { fileURLToPath } from 'node:url';
import type { RepoSummary } from '../introspector/types.js';
// Directory of this module, derived from import.meta.url via fileURLToPath.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Prompt templates are read from the `prompts` directory two levels above this module.
const PROMPTS_DIR = path.join(__dirname, '../../prompts');
/** Inputs consumed by buildUserPrompt when rendering the user prompt template. */
export interface PromptConfig {
/** Repository summary; it is condensed by optimizeForPrompt and embedded in the prompt as JSON. */
repoSummary: RepoSummary;
/** Optional modules/functions to focus on; when omitted or empty the prompt asks for whole-codebase analysis. */
focus?: string[];
/** Value substituted for the max-scenarios placeholder; buildUserPrompt falls back to 10 when omitted. */
maxScenarios?: number;
}
/**
 * Reads one prompt template from the prompts directory.
 *
 * @param name - Base name of the template, without the `.md` extension.
 * @returns The template contents decoded as UTF-8.
 */
export async function loadPrompt(name: string): Promise<string> {
  const fileName = `${name}.md`;
  const contents = await fs.readFile(path.join(PROMPTS_DIR, fileName), 'utf-8');
  return contents;
}
/**
 * Assembles the system prompt from the `analyzer-system` and
 * `analyzer-developer` templates, in that order, separated by a blank line.
 *
 * @returns The concatenated prompt text.
 */
export async function buildSystemPrompt(): Promise<string> {
  const sections: string[] = [];
  // Templates are loaded one after another, in the order they appear in the output.
  for (const promptName of ['analyzer-system', 'analyzer-developer']) {
    sections.push(await loadPrompt(promptName));
  }
  return sections.join('\n\n');
}
/**
 * Renders the `analyzer-user` template for a repository.
 *
 * Placeholders filled in:
 * - `{{REPO_SUMMARY}}`: pretty-printed JSON of the optimized repo summary
 * - `{{FOCUS_INSTRUCTIONS}}`: a focus sentence, or a whole-codebase instruction
 *   when `config.focus` is missing or empty
 * - `{{MAX_SCENARIOS}}`: `config.maxScenarios`, defaulting to 10
 *
 * @param config - Repository summary plus optional focus list and scenario cap.
 * @returns The rendered user prompt.
 */
export async function buildUserPrompt(config: PromptConfig): Promise<string> {
  const template = await loadPrompt('analyzer-user');
  const summaryJson = JSON.stringify(optimizeForPrompt(config.repoSummary), null, 2);
  const focusInstructions = config.focus?.length
    ? `Focus specifically on these modules/functions: ${config.focus.join(', ')}`
    : 'Analyze the entire codebase and identify the most important testable functions.';
  const maxScenarios = config.maxScenarios ?? 10;

  const substitutions: Record<string, string> = {
    REPO_SUMMARY: summaryJson,
    FOCUS_INSTRUCTIONS: focusInstructions,
    MAX_SCENARIOS: String(maxScenarios),
  };

  // A replacer function is used on purpose: with a string replacement,
  // String.prototype.replace expands `$&`, `$$`, `$'` etc., which would corrupt
  // signatures/docstrings in the summary that contain `$`. Doing all
  // substitutions in one pass also guarantees that placeholder-looking text
  // inside an inserted value is never substituted again.
  return template.replace(
    /\{\{(REPO_SUMMARY|FOCUS_INSTRUCTIONS|MAX_SCENARIOS)\}\}/g,
    (placeholder: string, key: string) => substitutions[key] ?? placeholder,
  );
}
/**
 * Condenses a RepoSummary into the smaller structure embedded in the prompt.
 *
 * - Exports whose name starts with `_` are dropped.
 * - Modules left with no exports are dropped.
 * - Each module keeps at most its first 10 imports.
 * - Git history is reduced to the 10 files with the highest commit counts.
 *
 * The input summary is not modified.
 *
 * @param summary - Full repository summary.
 * @returns The condensed summary.
 */
export function optimizeForPrompt(summary: RepoSummary): OptimizedRepoSummary {
  const modules = summary.modules
    .map(m => ({
      path: m.path,
      complexity: m.complexity,
      exports: m.exports
        .filter(e => !e.name.startsWith('_'))
        .map(e => ({
          name: e.name,
          kind: e.kind,
          signature: e.signature,
          docstring: e.docstring,
          line: e.lineNumber,
          async: e.isAsync,
        })),
      imports: m.imports.slice(0, 10),
    }))
    .filter(m => m.exports.length > 0);

  const { python, typescript } = summary.config;
  const fileHistory = summary.git?.fileHistory;

  return {
    name: path.basename(summary.root),
    languages: summary.languages,
    analyzedAt: summary.analyzedAt,
    modules,
    config: {
      python: python
        ? { testFramework: python.testFramework, hasTyping: python.hasTyping }
        : undefined,
      typescript: typescript
        ? { testFramework: typescript.testFramework, hasTypes: typescript.hasTypes }
        : undefined,
    },
    git: summary.git
      ? {
          branch: summary.git.branch,
          // Array.prototype.sort works in place, so sort a copy to avoid
          // reordering the caller's fileHistory array.
          activeFiles: fileHistory
            ? [...fileHistory]
                .sort((a, b) => b.commitCount - a.commitCount)
                .slice(0, 10)
                .map(f => ({ path: f.path, commits: f.commitCount }))
            : undefined,
        }
      : undefined,
  };
}
/** Condensed form of a RepoSummary, as returned by optimizeForPrompt. */
export interface OptimizedRepoSummary {
/** Base name of the repository root path. */
name: string;
languages: string[];
analyzedAt: string;
/** Modules that still have at least one export after `_`-prefixed exports are removed. */
modules: OptimizedModule[];
/** Per-language settings; a language key is undefined when the source summary has no config for it. */
config: {
python?: { testFramework: string; hasTyping: boolean };
typescript?: { testFramework: string; hasTypes: boolean };
};
/** Present only when the source summary carries git information. */
git?: {
branch: string;
/** Up to 10 files ordered by descending commit count. */
activeFiles?: { path: string; commits: number }[];
};
}
/** One module entry inside OptimizedRepoSummary. */
interface OptimizedModule {
path: string;
complexity: string;
/** Exports copied from the source module, excluding names that start with `_`. */
exports: {
name: string;
kind: string;
signature?: string;
docstring?: string;
/** Copied from the export's `lineNumber`. */
line: number;
/** Copied from the export's `isAsync`. */
async?: boolean;
}[];
/** At most the first 10 imports of the source module. */
imports: string[];
}

View file

@ -0,0 +1,174 @@
import { query, type SDKMessage, type Options, type CanUseTool, type PermissionResult } from '@anthropic-ai/claude-agent-sdk';
import type { RepoSummary } from '../introspector/types.js';
import type { EvalSpec } from './types.js';
import { buildSystemPrompt, buildUserPrompt } from './prompt-builder.js';
import { EVAL_SPEC_JSON_SCHEMA } from './types.js';
/** Options accepted by generateEvalSpec. */
export interface GenerateOptions {
/** When true, the query is given the `claude_code` tool preset and a canUseTool hook; defaults to false. */
interactive?: boolean;
/**
 * Called for `AskUserQuestion` tool calls in interactive mode. Receives the question text, or the
 * JSON-serialized tool input when none of the known text fields is set, and resolves to the answer.
 */
onQuestion?: (question: string) => Promise<string>;
/** Modules/functions the prompt should focus on. */
focus?: string[];
/** Maximum number of scenarios requested in the prompt; defaults to 10. */
maxScenarios?: number;
}
/** Value resolved by generateEvalSpec. */
export interface GenerateResult {
/** The generated spec, with generatedBy/totalTokens/questionsAsked metadata filled in by generateEvalSpec. */
spec: EvalSpec;
/** Sum of input and output tokens reported on the successful result message. */
tokensUsed: number;
/** Number of questions answered through the onQuestion callback. */
questionsAsked: number;
}
/**
 * Generates an EvalSpec for a repository by sending the system and user
 * prompts to the agent SDK with a JSON-schema output format.
 *
 * @param repoSummary - Introspection summary of the repository.
 * @param options - Interactive mode, question callback, focus list and scenario cap.
 * @returns The parsed spec plus token and question counters.
 * @throws Error when the result message reports a non-success subtype, when no
 *   result message is received, or when the result cannot be parsed.
 */
export async function generateEvalSpec(
  repoSummary: RepoSummary,
  options: GenerateOptions = {}
): Promise<GenerateResult> {
  const { interactive = false, onQuestion, focus, maxScenarios = 10 } = options;
  const systemPrompt = await buildSystemPrompt();
  const userPrompt = await buildUserPrompt({
    repoSummary,
    focus,
    maxScenarios,
  });
  let tokensUsed = 0;
  let questionsAsked = 0;
  let spec: EvalSpec | null = null;

  const canUseTool: CanUseTool = async (toolName, input): Promise<PermissionResult> => {
    if (toolName === 'AskUserQuestion' && interactive && onQuestion) {
      // Extract question from various possible field names; fall back to the
      // serialized input so structured questions reach the handler intact.
      const inputObj = input as Record<string, unknown>;
      const question = String(
        inputObj.question ||
        inputObj.text ||
        inputObj.message ||
        inputObj.prompt ||
        JSON.stringify(input)
      );
      const answer = await onQuestion(question);
      questionsAsked++;
      // NOTE(review): the answer is attached under `answer`; confirm this is the
      // field the AskUserQuestion tool reads back.
      return {
        behavior: 'allow',
        updatedInput: { ...input, answer },
      };
    }
    // Every other tool call is allowed unchanged. The allow result carries
    // `updatedInput`, so the original input is passed through explicitly.
    return { behavior: 'allow', updatedInput: input };
  };

  const queryOptions: Options = {
    // Interactive mode exposes the claude_code tool preset; non-interactive
    // mode exposes no tools at all.
    tools: interactive
      ? { type: 'preset', preset: 'claude_code' }
      : [],
    // NOTE(review): with 'bypassPermissions' the SDK may not consult canUseTool
    // at all — confirm that questions are still routed to onQuestion.
    permissionMode: 'bypassPermissions',
    allowDangerouslySkipPermissions: true,
    outputFormat: {
      type: 'json_schema',
      schema: EVAL_SPEC_JSON_SCHEMA,
    },
    canUseTool: interactive ? canUseTool : undefined,
  };

  // System and user prompts are sent as a single prompt string.
  const fullPrompt = `${systemPrompt}\n\n---\n\n${userPrompt}`;
  for await (const message of query({ prompt: fullPrompt, options: queryOptions })) {
    handleMessage(message);
    if (message.type === 'result') {
      if (message.subtype === 'success') {
        // SDK returns parsed JSON in structured_output when outputFormat is set
        const structuredOutput = (message as { structured_output?: unknown }).structured_output;
        const resultData = structuredOutput ?? message.result;
        spec = parseResult(resultData);
        tokensUsed = (message.usage?.input_tokens ?? 0) + (message.usage?.output_tokens ?? 0);
      } else {
        throw new Error(`Generation failed: ${message.subtype}`);
      }
    }
  }

  if (!spec) {
    throw new Error('Failed to generate EvalSpec: no result received');
  }

  // Bookkeeping fields are overwritten with locally measured values; anything
  // else the model put in metadata (e.g. confidence) is kept.
  spec.metadata = {
    ...spec.metadata,
    generatedBy: 'evaluclaude-harness',
    totalTokens: tokensUsed,
    questionsAsked,
  };
  return { spec, tokensUsed, questionsAsked };
}
/**
 * Converts the raw result of a query into an EvalSpec.
 *
 * Objects are returned as-is. Strings are parsed as JSON, trying in order:
 * 1. the trimmed string itself;
 * 2. the contents of the first markdown code fence (if any), narrowed to the
 *    span between the first `{` and the last `}`.
 *
 * The direct parse comes first so that valid JSON whose string values happen
 * to contain ``` fences is not mangled by the fence extraction.
 *
 * @param result - Structured output object or raw result text.
 * @returns The value cast to EvalSpec (no schema validation is performed here).
 * @throws Error when a string cannot be parsed into a JSON object, or when the
 *   input is neither a string nor an object.
 */
function parseResult(result: unknown): EvalSpec {
  if (typeof result === 'string') {
    const trimmed = result.trim();
    const candidates: string[] = [trimmed];

    // Fallback: strip a markdown code fence, then keep only the outermost {...}.
    const fenceMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
    const unfenced = fenceMatch ? fenceMatch[1].trim() : trimmed;
    const startIdx = unfenced.indexOf('{');
    const endIdx = unfenced.lastIndexOf('}');
    const extracted =
      startIdx !== -1 && endIdx > startIdx ? unfenced.slice(startIdx, endIdx + 1) : unfenced;
    if (extracted !== trimmed) {
      candidates.push(extracted);
    }

    let lastError: unknown;
    for (const candidate of candidates) {
      try {
        const parsed: unknown = JSON.parse(candidate);
        if (parsed !== null && typeof parsed === 'object') {
          return parsed as EvalSpec;
        }
        // Valid JSON but a primitive: it cannot be a spec.
        lastError = new Error(
          `expected a JSON object but got ${parsed === null ? 'null' : typeof parsed}`
        );
      } catch (e) {
        lastError = e;
      }
    }
    console.error('Raw result:', result);
    throw new Error(`Failed to parse result as JSON: ${lastError}`);
  }
  if (result && typeof result === 'object') {
    return result as EvalSpec;
  }
  throw new Error(`Unexpected result type: ${typeof result}`);
}
/**
 * Reports progress for one streamed SDK message.
 *
 * Text blocks of assistant messages are written to stderr, each surrounded by
 * newlines. A result message with a non-success subtype is logged via
 * console.error. All other messages are ignored.
 *
 * @param message - Message yielded by the query stream.
 */
function handleMessage(message: SDKMessage): void {
  if (message.type === 'assistant') {
    const blocks = message.message?.content;
    if (!blocks) {
      return;
    }
    for (const block of blocks) {
      if (block.type === 'text') {
        process.stderr.write(`\n${block.text}\n`);
      }
    }
    return;
  }

  if (message.type === 'result' && message.subtype !== 'success') {
    console.error('Error:', message.subtype);
  }
}
/**
 * Convenience wrapper around generateEvalSpec with `interactive` forced to false.
 *
 * @param repoSummary - Introspection summary of the repository.
 * @param options - Generation options other than the interactive settings.
 * @returns The result of generateEvalSpec.
 */
export async function generateEvalSpecNonInteractive(
  repoSummary: RepoSummary,
  options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
): Promise<GenerateResult> {
  const nonInteractiveOptions: GenerateOptions = Object.assign({}, options, { interactive: false });
  return generateEvalSpec(repoSummary, nonInteractiveOptions);
}
/**
 * Convenience wrapper around generateEvalSpec with `interactive` set to true
 * and the given handler installed as `onQuestion`.
 *
 * @param repoSummary - Introspection summary of the repository.
 * @param questionHandler - Resolves a question string to the user's answer.
 * @param options - Generation options other than the interactive settings.
 * @returns The result of generateEvalSpec.
 */
export async function generateEvalSpecInteractive(
  repoSummary: RepoSummary,
  questionHandler: (question: string) => Promise<string>,
  options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
): Promise<GenerateResult> {
  const interactiveOptions: GenerateOptions = { ...options };
  interactiveOptions.interactive = true;
  interactiveOptions.onQuestion = questionHandler;
  return generateEvalSpec(repoSummary, interactiveOptions);
}

263
src/analyzer/types.ts Normal file
View file

@ -0,0 +1,263 @@
/** Top-level evaluation specification; EVAL_SPEC_JSON_SCHEMA is the JSON Schema counterpart of this shape. */
export interface EvalSpec {
/** Spec format version; only '1.0' is allowed. */
version: '1.0';
/** Identification of the analyzed repository. */
repo: {
name: string;
languages: string[];
analyzedAt: string;
};
/** Test scenarios to run. */
scenarios: EvalScenario[];
/** Grading entries, split into deterministic checks and rubric-based grades. */
grading: {
deterministic: DeterministicGrade[];
rubrics: RubricGrade[];
};
/** Generation bookkeeping plus a confidence level. */
metadata: {
generatedBy: string;
totalTokens: number;
questionsAsked: number;
confidence: 'low' | 'medium' | 'high';
};
}
/** A single test scenario: what to call, with which input, and what to assert. */
export interface EvalScenario {
id: string;
name: string;
description: string;
/** The code under test. */
target: {
module: string;
function: string;
type: 'function' | 'method' | 'class';
};
category: 'unit' | 'integration' | 'edge-case' | 'negative';
priority: 'critical' | 'high' | 'medium' | 'low';
/** Optional setup; when present both fixtures and mocks must be given. */
setup?: {
fixtures: string[];
mocks: MockSpec[];
};
/** Input for the target; `kwargs` is optional. */
input: {
args: Record<string, unknown>;
kwargs?: Record<string, unknown>;
};
assertions: Assertion[];
tags: string[];
}
/** Description of one mock used in a scenario's setup. */
export interface MockSpec {
/** Identifier of the thing being mocked (format not defined in this file). */
target: string;
/** Optional value for the mock to return. */
returnValue?: unknown;
/** Optional side effect, expressed as a string (interpretation not defined in this file). */
sideEffect?: string;
}
/** Union of all assertion variants; the `type` field is the discriminant. */
export type Assertion =
| EqualsAssertion
| ContainsAssertion
| ThrowsAssertion
| TypeAssertion
| MatchesAssertion
| TruthyAssertion
| CustomAssertion
| LLMRubricAssertion;
/** Assertion tagged 'llm-rubric': a rubric text, a list of criteria and an optional passing threshold. */
export interface LLMRubricAssertion extends BaseAssertion {
type: 'llm-rubric';
rubric: string;
criteria: string[];
passingThreshold?: number;
}
/** Fields shared by the assertion variants that extend it: an optional human-readable description. */
export interface BaseAssertion {
description?: string;
}
/** Assertion tagged 'equals' with an expected value. */
export interface EqualsAssertion extends BaseAssertion {
type: 'equals';
expected: unknown;
// NOTE(review): `path` presumably selects a location inside the result; its syntax is not defined in this file.
path?: string;
}
/** Assertion tagged 'contains' with the value to look for. */
export interface ContainsAssertion extends BaseAssertion {
type: 'contains';
value: unknown;
path?: string;
}
/** Assertion tagged 'throws'; error type and message fragment are both optional. */
export interface ThrowsAssertion extends BaseAssertion {
type: 'throws';
errorType?: string;
messageContains?: string;
}
/** Assertion tagged 'typeof'; `expected` is one of the listed type names (includes 'array' and 'null'). */
export interface TypeAssertion extends BaseAssertion {
type: 'typeof';
expected: 'string' | 'number' | 'boolean' | 'object' | 'array' | 'null' | 'undefined';
path?: string;
}
/** Assertion tagged 'matches' with a pattern given as a string. */
export interface MatchesAssertion extends BaseAssertion {
type: 'matches';
pattern: string;
path?: string;
}
/** Assertion tagged either 'truthy' or 'falsy'. */
export interface TruthyAssertion extends BaseAssertion {
type: 'truthy' | 'falsy';
path?: string;
}
/** Assertion tagged 'custom'; unlike the other variants it does not extend BaseAssertion and its description is required. */
export interface CustomAssertion {
type: 'custom';
description: string;
check: string;
}
/** Deterministic grading entry for one scenario. */
export interface DeterministicGrade {
/** Id of the scenario this entry refers to. */
scenarioId: string;
check: 'pass' | 'fail' | 'error';
score: number;
}
/** Rubric-based grading entry for one scenario. */
export interface RubricGrade {
/** Id of the scenario this entry refers to. */
scenarioId: string;
/** Grading criteria as a single string. */
criteria: string;
maxScore: number;
}
/**
 * JSON Schema describing an EvalSpec document.
 *
 * Every object level sets `additionalProperties: false`. Where the schema is
 * looser than the TypeScript types, an inline comment says so.
 */
export const EVAL_SPEC_JSON_SCHEMA = {
type: 'object',
properties: {
version: { type: 'string', const: '1.0' },
repo: {
type: 'object',
properties: {
name: { type: 'string' },
languages: { type: 'array', items: { type: 'string' } },
analyzedAt: { type: 'string' },
},
required: ['name', 'languages', 'analyzedAt'],
additionalProperties: false,
},
scenarios: {
type: 'array',
items: {
type: 'object',
properties: {
id: { type: 'string' },
name: { type: 'string' },
description: { type: 'string' },
target: {
type: 'object',
properties: {
module: { type: 'string' },
function: { type: 'string' },
type: { type: 'string', enum: ['function', 'method', 'class'] },
},
required: ['module', 'function', 'type'],
additionalProperties: false,
},
category: { type: 'string', enum: ['unit', 'integration', 'edge-case', 'negative'] },
priority: { type: 'string', enum: ['critical', 'high', 'medium', 'low'] },
// `setup` is optional for a scenario, but needs both fixtures and mocks when present.
setup: {
type: 'object',
properties: {
fixtures: { type: 'array', items: { type: 'string' } },
mocks: {
type: 'array',
items: {
type: 'object',
properties: {
target: { type: 'string' },
// Empty schema: any JSON value is accepted.
returnValue: {},
sideEffect: { type: 'string' },
},
required: ['target'],
additionalProperties: false,
},
},
},
required: ['fixtures', 'mocks'],
additionalProperties: false,
},
input: {
type: 'object',
properties: {
args: { type: 'object' },
kwargs: { type: 'object' },
},
required: ['args'],
additionalProperties: false,
},
// Assertions are one flat object shape covering all variants: `type` is an
// unconstrained string and every variant-specific field is optional, so this
// is looser than the Assertion union in the TypeScript types.
assertions: {
type: 'array',
items: {
type: 'object',
properties: {
type: { type: 'string' },
expected: {},
value: {},
path: { type: 'string' },
errorType: { type: 'string' },
messageContains: { type: 'string' },
pattern: { type: 'string' },
description: { type: 'string' },
check: { type: 'string' },
rubric: { type: 'string' },
criteria: { type: 'array', items: { type: 'string' } },
passingThreshold: { type: 'number' },
},
required: ['type'],
additionalProperties: false,
},
},
tags: { type: 'array', items: { type: 'string' } },
},
required: ['id', 'name', 'description', 'target', 'category', 'priority', 'input', 'assertions', 'tags'],
additionalProperties: false,
},
},
grading: {
type: 'object',
properties: {
deterministic: {
type: 'array',
items: {
type: 'object',
properties: {
scenarioId: { type: 'string' },
check: { type: 'string', enum: ['pass', 'fail', 'error'] },
score: { type: 'number' },
},
required: ['scenarioId', 'check', 'score'],
additionalProperties: false,
},
},
rubrics: {
type: 'array',
items: {
type: 'object',
properties: {
scenarioId: { type: 'string' },
criteria: { type: 'string' },
maxScore: { type: 'number' },
},
required: ['scenarioId', 'criteria', 'maxScore'],
additionalProperties: false,
},
},
},
required: ['deterministic', 'rubrics'],
additionalProperties: false,
},
// All four metadata fields are required in the generated document.
metadata: {
type: 'object',
properties: {
generatedBy: { type: 'string' },
totalTokens: { type: 'number' },
questionsAsked: { type: 'number' },
confidence: { type: 'string', enum: ['low', 'medium', 'high'] },
},
required: ['generatedBy', 'totalTokens', 'questionsAsked', 'confidence'],
additionalProperties: false,
},
},
required: ['version', 'repo', 'scenarios', 'grading', 'metadata'],
additionalProperties: false,
} as const;

145
src/cli/commands/analyze.ts Normal file
View file

@ -0,0 +1,145 @@
import { Command } from 'commander';
import * as path from 'node:path';
import * as fs from 'node:fs/promises';
import { analyze } from '../../introspector/index.js';
import { generateEvalSpec, generateEvalSpecInteractive } from '../../analyzer/index.js';
/** Shape handleQuestion looks for after JSON-parsing the question string. */
interface StructuredQuestion {
questions: {
/** Text shown as the prompt message. */
question: string;
/** Heading printed above the prompt; 'Question' is used when absent. */
header?: string;
/** When non-empty, the question is rendered as a selection instead of free text. */
options?: {
label: string;
description?: string;
}[];
/** Selects a checkbox prompt instead of a single-choice list. */
multiSelect?: boolean;
}[];
}
/**
 * Presents a question to the user on the terminal and returns the answer.
 *
 * If `questionData` is JSON with a `questions` array, each entry is asked in
 * order (as a list/checkbox prompt when it has options, otherwise as free
 * text) and the answers are joined with newlines. Anything else is shown as a
 * single free-text prompt.
 *
 * @param questionData - Plain question text or a JSON-encoded StructuredQuestion.
 * @returns The user's answer(s) as one string.
 */
async function handleQuestion(questionData: string): Promise<string> {
  const { default: inquirer } = await import('inquirer');

  // Asks a single free-text question and returns what the user typed.
  const askText = async (message: string) => {
    const { answer } = await inquirer.prompt([{
      type: 'input',
      name: 'answer',
      message,
    }]);
    return answer;
  };

  let structured: StructuredQuestion | null = null;
  try {
    structured = JSON.parse(questionData);
  } catch {
    // Input is not JSON; it is handled as a plain text question below.
  }

  const questionList = structured?.questions;
  if (!questionList || !Array.isArray(questionList)) {
    return askText(`🤖 Claude asks: ${questionData}`);
  }

  const collected: string[] = [];
  for (const item of questionList) {
    console.log(`\n🤖 ${item.header || 'Question'}:\n`);

    const available = item.options;
    if (available && available.length > 0) {
      // Show "label - description" to the user but record only the label.
      const choices = available.map(opt => ({
        name: opt.description ? `${opt.label} - ${opt.description}` : opt.label,
        value: opt.label,
      }));
      const { selection } = await inquirer.prompt([{
        type: item.multiSelect ? 'checkbox' : 'list',
        name: 'selection',
        message: item.question,
        choices,
      }]);
      // Checkbox prompts yield an array; flatten it to one comma-separated line.
      collected.push(Array.isArray(selection) ? selection.join(', ') : selection);
    } else {
      collected.push(await askText(item.question));
    }
  }
  return collected.join('\n');
}
/**
 * `analyze` CLI command: introspects a repository, asks Claude to generate an
 * EvalSpec, and writes the spec as JSON to `--output` or to stdout.
 *
 * Any failure is printed to stderr and the process exits with code 1.
 */
export const analyzeCommand = new Command('analyze')
  .description('Analyze a codebase and generate EvalSpec using Claude')
  .argument('[path]', 'Path to the repository to analyze', '.')
  .option('-o, --output <file>', 'Output file for the EvalSpec JSON')
  .option('-i, --interactive', 'Enable interactive mode with clarifying questions')
  .option('--focus <modules>', 'Comma-separated list of modules/functions to focus on')
  .option('--max-scenarios <n>', 'Maximum number of test scenarios to generate', '10')
  .option('--quiet', 'Suppress progress messages')
  .action(async (repoPath: string, options: AnalyzeOptions) => {
    const absolutePath = path.resolve(repoPath);
    const log = options.quiet ? () => {} : console.log;
    log(`\n🔬 Analyzing codebase: ${absolutePath}\n`);
    try {
      // Validate before doing any work: parseInt yields NaN for non-numeric
      // input, and NaN would otherwise be rendered into the prompt as "NaN".
      const maxScenarios = Number.parseInt(options.maxScenarios, 10);
      if (!Number.isInteger(maxScenarios) || maxScenarios < 1) {
        throw new Error(
          `Invalid --max-scenarios value "${options.maxScenarios}": expected a positive integer`
        );
      }

      log('Step 1: Running tree-sitter introspection...');
      const repoSummary = await analyze({
        root: absolutePath,
        onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`),
      });
      log(`\nStep 2: Generating EvalSpec with Claude...\n`);
      const focus = options.focus?.split(',').map(s => s.trim());
      let result;
      if (options.interactive) {
        result = await generateEvalSpecInteractive(
          repoSummary,
          handleQuestion,
          { focus, maxScenarios }
        );
      } else {
        result = await generateEvalSpec(repoSummary, {
          interactive: false,
          focus,
          maxScenarios,
        });
      }
      const { spec, tokensUsed, questionsAsked } = result;
      log('\n✅ EvalSpec generated successfully!');
      log(` Scenarios: ${spec.scenarios.length}`);
      log(` Tokens used: ${tokensUsed}`);
      log(` Questions asked: ${questionsAsked}`);
      log(` Confidence: ${spec.metadata.confidence}`);
      const json = JSON.stringify(spec, null, 2);
      if (options.output) {
        await fs.writeFile(options.output, json);
        log(`\n📄 Written to: ${options.output}`);
      } else {
        // The spec itself is always printed, even with --quiet.
        console.log('\n' + json);
      }
    } catch (error) {
      console.error('\n❌ Error:', error instanceof Error ? error.message : error);
      process.exit(1);
    }
  });
/** Parsed command-line options of the `analyze` command. */
interface AnalyzeOptions {
/** File to write the spec JSON to; when absent the JSON is printed to stdout. */
output?: string;
/** Enables interactive generation with clarifying questions. */
interactive?: boolean;
/** Raw comma-separated focus list; split and trimmed by the action handler. */
focus?: string;
/** Raw option string (default '10'); parsed with parseInt by the action handler. */
maxScenarios: string;
/** Suppresses progress messages. */
quiet?: boolean;
}

View file

@ -2,6 +2,7 @@
import { Command } from 'commander';
import { introCommand } from './commands/intro.js';
import { analyzeCommand } from './commands/analyze.js';
const program = new Command();
@ -11,5 +12,6 @@ program
.version('0.1.0');
program.addCommand(introCommand);
program.addCommand(analyzeCommand);
program.parse(process.argv);

View file

@ -1 +1,2 @@
export * from './introspector/index.js';
export * from './analyzer/index.js';

View file

@ -3,26 +3,56 @@ import TypeScriptLang from 'tree-sitter-typescript';
import { BaseParser } from './base.js';
import type { ModuleInfo, ExportInfo } from '../types.js';
const { typescript: TypeScript } = TypeScriptLang;
const { typescript: TypeScript, tsx: TSX } = TypeScriptLang;
export class TypeScriptParser extends BaseParser {
readonly language = 'typescript';
private parser: Parser;
private tsParser: Parser;
private tsxParser: Parser;
constructor() {
super();
this.parser = new Parser();
this.parser.setLanguage(TypeScript);
this.tsParser = new Parser();
this.tsParser.setLanguage(TypeScript);
this.tsxParser = new Parser();
this.tsxParser.setLanguage(TSX);
}
parse(source: string, filePath: string): ModuleInfo {
const tree = this.parser.parse(source);
const isTsx = filePath.endsWith('.tsx') || filePath.endsWith('.jsx');
const parser = isTsx ? this.tsxParser : this.tsParser;
let tree: Parser.Tree;
try {
tree = parser.parse(source);
} catch (error) {
return this.createEmptyModule(filePath, `Parse error: ${error}`);
}
const rootNode = tree.rootNode;
if (rootNode.hasError) {
const errorCount = this.countErrors(rootNode);
if (errorCount > 10) {
return this.createEmptyModule(filePath, `Too many syntax errors (${errorCount})`);
}
}
const exports: ExportInfo[] = [];
const imports: string[] = [];
this.walkNode(rootNode, source, exports, imports, false);
try {
this.walkNode(rootNode, source, exports, imports, false, 0);
} catch (error) {
return {
path: filePath,
exports,
imports: [...new Set(imports)],
complexity: this.calculateComplexity(exports.length),
};
}
return {
path: filePath,
@ -32,13 +62,34 @@ export class TypeScriptParser extends BaseParser {
};
}
private createEmptyModule(path: string, reason: string): ModuleInfo {
return {
path,
exports: [],
imports: [],
complexity: 'low',
};
}
private countErrors(node: Parser.SyntaxNode): number {
let count = node.type === 'ERROR' ? 1 : 0;
for (const child of node.children) {
count += this.countErrors(child);
}
return count;
}
private walkNode(
node: Parser.SyntaxNode,
source: string,
exports: ExportInfo[],
imports: string[],
isExported: boolean
isExported: boolean,
depth: number
): void {
if (depth > 50) return;
if (node.type === 'ERROR') return;
switch (node.type) {
case 'function_declaration':
exports.push(this.extractFunction(node, source, isExported));
@ -59,9 +110,8 @@ export class TypeScriptParser extends BaseParser {
break;
case 'export_statement':
// Recurse with isExported = true
for (const child of node.children) {
this.walkNode(child, source, exports, imports, true);
this.walkNode(child, source, exports, imports, true, depth + 1);
}
break;
@ -70,9 +120,24 @@ export class TypeScriptParser extends BaseParser {
break;
case 'program':
// Recurse into top-level statements
for (const child of node.children) {
this.walkNode(child, source, exports, imports, false);
this.walkNode(child, source, exports, imports, false, depth + 1);
}
break;
case 'export_clause':
for (const child of node.children) {
if (child.type === 'export_specifier') {
const nameNode = child.childForFieldName('name') || child.firstChild;
if (nameNode && nameNode.type === 'identifier') {
exports.push({
name: this.getText(source, nameNode.startIndex, nameNode.endIndex),
kind: 'constant',
lineNumber: child.startPosition.row + 1,
isExported: true,
});
}
}
}
break;
}
@ -85,7 +150,6 @@ export class TypeScriptParser extends BaseParser {
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Build signature
let signature = '';
if (paramsNode) {
signature = this.getText(source, paramsNode.startIndex, paramsNode.endIndex);
@ -94,7 +158,6 @@ export class TypeScriptParser extends BaseParser {
signature += `: ${this.getText(source, returnTypeNode.startIndex, returnTypeNode.endIndex)}`;
}
// Check for async
const isAsync = node.children.some(c => c.type === 'async');
return {
@ -111,7 +174,6 @@ export class TypeScriptParser extends BaseParser {
const nameNode = node.childForFieldName('name');
const name = nameNode ? this.getText(source, nameNode.startIndex, nameNode.endIndex) : 'unknown';
// Get heritage clause for extends/implements
let signature: string | undefined;
const heritageNode = node.children.find(c => c.type === 'class_heritage');
if (heritageNode) {
@ -138,7 +200,6 @@ export class TypeScriptParser extends BaseParser {
if (nameNode) {
const name = this.getText(source, nameNode.startIndex, nameNode.endIndex);
// Check if it's a function expression or arrow function
const isFunction = valueNode && (
valueNode.type === 'arrow_function' ||
valueNode.type === 'function_expression' ||
@ -176,7 +237,6 @@ export class TypeScriptParser extends BaseParser {
for (const child of node.children) {
if (child.type === 'string') {
// Remove quotes from the import path
const importPath = this.getText(source, child.startIndex, child.endIndex)
.replace(/^["']|["']$/g, '');
imports.push(importPath);