mirror of
https://github.com/harivansh-afk/evaluclaude-harness.git
synced 2026-04-17 12:04:13 +00:00
analyze
This commit is contained in:
parent
4b24606d0e
commit
9297f0b1ee
13 changed files with 1292 additions and 16 deletions
12
src/analyzer/index.ts
Normal file
12
src/analyzer/index.ts
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
// Public surface of the analyzer module: spec generation entry points,
// the shared EvalSpec type family, and the prompt-building helpers.
export { generateEvalSpec, generateEvalSpecInteractive, generateEvalSpecNonInteractive } from './spec-generator.js';
export type { GenerateResult, GenerateOptions } from './spec-generator.js';
export type {
  EvalSpec,
  EvalScenario,
  Assertion,
  MockSpec,
  DeterministicGrade,
  RubricGrade,
} from './types.js';
export { EVAL_SPEC_JSON_SCHEMA } from './types.js';
export { buildSystemPrompt, buildUserPrompt, optimizeForPrompt } from './prompt-builder.js';
|
||||
112
src/analyzer/prompt-builder.ts
Normal file
112
src/analyzer/prompt-builder.ts
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
import * as fs from 'node:fs/promises';
|
||||
import * as path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import type { RepoSummary } from '../introspector/types.js';
|
||||
|
||||
// Recreate __dirname under ESM, where the CommonJS global is unavailable.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Prompt templates live two levels up from this file (repo-root /prompts,
// relative to the built module's location).
const PROMPTS_DIR = path.join(__dirname, '../../prompts');
|
||||
|
||||
/** Inputs for assembling the analyzer's user prompt. */
export interface PromptConfig {
  repoSummary: RepoSummary; // full introspection result; condensed via optimizeForPrompt before embedding
  focus?: string[]; // optional modules/functions the prompt should emphasize
  maxScenarios?: number; // scenario cap substituted into the template (defaults to 10)
}
|
||||
|
||||
export async function loadPrompt(name: string): Promise<string> {
|
||||
const filePath = path.join(PROMPTS_DIR, `${name}.md`);
|
||||
return fs.readFile(filePath, 'utf-8');
|
||||
}
|
||||
|
||||
export async function buildSystemPrompt(): Promise<string> {
|
||||
const system = await loadPrompt('analyzer-system');
|
||||
const developer = await loadPrompt('analyzer-developer');
|
||||
return `${system}\n\n${developer}`;
|
||||
}
|
||||
|
||||
export async function buildUserPrompt(config: PromptConfig): Promise<string> {
|
||||
const template = await loadPrompt('analyzer-user');
|
||||
|
||||
const optimizedSummary = optimizeForPrompt(config.repoSummary);
|
||||
const summaryJson = JSON.stringify(optimizedSummary, null, 2);
|
||||
|
||||
const focusInstructions = config.focus?.length
|
||||
? `Focus specifically on these modules/functions: ${config.focus.join(', ')}`
|
||||
: 'Analyze the entire codebase and identify the most important testable functions.';
|
||||
|
||||
const maxScenarios = config.maxScenarios ?? 10;
|
||||
|
||||
return template
|
||||
.replace('{{REPO_SUMMARY}}', summaryJson)
|
||||
.replace('{{FOCUS_INSTRUCTIONS}}', focusInstructions)
|
||||
.replace('{{MAX_SCENARIOS}}', String(maxScenarios));
|
||||
}
|
||||
|
||||
export function optimizeForPrompt(summary: RepoSummary): OptimizedRepoSummary {
|
||||
return {
|
||||
name: path.basename(summary.root),
|
||||
languages: summary.languages,
|
||||
analyzedAt: summary.analyzedAt,
|
||||
|
||||
modules: summary.modules.map(m => ({
|
||||
path: m.path,
|
||||
complexity: m.complexity,
|
||||
exports: m.exports.map(e => ({
|
||||
name: e.name,
|
||||
kind: e.kind,
|
||||
signature: e.signature,
|
||||
docstring: e.docstring,
|
||||
line: e.lineNumber,
|
||||
async: e.isAsync,
|
||||
})).filter(e => !e.name.startsWith('_')),
|
||||
imports: m.imports.slice(0, 10),
|
||||
})).filter(m => m.exports.length > 0),
|
||||
|
||||
config: {
|
||||
python: summary.config.python ? {
|
||||
testFramework: summary.config.python.testFramework,
|
||||
hasTyping: summary.config.python.hasTyping,
|
||||
} : undefined,
|
||||
typescript: summary.config.typescript ? {
|
||||
testFramework: summary.config.typescript.testFramework,
|
||||
hasTypes: summary.config.typescript.hasTypes,
|
||||
} : undefined,
|
||||
},
|
||||
|
||||
git: summary.git ? {
|
||||
branch: summary.git.branch,
|
||||
activeFiles: summary.git.fileHistory
|
||||
?.sort((a, b) => b.commitCount - a.commitCount)
|
||||
.slice(0, 10)
|
||||
.map(f => ({ path: f.path, commits: f.commitCount })),
|
||||
} : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Compact, prompt-friendly projection of RepoSummary produced by
 * optimizeForPrompt: private exports dropped, per-module imports truncated,
 * git history reduced to the most active files.
 */
export interface OptimizedRepoSummary {
  name: string; // basename of the repo root directory
  languages: string[];
  analyzedAt: string;
  modules: OptimizedModule[]; // only modules that retain at least one public export
  config: {
    python?: { testFramework: string; hasTyping: boolean };
    typescript?: { testFramework: string; hasTypes: boolean };
  };
  git?: {
    branch: string;
    activeFiles?: { path: string; commits: number }[]; // top 10 files by commit count
  };
}
|
||||
|
||||
/** Per-module slice of the optimized summary. */
interface OptimizedModule {
  path: string;
  complexity: string;
  exports: {
    name: string;
    kind: string;
    signature?: string;
    docstring?: string;
    line: number; // source line of the export (renamed from lineNumber)
    async?: boolean; // renamed from isAsync
  }[];
  imports: string[]; // truncated to the first 10 entries
}
|
||||
174
src/analyzer/spec-generator.ts
Normal file
174
src/analyzer/spec-generator.ts
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
import { query, type SDKMessage, type Options, type CanUseTool, type PermissionResult } from '@anthropic-ai/claude-agent-sdk';
|
||||
import type { RepoSummary } from '../introspector/types.js';
|
||||
import type { EvalSpec } from './types.js';
|
||||
import { buildSystemPrompt, buildUserPrompt } from './prompt-builder.js';
|
||||
import { EVAL_SPEC_JSON_SCHEMA } from './types.js';
|
||||
|
||||
/** Options controlling how the analyzer agent generates an EvalSpec. */
export interface GenerateOptions {
  interactive?: boolean; // when true, AskUserQuestion tool calls are routed to onQuestion
  onQuestion?: (question: string) => Promise<string>; // supplies the user's answer to a question
  focus?: string[]; // restrict analysis to these modules/functions
  maxScenarios?: number; // cap on generated scenarios (defaults to 10)
}
|
||||
|
||||
/** Outcome of a generation run: the spec plus usage bookkeeping. */
export interface GenerateResult {
  spec: EvalSpec;
  tokensUsed: number; // input + output tokens reported by the SDK result message
  questionsAsked: number; // AskUserQuestion round-trips handled during the run
}
|
||||
|
||||
/**
 * Drive a Claude Agent SDK session that analyzes `repoSummary` and produces
 * a structured EvalSpec.
 *
 * Flow: build system + user prompts, stream the SDK `query`, echo progress
 * via handleMessage, then parse the final result message into an EvalSpec
 * and stamp usage metadata onto it.
 *
 * @param repoSummary introspection result describing the target repository
 * @param options interactive mode, question handler, focus list, scenario cap
 * @returns the parsed spec plus token/question counters
 * @throws Error when the SDK reports a non-success result subtype, when no
 *   result message arrives, or when the result cannot be parsed as JSON
 */
export async function generateEvalSpec(
  repoSummary: RepoSummary,
  options: GenerateOptions = {}
): Promise<GenerateResult> {
  const { interactive = false, onQuestion, focus, maxScenarios = 10 } = options;

  const systemPrompt = await buildSystemPrompt();
  const userPrompt = await buildUserPrompt({
    repoSummary,
    focus,
    maxScenarios,
  });

  // Counters are closed over by canUseTool and filled in from the result
  // message; spec stays null until a successful result is parsed.
  let tokensUsed = 0;
  let questionsAsked = 0;
  let spec: EvalSpec | null = null;

  // Permission hook: intercepts AskUserQuestion in interactive mode and
  // injects the user's answer into the tool input; everything else is allowed.
  const canUseTool: CanUseTool = async (toolName, input): Promise<PermissionResult> => {
    if (toolName === 'AskUserQuestion' && interactive && onQuestion) {
      // Extract question from various possible field names
      // (the SDK's tool-input shape is not pinned here; fall back to raw JSON).
      const inputObj = input as Record<string, unknown>;
      const question = String(
        inputObj.question ||
        inputObj.text ||
        inputObj.message ||
        inputObj.prompt ||
        JSON.stringify(input)
      );

      const answer = await onQuestion(question);
      questionsAsked++;
      return {
        behavior: 'allow',
        updatedInput: { ...input, answer },
      };
    }
    // Allow all other tools in interactive mode
    // NOTE(review): this branch returns 'allow' without updatedInput —
    // confirm the SDK accepts that shape for PermissionResult.
    return { behavior: 'allow' };
  };

  const queryOptions: Options = {
    // In interactive mode, allow all tools; in non-interactive, restrict to none
    tools: interactive
      ? { type: 'preset', preset: 'claude_code' }
      : [],
    permissionMode: 'bypassPermissions',
    allowDangerouslySkipPermissions: true,
    // Constrain model output to the EvalSpec JSON schema.
    outputFormat: {
      type: 'json_schema',
      schema: EVAL_SPEC_JSON_SCHEMA,
    },
    canUseTool: interactive ? canUseTool : undefined,
  };

  // System and user prompts are sent as one prompt, separated by a rule.
  const fullPrompt = `${systemPrompt}\n\n---\n\n${userPrompt}`;

  for await (const message of query({ prompt: fullPrompt, options: queryOptions })) {
    handleMessage(message);

    if (message.type === 'result') {
      if (message.subtype === 'success') {
        // SDK returns parsed JSON in structured_output when outputFormat is set
        // (field is not in the published SDKMessage type, hence the cast).
        const structuredOutput = (message as { structured_output?: unknown }).structured_output;
        const resultData = structuredOutput ?? message.result;
        spec = parseResult(resultData);
        tokensUsed = (message.usage?.input_tokens ?? 0) + (message.usage?.output_tokens ?? 0);
      } else {
        throw new Error(`Generation failed: ${message.subtype}`);
      }
    }
  }

  if (!spec) {
    throw new Error('Failed to generate EvalSpec: no result received');
  }

  // Overwrite generation bookkeeping with locally observed values; any
  // other metadata fields from the model (e.g. confidence) are preserved.
  spec.metadata = {
    ...spec.metadata,
    generatedBy: 'evaluclaude-harness',
    totalTokens: tokensUsed,
    questionsAsked,
  };

  return { spec, tokensUsed, questionsAsked };
}
|
||||
|
||||
function parseResult(result: unknown): EvalSpec {
|
||||
if (typeof result === 'string') {
|
||||
let jsonStr = result.trim();
|
||||
|
||||
// Try to extract JSON from markdown code blocks
|
||||
const jsonMatch = jsonStr.match(/```(?:json)?\s*([\s\S]*?)```/);
|
||||
if (jsonMatch) {
|
||||
jsonStr = jsonMatch[1].trim();
|
||||
}
|
||||
|
||||
// Try to find JSON object in the string
|
||||
const startIdx = jsonStr.indexOf('{');
|
||||
const endIdx = jsonStr.lastIndexOf('}');
|
||||
if (startIdx !== -1 && endIdx !== -1 && endIdx > startIdx) {
|
||||
jsonStr = jsonStr.slice(startIdx, endIdx + 1);
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(jsonStr) as EvalSpec;
|
||||
} catch (e) {
|
||||
console.error('Raw result:', result);
|
||||
throw new Error(`Failed to parse result as JSON: ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (result && typeof result === 'object') {
|
||||
return result as EvalSpec;
|
||||
}
|
||||
|
||||
throw new Error(`Unexpected result type: ${typeof result}`);
|
||||
}
|
||||
|
||||
function handleMessage(message: SDKMessage): void {
|
||||
switch (message.type) {
|
||||
case 'assistant':
|
||||
if (message.message?.content) {
|
||||
for (const block of message.message.content) {
|
||||
if (block.type === 'text') {
|
||||
process.stderr.write(`\n${block.text}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 'result':
|
||||
if (message.subtype !== 'success') {
|
||||
console.error('Error:', message.subtype);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
export async function generateEvalSpecNonInteractive(
|
||||
repoSummary: RepoSummary,
|
||||
options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
|
||||
): Promise<GenerateResult> {
|
||||
return generateEvalSpec(repoSummary, { ...options, interactive: false });
|
||||
}
|
||||
|
||||
export async function generateEvalSpecInteractive(
|
||||
repoSummary: RepoSummary,
|
||||
questionHandler: (question: string) => Promise<string>,
|
||||
options: Omit<GenerateOptions, 'interactive' | 'onQuestion'> = {}
|
||||
): Promise<GenerateResult> {
|
||||
return generateEvalSpec(repoSummary, {
|
||||
...options,
|
||||
interactive: true,
|
||||
onQuestion: questionHandler,
|
||||
});
|
||||
}
|
||||
263
src/analyzer/types.ts
Normal file
263
src/analyzer/types.ts
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
/**
 * Top-level evaluation specification produced by the analyzer.
 *
 * Bundles the analyzed repo's identity, the generated test scenarios, how
 * scenarios are graded, and bookkeeping about the generation run. Mirrors
 * EVAL_SPEC_JSON_SCHEMA, which constrains the model's structured output.
 */
export interface EvalSpec {
  version: '1.0'; // spec format version
  repo: {
    name: string;
    languages: string[];
    analyzedAt: string;
  };
  scenarios: EvalScenario[];
  grading: {
    deterministic: DeterministicGrade[]; // machine-checkable outcomes
    rubrics: RubricGrade[]; // judged criteria with a max score
  };
  metadata: {
    generatedBy: string; // tool identifier ('evaluclaude-harness' when stamped by the generator)
    totalTokens: number; // input + output tokens consumed during generation
    questionsAsked: number; // AskUserQuestion round-trips in interactive mode
    confidence: 'low' | 'medium' | 'high';
  };
}
|
||||
|
||||
/** One generated test scenario targeting a single function, method, or class. */
export interface EvalScenario {
  id: string; // unique within the spec; referenced by grading entries via scenarioId
  name: string;
  description: string;
  target: {
    module: string; // module containing the symbol under test
    function: string; // symbol name under test
    type: 'function' | 'method' | 'class';
  };
  category: 'unit' | 'integration' | 'edge-case' | 'negative';
  priority: 'critical' | 'high' | 'medium' | 'low';
  setup?: {
    fixtures: string[];
    mocks: MockSpec[];
  };
  input: {
    args: Record<string, unknown>; // arguments for the call under test
    kwargs?: Record<string, unknown>; // keyword arguments (Python targets)
  };
  assertions: Assertion[]; // checks applied to the call's outcome
  tags: string[];
}
|
||||
|
||||
/**
 * Declarative mock for scenario setup: patch `target` and either return a
 * canned value or trigger a side effect described as a string.
 */
export interface MockSpec {
  target: string; // symbol/path to patch
  returnValue?: unknown;
  sideEffect?: string;
}
|
||||
|
||||
/**
 * Discriminated union of all assertion kinds, keyed on `type`.
 * All variants except CustomAssertion extend BaseAssertion.
 */
export type Assertion =
  | EqualsAssertion
  | ContainsAssertion
  | ThrowsAssertion
  | TypeAssertion
  | MatchesAssertion
  | TruthyAssertion
  | CustomAssertion
  | LLMRubricAssertion;

/** Assertion judged by an LLM against a free-text rubric. */
export interface LLMRubricAssertion extends BaseAssertion {
  type: 'llm-rubric';
  rubric: string; // prose describing what a passing result looks like
  criteria: string[]; // individual points for the judge to score
  passingThreshold?: number;
}

/** Fields shared by the structured assertion kinds. */
export interface BaseAssertion {
  description?: string;
}

// NOTE(review): `path` on the assertions below presumably selects a nested
// value inside the result to check — confirm against the runner that
// evaluates these specs.

/** The result (or the value at `path`) must equal `expected`. */
export interface EqualsAssertion extends BaseAssertion {
  type: 'equals';
  expected: unknown;
  path?: string;
}

/** The result (or the value at `path`) must contain `value`. */
export interface ContainsAssertion extends BaseAssertion {
  type: 'contains';
  value: unknown;
  path?: string;
}

/** The call must throw; optionally constrain the error type and message. */
export interface ThrowsAssertion extends BaseAssertion {
  type: 'throws';
  errorType?: string;
  messageContains?: string;
}

/** Runtime type check on the result (or the value at `path`). */
export interface TypeAssertion extends BaseAssertion {
  type: 'typeof';
  expected: 'string' | 'number' | 'boolean' | 'object' | 'array' | 'null' | 'undefined';
  path?: string;
}

/** The result (or the value at `path`) must match the regex `pattern`. */
export interface MatchesAssertion extends BaseAssertion {
  type: 'matches';
  pattern: string; // regular expression source
  path?: string;
}

/** The result (or the value at `path`) must be truthy or falsy per `type`. */
export interface TruthyAssertion extends BaseAssertion {
  type: 'truthy' | 'falsy';
  path?: string;
}

/**
 * Escape hatch: an arbitrary check described in `check`. Unlike the other
 * kinds this does not extend BaseAssertion — `description` is required.
 */
export interface CustomAssertion {
  type: 'custom';
  description: string;
  check: string;
}
|
||||
|
||||
/** Machine-checkable grade: expected outcome of a scenario and its score. */
export interface DeterministicGrade {
  scenarioId: string; // EvalScenario.id this grade applies to
  check: 'pass' | 'fail' | 'error';
  score: number;
}
|
||||
|
||||
/** Judged grade: free-text criteria for a scenario with a maximum score. */
export interface RubricGrade {
  scenarioId: string; // EvalScenario.id this rubric applies to
  criteria: string;
  maxScore: number;
}
|
||||
|
||||
/**
 * JSON Schema counterpart of EvalSpec, passed to the model as the required
 * structured-output format. `additionalProperties: false` throughout keeps
 * the model from inventing fields.
 *
 * NOTE: assertion items are validated loosely — only `type` is required and
 * the per-kind fields are a flat optional superset; the stricter per-kind
 * requirements of the Assertion union are not enforced here.
 */
export const EVAL_SPEC_JSON_SCHEMA = {
  type: 'object',
  properties: {
    version: { type: 'string', const: '1.0' },
    // EvalSpec.repo
    repo: {
      type: 'object',
      properties: {
        name: { type: 'string' },
        languages: { type: 'array', items: { type: 'string' } },
        analyzedAt: { type: 'string' },
      },
      required: ['name', 'languages', 'analyzedAt'],
      additionalProperties: false,
    },
    // EvalScenario[]
    scenarios: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          id: { type: 'string' },
          name: { type: 'string' },
          description: { type: 'string' },
          target: {
            type: 'object',
            properties: {
              module: { type: 'string' },
              function: { type: 'string' },
              type: { type: 'string', enum: ['function', 'method', 'class'] },
            },
            required: ['module', 'function', 'type'],
            additionalProperties: false,
          },
          category: { type: 'string', enum: ['unit', 'integration', 'edge-case', 'negative'] },
          priority: { type: 'string', enum: ['critical', 'high', 'medium', 'low'] },
          // setup is optional on the scenario, but when present both
          // fixtures and mocks must be given.
          setup: {
            type: 'object',
            properties: {
              fixtures: { type: 'array', items: { type: 'string' } },
              mocks: {
                type: 'array',
                items: {
                  type: 'object',
                  properties: {
                    target: { type: 'string' },
                    returnValue: {}, // empty schema: any JSON value
                    sideEffect: { type: 'string' },
                  },
                  required: ['target'],
                  additionalProperties: false,
                },
              },
            },
            required: ['fixtures', 'mocks'],
            additionalProperties: false,
          },
          input: {
            type: 'object',
            properties: {
              args: { type: 'object' },
              kwargs: { type: 'object' },
            },
            required: ['args'],
            additionalProperties: false,
          },
          // Flat superset of every Assertion variant's fields (see NOTE above).
          assertions: {
            type: 'array',
            items: {
              type: 'object',
              properties: {
                type: { type: 'string' },
                expected: {}, // any JSON value
                value: {}, // any JSON value
                path: { type: 'string' },
                errorType: { type: 'string' },
                messageContains: { type: 'string' },
                pattern: { type: 'string' },
                description: { type: 'string' },
                check: { type: 'string' },
                rubric: { type: 'string' },
                criteria: { type: 'array', items: { type: 'string' } },
                passingThreshold: { type: 'number' },
              },
              required: ['type'],
              additionalProperties: false,
            },
          },
          tags: { type: 'array', items: { type: 'string' } },
        },
        required: ['id', 'name', 'description', 'target', 'category', 'priority', 'input', 'assertions', 'tags'],
        additionalProperties: false,
      },
    },
    // EvalSpec.grading
    grading: {
      type: 'object',
      properties: {
        deterministic: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              scenarioId: { type: 'string' },
              check: { type: 'string', enum: ['pass', 'fail', 'error'] },
              score: { type: 'number' },
            },
            required: ['scenarioId', 'check', 'score'],
            additionalProperties: false,
          },
        },
        rubrics: {
          type: 'array',
          items: {
            type: 'object',
            properties: {
              scenarioId: { type: 'string' },
              criteria: { type: 'string' },
              maxScore: { type: 'number' },
            },
            required: ['scenarioId', 'criteria', 'maxScore'],
            additionalProperties: false,
          },
        },
      },
      required: ['deterministic', 'rubrics'],
      additionalProperties: false,
    },
    // EvalSpec.metadata (generator overwrites the counters after parsing)
    metadata: {
      type: 'object',
      properties: {
        generatedBy: { type: 'string' },
        totalTokens: { type: 'number' },
        questionsAsked: { type: 'number' },
        confidence: { type: 'string', enum: ['low', 'medium', 'high'] },
      },
      required: ['generatedBy', 'totalTokens', 'questionsAsked', 'confidence'],
      additionalProperties: false,
    },
  },
  required: ['version', 'repo', 'scenarios', 'grading', 'metadata'],
  additionalProperties: false,
} as const;
|
||||
Loading…
Add table
Add a link
Reference in a new issue