import { Command } from 'commander';
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
import { join, resolve } from 'path';
import { analyze } from '../../introspector/index.js';
import { generateEvalSpec, generateEvalSpecInteractive } from '../../analyzer/index.js';
import { renderSpec, detectFramework as detectRenderFramework } from '../../renderers/index.js';
import { runTests, formatResults, DEFAULT_SANDBOX_CONFIG } from '../../runners/index.js';
import { createTracer, saveTrace } from '../../observability/index.js';
import { generatePromptfooConfig, generateTestProvider } from '../../promptfoo/index.js';
import type { EvalSpec } from '../../analyzer/types.js';

const EVALUCLAUDE_DIR = '.evaluclaude';

interface PipelineOptions {
  output?: string;
  interactive?: boolean;
  focus?: string;
  maxScenarios: string;
  testDir: string;
  framework?: string;
  skipAnalyze?: boolean;
  skipRender?: boolean;
  skipRun?: boolean;
  promptfoo?: boolean;
  quiet?: boolean;
}

export const pipelineCommand = new Command('pipeline')
  .description('Run the full eval generation pipeline: introspect → analyze → render → run')
  .argument('[path]', 'Path to the repository to analyze', '.')
  .option('-o, --output <dir>', 'Output directory for all artifacts', '.evaluclaude')
  .option('-i, --interactive', 'Enable interactive mode with clarifying questions')
  .option('--focus <modules>', 'Comma-separated list of modules/functions to focus on')
  .option('--max-scenarios <n>', 'Maximum number of test scenarios to generate', '10')
  .option('--test-dir <dir>', 'Directory for generated tests', './tests/generated')
  .option('-f, --framework <framework>', 'Test framework (pytest, vitest, jest)')
  .option('--skip-analyze', 'Skip analysis, use existing spec')
  .option('--skip-render', 'Skip rendering, use existing tests')
  .option('--skip-run', 'Skip test execution')
  .option('--promptfoo', 'Generate Promptfoo configuration for UI viewing')
  .option('--quiet', 'Suppress progress messages')
  .action(async (repoPath: string, options: PipelineOptions) => {
    const absolutePath = resolve(repoPath);
    const log = options.quiet ? () => {} : console.log;
    const outputDir = options.output || EVALUCLAUDE_DIR;

    console.log('\n🚀 Evaluclaude Pipeline');
    console.log('═'.repeat(50));
    console.log(` Repository: ${absolutePath}`);
    console.log(` Output: ${outputDir}`);
    console.log('═'.repeat(50) + '\n');

    // Ensure output directories exist
    mkdirSync(outputDir, { recursive: true });
    mkdirSync(options.testDir, { recursive: true });

    const specPath = join(outputDir, 'spec.json');
    const tracesDir = join(outputDir, 'traces');
    const resultsDir = join(outputDir, 'results');
    mkdirSync(tracesDir, { recursive: true });
    mkdirSync(resultsDir, { recursive: true });

    let spec: EvalSpec;

    // Step 1: Introspection + Analysis
    if (options.skipAnalyze && existsSync(specPath)) {
      log('📋 Using existing EvalSpec...');
      spec = JSON.parse(readFileSync(specPath, 'utf-8'));
      log(` Loaded: ${specPath} (${spec.scenarios.length} scenarios)\n`);
    } else {
      log('🔬 Step 1: Introspecting codebase...');
      try {
        const repoSummary = await analyze({
          root: absolutePath,
          onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`),
        });
        log(` Files: ${repoSummary.files.length}`);
        log(` Languages: ${repoSummary.languages.join(', ')}`);
        log('');

        log('🤖 Step 2: Generating EvalSpec with Claude...\n');
        const focus = options.focus?.split(',').map(s => s.trim());
        const maxScenarios = parseInt(options.maxScenarios, 10);

        let result;
        if (options.interactive) {
          const { default: inquirer } = await import('inquirer');
          result = await generateEvalSpecInteractive(
            repoSummary,
            async (question: string) => {
              const { answer } = await inquirer.prompt([{
                type: 'input',
                name: 'answer',
                message: `🤖 Claude asks: ${question}`,
              }]);
              return answer;
            },
            { focus, maxScenarios }
          );
        } else {
          result = await generateEvalSpec(repoSummary, {
            interactive: false,
            focus,
            maxScenarios,
          });
        }

        spec = result.spec;

        // Save the spec
        writeFileSync(specPath, JSON.stringify(spec, null, 2));
        log(`\n✅ EvalSpec generated!`);
        log(` Scenarios: ${spec.scenarios.length}`);
        log(` Tokens: ${result.tokensUsed}`);
        log(` Saved: ${specPath}\n`);
      } catch (error) {
        console.error('\n❌ Analysis failed:', error instanceof Error ? error.message : error);
        process.exit(1);
      }
    }

    // Step 2: Render tests
    if (!options.skipRender) {
      log('📝 Step 3: Rendering test files...');
      try {
        const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec);
        const renderResult = await renderSpec(spec, {
          outputDir: options.testDir,
          framework,
          includeFixtures: true,
          generateMocks: true,
          dryRun: false,
        });
        log(` Framework: ${framework}`);
        log(` Files: ${renderResult.stats.fileCount}`);
        log(` Scenarios: ${renderResult.stats.scenarioCount}`);
        log(` Assertions: ${renderResult.stats.assertionCount}`);
        log(` Output: ${options.testDir}\n`);
      } catch (error) {
        console.error('\n❌ Rendering failed:', error instanceof Error ? error.message : error);
        process.exit(1);
      }
    }

    // Step 3: Run tests
    if (!options.skipRun) {
      log('🧪 Step 4: Running tests...\n');
      try {
        const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec);
        const tracer = createTracer(spec.repo.name);
        tracer.recordIntrospection({
          filesAnalyzed: spec.scenarios.map(s => s.target.module),
          totalFunctions: spec.scenarios.length,
          duration: 0,
        });
        tracer.recordGeneration({
          scenariosGenerated: spec.scenarios.length,
          filesWritten: [options.testDir],
        });

        const result = await runTests(
          options.testDir,
          {
            framework,
            sandbox: true,
            timeout: 300000,
            parallel: false,
            cwd: process.cwd(),
          },
          DEFAULT_SANDBOX_CONFIG
        );

        tracer.recordExecution({
          testsPassed: result.summary.passed,
          testsFailed: result.summary.failed,
          testsSkipped: result.summary.skipped,
        });

        for (const test of result.tests) {
          if (test.status === 'failed' || test.status === 'error') {
            tracer.recordTestFailure({
              scenarioId: test.id,
              testName: test.name,
              error: test.error?.message || 'Unknown error',
              stack: test.error?.stack,
            });
          }
        }

        const trace = tracer.finalize();
        const tracePath = await saveTrace(trace);

        log(formatResults(result));
        log(`📊 Trace saved: ${tracePath}`);
        log(` View with: evaluclaude view ${trace.id}\n`);

        // Save results
        const resultsPath = join(resultsDir, `run-${Date.now()}.json`);
        writeFileSync(resultsPath, JSON.stringify(result, null, 2));
      } catch (error) {
        console.error('\n❌ Test execution failed:', error instanceof Error ? error.message : error);
        process.exit(1);
      }
    }

    // Step 4: Generate Promptfoo config
    if (options.promptfoo) {
      log('📦 Step 5: Generating Promptfoo configuration...');
      try {
        const configPath = join(outputDir, 'promptfooconfig.yaml');
        const providerPath = join(outputDir, 'providers', 'test-runner.py');
        const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec);

        await generatePromptfooConfig(spec, {
          testDir: options.testDir,
          outputPath: configPath,
          framework,
          includeTraceLinks: true,
          providerPath,
        });
        await generateTestProvider(providerPath);

        log(` Config: ${configPath}`);
        log(` Provider: ${providerPath}`);
        log(`\n Launch UI with: evaluclaude ui\n`);
      } catch (error) {
        console.error('\n❌ Promptfoo config generation failed:', error instanceof Error ? error.message : error);
      }
    }

    console.log('═'.repeat(50));
    console.log('✅ Pipeline complete!');
    console.log('═'.repeat(50));
    console.log(`\nNext steps:`);
    console.log(` View traces: evaluclaude view --last`);
    console.log(` List all traces: evaluclaude traces`);
    if (options.promptfoo) {
      console.log(` Launch UI: evaluclaude ui`);
      console.log(` Run Promptfoo: evaluclaude eval --spec ${specPath}`);
    }
    console.log('');
  });
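
// Example invocations (illustrative only; assumes the CLI is published under the
// `evaluclaude` binary name referenced in the "Next steps" output above and that this
// command is registered as `pipeline`):
//
//   evaluclaude pipeline .                                    # full pipeline on the current repo
//   evaluclaude pipeline ./my-repo -f vitest --max-scenarios 5
//   evaluclaude pipeline . --skip-run --promptfoo             # generate tests and Promptfoo config without executing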