From 69c08c9d6bdec8af7d5ae81351aab2fe315096b1 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Sun, 11 Jan 2026 20:38:57 -0500 Subject: [PATCH] ui polish --- README.md | 202 +++++++++++++++++ src/cli/commands/analyze.ts | 97 ++++++-- src/cli/commands/grade.ts | 109 +++++++-- src/cli/commands/intro.ts | 102 +++++---- src/cli/commands/pipeline.ts | 196 +++++++++++----- src/cli/commands/render.ts | 66 ++++-- src/cli/commands/run.ts | 86 +++++-- src/cli/commands/ui.ts | 176 ++++++++++----- src/cli/commands/view.ts | 76 +++++-- src/cli/index.ts | 46 +++- src/cli/theme.ts | 357 ++++++++++++++++++++++++++++++ src/observability/trace-viewer.ts | 225 ++++++++++++------- 12 files changed, 1430 insertions(+), 308 deletions(-) create mode 100644 README.md create mode 100644 src/cli/theme.ts diff --git a/README.md b/README.md new file mode 100644 index 0000000..f251ac7 --- /dev/null +++ b/README.md @@ -0,0 +1,202 @@ +# evaluclaude + +> **Zero-to-evals in one command.** Claude analyzes your codebase and generates functional tests. + +![Version](https://img.shields.io/badge/version-0.1.0-blue) +![Node](https://img.shields.io/badge/node-%3E%3D18.0.0-green) +![License](https://img.shields.io/badge/license-MIT-brightgreen) + +## What is this? + +**evaluclaude** is a CLI tool that uses Claude to understand your codebase and generate real, runnable functional tests. Unlike traditional test generators that produce boilerplate, evaluclaude: + +- **Parses your code** with tree-sitter (no LLM tokens wasted on structure) +- **Asks smart questions** to understand your testing priorities +- **Generates specs, not code** — deterministic renderers create the actual tests +- **Full observability** — every run produces a trace you can inspect + +## Quick Start + +```bash +# Install +npm install -g evaluclaude-harness + +# Run the full pipeline +evaluclaude pipeline . + +# Or step by step +evaluclaude intro . # Introspect codebase +evaluclaude analyze . 
-o spec.json -i # Generate spec (interactive) +evaluclaude render spec.json # Create test files +evaluclaude run # Execute tests +``` + +## How It Works + +``` +┌─────────────────────────────────────────────────────────┐ +│ evaluclaude pipeline │ +├─────────────────────────────────────────────────────────┤ +│ │ +│ 1. INTROSPECT Parse code with tree-sitter │ +│ 📂 → 📋 Extract functions, classes │ +│ │ +│ 2. ANALYZE Claude generates EvalSpec │ +│ 📋 → 🧠 Asks clarifying questions │ +│ │ +│ 3. RENDER Deterministic code generation │ +│ 🧠 → 📄 pytest / vitest / jest │ +│ │ +│ 4. RUN Execute in sandbox │ +│ 📄 → 🧪 Collect results + traces │ +│ │ +└─────────────────────────────────────────────────────────┘ +``` + +## Commands + +### Core Pipeline + +| Command | Description | +|---------|-------------| +| `pipeline [path]` | Run the full pipeline: introspect → analyze → render → run | +| `intro [path]` | Introspect codebase with tree-sitter | +| `analyze [path]` | Generate EvalSpec with Claude | +| `render ` | Render EvalSpec to test files | +| `run [test-dir]` | Execute tests and collect results | + +### Grading & Rubrics + +| Command | Description | +|---------|-------------| +| `grade ` | Grade output using LLM rubric | +| `rubrics` | List available rubrics | +| `calibrate` | Calibrate rubric against examples | + +### Observability + +| Command | Description | +|---------|-------------| +| `view [trace-id]` | View trace details | +| `traces` | List all traces | +| `ui` | Launch Promptfoo dashboard | +| `eval` | Run Promptfoo evaluations | + +## Examples + +### Analyze a Python project interactively + +```bash +evaluclaude analyze ./my-python-project -i -o spec.json +``` + +Claude will ask questions like: +- "I see 3 database models. Which is the core domain object?" +- "Found 47 utility functions. Want me to prioritize the most-used ones?" + +### Focus on specific modules + +```bash +evaluclaude pipeline . 
--focus auth,payments --max-scenarios 20 +``` + +### View test results in browser + +```bash +evaluclaude run --export-promptfoo +evaluclaude ui +``` + +### Skip steps in the pipeline + +```bash +# Use existing spec, just run tests +evaluclaude pipeline . --skip-analyze --skip-render + +# Generate tests without running +evaluclaude pipeline . --skip-run +``` + +## Configuration + +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `ANTHROPIC_API_KEY` | Your Anthropic API key | + +### Output Structure + +``` +.evaluclaude/ +├── spec.json # Generated EvalSpec +├── traces/ # Execution traces +│ └── trace-xxx.json +├── results/ # Test results +│ └── run-xxx.json +└── promptfooconfig.yaml # Promptfoo config (with --promptfoo) +``` + +## Rubrics + +Create custom grading rubrics in YAML: + +```yaml +# rubrics/my-rubric.yaml +name: my-rubric +description: Custom quality checks +passingThreshold: 0.7 + +criteria: + - name: correctness + description: Code produces correct results + weight: 0.5 + - name: clarity + description: Code is clear and readable + weight: 0.3 + - name: efficiency + description: Code is reasonably efficient + weight: 0.2 +``` + +Use it: +```bash +evaluclaude grade output.txt -r my-rubric +``` + +## Architecture + +evaluclaude follows key principles: + +1. **Tree-sitter for introspection** — Never send raw code to Claude for structure extraction +2. **Claude generates specs, not code** — EvalSpec JSON is LLM output; test code is deterministic +3. **Functional tests only** — Every test must invoke actual code, no syntax checks +4. 
**Full observability** — Every eval run produces an inspectable trace + +## Supported Languages + +| Language | Parser | Test Framework | +|----------|--------|----------------| +| Python | tree-sitter-python | pytest | +| TypeScript | tree-sitter-typescript | vitest, jest | +| JavaScript | tree-sitter-typescript | vitest, jest | + +## Development + +```bash +# Build +npm run build + +# Run in dev mode +npm run dev + +# Run tests +npm test + +# Type check +npm run typecheck +``` + +## License + +MIT diff --git a/src/cli/commands/analyze.ts b/src/cli/commands/analyze.ts index a3ce09e..1355f8e 100644 --- a/src/cli/commands/analyze.ts +++ b/src/cli/commands/analyze.ts @@ -3,6 +3,18 @@ import * as path from 'node:path'; import * as fs from 'node:fs/promises'; import { analyze } from '../../introspector/index.js'; import { generateEvalSpec, generateEvalSpecInteractive } from '../../analyzer/index.js'; +import { + style, + icons, + header, + step, + keyValue, + Spinner, + formatError, + nextSteps, + box, + BANNER_MINIMAL, +} from '../theme.js'; interface StructuredQuestion { questions: { @@ -19,7 +31,6 @@ interface StructuredQuestion { async function handleQuestion(questionData: string): Promise { const { default: inquirer } = await import('inquirer'); - // Try to parse as structured question let parsed: StructuredQuestion | null = null; try { parsed = JSON.parse(questionData); @@ -31,29 +42,27 @@ async function handleQuestion(questionData: string): Promise { const answers: string[] = []; for (const q of parsed.questions) { - console.log(`\n🤖 ${q.header || 'Question'}:\n`); + console.log(`\n${style.highlight(icons.brain)} ${style.bold(q.header || 'Question')}:\n`); if (q.options && q.options.length > 0) { - // Render as selection const choices = q.options.map(opt => ({ - name: opt.description ? `${opt.label} - ${opt.description}` : opt.label, + name: opt.description ? 
`${style.bold(opt.label)} ${style.dim('─')} ${opt.description}` : opt.label, value: opt.label, })); const { selection } = await inquirer.prompt([{ type: q.multiSelect ? 'checkbox' : 'list', name: 'selection', - message: q.question, + message: style.info(q.question), choices, }]); answers.push(Array.isArray(selection) ? selection.join(', ') : selection); } else { - // Plain text input const { answer } = await inquirer.prompt([{ type: 'input', name: 'answer', - message: q.question, + message: style.info(q.question), }]); answers.push(answer); } @@ -66,7 +75,7 @@ async function handleQuestion(questionData: string): Promise { const { answer } = await inquirer.prompt([{ type: 'input', name: 'answer', - message: `🤖 Claude asks: ${questionData}`, + message: `${style.highlight(icons.brain)} ${style.bold('Claude asks:')} ${questionData}`, }]); return answer; @@ -80,20 +89,40 @@ export const analyzeCommand = new Command('analyze') .option('--focus ', 'Comma-separated list of modules/functions to focus on') .option('--max-scenarios ', 'Maximum number of test scenarios to generate', '10') .option('--quiet', 'Suppress progress messages') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude analyze .')} ${style.dim('Analyze current directory')} + ${style.command('evaluclaude analyze ./src -o spec.json')} ${style.dim('Save output to file')} + ${style.command('evaluclaude analyze . -i')} ${style.dim('Interactive mode with questions')} + ${style.command('evaluclaude analyze . --focus auth,api')} ${style.dim('Focus on specific modules')} + ${style.command('evaluclaude analyze . --max-scenarios 20')} ${style.dim('Generate more scenarios')} +`) .action(async (repoPath: string, options: AnalyzeOptions) => { const absolutePath = path.resolve(repoPath); - const log = options.quiet ? 
() => {} : console.log; + const quiet = options.quiet; - log(`\n🔬 Analyzing codebase: ${absolutePath}\n`); + if (!quiet) { + console.log(`\n${BANNER_MINIMAL}\n`); + console.log(header('Analyze Codebase')); + console.log(keyValue('Path', style.path(absolutePath))); + console.log(); + } try { - log('Step 1: Running tree-sitter introspection...'); + // Step 1: Tree-sitter introspection + const introSpinner = quiet ? null : new Spinner('Running tree-sitter introspection...'); + introSpinner?.start(); + const repoSummary = await analyze({ root: absolutePath, - onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`), + onProgress: quiet ? undefined : (msg) => introSpinner?.update(`Introspecting: ${msg}`), }); - log(`\nStep 2: Generating EvalSpec with Claude...\n`); + introSpinner?.succeed('Tree-sitter introspection complete'); + + // Step 2: Claude analysis + const claudeSpinner = quiet ? null : new Spinner('Generating EvalSpec with Claude...'); + claudeSpinner?.start(); const focus = options.focus?.split(',').map(s => s.trim()); const maxScenarios = parseInt(options.maxScenarios, 10); @@ -101,6 +130,9 @@ export const analyzeCommand = new Command('analyze') let result; if (options.interactive) { + claudeSpinner?.stop(); + console.log(`\n${style.info(icons.info)} ${style.bold('Interactive mode enabled')}\n`); + result = await generateEvalSpecInteractive( repoSummary, handleQuestion, @@ -112,26 +144,51 @@ export const analyzeCommand = new Command('analyze') focus, maxScenarios, }); + claudeSpinner?.succeed('EvalSpec generated with Claude'); } const { spec, tokensUsed, questionsAsked } = result; - log('\n✅ EvalSpec generated successfully!'); - log(` Scenarios: ${spec.scenarios.length}`); - log(` Tokens used: ${tokensUsed}`); - log(` Questions asked: ${questionsAsked}`); - log(` Confidence: ${spec.metadata.confidence}`); + // Results summary + if (!quiet) { + console.log(); + console.log(`${style.success(icons.success)} ${style.bold('EvalSpec generated 
successfully!')}`); + console.log(); + console.log(` ${style.primary(box.vertical)} ${keyValue('Scenarios', style.number(String(spec.scenarios.length)))}`); + console.log(` ${style.primary(box.vertical)} ${keyValue('Tokens used', style.number(String(tokensUsed)))}`); + console.log(` ${style.primary(box.vertical)} ${keyValue('Questions asked', style.number(String(questionsAsked)))}`); + console.log(` ${style.primary(box.vertical)} ${keyValue('Confidence', style.highlight(spec.metadata.confidence))}`); + } const json = JSON.stringify(spec, null, 2); if (options.output) { await fs.writeFile(options.output, json); - log(`\n📄 Written to: ${options.output}`); + if (!quiet) { + console.log(); + console.log(`${style.success(icons.success)} Written to: ${style.path(options.output)}`); + console.log(nextSteps([ + { command: `evaluclaude render ${options.output}`, description: 'Render tests from the spec' }, + { command: `evaluclaude pipeline . -o ./tests`, description: 'Run the full pipeline' }, + ])); + } } else { console.log('\n' + json); + if (!quiet) { + console.log(nextSteps([ + { command: 'evaluclaude analyze . -o spec.json', description: 'Save the spec to a file' }, + { command: 'evaluclaude render spec.json', description: 'Then render tests from it' }, + ])); + } } } catch (error) { - console.error('\n❌ Error:', error instanceof Error ? error.message : error); + const message = error instanceof Error ? 
error.message : String(error); + console.error(formatError(message, [ + 'Check that the path exists and contains source files', + 'Ensure ANTHROPIC_API_KEY is set in your environment', + 'Try running with --quiet to see raw errors', + 'Use evaluclaude intro to verify introspection works', + ])); process.exit(1); } }); diff --git a/src/cli/commands/grade.ts b/src/cli/commands/grade.ts index 0a9f645..66b1dc7 100644 --- a/src/cli/commands/grade.ts +++ b/src/cli/commands/grade.ts @@ -2,6 +2,7 @@ import { Command } from 'commander'; import { readFileSync, existsSync } from 'fs'; import { gradeWithRubric, loadAllRubrics, analyzeCalibration, calibrate } from '../../graders/index.js'; import type { CalibrationExample } from '../../graders/types.js'; +import { style, icons, Spinner, formatError, progressBar, subheader, keyValue } from '../theme.js'; export const gradeCommand = new Command('grade') .description('Grade output using LLM rubric') @@ -9,6 +10,12 @@ export const gradeCommand = new Command('grade') .option('-r, --rubric ', 'Rubric name or path', 'code-quality') .option('--rubrics-dir ', 'Directory containing rubric YAML files', 'rubrics') .option('--json', 'Output result as JSON', false) + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude grade output.txt')} ${style.dim('Grade file with default rubric')} + ${style.command('evaluclaude grade output.txt -r safety')} ${style.dim('Use specific rubric')} + ${style.command('evaluclaude grade "inline text" --json')} ${style.dim('Grade string, output JSON')} +`) .action(async (input: string, options) => { try { let content: string; @@ -19,29 +26,48 @@ export const gradeCommand = new Command('grade') content = input; } - console.log(`Grading with rubric: ${options.rubric}`); + const spinner = new Spinner(`Grading with rubric ${style.highlight(options.rubric)}...`); + spinner.start(); const result = await gradeWithRubric(content, options.rubric, { rubricsDir: options.rubricsDir, }); if 
(options.json) { + spinner.stop(); console.log(JSON.stringify(result, null, 2)); return; } - console.log(`\n${result.pass ? '✅ PASS' : '❌ FAIL'}`); - console.log(`Score: ${(result.score * 100).toFixed(1)}%`); - console.log(`\nSummary: ${result.reason}`); + if (result.pass) { + spinner.succeed(`Graded with rubric ${style.highlight(options.rubric)}`); + } else { + spinner.fail(`Graded with rubric ${style.highlight(options.rubric)}`); + } + + console.log(); + console.log(result.pass + ? `${style.success(icons.passed)} ${style.bold(style.success('PASS'))}` + : `${style.error(icons.failed)} ${style.bold(style.error('FAIL'))}`); + console.log(keyValue('Score', style.number(`${(result.score * 100).toFixed(1)}%`))); + console.log(); + console.log(keyValue('Summary', result.reason)); - console.log('\nCriterion Scores:'); + console.log(subheader('Criterion Scores')); for (const cs of result.criterionScores) { - const bar = '█'.repeat(Math.round(cs.score * 10)) + '░'.repeat(10 - Math.round(cs.score * 10)); - console.log(` ${cs.name}: ${bar} ${(cs.score * 100).toFixed(0)}%`); - console.log(` ${cs.feedback}`); + const bar = progressBar(cs.score, 1, 20); + console.log(` ${style.bold(cs.name)}: ${bar}`); + console.log(` ${style.dim(cs.feedback)}`); } } catch (error) { - console.error('Error grading:', error instanceof Error ? error.message : error); + console.error(formatError( + error instanceof Error ? 
error.message : String(error), + [ + 'Check that the rubric exists in the rubrics directory', + 'Ensure ANTHROPIC_API_KEY is set', + `Run ${style.command('evaluclaude rubrics')} to list available rubrics`, + ] + )); process.exit(1); } }); @@ -49,26 +75,44 @@ export const gradeCommand = new Command('grade') export const listRubricsCommand = new Command('rubrics') .description('List available rubrics') .option('--rubrics-dir ', 'Directory containing rubric YAML files', 'rubrics') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude rubrics')} ${style.dim('List all rubrics')} + ${style.command('evaluclaude rubrics --rubrics-dir ./my-rubrics')} ${style.dim('Use custom directory')} +`) .action(async (options) => { try { const rubrics = loadAllRubrics(options.rubricsDir); if (rubrics.size === 0) { - console.log(`No rubrics found in ${options.rubricsDir}`); + console.log(formatError( + `No rubrics found in ${style.path(options.rubricsDir)}`, + [ + 'Create rubric YAML files in the rubrics directory', + 'Use --rubrics-dir to specify a different location', + ] + )); return; } - console.log(`Available rubrics (${rubrics.size}):\n`); + console.log(subheader(`Available Rubrics (${style.number(String(rubrics.size))})`)); + console.log(); for (const [name, rubric] of rubrics) { - console.log(`📋 ${name}`); - console.log(` ${rubric.description}`); - console.log(` Threshold: ${(rubric.passingThreshold * 100).toFixed(0)}%`); - console.log(` Criteria: ${rubric.criteria.map(c => c.name).join(', ')}`); - console.log(''); + console.log(`${icons.spec} ${style.bold(style.primary(name))}`); + console.log(keyValue('Description', rubric.description, 1)); + console.log(keyValue('Threshold', style.number(`${(rubric.passingThreshold * 100).toFixed(0)}%`), 1)); + console.log(keyValue('Criteria', rubric.criteria.map(c => style.highlight(c.name)).join(', '), 1)); + console.log(); } } catch (error) { - console.error('Error listing rubrics:', error instanceof Error ? 
error.message : error); + console.error(formatError( + error instanceof Error ? error.message : String(error), + [ + 'Check that the rubrics directory exists', + 'Ensure rubric files are valid YAML', + ] + )); process.exit(1); } }); @@ -78,24 +122,49 @@ export const calibrateCommand = new Command('calibrate') .argument('', 'Rubric name or path') .argument('', 'Path to calibration examples JSON') .option('--rubrics-dir ', 'Directory containing rubric YAML files', 'rubrics') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude calibrate code-quality examples.json')} ${style.dim('Calibrate with examples')} + +${style.bold('Examples file format:')} + ${style.dim('[')} + ${style.dim('{ "content": "...", "expectedPass": true, "expectedScore": 0.8 },')} + ${style.dim('{ "content": "...", "expectedPass": false }')} + ${style.dim(']')} +`) .action(async (rubricName: string, examplesPath: string, options) => { try { if (!existsSync(examplesPath)) { - console.error(`Examples file not found: ${examplesPath}`); + console.error(formatError( + `Examples file not found: ${style.path(examplesPath)}`, + [ + 'Check that the file path is correct', + 'Ensure the file exists and is readable', + ] + )); process.exit(1); } const examples: CalibrationExample[] = JSON.parse(readFileSync(examplesPath, 'utf-8')); - console.log(`Calibrating rubric '${rubricName}' with ${examples.length} examples...`); + const spinner = new Spinner(`Calibrating rubric ${style.highlight(rubricName)} with ${style.number(String(examples.length))} examples...`); + spinner.start(); const result = await calibrate(rubricName, examples, { rubricsDir: options.rubricsDir, }); + spinner.succeed(`Calibration complete for ${style.highlight(rubricName)}`); console.log('\n' + analyzeCalibration(result)); } catch (error) { - console.error('Error calibrating:', error instanceof Error ? error.message : error); + console.error(formatError( + error instanceof Error ? 
error.message : String(error), + [ + 'Check that the rubric exists', + 'Ensure the examples file is valid JSON', + 'Ensure ANTHROPIC_API_KEY is set', + ] + )); process.exit(1); } }); diff --git a/src/cli/commands/intro.ts b/src/cli/commands/intro.ts index 9439562..eb964a6 100644 --- a/src/cli/commands/intro.ts +++ b/src/cli/commands/intro.ts @@ -1,6 +1,7 @@ import { Command } from 'commander'; import * as path from 'node:path'; import { analyze, treeToString } from '../../introspector/index.js'; +import { style, icons, header, subheader, keyValue, Spinner, formatError, nextSteps, box } from '../theme.js'; export const introCommand = new Command('intro') .description('Introspect a codebase and output its structure (tree-sitter analysis)') @@ -9,21 +10,35 @@ export const introCommand = new Command('intro') .option('--json', 'Output as JSON (default)') .option('--summary', 'Output a human-readable summary instead of JSON') .option('--tree', 'Show file tree structure') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude intro')} ${style.dim('Analyze current directory')} + ${style.command('evaluclaude intro ./my-project')} ${style.dim('Analyze specific path')} + ${style.command('evaluclaude intro . --summary')} ${style.dim('Human-readable summary')} + ${style.command('evaluclaude intro . --tree')} ${style.dim('Show file tree')} + ${style.command('evaluclaude intro . 
-o out.json')} ${style.dim('Save to file')} +`) .action(async (repoPath: string, options: { output?: string; json?: boolean; summary?: boolean; tree?: boolean }) => { const absolutePath = path.resolve(repoPath); - console.log(`\n🔍 Analyzing: ${absolutePath}\n`); + console.log(header('Introspecting Codebase')); + console.log(keyValue('Path', style.path(absolutePath))); + console.log(''); + + const spinner = new Spinner('Analyzing codebase with tree-sitter...'); + spinner.start(); try { const summary = await analyze({ root: absolutePath, - onProgress: (msg) => console.log(` ${msg}`), + onProgress: (msg) => spinner.update(msg), }); + spinner.succeed('Analysis complete'); console.log(''); if (options.tree && summary.tree) { - console.log('📁 File Tree:\n'); + console.log(subheader(`${icons.folder} File Tree`)); console.log(treeToString(summary.tree)); console.log(''); } else if (options.summary) { @@ -34,85 +49,96 @@ export const introCommand = new Command('intro') if (options.output) { const fs = await import('node:fs/promises'); await fs.writeFile(options.output, json); - console.log(`📄 Written to: ${options.output}`); + console.log(`${style.success(icons.success)} Written to: ${style.path(options.output)}`); } else { console.log(json); } } + + console.log(nextSteps([ + { command: 'evaluclaude analyze .', description: 'Generate EvalSpec with Claude' }, + { command: 'evaluclaude intro . --summary', description: 'View human-readable summary' }, + ])); } catch (error) { - console.error('❌ Error analyzing repository:', error); + spinner.fail('Analysis failed'); + console.error(formatError( + error instanceof Error ? 
error.message : 'Unknown error analyzing repository', + [ + 'Check that the path exists and is accessible', + 'Ensure the directory contains source files', + 'Try running with --tree to see the file structure', + ] + )); process.exit(1); } }); function printHumanSummary(summary: import('../../introspector/types.js').RepoSummary): void { - console.log('📊 Repository Summary'); - console.log('─'.repeat(50)); - console.log(`📁 Root: ${summary.root}`); - console.log(`🗓️ Analyzed: ${summary.analyzedAt}`); - console.log(`🔤 Languages: ${summary.languages.join(', ') || 'none detected'}`); + console.log(subheader(`${icons.trace} Repository Summary`)); + console.log(keyValue('Root', style.path(summary.root))); + console.log(keyValue('Analyzed', summary.analyzedAt)); + console.log(keyValue('Languages', summary.languages.join(', ') || style.muted('none detected'))); - console.log('\n📂 Files:'); - console.log(` Total: ${summary.files.length}`); - console.log(` Source: ${summary.files.filter(f => f.role === 'source').length}`); - console.log(` Test: ${summary.files.filter(f => f.role === 'test').length}`); - console.log(` Config: ${summary.files.filter(f => f.role === 'config').length}`); + console.log(subheader(`${icons.folder} Files`)); + console.log(keyValue('Total', style.number(String(summary.files.length)), 1)); + console.log(keyValue('Source', style.number(String(summary.files.filter(f => f.role === 'source').length)), 1)); + console.log(keyValue('Test', style.number(String(summary.files.filter(f => f.role === 'test').length)), 1)); + console.log(keyValue('Config', style.number(String(summary.files.filter(f => f.role === 'config').length)), 1)); - console.log('\n📦 Modules:'); - console.log(` Total: ${summary.modules.length}`); + console.log(subheader(`${icons.code} Modules`)); + console.log(keyValue('Total', style.number(String(summary.modules.length)), 1)); const totalExports = summary.modules.reduce((sum, m) => sum + m.exports.length, 0); const functions = 
summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'function')); const classes = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'class')); - console.log(` Functions: ${functions.length}`); - console.log(` Classes: ${classes.length}`); - console.log(` Total exports: ${totalExports}`); + console.log(keyValue('Functions', style.number(String(functions.length)), 1)); + console.log(keyValue('Classes', style.number(String(classes.length)), 1)); + console.log(keyValue('Total exports', style.number(String(totalExports)), 1)); if (summary.config.python) { - console.log('\n🐍 Python:'); - console.log(` Test framework: ${summary.config.python.testFramework}`); - console.log(` pyproject.toml: ${summary.config.python.pyprojectToml ? '✓' : '✗'}`); - console.log(` setup.py: ${summary.config.python.setupPy ? '✓' : '✗'}`); + console.log(subheader(`${icons.python} Python`)); + console.log(keyValue('Test framework', summary.config.python.testFramework, 1)); + console.log(keyValue('pyproject.toml', summary.config.python.pyprojectToml ? style.success(icons.success) : style.error(icons.error), 1)); + console.log(keyValue('setup.py', summary.config.python.setupPy ? style.success(icons.success) : style.error(icons.error), 1)); } if (summary.config.typescript) { - console.log('\n📘 TypeScript:'); - console.log(` Test framework: ${summary.config.typescript.testFramework}`); - console.log(` package.json: ${summary.config.typescript.packageJson ? '✓' : '✗'}`); - console.log(` tsconfig.json: ${summary.config.typescript.tsconfig ? '✓' : '✗'}`); + console.log(subheader(`${icons.typescript} TypeScript`)); + console.log(keyValue('Test framework', summary.config.typescript.testFramework, 1)); + console.log(keyValue('package.json', summary.config.typescript.packageJson ? style.success(icons.success) : style.error(icons.error), 1)); + console.log(keyValue('tsconfig.json', summary.config.typescript.tsconfig ? 
style.success(icons.success) : style.error(icons.error), 1)); } if (summary.git) { - console.log('\n📌 Git:'); - console.log(` Branch: ${summary.git.branch}`); - console.log(` Commit: ${summary.git.currentCommit.slice(0, 8)}`); + console.log(subheader(`${icons.gear} Git`)); + console.log(keyValue('Branch', summary.git.branch, 1)); + console.log(keyValue('Commit', style.muted(summary.git.currentCommit.slice(0, 8)), 1)); if (summary.git.recentCommits && summary.git.recentCommits.length > 0) { - console.log('\n📜 Recent Commits:'); + console.log(subheader(`${icons.file} Recent Commits`)); for (const commit of summary.git.recentCommits.slice(0, 5)) { const date = new Date(commit.date).toLocaleDateString(); - console.log(` ${commit.shortHash} ${date} - ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`); + console.log(` ${style.muted(commit.shortHash)} ${style.dim(date)} ${box.horizontal} ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`); } } if (summary.git.fileHistory && summary.git.fileHistory.length > 0) { - console.log('\n🔥 Most Active Files (by commit count):'); + console.log(subheader(`${icons.lightning} Most Active Files`)); for (const file of summary.git.fileHistory.slice(0, 5)) { - console.log(` ${file.path} (${file.commitCount} commits)`); + console.log(` ${style.path(file.path)} ${style.dim(`(${style.number(String(file.commitCount))} commits)`)}`); } } } - // Show top modules by export count const topModules = [...summary.modules] .sort((a, b) => b.exports.length - a.exports.length) .slice(0, 5); if (topModules.length > 0) { - console.log('\n🏆 Top modules by exports:'); + console.log(subheader(`${icons.sparkle} Top Modules by Exports`)); for (const mod of topModules) { - console.log(` ${mod.path}: ${mod.exports.length} exports`); + console.log(` ${style.path(mod.path)}: ${style.number(String(mod.exports.length))} exports`); } } } diff --git a/src/cli/commands/pipeline.ts b/src/cli/commands/pipeline.ts index 
d1e7fa4..b455e63 100644 --- a/src/cli/commands/pipeline.ts +++ b/src/cli/commands/pipeline.ts @@ -8,6 +8,19 @@ import { runTests, formatResults, DEFAULT_SANDBOX_CONFIG } from '../../runners/i import { createTracer, saveTrace } from '../../observability/index.js'; import { generatePromptfooConfig, generateTestProvider } from '../../promptfoo/index.js'; import type { EvalSpec } from '../../analyzer/types.js'; +import { + style, + icons, + header, + step, + keyValue, + resultBox, + nextSteps, + Spinner, + formatError, + BANNER, + box +} from '../theme.js'; const EVALUCLAUDE_DIR = '.evaluclaude'; @@ -26,29 +39,49 @@ interface PipelineOptions { } export const pipelineCommand = new Command('pipeline') - .description('Run the full eval generation pipeline: introspect → analyze → render → run') + .description('Run the complete eval pipeline: introspect → analyze → render → run') .argument('[path]', 'Path to the repository to analyze', '.') - .option('-o, --output ', 'Output directory for all artifacts', '.evaluclaude') + .option('-o, --output ', 'Output directory for artifacts', '.evaluclaude') .option('-i, --interactive', 'Enable interactive mode with clarifying questions') .option('--focus ', 'Comma-separated list of modules/functions to focus on') - .option('--max-scenarios ', 'Maximum number of test scenarios to generate', '10') + .option('--max-scenarios ', 'Maximum number of test scenarios', '10') .option('--test-dir ', 'Directory for generated tests', './tests/generated') .option('-f, --framework ', 'Test framework (pytest, vitest, jest)') .option('--skip-analyze', 'Skip analysis, use existing spec') .option('--skip-render', 'Skip rendering, use existing tests') .option('--skip-run', 'Skip test execution') - .option('--promptfoo', 'Generate Promptfoo configuration for UI viewing') + .option('--promptfoo', 'Generate Promptfoo configuration') .option('--quiet', 'Suppress progress messages') + .addHelpText('after', ` +${style.bold('Examples:')} + + ${style.dim('# 
Analyze current directory')} + $ evaluclaude pipeline . + + ${style.dim('# Interactive mode with focus on specific modules')} + $ evaluclaude pipeline ./my-project -i --focus auth,payments + + ${style.dim('# Generate tests without running them')} + $ evaluclaude pipeline . --skip-run + + ${style.dim('# Use existing spec and run tests')} + $ evaluclaude pipeline . --skip-analyze +`) .action(async (repoPath: string, options: PipelineOptions) => { const absolutePath = resolve(repoPath); - const log = options.quiet ? () => {} : console.log; + const quiet = options.quiet; const outputDir = options.output || EVALUCLAUDE_DIR; - console.log('\n🚀 Evaluclaude Pipeline'); - console.log('═'.repeat(50)); - console.log(` Repository: ${absolutePath}`); - console.log(` Output: ${outputDir}`); - console.log('═'.repeat(50) + '\n'); + // Print header + console.log(BANNER); + console.log(style.primary(box.dHorizontal.repeat(55))); + console.log(` ${icons.folder} ${style.bold('Repository:')} ${style.path(absolutePath)}`); + console.log(` ${icons.file} ${style.bold('Output:')} ${style.path(outputDir)}`); + if (options.interactive) { + console.log(` ${icons.brain} ${style.bold('Mode:')} ${style.highlight('Interactive')}`); + } + console.log(style.primary(box.dHorizontal.repeat(55))); + console.log(''); // Ensure output directories exist mkdirSync(outputDir, { recursive: true }); @@ -65,23 +98,30 @@ export const pipelineCommand = new Command('pipeline') // Step 1: Introspection + Analysis if (options.skipAnalyze && existsSync(specPath)) { - log('📋 Using existing EvalSpec...'); + console.log(step(1, 'Using existing EvalSpec', 'done')); spec = JSON.parse(readFileSync(specPath, 'utf-8')); - log(` Loaded: ${specPath} (${spec.scenarios.length} scenarios)\n`); + console.log(` ${style.dim('└─')} Loaded ${style.number(String(spec.scenarios.length))} scenarios from ${style.path(specPath)}`); + console.log(''); } else { - log('🔬 Step 1: Introspecting codebase...'); + console.log(step(1, 
'Introspecting codebase...', 'running')); + let spinner: Spinner | null = null; + if (!quiet) { + spinner = new Spinner('Parsing files with tree-sitter...'); + spinner.start(); + } + try { const repoSummary = await analyze({ root: absolutePath, - onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`), + onProgress: quiet ? undefined : (msg) => spinner?.update(msg), }); - log(` Files: ${repoSummary.files.length}`); - log(` Languages: ${repoSummary.languages.join(', ')}`); - log(''); + spinner?.succeed(`Analyzed ${style.number(String(repoSummary.files.length))} files`); + console.log(` ${style.dim('└─')} Languages: ${repoSummary.languages.map(l => style.info(l)).join(', ')}`); + console.log(''); - log('🤖 Step 2: Generating EvalSpec with Claude...\n'); + console.log(step(2, 'Generating EvalSpec with Claude...', 'running')); const focus = options.focus?.split(',').map(s => s.trim()); const maxScenarios = parseInt(options.maxScenarios, 10); @@ -93,21 +133,30 @@ export const pipelineCommand = new Command('pipeline') result = await generateEvalSpecInteractive( repoSummary, async (question: string) => { + console.log(''); const { answer } = await inquirer.prompt([{ type: 'input', name: 'answer', - message: `🤖 Claude asks: ${question}`, + message: `${icons.brain} ${style.highlight('Claude asks:')} ${question}`, + prefix: '', }]); return answer; }, { focus, maxScenarios } ); } else { + if (!quiet) { + spinner = new Spinner('Claude is analyzing the codebase...'); + spinner.start(); + } + result = await generateEvalSpec(repoSummary, { interactive: false, focus, maxScenarios, }); + + spinner?.succeed('EvalSpec generated'); } spec = result.spec; @@ -115,19 +164,29 @@ export const pipelineCommand = new Command('pipeline') // Save the spec writeFileSync(specPath, JSON.stringify(spec, null, 2)); - log(`\n✅ EvalSpec generated!`); - log(` Scenarios: ${spec.scenarios.length}`); - log(` Tokens: ${result.tokensUsed}`); - log(` Saved: ${specPath}\n`); + console.log(` 
${style.dim('├─')} Scenarios: ${style.number(String(spec.scenarios.length))}`); + console.log(` ${style.dim('├─')} Tokens: ${style.number(String(result.tokensUsed))}`); + console.log(` ${style.dim('└─')} Saved: ${style.path(specPath)}`); + console.log(''); } catch (error) { - console.error('\n❌ Analysis failed:', error instanceof Error ? error.message : error); + spinner?.fail('Analysis failed'); + console.error(formatError( + error instanceof Error ? error.message : String(error), + ['Check that ANTHROPIC_API_KEY is set', 'Verify the path exists and contains source files'] + )); process.exit(1); } } // Step 2: Render tests if (!options.skipRender) { - log('📝 Step 3: Rendering test files...'); + console.log(step(3, 'Rendering test files...', 'running')); + + let spinner: Spinner | null = null; + if (!quiet) { + spinner = new Spinner('Generating test code...'); + spinner.start(); + } try { const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec); @@ -140,20 +199,31 @@ export const pipelineCommand = new Command('pipeline') dryRun: false, }); - log(` Framework: ${framework}`); - log(` Files: ${renderResult.stats.fileCount}`); - log(` Scenarios: ${renderResult.stats.scenarioCount}`); - log(` Assertions: ${renderResult.stats.assertionCount}`); - log(` Output: ${options.testDir}\n`); + spinner?.succeed(`Generated ${style.number(String(renderResult.stats.fileCount))} test files`); + console.log(` ${style.dim('├─')} Framework: ${style.info(framework)}`); + console.log(` ${style.dim('├─')} Scenarios: ${style.number(String(renderResult.stats.scenarioCount))}`); + console.log(` ${style.dim('├─')} Assertions: ${style.number(String(renderResult.stats.assertionCount))}`); + console.log(` ${style.dim('└─')} Output: ${style.path(options.testDir)}`); + console.log(''); } catch (error) { - console.error('\n❌ Rendering failed:', error instanceof Error ? 
error.message : error); + spinner?.fail('Rendering failed'); + console.error(formatError( + error instanceof Error ? error.message : String(error), + ['Verify the EvalSpec is valid JSON', 'Check the output directory is writable'] + )); process.exit(1); } } // Step 3: Run tests if (!options.skipRun) { - log('🧪 Step 4: Running tests...\n'); + console.log(step(4, 'Running tests...', 'running')); + + let spinner: Spinner | null = null; + if (!quiet) { + spinner = new Spinner('Executing test suite...'); + spinner.start(); + } try { const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec); @@ -202,23 +272,41 @@ export const pipelineCommand = new Command('pipeline') const trace = tracer.finalize(); const tracePath = await saveTrace(trace); - log(formatResults(result)); - log(`📊 Trace saved: ${tracePath}`); - log(` View with: evaluclaude view ${trace.id}\n`); + spinner?.stop(); + + // Show results box + console.log(''); + console.log(resultBox({ + passed: result.summary.passed, + failed: result.summary.failed, + skipped: result.summary.skipped, + duration: result.summary.duration, + })); + console.log(''); + console.log(` ${icons.trace} Trace: ${style.path(tracePath)}`); + console.log(` ${style.dim('└─')} View: ${style.command(`evaluclaude view ${trace.id}`)}`); + console.log(''); // Save results const resultsPath = join(resultsDir, `run-${Date.now()}.json`); writeFileSync(resultsPath, JSON.stringify(result, null, 2)); } catch (error) { - console.error('\n❌ Test execution failed:', error instanceof Error ? error.message : error); + spinner?.fail('Test execution failed'); + console.error(formatError( + error instanceof Error ? 
error.message : String(error), + ['Check the test framework is installed', 'Verify the test directory exists'] + )); process.exit(1); } } // Step 4: Generate Promptfoo config if (options.promptfoo) { - log('📦 Step 5: Generating Promptfoo configuration...'); + console.log(step(5, 'Generating Promptfoo configuration...', 'running')); + + const spinner = new Spinner('Creating Promptfoo config...'); + spinner.start(); try { const configPath = join(outputDir, 'promptfooconfig.yaml'); @@ -235,23 +323,27 @@ export const pipelineCommand = new Command('pipeline') await generateTestProvider(providerPath); - log(` Config: ${configPath}`); - log(` Provider: ${providerPath}`); - log(`\n Launch UI with: evaluclaude ui\n`); + spinner.succeed('Promptfoo config created'); + console.log(` ${style.dim('├─')} Config: ${style.path(configPath)}`); + console.log(` ${style.dim('└─')} Provider: ${style.path(providerPath)}`); + console.log(''); } catch (error) { - console.error('\n❌ Promptfoo config generation failed:', error instanceof Error ? error.message : error); + spinner.fail('Promptfoo config generation failed'); + console.error(formatError(error instanceof Error ? 
error.message : String(error))); } } - console.log('═'.repeat(50)); - console.log('✅ Pipeline complete!'); - console.log('═'.repeat(50)); - console.log(`\nNext steps:`); - console.log(` View traces: evaluclaude view --last`); - console.log(` List all traces: evaluclaude traces`); - if (options.promptfoo) { - console.log(` Launch UI: evaluclaude ui`); - console.log(` Run Promptfoo: evaluclaude eval --spec ${specPath}`); - } - console.log(''); + // Final summary + console.log(style.success(box.dHorizontal.repeat(55))); + console.log(` ${icons.sparkle} ${style.success(style.bold('Pipeline complete!'))}`); + console.log(style.success(box.dHorizontal.repeat(55))); + + console.log(nextSteps([ + { command: 'evaluclaude view --last', description: 'View the latest trace' }, + { command: 'evaluclaude traces', description: 'List all traces' }, + ...(options.promptfoo ? [ + { command: 'evaluclaude ui', description: 'Launch the dashboard UI' }, + { command: `evaluclaude eval --spec ${specPath}`, description: 'Run Promptfoo evaluations' }, + ] : []), + ])); }); diff --git a/src/cli/commands/render.ts b/src/cli/commands/render.ts index 300c01e..ff55780 100644 --- a/src/cli/commands/render.ts +++ b/src/cli/commands/render.ts @@ -2,6 +2,7 @@ import { Command } from 'commander'; import { readFileSync, existsSync } from 'fs'; import { renderSpec, detectFramework, type Framework } from '../../renderers/index.js'; import type { EvalSpec } from '../../analyzer/types.js'; +import { style, icons, Spinner, formatError, nextSteps, keyValue } from '../theme.js'; export const renderCommand = new Command('render') .description('Render EvalSpec JSON into runnable test files') @@ -11,19 +12,41 @@ export const renderCommand = new Command('render') .option('--fixtures', 'Generate fixture stubs', false) .option('--mocks', 'Generate mock stubs', false) .option('--dry-run', 'Preview without writing files', false) + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude 
render spec.json')} ${style.dim('Render with auto-detected framework')} + ${style.command('evaluclaude render spec.json -f vitest')} ${style.dim('Use Vitest framework')} + ${style.command('evaluclaude render spec.json --dry-run')} ${style.dim('Preview output without writing')} + ${style.command('evaluclaude render spec.json --fixtures')} ${style.dim('Include fixture stubs')} +`) .action(async (specPath: string, options) => { try { if (!existsSync(specPath)) { - console.error(`Error: Spec file not found: ${specPath}`); + console.error(formatError(`Spec file not found: ${style.path(specPath)}`, [ + 'Check that the spec file exists', + 'Run `evaluclaude analyze` to generate a spec file first', + 'Verify the path is correct', + ])); process.exit(1); } const specContent = readFileSync(specPath, 'utf-8'); - const spec: EvalSpec = JSON.parse(specContent); + let spec: EvalSpec; + + try { + spec = JSON.parse(specContent); + } catch { + console.error(formatError('Invalid JSON in spec file', [ + 'Ensure the file contains valid JSON', + 'Check for syntax errors in the spec file', + ])); + process.exit(1); + } const framework = (options.framework as Framework) || detectFramework(spec); - console.log(`Rendering ${spec.scenarios.length} scenarios with ${framework}...`); + const spinner = new Spinner(`Rendering ${style.number(String(spec.scenarios.length))} scenarios with ${style.highlight(framework)}...`); + spinner.start(); const result = await renderSpec(spec, { outputDir: options.output, @@ -33,29 +56,44 @@ export const renderCommand = new Command('render') dryRun: options.dryRun, }); + spinner.succeed(`Rendered ${style.number(String(spec.scenarios.length))} scenarios with ${style.highlight(framework)}`); + if (options.dryRun) { - console.log('\n--- DRY RUN ---\n'); + console.log(`\n${style.warning('DRY RUN')} ${style.dim('─ Preview only, no files written')}\n`); for (const file of result.files) { - console.log(`📄 ${file.path}`); - console.log('---'); - 
console.log(file.content); - console.log('---\n'); + console.log(`${icons.file} ${style.path(file.path)}`); + console.log(style.dim('─'.repeat(50))); + console.log(style.muted(file.content)); + console.log(style.dim('─'.repeat(50)) + '\n'); } } - console.log(`\n✅ Rendered ${result.stats.scenarioCount} scenarios`); - console.log(` 📁 ${result.stats.fileCount} test files`); - console.log(` 🔍 ${result.stats.assertionCount} assertions`); + console.log(`\n${style.success(icons.check)} ${style.bold('Render complete')}`); + console.log(keyValue(` ${icons.spec} Scenarios`, style.number(String(result.stats.scenarioCount)), 0)); + console.log(keyValue(` ${icons.file} Test files`, style.number(String(result.stats.fileCount)), 0)); + console.log(keyValue(` ${icons.magnify} Assertions`, style.number(String(result.stats.assertionCount)), 0)); if (result.stats.skippedCount > 0) { - console.log(` ⏭️ ${result.stats.skippedCount} scenarios skipped (LLM rubric assertions)`); + console.log(keyValue(` ${icons.skipped} Skipped`, `${style.number(String(result.stats.skippedCount))} ${style.dim('(LLM rubric assertions)')}`, 0)); } if (!options.dryRun) { - console.log(`\n📂 Output: ${options.output}`); + console.log(`\n${icons.folder} ${style.label('Output:')} ${style.path(options.output)}`); + + console.log(nextSteps([ + { command: `evaluclaude run ${options.output}`, description: 'Run the generated tests' }, + { command: `evaluclaude render ${specPath} --dry-run`, description: 'Preview changes before writing' }, + ])); } } catch (error) { - console.error('Error rendering spec:', error instanceof Error ? error.message : error); + console.error(formatError( + error instanceof Error ? 
error.message : String(error), + [ + 'Check that the spec file is valid', + 'Ensure the output directory is writable', + 'Try running with --dry-run to debug', + ] + )); process.exit(1); } }); diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts index a20c01e..18d3023 100644 --- a/src/cli/commands/run.ts +++ b/src/cli/commands/run.ts @@ -1,6 +1,5 @@ import { Command } from 'commander'; import { existsSync, readFileSync } from 'fs'; -import { join } from 'path'; import { runTests, formatResults, @@ -12,6 +11,17 @@ import { import { createTracer, saveTrace } from '../../observability/index.js'; import { exportToPromptfooFormat } from '../../promptfoo/results-exporter.js'; import type { EvalSpec } from '../../analyzer/types.js'; +import { + style, + icons, + Spinner, + formatError, + nextSteps, + keyValue, + resultBox, + section, + formatDuration +} from '../theme.js'; export const runCommand = new Command('run') .description('Run generated tests and collect results') @@ -28,24 +38,37 @@ export const runCommand = new Command('run') .option('--no-trace', 'Disable execution tracing') .option('--export-promptfoo', 'Export results in Promptfoo format', false) .option('-w, --watch', 'Watch mode (rerun on changes)', false) + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude run')} ${style.dim('Run tests from ./tests/generated')} + ${style.command('evaluclaude run ./my-tests')} ${style.dim('Run tests from custom directory')} + ${style.command('evaluclaude run -f pytest')} ${style.dim('Use pytest framework')} + ${style.command('evaluclaude run --spec eval-spec.json')} ${style.dim('Map results to EvalSpec')} + ${style.command('evaluclaude run --export-promptfoo')} ${style.dim('Export for Promptfoo UI')} + ${style.command('evaluclaude run --no-sandbox')} ${style.dim('Disable sandboxing')} +`) .action(async (testDir: string, options) => { try { - console.log(`\n🧪 Running tests from ${testDir}...\n`); + console.log(`\n${icons.test} 
${style.bold('Running tests from')} ${style.path(testDir)}\n`); if (!existsSync(testDir)) { - console.error(`Error: Test directory not found: ${testDir}`); + console.log(formatError(`Test directory not found: ${testDir}`, [ + `Create the directory: ${style.command(`mkdir -p ${testDir}`)}`, + `Generate tests first: ${style.command('evaluclaude render ')}`, + 'Check the path is correct' + ])); process.exit(1); } const framework: TestFramework = options.framework || detectTestFramework(testDir); - console.log(` Framework: ${framework}`); - console.log(` Sandbox: ${options.sandbox ? 'enabled' : 'disabled'}`); - console.log(` Timeout: ${options.timeout}ms`); + console.log(keyValue('Framework', style.info(framework), 1)); + console.log(keyValue('Sandbox', options.sandbox ? style.success('enabled') : style.warning('disabled'), 1)); + console.log(keyValue('Timeout', style.number(`${options.timeout}ms`), 1)); let spec: EvalSpec | undefined; if (options.spec && existsSync(options.spec)) { spec = JSON.parse(readFileSync(options.spec, 'utf-8')) as EvalSpec; - console.log(` Spec: ${options.spec} (${spec.scenarios.length} scenarios)`); + console.log(keyValue('Spec', `${style.path(options.spec)} ${style.muted(`(${spec.scenarios.length} scenarios)`)}`, 1)); } const tracer = options.trace ? createTracer(spec?.repo.name || 'unknown') : null; @@ -66,7 +89,8 @@ export const runCommand = new Command('run') }); } - console.log('\n Running tests...\n'); + const spinner = new Spinner('Running tests...'); + spinner.start(); const startTime = Date.now(); const result = await runTests( @@ -75,6 +99,14 @@ export const runCommand = new Command('run') options.sandbox ? 
DEFAULT_SANDBOX_CONFIG : undefined ); + const duration = Date.now() - startTime; + + if (result.summary.failed > 0) { + spinner.fail(`Tests completed with ${style.error(`${result.summary.failed} failures`)}`); + } else { + spinner.succeed(`Tests completed in ${style.number(formatDuration(duration))}`); + } + if (tracer) { tracer.recordExecution({ testsPassed: result.summary.passed, @@ -94,13 +126,20 @@ export const runCommand = new Command('run') } } - console.log(formatResults(result)); + console.log('\n' + resultBox({ + passed: result.summary.passed, + failed: result.summary.failed, + skipped: result.summary.skipped, + duration, + })); if (spec) { const mappedResults = mapResultsToScenarios(result, spec); - console.log(`\n📊 Scenario Coverage:`); - console.log(` Covered: ${mappedResults.covered}/${spec.scenarios.length}`); - console.log(` Unmapped: ${mappedResults.unmapped}`); + console.log(section('Scenario Coverage')); + console.log(keyValue('Covered', `${style.success(String(mappedResults.covered))}/${style.number(String(spec.scenarios.length))}`, 1)); + if (mappedResults.unmapped > 0) { + console.log(keyValue('Unmapped', style.warning(String(mappedResults.unmapped)), 1)); + } } if (options.output) { @@ -108,31 +147,40 @@ export const runCommand = new Command('run') const { dirname } = await import('path'); mkdirSync(dirname(options.output), { recursive: true }); writeFileSync(options.output, JSON.stringify(result, null, 2)); - console.log(`\n📁 Results saved to: ${options.output}`); + console.log(`\n${icons.folder} Results saved to: ${style.path(options.output)}`); } - // Export to Promptfoo format for UI viewing if (options.exportPromptfoo) { const exportPath = await exportToPromptfooFormat(result, spec, { outputDir: '.evaluclaude/results', evalId: `eval-${Date.now()}`, }); - console.log(`\n📦 Promptfoo results exported: ${exportPath}`); - console.log(` View with: evaluclaude ui`); + console.log(`\n${icons.spec} Promptfoo results exported: 
${style.path(exportPath)}`); } if (tracer) { const trace = tracer.finalize(); const tracePath = await saveTrace(trace); - console.log(`\n📊 Trace saved: ${tracePath}`); - console.log(` View with: evaluclaude view ${trace.id}`); + console.log(`\n${icons.trace} Trace saved: ${style.path(tracePath)}`); } + console.log(nextSteps([ + { command: 'evaluclaude view ', description: 'View execution trace' }, + { command: 'evaluclaude ui', description: 'Launch interactive results viewer' }, + ])); + if (result.summary.failed > 0) { process.exit(1); } } catch (error) { - console.error('Error running tests:', error instanceof Error ? error.message : error); + console.log(formatError( + error instanceof Error ? error.message : String(error), + [ + 'Check that the test directory exists and contains valid tests', + 'Ensure the test framework is installed', + `Run with ${style.command('--no-sandbox')} if sandbox is causing issues` + ] + )); process.exit(1); } }); diff --git a/src/cli/commands/ui.ts b/src/cli/commands/ui.ts index a775225..badf72a 100644 --- a/src/cli/commands/ui.ts +++ b/src/cli/commands/ui.ts @@ -4,6 +4,7 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs'; import { join, dirname, resolve as resolvePath } from 'path'; import type { EvalSpec } from '../../analyzer/types.js'; import { generatePromptfooConfig, generateTestProvider } from '../../promptfoo/index.js'; +import { style, icons, Spinner, formatError, nextSteps, header, keyValue } from '../theme.js'; const EVALUCLAUDE_DIR = '.evaluclaude'; const CONFIG_FILE = 'promptfooconfig.yaml'; @@ -15,6 +16,16 @@ export const uiCommand = new Command('ui') .option('-s, --spec ', 'Path to EvalSpec JSON file') .option('--generate', 'Regenerate Promptfoo config from spec') .option('--no-open', 'Do not auto-open browser') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude ui')} Launch UI with existing results + ${style.command('evaluclaude ui -p 8080')} Use custom port + 
${style.command('evaluclaude ui -s spec.json --generate')} Generate config and launch + +${style.bold('Workflow:')} + 1. Run ${style.command('evaluclaude run --export-promptfoo')} to generate results + 2. Run ${style.command('evaluclaude ui')} to view them in the dashboard +`) .action(async (options) => { try { const port = parseInt(options.port, 10); @@ -23,10 +34,15 @@ export const uiCommand = new Command('ui') // If spec provided with --generate, create/update Promptfoo config if (options.spec && options.generate) { - console.log('\n📄 Generating Promptfoo configuration...'); + const spinner = new Spinner('Generating Promptfoo configuration...'); + spinner.start(); if (!existsSync(options.spec)) { - console.error(`Error: Spec file not found: ${options.spec}`); + spinner.fail('Spec file not found'); + console.log(formatError(`Spec file not found: ${style.path(options.spec)}`, [ + `Check the file path and try again`, + `Generate a spec with: ${style.command('evaluclaude analyze ')}`, + ])); process.exit(1); } @@ -42,17 +58,20 @@ export const uiCommand = new Command('ui') await generateTestProvider(providerPath); - console.log(` Config: ${configPath}`); - console.log(` Provider: ${providerPath}`); + spinner.succeed('Promptfoo configuration generated'); + console.log(keyValue('Config', style.path(configPath), 1)); + console.log(keyValue('Provider', style.path(providerPath), 1)); } // Check for existing config, create default if missing if (!existsSync(configPath)) { - console.log('\n⚠️ No Promptfoo config found.'); - console.log(' Creating default configuration...\n'); + console.log(`\n${style.warning(icons.warning)} No Promptfoo config found.`); + const spinner = new Spinner('Creating default configuration...'); + spinner.start(); await createDefaultConfig(configPath, providerPath); - console.log(` Created: ${configPath}`); + spinner.succeed('Default configuration created'); + console.log(keyValue('Created', style.path(configPath), 1)); } // Check for results to 
display @@ -60,19 +79,27 @@ export const uiCommand = new Command('ui') const latestResults = join(resultsDir, 'latest.json'); if (!existsSync(latestResults)) { - console.log('\n⚠️ No evaluation results found.'); - console.log(' Run `evaluclaude run --export-promptfoo` first to generate results.\n'); - console.log(' Or run the full pipeline:'); - console.log(' evaluclaude pipeline --promptfoo\n'); + console.log(formatError('No evaluation results found.', [ + `Run ${style.command('evaluclaude run --export-promptfoo')} first to generate results`, + `Or run the full pipeline: ${style.command('evaluclaude pipeline --promptfoo')}`, + ])); } - console.log(`\n🚀 Starting Promptfoo UI on port ${port}...`); - console.log(` Results: ${latestResults}\n`); + console.log(header('Launching Promptfoo UI')); + console.log(keyValue('Port', style.number(String(port)), 1)); + console.log(keyValue('Results', style.path(latestResults), 1)); + console.log(''); + + const spinner = new Spinner(`${icons.rocket} Starting Promptfoo UI...`); + spinner.start(); // Use promptfoo view with the results file - await launchPromptfooView(port, latestResults, options.open); + await launchPromptfooView(port, latestResults, options.open, spinner); } catch (error) { - console.error('Error launching UI:', error instanceof Error ? error.message : error); + console.log(formatError( + error instanceof Error ? 
error.message : String(error), + ['Check the console output for more details'] + )); process.exit(1); } }); @@ -85,6 +112,17 @@ export const evalCommand = new Command('eval') .option('--view', 'Launch UI after evaluation', false) .option('-p, --port ', 'Port for UI', '3000') .option('--no-cache', 'Disable Promptfoo caching', false) + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude eval -s spec.json')} Run evals from spec + ${style.command('evaluclaude eval -c config.yaml')} Run with custom config + ${style.command('evaluclaude eval -s spec.json --view')} Run and launch UI + +${style.bold('Workflow:')} + 1. Generate spec: ${style.command('evaluclaude analyze -o spec.json')} + 2. Run evals: ${style.command('evaluclaude eval -s spec.json')} + 3. View results: ${style.command('evaluclaude ui')} +`) .action(async (options) => { try { const configPath = options.config || join(EVALUCLAUDE_DIR, CONFIG_FILE); @@ -92,10 +130,15 @@ export const evalCommand = new Command('eval') // Generate config from spec if provided if (options.spec) { - console.log('\n📄 Generating Promptfoo configuration from spec...'); + const spinner = new Spinner('Generating Promptfoo configuration from spec...'); + spinner.start(); if (!existsSync(options.spec)) { - console.error(`Error: Spec file not found: ${options.spec}`); + spinner.fail('Spec file not found'); + console.log(formatError(`Spec file not found: ${style.path(options.spec)}`, [ + `Check the file path and try again`, + `Generate a spec with: ${style.command('evaluclaude analyze ')}`, + ])); process.exit(1); } @@ -111,34 +154,41 @@ export const evalCommand = new Command('eval') await generateTestProvider(providerPath); - console.log(` Config: ${configPath}`); - console.log(` Provider: ${providerPath}`); - console.log(` Scenarios: ${spec.scenarios.length}`); + spinner.succeed('Promptfoo configuration generated'); + console.log(keyValue('Config', style.path(configPath), 1)); + 
console.log(keyValue('Provider', style.path(providerPath), 1)); + console.log(keyValue('Scenarios', style.number(String(spec.scenarios.length)), 1)); } if (!existsSync(configPath)) { - console.error(`\nError: Config not found: ${configPath}`); - console.log('Run with --spec to generate from EvalSpec, or create config manually.'); + console.log(formatError(`Config not found: ${style.path(configPath)}`, [ + `Run with ${style.command('--spec ')} to generate from EvalSpec`, + `Or create a config manually`, + ])); process.exit(1); } // Ensure output directory exists mkdirSync(options.output, { recursive: true }); - console.log('\n🧪 Running Promptfoo evaluations...'); - console.log(` Config: ${configPath}`); - console.log(` Output: ${options.output}\n`); + console.log(header('Running Promptfoo Evaluations')); + console.log(keyValue('Config', style.path(configPath), 1)); + console.log(keyValue('Output', style.path(options.output), 1)); + console.log(''); const outputFile = join(options.output, `eval-${Date.now()}.json`); - const exitCode = await runPromptfooEval(configPath, outputFile, !options.cache); + const spinner = new Spinner(`${icons.test} Running evaluations...`); + spinner.start(); + + const exitCode = await runPromptfooEval(configPath, outputFile, !options.cache, spinner); if (exitCode === 0) { - console.log(`\n✅ Evaluation complete!`); - console.log(`📁 Results: ${outputFile}`); + spinner.succeed('Evaluation complete!'); + console.log(keyValue('Results', style.path(outputFile), 1)); } else { - console.log(`\n⚠️ Evaluation finished with exit code ${exitCode}`); - console.log(`📁 Results: ${outputFile}`); + spinner.warn(`Evaluation finished with exit code ${exitCode}`); + console.log(keyValue('Results', style.path(outputFile), 1)); } // List traces generated during evaluation @@ -147,19 +197,27 @@ export const evalCommand = new Command('eval') const { readdirSync } = await import('fs'); const traces = readdirSync(tracesDir).filter(f => f.endsWith('.json')); if 
(traces.length > 0) { - console.log(`\n📊 Traces generated: ${traces.length}`); - console.log(` View with: evaluclaude view --last`); + console.log(`\n${icons.trace} ${style.bold('Traces generated:')} ${style.number(String(traces.length))}`); + console.log(style.dim(` View with: ${style.command('evaluclaude view --last')}`)); } } if (options.view) { - console.log(`\n🚀 Launching UI on port ${options.port}...`); - await launchPromptfooUI(parseInt(options.port, 10), configPath, true); + console.log(''); + const uiSpinner = new Spinner(`${icons.rocket} Launching UI on port ${options.port}...`); + uiSpinner.start(); + await launchPromptfooUI(parseInt(options.port, 10), configPath, true, uiSpinner); } else { - console.log(`\n View results: evaluclaude ui`); + console.log(nextSteps([ + { command: 'evaluclaude ui', description: 'View results in dashboard' }, + { command: 'evaluclaude view --last', description: 'View latest trace' }, + ])); } } catch (error) { - console.error('Error running eval:', error instanceof Error ? error.message : error); + console.log(formatError( + error instanceof Error ? 
error.message : String(error), + ['Check the console output for more details'] + )); process.exit(1); } }); @@ -170,7 +228,8 @@ export const evalCommand = new Command('eval') async function launchPromptfooView( port: number, resultsFile: string, - openBrowser: boolean + openBrowser: boolean, + spinner?: Spinner ): Promise { return new Promise((resolve, reject) => { // Use 'promptfoo view' which opens the web UI showing results from the output directory @@ -186,7 +245,11 @@ async function launchPromptfooView( // Pass the directory containing results args.push(resultsDir); - console.log(` Running: npx ${args.join(' ')}\n`); + if (spinner) { + spinner.succeed(`Promptfoo UI starting on port ${style.number(String(port))}`); + } + console.log(style.dim(` Running: npx ${args.join(' ')}`)); + console.log(''); const child = spawn('npx', args, { stdio: 'inherit', @@ -195,9 +258,10 @@ async function launchPromptfooView( child.on('error', (error) => { if ((error as NodeJS.ErrnoException).code === 'ENOENT') { - console.error('\n❌ Promptfoo not found.'); - console.error(' Install with: npm install -g promptfoo'); - console.error(' Or run: npx promptfoo --version\n'); + console.log(formatError('Promptfoo not found.', [ + `Install with: ${style.command('npm install -g promptfoo')}`, + `Or run: ${style.command('npx promptfoo --version')}`, + ])); } else { reject(error); } @@ -225,7 +289,8 @@ async function launchPromptfooView( async function launchPromptfooUI( port: number, configPath: string, - openBrowser: boolean + openBrowser: boolean, + spinner?: Spinner ): Promise { return new Promise((resolve, reject) => { const args = ['promptfoo', 'view', '--port', String(port)]; @@ -240,7 +305,11 @@ async function launchPromptfooUI( const configDir = dirname(resolvePath(configPath)); args.push(configDir); - console.log(` Running: npx ${args.join(' ')}\n`); + if (spinner) { + spinner.succeed(`Promptfoo UI starting on port ${style.number(String(port))}`); + } + console.log(style.dim(` 
Running: npx ${args.join(' ')}`)); + console.log(''); const child = spawn('npx', args, { stdio: 'inherit', @@ -249,9 +318,10 @@ async function launchPromptfooUI( child.on('error', (error) => { if ((error as NodeJS.ErrnoException).code === 'ENOENT') { - console.error('\n❌ Promptfoo not found.'); - console.error(' Install with: npm install -g promptfoo'); - console.error(' Or run: npx promptfoo --version\n'); + console.log(formatError('Promptfoo not found.', [ + `Install with: ${style.command('npm install -g promptfoo')}`, + `Or run: ${style.command('npx promptfoo --version')}`, + ])); } else { reject(error); } @@ -276,7 +346,8 @@ async function launchPromptfooUI( async function runPromptfooEval( configPath: string, outputFile: string, - noCache: boolean + noCache: boolean, + spinner?: Spinner ): Promise { return new Promise((resolve, reject) => { const args = [ @@ -290,7 +361,11 @@ async function runPromptfooEval( args.push('--no-cache'); } - console.log(` Running: npx ${args.join(' ')}\n`); + if (spinner) { + spinner.stop(); + } + console.log(style.dim(` Running: npx ${args.join(' ')}`)); + console.log(''); const child = spawn('npx', args, { stdio: 'inherit', @@ -299,8 +374,9 @@ async function runPromptfooEval( child.on('error', (error) => { if ((error as NodeJS.ErrnoException).code === 'ENOENT') { - console.error('\n❌ Promptfoo not found.'); - console.error(' Install with: npm install -g promptfoo\n'); + console.log(formatError('Promptfoo not found.', [ + `Install with: ${style.command('npm install -g promptfoo')}`, + ])); reject(error); } else { reject(error); diff --git a/src/cli/commands/view.ts b/src/cli/commands/view.ts index 2020aaa..5fe398b 100644 --- a/src/cli/commands/view.ts +++ b/src/cli/commands/view.ts @@ -6,6 +6,7 @@ import { formatTrace, formatTraceList } from '../../observability/index.js'; +import { style, icons, formatError, nextSteps } from '../theme.js'; export const viewCommand = new Command('view') .description('View evaluation traces') @@ 
-19,6 +20,14 @@ export const viewCommand = new Command('view') .option('--decisions', 'Show decisions made', true) .option('-n, --limit ', 'Limit number of traces listed', '20') .option('--eval ', 'Filter traces by eval ID') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude view')} ${style.dim('View the most recent trace')} + ${style.command('evaluclaude view --list')} ${style.dim('List all available traces')} + ${style.command('evaluclaude view abc123')} ${style.dim('View a specific trace by ID')} + ${style.command('evaluclaude view --json')} ${style.dim('Output trace as raw JSON')} + ${style.command('evaluclaude view -v')} ${style.dim('Verbose output with tool calls')} +`) .action(async (traceId: string | undefined, options) => { try { if (options.list) { @@ -26,16 +35,19 @@ export const viewCommand = new Command('view') const limited = traces.slice(0, parseInt(options.limit, 10)); if (traces.length === 0) { - console.log('\nNo traces found.'); - console.log('Run `evaluclaude run` to generate traces.\n'); + console.log(`\n${style.warning(`${icons.warning} No traces found.`)}`); + console.log(nextSteps([ + { command: 'evaluclaude run', description: 'Run evals to generate traces' }, + { command: 'evaluclaude pipeline .', description: 'Run full pipeline from scratch' }, + ])); return; } console.log(formatTraceList(limited)); if (traces.length > limited.length) { - console.log(`Showing ${limited.length} of ${traces.length} traces.`); - console.log(`Use --limit to see more.\n`); + console.log(style.muted(`Showing ${limited.length} of ${traces.length} traces.`)); + console.log(style.muted(`Use ${style.command('--limit')} to see more.\n`)); } return; } @@ -45,15 +57,20 @@ export const viewCommand = new Command('view') if (options.last || !traceId) { trace = await getLatestTrace(); if (!trace) { - console.log('\nNo traces found.'); - console.log('Run `evaluclaude run` to generate traces.\n'); + 
console.log(`\n${style.warning(`${icons.warning} No traces found.`)}`); + console.log(nextSteps([ + { command: 'evaluclaude run', description: 'Run evals to generate traces' }, + { command: 'evaluclaude pipeline .', description: 'Run full pipeline from scratch' }, + ])); return; } } else { trace = await loadTrace(traceId); if (!trace) { - console.error(`\nTrace not found: ${traceId}`); - console.log('Use `evaluclaude view --list` to see available traces.\n'); + console.log(formatError(`Trace not found: ${style.path(traceId)}`, [ + `Run ${style.command('evaluclaude view --list')} to see available traces`, + `Check that the trace ID is correct`, + ])); process.exit(1); } } @@ -68,7 +85,10 @@ export const viewCommand = new Command('view') console.log(output); } catch (error) { - console.error('Error viewing trace:', error instanceof Error ? error.message : error); + console.log(formatError( + error instanceof Error ? error.message : String(error), + ['Run evaluclaude run first to generate traces'] + )); process.exit(1); } }); @@ -77,14 +97,36 @@ export const tracesCommand = new Command('traces') .description('List all evaluation traces (alias for view --list)') .option('-n, --limit ', 'Limit number of traces', '20') .option('--eval ', 'Filter by eval ID') + .addHelpText('after', ` +${style.bold('Examples:')} + ${style.command('evaluclaude traces')} ${style.dim('List all traces')} + ${style.command('evaluclaude traces -n 50')} ${style.dim('Show up to 50 traces')} + ${style.command('evaluclaude traces --eval X')} ${style.dim('Filter by eval ID')} +`) .action(async (options) => { - const traces = await listTraces(options.eval); - const limited = traces.slice(0, parseInt(options.limit, 10)); - - if (traces.length === 0) { - console.log('\nNo traces found.'); - return; + try { + const traces = await listTraces(options.eval); + const limited = traces.slice(0, parseInt(options.limit, 10)); + + if (traces.length === 0) { + console.log(`\n${style.warning(`${icons.warning} No 
traces found.`)}`); + console.log(nextSteps([ + { command: 'evaluclaude run', description: 'Run evals to generate traces' }, + ])); + return; + } + + console.log(formatTraceList(limited)); + + if (traces.length > limited.length) { + console.log(style.muted(`Showing ${limited.length} of ${traces.length} traces.`)); + console.log(style.muted(`Use ${style.command('--limit')} to see more.\n`)); + } + } catch (error) { + console.log(formatError( + error instanceof Error ? error.message : String(error), + ['Run evaluclaude run first to generate traces'] + )); + process.exit(1); } - - console.log(formatTraceList(limited)); }); diff --git a/src/cli/index.ts b/src/cli/index.ts index ffa056b..9143ccd 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -9,13 +9,43 @@ import { runCommand } from './commands/run.js'; import { viewCommand, tracesCommand } from './commands/view.js'; import { uiCommand, evalCommand } from './commands/ui.js'; import { pipelineCommand } from './commands/pipeline.js'; +import { BANNER_MINIMAL, style, welcomeMessage, icons } from './theme.js'; const program = new Command(); program .name('evaluclaude') - .description('Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.') - .version('0.1.0'); + .description(`${BANNER_MINIMAL}\n\nClaude-powered functional test generation for any codebase.`) + .version('0.1.0') + .configureHelp({ + sortSubcommands: true, + subcommandTerm: (cmd) => style.command(cmd.name()) + ' ' + style.dim(cmd.usage()), + }) + .addHelpText('beforeAll', '') + .addHelpText('afterAll', ` +${style.bold('Examples:')} + + ${style.dim('# Run the full pipeline on current directory')} + $ evaluclaude pipeline . 
+ + ${style.dim('# Analyze a Python project interactively')} + $ evaluclaude analyze ./my-project -i -o spec.json + + ${style.dim('# Generate and run tests')} + $ evaluclaude render spec.json && evaluclaude run + + ${style.dim('# View results in browser')} + $ evaluclaude run --export-promptfoo && evaluclaude ui + +${style.muted('For more info, run any command with --help')} +`); + +// Add welcome command for first-time users +const welcomeCmd = new Command('welcome') + .description('Show welcome message and quick start guide') + .action(() => { + console.log(welcomeMessage()); + }); // Core pipeline command - the "zero to evals" experience program.addCommand(pipelineCommand); @@ -39,4 +69,16 @@ program.addCommand(tracesCommand); program.addCommand(uiCommand); program.addCommand(evalCommand); +// Utility commands +program.addCommand(welcomeCmd); + +// Show welcome on no args if first time (check for .evaluclaude directory) +if (process.argv.length === 2) { + const fs = await import('fs'); + if (!fs.existsSync('.evaluclaude')) { + console.log(welcomeMessage()); + process.exit(0); + } +} + program.parse(process.argv); diff --git a/src/cli/theme.ts b/src/cli/theme.ts new file mode 100644 index 0000000..8db04ef --- /dev/null +++ b/src/cli/theme.ts @@ -0,0 +1,357 @@ +/** + * Evaluclaude CLI Theme + * Consistent styling, colors, and formatting for a beautiful CLI experience + */ + +// ANSI color codes +const colors = { + reset: '\x1b[0m', + bold: '\x1b[1m', + dim: '\x1b[2m', + italic: '\x1b[3m', + underline: '\x1b[4m', + + // Foreground colors + black: '\x1b[30m', + red: '\x1b[31m', + green: '\x1b[32m', + yellow: '\x1b[33m', + blue: '\x1b[34m', + magenta: '\x1b[35m', + cyan: '\x1b[36m', + white: '\x1b[37m', + + // Bright foreground colors + brightBlack: '\x1b[90m', + brightRed: '\x1b[91m', + brightGreen: '\x1b[92m', + brightYellow: '\x1b[93m', + brightBlue: '\x1b[94m', + brightMagenta: '\x1b[95m', + brightCyan: '\x1b[96m', + brightWhite: '\x1b[97m', + + // Background 
colors + bgBlack: '\x1b[40m', + bgRed: '\x1b[41m', + bgGreen: '\x1b[42m', + bgYellow: '\x1b[43m', + bgBlue: '\x1b[44m', + bgMagenta: '\x1b[45m', + bgCyan: '\x1b[46m', + bgWhite: '\x1b[47m', +}; + +// Semantic color helpers +export const style = { + // Text styles + bold: (text: string) => `${colors.bold}${text}${colors.reset}`, + dim: (text: string) => `${colors.dim}${text}${colors.reset}`, + italic: (text: string) => `${colors.italic}${text}${colors.reset}`, + + // Semantic colors + success: (text: string) => `${colors.green}${text}${colors.reset}`, + error: (text: string) => `${colors.red}${text}${colors.reset}`, + warning: (text: string) => `${colors.yellow}${text}${colors.reset}`, + info: (text: string) => `${colors.cyan}${text}${colors.reset}`, + highlight: (text: string) => `${colors.brightMagenta}${text}${colors.reset}`, + muted: (text: string) => `${colors.brightBlack}${text}${colors.reset}`, + + // Accent colors + primary: (text: string) => `${colors.brightCyan}${text}${colors.reset}`, + secondary: (text: string) => `${colors.brightBlue}${text}${colors.reset}`, + accent: (text: string) => `${colors.brightMagenta}${text}${colors.reset}`, + + // Special combinations + command: (text: string) => `${colors.bold}${colors.cyan}${text}${colors.reset}`, + path: (text: string) => `${colors.brightBlue}${text}${colors.reset}`, + number: (text: string) => `${colors.brightYellow}${text}${colors.reset}`, + label: (text: string) => `${colors.dim}${text}${colors.reset}`, +}; + +// Icons for consistent visual language +export const icons = { + // Status + success: '✓', + error: '✗', + warning: '⚠', + info: 'ℹ', + pending: '○', + running: '◐', + + // Actions + arrow: '→', + arrowRight: '▸', + bullet: '•', + check: '✓', + cross: '✗', + + // Objects + folder: '📁', + file: '📄', + code: '💻', + test: '🧪', + spec: '📋', + trace: '📊', + + // Process + rocket: '🚀', + gear: '⚙', + magnify: '🔍', + brain: '🧠', + lightning: '⚡', + sparkle: '✨', + + // Results + passed: '✅', + failed: 
'❌', + skipped: '⏭️', + + // Categories + python: '🐍', + typescript: '📘', + javascript: '📙', +}; + +// Box drawing characters +export const box = { + topLeft: '╭', + topRight: '╮', + bottomLeft: '╰', + bottomRight: '╯', + horizontal: '─', + vertical: '│', + tLeft: '├', + tRight: '┤', + cross: '┼', + + // Double lines + dHorizontal: '═', + dVertical: '║', + dTopLeft: '╔', + dTopRight: '╗', + dBottomLeft: '╚', + dBottomRight: '╝', +}; + +// Banner and branding +export const BANNER = ` +${style.primary(' ╔═══════════════════════════════════════════════════════╗')} +${style.primary(' ║')} ${style.bold(style.accent('evaluclaude'))}${style.muted(' · zero-to-evals in one command')} ${style.primary('║')} +${style.primary(' ╚═══════════════════════════════════════════════════════╝')} +`; + +export const BANNER_MINIMAL = `${style.accent('evaluclaude')} ${style.muted('·')} ${style.dim('zero-to-evals in one command')}`; + +// Common output formatters +export function header(title: string): string { + const width = 60; + const padding = Math.max(0, width - title.length - 4); + return `\n${style.primary(box.dHorizontal.repeat(width))} +${style.bold(title)} +${style.primary(box.dHorizontal.repeat(width))}\n`; +} + +export function subheader(title: string): string { + return `\n${style.bold(title)}\n${style.dim(box.horizontal.repeat(40))}`; +} + +export function section(title: string): string { + return `\n${style.dim(box.horizontal.repeat(4))} ${style.bold(title)} ${style.dim(box.horizontal.repeat(Math.max(0, 34 - title.length)))}`; +} + +export function keyValue(key: string, value: string | number, indent = 0): string { + const pad = ' '.repeat(indent); + return `${pad}${style.label(key + ':')} ${value}`; +} + +export function bullet(text: string, indent = 0): string { + const pad = ' '.repeat(indent); + return `${pad}${style.dim(icons.bullet)} ${text}`; +} + +export function step(num: number, text: string, status: 'pending' | 'running' | 'done' | 'error' = 'pending'): string { 
+ const statusIcon = { + pending: style.dim(`${num}.`), + running: style.info(`${icons.running}`), + done: style.success(icons.success), + error: style.error(icons.error), + }[status]; + + return ` ${statusIcon} ${status === 'done' ? style.muted(text) : text}`; +} + +export function progressBar(current: number, total: number, width = 30): string { + const percentage = Math.round((current / total) * 100); + const filled = Math.round((current / total) * width); + const empty = width - filled; + + const bar = style.success('█'.repeat(filled)) + style.dim('░'.repeat(empty)); + return `${bar} ${style.muted(`${percentage}%`)}`; +} + +export function table(rows: string[][]): string { + if (rows.length === 0) return ''; + + const colWidths = rows[0].map((_, i) => + Math.max(...rows.map(row => (row[i] || '').length)) + ); + + return rows.map(row => + row.map((cell, i) => cell.padEnd(colWidths[i])).join(' ') + ).join('\n'); +} + +// Spinner for async operations +export class Spinner { + private frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']; + private frameIndex = 0; + private intervalId: NodeJS.Timeout | null = null; + private text: string; + + constructor(text: string) { + this.text = text; + } + + start(): void { + process.stdout.write('\x1b[?25l'); // Hide cursor + this.render(); + this.intervalId = setInterval(() => { + this.frameIndex = (this.frameIndex + 1) % this.frames.length; + this.render(); + }, 80); + } + + private render(): void { + process.stdout.write(`\r${style.info(this.frames[this.frameIndex])} ${this.text}`); + } + + update(text: string): void { + this.text = text; + this.render(); + } + + succeed(text?: string): void { + this.stop(); + console.log(`\r${style.success(icons.success)} ${text || this.text}`); + } + + fail(text?: string): void { + this.stop(); + console.log(`\r${style.error(icons.error)} ${text || this.text}`); + } + + warn(text?: string): void { + this.stop(); + console.log(`\r${style.warning(icons.warning)} ${text || 
this.text}`); + } + + stop(): void { + if (this.intervalId) { + clearInterval(this.intervalId); + this.intervalId = null; + } + process.stdout.write('\x1b[?25h'); // Show cursor + process.stdout.write('\r' + ' '.repeat(80) + '\r'); // Clear line + } +} + +// Result summary box +export function resultBox(results: { passed: number; failed: number; skipped?: number; duration?: number }): string { + const { passed, failed, skipped = 0, duration } = results; + const total = passed + failed + skipped; + const lines: string[] = []; + + lines.push(style.primary(` ${box.topLeft}${box.horizontal.repeat(38)}${box.topRight}`)); + lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical)); + lines.push(style.primary(` ${box.vertical}`) + ` ${style.bold('Test Results')}`.padEnd(45) + style.primary(box.vertical)); + lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical)); + lines.push(style.primary(` ${box.vertical}`) + ` ${style.success(icons.passed)} Passed: ${String(passed).padStart(4)}`.padEnd(45) + style.primary(box.vertical)); + lines.push(style.primary(` ${box.vertical}`) + ` ${style.error(icons.failed)} Failed: ${String(failed).padStart(4)}`.padEnd(45) + style.primary(box.vertical)); + + if (skipped > 0) { + lines.push(style.primary(` ${box.vertical}`) + ` ${icons.skipped} Skipped: ${String(skipped).padStart(4)}`.padEnd(42) + style.primary(box.vertical)); + } + + lines.push(style.primary(` ${box.vertical}`) + style.dim(` ${'─'.repeat(20)}`).padEnd(45) + style.primary(box.vertical)); + lines.push(style.primary(` ${box.vertical}`) + ` Total: ${String(total).padStart(4)}`.padEnd(45) + style.primary(box.vertical)); + + if (duration !== undefined) { + lines.push(style.primary(` ${box.vertical}`) + ` Duration: ${formatDuration(duration)}`.padEnd(45) + style.primary(box.vertical)); + } + + lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical)); + 
lines.push(style.primary(` ${box.bottomLeft}${box.horizontal.repeat(38)}${box.bottomRight}`)); + + return lines.join('\n'); +} + +export function formatDuration(ms: number): string { + if (ms < 1000) return `${ms}ms`; + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; + const minutes = Math.floor(ms / 60000); + const seconds = Math.floor((ms % 60000) / 1000); + return `${minutes}m ${seconds}s`; +} + +// Error formatting with suggestions +export function formatError(message: string, suggestions?: string[]): string { + const lines: string[] = []; + lines.push(`\n${style.error(`${icons.error} Error:`)} ${message}`); + + if (suggestions && suggestions.length > 0) { + lines.push(''); + lines.push(style.dim(' Suggestions:')); + for (const suggestion of suggestions) { + lines.push(` ${style.dim(icons.arrowRight)} ${suggestion}`); + } + } + + lines.push(''); + return lines.join('\n'); +} + +// Command examples helper +export function commandExample(command: string, description?: string): string { + if (description) { + return ` ${style.command(command)} ${style.dim(description)}`; + } + return ` ${style.command(command)}`; +} + +// Next steps helper +export function nextSteps(steps: { command: string; description: string }[]): string { + const lines: string[] = []; + lines.push(`\n${style.bold('Next steps:')}`); + + for (const step of steps) { + lines.push(commandExample(step.command, step.description)); + } + + lines.push(''); + return lines.join('\n'); +} + +// Welcome message for first-time users +export function welcomeMessage(): string { + return ` +${BANNER} + +${style.bold('Welcome to evaluclaude!')} ${icons.sparkle} + +Generate functional tests for any codebase with the power of Claude. 
+ +${style.bold('Quick Start:')} + + ${style.command('evaluclaude pipeline .')} ${style.dim('Full pipeline: analyze → render → run')} + ${style.command('evaluclaude intro .')} ${style.dim('Introspect codebase structure')} + ${style.command('evaluclaude analyze .')} ${style.dim('Generate EvalSpec with Claude')} + +${style.bold('Learn More:')} + + ${style.command('evaluclaude --help')} ${style.dim('Show all commands')} + ${style.command('evaluclaude --help')} ${style.dim('Help for specific command')} + +${style.muted('Documentation: https://github.com/harivansh-afk/evaluclaude-harness')} +`; +} diff --git a/src/observability/trace-viewer.ts b/src/observability/trace-viewer.ts index 553081d..3a0977e 100644 --- a/src/observability/trace-viewer.ts +++ b/src/observability/trace-viewer.ts @@ -1,5 +1,47 @@ import type { EvalTrace, ToolCall, Question, Decision, TestFailure } from './types.js'; +// ANSI color codes for terminal styling +const colors = { + reset: '\x1b[0m', + bold: '\x1b[1m', + dim: '\x1b[2m', + green: '\x1b[32m', + red: '\x1b[31m', + yellow: '\x1b[33m', + cyan: '\x1b[36m', + magenta: '\x1b[35m', + blue: '\x1b[34m', + brightBlack: '\x1b[90m', + brightCyan: '\x1b[96m', + brightMagenta: '\x1b[95m', + brightYellow: '\x1b[93m', +}; + +const s = { + bold: (t: string) => `${colors.bold}${t}${colors.reset}`, + dim: (t: string) => `${colors.dim}${t}${colors.reset}`, + success: (t: string) => `${colors.green}${t}${colors.reset}`, + error: (t: string) => `${colors.red}${t}${colors.reset}`, + warning: (t: string) => `${colors.yellow}${t}${colors.reset}`, + info: (t: string) => `${colors.cyan}${t}${colors.reset}`, + highlight: (t: string) => `${colors.brightMagenta}${t}${colors.reset}`, + muted: (t: string) => `${colors.brightBlack}${t}${colors.reset}`, + number: (t: string) => `${colors.brightYellow}${t}${colors.reset}`, + primary: (t: string) => `${colors.brightCyan}${t}${colors.reset}`, +}; + +const box = { + horizontal: '─', + dHorizontal: '═', + topLeft: '╭', + 
topRight: '╮', + bottomLeft: '╰', + bottomRight: '╯', + vertical: '│', + tLeft: '├', + tRight: '┤', +}; + export interface ViewOptions { json: boolean; verbose: boolean; @@ -24,118 +66,129 @@ export function formatTrace(trace: EvalTrace, options: Partial = {} } const lines: string[] = []; + const w = 60; + // Header lines.push(''); - lines.push('═'.repeat(60)); - lines.push(`📊 Trace: ${trace.id}`); - lines.push('═'.repeat(60)); + lines.push(s.primary(box.dHorizontal.repeat(w))); + lines.push(` 📊 ${s.bold('Trace')} ${s.muted(trace.id)}`); + lines.push(s.primary(box.dHorizontal.repeat(w))); lines.push(''); - lines.push(` Status: ${formatStatus(trace.status)}`); - lines.push(` Started: ${formatDate(trace.startedAt)}`); - lines.push(` Duration: ${formatDuration(trace.duration)}`); - lines.push(` Eval ID: ${trace.evalId}`); + // Overview + lines.push(` ${s.dim('Status:')} ${formatStatus(trace.status)}`); + lines.push(` ${s.dim('Started:')} ${s.muted(formatDate(trace.startedAt))}`); + lines.push(` ${s.dim('Duration:')} ${s.number(formatDuration(trace.duration))}`); + lines.push(` ${s.dim('Eval ID:')} ${s.muted(trace.evalId)}`); lines.push(''); - lines.push('📂 Introspection'); - lines.push('─'.repeat(40)); - lines.push(` Files analyzed: ${trace.introspection.filesAnalyzed.length}`); - lines.push(` Functions found: ${trace.introspection.totalFunctions}`); - lines.push(` Classes found: ${trace.introspection.totalClasses}`); - lines.push(` Duration: ${formatDuration(trace.introspection.duration)}`); + // Introspection section + lines.push(sectionHeader('📂 Introspection')); + lines.push(kv('Files analyzed', s.number(String(trace.introspection.filesAnalyzed.length)))); + lines.push(kv('Functions found', s.number(String(trace.introspection.totalFunctions)))); + lines.push(kv('Classes found', s.number(String(trace.introspection.totalClasses)))); + lines.push(kv('Duration', s.number(formatDuration(trace.introspection.duration)))); lines.push(''); - lines.push('🤖 Analysis'); - 
lines.push('─'.repeat(40)); - lines.push(` Tool calls: ${trace.analysis.toolCalls.length}`); - lines.push(` Questions asked: ${trace.analysis.questionsAsked.length}`); - lines.push(` Decisions made: ${trace.analysis.decisions.length}`); - lines.push(` Prompt tokens: ${trace.analysis.promptTokens.toLocaleString()}`); - lines.push(` Completion tokens: ${trace.analysis.completionTokens.toLocaleString()}`); + // Analysis section + lines.push(sectionHeader('🧠 Analysis')); + lines.push(kv('Tool calls', s.number(String(trace.analysis.toolCalls.length)))); + lines.push(kv('Questions asked', s.number(String(trace.analysis.questionsAsked.length)))); + lines.push(kv('Decisions made', s.number(String(trace.analysis.decisions.length)))); + lines.push(kv('Prompt tokens', s.number(trace.analysis.promptTokens.toLocaleString()))); + lines.push(kv('Completion tokens', s.number(trace.analysis.completionTokens.toLocaleString()))); lines.push(''); - lines.push('📝 Generation'); - lines.push('─'.repeat(40)); - lines.push(` Scenarios: ${trace.generation.scenariosGenerated}`); - lines.push(` Files written: ${trace.generation.filesWritten.length}`); + // Generation section + lines.push(sectionHeader('📝 Generation')); + lines.push(kv('Scenarios', s.number(String(trace.generation.scenariosGenerated)))); + lines.push(kv('Files written', s.number(String(trace.generation.filesWritten.length)))); lines.push(''); - lines.push('🧪 Execution'); - lines.push('─'.repeat(40)); - lines.push(` ✅ Passed: ${trace.execution.testsPassed}`); - lines.push(` ❌ Failed: ${trace.execution.testsFailed}`); - lines.push(` ⏭️ Skipped: ${trace.execution.testsSkipped ?? 0}`); + // Execution section + lines.push(sectionHeader('🧪 Execution')); + lines.push(` ${s.success('✓')} Passed: ${s.success(String(trace.execution.testsPassed))}`); + lines.push(` ${s.error('✗')} Failed: ${s.error(String(trace.execution.testsFailed))}`); + lines.push(` ${s.muted('○')} Skipped: ${s.muted(String(trace.execution.testsSkipped ?? 
0))}`); lines.push(''); + // Questions section if (opts.showQuestions && trace.analysis.questionsAsked.length > 0) { - lines.push('❓ Questions Asked'); - lines.push('─'.repeat(40)); + lines.push(sectionHeader('❓ Questions Asked')); for (const q of trace.analysis.questionsAsked) { lines.push(formatQuestion(q)); } lines.push(''); } + // Decisions section if (opts.showDecisions && trace.analysis.decisions.length > 0) { - lines.push('🎯 Key Decisions'); - lines.push('─'.repeat(40)); + lines.push(sectionHeader('🎯 Key Decisions')); for (const d of trace.analysis.decisions.slice(0, 10)) { lines.push(formatDecision(d)); } if (trace.analysis.decisions.length > 10) { - lines.push(` ... and ${trace.analysis.decisions.length - 10} more`); + lines.push(` ${s.dim(`... and ${trace.analysis.decisions.length - 10} more`)}`); } lines.push(''); } + // Tool calls section if (opts.showToolCalls && trace.analysis.toolCalls.length > 0) { - lines.push('🔧 Tool Calls'); - lines.push('─'.repeat(40)); + lines.push(sectionHeader('🔧 Tool Calls')); for (const tc of trace.analysis.toolCalls.slice(0, 20)) { lines.push(formatToolCall(tc, opts.verbose)); } if (trace.analysis.toolCalls.length > 20) { - lines.push(` ... and ${trace.analysis.toolCalls.length - 20} more`); + lines.push(` ${s.dim(`... 
and ${trace.analysis.toolCalls.length - 20} more`)}`); } lines.push(''); } + // Test failures section if (trace.execution.failures.length > 0) { - lines.push('❌ Test Failures'); - lines.push('─'.repeat(40)); + lines.push(sectionHeader('❌ Test Failures')); for (const f of trace.execution.failures) { lines.push(formatFailure(f)); } lines.push(''); } + // Errors section if (trace.errors.length > 0) { - lines.push('⚠️ Errors'); - lines.push('─'.repeat(40)); + lines.push(sectionHeader('⚠️ Errors')); for (const e of trace.errors) { - lines.push(` [${formatDate(e.timestamp)}]`); - lines.push(` ${e.message}`); + lines.push(` ${s.dim('[')}${s.muted(formatDate(e.timestamp))}${s.dim(']')}`); + lines.push(` ${s.error(e.message)}`); if (e.context) { - lines.push(` Context: ${e.context}`); + lines.push(` ${s.dim('Context:')} ${e.context}`); } lines.push(''); } } - lines.push('═'.repeat(60)); + lines.push(s.primary(box.dHorizontal.repeat(w))); lines.push(''); return lines.join('\n'); } +function sectionHeader(title: string): string { + return `${s.dim(box.horizontal.repeat(3))} ${s.bold(title)} ${s.dim(box.horizontal.repeat(Math.max(0, 35 - title.length)))}`; +} + +function kv(key: string, value: string): string { + return ` ${s.dim(key + ':')} ${value}`; +} + function formatStatus(status: EvalTrace['status']): string { switch (status) { case 'success': - return '✅ Success'; + return s.success('✓ Success'); case 'partial': - return '⚠️ Partial'; + return s.warning('⚠ Partial'); case 'failed': - return '❌ Failed'; + return s.error('✗ Failed'); default: return status; } @@ -146,12 +199,8 @@ function formatDate(iso: string): string { } function formatDuration(ms: number): string { - if (ms < 1000) { - return `${ms}ms`; - } - if (ms < 60000) { - return `${(ms / 1000).toFixed(1)}s`; - } + if (ms < 1000) return `${ms}ms`; + if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`; const minutes = Math.floor(ms / 60000); const seconds = ((ms % 60000) / 1000).toFixed(0); return `${minutes}m 
${seconds}s`; @@ -159,37 +208,39 @@ function formatDuration(ms: number): string { function formatQuestion(q: Question): string { const lines: string[] = []; - lines.push(` Q: ${q.question}`); + lines.push(` ${s.highlight('Q:')} ${q.question}`); if (q.answer) { - lines.push(` A: ${q.answer}`); + lines.push(` ${s.info('A:')} ${q.answer}`); } else { - lines.push(` A: (no answer)`); + lines.push(` ${s.dim('A: (no answer)')}`); } lines.push(''); return lines.join('\n'); } function formatDecision(d: Decision): string { - const icon = d.type === 'include' ? '✓' : d.type === 'exclude' ? '✗' : '→'; - return ` ${icon} [${d.type}] ${d.subject}\n Reason: ${d.reasoning}\n Confidence: ${(d.confidence * 100).toFixed(0)}%\n`; + const icon = d.type === 'include' ? s.success('✓') : d.type === 'exclude' ? s.error('✗') : s.info('→'); + const conf = (d.confidence * 100).toFixed(0); + return ` ${icon} ${s.dim(`[${d.type}]`)} ${d.subject}\n ${s.dim('Reason:')} ${d.reasoning}\n ${s.dim('Confidence:')} ${s.number(conf + '%')}\n`; } function formatToolCall(tc: ToolCall, verbose: boolean): string { const duration = formatDuration(tc.duration); if (verbose) { - return ` [${tc.tool}] (${duration})\n Input: ${JSON.stringify(tc.input).slice(0, 100)}...\n`; + const input = JSON.stringify(tc.input).slice(0, 100); + return ` ${s.info(tc.tool)} ${s.dim(`(${duration})`)}\n ${s.dim('Input:')} ${input}...\n`; } - return ` ${tc.tool} (${duration})`; + return ` ${s.info(tc.tool)} ${s.dim(`(${duration})`)}`; } function formatFailure(f: TestFailure): string { const lines: string[] = []; - lines.push(` • ${f.testName}`); - lines.push(` Scenario: ${f.scenarioId}`); - lines.push(` Error: ${f.error}`); + lines.push(` ${s.error('•')} ${s.bold(f.testName)}`); + lines.push(` ${s.dim('Scenario:')} ${f.scenarioId}`); + lines.push(` ${s.dim('Error:')} ${s.error(f.error)}`); if (f.expected !== undefined && f.actual !== undefined) { - lines.push(` Expected: ${JSON.stringify(f.expected)}`); - lines.push(` Actual: 
${JSON.stringify(f.actual)}`); + lines.push(` ${s.dim('Expected:')} ${s.success(JSON.stringify(f.expected))}`); + lines.push(` ${s.dim('Actual:')} ${s.error(JSON.stringify(f.actual))}`); } lines.push(''); return lines.join('\n'); @@ -206,21 +257,43 @@ export function formatTraceList(traces: Array<{ const lines: string[] = []; lines.push(''); - lines.push('📋 Recent Traces'); - lines.push('═'.repeat(80)); + lines.push(` ${s.bold('📋 Recent Traces')}`); + lines.push(s.primary(` ${box.dHorizontal.repeat(76)}`)); lines.push(''); - lines.push('ID Status Passed Failed Duration'); - lines.push('─'.repeat(80)); + + // Header row + const hId = s.dim('ID'.padEnd(38)); + const hStatus = s.dim('Status'.padEnd(10)); + const hPassed = s.dim('Passed'.padStart(8)); + const hFailed = s.dim('Failed'.padStart(8)); + const hDuration = s.dim('Duration'.padStart(10)); + lines.push(` ${hId}${hStatus}${hPassed}${hFailed}${hDuration}`); + lines.push(s.dim(` ${box.horizontal.repeat(76)}`)); for (const t of traces) { - const statusIcon = t.status === 'success' ? '✅' : t.status === 'partial' ? '⚠️ ' : '❌'; - const id = t.id.slice(0, 36); - const passed = String(t.testsPassed).padStart(6); - const failed = String(t.testsFailed).padStart(6); - const duration = formatDuration(t.duration).padStart(8); - lines.push(`${id} ${statusIcon} ${passed} ${failed} ${duration}`); + const id = s.muted(t.id.slice(0, 36).padEnd(38)); + + let statusIcon: string; + if (t.status === 'success') { + statusIcon = s.success('✓ Pass'.padEnd(10)); + } else if (t.status === 'partial') { + statusIcon = s.warning('⚠ Partial'.padEnd(10)); + } else { + statusIcon = s.error('✗ Fail'.padEnd(10)); + } + + const passed = s.success(String(t.testsPassed).padStart(8)); + const failed = t.testsFailed > 0 + ? 
s.error(String(t.testsFailed).padStart(8))
+      : s.dim(String(t.testsFailed).padStart(8));
+    const duration = s.number(formatDuration(t.duration).padStart(10));
+
+    lines.push(`  ${id}${statusIcon}${passed}${failed}${duration}`);
   }
 
   lines.push('');
+  lines.push(`  ${s.dim('View a trace:')} ${s.info('evaluclaude view <trace-id>')}`);
+  lines.push('');
+
   return lines.join('\n');
 }