mirror of
https://github.com/harivansh-afk/evaluclaude-harness.git
synced 2026-04-15 05:02:09 +00:00
ui polish
This commit is contained in:
parent
ff5300f4e0
commit
69c08c9d6b
12 changed files with 1430 additions and 308 deletions
202
README.md
Normal file
202
README.md
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
# evaluclaude
|
||||
|
||||
> **Zero-to-evals in one command.** Claude analyzes your codebase and generates functional tests.
|
||||
|
||||

|
||||

|
||||

|
||||
|
||||
## What is this?
|
||||
|
||||
**evaluclaude** is a CLI tool that uses Claude to understand your codebase and generate real, runnable functional tests. Unlike traditional test generators that produce boilerplate, evaluclaude:
|
||||
|
||||
- **Parses your code** with tree-sitter (no LLM tokens wasted on structure)
|
||||
- **Asks smart questions** to understand your testing priorities
|
||||
- **Generates specs, not code** — deterministic renderers create the actual tests
|
||||
- **Full observability** — every run produces a trace you can inspect
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Install
|
||||
npm install -g evaluclaude-harness
|
||||
|
||||
# Run the full pipeline
|
||||
evaluclaude pipeline .
|
||||
|
||||
# Or step by step
|
||||
evaluclaude intro . # Introspect codebase
|
||||
evaluclaude analyze . -o spec.json -i # Generate spec (interactive)
|
||||
evaluclaude render spec.json # Create test files
|
||||
evaluclaude run # Execute tests
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────┐
|
||||
│ evaluclaude pipeline │
|
||||
├─────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ 1. INTROSPECT Parse code with tree-sitter │
|
||||
│ 📂 → 📋 Extract functions, classes │
|
||||
│ │
|
||||
│ 2. ANALYZE Claude generates EvalSpec │
|
||||
│ 📋 → 🧠 Asks clarifying questions │
|
||||
│ │
|
||||
│ 3. RENDER Deterministic code generation │
|
||||
│ 🧠 → 📄 pytest / vitest / jest │
|
||||
│ │
|
||||
│ 4. RUN Execute in sandbox │
|
||||
│ 📄 → 🧪 Collect results + traces │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Commands
|
||||
|
||||
### Core Pipeline
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `pipeline [path]` | Run the full pipeline: introspect → analyze → render → run |
|
||||
| `intro [path]` | Introspect codebase with tree-sitter |
|
||||
| `analyze [path]` | Generate EvalSpec with Claude |
|
||||
| `render <spec>` | Render EvalSpec to test files |
|
||||
| `run [test-dir]` | Execute tests and collect results |
|
||||
|
||||
### Grading & Rubrics
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `grade <input>` | Grade output using LLM rubric |
|
||||
| `rubrics` | List available rubrics |
|
||||
| `calibrate` | Calibrate rubric against examples |
|
||||
|
||||
### Observability
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `view [trace-id]` | View trace details |
|
||||
| `traces` | List all traces |
|
||||
| `ui` | Launch Promptfoo dashboard |
|
||||
| `eval` | Run Promptfoo evaluations |
|
||||
|
||||
## Examples
|
||||
|
||||
### Analyze a Python project interactively
|
||||
|
||||
```bash
|
||||
evaluclaude analyze ./my-python-project -i -o spec.json
|
||||
```
|
||||
|
||||
Claude will ask questions like:
|
||||
- "I see 3 database models. Which is the core domain object?"
|
||||
- "Found 47 utility functions. Want me to prioritize the most-used ones?"
|
||||
|
||||
### Focus on specific modules
|
||||
|
||||
```bash
|
||||
evaluclaude pipeline . --focus auth,payments --max-scenarios 20
|
||||
```
|
||||
|
||||
### View test results in browser
|
||||
|
||||
```bash
|
||||
evaluclaude run --export-promptfoo
|
||||
evaluclaude ui
|
||||
```
|
||||
|
||||
### Skip steps in the pipeline
|
||||
|
||||
```bash
|
||||
# Use existing spec, just run tests
|
||||
evaluclaude pipeline . --skip-analyze --skip-render
|
||||
|
||||
# Generate tests without running
|
||||
evaluclaude pipeline . --skip-run
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `ANTHROPIC_API_KEY` | Your Anthropic API key |
|
||||
|
||||
### Output Structure
|
||||
|
||||
```
|
||||
.evaluclaude/
|
||||
├── spec.json # Generated EvalSpec
|
||||
├── traces/ # Execution traces
|
||||
│ └── trace-xxx.json
|
||||
├── results/ # Test results
|
||||
│ └── run-xxx.json
|
||||
└── promptfooconfig.yaml # Promptfoo config (with --promptfoo)
|
||||
```
|
||||
|
||||
## Rubrics
|
||||
|
||||
Create custom grading rubrics in YAML:
|
||||
|
||||
```yaml
|
||||
# rubrics/my-rubric.yaml
|
||||
name: my-rubric
|
||||
description: Custom quality checks
|
||||
passingThreshold: 0.7
|
||||
|
||||
criteria:
|
||||
- name: correctness
|
||||
description: Code produces correct results
|
||||
weight: 0.5
|
||||
- name: clarity
|
||||
description: Code is clear and readable
|
||||
weight: 0.3
|
||||
- name: efficiency
|
||||
description: Code is reasonably efficient
|
||||
weight: 0.2
|
||||
```
|
||||
|
||||
Use it:
|
||||
```bash
|
||||
evaluclaude grade output.txt -r my-rubric
|
||||
```
|
||||
|
||||
## Architecture
|
||||
|
||||
evaluclaude follows key principles:
|
||||
|
||||
1. **Tree-sitter for introspection** — Never send raw code to Claude for structure extraction
|
||||
2. **Claude generates specs, not code** — EvalSpec JSON is LLM output; test code is deterministic
|
||||
3. **Functional tests only** — Every test must invoke actual code, no syntax checks
|
||||
4. **Full observability** — Every eval run produces an inspectable trace
|
||||
|
||||
## Supported Languages
|
||||
|
||||
| Language | Parser | Test Framework |
|
||||
|----------|--------|----------------|
|
||||
| Python | tree-sitter-python | pytest |
|
||||
| TypeScript | tree-sitter-typescript | vitest, jest |
|
||||
| JavaScript | tree-sitter-typescript | vitest, jest |
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Build
|
||||
npm run build
|
||||
|
||||
# Run in dev mode
|
||||
npm run dev
|
||||
|
||||
# Run tests
|
||||
npm test
|
||||
|
||||
# Type check
|
||||
npm run typecheck
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
|
|
@ -3,6 +3,18 @@ import * as path from 'node:path';
|
|||
import * as fs from 'node:fs/promises';
|
||||
import { analyze } from '../../introspector/index.js';
|
||||
import { generateEvalSpec, generateEvalSpecInteractive } from '../../analyzer/index.js';
|
||||
import {
|
||||
style,
|
||||
icons,
|
||||
header,
|
||||
step,
|
||||
keyValue,
|
||||
Spinner,
|
||||
formatError,
|
||||
nextSteps,
|
||||
box,
|
||||
BANNER_MINIMAL,
|
||||
} from '../theme.js';
|
||||
|
||||
interface StructuredQuestion {
|
||||
questions: {
|
||||
|
|
@ -19,7 +31,6 @@ interface StructuredQuestion {
|
|||
async function handleQuestion(questionData: string): Promise<string> {
|
||||
const { default: inquirer } = await import('inquirer');
|
||||
|
||||
// Try to parse as structured question
|
||||
let parsed: StructuredQuestion | null = null;
|
||||
try {
|
||||
parsed = JSON.parse(questionData);
|
||||
|
|
@ -31,29 +42,27 @@ async function handleQuestion(questionData: string): Promise<string> {
|
|||
const answers: string[] = [];
|
||||
|
||||
for (const q of parsed.questions) {
|
||||
console.log(`\n🤖 ${q.header || 'Question'}:\n`);
|
||||
console.log(`\n${style.highlight(icons.brain)} ${style.bold(q.header || 'Question')}:\n`);
|
||||
|
||||
if (q.options && q.options.length > 0) {
|
||||
// Render as selection
|
||||
const choices = q.options.map(opt => ({
|
||||
name: opt.description ? `${opt.label} - ${opt.description}` : opt.label,
|
||||
name: opt.description ? `${style.bold(opt.label)} ${style.dim('─')} ${opt.description}` : opt.label,
|
||||
value: opt.label,
|
||||
}));
|
||||
|
||||
const { selection } = await inquirer.prompt([{
|
||||
type: q.multiSelect ? 'checkbox' : 'list',
|
||||
name: 'selection',
|
||||
message: q.question,
|
||||
message: style.info(q.question),
|
||||
choices,
|
||||
}]);
|
||||
|
||||
answers.push(Array.isArray(selection) ? selection.join(', ') : selection);
|
||||
} else {
|
||||
// Plain text input
|
||||
const { answer } = await inquirer.prompt([{
|
||||
type: 'input',
|
||||
name: 'answer',
|
||||
message: q.question,
|
||||
message: style.info(q.question),
|
||||
}]);
|
||||
answers.push(answer);
|
||||
}
|
||||
|
|
@ -66,7 +75,7 @@ async function handleQuestion(questionData: string): Promise<string> {
|
|||
const { answer } = await inquirer.prompt([{
|
||||
type: 'input',
|
||||
name: 'answer',
|
||||
message: `🤖 Claude asks: ${questionData}`,
|
||||
message: `${style.highlight(icons.brain)} ${style.bold('Claude asks:')} ${questionData}`,
|
||||
}]);
|
||||
|
||||
return answer;
|
||||
|
|
@ -80,20 +89,40 @@ export const analyzeCommand = new Command('analyze')
|
|||
.option('--focus <modules>', 'Comma-separated list of modules/functions to focus on')
|
||||
.option('--max-scenarios <n>', 'Maximum number of test scenarios to generate', '10')
|
||||
.option('--quiet', 'Suppress progress messages')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude analyze .')} ${style.dim('Analyze current directory')}
|
||||
${style.command('evaluclaude analyze ./src -o spec.json')} ${style.dim('Save output to file')}
|
||||
${style.command('evaluclaude analyze . -i')} ${style.dim('Interactive mode with questions')}
|
||||
${style.command('evaluclaude analyze . --focus auth,api')} ${style.dim('Focus on specific modules')}
|
||||
${style.command('evaluclaude analyze . --max-scenarios 20')} ${style.dim('Generate more scenarios')}
|
||||
`)
|
||||
.action(async (repoPath: string, options: AnalyzeOptions) => {
|
||||
const absolutePath = path.resolve(repoPath);
|
||||
const log = options.quiet ? () => {} : console.log;
|
||||
const quiet = options.quiet;
|
||||
|
||||
log(`\n🔬 Analyzing codebase: ${absolutePath}\n`);
|
||||
if (!quiet) {
|
||||
console.log(`\n${BANNER_MINIMAL}\n`);
|
||||
console.log(header('Analyze Codebase'));
|
||||
console.log(keyValue('Path', style.path(absolutePath)));
|
||||
console.log();
|
||||
}
|
||||
|
||||
try {
|
||||
log('Step 1: Running tree-sitter introspection...');
|
||||
// Step 1: Tree-sitter introspection
|
||||
const introSpinner = quiet ? null : new Spinner('Running tree-sitter introspection...');
|
||||
introSpinner?.start();
|
||||
|
||||
const repoSummary = await analyze({
|
||||
root: absolutePath,
|
||||
onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`),
|
||||
onProgress: quiet ? undefined : (msg) => introSpinner?.update(`Introspecting: ${msg}`),
|
||||
});
|
||||
|
||||
log(`\nStep 2: Generating EvalSpec with Claude...\n`);
|
||||
introSpinner?.succeed('Tree-sitter introspection complete');
|
||||
|
||||
// Step 2: Claude analysis
|
||||
const claudeSpinner = quiet ? null : new Spinner('Generating EvalSpec with Claude...');
|
||||
claudeSpinner?.start();
|
||||
|
||||
const focus = options.focus?.split(',').map(s => s.trim());
|
||||
const maxScenarios = parseInt(options.maxScenarios, 10);
|
||||
|
|
@ -101,6 +130,9 @@ export const analyzeCommand = new Command('analyze')
|
|||
let result;
|
||||
|
||||
if (options.interactive) {
|
||||
claudeSpinner?.stop();
|
||||
console.log(`\n${style.info(icons.info)} ${style.bold('Interactive mode enabled')}\n`);
|
||||
|
||||
result = await generateEvalSpecInteractive(
|
||||
repoSummary,
|
||||
handleQuestion,
|
||||
|
|
@ -112,26 +144,51 @@ export const analyzeCommand = new Command('analyze')
|
|||
focus,
|
||||
maxScenarios,
|
||||
});
|
||||
claudeSpinner?.succeed('EvalSpec generated with Claude');
|
||||
}
|
||||
|
||||
const { spec, tokensUsed, questionsAsked } = result;
|
||||
|
||||
log('\n✅ EvalSpec generated successfully!');
|
||||
log(` Scenarios: ${spec.scenarios.length}`);
|
||||
log(` Tokens used: ${tokensUsed}`);
|
||||
log(` Questions asked: ${questionsAsked}`);
|
||||
log(` Confidence: ${spec.metadata.confidence}`);
|
||||
// Results summary
|
||||
if (!quiet) {
|
||||
console.log();
|
||||
console.log(`${style.success(icons.success)} ${style.bold('EvalSpec generated successfully!')}`);
|
||||
console.log();
|
||||
console.log(` ${style.primary(box.vertical)} ${keyValue('Scenarios', style.number(String(spec.scenarios.length)))}`);
|
||||
console.log(` ${style.primary(box.vertical)} ${keyValue('Tokens used', style.number(String(tokensUsed)))}`);
|
||||
console.log(` ${style.primary(box.vertical)} ${keyValue('Questions asked', style.number(String(questionsAsked)))}`);
|
||||
console.log(` ${style.primary(box.vertical)} ${keyValue('Confidence', style.highlight(spec.metadata.confidence))}`);
|
||||
}
|
||||
|
||||
const json = JSON.stringify(spec, null, 2);
|
||||
|
||||
if (options.output) {
|
||||
await fs.writeFile(options.output, json);
|
||||
log(`\n📄 Written to: ${options.output}`);
|
||||
if (!quiet) {
|
||||
console.log();
|
||||
console.log(`${style.success(icons.success)} Written to: ${style.path(options.output)}`);
|
||||
console.log(nextSteps([
|
||||
{ command: `evaluclaude render ${options.output}`, description: 'Render tests from the spec' },
|
||||
{ command: `evaluclaude pipeline . -o ./tests`, description: 'Run the full pipeline' },
|
||||
]));
|
||||
}
|
||||
} else {
|
||||
console.log('\n' + json);
|
||||
if (!quiet) {
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude analyze . -o spec.json', description: 'Save the spec to a file' },
|
||||
{ command: 'evaluclaude render spec.json', description: 'Then render tests from it' },
|
||||
]));
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('\n❌ Error:', error instanceof Error ? error.message : error);
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
console.error(formatError(message, [
|
||||
'Check that the path exists and contains source files',
|
||||
'Ensure ANTHROPIC_API_KEY is set in your environment',
|
||||
'Try running with --quiet to see raw errors',
|
||||
'Use evaluclaude intro <path> to verify introspection works',
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Command } from 'commander';
|
|||
import { readFileSync, existsSync } from 'fs';
|
||||
import { gradeWithRubric, loadAllRubrics, analyzeCalibration, calibrate } from '../../graders/index.js';
|
||||
import type { CalibrationExample } from '../../graders/types.js';
|
||||
import { style, icons, Spinner, formatError, progressBar, subheader, keyValue } from '../theme.js';
|
||||
|
||||
export const gradeCommand = new Command('grade')
|
||||
.description('Grade output using LLM rubric')
|
||||
|
|
@ -9,6 +10,12 @@ export const gradeCommand = new Command('grade')
|
|||
.option('-r, --rubric <name>', 'Rubric name or path', 'code-quality')
|
||||
.option('--rubrics-dir <dir>', 'Directory containing rubric YAML files', 'rubrics')
|
||||
.option('--json', 'Output result as JSON', false)
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude grade output.txt')} ${style.dim('Grade file with default rubric')}
|
||||
${style.command('evaluclaude grade output.txt -r safety')} ${style.dim('Use specific rubric')}
|
||||
${style.command('evaluclaude grade "inline text" --json')} ${style.dim('Grade string, output JSON')}
|
||||
`)
|
||||
.action(async (input: string, options) => {
|
||||
try {
|
||||
let content: string;
|
||||
|
|
@ -19,29 +26,48 @@ export const gradeCommand = new Command('grade')
|
|||
content = input;
|
||||
}
|
||||
|
||||
console.log(`Grading with rubric: ${options.rubric}`);
|
||||
const spinner = new Spinner(`Grading with rubric ${style.highlight(options.rubric)}...`);
|
||||
spinner.start();
|
||||
|
||||
const result = await gradeWithRubric(content, options.rubric, {
|
||||
rubricsDir: options.rubricsDir,
|
||||
});
|
||||
|
||||
if (options.json) {
|
||||
spinner.stop();
|
||||
console.log(JSON.stringify(result, null, 2));
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`\n${result.pass ? '✅ PASS' : '❌ FAIL'}`);
|
||||
console.log(`Score: ${(result.score * 100).toFixed(1)}%`);
|
||||
console.log(`\nSummary: ${result.reason}`);
|
||||
if (result.pass) {
|
||||
spinner.succeed(`Graded with rubric ${style.highlight(options.rubric)}`);
|
||||
} else {
|
||||
spinner.fail(`Graded with rubric ${style.highlight(options.rubric)}`);
|
||||
}
|
||||
|
||||
console.log('\nCriterion Scores:');
|
||||
console.log();
|
||||
console.log(result.pass
|
||||
? `${style.success(icons.passed)} ${style.bold(style.success('PASS'))}`
|
||||
: `${style.error(icons.failed)} ${style.bold(style.error('FAIL'))}`);
|
||||
console.log(keyValue('Score', style.number(`${(result.score * 100).toFixed(1)}%`)));
|
||||
console.log();
|
||||
console.log(keyValue('Summary', result.reason));
|
||||
|
||||
console.log(subheader('Criterion Scores'));
|
||||
for (const cs of result.criterionScores) {
|
||||
const bar = '█'.repeat(Math.round(cs.score * 10)) + '░'.repeat(10 - Math.round(cs.score * 10));
|
||||
console.log(` ${cs.name}: ${bar} ${(cs.score * 100).toFixed(0)}%`);
|
||||
console.log(` ${cs.feedback}`);
|
||||
const bar = progressBar(cs.score, 1, 20);
|
||||
console.log(` ${style.bold(cs.name)}: ${bar}`);
|
||||
console.log(` ${style.dim(cs.feedback)}`);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error grading:', error instanceof Error ? error.message : error);
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
[
|
||||
'Check that the rubric exists in the rubrics directory',
|
||||
'Ensure ANTHROPIC_API_KEY is set',
|
||||
`Run ${style.command('evaluclaude rubrics')} to list available rubrics`,
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
@ -49,26 +75,44 @@ export const gradeCommand = new Command('grade')
|
|||
export const listRubricsCommand = new Command('rubrics')
|
||||
.description('List available rubrics')
|
||||
.option('--rubrics-dir <dir>', 'Directory containing rubric YAML files', 'rubrics')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude rubrics')} ${style.dim('List all rubrics')}
|
||||
${style.command('evaluclaude rubrics --rubrics-dir ./my-rubrics')} ${style.dim('Use custom directory')}
|
||||
`)
|
||||
.action(async (options) => {
|
||||
try {
|
||||
const rubrics = loadAllRubrics(options.rubricsDir);
|
||||
|
||||
if (rubrics.size === 0) {
|
||||
console.log(`No rubrics found in ${options.rubricsDir}`);
|
||||
console.log(formatError(
|
||||
`No rubrics found in ${style.path(options.rubricsDir)}`,
|
||||
[
|
||||
'Create rubric YAML files in the rubrics directory',
|
||||
'Use --rubrics-dir to specify a different location',
|
||||
]
|
||||
));
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`Available rubrics (${rubrics.size}):\n`);
|
||||
console.log(subheader(`Available Rubrics (${style.number(String(rubrics.size))})`));
|
||||
console.log();
|
||||
|
||||
for (const [name, rubric] of rubrics) {
|
||||
console.log(`📋 ${name}`);
|
||||
console.log(` ${rubric.description}`);
|
||||
console.log(` Threshold: ${(rubric.passingThreshold * 100).toFixed(0)}%`);
|
||||
console.log(` Criteria: ${rubric.criteria.map(c => c.name).join(', ')}`);
|
||||
console.log('');
|
||||
console.log(`${icons.spec} ${style.bold(style.primary(name))}`);
|
||||
console.log(keyValue('Description', rubric.description, 1));
|
||||
console.log(keyValue('Threshold', style.number(`${(rubric.passingThreshold * 100).toFixed(0)}%`), 1));
|
||||
console.log(keyValue('Criteria', rubric.criteria.map(c => style.highlight(c.name)).join(', '), 1));
|
||||
console.log();
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error listing rubrics:', error instanceof Error ? error.message : error);
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
[
|
||||
'Check that the rubrics directory exists',
|
||||
'Ensure rubric files are valid YAML',
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
@ -78,24 +122,49 @@ export const calibrateCommand = new Command('calibrate')
|
|||
.argument('<rubric>', 'Rubric name or path')
|
||||
.argument('<examples>', 'Path to calibration examples JSON')
|
||||
.option('--rubrics-dir <dir>', 'Directory containing rubric YAML files', 'rubrics')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude calibrate code-quality examples.json')} ${style.dim('Calibrate with examples')}
|
||||
|
||||
${style.bold('Examples file format:')}
|
||||
${style.dim('[')}
|
||||
${style.dim('{ "content": "...", "expectedPass": true, "expectedScore": 0.8 },')}
|
||||
${style.dim('{ "content": "...", "expectedPass": false }')}
|
||||
${style.dim(']')}
|
||||
`)
|
||||
.action(async (rubricName: string, examplesPath: string, options) => {
|
||||
try {
|
||||
if (!existsSync(examplesPath)) {
|
||||
console.error(`Examples file not found: ${examplesPath}`);
|
||||
console.error(formatError(
|
||||
`Examples file not found: ${style.path(examplesPath)}`,
|
||||
[
|
||||
'Check that the file path is correct',
|
||||
'Ensure the file exists and is readable',
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const examples: CalibrationExample[] = JSON.parse(readFileSync(examplesPath, 'utf-8'));
|
||||
|
||||
console.log(`Calibrating rubric '${rubricName}' with ${examples.length} examples...`);
|
||||
const spinner = new Spinner(`Calibrating rubric ${style.highlight(rubricName)} with ${style.number(String(examples.length))} examples...`);
|
||||
spinner.start();
|
||||
|
||||
const result = await calibrate(rubricName, examples, {
|
||||
rubricsDir: options.rubricsDir,
|
||||
});
|
||||
|
||||
spinner.succeed(`Calibration complete for ${style.highlight(rubricName)}`);
|
||||
console.log('\n' + analyzeCalibration(result));
|
||||
} catch (error) {
|
||||
console.error('Error calibrating:', error instanceof Error ? error.message : error);
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
[
|
||||
'Check that the rubric exists',
|
||||
'Ensure the examples file is valid JSON',
|
||||
'Ensure ANTHROPIC_API_KEY is set',
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { Command } from 'commander';
|
||||
import * as path from 'node:path';
|
||||
import { analyze, treeToString } from '../../introspector/index.js';
|
||||
import { style, icons, header, subheader, keyValue, Spinner, formatError, nextSteps, box } from '../theme.js';
|
||||
|
||||
export const introCommand = new Command('intro')
|
||||
.description('Introspect a codebase and output its structure (tree-sitter analysis)')
|
||||
|
|
@ -9,21 +10,35 @@ export const introCommand = new Command('intro')
|
|||
.option('--json', 'Output as JSON (default)')
|
||||
.option('--summary', 'Output a human-readable summary instead of JSON')
|
||||
.option('--tree', 'Show file tree structure')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude intro')} ${style.dim('Analyze current directory')}
|
||||
${style.command('evaluclaude intro ./my-project')} ${style.dim('Analyze specific path')}
|
||||
${style.command('evaluclaude intro . --summary')} ${style.dim('Human-readable summary')}
|
||||
${style.command('evaluclaude intro . --tree')} ${style.dim('Show file tree')}
|
||||
${style.command('evaluclaude intro . -o out.json')} ${style.dim('Save to file')}
|
||||
`)
|
||||
.action(async (repoPath: string, options: { output?: string; json?: boolean; summary?: boolean; tree?: boolean }) => {
|
||||
const absolutePath = path.resolve(repoPath);
|
||||
|
||||
console.log(`\n🔍 Analyzing: ${absolutePath}\n`);
|
||||
console.log(header('Introspecting Codebase'));
|
||||
console.log(keyValue('Path', style.path(absolutePath)));
|
||||
console.log('');
|
||||
|
||||
const spinner = new Spinner('Analyzing codebase with tree-sitter...');
|
||||
spinner.start();
|
||||
|
||||
try {
|
||||
const summary = await analyze({
|
||||
root: absolutePath,
|
||||
onProgress: (msg) => console.log(` ${msg}`),
|
||||
onProgress: (msg) => spinner.update(msg),
|
||||
});
|
||||
|
||||
spinner.succeed('Analysis complete');
|
||||
console.log('');
|
||||
|
||||
if (options.tree && summary.tree) {
|
||||
console.log('📁 File Tree:\n');
|
||||
console.log(subheader(`${icons.folder} File Tree`));
|
||||
console.log(treeToString(summary.tree));
|
||||
console.log('');
|
||||
} else if (options.summary) {
|
||||
|
|
@ -34,85 +49,96 @@ export const introCommand = new Command('intro')
|
|||
if (options.output) {
|
||||
const fs = await import('node:fs/promises');
|
||||
await fs.writeFile(options.output, json);
|
||||
console.log(`📄 Written to: ${options.output}`);
|
||||
console.log(`${style.success(icons.success)} Written to: ${style.path(options.output)}`);
|
||||
} else {
|
||||
console.log(json);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude analyze .', description: 'Generate EvalSpec with Claude' },
|
||||
{ command: 'evaluclaude intro . --summary', description: 'View human-readable summary' },
|
||||
]));
|
||||
} catch (error) {
|
||||
console.error('❌ Error analyzing repository:', error);
|
||||
spinner.fail('Analysis failed');
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : 'Unknown error analyzing repository',
|
||||
[
|
||||
'Check that the path exists and is accessible',
|
||||
'Ensure the directory contains source files',
|
||||
'Try running with --tree to see the file structure',
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
||||
function printHumanSummary(summary: import('../../introspector/types.js').RepoSummary): void {
|
||||
console.log('📊 Repository Summary');
|
||||
console.log('─'.repeat(50));
|
||||
console.log(`📁 Root: ${summary.root}`);
|
||||
console.log(`🗓️ Analyzed: ${summary.analyzedAt}`);
|
||||
console.log(`🔤 Languages: ${summary.languages.join(', ') || 'none detected'}`);
|
||||
console.log(subheader(`${icons.trace} Repository Summary`));
|
||||
console.log(keyValue('Root', style.path(summary.root)));
|
||||
console.log(keyValue('Analyzed', summary.analyzedAt));
|
||||
console.log(keyValue('Languages', summary.languages.join(', ') || style.muted('none detected')));
|
||||
|
||||
console.log('\n📂 Files:');
|
||||
console.log(` Total: ${summary.files.length}`);
|
||||
console.log(` Source: ${summary.files.filter(f => f.role === 'source').length}`);
|
||||
console.log(` Test: ${summary.files.filter(f => f.role === 'test').length}`);
|
||||
console.log(` Config: ${summary.files.filter(f => f.role === 'config').length}`);
|
||||
console.log(subheader(`${icons.folder} Files`));
|
||||
console.log(keyValue('Total', style.number(String(summary.files.length)), 1));
|
||||
console.log(keyValue('Source', style.number(String(summary.files.filter(f => f.role === 'source').length)), 1));
|
||||
console.log(keyValue('Test', style.number(String(summary.files.filter(f => f.role === 'test').length)), 1));
|
||||
console.log(keyValue('Config', style.number(String(summary.files.filter(f => f.role === 'config').length)), 1));
|
||||
|
||||
console.log('\n📦 Modules:');
|
||||
console.log(` Total: ${summary.modules.length}`);
|
||||
console.log(subheader(`${icons.code} Modules`));
|
||||
console.log(keyValue('Total', style.number(String(summary.modules.length)), 1));
|
||||
|
||||
const totalExports = summary.modules.reduce((sum, m) => sum + m.exports.length, 0);
|
||||
const functions = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'function'));
|
||||
const classes = summary.modules.flatMap(m => m.exports.filter(e => e.kind === 'class'));
|
||||
|
||||
console.log(` Functions: ${functions.length}`);
|
||||
console.log(` Classes: ${classes.length}`);
|
||||
console.log(` Total exports: ${totalExports}`);
|
||||
console.log(keyValue('Functions', style.number(String(functions.length)), 1));
|
||||
console.log(keyValue('Classes', style.number(String(classes.length)), 1));
|
||||
console.log(keyValue('Total exports', style.number(String(totalExports)), 1));
|
||||
|
||||
if (summary.config.python) {
|
||||
console.log('\n🐍 Python:');
|
||||
console.log(` Test framework: ${summary.config.python.testFramework}`);
|
||||
console.log(` pyproject.toml: ${summary.config.python.pyprojectToml ? '✓' : '✗'}`);
|
||||
console.log(` setup.py: ${summary.config.python.setupPy ? '✓' : '✗'}`);
|
||||
console.log(subheader(`${icons.python} Python`));
|
||||
console.log(keyValue('Test framework', summary.config.python.testFramework, 1));
|
||||
console.log(keyValue('pyproject.toml', summary.config.python.pyprojectToml ? style.success(icons.success) : style.error(icons.error), 1));
|
||||
console.log(keyValue('setup.py', summary.config.python.setupPy ? style.success(icons.success) : style.error(icons.error), 1));
|
||||
}
|
||||
|
||||
if (summary.config.typescript) {
|
||||
console.log('\n📘 TypeScript:');
|
||||
console.log(` Test framework: ${summary.config.typescript.testFramework}`);
|
||||
console.log(` package.json: ${summary.config.typescript.packageJson ? '✓' : '✗'}`);
|
||||
console.log(` tsconfig.json: ${summary.config.typescript.tsconfig ? '✓' : '✗'}`);
|
||||
console.log(subheader(`${icons.typescript} TypeScript`));
|
||||
console.log(keyValue('Test framework', summary.config.typescript.testFramework, 1));
|
||||
console.log(keyValue('package.json', summary.config.typescript.packageJson ? style.success(icons.success) : style.error(icons.error), 1));
|
||||
console.log(keyValue('tsconfig.json', summary.config.typescript.tsconfig ? style.success(icons.success) : style.error(icons.error), 1));
|
||||
}
|
||||
|
||||
if (summary.git) {
|
||||
console.log('\n📌 Git:');
|
||||
console.log(` Branch: ${summary.git.branch}`);
|
||||
console.log(` Commit: ${summary.git.currentCommit.slice(0, 8)}`);
|
||||
console.log(subheader(`${icons.gear} Git`));
|
||||
console.log(keyValue('Branch', summary.git.branch, 1));
|
||||
console.log(keyValue('Commit', style.muted(summary.git.currentCommit.slice(0, 8)), 1));
|
||||
|
||||
if (summary.git.recentCommits && summary.git.recentCommits.length > 0) {
|
||||
console.log('\n📜 Recent Commits:');
|
||||
console.log(subheader(`${icons.file} Recent Commits`));
|
||||
for (const commit of summary.git.recentCommits.slice(0, 5)) {
|
||||
const date = new Date(commit.date).toLocaleDateString();
|
||||
console.log(` ${commit.shortHash} ${date} - ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`);
|
||||
console.log(` ${style.muted(commit.shortHash)} ${style.dim(date)} ${box.horizontal} ${commit.message.slice(0, 50)}${commit.message.length > 50 ? '...' : ''}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (summary.git.fileHistory && summary.git.fileHistory.length > 0) {
|
||||
console.log('\n🔥 Most Active Files (by commit count):');
|
||||
console.log(subheader(`${icons.lightning} Most Active Files`));
|
||||
for (const file of summary.git.fileHistory.slice(0, 5)) {
|
||||
console.log(` ${file.path} (${file.commitCount} commits)`);
|
||||
console.log(` ${style.path(file.path)} ${style.dim(`(${style.number(String(file.commitCount))} commits)`)}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Show top modules by export count
|
||||
const topModules = [...summary.modules]
|
||||
.sort((a, b) => b.exports.length - a.exports.length)
|
||||
.slice(0, 5);
|
||||
|
||||
if (topModules.length > 0) {
|
||||
console.log('\n🏆 Top modules by exports:');
|
||||
console.log(subheader(`${icons.sparkle} Top Modules by Exports`));
|
||||
for (const mod of topModules) {
|
||||
console.log(` ${mod.path}: ${mod.exports.length} exports`);
|
||||
console.log(` ${style.path(mod.path)}: ${style.number(String(mod.exports.length))} exports`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,19 @@ import { runTests, formatResults, DEFAULT_SANDBOX_CONFIG } from '../../runners/i
|
|||
import { createTracer, saveTrace } from '../../observability/index.js';
|
||||
import { generatePromptfooConfig, generateTestProvider } from '../../promptfoo/index.js';
|
||||
import type { EvalSpec } from '../../analyzer/types.js';
|
||||
import {
|
||||
style,
|
||||
icons,
|
||||
header,
|
||||
step,
|
||||
keyValue,
|
||||
resultBox,
|
||||
nextSteps,
|
||||
Spinner,
|
||||
formatError,
|
||||
BANNER,
|
||||
box
|
||||
} from '../theme.js';
|
||||
|
||||
const EVALUCLAUDE_DIR = '.evaluclaude';
|
||||
|
||||
|
|
@ -26,29 +39,49 @@ interface PipelineOptions {
|
|||
}
|
||||
|
||||
export const pipelineCommand = new Command('pipeline')
|
||||
.description('Run the full eval generation pipeline: introspect → analyze → render → run')
|
||||
.description('Run the complete eval pipeline: introspect → analyze → render → run')
|
||||
.argument('[path]', 'Path to the repository to analyze', '.')
|
||||
.option('-o, --output <dir>', 'Output directory for all artifacts', '.evaluclaude')
|
||||
.option('-o, --output <dir>', 'Output directory for artifacts', '.evaluclaude')
|
||||
.option('-i, --interactive', 'Enable interactive mode with clarifying questions')
|
||||
.option('--focus <modules>', 'Comma-separated list of modules/functions to focus on')
|
||||
.option('--max-scenarios <n>', 'Maximum number of test scenarios to generate', '10')
|
||||
.option('--max-scenarios <n>', 'Maximum number of test scenarios', '10')
|
||||
.option('--test-dir <dir>', 'Directory for generated tests', './tests/generated')
|
||||
.option('-f, --framework <framework>', 'Test framework (pytest, vitest, jest)')
|
||||
.option('--skip-analyze', 'Skip analysis, use existing spec')
|
||||
.option('--skip-render', 'Skip rendering, use existing tests')
|
||||
.option('--skip-run', 'Skip test execution')
|
||||
.option('--promptfoo', 'Generate Promptfoo configuration for UI viewing')
|
||||
.option('--promptfoo', 'Generate Promptfoo configuration')
|
||||
.option('--quiet', 'Suppress progress messages')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
|
||||
${style.dim('# Analyze current directory')}
|
||||
$ evaluclaude pipeline .
|
||||
|
||||
${style.dim('# Interactive mode with focus on specific modules')}
|
||||
$ evaluclaude pipeline ./my-project -i --focus auth,payments
|
||||
|
||||
${style.dim('# Generate tests without running them')}
|
||||
$ evaluclaude pipeline . --skip-run
|
||||
|
||||
${style.dim('# Use existing spec and run tests')}
|
||||
$ evaluclaude pipeline . --skip-analyze
|
||||
`)
|
||||
.action(async (repoPath: string, options: PipelineOptions) => {
|
||||
const absolutePath = resolve(repoPath);
|
||||
const log = options.quiet ? () => {} : console.log;
|
||||
const quiet = options.quiet;
|
||||
const outputDir = options.output || EVALUCLAUDE_DIR;
|
||||
|
||||
console.log('\n🚀 Evaluclaude Pipeline');
|
||||
console.log('═'.repeat(50));
|
||||
console.log(` Repository: ${absolutePath}`);
|
||||
console.log(` Output: ${outputDir}`);
|
||||
console.log('═'.repeat(50) + '\n');
|
||||
// Print header
|
||||
console.log(BANNER);
|
||||
console.log(style.primary(box.dHorizontal.repeat(55)));
|
||||
console.log(` ${icons.folder} ${style.bold('Repository:')} ${style.path(absolutePath)}`);
|
||||
console.log(` ${icons.file} ${style.bold('Output:')} ${style.path(outputDir)}`);
|
||||
if (options.interactive) {
|
||||
console.log(` ${icons.brain} ${style.bold('Mode:')} ${style.highlight('Interactive')}`);
|
||||
}
|
||||
console.log(style.primary(box.dHorizontal.repeat(55)));
|
||||
console.log('');
|
||||
|
||||
// Ensure output directories exist
|
||||
mkdirSync(outputDir, { recursive: true });
|
||||
|
|
@ -65,23 +98,30 @@ export const pipelineCommand = new Command('pipeline')
|
|||
|
||||
// Step 1: Introspection + Analysis
|
||||
if (options.skipAnalyze && existsSync(specPath)) {
|
||||
log('📋 Using existing EvalSpec...');
|
||||
console.log(step(1, 'Using existing EvalSpec', 'done'));
|
||||
spec = JSON.parse(readFileSync(specPath, 'utf-8'));
|
||||
log(` Loaded: ${specPath} (${spec.scenarios.length} scenarios)\n`);
|
||||
console.log(` ${style.dim('└─')} Loaded ${style.number(String(spec.scenarios.length))} scenarios from ${style.path(specPath)}`);
|
||||
console.log('');
|
||||
} else {
|
||||
log('🔬 Step 1: Introspecting codebase...');
|
||||
console.log(step(1, 'Introspecting codebase...', 'running'));
|
||||
|
||||
let spinner: Spinner | null = null;
|
||||
if (!quiet) {
|
||||
spinner = new Spinner('Parsing files with tree-sitter...');
|
||||
spinner.start();
|
||||
}
|
||||
|
||||
try {
|
||||
const repoSummary = await analyze({
|
||||
root: absolutePath,
|
||||
onProgress: options.quiet ? undefined : (msg) => log(` ${msg}`),
|
||||
onProgress: quiet ? undefined : (msg) => spinner?.update(msg),
|
||||
});
|
||||
|
||||
log(` Files: ${repoSummary.files.length}`);
|
||||
log(` Languages: ${repoSummary.languages.join(', ')}`);
|
||||
log('');
|
||||
spinner?.succeed(`Analyzed ${style.number(String(repoSummary.files.length))} files`);
|
||||
console.log(` ${style.dim('└─')} Languages: ${repoSummary.languages.map(l => style.info(l)).join(', ')}`);
|
||||
console.log('');
|
||||
|
||||
log('🤖 Step 2: Generating EvalSpec with Claude...\n');
|
||||
console.log(step(2, 'Generating EvalSpec with Claude...', 'running'));
|
||||
|
||||
const focus = options.focus?.split(',').map(s => s.trim());
|
||||
const maxScenarios = parseInt(options.maxScenarios, 10);
|
||||
|
|
@ -93,21 +133,30 @@ export const pipelineCommand = new Command('pipeline')
|
|||
result = await generateEvalSpecInteractive(
|
||||
repoSummary,
|
||||
async (question: string) => {
|
||||
console.log('');
|
||||
const { answer } = await inquirer.prompt([{
|
||||
type: 'input',
|
||||
name: 'answer',
|
||||
message: `🤖 Claude asks: ${question}`,
|
||||
message: `${icons.brain} ${style.highlight('Claude asks:')} ${question}`,
|
||||
prefix: '',
|
||||
}]);
|
||||
return answer;
|
||||
},
|
||||
{ focus, maxScenarios }
|
||||
);
|
||||
} else {
|
||||
if (!quiet) {
|
||||
spinner = new Spinner('Claude is analyzing the codebase...');
|
||||
spinner.start();
|
||||
}
|
||||
|
||||
result = await generateEvalSpec(repoSummary, {
|
||||
interactive: false,
|
||||
focus,
|
||||
maxScenarios,
|
||||
});
|
||||
|
||||
spinner?.succeed('EvalSpec generated');
|
||||
}
|
||||
|
||||
spec = result.spec;
|
||||
|
|
@ -115,19 +164,29 @@ export const pipelineCommand = new Command('pipeline')
|
|||
// Save the spec
|
||||
writeFileSync(specPath, JSON.stringify(spec, null, 2));
|
||||
|
||||
log(`\n✅ EvalSpec generated!`);
|
||||
log(` Scenarios: ${spec.scenarios.length}`);
|
||||
log(` Tokens: ${result.tokensUsed}`);
|
||||
log(` Saved: ${specPath}\n`);
|
||||
console.log(` ${style.dim('├─')} Scenarios: ${style.number(String(spec.scenarios.length))}`);
|
||||
console.log(` ${style.dim('├─')} Tokens: ${style.number(String(result.tokensUsed))}`);
|
||||
console.log(` ${style.dim('└─')} Saved: ${style.path(specPath)}`);
|
||||
console.log('');
|
||||
} catch (error) {
|
||||
console.error('\n❌ Analysis failed:', error instanceof Error ? error.message : error);
|
||||
spinner?.fail('Analysis failed');
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Check that ANTHROPIC_API_KEY is set', 'Verify the path exists and contains source files']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Render tests
|
||||
if (!options.skipRender) {
|
||||
log('📝 Step 3: Rendering test files...');
|
||||
console.log(step(3, 'Rendering test files...', 'running'));
|
||||
|
||||
let spinner: Spinner | null = null;
|
||||
if (!quiet) {
|
||||
spinner = new Spinner('Generating test code...');
|
||||
spinner.start();
|
||||
}
|
||||
|
||||
try {
|
||||
const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec);
|
||||
|
|
@ -140,20 +199,31 @@ export const pipelineCommand = new Command('pipeline')
|
|||
dryRun: false,
|
||||
});
|
||||
|
||||
log(` Framework: ${framework}`);
|
||||
log(` Files: ${renderResult.stats.fileCount}`);
|
||||
log(` Scenarios: ${renderResult.stats.scenarioCount}`);
|
||||
log(` Assertions: ${renderResult.stats.assertionCount}`);
|
||||
log(` Output: ${options.testDir}\n`);
|
||||
spinner?.succeed(`Generated ${style.number(String(renderResult.stats.fileCount))} test files`);
|
||||
console.log(` ${style.dim('├─')} Framework: ${style.info(framework)}`);
|
||||
console.log(` ${style.dim('├─')} Scenarios: ${style.number(String(renderResult.stats.scenarioCount))}`);
|
||||
console.log(` ${style.dim('├─')} Assertions: ${style.number(String(renderResult.stats.assertionCount))}`);
|
||||
console.log(` ${style.dim('└─')} Output: ${style.path(options.testDir)}`);
|
||||
console.log('');
|
||||
} catch (error) {
|
||||
console.error('\n❌ Rendering failed:', error instanceof Error ? error.message : error);
|
||||
spinner?.fail('Rendering failed');
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Verify the EvalSpec is valid JSON', 'Check the output directory is writable']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 3: Run tests
|
||||
if (!options.skipRun) {
|
||||
log('🧪 Step 4: Running tests...\n');
|
||||
console.log(step(4, 'Running tests...', 'running'));
|
||||
|
||||
let spinner: Spinner | null = null;
|
||||
if (!quiet) {
|
||||
spinner = new Spinner('Executing test suite...');
|
||||
spinner.start();
|
||||
}
|
||||
|
||||
try {
|
||||
const framework = (options.framework as 'pytest' | 'vitest' | 'jest') || detectRenderFramework(spec);
|
||||
|
|
@ -202,23 +272,41 @@ export const pipelineCommand = new Command('pipeline')
|
|||
const trace = tracer.finalize();
|
||||
const tracePath = await saveTrace(trace);
|
||||
|
||||
log(formatResults(result));
|
||||
log(`📊 Trace saved: ${tracePath}`);
|
||||
log(` View with: evaluclaude view ${trace.id}\n`);
|
||||
spinner?.stop();
|
||||
|
||||
// Show results box
|
||||
console.log('');
|
||||
console.log(resultBox({
|
||||
passed: result.summary.passed,
|
||||
failed: result.summary.failed,
|
||||
skipped: result.summary.skipped,
|
||||
duration: result.summary.duration,
|
||||
}));
|
||||
console.log('');
|
||||
console.log(` ${icons.trace} Trace: ${style.path(tracePath)}`);
|
||||
console.log(` ${style.dim('└─')} View: ${style.command(`evaluclaude view ${trace.id}`)}`);
|
||||
console.log('');
|
||||
|
||||
// Save results
|
||||
const resultsPath = join(resultsDir, `run-${Date.now()}.json`);
|
||||
writeFileSync(resultsPath, JSON.stringify(result, null, 2));
|
||||
|
||||
} catch (error) {
|
||||
console.error('\n❌ Test execution failed:', error instanceof Error ? error.message : error);
|
||||
spinner?.fail('Test execution failed');
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Check the test framework is installed', 'Verify the test directory exists']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Generate Promptfoo config
|
||||
if (options.promptfoo) {
|
||||
log('📦 Step 5: Generating Promptfoo configuration...');
|
||||
console.log(step(5, 'Generating Promptfoo configuration...', 'running'));
|
||||
|
||||
const spinner = new Spinner('Creating Promptfoo config...');
|
||||
spinner.start();
|
||||
|
||||
try {
|
||||
const configPath = join(outputDir, 'promptfooconfig.yaml');
|
||||
|
|
@ -235,23 +323,27 @@ export const pipelineCommand = new Command('pipeline')
|
|||
|
||||
await generateTestProvider(providerPath);
|
||||
|
||||
log(` Config: ${configPath}`);
|
||||
log(` Provider: ${providerPath}`);
|
||||
log(`\n Launch UI with: evaluclaude ui\n`);
|
||||
spinner.succeed('Promptfoo config created');
|
||||
console.log(` ${style.dim('├─')} Config: ${style.path(configPath)}`);
|
||||
console.log(` ${style.dim('└─')} Provider: ${style.path(providerPath)}`);
|
||||
console.log('');
|
||||
} catch (error) {
|
||||
console.error('\n❌ Promptfoo config generation failed:', error instanceof Error ? error.message : error);
|
||||
spinner.fail('Promptfoo config generation failed');
|
||||
console.error(formatError(error instanceof Error ? error.message : String(error)));
|
||||
}
|
||||
}
|
||||
|
||||
console.log('═'.repeat(50));
|
||||
console.log('✅ Pipeline complete!');
|
||||
console.log('═'.repeat(50));
|
||||
console.log(`\nNext steps:`);
|
||||
console.log(` View traces: evaluclaude view --last`);
|
||||
console.log(` List all traces: evaluclaude traces`);
|
||||
if (options.promptfoo) {
|
||||
console.log(` Launch UI: evaluclaude ui`);
|
||||
console.log(` Run Promptfoo: evaluclaude eval --spec ${specPath}`);
|
||||
}
|
||||
console.log('');
|
||||
// Final summary
|
||||
console.log(style.success(box.dHorizontal.repeat(55)));
|
||||
console.log(` ${icons.sparkle} ${style.success(style.bold('Pipeline complete!'))}`);
|
||||
console.log(style.success(box.dHorizontal.repeat(55)));
|
||||
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude view --last', description: 'View the latest trace' },
|
||||
{ command: 'evaluclaude traces', description: 'List all traces' },
|
||||
...(options.promptfoo ? [
|
||||
{ command: 'evaluclaude ui', description: 'Launch the dashboard UI' },
|
||||
{ command: `evaluclaude eval --spec ${specPath}`, description: 'Run Promptfoo evaluations' },
|
||||
] : []),
|
||||
]));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { Command } from 'commander';
|
|||
import { readFileSync, existsSync } from 'fs';
|
||||
import { renderSpec, detectFramework, type Framework } from '../../renderers/index.js';
|
||||
import type { EvalSpec } from '../../analyzer/types.js';
|
||||
import { style, icons, Spinner, formatError, nextSteps, keyValue } from '../theme.js';
|
||||
|
||||
export const renderCommand = new Command('render')
|
||||
.description('Render EvalSpec JSON into runnable test files')
|
||||
|
|
@ -11,19 +12,41 @@ export const renderCommand = new Command('render')
|
|||
.option('--fixtures', 'Generate fixture stubs', false)
|
||||
.option('--mocks', 'Generate mock stubs', false)
|
||||
.option('--dry-run', 'Preview without writing files', false)
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude render spec.json')} ${style.dim('Render with auto-detected framework')}
|
||||
${style.command('evaluclaude render spec.json -f vitest')} ${style.dim('Use Vitest framework')}
|
||||
${style.command('evaluclaude render spec.json --dry-run')} ${style.dim('Preview output without writing')}
|
||||
${style.command('evaluclaude render spec.json --fixtures')} ${style.dim('Include fixture stubs')}
|
||||
`)
|
||||
.action(async (specPath: string, options) => {
|
||||
try {
|
||||
if (!existsSync(specPath)) {
|
||||
console.error(`Error: Spec file not found: ${specPath}`);
|
||||
console.error(formatError(`Spec file not found: ${style.path(specPath)}`, [
|
||||
'Check that the spec file exists',
|
||||
'Run `evaluclaude analyze` to generate a spec file first',
|
||||
'Verify the path is correct',
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const specContent = readFileSync(specPath, 'utf-8');
|
||||
const spec: EvalSpec = JSON.parse(specContent);
|
||||
let spec: EvalSpec;
|
||||
|
||||
try {
|
||||
spec = JSON.parse(specContent);
|
||||
} catch {
|
||||
console.error(formatError('Invalid JSON in spec file', [
|
||||
'Ensure the file contains valid JSON',
|
||||
'Check for syntax errors in the spec file',
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const framework = (options.framework as Framework) || detectFramework(spec);
|
||||
|
||||
console.log(`Rendering ${spec.scenarios.length} scenarios with ${framework}...`);
|
||||
const spinner = new Spinner(`Rendering ${style.number(String(spec.scenarios.length))} scenarios with ${style.highlight(framework)}...`);
|
||||
spinner.start();
|
||||
|
||||
const result = await renderSpec(spec, {
|
||||
outputDir: options.output,
|
||||
|
|
@ -33,29 +56,44 @@ export const renderCommand = new Command('render')
|
|||
dryRun: options.dryRun,
|
||||
});
|
||||
|
||||
spinner.succeed(`Rendered ${style.number(String(spec.scenarios.length))} scenarios with ${style.highlight(framework)}`);
|
||||
|
||||
if (options.dryRun) {
|
||||
console.log('\n--- DRY RUN ---\n');
|
||||
console.log(`\n${style.warning('DRY RUN')} ${style.dim('─ Preview only, no files written')}\n`);
|
||||
for (const file of result.files) {
|
||||
console.log(`📄 ${file.path}`);
|
||||
console.log('---');
|
||||
console.log(file.content);
|
||||
console.log('---\n');
|
||||
console.log(`${icons.file} ${style.path(file.path)}`);
|
||||
console.log(style.dim('─'.repeat(50)));
|
||||
console.log(style.muted(file.content));
|
||||
console.log(style.dim('─'.repeat(50)) + '\n');
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`\n✅ Rendered ${result.stats.scenarioCount} scenarios`);
|
||||
console.log(` 📁 ${result.stats.fileCount} test files`);
|
||||
console.log(` 🔍 ${result.stats.assertionCount} assertions`);
|
||||
console.log(`\n${style.success(icons.check)} ${style.bold('Render complete')}`);
|
||||
console.log(keyValue(` ${icons.spec} Scenarios`, style.number(String(result.stats.scenarioCount)), 0));
|
||||
console.log(keyValue(` ${icons.file} Test files`, style.number(String(result.stats.fileCount)), 0));
|
||||
console.log(keyValue(` ${icons.magnify} Assertions`, style.number(String(result.stats.assertionCount)), 0));
|
||||
|
||||
if (result.stats.skippedCount > 0) {
|
||||
console.log(` ⏭️ ${result.stats.skippedCount} scenarios skipped (LLM rubric assertions)`);
|
||||
console.log(keyValue(` ${icons.skipped} Skipped`, `${style.number(String(result.stats.skippedCount))} ${style.dim('(LLM rubric assertions)')}`, 0));
|
||||
}
|
||||
|
||||
if (!options.dryRun) {
|
||||
console.log(`\n📂 Output: ${options.output}`);
|
||||
console.log(`\n${icons.folder} ${style.label('Output:')} ${style.path(options.output)}`);
|
||||
|
||||
console.log(nextSteps([
|
||||
{ command: `evaluclaude run ${options.output}`, description: 'Run the generated tests' },
|
||||
{ command: `evaluclaude render ${specPath} --dry-run`, description: 'Preview changes before writing' },
|
||||
]));
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error rendering spec:', error instanceof Error ? error.message : error);
|
||||
console.error(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
[
|
||||
'Check that the spec file is valid',
|
||||
'Ensure the output directory is writable',
|
||||
'Try running with --dry-run to debug',
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import { Command } from 'commander';
|
||||
import { existsSync, readFileSync } from 'fs';
|
||||
import { join } from 'path';
|
||||
import {
|
||||
runTests,
|
||||
formatResults,
|
||||
|
|
@ -12,6 +11,17 @@ import {
|
|||
import { createTracer, saveTrace } from '../../observability/index.js';
|
||||
import { exportToPromptfooFormat } from '../../promptfoo/results-exporter.js';
|
||||
import type { EvalSpec } from '../../analyzer/types.js';
|
||||
import {
|
||||
style,
|
||||
icons,
|
||||
Spinner,
|
||||
formatError,
|
||||
nextSteps,
|
||||
keyValue,
|
||||
resultBox,
|
||||
section,
|
||||
formatDuration
|
||||
} from '../theme.js';
|
||||
|
||||
export const runCommand = new Command('run')
|
||||
.description('Run generated tests and collect results')
|
||||
|
|
@ -28,24 +38,37 @@ export const runCommand = new Command('run')
|
|||
.option('--no-trace', 'Disable execution tracing')
|
||||
.option('--export-promptfoo', 'Export results in Promptfoo format', false)
|
||||
.option('-w, --watch', 'Watch mode (rerun on changes)', false)
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude run')} ${style.dim('Run tests from ./tests/generated')}
|
||||
${style.command('evaluclaude run ./my-tests')} ${style.dim('Run tests from custom directory')}
|
||||
${style.command('evaluclaude run -f pytest')} ${style.dim('Use pytest framework')}
|
||||
${style.command('evaluclaude run --spec eval-spec.json')} ${style.dim('Map results to EvalSpec')}
|
||||
${style.command('evaluclaude run --export-promptfoo')} ${style.dim('Export for Promptfoo UI')}
|
||||
${style.command('evaluclaude run --no-sandbox')} ${style.dim('Disable sandboxing')}
|
||||
`)
|
||||
.action(async (testDir: string, options) => {
|
||||
try {
|
||||
console.log(`\n🧪 Running tests from ${testDir}...\n`);
|
||||
console.log(`\n${icons.test} ${style.bold('Running tests from')} ${style.path(testDir)}\n`);
|
||||
|
||||
if (!existsSync(testDir)) {
|
||||
console.error(`Error: Test directory not found: ${testDir}`);
|
||||
console.log(formatError(`Test directory not found: ${testDir}`, [
|
||||
`Create the directory: ${style.command(`mkdir -p ${testDir}`)}`,
|
||||
`Generate tests first: ${style.command('evaluclaude render <spec>')}`,
|
||||
'Check the path is correct'
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const framework: TestFramework = options.framework || detectTestFramework(testDir);
|
||||
console.log(` Framework: ${framework}`);
|
||||
console.log(` Sandbox: ${options.sandbox ? 'enabled' : 'disabled'}`);
|
||||
console.log(` Timeout: ${options.timeout}ms`);
|
||||
console.log(keyValue('Framework', style.info(framework), 1));
|
||||
console.log(keyValue('Sandbox', options.sandbox ? style.success('enabled') : style.warning('disabled'), 1));
|
||||
console.log(keyValue('Timeout', style.number(`${options.timeout}ms`), 1));
|
||||
|
||||
let spec: EvalSpec | undefined;
|
||||
if (options.spec && existsSync(options.spec)) {
|
||||
spec = JSON.parse(readFileSync(options.spec, 'utf-8')) as EvalSpec;
|
||||
console.log(` Spec: ${options.spec} (${spec.scenarios.length} scenarios)`);
|
||||
console.log(keyValue('Spec', `${style.path(options.spec)} ${style.muted(`(${spec.scenarios.length} scenarios)`)}`, 1));
|
||||
}
|
||||
|
||||
const tracer = options.trace ? createTracer(spec?.repo.name || 'unknown') : null;
|
||||
|
|
@ -66,7 +89,8 @@ export const runCommand = new Command('run')
|
|||
});
|
||||
}
|
||||
|
||||
console.log('\n Running tests...\n');
|
||||
const spinner = new Spinner('Running tests...');
|
||||
spinner.start();
|
||||
const startTime = Date.now();
|
||||
|
||||
const result = await runTests(
|
||||
|
|
@ -75,6 +99,14 @@ export const runCommand = new Command('run')
|
|||
options.sandbox ? DEFAULT_SANDBOX_CONFIG : undefined
|
||||
);
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
if (result.summary.failed > 0) {
|
||||
spinner.fail(`Tests completed with ${style.error(`${result.summary.failed} failures`)}`);
|
||||
} else {
|
||||
spinner.succeed(`Tests completed in ${style.number(formatDuration(duration))}`);
|
||||
}
|
||||
|
||||
if (tracer) {
|
||||
tracer.recordExecution({
|
||||
testsPassed: result.summary.passed,
|
||||
|
|
@ -94,13 +126,20 @@ export const runCommand = new Command('run')
|
|||
}
|
||||
}
|
||||
|
||||
console.log(formatResults(result));
|
||||
console.log('\n' + resultBox({
|
||||
passed: result.summary.passed,
|
||||
failed: result.summary.failed,
|
||||
skipped: result.summary.skipped,
|
||||
duration,
|
||||
}));
|
||||
|
||||
if (spec) {
|
||||
const mappedResults = mapResultsToScenarios(result, spec);
|
||||
console.log(`\n📊 Scenario Coverage:`);
|
||||
console.log(` Covered: ${mappedResults.covered}/${spec.scenarios.length}`);
|
||||
console.log(` Unmapped: ${mappedResults.unmapped}`);
|
||||
console.log(section('Scenario Coverage'));
|
||||
console.log(keyValue('Covered', `${style.success(String(mappedResults.covered))}/${style.number(String(spec.scenarios.length))}`, 1));
|
||||
if (mappedResults.unmapped > 0) {
|
||||
console.log(keyValue('Unmapped', style.warning(String(mappedResults.unmapped)), 1));
|
||||
}
|
||||
}
|
||||
|
||||
if (options.output) {
|
||||
|
|
@ -108,31 +147,40 @@ export const runCommand = new Command('run')
|
|||
const { dirname } = await import('path');
|
||||
mkdirSync(dirname(options.output), { recursive: true });
|
||||
writeFileSync(options.output, JSON.stringify(result, null, 2));
|
||||
console.log(`\n📁 Results saved to: ${options.output}`);
|
||||
console.log(`\n${icons.folder} Results saved to: ${style.path(options.output)}`);
|
||||
}
|
||||
|
||||
// Export to Promptfoo format for UI viewing
|
||||
if (options.exportPromptfoo) {
|
||||
const exportPath = await exportToPromptfooFormat(result, spec, {
|
||||
outputDir: '.evaluclaude/results',
|
||||
evalId: `eval-${Date.now()}`,
|
||||
});
|
||||
console.log(`\n📦 Promptfoo results exported: ${exportPath}`);
|
||||
console.log(` View with: evaluclaude ui`);
|
||||
console.log(`\n${icons.spec} Promptfoo results exported: ${style.path(exportPath)}`);
|
||||
}
|
||||
|
||||
if (tracer) {
|
||||
const trace = tracer.finalize();
|
||||
const tracePath = await saveTrace(trace);
|
||||
console.log(`\n📊 Trace saved: ${tracePath}`);
|
||||
console.log(` View with: evaluclaude view ${trace.id}`);
|
||||
console.log(`\n${icons.trace} Trace saved: ${style.path(tracePath)}`);
|
||||
}
|
||||
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude view <trace-id>', description: 'View execution trace' },
|
||||
{ command: 'evaluclaude ui', description: 'Launch interactive results viewer' },
|
||||
]));
|
||||
|
||||
if (result.summary.failed > 0) {
|
||||
process.exit(1);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error running tests:', error instanceof Error ? error.message : error);
|
||||
console.log(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
[
|
||||
'Check that the test directory exists and contains valid tests',
|
||||
'Ensure the test framework is installed',
|
||||
`Run with ${style.command('--no-sandbox')} if sandbox is causing issues`
|
||||
]
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|||
import { join, dirname, resolve as resolvePath } from 'path';
|
||||
import type { EvalSpec } from '../../analyzer/types.js';
|
||||
import { generatePromptfooConfig, generateTestProvider } from '../../promptfoo/index.js';
|
||||
import { style, icons, Spinner, formatError, nextSteps, header, keyValue } from '../theme.js';
|
||||
|
||||
const EVALUCLAUDE_DIR = '.evaluclaude';
|
||||
const CONFIG_FILE = 'promptfooconfig.yaml';
|
||||
|
|
@ -15,6 +16,16 @@ export const uiCommand = new Command('ui')
|
|||
.option('-s, --spec <spec>', 'Path to EvalSpec JSON file')
|
||||
.option('--generate', 'Regenerate Promptfoo config from spec')
|
||||
.option('--no-open', 'Do not auto-open browser')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude ui')} Launch UI with existing results
|
||||
${style.command('evaluclaude ui -p 8080')} Use custom port
|
||||
${style.command('evaluclaude ui -s spec.json --generate')} Generate config and launch
|
||||
|
||||
${style.bold('Workflow:')}
|
||||
1. Run ${style.command('evaluclaude run --export-promptfoo')} to generate results
|
||||
2. Run ${style.command('evaluclaude ui')} to view them in the dashboard
|
||||
`)
|
||||
.action(async (options) => {
|
||||
try {
|
||||
const port = parseInt(options.port, 10);
|
||||
|
|
@ -23,10 +34,15 @@ export const uiCommand = new Command('ui')
|
|||
|
||||
// If spec provided with --generate, create/update Promptfoo config
|
||||
if (options.spec && options.generate) {
|
||||
console.log('\n📄 Generating Promptfoo configuration...');
|
||||
const spinner = new Spinner('Generating Promptfoo configuration...');
|
||||
spinner.start();
|
||||
|
||||
if (!existsSync(options.spec)) {
|
||||
console.error(`Error: Spec file not found: ${options.spec}`);
|
||||
spinner.fail('Spec file not found');
|
||||
console.log(formatError(`Spec file not found: ${style.path(options.spec)}`, [
|
||||
`Check the file path and try again`,
|
||||
`Generate a spec with: ${style.command('evaluclaude analyze <path>')}`,
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
|
@ -42,17 +58,20 @@ export const uiCommand = new Command('ui')
|
|||
|
||||
await generateTestProvider(providerPath);
|
||||
|
||||
console.log(` Config: ${configPath}`);
|
||||
console.log(` Provider: ${providerPath}`);
|
||||
spinner.succeed('Promptfoo configuration generated');
|
||||
console.log(keyValue('Config', style.path(configPath), 1));
|
||||
console.log(keyValue('Provider', style.path(providerPath), 1));
|
||||
}
|
||||
|
||||
// Check for existing config, create default if missing
|
||||
if (!existsSync(configPath)) {
|
||||
console.log('\n⚠️ No Promptfoo config found.');
|
||||
console.log(' Creating default configuration...\n');
|
||||
console.log(`\n${style.warning(icons.warning)} No Promptfoo config found.`);
|
||||
|
||||
const spinner = new Spinner('Creating default configuration...');
|
||||
spinner.start();
|
||||
await createDefaultConfig(configPath, providerPath);
|
||||
console.log(` Created: ${configPath}`);
|
||||
spinner.succeed('Default configuration created');
|
||||
console.log(keyValue('Created', style.path(configPath), 1));
|
||||
}
|
||||
|
||||
// Check for results to display
|
||||
|
|
@ -60,19 +79,27 @@ export const uiCommand = new Command('ui')
|
|||
const latestResults = join(resultsDir, 'latest.json');
|
||||
|
||||
if (!existsSync(latestResults)) {
|
||||
console.log('\n⚠️ No evaluation results found.');
|
||||
console.log(' Run `evaluclaude run --export-promptfoo` first to generate results.\n');
|
||||
console.log(' Or run the full pipeline:');
|
||||
console.log(' evaluclaude pipeline <path> --promptfoo\n');
|
||||
console.log(formatError('No evaluation results found.', [
|
||||
`Run ${style.command('evaluclaude run --export-promptfoo')} first to generate results`,
|
||||
`Or run the full pipeline: ${style.command('evaluclaude pipeline <path> --promptfoo')}`,
|
||||
]));
|
||||
}
|
||||
|
||||
console.log(`\n🚀 Starting Promptfoo UI on port ${port}...`);
|
||||
console.log(` Results: ${latestResults}\n`);
|
||||
console.log(header('Launching Promptfoo UI'));
|
||||
console.log(keyValue('Port', style.number(String(port)), 1));
|
||||
console.log(keyValue('Results', style.path(latestResults), 1));
|
||||
console.log('');
|
||||
|
||||
const spinner = new Spinner(`${icons.rocket} Starting Promptfoo UI...`);
|
||||
spinner.start();
|
||||
|
||||
// Use promptfoo view with the results file
|
||||
await launchPromptfooView(port, latestResults, options.open);
|
||||
await launchPromptfooView(port, latestResults, options.open, spinner);
|
||||
} catch (error) {
|
||||
console.error('Error launching UI:', error instanceof Error ? error.message : error);
|
||||
console.log(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Check the console output for more details']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
@ -85,6 +112,17 @@ export const evalCommand = new Command('eval')
|
|||
.option('--view', 'Launch UI after evaluation', false)
|
||||
.option('-p, --port <port>', 'Port for UI', '3000')
|
||||
.option('--no-cache', 'Disable Promptfoo caching', false)
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude eval -s spec.json')} Run evals from spec
|
||||
${style.command('evaluclaude eval -c config.yaml')} Run with custom config
|
||||
${style.command('evaluclaude eval -s spec.json --view')} Run and launch UI
|
||||
|
||||
${style.bold('Workflow:')}
|
||||
1. Generate spec: ${style.command('evaluclaude analyze <path> -o spec.json')}
|
||||
2. Run evals: ${style.command('evaluclaude eval -s spec.json')}
|
||||
3. View results: ${style.command('evaluclaude ui')}
|
||||
`)
|
||||
.action(async (options) => {
|
||||
try {
|
||||
const configPath = options.config || join(EVALUCLAUDE_DIR, CONFIG_FILE);
|
||||
|
|
@ -92,10 +130,15 @@ export const evalCommand = new Command('eval')
|
|||
|
||||
// Generate config from spec if provided
|
||||
if (options.spec) {
|
||||
console.log('\n📄 Generating Promptfoo configuration from spec...');
|
||||
const spinner = new Spinner('Generating Promptfoo configuration from spec...');
|
||||
spinner.start();
|
||||
|
||||
if (!existsSync(options.spec)) {
|
||||
console.error(`Error: Spec file not found: ${options.spec}`);
|
||||
spinner.fail('Spec file not found');
|
||||
console.log(formatError(`Spec file not found: ${style.path(options.spec)}`, [
|
||||
`Check the file path and try again`,
|
||||
`Generate a spec with: ${style.command('evaluclaude analyze <path>')}`,
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
|
@ -111,34 +154,41 @@ export const evalCommand = new Command('eval')
|
|||
|
||||
await generateTestProvider(providerPath);
|
||||
|
||||
console.log(` Config: ${configPath}`);
|
||||
console.log(` Provider: ${providerPath}`);
|
||||
console.log(` Scenarios: ${spec.scenarios.length}`);
|
||||
spinner.succeed('Promptfoo configuration generated');
|
||||
console.log(keyValue('Config', style.path(configPath), 1));
|
||||
console.log(keyValue('Provider', style.path(providerPath), 1));
|
||||
console.log(keyValue('Scenarios', style.number(String(spec.scenarios.length)), 1));
|
||||
}
|
||||
|
||||
if (!existsSync(configPath)) {
|
||||
console.error(`\nError: Config not found: ${configPath}`);
|
||||
console.log('Run with --spec <file> to generate from EvalSpec, or create config manually.');
|
||||
console.log(formatError(`Config not found: ${style.path(configPath)}`, [
|
||||
`Run with ${style.command('--spec <file>')} to generate from EvalSpec`,
|
||||
`Or create a config manually`,
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Ensure output directory exists
|
||||
mkdirSync(options.output, { recursive: true });
|
||||
|
||||
console.log('\n🧪 Running Promptfoo evaluations...');
|
||||
console.log(` Config: ${configPath}`);
|
||||
console.log(` Output: ${options.output}\n`);
|
||||
console.log(header('Running Promptfoo Evaluations'));
|
||||
console.log(keyValue('Config', style.path(configPath), 1));
|
||||
console.log(keyValue('Output', style.path(options.output), 1));
|
||||
console.log('');
|
||||
|
||||
const outputFile = join(options.output, `eval-${Date.now()}.json`);
|
||||
|
||||
const exitCode = await runPromptfooEval(configPath, outputFile, !options.cache);
|
||||
const spinner = new Spinner(`${icons.test} Running evaluations...`);
|
||||
spinner.start();
|
||||
|
||||
const exitCode = await runPromptfooEval(configPath, outputFile, !options.cache, spinner);
|
||||
|
||||
if (exitCode === 0) {
|
||||
console.log(`\n✅ Evaluation complete!`);
|
||||
console.log(`📁 Results: ${outputFile}`);
|
||||
spinner.succeed('Evaluation complete!');
|
||||
console.log(keyValue('Results', style.path(outputFile), 1));
|
||||
} else {
|
||||
console.log(`\n⚠️ Evaluation finished with exit code ${exitCode}`);
|
||||
console.log(`📁 Results: ${outputFile}`);
|
||||
spinner.warn(`Evaluation finished with exit code ${exitCode}`);
|
||||
console.log(keyValue('Results', style.path(outputFile), 1));
|
||||
}
|
||||
|
||||
// List traces generated during evaluation
|
||||
|
|
@ -147,19 +197,27 @@ export const evalCommand = new Command('eval')
|
|||
const { readdirSync } = await import('fs');
|
||||
const traces = readdirSync(tracesDir).filter(f => f.endsWith('.json'));
|
||||
if (traces.length > 0) {
|
||||
console.log(`\n📊 Traces generated: ${traces.length}`);
|
||||
console.log(` View with: evaluclaude view --last`);
|
||||
console.log(`\n${icons.trace} ${style.bold('Traces generated:')} ${style.number(String(traces.length))}`);
|
||||
console.log(style.dim(` View with: ${style.command('evaluclaude view --last')}`));
|
||||
}
|
||||
}
|
||||
|
||||
if (options.view) {
|
||||
console.log(`\n🚀 Launching UI on port ${options.port}...`);
|
||||
await launchPromptfooUI(parseInt(options.port, 10), configPath, true);
|
||||
console.log('');
|
||||
const uiSpinner = new Spinner(`${icons.rocket} Launching UI on port ${options.port}...`);
|
||||
uiSpinner.start();
|
||||
await launchPromptfooUI(parseInt(options.port, 10), configPath, true, uiSpinner);
|
||||
} else {
|
||||
console.log(`\n View results: evaluclaude ui`);
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude ui', description: 'View results in dashboard' },
|
||||
{ command: 'evaluclaude view --last', description: 'View latest trace' },
|
||||
]));
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error running eval:', error instanceof Error ? error.message : error);
|
||||
console.log(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Check the console output for more details']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
@ -170,7 +228,8 @@ export const evalCommand = new Command('eval')
|
|||
async function launchPromptfooView(
|
||||
port: number,
|
||||
resultsFile: string,
|
||||
openBrowser: boolean
|
||||
openBrowser: boolean,
|
||||
spinner?: Spinner
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Use 'promptfoo view' which opens the web UI showing results from the output directory
|
||||
|
|
@ -186,7 +245,11 @@ async function launchPromptfooView(
|
|||
// Pass the directory containing results
|
||||
args.push(resultsDir);
|
||||
|
||||
console.log(` Running: npx ${args.join(' ')}\n`);
|
||||
if (spinner) {
|
||||
spinner.succeed(`Promptfoo UI starting on port ${style.number(String(port))}`);
|
||||
}
|
||||
console.log(style.dim(` Running: npx ${args.join(' ')}`));
|
||||
console.log('');
|
||||
|
||||
const child = spawn('npx', args, {
|
||||
stdio: 'inherit',
|
||||
|
|
@ -195,9 +258,10 @@ async function launchPromptfooView(
|
|||
|
||||
child.on('error', (error) => {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
console.error('\n❌ Promptfoo not found.');
|
||||
console.error(' Install with: npm install -g promptfoo');
|
||||
console.error(' Or run: npx promptfoo --version\n');
|
||||
console.log(formatError('Promptfoo not found.', [
|
||||
`Install with: ${style.command('npm install -g promptfoo')}`,
|
||||
`Or run: ${style.command('npx promptfoo --version')}`,
|
||||
]));
|
||||
} else {
|
||||
reject(error);
|
||||
}
|
||||
|
|
@ -225,7 +289,8 @@ async function launchPromptfooView(
|
|||
async function launchPromptfooUI(
|
||||
port: number,
|
||||
configPath: string,
|
||||
openBrowser: boolean
|
||||
openBrowser: boolean,
|
||||
spinner?: Spinner
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = ['promptfoo', 'view', '--port', String(port)];
|
||||
|
|
@ -240,7 +305,11 @@ async function launchPromptfooUI(
|
|||
const configDir = dirname(resolvePath(configPath));
|
||||
args.push(configDir);
|
||||
|
||||
console.log(` Running: npx ${args.join(' ')}\n`);
|
||||
if (spinner) {
|
||||
spinner.succeed(`Promptfoo UI starting on port ${style.number(String(port))}`);
|
||||
}
|
||||
console.log(style.dim(` Running: npx ${args.join(' ')}`));
|
||||
console.log('');
|
||||
|
||||
const child = spawn('npx', args, {
|
||||
stdio: 'inherit',
|
||||
|
|
@ -249,9 +318,10 @@ async function launchPromptfooUI(
|
|||
|
||||
child.on('error', (error) => {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
console.error('\n❌ Promptfoo not found.');
|
||||
console.error(' Install with: npm install -g promptfoo');
|
||||
console.error(' Or run: npx promptfoo --version\n');
|
||||
console.log(formatError('Promptfoo not found.', [
|
||||
`Install with: ${style.command('npm install -g promptfoo')}`,
|
||||
`Or run: ${style.command('npx promptfoo --version')}`,
|
||||
]));
|
||||
} else {
|
||||
reject(error);
|
||||
}
|
||||
|
|
@ -276,7 +346,8 @@ async function launchPromptfooUI(
|
|||
async function runPromptfooEval(
|
||||
configPath: string,
|
||||
outputFile: string,
|
||||
noCache: boolean
|
||||
noCache: boolean,
|
||||
spinner?: Spinner
|
||||
): Promise<number> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = [
|
||||
|
|
@ -290,7 +361,11 @@ async function runPromptfooEval(
|
|||
args.push('--no-cache');
|
||||
}
|
||||
|
||||
console.log(` Running: npx ${args.join(' ')}\n`);
|
||||
if (spinner) {
|
||||
spinner.stop();
|
||||
}
|
||||
console.log(style.dim(` Running: npx ${args.join(' ')}`));
|
||||
console.log('');
|
||||
|
||||
const child = spawn('npx', args, {
|
||||
stdio: 'inherit',
|
||||
|
|
@ -299,8 +374,9 @@ async function runPromptfooEval(
|
|||
|
||||
child.on('error', (error) => {
|
||||
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
||||
console.error('\n❌ Promptfoo not found.');
|
||||
console.error(' Install with: npm install -g promptfoo\n');
|
||||
console.log(formatError('Promptfoo not found.', [
|
||||
`Install with: ${style.command('npm install -g promptfoo')}`,
|
||||
]));
|
||||
reject(error);
|
||||
} else {
|
||||
reject(error);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import {
|
|||
formatTrace,
|
||||
formatTraceList
|
||||
} from '../../observability/index.js';
|
||||
import { style, icons, formatError, nextSteps } from '../theme.js';
|
||||
|
||||
export const viewCommand = new Command('view')
|
||||
.description('View evaluation traces')
|
||||
|
|
@ -19,6 +20,14 @@ export const viewCommand = new Command('view')
|
|||
.option('--decisions', 'Show decisions made', true)
|
||||
.option('-n, --limit <count>', 'Limit number of traces listed', '20')
|
||||
.option('--eval <eval-id>', 'Filter traces by eval ID')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude view')} ${style.dim('View the most recent trace')}
|
||||
${style.command('evaluclaude view --list')} ${style.dim('List all available traces')}
|
||||
${style.command('evaluclaude view abc123')} ${style.dim('View a specific trace by ID')}
|
||||
${style.command('evaluclaude view --json')} ${style.dim('Output trace as raw JSON')}
|
||||
${style.command('evaluclaude view -v')} ${style.dim('Verbose output with tool calls')}
|
||||
`)
|
||||
.action(async (traceId: string | undefined, options) => {
|
||||
try {
|
||||
if (options.list) {
|
||||
|
|
@ -26,16 +35,19 @@ export const viewCommand = new Command('view')
|
|||
const limited = traces.slice(0, parseInt(options.limit, 10));
|
||||
|
||||
if (traces.length === 0) {
|
||||
console.log('\nNo traces found.');
|
||||
console.log('Run `evaluclaude run` to generate traces.\n');
|
||||
console.log(`\n${style.warning(`${icons.warning} No traces found.`)}`);
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude run', description: 'Run evals to generate traces' },
|
||||
{ command: 'evaluclaude pipeline .', description: 'Run full pipeline from scratch' },
|
||||
]));
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(formatTraceList(limited));
|
||||
|
||||
if (traces.length > limited.length) {
|
||||
console.log(`Showing ${limited.length} of ${traces.length} traces.`);
|
||||
console.log(`Use --limit to see more.\n`);
|
||||
console.log(style.muted(`Showing ${limited.length} of ${traces.length} traces.`));
|
||||
console.log(style.muted(`Use ${style.command('--limit')} to see more.\n`));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -45,15 +57,20 @@ export const viewCommand = new Command('view')
|
|||
if (options.last || !traceId) {
|
||||
trace = await getLatestTrace();
|
||||
if (!trace) {
|
||||
console.log('\nNo traces found.');
|
||||
console.log('Run `evaluclaude run` to generate traces.\n');
|
||||
console.log(`\n${style.warning(`${icons.warning} No traces found.`)}`);
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude run', description: 'Run evals to generate traces' },
|
||||
{ command: 'evaluclaude pipeline .', description: 'Run full pipeline from scratch' },
|
||||
]));
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
trace = await loadTrace(traceId);
|
||||
if (!trace) {
|
||||
console.error(`\nTrace not found: ${traceId}`);
|
||||
console.log('Use `evaluclaude view --list` to see available traces.\n');
|
||||
console.log(formatError(`Trace not found: ${style.path(traceId)}`, [
|
||||
`Run ${style.command('evaluclaude view --list')} to see available traces`,
|
||||
`Check that the trace ID is correct`,
|
||||
]));
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
|
@ -68,7 +85,10 @@ export const viewCommand = new Command('view')
|
|||
|
||||
console.log(output);
|
||||
} catch (error) {
|
||||
console.error('Error viewing trace:', error instanceof Error ? error.message : error);
|
||||
console.log(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Run evaluclaude run first to generate traces']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
@ -77,14 +97,36 @@ export const tracesCommand = new Command('traces')
|
|||
.description('List all evaluation traces (alias for view --list)')
|
||||
.option('-n, --limit <count>', 'Limit number of traces', '20')
|
||||
.option('--eval <eval-id>', 'Filter by eval ID')
|
||||
.addHelpText('after', `
|
||||
${style.bold('Examples:')}
|
||||
${style.command('evaluclaude traces')} ${style.dim('List all traces')}
|
||||
${style.command('evaluclaude traces -n 50')} ${style.dim('Show up to 50 traces')}
|
||||
${style.command('evaluclaude traces --eval X')} ${style.dim('Filter by eval ID')}
|
||||
`)
|
||||
.action(async (options) => {
|
||||
try {
|
||||
const traces = await listTraces(options.eval);
|
||||
const limited = traces.slice(0, parseInt(options.limit, 10));
|
||||
|
||||
if (traces.length === 0) {
|
||||
console.log('\nNo traces found.');
|
||||
console.log(`\n${style.warning(`${icons.warning} No traces found.`)}`);
|
||||
console.log(nextSteps([
|
||||
{ command: 'evaluclaude run', description: 'Run evals to generate traces' },
|
||||
]));
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(formatTraceList(limited));
|
||||
|
||||
if (traces.length > limited.length) {
|
||||
console.log(style.muted(`Showing ${limited.length} of ${traces.length} traces.`));
|
||||
console.log(style.muted(`Use ${style.command('--limit')} to see more.\n`));
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(formatError(
|
||||
error instanceof Error ? error.message : String(error),
|
||||
['Run evaluclaude run first to generate traces']
|
||||
));
|
||||
process.exit(1);
|
||||
}
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,13 +9,43 @@ import { runCommand } from './commands/run.js';
|
|||
import { viewCommand, tracesCommand } from './commands/view.js';
|
||||
import { uiCommand, evalCommand } from './commands/ui.js';
|
||||
import { pipelineCommand } from './commands/pipeline.js';
|
||||
import { BANNER_MINIMAL, style, welcomeMessage, icons } from './theme.js';
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name('evaluclaude')
|
||||
.description('Zero-to-evals in one command. Claude analyzes codebases and generates functional tests.')
|
||||
.version('0.1.0');
|
||||
.description(`${BANNER_MINIMAL}\n\nClaude-powered functional test generation for any codebase.`)
|
||||
.version('0.1.0')
|
||||
.configureHelp({
|
||||
sortSubcommands: true,
|
||||
subcommandTerm: (cmd) => style.command(cmd.name()) + ' ' + style.dim(cmd.usage()),
|
||||
})
|
||||
.addHelpText('beforeAll', '')
|
||||
.addHelpText('afterAll', `
|
||||
${style.bold('Examples:')}
|
||||
|
||||
${style.dim('# Run the full pipeline on current directory')}
|
||||
$ evaluclaude pipeline .
|
||||
|
||||
${style.dim('# Analyze a Python project interactively')}
|
||||
$ evaluclaude analyze ./my-project -i -o spec.json
|
||||
|
||||
${style.dim('# Generate and run tests')}
|
||||
$ evaluclaude render spec.json && evaluclaude run
|
||||
|
||||
${style.dim('# View results in browser')}
|
||||
$ evaluclaude run --export-promptfoo && evaluclaude ui
|
||||
|
||||
${style.muted('For more info, run any command with --help')}
|
||||
`);
|
||||
|
||||
// Add welcome command for first-time users
|
||||
const welcomeCmd = new Command('welcome')
|
||||
.description('Show welcome message and quick start guide')
|
||||
.action(() => {
|
||||
console.log(welcomeMessage());
|
||||
});
|
||||
|
||||
// Core pipeline command - the "zero to evals" experience
|
||||
program.addCommand(pipelineCommand);
|
||||
|
|
@ -39,4 +69,16 @@ program.addCommand(tracesCommand);
|
|||
program.addCommand(uiCommand);
|
||||
program.addCommand(evalCommand);
|
||||
|
||||
// Utility commands
|
||||
program.addCommand(welcomeCmd);
|
||||
|
||||
// Show welcome on no args if first time (check for .evaluclaude directory)
|
||||
if (process.argv.length === 2) {
|
||||
const fs = await import('fs');
|
||||
if (!fs.existsSync('.evaluclaude')) {
|
||||
console.log(welcomeMessage());
|
||||
process.exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
program.parse(process.argv);
|
||||
|
|
|
|||
357
src/cli/theme.ts
Normal file
357
src/cli/theme.ts
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
/**
|
||||
* Evaluclaude CLI Theme
|
||||
* Consistent styling, colors, and formatting for a beautiful CLI experience
|
||||
*/
|
||||
|
||||
// ANSI color codes
|
||||
const colors = {
|
||||
reset: '\x1b[0m',
|
||||
bold: '\x1b[1m',
|
||||
dim: '\x1b[2m',
|
||||
italic: '\x1b[3m',
|
||||
underline: '\x1b[4m',
|
||||
|
||||
// Foreground colors
|
||||
black: '\x1b[30m',
|
||||
red: '\x1b[31m',
|
||||
green: '\x1b[32m',
|
||||
yellow: '\x1b[33m',
|
||||
blue: '\x1b[34m',
|
||||
magenta: '\x1b[35m',
|
||||
cyan: '\x1b[36m',
|
||||
white: '\x1b[37m',
|
||||
|
||||
// Bright foreground colors
|
||||
brightBlack: '\x1b[90m',
|
||||
brightRed: '\x1b[91m',
|
||||
brightGreen: '\x1b[92m',
|
||||
brightYellow: '\x1b[93m',
|
||||
brightBlue: '\x1b[94m',
|
||||
brightMagenta: '\x1b[95m',
|
||||
brightCyan: '\x1b[96m',
|
||||
brightWhite: '\x1b[97m',
|
||||
|
||||
// Background colors
|
||||
bgBlack: '\x1b[40m',
|
||||
bgRed: '\x1b[41m',
|
||||
bgGreen: '\x1b[42m',
|
||||
bgYellow: '\x1b[43m',
|
||||
bgBlue: '\x1b[44m',
|
||||
bgMagenta: '\x1b[45m',
|
||||
bgCyan: '\x1b[46m',
|
||||
bgWhite: '\x1b[47m',
|
||||
};
|
||||
|
||||
// Semantic color helpers
|
||||
export const style = {
|
||||
// Text styles
|
||||
bold: (text: string) => `${colors.bold}${text}${colors.reset}`,
|
||||
dim: (text: string) => `${colors.dim}${text}${colors.reset}`,
|
||||
italic: (text: string) => `${colors.italic}${text}${colors.reset}`,
|
||||
|
||||
// Semantic colors
|
||||
success: (text: string) => `${colors.green}${text}${colors.reset}`,
|
||||
error: (text: string) => `${colors.red}${text}${colors.reset}`,
|
||||
warning: (text: string) => `${colors.yellow}${text}${colors.reset}`,
|
||||
info: (text: string) => `${colors.cyan}${text}${colors.reset}`,
|
||||
highlight: (text: string) => `${colors.brightMagenta}${text}${colors.reset}`,
|
||||
muted: (text: string) => `${colors.brightBlack}${text}${colors.reset}`,
|
||||
|
||||
// Accent colors
|
||||
primary: (text: string) => `${colors.brightCyan}${text}${colors.reset}`,
|
||||
secondary: (text: string) => `${colors.brightBlue}${text}${colors.reset}`,
|
||||
accent: (text: string) => `${colors.brightMagenta}${text}${colors.reset}`,
|
||||
|
||||
// Special combinations
|
||||
command: (text: string) => `${colors.bold}${colors.cyan}${text}${colors.reset}`,
|
||||
path: (text: string) => `${colors.brightBlue}${text}${colors.reset}`,
|
||||
number: (text: string) => `${colors.brightYellow}${text}${colors.reset}`,
|
||||
label: (text: string) => `${colors.dim}${text}${colors.reset}`,
|
||||
};
|
||||
|
||||
// Icons for consistent visual language
|
||||
export const icons = {
|
||||
// Status
|
||||
success: '✓',
|
||||
error: '✗',
|
||||
warning: '⚠',
|
||||
info: 'ℹ',
|
||||
pending: '○',
|
||||
running: '◐',
|
||||
|
||||
// Actions
|
||||
arrow: '→',
|
||||
arrowRight: '▸',
|
||||
bullet: '•',
|
||||
check: '✓',
|
||||
cross: '✗',
|
||||
|
||||
// Objects
|
||||
folder: '📁',
|
||||
file: '📄',
|
||||
code: '💻',
|
||||
test: '🧪',
|
||||
spec: '📋',
|
||||
trace: '📊',
|
||||
|
||||
// Process
|
||||
rocket: '🚀',
|
||||
gear: '⚙',
|
||||
magnify: '🔍',
|
||||
brain: '🧠',
|
||||
lightning: '⚡',
|
||||
sparkle: '✨',
|
||||
|
||||
// Results
|
||||
passed: '✅',
|
||||
failed: '❌',
|
||||
skipped: '⏭️',
|
||||
|
||||
// Categories
|
||||
python: '🐍',
|
||||
typescript: '📘',
|
||||
javascript: '📙',
|
||||
};
|
||||
|
||||
// Box drawing characters
|
||||
export const box = {
|
||||
topLeft: '╭',
|
||||
topRight: '╮',
|
||||
bottomLeft: '╰',
|
||||
bottomRight: '╯',
|
||||
horizontal: '─',
|
||||
vertical: '│',
|
||||
tLeft: '├',
|
||||
tRight: '┤',
|
||||
cross: '┼',
|
||||
|
||||
// Double lines
|
||||
dHorizontal: '═',
|
||||
dVertical: '║',
|
||||
dTopLeft: '╔',
|
||||
dTopRight: '╗',
|
||||
dBottomLeft: '╚',
|
||||
dBottomRight: '╝',
|
||||
};
|
||||
|
||||
// Banner and branding
|
||||
export const BANNER = `
|
||||
${style.primary(' ╔═══════════════════════════════════════════════════════╗')}
|
||||
${style.primary(' ║')} ${style.bold(style.accent('evaluclaude'))}${style.muted(' · zero-to-evals in one command')} ${style.primary('║')}
|
||||
${style.primary(' ╚═══════════════════════════════════════════════════════╝')}
|
||||
`;
|
||||
|
||||
export const BANNER_MINIMAL = `${style.accent('evaluclaude')} ${style.muted('·')} ${style.dim('zero-to-evals in one command')}`;
|
||||
|
||||
// Common output formatters
|
||||
export function header(title: string): string {
|
||||
const width = 60;
|
||||
const padding = Math.max(0, width - title.length - 4);
|
||||
return `\n${style.primary(box.dHorizontal.repeat(width))}
|
||||
${style.bold(title)}
|
||||
${style.primary(box.dHorizontal.repeat(width))}\n`;
|
||||
}
|
||||
|
||||
export function subheader(title: string): string {
|
||||
return `\n${style.bold(title)}\n${style.dim(box.horizontal.repeat(40))}`;
|
||||
}
|
||||
|
||||
export function section(title: string): string {
|
||||
return `\n${style.dim(box.horizontal.repeat(4))} ${style.bold(title)} ${style.dim(box.horizontal.repeat(Math.max(0, 34 - title.length)))}`;
|
||||
}
|
||||
|
||||
export function keyValue(key: string, value: string | number, indent = 0): string {
|
||||
const pad = ' '.repeat(indent);
|
||||
return `${pad}${style.label(key + ':')} ${value}`;
|
||||
}
|
||||
|
||||
export function bullet(text: string, indent = 0): string {
|
||||
const pad = ' '.repeat(indent);
|
||||
return `${pad}${style.dim(icons.bullet)} ${text}`;
|
||||
}
|
||||
|
||||
export function step(num: number, text: string, status: 'pending' | 'running' | 'done' | 'error' = 'pending'): string {
|
||||
const statusIcon = {
|
||||
pending: style.dim(`${num}.`),
|
||||
running: style.info(`${icons.running}`),
|
||||
done: style.success(icons.success),
|
||||
error: style.error(icons.error),
|
||||
}[status];
|
||||
|
||||
return ` ${statusIcon} ${status === 'done' ? style.muted(text) : text}`;
|
||||
}
|
||||
|
||||
export function progressBar(current: number, total: number, width = 30): string {
|
||||
const percentage = Math.round((current / total) * 100);
|
||||
const filled = Math.round((current / total) * width);
|
||||
const empty = width - filled;
|
||||
|
||||
const bar = style.success('█'.repeat(filled)) + style.dim('░'.repeat(empty));
|
||||
return `${bar} ${style.muted(`${percentage}%`)}`;
|
||||
}
|
||||
|
||||
export function table(rows: string[][]): string {
|
||||
if (rows.length === 0) return '';
|
||||
|
||||
const colWidths = rows[0].map((_, i) =>
|
||||
Math.max(...rows.map(row => (row[i] || '').length))
|
||||
);
|
||||
|
||||
return rows.map(row =>
|
||||
row.map((cell, i) => cell.padEnd(colWidths[i])).join(' ')
|
||||
).join('\n');
|
||||
}
|
||||
|
||||
// Spinner for async operations
|
||||
export class Spinner {
|
||||
private frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
|
||||
private frameIndex = 0;
|
||||
private intervalId: NodeJS.Timeout | null = null;
|
||||
private text: string;
|
||||
|
||||
constructor(text: string) {
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
start(): void {
|
||||
process.stdout.write('\x1b[?25l'); // Hide cursor
|
||||
this.render();
|
||||
this.intervalId = setInterval(() => {
|
||||
this.frameIndex = (this.frameIndex + 1) % this.frames.length;
|
||||
this.render();
|
||||
}, 80);
|
||||
}
|
||||
|
||||
private render(): void {
|
||||
process.stdout.write(`\r${style.info(this.frames[this.frameIndex])} ${this.text}`);
|
||||
}
|
||||
|
||||
update(text: string): void {
|
||||
this.text = text;
|
||||
this.render();
|
||||
}
|
||||
|
||||
succeed(text?: string): void {
|
||||
this.stop();
|
||||
console.log(`\r${style.success(icons.success)} ${text || this.text}`);
|
||||
}
|
||||
|
||||
fail(text?: string): void {
|
||||
this.stop();
|
||||
console.log(`\r${style.error(icons.error)} ${text || this.text}`);
|
||||
}
|
||||
|
||||
warn(text?: string): void {
|
||||
this.stop();
|
||||
console.log(`\r${style.warning(icons.warning)} ${text || this.text}`);
|
||||
}
|
||||
|
||||
stop(): void {
|
||||
if (this.intervalId) {
|
||||
clearInterval(this.intervalId);
|
||||
this.intervalId = null;
|
||||
}
|
||||
process.stdout.write('\x1b[?25h'); // Show cursor
|
||||
process.stdout.write('\r' + ' '.repeat(80) + '\r'); // Clear line
|
||||
}
|
||||
}
|
||||
|
||||
// Result summary box
|
||||
/**
 * Draw a rounded box summarizing test results: passed/failed counts, an
 * optional skipped row, the total, and an optional duration.
 *
 * NOTE(review): the layout mixes a 38-char border with padEnd(45) (and
 * padEnd(42) for the skipped row) on strings that contain ANSI escape
 * codes, so visual alignment depends on the invisible escape-sequence
 * overhead — confirm rendering before changing any padding constant.
 */
export function resultBox(results: { passed: number; failed: number; skipped?: number; duration?: number }): string {
  const { passed, failed, skipped = 0, duration } = results;
  const total = passed + failed + skipped;
  const lines: string[] = [];

  // Top border and blank spacer row.
  lines.push(style.primary(` ${box.topLeft}${box.horizontal.repeat(38)}${box.topRight}`));
  lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical));
  lines.push(style.primary(` ${box.vertical}`) + ` ${style.bold('Test Results')}`.padEnd(45) + style.primary(box.vertical));
  lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical));
  // Counters are right-aligned to 4 digits.
  lines.push(style.primary(` ${box.vertical}`) + ` ${style.success(icons.passed)} Passed: ${String(passed).padStart(4)}`.padEnd(45) + style.primary(box.vertical));
  lines.push(style.primary(` ${box.vertical}`) + ` ${style.error(icons.failed)} Failed: ${String(failed).padStart(4)}`.padEnd(45) + style.primary(box.vertical));

  // The skipped row only appears when there is something to report.
  if (skipped > 0) {
    lines.push(style.primary(` ${box.vertical}`) + ` ${icons.skipped} Skipped: ${String(skipped).padStart(4)}`.padEnd(42) + style.primary(box.vertical));
  }

  // Separator rule, then the total.
  lines.push(style.primary(` ${box.vertical}`) + style.dim(` ${'─'.repeat(20)}`).padEnd(45) + style.primary(box.vertical));
  lines.push(style.primary(` ${box.vertical}`) + ` Total: ${String(total).padStart(4)}`.padEnd(45) + style.primary(box.vertical));

  if (duration !== undefined) {
    lines.push(style.primary(` ${box.vertical}`) + ` Duration: ${formatDuration(duration)}`.padEnd(45) + style.primary(box.vertical));
  }

  // Bottom spacer row and border.
  lines.push(style.primary(` ${box.vertical}`) + ' '.repeat(38) + style.primary(box.vertical));
  lines.push(style.primary(` ${box.bottomLeft}${box.horizontal.repeat(38)}${box.bottomRight}`));

  return lines.join('\n');
}
|
||||
|
||||
export function formatDuration(ms: number): string {
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
|
||||
const minutes = Math.floor(ms / 60000);
|
||||
const seconds = Math.floor((ms % 60000) / 1000);
|
||||
return `${minutes}m ${seconds}s`;
|
||||
}
|
||||
|
||||
// Error formatting with suggestions
|
||||
export function formatError(message: string, suggestions?: string[]): string {
|
||||
const lines: string[] = [];
|
||||
lines.push(`\n${style.error(`${icons.error} Error:`)} ${message}`);
|
||||
|
||||
if (suggestions && suggestions.length > 0) {
|
||||
lines.push('');
|
||||
lines.push(style.dim(' Suggestions:'));
|
||||
for (const suggestion of suggestions) {
|
||||
lines.push(` ${style.dim(icons.arrowRight)} ${suggestion}`);
|
||||
}
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// Command examples helper
|
||||
export function commandExample(command: string, description?: string): string {
|
||||
if (description) {
|
||||
return ` ${style.command(command)} ${style.dim(description)}`;
|
||||
}
|
||||
return ` ${style.command(command)}`;
|
||||
}
|
||||
|
||||
// Next steps helper
|
||||
export function nextSteps(steps: { command: string; description: string }[]): string {
|
||||
const lines: string[] = [];
|
||||
lines.push(`\n${style.bold('Next steps:')}`);
|
||||
|
||||
for (const step of steps) {
|
||||
lines.push(commandExample(step.command, step.description));
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
// Welcome message for first-time users
|
||||
/**
 * First-run welcome screen: banner, one-line pitch, quick-start commands,
 * and links to further help/documentation.
 *
 * NOTE(review): the body is one user-facing template literal; its internal
 * spacing/alignment IS the output — verify spacing against the rendered
 * CLI before editing.
 */
export function welcomeMessage(): string {
  return `
${BANNER}

${style.bold('Welcome to evaluclaude!')} ${icons.sparkle}

Generate functional tests for any codebase with the power of Claude.

${style.bold('Quick Start:')}

${style.command('evaluclaude pipeline .')} ${style.dim('Full pipeline: analyze → render → run')}
${style.command('evaluclaude intro .')} ${style.dim('Introspect codebase structure')}
${style.command('evaluclaude analyze .')} ${style.dim('Generate EvalSpec with Claude')}

${style.bold('Learn More:')}

${style.command('evaluclaude --help')} ${style.dim('Show all commands')}
${style.command('evaluclaude <cmd> --help')} ${style.dim('Help for specific command')}

${style.muted('Documentation: https://github.com/harivansh-afk/evaluclaude-harness')}
`;
}
|
||||
|
|
@ -1,5 +1,47 @@
|
|||
import type { EvalTrace, ToolCall, Question, Decision, TestFailure } from './types.js';
|
||||
|
||||
// ANSI color codes for terminal styling
|
||||
const colors = {
|
||||
reset: '\x1b[0m',
|
||||
bold: '\x1b[1m',
|
||||
dim: '\x1b[2m',
|
||||
green: '\x1b[32m',
|
||||
red: '\x1b[31m',
|
||||
yellow: '\x1b[33m',
|
||||
cyan: '\x1b[36m',
|
||||
magenta: '\x1b[35m',
|
||||
blue: '\x1b[34m',
|
||||
brightBlack: '\x1b[90m',
|
||||
brightCyan: '\x1b[96m',
|
||||
brightMagenta: '\x1b[95m',
|
||||
brightYellow: '\x1b[93m',
|
||||
};
|
||||
|
||||
const s = {
|
||||
bold: (t: string) => `${colors.bold}${t}${colors.reset}`,
|
||||
dim: (t: string) => `${colors.dim}${t}${colors.reset}`,
|
||||
success: (t: string) => `${colors.green}${t}${colors.reset}`,
|
||||
error: (t: string) => `${colors.red}${t}${colors.reset}`,
|
||||
warning: (t: string) => `${colors.yellow}${t}${colors.reset}`,
|
||||
info: (t: string) => `${colors.cyan}${t}${colors.reset}`,
|
||||
highlight: (t: string) => `${colors.brightMagenta}${t}${colors.reset}`,
|
||||
muted: (t: string) => `${colors.brightBlack}${t}${colors.reset}`,
|
||||
number: (t: string) => `${colors.brightYellow}${t}${colors.reset}`,
|
||||
primary: (t: string) => `${colors.brightCyan}${t}${colors.reset}`,
|
||||
};
|
||||
|
||||
const box = {
|
||||
horizontal: '─',
|
||||
dHorizontal: '═',
|
||||
topLeft: '╭',
|
||||
topRight: '╮',
|
||||
bottomLeft: '╰',
|
||||
bottomRight: '╯',
|
||||
vertical: '│',
|
||||
tLeft: '├',
|
||||
tRight: '┤',
|
||||
};
|
||||
|
||||
export interface ViewOptions {
|
||||
json: boolean;
|
||||
verbose: boolean;
|
||||
|
|
@ -24,118 +66,129 @@ export function formatTrace(trace: EvalTrace, options: Partial<ViewOptions> = {}
|
|||
}
|
||||
|
||||
const lines: string[] = [];
|
||||
const w = 60;
|
||||
|
||||
// Header
|
||||
lines.push('');
|
||||
lines.push('═'.repeat(60));
|
||||
lines.push(`📊 Trace: ${trace.id}`);
|
||||
lines.push('═'.repeat(60));
|
||||
lines.push(s.primary(box.dHorizontal.repeat(w)));
|
||||
lines.push(` 📊 ${s.bold('Trace')} ${s.muted(trace.id)}`);
|
||||
lines.push(s.primary(box.dHorizontal.repeat(w)));
|
||||
lines.push('');
|
||||
|
||||
lines.push(` Status: ${formatStatus(trace.status)}`);
|
||||
lines.push(` Started: ${formatDate(trace.startedAt)}`);
|
||||
lines.push(` Duration: ${formatDuration(trace.duration)}`);
|
||||
lines.push(` Eval ID: ${trace.evalId}`);
|
||||
// Overview
|
||||
lines.push(` ${s.dim('Status:')} ${formatStatus(trace.status)}`);
|
||||
lines.push(` ${s.dim('Started:')} ${s.muted(formatDate(trace.startedAt))}`);
|
||||
lines.push(` ${s.dim('Duration:')} ${s.number(formatDuration(trace.duration))}`);
|
||||
lines.push(` ${s.dim('Eval ID:')} ${s.muted(trace.evalId)}`);
|
||||
lines.push('');
|
||||
|
||||
lines.push('📂 Introspection');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(` Files analyzed: ${trace.introspection.filesAnalyzed.length}`);
|
||||
lines.push(` Functions found: ${trace.introspection.totalFunctions}`);
|
||||
lines.push(` Classes found: ${trace.introspection.totalClasses}`);
|
||||
lines.push(` Duration: ${formatDuration(trace.introspection.duration)}`);
|
||||
// Introspection section
|
||||
lines.push(sectionHeader('📂 Introspection'));
|
||||
lines.push(kv('Files analyzed', s.number(String(trace.introspection.filesAnalyzed.length))));
|
||||
lines.push(kv('Functions found', s.number(String(trace.introspection.totalFunctions))));
|
||||
lines.push(kv('Classes found', s.number(String(trace.introspection.totalClasses))));
|
||||
lines.push(kv('Duration', s.number(formatDuration(trace.introspection.duration))));
|
||||
lines.push('');
|
||||
|
||||
lines.push('🤖 Analysis');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(` Tool calls: ${trace.analysis.toolCalls.length}`);
|
||||
lines.push(` Questions asked: ${trace.analysis.questionsAsked.length}`);
|
||||
lines.push(` Decisions made: ${trace.analysis.decisions.length}`);
|
||||
lines.push(` Prompt tokens: ${trace.analysis.promptTokens.toLocaleString()}`);
|
||||
lines.push(` Completion tokens: ${trace.analysis.completionTokens.toLocaleString()}`);
|
||||
// Analysis section
|
||||
lines.push(sectionHeader('🧠 Analysis'));
|
||||
lines.push(kv('Tool calls', s.number(String(trace.analysis.toolCalls.length))));
|
||||
lines.push(kv('Questions asked', s.number(String(trace.analysis.questionsAsked.length))));
|
||||
lines.push(kv('Decisions made', s.number(String(trace.analysis.decisions.length))));
|
||||
lines.push(kv('Prompt tokens', s.number(trace.analysis.promptTokens.toLocaleString())));
|
||||
lines.push(kv('Completion tokens', s.number(trace.analysis.completionTokens.toLocaleString())));
|
||||
lines.push('');
|
||||
|
||||
lines.push('📝 Generation');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(` Scenarios: ${trace.generation.scenariosGenerated}`);
|
||||
lines.push(` Files written: ${trace.generation.filesWritten.length}`);
|
||||
// Generation section
|
||||
lines.push(sectionHeader('📝 Generation'));
|
||||
lines.push(kv('Scenarios', s.number(String(trace.generation.scenariosGenerated))));
|
||||
lines.push(kv('Files written', s.number(String(trace.generation.filesWritten.length))));
|
||||
lines.push('');
|
||||
|
||||
lines.push('🧪 Execution');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(` ✅ Passed: ${trace.execution.testsPassed}`);
|
||||
lines.push(` ❌ Failed: ${trace.execution.testsFailed}`);
|
||||
lines.push(` ⏭️ Skipped: ${trace.execution.testsSkipped ?? 0}`);
|
||||
// Execution section
|
||||
lines.push(sectionHeader('🧪 Execution'));
|
||||
lines.push(` ${s.success('✓')} Passed: ${s.success(String(trace.execution.testsPassed))}`);
|
||||
lines.push(` ${s.error('✗')} Failed: ${s.error(String(trace.execution.testsFailed))}`);
|
||||
lines.push(` ${s.muted('○')} Skipped: ${s.muted(String(trace.execution.testsSkipped ?? 0))}`);
|
||||
lines.push('');
|
||||
|
||||
// Questions section
|
||||
if (opts.showQuestions && trace.analysis.questionsAsked.length > 0) {
|
||||
lines.push('❓ Questions Asked');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(sectionHeader('❓ Questions Asked'));
|
||||
for (const q of trace.analysis.questionsAsked) {
|
||||
lines.push(formatQuestion(q));
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Decisions section
|
||||
if (opts.showDecisions && trace.analysis.decisions.length > 0) {
|
||||
lines.push('🎯 Key Decisions');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(sectionHeader('🎯 Key Decisions'));
|
||||
for (const d of trace.analysis.decisions.slice(0, 10)) {
|
||||
lines.push(formatDecision(d));
|
||||
}
|
||||
if (trace.analysis.decisions.length > 10) {
|
||||
lines.push(` ... and ${trace.analysis.decisions.length - 10} more`);
|
||||
lines.push(` ${s.dim(`... and ${trace.analysis.decisions.length - 10} more`)}`);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Tool calls section
|
||||
if (opts.showToolCalls && trace.analysis.toolCalls.length > 0) {
|
||||
lines.push('🔧 Tool Calls');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(sectionHeader('🔧 Tool Calls'));
|
||||
for (const tc of trace.analysis.toolCalls.slice(0, 20)) {
|
||||
lines.push(formatToolCall(tc, opts.verbose));
|
||||
}
|
||||
if (trace.analysis.toolCalls.length > 20) {
|
||||
lines.push(` ... and ${trace.analysis.toolCalls.length - 20} more`);
|
||||
lines.push(` ${s.dim(`... and ${trace.analysis.toolCalls.length - 20} more`)}`);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Test failures section
|
||||
if (trace.execution.failures.length > 0) {
|
||||
lines.push('❌ Test Failures');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(sectionHeader('❌ Test Failures'));
|
||||
for (const f of trace.execution.failures) {
|
||||
lines.push(formatFailure(f));
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
|
||||
// Errors section
|
||||
if (trace.errors.length > 0) {
|
||||
lines.push('⚠️ Errors');
|
||||
lines.push('─'.repeat(40));
|
||||
lines.push(sectionHeader('⚠️ Errors'));
|
||||
for (const e of trace.errors) {
|
||||
lines.push(` [${formatDate(e.timestamp)}]`);
|
||||
lines.push(` ${e.message}`);
|
||||
lines.push(` ${s.dim('[')}${s.muted(formatDate(e.timestamp))}${s.dim(']')}`);
|
||||
lines.push(` ${s.error(e.message)}`);
|
||||
if (e.context) {
|
||||
lines.push(` Context: ${e.context}`);
|
||||
lines.push(` ${s.dim('Context:')} ${e.context}`);
|
||||
}
|
||||
lines.push('');
|
||||
}
|
||||
}
|
||||
|
||||
lines.push('═'.repeat(60));
|
||||
lines.push(s.primary(box.dHorizontal.repeat(w)));
|
||||
lines.push('');
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function sectionHeader(title: string): string {
|
||||
return `${s.dim(box.horizontal.repeat(3))} ${s.bold(title)} ${s.dim(box.horizontal.repeat(Math.max(0, 35 - title.length)))}`;
|
||||
}
|
||||
|
||||
function kv(key: string, value: string): string {
|
||||
return ` ${s.dim(key + ':')} ${value}`;
|
||||
}
|
||||
|
||||
function formatStatus(status: EvalTrace['status']): string {
|
||||
switch (status) {
|
||||
case 'success':
|
||||
return '✅ Success';
|
||||
return s.success('✓ Success');
|
||||
case 'partial':
|
||||
return '⚠️ Partial';
|
||||
return s.warning('⚠ Partial');
|
||||
case 'failed':
|
||||
return '❌ Failed';
|
||||
return s.error('✗ Failed');
|
||||
default:
|
||||
return status;
|
||||
}
|
||||
|
|
@ -146,12 +199,8 @@ function formatDate(iso: string): string {
|
|||
}
|
||||
|
||||
function formatDuration(ms: number): string {
|
||||
if (ms < 1000) {
|
||||
return `${ms}ms`;
|
||||
}
|
||||
if (ms < 60000) {
|
||||
return `${(ms / 1000).toFixed(1)}s`;
|
||||
}
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
|
||||
const minutes = Math.floor(ms / 60000);
|
||||
const seconds = ((ms % 60000) / 1000).toFixed(0);
|
||||
return `${minutes}m ${seconds}s`;
|
||||
|
|
@ -159,37 +208,39 @@ function formatDuration(ms: number): string {
|
|||
|
||||
function formatQuestion(q: Question): string {
|
||||
const lines: string[] = [];
|
||||
lines.push(` Q: ${q.question}`);
|
||||
lines.push(` ${s.highlight('Q:')} ${q.question}`);
|
||||
if (q.answer) {
|
||||
lines.push(` A: ${q.answer}`);
|
||||
lines.push(` ${s.info('A:')} ${q.answer}`);
|
||||
} else {
|
||||
lines.push(` A: (no answer)`);
|
||||
lines.push(` ${s.dim('A: (no answer)')}`);
|
||||
}
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
||||
function formatDecision(d: Decision): string {
|
||||
const icon = d.type === 'include' ? '✓' : d.type === 'exclude' ? '✗' : '→';
|
||||
return ` ${icon} [${d.type}] ${d.subject}\n Reason: ${d.reasoning}\n Confidence: ${(d.confidence * 100).toFixed(0)}%\n`;
|
||||
const icon = d.type === 'include' ? s.success('✓') : d.type === 'exclude' ? s.error('✗') : s.info('→');
|
||||
const conf = (d.confidence * 100).toFixed(0);
|
||||
return ` ${icon} ${s.dim(`[${d.type}]`)} ${d.subject}\n ${s.dim('Reason:')} ${d.reasoning}\n ${s.dim('Confidence:')} ${s.number(conf + '%')}\n`;
|
||||
}
|
||||
|
||||
function formatToolCall(tc: ToolCall, verbose: boolean): string {
|
||||
const duration = formatDuration(tc.duration);
|
||||
if (verbose) {
|
||||
return ` [${tc.tool}] (${duration})\n Input: ${JSON.stringify(tc.input).slice(0, 100)}...\n`;
|
||||
const input = JSON.stringify(tc.input).slice(0, 100);
|
||||
return ` ${s.info(tc.tool)} ${s.dim(`(${duration})`)}\n ${s.dim('Input:')} ${input}...\n`;
|
||||
}
|
||||
return ` ${tc.tool} (${duration})`;
|
||||
return ` ${s.info(tc.tool)} ${s.dim(`(${duration})`)}`;
|
||||
}
|
||||
|
||||
function formatFailure(f: TestFailure): string {
|
||||
const lines: string[] = [];
|
||||
lines.push(` • ${f.testName}`);
|
||||
lines.push(` Scenario: ${f.scenarioId}`);
|
||||
lines.push(` Error: ${f.error}`);
|
||||
lines.push(` ${s.error('•')} ${s.bold(f.testName)}`);
|
||||
lines.push(` ${s.dim('Scenario:')} ${f.scenarioId}`);
|
||||
lines.push(` ${s.dim('Error:')} ${s.error(f.error)}`);
|
||||
if (f.expected !== undefined && f.actual !== undefined) {
|
||||
lines.push(` Expected: ${JSON.stringify(f.expected)}`);
|
||||
lines.push(` Actual: ${JSON.stringify(f.actual)}`);
|
||||
lines.push(` ${s.dim('Expected:')} ${s.success(JSON.stringify(f.expected))}`);
|
||||
lines.push(` ${s.dim('Actual:')} ${s.error(JSON.stringify(f.actual))}`);
|
||||
}
|
||||
lines.push('');
|
||||
return lines.join('\n');
|
||||
|
|
@ -206,21 +257,43 @@ export function formatTraceList(traces: Array<{
|
|||
const lines: string[] = [];
|
||||
|
||||
lines.push('');
|
||||
lines.push('📋 Recent Traces');
|
||||
lines.push('═'.repeat(80));
|
||||
lines.push(` ${s.bold('📋 Recent Traces')}`);
|
||||
lines.push(s.primary(` ${box.dHorizontal.repeat(76)}`));
|
||||
lines.push('');
|
||||
lines.push('ID Status Passed Failed Duration');
|
||||
lines.push('─'.repeat(80));
|
||||
|
||||
// Header row
|
||||
const hId = s.dim('ID'.padEnd(38));
|
||||
const hStatus = s.dim('Status'.padEnd(10));
|
||||
const hPassed = s.dim('Passed'.padStart(8));
|
||||
const hFailed = s.dim('Failed'.padStart(8));
|
||||
const hDuration = s.dim('Duration'.padStart(10));
|
||||
lines.push(` ${hId}${hStatus}${hPassed}${hFailed}${hDuration}`);
|
||||
lines.push(s.dim(` ${box.horizontal.repeat(76)}`));
|
||||
|
||||
for (const t of traces) {
|
||||
const statusIcon = t.status === 'success' ? '✅' : t.status === 'partial' ? '⚠️ ' : '❌';
|
||||
const id = t.id.slice(0, 36);
|
||||
const passed = String(t.testsPassed).padStart(6);
|
||||
const failed = String(t.testsFailed).padStart(6);
|
||||
const duration = formatDuration(t.duration).padStart(8);
|
||||
lines.push(`${id} ${statusIcon} ${passed} ${failed} ${duration}`);
|
||||
const id = s.muted(t.id.slice(0, 36).padEnd(38));
|
||||
|
||||
let statusIcon: string;
|
||||
if (t.status === 'success') {
|
||||
statusIcon = s.success('✓ Pass'.padEnd(10));
|
||||
} else if (t.status === 'partial') {
|
||||
statusIcon = s.warning('⚠ Partial'.padEnd(10));
|
||||
} else {
|
||||
statusIcon = s.error('✗ Fail'.padEnd(10));
|
||||
}
|
||||
|
||||
const passed = s.success(String(t.testsPassed).padStart(8));
|
||||
const failed = t.testsFailed > 0
|
||||
? s.error(String(t.testsFailed).padStart(8))
|
||||
: s.dim(String(t.testsFailed).padStart(8));
|
||||
const duration = s.number(formatDuration(t.duration).padStart(10));
|
||||
|
||||
lines.push(` ${id}${statusIcon}${passed}${failed}${duration}`);
|
||||
}
|
||||
|
||||
lines.push('');
|
||||
lines.push(` ${s.dim('View a trace:')} ${s.info('evaluclaude view <trace-id>')}`);
|
||||
lines.push('');
|
||||
|
||||
return lines.join('\n');
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue