promptfoo ui and testcon

2026-04-17 19:03:55 +00:00 · 2026-01-11 18:28:03 -05:00 · 2026-01-11 18:28:03 -05:00 · 6698c12e5b
commit 6698c12e5b
parent e0c36241b0
18 changed files with 2169 additions and 0 deletions
--- a/src/promptfoo/config-generator.ts
+++ b/src/promptfoo/config-generator.ts
@ -0,0 +1,271 @@
+import { writeFile, mkdir } from 'fs/promises';
+import { dirname, join } from 'path';
+import * as yaml from 'js-yaml';
+import type { EvalSpec, EvalScenario } from '../analyzer/types.js';
+import type { PromptfooConfig, PromptfooTest, PromptfooAssertion } from './types.js';
+
+export interface ConfigOptions {
+  testDir: string;
+  outputPath: string;
+  framework: 'pytest' | 'vitest' | 'jest';
+  includeTraceLinks: boolean;
+}
+
+export async function generatePromptfooConfig(
+  spec: EvalSpec,
+  options: ConfigOptions
+): Promise<string> {
+  const config = buildConfig(spec, options);
+  const yamlContent = yaml.dump(config, { 
+    lineWidth: 120,
+    quotingType: '"',
+  });
+
+  await mkdir(dirname(options.outputPath), { recursive: true });
+  await writeFile(options.outputPath, yamlContent);
+
+  return yamlContent;
+}
+
+function buildConfig(spec: EvalSpec, options: ConfigOptions): PromptfooConfig {
+  const tests = spec.scenarios.map(scenario => buildTest(scenario, options));
+
+  return {
+    description: `Evaluclaude functional tests for ${spec.repo.name}`,
+    providers: [
+      {
+        id: `file://providers/test-runner.py`,
+        label: 'functional-tests',
+        config: {
+          test_dir: options.testDir,
+          framework: options.framework,
+          timeout: 300,
+        },
+      },
+    ],
+    prompts: ['{{scenario_id}}'],
+    tests,
+    defaultTest: options.includeTraceLinks
+      ? {
+          metadata: {
+            traceFile: '.evaluclaude/traces/{{evalId}}.json',
+          },
+        }
+      : undefined,
+    outputPath: '.evaluclaude/results/promptfoo-results.json',
+  };
+}
+
+function buildTest(scenario: EvalScenario, options: ConfigOptions): PromptfooTest {
+  const assertions = scenario.assertions
+    .filter(a => a.type !== 'llm-rubric')
+    .map(a => buildAssertion(a));
+
+  const llmRubrics = scenario.assertions
+    .filter(a => a.type === 'llm-rubric')
+    .map(a => ({
+      type: 'llm-rubric' as const,
+      value: (a as any).rubric,
+      threshold: (a as any).passingThreshold ?? 0.7,
+    }));
+
+  return {
+    description: scenario.description,
+    vars: {
+      scenario_id: scenario.id,
+      target_module: scenario.target.module,
+      target_function: scenario.target.function,
+      input_args: scenario.input.args,
+      input_kwargs: scenario.input.kwargs,
+    },
+    assert: [...assertions, ...llmRubrics],
+    metadata: {
+      category: scenario.category,
+      priority: scenario.priority,
+      tags: scenario.tags,
+    },
+  };
+}
+
+function buildAssertion(assertion: any): PromptfooAssertion {
+  switch (assertion.type) {
+    case 'equals':
+      return {
+        type: 'equals',
+        value: assertion.expected,
+      };
+
+    case 'contains':
+      return {
+        type: 'contains',
+        value: assertion.value,
+      };
+
+    case 'matches':
+      return {
+        type: 'regex',
+        value: assertion.pattern,
+      };
+
+    case 'typeof':
+      return {
+        type: 'python',
+        value: `type(output).__name__ == '${assertion.expected}'`,
+      };
+
+    case 'throws':
+      return {
+        type: 'python',
+        value: `'${assertion.errorType || 'Error'}' in str(output.get('error', ''))`,
+      };
+
+    case 'truthy':
+      return {
+        type: 'python',
+        value: 'bool(output)',
+      };
+
+    case 'falsy':
+      return {
+        type: 'python',
+        value: 'not bool(output)',
+      };
+
+    case 'custom':
+      return {
+        type: 'python',
+        value: assertion.check,
+      };
+
+    default:
+      return {
+        type: 'python',
+        value: 'True',
+      };
+  }
+}
+
+export async function generateTestProvider(outputPath: string): Promise<void> {
+  const providerCode = `#!/usr/bin/env python3
+"""Promptfoo provider that executes tests and returns structured results."""
+
+import subprocess
+import json
+import sys
+import os
+
+def get_provider_response(prompt: str, options: dict, context: dict) -> dict:
+    """Runs tests and returns structured results."""
+    
+    test_dir = options.get('config', {}).get('test_dir', './tests')
+    framework = options.get('config', {}).get('framework', 'pytest')
+    timeout = options.get('config', {}).get('timeout', 300)
+    
+    scenario_id = prompt.strip()
+    
+    try:
+        if framework == 'pytest':
+            result = subprocess.run(
+                [
+                    'python', '-m', 'pytest',
+                    '--json-report',
+                    '--json-report-file=/tmp/pytest_results.json',
+                    '-k', scenario_id,
+                    test_dir
+                ],
+                capture_output=True,
+                text=True,
+                timeout=timeout
+            )
+            
+            try:
+                with open('/tmp/pytest_results.json') as f:
+                    report = json.load(f)
+                    
+                output = {
+                    'passed': report.get('summary', {}).get('passed', 0),
+                    'failed': report.get('summary', {}).get('failed', 0),
+                    'skipped': report.get('summary', {}).get('skipped', 0),
+                    'tests': report.get('tests', []),
+                    'stdout': result.stdout,
+                    'stderr': result.stderr,
+                    'exit_code': result.returncode,
+                }
+            except FileNotFoundError:
+                output = {
+                    'passed': 0,
+                    'failed': 1,
+                    'error': 'Failed to generate pytest report',
+                    'stdout': result.stdout,
+                    'stderr': result.stderr,
+                }
+                
+        elif framework in ('vitest', 'jest'):
+            cmd = ['npx', framework, 'run', '--reporter=json']
+            if scenario_id:
+                cmd.extend(['--testNamePattern', scenario_id])
+            cmd.append(test_dir)
+            
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                text=True,
+                timeout=timeout
+            )
+            
+            try:
+                report = json.loads(result.stdout)
+                output = {
+                    'passed': report.get('numPassedTests', 0),
+                    'failed': report.get('numFailedTests', 0),
+                    'skipped': report.get('numSkippedTests', 0),
+                    'tests': report.get('testResults', []),
+                    'exit_code': result.returncode,
+                }
+            except json.JSONDecodeError:
+                output = {
+                    'passed': 0,
+                    'failed': 1,
+                    'error': 'Failed to parse test output',
+                    'stdout': result.stdout,
+                    'stderr': result.stderr,
+                }
+        else:
+            output = {'error': f'Unknown framework: {framework}'}
+            
+        return {
+            'output': json.dumps(output),
+            'error': None,
+        }
+        
+    except subprocess.TimeoutExpired:
+        return {
+            'output': json.dumps({'error': 'Test execution timed out', 'passed': 0, 'failed': 1}),
+            'error': None,
+        }
+    except Exception as e:
+        return {
+            'output': None,
+            'error': str(e),
+        }
+
+if __name__ == '__main__':
+    # For testing the provider directly
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--scenario', default='')
+    parser.add_argument('--test-dir', default='./tests')
+    parser.add_argument('--framework', default='pytest')
+    args = parser.parse_args()
+    
+    result = get_provider_response(
+        args.scenario,
+        {'config': {'test_dir': args.test_dir, 'framework': args.framework}},
+        {}
+    )
+    print(json.dumps(result, indent=2))
+`;
+
+  await mkdir(dirname(outputPath), { recursive: true });
+  await writeFile(outputPath, providerCode);
+}