improvements and promptfoo

This commit is contained in:
Harivansh Rathi 2026-01-11 20:02:30 -05:00
parent 6698c12e5b
commit ff5300f4e0
13 changed files with 1082 additions and 117 deletions

32
rubrics/code-quality.yaml Normal file
View file

@ -0,0 +1,32 @@
# Rubric: code-quality
# Scores generated code against four weighted criteria.
# The criterion weights below sum to 1.0 (0.3 + 0.4 + 0.2 + 0.1).
name: code-quality
description: Evaluates generated code for quality and maintainability
# NOTE(review): presumably the minimum weighted score required to pass;
# confirm against the code that loads this rubric.
passingThreshold: 0.7

criteria:
  # Each entry carries a name, a weight, a description, and one "good" and
  # one "bad" illustrative example.
  - name: readability
    weight: 0.3
    description: Code is easy to read and understand
    examples:
      good: "Clear variable names, logical flow, proper indentation"
      bad: "Single-letter variables, deeply nested logic, inconsistent style"

  # Highest-weighted criterion in this rubric.
  - name: correctness
    weight: 0.4
    description: Code correctly implements the intended behavior
    examples:
      good: "Handles edge cases, correct algorithm, proper error handling"
      bad: "Missing edge cases, off-by-one errors, swallowed exceptions"

  - name: efficiency
    weight: 0.2
    description: Code uses appropriate data structures and algorithms
    examples:
      good: "O(n) where O(n) is optimal, avoids unnecessary allocations"
      bad: "O(n²) when O(n) is possible, creates objects in tight loops"

  - name: maintainability
    weight: 0.1
    description: Code is easy to modify and extend
    examples:
      good: "Single responsibility, low coupling, clear interfaces"
      bad: "God functions, tight coupling, magic numbers"

View file

@ -0,0 +1,32 @@
# Rubric: documentation
# Scores code documentation and docstrings against four weighted criteria.
# The criterion weights below sum to 1.0 (0.35 + 0.35 + 0.2 + 0.1).
name: documentation
description: Evaluates quality of code documentation and docstrings
# NOTE(review): presumably the minimum weighted score required to pass;
# confirm against the code that loads this rubric.
passingThreshold: 0.65

criteria:
  # Each entry carries a name, a weight, a description, and one "good" and
  # one "bad" illustrative example.
  - name: completeness
    weight: 0.35
    description: Documentation covers all parameters, return values, and exceptions
    examples:
      good: "Fully documents args, returns, raises, and includes usage example"
      bad: "Missing parameter descriptions or return type"

  - name: accuracy
    weight: 0.35
    description: Documentation accurately describes the function's behavior
    examples:
      good: "Description matches implementation, types are correct"
      bad: "Outdated docs that don't match current behavior"

  # The nested "examples" key under this criterion holds the good/bad samples,
  # just as it does for every other criterion; only the criterion's name
  # happens to be "examples".
  - name: examples
    weight: 0.2
    description: Includes helpful usage examples
    examples:
      good: "Shows common use cases with expected outputs"
      bad: "No examples or only trivial ones"

  - name: style
    weight: 0.1
    description: Follows project/language documentation conventions
    examples:
      good: "Uses standard docstring format (Google, NumPy, or reStructuredText)"
      bad: "Inconsistent or non-standard format"

View file

@ -0,0 +1,25 @@
# Rubric: error-messages
# Scores error messages against three weighted criteria.
# The criterion weights below sum to 1.0 (0.4 + 0.4 + 0.2).
name: error-messages
description: Evaluates quality of error messages
# NOTE(review): presumably the minimum weighted score required to pass;
# confirm against the code that loads this rubric.
passingThreshold: 0.6

criteria:
  # Each entry carries a name, a weight, a description, and one "good" and
  # one "bad" illustrative example.
  - name: clarity
    weight: 0.4
    description: Error message clearly explains what went wrong
    examples:
      # Double quotes are kept because the sample text contains single quotes.
      good: "Invalid email format: 'not-an-email' is missing '@' symbol"
      bad: "Error: validation failed"

  - name: actionability
    weight: 0.4
    description: Error message suggests how to fix the problem
    examples:
      good: "File not found. Create the file or check the path spelling."
      bad: "ENOENT"

  - name: context
    weight: 0.2
    description: Error message includes relevant context (file, line, values)
    examples:
      good: "TypeError at line 42 in auth.py: expected str, got int (value=123)"
      bad: "type error"