mirror of https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-17 01:04:36 +00:00

mom: remove dynamic timestamp from system prompt for better cache hits

This commit is contained in:
parent 078661c3b1
commit 2a0f239288

4 changed files with 892 additions and 5 deletions
502 packages/coding-agent/docs/compaction-strategies.ts Normal file
@@ -0,0 +1,502 @@
/**
 * CLI tool to test different compaction strategies on session fixtures.
 *
 * Usage:
 *   npx tsx docs/compaction-strategies.ts [fixture-name]
 *
 * Examples:
 *   npx tsx docs/compaction-strategies.ts large-session
 *   npx tsx docs/compaction-strategies.ts before-compaction
 *
 * Output:
 *   docs/compaction-results/[fixture]-[strategy].md
 */

import * as fs from "fs";
import * as path from "path";
import { fileURLToPath } from "url";

import { complete, getModel, type UserMessage } from "@mariozechner/pi-ai";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// ============================================================================
// Types
// ============================================================================

interface SessionEntry {
  type: string;
  timestamp: string;
  message?: {
    role: string;
    content: unknown;
    stopReason?: string;
  };
}

interface SimpleMessage {
  role: "user" | "assistant";
  content: string;
  tokens: number; // estimated
}

interface SliceSummary {
  sliceIndex: number;
  summary: string;
  tokens: number;
}

interface StrategyResult {
  name: string;
  summary: string;
  totalInputTokens: number;
  totalOutputTokens: number;
  numCalls: number;
  timeMs: number;
}

// ============================================================================
// Config
// ============================================================================

const MODEL = getModel("anthropic", "claude-sonnet-4-5");
const SLICE_TOKENS = 10000; // target tokens per slice (smaller for testing)
const SUMMARY_BUDGET = 2000; // max tokens for each summary call
const FINAL_SUMMARY_BUDGET = 4000; // max tokens for final/stitched summary
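// Back-of-envelope under these settings (illustrative, using the ~4 chars/token
// estimate below): a 100k-token session splits into ~10 slices, so the parallel
// strategy makes ~11 API calls and each sequential strategy makes ~10.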
// ============================================================================
// Utilities
// ============================================================================

function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}
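// Example of the heuristic (a rough rule of thumb, not a real tokenizer):
// "USER:\nfix the failing test" is 26 chars, so estimateTokens(...) === 7.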
function extractTextContent(content: unknown): string {
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    return content
      .map((block: any) => {
        // `any` here: blocks come from untyped JSONL, and we probe their shape.
        if (typeof block === "string") return block;
        if (block.type === "text") return block.text || "";
        if (block.type === "tool_use")
          return `[Tool: ${block.name}]\n${JSON.stringify(block.arguments || block.input, null, 2)}`;
        if (block.type === "tool_result") {
          const text = typeof block.content === "string" ? block.content : JSON.stringify(block.content);
          return `[Tool Result: ${block.tool_use_id}]\n${text.slice(0, 2000)}${text.length > 2000 ? "..." : ""}`;
        }
        if (block.type === "thinking") return `[Thinking]\n${block.thinking}`;
        return "";
      })
      .filter(Boolean)
      .join("\n");
  }
  return JSON.stringify(content);
}
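// Example (hypothetical block array; shapes assumed from the session format):
//   extractTextContent([
//     { type: "text", text: "Reading the file now." },
//     { type: "tool_use", name: "read", input: { path: "/tmp/a.ts" } },
//   ])
// joins to:
//   Reading the file now.
//   [Tool: read]
//   {
//     "path": "/tmp/a.ts"
//   }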
function loadSession(fixturePath: string): SimpleMessage[] {
  const content = fs.readFileSync(fixturePath, "utf-8");
  const lines = content.trim().split("\n");
  const messages: SimpleMessage[] = [];

  for (const line of lines) {
    try {
      const entry: SessionEntry = JSON.parse(line);
      if (entry.type === "message" && entry.message) {
        const role = entry.message.role;
        if (role !== "user" && role !== "assistant") continue;
        if (entry.message.stopReason === "aborted" || entry.message.stopReason === "error") continue;

        const text = extractTextContent(entry.message.content);
        if (!text.trim()) continue;

        messages.push({
          role: role as "user" | "assistant",
          content: text,
          tokens: estimateTokens(text),
        });
      }
    } catch {
      // skip malformed lines
    }
  }

  return messages;
}
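// A fixture line this parser accepts might look like (schema assumed from the
// fields used above):
// {"type":"message","timestamp":"...","message":{"role":"user","content":"Fix the build"}}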
function segmentByTokens(messages: SimpleMessage[], sliceTokens: number): SimpleMessage[][] {
  const slices: SimpleMessage[][] = [];
  let current: SimpleMessage[] = [];
  let currentTokens = 0;

  for (const msg of messages) {
    if (currentTokens + msg.tokens > sliceTokens && current.length > 0) {
      slices.push(current);
      current = [];
      currentTokens = 0;
    }
    current.push(msg);
    currentTokens += msg.tokens;
  }

  if (current.length > 0) {
    slices.push(current);
  }

  return slices;
}
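// Worked example (illustrative): with sliceTokens = 10 and message token counts
// [6, 6, 3, 12], the greedy loop yields slices [6], [6, 3], [12]. A slice is
// flushed before a message would overflow it, and an oversized message still
// lands in a slice of its own rather than being split.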
function messagesToTranscript(messages: SimpleMessage[]): string {
  return messages
    .map((m) => {
      const prefix = m.role === "user" ? "USER:" : "ASSISTANT:";
      return `${prefix}\n${m.content}`;
    })
    .join("\n\n---\n\n");
}

async function callLLM(
  systemPrompt: string,
  userPrompt: string,
  maxTokens: number,
): Promise<{ text: string; inputTokens: number; outputTokens: number }> {
  const apiKey = process.env.ANTHROPIC_API_KEY;
  if (!apiKey) throw new Error("ANTHROPIC_API_KEY not set");

  const messages: UserMessage[] = [
    {
      role: "user",
      content: userPrompt,
      timestamp: Date.now(),
    },
  ];

  const result = await complete(
    MODEL,
    {
      system: systemPrompt,
      messages,
    },
    {
      maxTokens,
      apiKey,
    },
  );

  const text = result.content
    .filter((c): c is { type: "text"; text: string } => c.type === "text")
    .map((c) => c.text)
    .join("\n");

  return {
    text,
    // Cache reads still occupy context, so count them as input for comparison.
    inputTokens: result.usage.input + result.usage.cacheRead,
    outputTokens: result.usage.output,
  };
}
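// Minimal usage sketch (hypothetical prompts; requires ANTHROPIC_API_KEY):
//   const { text, inputTokens } = await callLLM("You are terse.", "Say hi.", 100);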
// ============================================================================
// Strategy 1: Single-shot (current approach)
// ============================================================================

const SINGLE_SHOT_SYSTEM = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.

Include:
- Current progress and key decisions made
- Important context, constraints, or user preferences
- Absolute file paths of any relevant files that were read or modified
- What remains to be done (clear next steps)
- Any critical data, examples, or references needed to continue

Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`;

async function strategySingleShot(messages: SimpleMessage[]): Promise<StrategyResult> {
  const start = Date.now();
  const transcript = messagesToTranscript(messages);

  const { text, inputTokens, outputTokens } = await callLLM(
    SINGLE_SHOT_SYSTEM,
    `Here is the conversation to summarize:\n\n<conversation>\n${transcript}\n</conversation>\n\nProvide your summary now:`,
    FINAL_SUMMARY_BUDGET,
  );

  return {
    name: "single-shot",
    summary: text,
    totalInputTokens: inputTokens,
    totalOutputTokens: outputTokens,
    numCalls: 1,
    timeMs: Date.now() - start,
  };
}

// ============================================================================
// Strategy 2: Parallel slices with LLM stitch
// ============================================================================

const SLICE_SYSTEM = `You are summarizing one segment of a longer coding session.
Be concise but capture key information: user requests, files modified, decisions made, errors fixed.
Preserve file paths and important code snippets.`;

const STITCH_SYSTEM = `You are combining multiple chronological summaries of a coding session into one coherent handoff document.
Remove redundancy. Preserve all file paths and key details. Emphasize the most recent work (last segment).`;
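// Shape of this strategy, as pseudocode (the real flow with token accounting
// follows): fan out one summary call per slice, then fan in with one stitch call.
//   summaries = await Promise.all(slices.map(summarize))
//   final = await stitch(summaries)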
async function strategyParallelStitch(messages: SimpleMessage[]): Promise<StrategyResult> {
  const start = Date.now();
  const slices = segmentByTokens(messages, SLICE_TOKENS);
  let totalInput = 0;
  let totalOutput = 0;

  console.log(`  Parallel: ${slices.length} slices`);

  // Summarize all slices in parallel
  const sliceSummaries = await Promise.all(
    slices.map(async (slice, i) => {
      const isLast = i === slices.length - 1;
      const transcript = messagesToTranscript(slice);
      const prompt = `Segment ${i + 1} of ${slices.length}${isLast ? " (MOST RECENT)" : ""}:

${transcript}

${isLast ? "This is the most recent activity. Be detailed about current state and next steps." : "Summarize the key points from this segment."}`;

      const { text, inputTokens, outputTokens } = await callLLM(SLICE_SYSTEM, prompt, SUMMARY_BUDGET);
      totalInput += inputTokens;
      totalOutput += outputTokens;

      return { sliceIndex: i, summary: text, tokens: estimateTokens(text) };
    }),
  );

  // Stitch summaries together
  const stitchPrompt = sliceSummaries.map((s) => `=== Segment ${s.sliceIndex + 1} ===\n${s.summary}`).join("\n\n");

  const {
    text: finalSummary,
    inputTokens,
    outputTokens,
  } = await callLLM(
    STITCH_SYSTEM,
    `Combine these ${sliceSummaries.length} chronological segment summaries into one unified handoff summary:\n\n${stitchPrompt}`,
    FINAL_SUMMARY_BUDGET,
  );
  totalInput += inputTokens;
  totalOutput += outputTokens;

  return {
    name: "parallel-stitch",
    summary: finalSummary,
    totalInputTokens: totalInput,
    totalOutputTokens: totalOutput,
    numCalls: slices.length + 1,
    timeMs: Date.now() - start,
  };
}

// ============================================================================
// Strategy 3: Sequential slices with accumulated context
// ============================================================================

const SEQUENTIAL_SYSTEM = `You are summarizing one segment of a longer coding session.
You may be given summaries of earlier segments for context.
Create a summary of THIS segment's content. Do not repeat information from previous summaries.
Be concise but capture: user requests, files modified, decisions made, errors fixed.`;
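// Shape of this strategy, as pseudocode: each call sees all earlier summaries,
//   summary[i] = summarize(summaries[0..i-1], slice[i])
// so prompts grow as the session progresses, unlike the rolling variant below.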
async function strategySequentialAccumulated(messages: SimpleMessage[]): Promise<StrategyResult> {
  const start = Date.now();
  const slices = segmentByTokens(messages, SLICE_TOKENS);
  let totalInput = 0;
  let totalOutput = 0;

  console.log(`  Sequential: ${slices.length} slices`);

  const sliceSummaries: SliceSummary[] = [];

  for (let i = 0; i < slices.length; i++) {
    const slice = slices[i];
    const isLast = i === slices.length - 1;
    const transcript = messagesToTranscript(slice);

    // Build context from previous summaries
    const previousContext =
      sliceSummaries.length > 0
        ? `Previous segments summary:\n${sliceSummaries.map((s) => `[Segment ${s.sliceIndex + 1}] ${s.summary}`).join("\n\n")}\n\n---\n\n`
        : "";

    const prompt = `${previousContext}Current segment (${i + 1} of ${slices.length})${isLast ? " - MOST RECENT" : ""}:

${transcript}

${isLast ? "This is the most recent activity. Be detailed about current state, pending work, and next steps." : "Summarize the key NEW information from this segment (don't repeat what's in previous summaries)."}`;

    const { text, inputTokens, outputTokens } = await callLLM(
      SEQUENTIAL_SYSTEM,
      prompt,
      isLast ? FINAL_SUMMARY_BUDGET : SUMMARY_BUDGET,
    );
    totalInput += inputTokens;
    totalOutput += outputTokens;

    sliceSummaries.push({
      sliceIndex: i,
      summary: text,
      tokens: estimateTokens(text),
    });

    console.log(`  Slice ${i + 1}/${slices.length} done`);
  }

  // Combine all slice summaries into final output
  const finalSummary = sliceSummaries.map((s) => `## Segment ${s.sliceIndex + 1}\n\n${s.summary}`).join("\n\n---\n\n");

  return {
    name: "sequential-accumulated",
    summary: finalSummary,
    totalInputTokens: totalInput,
    totalOutputTokens: totalOutput,
    numCalls: slices.length,
    timeMs: Date.now() - start,
  };
}

// ============================================================================
// Strategy 4: Sequential with rolling summary
// ============================================================================

const ROLLING_SYSTEM = `You are creating a rolling summary of a coding session.
Given a previous summary and new conversation content, produce an UPDATED summary that incorporates the new information.
Keep the summary focused and under the token budget. Condense older details as needed to make room for recent work.`;
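// Shape of this strategy, as pseudocode: a fold with a bounded accumulator,
//   running = summarize(running, slice[i])
// so each prompt stays near SLICE_TOKENS plus the previous summary's size.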
async function strategySequentialRolling(messages: SimpleMessage[]): Promise<StrategyResult> {
  const start = Date.now();
  const slices = segmentByTokens(messages, SLICE_TOKENS);
  let totalInput = 0;
  let totalOutput = 0;

  console.log(`  Rolling: ${slices.length} slices`);

  let runningSummary = "";

  for (let i = 0; i < slices.length; i++) {
    const slice = slices[i];
    const isLast = i === slices.length - 1;
    const transcript = messagesToTranscript(slice);

    const prompt = runningSummary
      ? `Current summary so far:\n${runningSummary}\n\n---\n\nNew content (segment ${i + 1} of ${slices.length}):\n${transcript}\n\n${isLast ? "This is the final segment. Produce the complete handoff summary with emphasis on current state and next steps." : "Update the summary to incorporate this new content. Condense older details if needed."}`
      : `First segment of the conversation:\n${transcript}\n\nCreate an initial summary capturing the key points.`;

    const { text, inputTokens, outputTokens } = await callLLM(
      ROLLING_SYSTEM,
      prompt,
      isLast ? FINAL_SUMMARY_BUDGET : SUMMARY_BUDGET,
    );
    totalInput += inputTokens;
    totalOutput += outputTokens;

    runningSummary = text;
    console.log(`  Slice ${i + 1}/${slices.length} done`);
  }

  return {
    name: "sequential-rolling",
    summary: runningSummary,
    totalInputTokens: totalInput,
    totalOutputTokens: totalOutput,
    numCalls: slices.length,
    timeMs: Date.now() - start,
  };
}

// ============================================================================
// Main
// ============================================================================

async function main() {
  const fixtureName = process.argv[2] || "large-session";
  const fixturesDir = path.join(__dirname, "fixtures");
  const fixturePath = path.join(fixturesDir, `${fixtureName}.jsonl`);

  if (!fs.existsSync(fixturePath)) {
    console.error(`Fixture not found: ${fixturePath}`);
    console.error(`Available fixtures:`);
    for (const f of fs.readdirSync(fixturesDir).filter((f) => f.endsWith(".jsonl"))) {
      console.error(`  - ${f.replace(".jsonl", "")}`);
    }
    process.exit(1);
  }

  console.log(`Loading fixture: ${fixtureName}`);
  const messages = loadSession(fixturePath);
  const totalTokens = messages.reduce((sum, m) => sum + m.tokens, 0);
  console.log(`  ${messages.length} messages, ~${totalTokens} tokens\n`);

  const resultsDir = path.join(__dirname, "compaction-results");
  fs.mkdirSync(resultsDir, { recursive: true });

  const strategies: Array<{
    name: string;
    fn: (msgs: SimpleMessage[]) => Promise<StrategyResult>;
  }> = [
    { name: "single-shot", fn: strategySingleShot },
    { name: "parallel-stitch", fn: strategyParallelStitch },
    { name: "sequential-accumulated", fn: strategySequentialAccumulated },
    { name: "sequential-rolling", fn: strategySequentialRolling },
  ];

  const results: StrategyResult[] = [];

  for (const strategy of strategies) {
    console.log(`Running strategy: ${strategy.name}`);
    try {
      const result = await strategy.fn(messages);
      results.push(result);

      // Write individual result
      const outputPath = path.join(resultsDir, `${fixtureName}-${strategy.name}.md`);
      const output = `# Compaction Result: ${strategy.name}

## Stats
- Input tokens: ${result.totalInputTokens}
- Output tokens: ${result.totalOutputTokens}
- API calls: ${result.numCalls}
- Time: ${result.timeMs}ms

## Summary

${result.summary}
`;
      fs.writeFileSync(outputPath, output);
      console.log(`  ✓ Wrote ${outputPath}\n`);
    } catch (err) {
      console.error(`  ✗ Failed: ${err}\n`);
    }
  }

  // Write comparison summary
  const comparisonPath = path.join(resultsDir, `${fixtureName}-comparison.md`);
  const comparison = `# Compaction Strategy Comparison: ${fixtureName}

## Input
- Messages: ${messages.length}
- Estimated tokens: ${totalTokens}

## Results

| Strategy | Input Tokens | Output Tokens | API Calls | Time (ms) |
|----------|-------------|---------------|-----------|-----------|
${results.map((r) => `| ${r.name} | ${r.totalInputTokens} | ${r.totalOutputTokens} | ${r.numCalls} | ${r.timeMs} |`).join("\n")}

## Summaries

${results.map((r) => `### ${r.name}\n\n${r.summary}\n`).join("\n---\n\n")}
`;
  fs.writeFileSync(comparisonPath, comparison);
  console.log(`Wrote comparison: ${comparisonPath}`);
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});