co-mono/packages/coding-agent/test/compaction.test.ts
Mario Zechner 9478a3c1f5 Fix SessionEntry type to exclude SessionHeader
- SessionEntry now only contains conversation entries (messages, compaction, etc.)
- SessionHeader is separate, not part of SessionEntry
- FileEntry = SessionHeader | SessionEntry (for file storage)
- getEntries() filters out header, returns SessionEntry[]
- Added getHeader() for accessing session metadata
- Updated compaction and tests to not expect header in entries
- Updated mom package to use FileEntry for internal storage
2025-12-30 22:42:17 +01:00

466 lines
16 KiB
TypeScript

import type { AppMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
import { getModel } from "@mariozechner/pi-ai";
import { readFileSync } from "fs";
import { join } from "path";
import { beforeEach, describe, expect, it } from "vitest";
import {
type CompactionSettings,
calculateContextTokens,
compact,
DEFAULT_COMPACTION_SETTINGS,
findCutPoint,
getLastAssistantUsage,
shouldCompact,
} from "../src/core/compaction.js";
import {
buildSessionContext,
type CompactionEntry,
createSummaryMessage,
type ModelChangeEntry,
migrateSessionEntries,
parseSessionEntries,
type SessionEntry,
type SessionMessageEntry,
type ThinkingLevelChangeEntry,
} from "../src/core/session-manager.js";
// ============================================================================
// Test fixtures
// ============================================================================
/**
 * Reads the `fixtures/large-session.jsonl` fixture next to this test file and
 * returns its entries with the session header removed.
 *
 * @returns Every parsed entry whose `type` is not `"session"`.
 */
function loadLargeSessionEntries(): SessionEntry[] {
	const fixturePath = join(__dirname, "fixtures/large-session.jsonl");
	const parsed = parseSessionEntries(readFileSync(fixturePath, "utf-8"));

	// Add id/parentId for v1 fixtures. The return value is not used, so the
	// migration is relied upon to update `parsed` in place.
	migrateSessionEntries(parsed);

	// Type guard that drops the header entry (type "session").
	const isConversationEntry = (entry: (typeof parsed)[number]): entry is SessionEntry => entry.type !== "session";
	return parsed.filter(isConversationEntry);
}
/**
 * Builds a `Usage` fixture from raw token counts.
 *
 * `totalTokens` is the sum of all four counts and every cost field is zero.
 *
 * @param input - Input token count.
 * @param output - Output token count.
 * @param cacheRead - Cache-read token count (defaults to 0).
 * @param cacheWrite - Cache-write token count (defaults to 0).
 * @returns The assembled usage object.
 */
function createMockUsage(input: number, output: number, cacheRead = 0, cacheWrite = 0): Usage {
	const totalTokens = input + output + cacheRead + cacheWrite;
	const cost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 };
	return { input, output, cacheRead, cacheWrite, totalTokens, cost };
}
/**
 * Builds a user-role message fixture whose content is the given text and
 * whose timestamp is the current time in milliseconds.
 *
 * @param text - Message content.
 */
function createUserMessage(text: string): AppMessage {
	const timestamp = Date.now();
	return { role: "user", content: text, timestamp };
}
/**
 * Builds an assistant message fixture with a single text content part.
 *
 * The message is always stamped as a completed ("stop") Anthropic
 * `claude-sonnet-4-5` response with the current time as its timestamp.
 *
 * @param text - Text of the single content part.
 * @param usage - Usage to attach; when omitted, a mock usage of 100 input and
 *   50 output tokens is used.
 */
function createAssistantMessage(text: string, usage?: Usage): AssistantMessage {
	const effectiveUsage = usage || createMockUsage(100, 50);
	const message: AssistantMessage = {
		role: "assistant",
		content: [{ type: "text", text }],
		usage: effectiveUsage,
		stopReason: "stop",
		timestamp: Date.now(),
		api: "anthropic-messages",
		provider: "anthropic",
		model: "claude-sonnet-4-5",
	};
	return message;
}
// Shared state for the entry factories below:
// - lastId is the id of the most recently created entry (null when none);
//   each new entry uses it as its parentId.
// - entryCounter is the numeric suffix used for the next "test-id-N" id.
let lastId: string | null = null;
let entryCounter = 0;

/** Restores the factory state so the next created entry is `test-id-0` with no parent. */
function resetEntryCounter(): void {
	lastId = null;
	entryCounter = 0;
}

// Reset counter before each test to get predictable IDs
beforeEach(() => resetEntryCounter());
/**
 * Wraps a message in a session "message" entry and appends it to the test chain.
 *
 * The entry gets the next `test-id-N` id, the previously created entry's id
 * as `parentId` (null for the first entry), and the current time as an ISO
 * timestamp. The shared counter and last-id state are advanced as a side effect.
 *
 * @param message - Message to store on the entry.
 */
function createMessageEntry(message: AppMessage): SessionMessageEntry {
	const parentId = lastId;
	const id = `test-id-${entryCounter++}`;
	lastId = id;
	return {
		type: "message",
		id,
		parentId,
		timestamp: new Date().toISOString(),
		message,
	};
}
/**
 * Creates a "compaction" entry and appends it to the test chain.
 *
 * The entry gets the next `test-id-N` id, the previously created entry's id
 * as `parentId`, the current time as an ISO timestamp, and a fixed
 * `tokensBefore` of 10000. The shared counter and last-id state are advanced
 * as a side effect.
 *
 * @param summary - Summary text stored on the entry.
 * @param firstKeptEntryId - Id stored as the entry's `firstKeptEntryId`.
 */
function createCompactionEntry(summary: string, firstKeptEntryId: string): CompactionEntry {
	const parentId = lastId;
	const id = `test-id-${entryCounter++}`;
	lastId = id;
	return {
		type: "compaction",
		id,
		parentId,
		timestamp: new Date().toISOString(),
		summary,
		firstKeptEntryId,
		tokensBefore: 10000,
	};
}
/**
 * Creates a "model_change" entry and appends it to the test chain.
 *
 * The entry gets the next `test-id-N` id, the previously created entry's id
 * as `parentId`, and the current time as an ISO timestamp. The shared counter
 * and last-id state are advanced as a side effect.
 *
 * @param provider - Provider name stored on the entry.
 * @param modelId - Model id stored on the entry.
 */
function createModelChangeEntry(provider: string, modelId: string): ModelChangeEntry {
	const parentId = lastId;
	const id = `test-id-${entryCounter++}`;
	lastId = id;
	return {
		type: "model_change",
		id,
		parentId,
		timestamp: new Date().toISOString(),
		provider,
		modelId,
	};
}
/**
 * Creates a "thinking_level_change" entry and appends it to the test chain.
 *
 * The entry gets the next `test-id-N` id, the previously created entry's id
 * as `parentId`, and the current time as an ISO timestamp. The shared counter
 * and last-id state are advanced as a side effect.
 *
 * @param thinkingLevel - Thinking level stored on the entry.
 */
function createThinkingLevelEntry(thinkingLevel: string): ThinkingLevelChangeEntry {
	const parentId = lastId;
	const id = `test-id-${entryCounter++}`;
	lastId = id;
	return {
		type: "thinking_level_change",
		id,
		parentId,
		timestamp: new Date().toISOString(),
		thinkingLevel,
	};
}
// ============================================================================
// Unit tests
// ============================================================================
// calculateContextTokens is expected to return the sum of the four token
// counts carried by a usage object.
describe("Token calculation", () => {
	it("should calculate total context tokens from usage", () => {
		// 1000 input + 500 output + 200 cache read + 100 cache write = 1800
		const contextTokens = calculateContextTokens(createMockUsage(1000, 500, 200, 100));
		expect(contextTokens).toBe(1800);
	});

	it("should handle zero values", () => {
		const contextTokens = calculateContextTokens(createMockUsage(0, 0, 0, 0));
		expect(contextTokens).toBe(0);
	});
});
// getLastAssistantUsage is expected to return the usage of the most recent
// assistant message that was not aborted, or null when there is none.
describe("getLastAssistantUsage", () => {
	// Wraps each message, in order, in a chained session message entry.
	const toEntries = (messages: AppMessage[]): SessionEntry[] =>
		messages.map((message) => createMessageEntry(message));

	it("should find the last non-aborted assistant message usage", () => {
		const entries = toEntries([
			createUserMessage("Hello"),
			createAssistantMessage("Hi", createMockUsage(100, 50)),
			createUserMessage("How are you?"),
			createAssistantMessage("Good", createMockUsage(200, 100)),
		]);

		const lastUsage = getLastAssistantUsage(entries);

		expect(lastUsage).not.toBeNull();
		// 200 is the input count of the final assistant message.
		expect(lastUsage?.input).toBe(200);
	});

	it("should skip aborted messages", () => {
		const aborted: AssistantMessage = {
			...createAssistantMessage("Aborted", createMockUsage(300, 150)),
			stopReason: "aborted",
		};
		const entries = toEntries([
			createUserMessage("Hello"),
			createAssistantMessage("Hi", createMockUsage(100, 50)),
			createUserMessage("How are you?"),
			aborted,
		]);

		const lastUsage = getLastAssistantUsage(entries);

		expect(lastUsage).not.toBeNull();
		// The aborted message (input 300) is ignored; the earlier one (input 100) wins.
		expect(lastUsage?.input).toBe(100);
	});

	it("should return null if no assistant messages", () => {
		const entries = toEntries([createUserMessage("Hello")]);
		expect(getLastAssistantUsage(entries)).toBeNull();
	});
});
// shouldCompact is exercised with the same limits (second argument 100000,
// reserveTokens 10000) with compaction enabled and disabled.
describe("shouldCompact", () => {
	it("should return true when context exceeds threshold", () => {
		const enabledSettings: CompactionSettings = {
			enabled: true,
			reserveTokens: 10000,
			keepRecentTokens: 20000,
		};
		// [context tokens, expected decision]
		const cases: Array<[number, boolean]> = [
			[95000, true],
			[89000, false],
		];
		for (const [contextTokens, expected] of cases) {
			expect(shouldCompact(contextTokens, 100000, enabledSettings)).toBe(expected);
		}
	});

	it("should return false when disabled", () => {
		const disabledSettings: CompactionSettings = {
			enabled: false,
			reserveTokens: 10000,
			keepRecentTokens: 20000,
		};
		// Same token count that triggers compaction when enabled.
		expect(shouldCompact(95000, 100000, disabledSettings)).toBe(false);
	});
});
// findCutPoint(entries, startIndex, endIndex, keepRecentTokens) is checked for
// the kind of entry it cuts at and for the split-turn metadata it reports.
describe("findCutPoint", () => {
	it("should find cut point based on actual token differences", () => {
		// Ten user/assistant turns; each assistant message reports a cumulative
		// cacheRead of turn * 1000 tokens.
		const entries: SessionEntry[] = [];
		for (let turn = 1; turn <= 10; turn++) {
			const label = turn - 1;
			entries.push(
				createMessageEntry(createUserMessage(`User ${label}`)),
				createMessageEntry(createAssistantMessage(`Assistant ${label}`, createMockUsage(0, 100, turn * 1000, 0))),
			);
		}

		// 20 entries, last assistant has 10000 tokens
		// keepRecentTokens = 2500: keep entries where diff < 2500
		const cut = findCutPoint(entries, 0, entries.length, 2500);

		// Should cut at a valid cut point (user or assistant message)
		const cutEntry = entries[cut.firstKeptEntryIndex];
		expect(cutEntry.type).toBe("message");
		const cutRole = (cutEntry as SessionMessageEntry).message.role;
		expect(cutRole === "user" || cutRole === "assistant").toBe(true);
	});

	it("should return startIndex if no valid cut points in range", () => {
		const onlyAssistant: SessionEntry[] = [createMessageEntry(createAssistantMessage("a"))];
		const cut = findCutPoint(onlyAssistant, 0, onlyAssistant.length, 1000);
		expect(cut.firstKeptEntryIndex).toBe(0);
	});

	it("should keep everything if all messages fit within budget", () => {
		const firstUser = createMessageEntry(createUserMessage("1"));
		const firstReply = createMessageEntry(createAssistantMessage("a", createMockUsage(0, 50, 500, 0)));
		const secondUser = createMessageEntry(createUserMessage("2"));
		const secondReply = createMessageEntry(createAssistantMessage("b", createMockUsage(0, 50, 1000, 0)));
		const entries: SessionEntry[] = [firstUser, firstReply, secondUser, secondReply];

		// The 50000-token budget is far larger than anything the usages report.
		const cut = findCutPoint(entries, 0, entries.length, 50000);
		expect(cut.firstKeptEntryIndex).toBe(0);
	});

	it("should indicate split turn when cutting at assistant message", () => {
		// Create a scenario where we cut at an assistant message mid-turn
		const turnOneUser = createMessageEntry(createUserMessage("Turn 1"));
		const turnOneReply = createMessageEntry(createAssistantMessage("A1", createMockUsage(0, 100, 1000, 0)));
		const turnTwoUser = createMessageEntry(createUserMessage("Turn 2")); // index 2
		const turnTwoReplyA = createMessageEntry(createAssistantMessage("A2-1", createMockUsage(0, 100, 5000, 0))); // index 3
		const turnTwoReplyB = createMessageEntry(createAssistantMessage("A2-2", createMockUsage(0, 100, 8000, 0))); // index 4
		const turnTwoReplyC = createMessageEntry(createAssistantMessage("A2-3", createMockUsage(0, 100, 10000, 0))); // index 5
		const entries: SessionEntry[] = [
			turnOneUser,
			turnOneReply,
			turnTwoUser,
			turnTwoReplyA,
			turnTwoReplyB,
			turnTwoReplyC,
		];

		// With keepRecentTokens = 3000, should cut somewhere in Turn 2
		const cut = findCutPoint(entries, 0, entries.length, 3000);

		// If cut at assistant message (not user), should indicate split turn.
		// NOTE(review): when the cut lands on a user message this test asserts
		// nothing and passes vacuously — consider pinning the expected index.
		const cutEntry = entries[cut.firstKeptEntryIndex] as SessionMessageEntry;
		const cutsMidTurn = cutEntry.message.role === "assistant";
		if (cutsMidTurn) {
			expect(cut.isSplitTurn).toBe(true);
			expect(cut.turnStartIndex).toBe(2); // Turn 2 starts at index 2
		}
	});
});
// createSummaryMessage is expected to produce a user message containing both
// the fixed compaction prefix and the summary text.
describe("createSummaryMessage", () => {
	it("should create user message with prefix", () => {
		const summaryMessage = createSummaryMessage("This is the summary");

		expect(summaryMessage.role).toBe("user");
		// Guard only narrows the type; the expectation above already fails
		// the test for any other role.
		if (summaryMessage.role !== "user") {
			return;
		}

		const { content } = summaryMessage;
		expect(content).toContain(
			"The conversation history before this point was compacted into the following summary:",
		);
		expect(content).toContain("This is the summary");
	});
});
// buildSessionContext is checked for the messages, model and thinking level it
// derives from a list of session entries, with and without compaction entries.
// Entries are created in list order because ids and parent links come from the
// shared counter.
describe("buildSessionContext", () => {
	it("should load all messages when no compaction", () => {
		const firstUser = createMessageEntry(createUserMessage("1"));
		const firstReply = createMessageEntry(createAssistantMessage("a"));
		const secondUser = createMessageEntry(createUserMessage("2"));
		const secondReply = createMessageEntry(createAssistantMessage("b"));
		const entries: SessionEntry[] = [firstUser, firstReply, secondUser, secondReply];

		const context = buildSessionContext(entries);

		expect(context.messages.length).toBe(4);
		expect(context.thinkingLevel).toBe("off");
		// Matches the provider/model stamped on the assistant fixtures.
		expect(context.model).toEqual({ provider: "anthropic", modelId: "claude-sonnet-4-5" });
	});

	it("should handle single compaction", () => {
		// IDs: u1=test-id-0, a1=test-id-1, u2=test-id-2, a2=test-id-3, compaction=test-id-4, u3=test-id-5, a3=test-id-6
		const firstUser = createMessageEntry(createUserMessage("1"));
		const firstReply = createMessageEntry(createAssistantMessage("a"));
		const secondUser = createMessageEntry(createUserMessage("2"));
		const secondReply = createMessageEntry(createAssistantMessage("b"));
		// keep from the second user message onwards
		const compaction = createCompactionEntry("Summary of 1,a,2,b", secondUser.id);
		const thirdUser = createMessageEntry(createUserMessage("3"));
		const thirdReply = createMessageEntry(createAssistantMessage("c"));
		const entries: SessionEntry[] = [
			firstUser,
			firstReply,
			secondUser,
			secondReply,
			compaction,
			thirdUser,
			thirdReply,
		];

		const context = buildSessionContext(entries);

		// summary + kept (u2, a2) + after (u3, a3) = 5
		expect(context.messages.length).toBe(5);
		expect(context.messages[0].role).toBe("user");
		expect((context.messages[0] as any).content).toContain("Summary of 1,a,2,b");
	});

	it("should handle multiple compactions (only latest matters)", () => {
		// First batch
		const firstUser = createMessageEntry(createUserMessage("1"));
		const firstReply = createMessageEntry(createAssistantMessage("a"));
		const firstCompaction = createCompactionEntry("First summary", firstUser.id);
		// Second batch
		const secondUser = createMessageEntry(createUserMessage("2"));
		const secondReply = createMessageEntry(createAssistantMessage("b"));
		const thirdUser = createMessageEntry(createUserMessage("3"));
		const thirdReply = createMessageEntry(createAssistantMessage("c"));
		// keep from the third user message onwards
		const secondCompaction = createCompactionEntry("Second summary", thirdUser.id);
		// After second compaction
		const fourthUser = createMessageEntry(createUserMessage("4"));
		const fourthReply = createMessageEntry(createAssistantMessage("d"));
		const entries: SessionEntry[] = [
			firstUser,
			firstReply,
			firstCompaction,
			secondUser,
			secondReply,
			thirdUser,
			thirdReply,
			secondCompaction,
			fourthUser,
			fourthReply,
		];

		const context = buildSessionContext(entries);

		// summary + kept from u3 (u3, c) + after (u4, d) = 5
		expect(context.messages.length).toBe(5);
		expect((context.messages[0] as any).content).toContain("Second summary");
	});

	it("should keep all messages when firstKeptEntryId is first entry", () => {
		const firstUser = createMessageEntry(createUserMessage("1"));
		const firstReply = createMessageEntry(createAssistantMessage("a"));
		// keep from first entry
		const compaction = createCompactionEntry("First summary", firstUser.id);
		const secondUser = createMessageEntry(createUserMessage("2"));
		const secondReply = createMessageEntry(createAssistantMessage("b"));
		const entries: SessionEntry[] = [firstUser, firstReply, compaction, secondUser, secondReply];

		const context = buildSessionContext(entries);

		// summary + all messages (u1, a1, u2, b) = 5
		expect(context.messages.length).toBe(5);
	});

	it("should track model and thinking level changes", () => {
		const userEntry = createMessageEntry(createUserMessage("1"));
		const modelChange = createModelChangeEntry("openai", "gpt-4");
		const replyEntry = createMessageEntry(createAssistantMessage("a"));
		const thinkingChange = createThinkingLevelEntry("high");
		const entries: SessionEntry[] = [userEntry, modelChange, replyEntry, thinkingChange];

		const context = buildSessionContext(entries);

		// model_change is later overwritten by assistant message's model info
		expect(context.model).toEqual({ provider: "anthropic", modelId: "claude-sonnet-4-5" });
		expect(context.thinkingLevel).toBe("high");
	});
});
// ============================================================================
// Integration tests with real session data
// ============================================================================
// Runs the parsing, cut-point and context-building code against the recorded
// large-session.jsonl fixture. Each test loads its own copy of the entries.
describe("Large session fixture", () => {
	it("should parse the large session", () => {
		const entries = loadLargeSessionEntries();
		expect(entries.length).toBeGreaterThan(100);

		const messageEntries = entries.filter((entry) => entry.type === "message");
		expect(messageEntries.length).toBeGreaterThan(100);
	});

	it("should find cut point in large session", () => {
		const entries = loadLargeSessionEntries();
		const { keepRecentTokens } = DEFAULT_COMPACTION_SETTINGS;

		const cut = findCutPoint(entries, 0, entries.length, keepRecentTokens);

		// Cut point should be at a message entry (user or assistant)
		const cutEntry = entries[cut.firstKeptEntryIndex];
		expect(cutEntry.type).toBe("message");
		const cutRole = (cutEntry as SessionMessageEntry).message.role;
		expect(cutRole === "user" || cutRole === "assistant").toBe(true);
	});

	it("should load session correctly", () => {
		const context = buildSessionContext(loadLargeSessionEntries());
		expect(context.messages.length).toBeGreaterThan(100);
		expect(context.model).not.toBeNull();
	});
});
// ============================================================================
// LLM integration tests (skipped without API key)
// ============================================================================
// Calls compact() against the real model with the large-session fixture.
// The whole suite is skipped when ANTHROPIC_OAUTH_TOKEN is not set; each test
// is given a 60000 ms timeout.
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("LLM summarization", () => {
	// Runs compact() on the given entries with claude-sonnet-4-5, the default
	// compaction settings and the token taken from the environment.
	const compactWithSonnet = (entries: SessionEntry[]) => {
		const model = getModel("anthropic", "claude-sonnet-4-5")!;
		return compact(entries, model, DEFAULT_COMPACTION_SETTINGS, process.env.ANTHROPIC_OAUTH_TOKEN!);
	};

	it("should generate a compaction result for the large session", async () => {
		const compactionResult = await compactWithSonnet(loadLargeSessionEntries());
		const { summary, firstKeptEntryId, tokensBefore } = compactionResult;

		expect(summary.length).toBeGreaterThan(100);
		expect(firstKeptEntryId).toBeTruthy();
		expect(tokensBefore).toBeGreaterThan(0);

		// Printed for manual inspection of the generated summary.
		console.log("Summary length:", summary.length);
		console.log("First kept entry ID:", firstKeptEntryId);
		console.log("Tokens before:", tokensBefore);
		console.log("\n--- SUMMARY ---\n");
		console.log(summary);
	}, 60000);

	it("should produce valid session after compaction", async () => {
		const entries = loadLargeSessionEntries();
		const beforeCompaction = buildSessionContext(entries);
		const compactionResult = await compactWithSonnet(entries);

		// Simulate appending compaction to entries by creating a proper entry
		// whose parent is the last existing entry.
		const tail = entries[entries.length - 1];
		const compactionEntry: CompactionEntry = {
			type: "compaction",
			id: "compaction-test-id",
			parentId: tail.id,
			timestamp: new Date().toISOString(),
			...compactionResult,
		};
		const afterCompaction = buildSessionContext([...entries, compactionEntry]);

		// Should have summary + kept messages
		expect(afterCompaction.messages.length).toBeLessThan(beforeCompaction.messages.length);
		expect(afterCompaction.messages[0].role).toBe("user");
		expect((afterCompaction.messages[0] as any).content).toContain(compactionResult.summary);

		console.log("Original messages:", beforeCompaction.messages.length);
		console.log("After compaction:", afterCompaction.messages.length);
	}, 60000);
});