mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 18:01:22 +00:00
- Add CompactionEntry type with firstKeptEntryIndex
- Add loadSessionFromEntries() for compaction-aware loading
- Add compact() function that returns CompactionEntry
- Add token calculation and cut point detection
- Add tests with real session fixture and LLM integration

Still TODO: settings, /compact and /autocompact commands, auto-trigger in TUI, /branch rework
266 lines
7.9 KiB
TypeScript
266 lines
7.9 KiB
TypeScript
/**
|
|
* Context compaction for long sessions.
|
|
*
|
|
* Pure functions for compaction logic. The session manager handles I/O,
|
|
* and after compaction the session is reloaded.
|
|
*/
|
|
|
|
import type { AppMessage } from "@mariozechner/pi-agent-core";
|
|
import type { AssistantMessage, Model, Usage } from "@mariozechner/pi-ai";
|
|
import { complete } from "@mariozechner/pi-ai";
|
|
import { type CompactionEntry, loadSessionFromEntries, type SessionEntry } from "./session-manager.js";
|
|
|
|
// ============================================================================
|
|
// Types
|
|
// ============================================================================
|
|
|
|
/**
 * Tunables that control when compaction triggers and how much recent
 * history survives it.
 */
export interface CompactionSettings {
	/** Master switch; when false, `shouldCompact` always reports false. */
	enabled: boolean;
	/**
	 * Headroom subtracted from the context window when deciding whether to
	 * compact; also used to size the summary's output token budget.
	 */
	reserveTokens: number;
	/** Approximate number of most-recent tokens to keep when choosing the cut point. */
	keepRecentTokens: number;
}
|
|
|
|
/**
 * Default compaction settings: compaction enabled, 16384 tokens of reserved
 * headroom, and roughly 20000 recent tokens kept.
 */
export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
	enabled: true,
	reserveTokens: 16_384,
	keepRecentTokens: 20_000,
};
|
|
|
|
// ============================================================================
|
|
// Token calculation
|
|
// ============================================================================
|
|
|
|
/**
 * Sum every token bucket of a usage record (input, output, cache read and
 * cache write) into a single context-size figure.
 *
 * @param usage - Usage record to total
 * @returns The sum of the four token counters
 */
export function calculateContextTokens(usage: Usage): number {
	const { input, output, cacheRead, cacheWrite } = usage;
	return input + output + cacheRead + cacheWrite;
}
|
|
|
|
/**
 * Extract the usage record from a message, if it has a usable one.
 *
 * @param msg - Any application message
 * @returns The message's usage when it is an assistant message carrying a
 *   truthy `usage` and its stop reason is not "aborted"; otherwise `null`
 */
function getAssistantUsage(msg: AppMessage): Usage | null {
	// Only assistant messages that actually carry a usage field qualify.
	if (msg.role !== "assistant" || !("usage" in msg)) {
		return null;
	}

	const candidate = msg as AssistantMessage;

	// Aborted responses are skipped, as are messages whose usage is missing/falsy.
	if (candidate.stopReason === "aborted" || !candidate.usage) {
		return null;
	}

	return candidate.usage;
}
|
|
|
|
/**
 * Scan session entries from newest to oldest and return the first usable
 * assistant usage record encountered.
 *
 * @param entries - Session entries, oldest first
 * @returns The most recent usage accepted by `getAssistantUsage`, or `null`
 *   when no message entry provides one
 */
export function getLastAssistantUsage(entries: SessionEntry[]): Usage | null {
	let cursor = entries.length;

	while (cursor-- > 0) {
		const candidate = entries[cursor];
		// Only message entries can carry usage.
		if (candidate.type !== "message") continue;

		const found = getAssistantUsage(candidate.message);
		if (found) return found;
	}

	return null;
}
|
|
|
|
/**
 * Decide whether the context has grown enough to warrant compaction.
 *
 * @param contextTokens - Tokens currently in context
 * @param contextWindow - Size of the model's context window
 * @param settings - Compaction settings (`enabled`, `reserveTokens`)
 * @returns `true` only when compaction is enabled and `contextTokens` is
 *   strictly greater than `contextWindow - settings.reserveTokens`
 */
export function shouldCompact(contextTokens: number, contextWindow: number, settings: CompactionSettings): boolean {
	if (settings.enabled) {
		// Compact once usage eats into the reserved headroom.
		const threshold = contextWindow - settings.reserveTokens;
		return contextTokens > threshold;
	}
	return false;
}
|
|
|
|
// ============================================================================
|
|
// Cut point detection
|
|
// ============================================================================
|
|
|
|
/**
 * Collect the indices of user-message entries (turn boundaries) in the
 * half-open range `[startIndex, endIndex)`.
 *
 * @param entries - Session entries
 * @param startIndex - First index to inspect (inclusive)
 * @param endIndex - Index at which to stop (exclusive)
 * @returns Matching entry indices in ascending order
 */
function findTurnBoundaries(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
	const turnStarts: number[] = [];
	let position = startIndex;

	while (position < endIndex) {
		const current = entries[position];
		const isUserMessage = current.type === "message" && current.message.role === "user";
		if (isUserMessage) {
			turnStarts.push(position);
		}
		position++;
	}

	return turnStarts;
}
|
|
|
|
/**
 * Choose the entry index at which kept history should begin so that roughly
 * `keepRecentTokens` of recent context survives.
 *
 * Only entries in the half-open range `[startIndex, endIndex)` are
 * considered. The result is always either `startIndex` or the index of a
 * user message, so a turn is never split.
 *
 * Outcomes:
 * - no user messages in range: `startIndex` (keep everything in range)
 * - user messages but no assistant usage in range: the last user message
 * - no older assistant usage differs from the newest one by at least
 *   `keepRecentTokens`: `startIndex`
 * - otherwise: the latest user message at or before the last assistant
 *   message still inside the budget (or `startIndex` if none precedes it)
 *
 * @param entries - Session entries
 * @param startIndex - First index eligible for cutting (inclusive)
 * @param endIndex - End of the range (exclusive)
 * @param keepRecentTokens - Approximate number of recent tokens to keep
 * @returns Entry index of the first entry to keep
 */
export function findCutPoint(
	entries: SessionEntry[],
	startIndex: number,
	endIndex: number,
	keepRecentTokens: number,
): number {
	const turnStarts = findTurnBoundaries(entries, startIndex, endIndex);
	if (turnStarts.length === 0) {
		return startIndex;
	}

	// Gather assistant usage samples, newest first.
	const usageSamples: Array<{ index: number; tokens: number }> = [];
	for (let position = endIndex - 1; position >= startIndex; position--) {
		const candidate = entries[position];
		if (candidate.type !== "message") continue;

		const usage = getAssistantUsage(candidate.message);
		if (usage) {
			usageSamples.push({ index: position, tokens: calculateContextTokens(usage) });
		}
	}

	if (usageSamples.length === 0) {
		// Without usage data there is nothing to measure; keep only the last turn.
		return turnStarts[turnStarts.length - 1];
	}

	// First older sample whose total is at least `keepRecentTokens` below the
	// newest total. Sample 0 is the newest itself and is never a candidate.
	const newestTokens = usageSamples[0].tokens;
	const overBudgetAt = usageSamples.findIndex(
		(sample, order) => order > 0 && newestTokens - sample.tokens >= keepRecentTokens,
	);
	if (overBudgetAt === -1) {
		return startIndex;
	}

	// The sample just newer than the over-budget one is the last assistant
	// message to keep; snap back to the user message that opens its turn.
	const lastKeptAssistantIndex = usageSamples[overBudgetAt - 1].index;
	for (let b = turnStarts.length - 1; b >= 0; b--) {
		if (turnStarts[b] <= lastKeptAssistantIndex) {
			return turnStarts[b];
		}
	}

	return startIndex;
}
|
|
|
|
// ============================================================================
|
|
// Summarization
|
|
// ============================================================================
|
|
|
|
/**
 * Instruction appended as the final user message when asking the model to
 * produce a handoff summary of the conversation so far.
 */
const SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff summary for another LLM that will resume the task.

Include:
- Current progress and key decisions made
- Important context, constraints, or user preferences
- Absolute file paths of any relevant files that were read or modified
- What remains to be done (clear next steps)
- Any critical data, examples, or references needed to continue

Be concise, structured, and focused on helping the next LLM seamlessly continue the work.`;
|
|
|
|
/**
 * Ask the model for a handoff summary of the conversation.
 *
 * The summarization prompt (optionally extended with `customInstructions`)
 * is appended to `currentMessages` as a trailing user message, and the
 * response's text blocks are joined with newlines.
 *
 * @param currentMessages - Messages making up the current context
 * @param model - Model to use for summarization
 * @param reserveTokens - Reserved token budget; 80% of it (rounded down) is
 *   passed as the response's `maxTokens`
 * @param apiKey - API key for the LLM call
 * @param signal - Optional abort signal forwarded to the LLM call
 * @param customInstructions - Optional extra focus appended to the prompt
 * @returns The concatenated text content of the model's response
 */
export async function generateSummary(
	currentMessages: AppMessage[],
	model: Model<any>,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
	customInstructions?: string,
): Promise<string> {
	const maxTokens = Math.floor(0.8 * reserveTokens);

	let prompt = SUMMARIZATION_PROMPT;
	if (customInstructions) {
		prompt = `${SUMMARIZATION_PROMPT}\n\nAdditional focus: ${customInstructions}`;
	}

	const summaryRequest = {
		role: "user" as const,
		content: prompt,
		timestamp: Date.now(),
	};
	const summarizationMessages = [...currentMessages, summaryRequest];

	const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey });

	// Keep only text blocks; any other content block type is ignored.
	const pieces: string[] = [];
	for (const block of response.content) {
		if (block.type === "text") {
			pieces.push(block.text);
		}
	}

	return pieces.join("\n");
}
|
|
|
|
// ============================================================================
|
|
// Main compaction function
|
|
// ============================================================================
|
|
|
|
/**
 * Compute a compaction for the session and produce its summary.
 *
 * The cut point is searched only among entries after the most recent
 * existing compaction entry (or from the start when there is none). The
 * summary is generated from the full current context as reconstructed by
 * `loadSessionFromEntries`.
 *
 * @param entries - All session entries
 * @param model - Model to use for summarization
 * @param settings - Compaction settings
 * @param apiKey - API key for LLM
 * @param signal - Optional abort signal
 * @param customInstructions - Optional custom focus for the summary
 * @returns The CompactionEntry to append to the session file
 */
export async function compact(
	entries: SessionEntry[],
	model: Model<any>,
	settings: CompactionSettings,
	apiKey: string,
	signal?: AbortSignal,
	customInstructions?: string,
): Promise<CompactionEntry> {
	// Rebuild the message list the model currently sees.
	const { messages: currentMessages } = loadSessionFromEntries(entries);

	// The searchable range starts right after the latest compaction entry,
	// or at 0 when the session has never been compacted.
	let boundaryStart = 0;
	for (let cursor = entries.length; cursor > 0; cursor--) {
		if (entries[cursor - 1].type === "compaction") {
			boundaryStart = cursor;
			break;
		}
	}
	const boundaryEnd = entries.length;

	// Context size before compaction; 0 when no usage has been recorded.
	let tokensBefore = 0;
	const lastUsage = getLastAssistantUsage(entries);
	if (lastUsage) {
		tokensBefore = calculateContextTokens(lastUsage);
	}

	const firstKeptEntryIndex = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);

	const summary = await generateSummary(
		currentMessages,
		model,
		settings.reserveTokens,
		apiKey,
		signal,
		customInstructions,
	);

	// The timestamp is taken after the summary has been generated.
	return {
		type: "compaction",
		timestamp: new Date().toISOString(),
		summary,
		firstKeptEntryIndex,
		tokensBefore,
	};
}
|