feat(coding-agent): implement new compaction system with overflow recovery

Phase 1: Updated compaction.ts
- findCutPoint now returns CutPointResult with isSplitTurn and turnStartIndex
- Can cut at user, assistant, or bashExecution messages (never tool results)
- Added turnPrefixSummary support for split turns (parallel summarization)
- estimateTokens helper for context size estimation

Phase 2: Updated session-manager.ts
- CompactionEntry now has optional turnPrefixSummary field
- loadSessionFromEntries injects both summaries when turn was split

Phase 3: Updated agent-session.ts
- Overflow detection via isContextOverflow after agent_end
- Proactive compaction check on turn_end before next LLM call
- _abortingForCompaction flag to skip saving aborted messages
- Auto-retry after overflow recovery or proactive compaction
- New event fields: reason (overflow/threshold), willRetry

Phase 4: Updated interactive-mode.ts
- Shows reason in compaction status (Context overflow detected...)
- Shows retry status after compaction

Tests updated for new CutPointResult return type.
This commit is contained in:
Mario Zechner 2025-12-09 17:18:53 +01:00
parent ee9acdb49d
commit a38e619095
5 changed files with 411 additions and 91 deletions

View file

@ -79,26 +79,100 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett
// ============================================================================
/**
* Find indices of message entries that are user messages (turn boundaries).
* Estimate token count for a message using chars/4 heuristic.
* This is conservative (overestimates tokens).
* Accepts any message type (AppMessage, ToolResultMessage, etc.)
*/
function findTurnBoundaries(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
const boundaries: number[] = [];
for (let i = startIndex; i < endIndex; i++) {
const entry = entries[i];
if (entry.type === "message" && entry.message.role === "user") {
boundaries.push(i);
export function estimateTokens(message: {
role: string;
content?: unknown;
command?: string;
output?: string;
}): number {
let chars = 0;
// Handle custom message types that don't have standard content
if (message.role === "bashExecution") {
chars = (message.command?.length || 0) + (message.output?.length || 0);
return Math.ceil(chars / 4);
}
// Standard messages with content
const content = message.content;
if (typeof content === "string") {
chars = content.length;
} else if (Array.isArray(content)) {
for (const block of content) {
if (block.type === "text") {
chars += block.text.length;
} else if (block.type === "thinking") {
chars += block.thinking.length;
}
}
}
return boundaries;
return Math.ceil(chars / 4);
}
/**
* Find valid cut points: indices of user, assistant, or bashExecution messages.
* Never cut at tool results (they must follow their tool call).
* When we cut at an assistant message with tool calls, its tool results follow it
* and will be kept.
* BashExecutionMessage is treated like a user message (user-initiated context).
*/
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
const cutPoints: number[] = [];
for (let i = startIndex; i < endIndex; i++) {
const entry = entries[i];
if (entry.type === "message") {
const role = entry.message.role;
// user, assistant, and bashExecution are valid cut points
// toolResult must stay with its preceding tool call
if (role === "user" || role === "assistant" || role === "bashExecution") {
cutPoints.push(i);
}
}
}
return cutPoints;
}
/**
* Find the user message (or bashExecution) that starts the turn containing the given entry index.
* Returns -1 if no turn start found before the index.
* BashExecutionMessage is treated like a user message for turn boundaries.
*/
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
for (let i = entryIndex; i >= startIndex; i--) {
const entry = entries[i];
if (entry.type === "message") {
const role = entry.message.role;
if (role === "user" || role === "bashExecution") {
return i;
}
}
}
return -1;
}
export interface CutPointResult {
/** Index of first entry to keep */
firstKeptEntryIndex: number;
/** Index of user message that starts the turn being split, or -1 if not splitting */
turnStartIndex: number;
/** Whether this cut splits a turn (cut point is not a user message) */
isSplitTurn: boolean;
}
/**
* Find the cut point in session entries that keeps approximately `keepRecentTokens`.
* Returns the entry index of the first entry to keep.
*
* The cut point targets a user message (turn boundary), but then scans backwards
* to include any preceding non-turn entries (bash executions, settings changes, etc.)
* that should logically be part of the kept context.
* Can cut at user OR assistant messages (never tool results). When cutting at an
* assistant message with tool calls, its tool results come after and will be kept.
*
* Returns CutPointResult with:
* - firstKeptEntryIndex: the entry index to start keeping from
* - turnStartIndex: if cutting mid-turn, the user message that started that turn
* - isSplitTurn: whether we're cutting in the middle of a turn
*
* Only considers entries between `startIndex` and `endIndex` (exclusive).
*/
@ -107,11 +181,11 @@ export function findCutPoint(
startIndex: number,
endIndex: number,
keepRecentTokens: number,
): number {
const boundaries = findTurnBoundaries(entries, startIndex, endIndex);
): CutPointResult {
const cutPoints = findValidCutPoints(entries, startIndex, endIndex);
if (boundaries.length === 0) {
return startIndex; // No user messages, keep everything in range
if (cutPoints.length === 0) {
return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
}
// Collect assistant usages walking backwards from endIndex
@ -130,8 +204,15 @@ export function findCutPoint(
}
if (assistantUsages.length === 0) {
// No usage info, keep last turn only
return boundaries[boundaries.length - 1];
// No usage info, keep from last cut point
const lastCutPoint = cutPoints[cutPoints.length - 1];
const entry = entries[lastCutPoint];
const isUser = entry.type === "message" && entry.message.role === "user";
return {
firstKeptEntryIndex: lastCutPoint,
turnStartIndex: isUser ? -1 : findTurnStartIndex(entries, lastCutPoint, startIndex),
isSplitTurn: !isUser,
};
}
// Walk through and find where cumulative token difference exceeds keepRecentTokens
@ -141,12 +222,13 @@ export function findCutPoint(
for (let i = 1; i < assistantUsages.length; i++) {
const tokenDiff = newestTokens - assistantUsages[i].tokens;
if (tokenDiff >= keepRecentTokens) {
// Find the turn boundary at or before the assistant we want to keep
// Find the valid cut point at or after the assistant we want to keep
const lastKeptAssistantIndex = assistantUsages[i - 1].index;
for (let b = boundaries.length - 1; b >= 0; b--) {
if (boundaries[b] <= lastKeptAssistantIndex) {
cutIndex = boundaries[b];
// Find closest valid cut point at or before lastKeptAssistantIndex
for (let c = cutPoints.length - 1; c >= 0; c--) {
if (cutPoints[c] <= lastKeptAssistantIndex) {
cutIndex = cutPoints[c];
break;
}
}
@ -154,8 +236,7 @@ export function findCutPoint(
}
}
// Scan backwards from cutIndex to include any non-turn entries (bash, settings, etc.)
// that should logically be part of the kept context
// Scan backwards from cutIndex to include any non-message entries (bash, settings, etc.)
while (cutIndex > startIndex) {
const prevEntry = entries[cutIndex - 1];
// Stop at compaction boundaries
@ -163,17 +244,23 @@ export function findCutPoint(
break;
}
if (prevEntry.type === "message") {
const role = prevEntry.message.role;
// Stop if we hit an assistant, user, or tool result (all part of previous turn)
if (role === "assistant" || role === "user" || role === "toolResult") {
break;
}
// Stop if we hit any message
break;
}
// Include this non-turn entry (bash, settings change, etc.)
// Include this non-message entry (bash, settings change, etc.)
cutIndex--;
}
return cutIndex;
// Determine if this is a split turn
const cutEntry = entries[cutIndex];
const isUserMessage = cutEntry.type === "message" && cutEntry.message.role === "user";
const turnStartIndex = isUserMessage ? -1 : findTurnStartIndex(entries, cutIndex, startIndex);
return {
firstKeptEntryIndex: cutIndex,
turnStartIndex,
isSplitTurn: !isUserMessage && turnStartIndex !== -1,
};
}
// ============================================================================
@ -234,6 +321,16 @@ export async function generateSummary(
// Main compaction function
// ============================================================================
const TURN_PREFIX_SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION for a split turn.
This is the PREFIX of a turn that was too large to keep in full. The SUFFIX (recent work) is being kept.
Create a handoff summary that captures:
- What the user originally asked for in this turn
- Key decisions and progress made early in this turn
- Important context needed to understand the kept suffix
Be concise. Focus on information needed to understand the retained recent work.`;
/**
* Calculate compaction and generate summary.
* Returns the CompactionEntry to append to the session file.
@ -274,43 +371,101 @@ export async function compact(
const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0;
// Find cut point (entry index) within the valid range
const firstKeptEntryIndex = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);
const cutResult = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);
// Extract messages to summarize (before the cut point)
const messagesToSummarize: AppMessage[] = [];
for (let i = boundaryStart; i < firstKeptEntryIndex; i++) {
// Extract messages for history summary (before the turn that contains the cut point)
const historyEnd = cutResult.isSplitTurn ? cutResult.turnStartIndex : cutResult.firstKeptEntryIndex;
const historyMessages: AppMessage[] = [];
for (let i = boundaryStart; i < historyEnd; i++) {
const entry = entries[i];
if (entry.type === "message") {
messagesToSummarize.push(entry.message);
historyMessages.push(entry.message);
}
}
// Also include the previous summary if there was a compaction
// Include previous summary if there was a compaction
if (prevCompactionIndex >= 0) {
const prevCompaction = entries[prevCompactionIndex] as CompactionEntry;
// Prepend the previous summary as context
messagesToSummarize.unshift({
historyMessages.unshift({
role: "user",
content: `Previous session summary:\n${prevCompaction.summary}`,
timestamp: Date.now(),
});
}
// Generate summary from messages before the cut point
const summary = await generateSummary(
messagesToSummarize,
model,
settings.reserveTokens,
apiKey,
signal,
customInstructions,
);
// Extract messages for turn prefix summary (if splitting a turn)
const turnPrefixMessages: AppMessage[] = [];
if (cutResult.isSplitTurn) {
for (let i = cutResult.turnStartIndex; i < cutResult.firstKeptEntryIndex; i++) {
const entry = entries[i];
if (entry.type === "message") {
turnPrefixMessages.push(entry.message);
}
}
}
// Generate summaries (can be parallel if both needed)
let summary: string;
let turnPrefixSummary: string | undefined;
if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) {
// Generate both summaries in parallel
const [historyResult, turnPrefixResult] = await Promise.all([
historyMessages.length > 0
? generateSummary(historyMessages, model, settings.reserveTokens, apiKey, signal, customInstructions)
: Promise.resolve("No prior history."),
generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal),
]);
summary = historyResult;
turnPrefixSummary = turnPrefixResult;
} else {
// Just generate history summary
summary = await generateSummary(
historyMessages,
model,
settings.reserveTokens,
apiKey,
signal,
customInstructions,
);
}
return {
type: "compaction",
timestamp: new Date().toISOString(),
summary,
firstKeptEntryIndex,
turnPrefixSummary,
firstKeptEntryIndex: cutResult.firstKeptEntryIndex,
tokensBefore,
};
}
/**
* Generate a summary for a turn prefix (when splitting a turn).
*/
async function generateTurnPrefixSummary(
messages: AppMessage[],
model: Model<any>,
reserveTokens: number,
apiKey: string,
signal?: AbortSignal,
): Promise<string> {
const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix
const transformedMessages = messageTransformer(messages);
const summarizationMessages = [
...transformedMessages,
{
role: "user" as const,
content: [{ type: "text" as const, text: TURN_PREFIX_SUMMARIZATION_PROMPT }],
timestamp: Date.now(),
},
];
const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey });
return response.content
.filter((c): c is { type: "text"; text: string } => c.type === "text")
.map((c) => c.text)
.join("\n");
}