mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 20:01:24 +00:00
feat(coding-agent): implement new compaction system with overflow recovery
Phase 1: Updated compaction.ts
- findCutPoint now returns a CutPointResult with isSplitTurn and turnStartIndex
- Can cut at user, assistant, or bashExecution messages (never at tool results)
- Added turnPrefixSummary support for split turns (parallel summarization)
- Added estimateTokens helper for context size estimation

Phase 2: Updated session-manager.ts
- CompactionEntry now has an optional turnPrefixSummary field
- loadSessionFromEntries injects both summaries when a turn was split

Phase 3: Updated agent-session.ts
- Overflow detection via isContextOverflow after agent_end
- Proactive compaction check on turn_end, before the next LLM call
- _abortingForCompaction flag to skip saving aborted messages
- Auto-retry after overflow recovery or proactive compaction
- New event fields: reason (overflow/threshold) and willRetry

Phase 4: Updated interactive-mode.ts
- Shows the reason in the compaction status (Context overflow detected...)
- Shows retry status after compaction

Tests updated for the new CutPointResult return type.
This commit is contained in:
parent
ee9acdb49d
commit
a38e619095
5 changed files with 411 additions and 91 deletions
|
|
@ -14,10 +14,11 @@
|
|||
*/
|
||||
|
||||
import type { Agent, AgentEvent, AgentState, AppMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage, Model } from "@mariozechner/pi-ai";
|
||||
import type { AssistantMessage, Model, ToolResultMessage } from "@mariozechner/pi-ai";
|
||||
import { isContextOverflow } from "@mariozechner/pi-ai";
|
||||
import { getModelsPath } from "../config.js";
|
||||
import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js";
|
||||
import { calculateContextTokens, compact, shouldCompact } from "./compaction.js";
|
||||
import { calculateContextTokens, compact, estimateTokens, shouldCompact } from "./compaction.js";
|
||||
import { exportSessionToHtml } from "./export-html.js";
|
||||
import type { BashExecutionMessage } from "./messages.js";
|
||||
import { getApiKeyForModel, getAvailableModels } from "./model-config.js";
|
||||
|
|
@ -28,8 +29,8 @@ import { expandSlashCommand, type FileSlashCommand } from "./slash-commands.js";
|
|||
/** Session-specific events that extend the core AgentEvent */
|
||||
export type AgentSessionEvent =
|
||||
| AgentEvent
|
||||
| { type: "auto_compaction_start" }
|
||||
| { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean };
|
||||
| { type: "auto_compaction_start"; reason: "threshold" | "overflow" }
|
||||
| { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean };
|
||||
|
||||
/** Listener function for agent session events */
|
||||
export type AgentSessionEventListener = (event: AgentSessionEvent) => void;
|
||||
|
|
@ -111,6 +112,8 @@ export class AgentSession {
|
|||
// Compaction state
|
||||
private _compactionAbortController: AbortController | null = null;
|
||||
private _autoCompactionAbortController: AbortController | null = null;
|
||||
private _abortingForCompaction = false;
|
||||
private _lastUserMessageText: string | null = null;
|
||||
|
||||
// Bash execution state
|
||||
private _bashAbortController: AbortController | null = null;
|
||||
|
|
@ -145,24 +148,53 @@ export class AgentSession {
|
|||
|
||||
// Handle session persistence
|
||||
if (event.type === "message_end") {
|
||||
this.sessionManager.saveMessage(event.message);
|
||||
// Skip saving aborted message if we're aborting for compaction
|
||||
const isAbortedForCompaction =
|
||||
this._abortingForCompaction &&
|
||||
event.message.role === "assistant" &&
|
||||
(event.message as AssistantMessage).stopReason === "aborted";
|
||||
|
||||
if (!isAbortedForCompaction) {
|
||||
this.sessionManager.saveMessage(event.message);
|
||||
}
|
||||
|
||||
// Initialize session after first user+assistant exchange
|
||||
if (this.sessionManager.shouldInitializeSession(this.agent.state.messages)) {
|
||||
this.sessionManager.startSession(this.agent.state);
|
||||
}
|
||||
|
||||
// Track user message text for potential retry after overflow
|
||||
if (event.message.role === "user") {
|
||||
const content = (event.message as { content: unknown }).content;
|
||||
if (typeof content === "string") {
|
||||
this._lastUserMessageText = content;
|
||||
} else if (Array.isArray(content)) {
|
||||
this._lastUserMessageText = content
|
||||
.filter((c): c is { type: "text"; text: string } => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Track assistant message for auto-compaction (checked on agent_end)
|
||||
if (event.message.role === "assistant") {
|
||||
this._lastAssistantMessage = event.message as AssistantMessage;
|
||||
}
|
||||
}
|
||||
|
||||
// Check auto-compaction after agent completes (after agent_end clears UI)
|
||||
// Handle turn_end for proactive compaction check
|
||||
if (event.type === "turn_end") {
|
||||
await this._checkProactiveCompaction(
|
||||
event.message as AssistantMessage,
|
||||
event.toolResults as ToolResultMessage[],
|
||||
);
|
||||
}
|
||||
|
||||
// Check auto-compaction after agent completes
|
||||
if (event.type === "agent_end" && this._lastAssistantMessage) {
|
||||
const msg = this._lastAssistantMessage;
|
||||
this._lastAssistantMessage = null;
|
||||
this._runAutoCompaction(msg).catch(() => {});
|
||||
await this._handleAgentEndCompaction(msg);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -591,38 +623,113 @@ export class AgentSession {
|
|||
}
|
||||
|
||||
/**
 * Check for proactive compaction after turn_end (before the next LLM call).
 *
 * The size of the next request is estimated as the context tokens reported by
 * the assistant message's usage plus a heuristic estimate of the tool results
 * from this turn. When that estimate crosses the compaction threshold, the
 * agent is aborted and `_abortingForCompaction` is set; the agent_end handler
 * reads that flag to run compaction and retry.
 *
 * @param assistantMessage Assistant message that ended the turn.
 * @param toolResults Tool results produced during the turn.
 */
private async _checkProactiveCompaction(
	assistantMessage: AssistantMessage,
	toolResults: ToolResultMessage[],
): Promise<void> {
	const settings = this.settingsManager.getCompactionSettings();
	if (!settings.enabled) return;

	// Skip if message was aborted or errored
	if (assistantMessage.stopReason === "aborted" || assistantMessage.stopReason === "error") return;

	// Only check if there are tool calls (meaning another turn will happen)
	const hasToolCalls = assistantMessage.content.some((c) => c.type === "toolCall");
	if (!hasToolCalls) return;

	// Estimate context size: last usage + tool results
	const contextTokens = calculateContextTokens(assistantMessage.usage);
	const toolResultTokens = toolResults.reduce((sum, msg) => sum + estimateTokens(msg), 0);
	const estimatedTotal = contextTokens + toolResultTokens;

	const contextWindow = this.model?.contextWindow ?? 0;

	// Decide on the estimate only; gating on the raw usage first would make the
	// tool-result estimate irrelevant.
	if (!shouldCompact(estimatedTotal, contextWindow, settings)) return;

	// Threshold crossed - abort for compaction. No start event is emitted here:
	// _runAutoCompaction emits auto_compaction_start together with its reason.
	this._abortingForCompaction = true;
	this.agent.abort();
}
|
||||
|
||||
/**
 * Decide, once the agent has ended, whether auto-compaction must run.
 *
 * Three triggers are evaluated, in priority order:
 *  1. overflow  - `isContextOverflow` reports an overflow for the last assistant
 *                 message; the trailing assistant message is dropped and the
 *                 prompt is retried after compaction.
 *  2. abort     - the turn was aborted with `_abortingForCompaction` set; the
 *                 trailing aborted assistant message is dropped and the prompt
 *                 is retried after compaction.
 *  3. threshold - the turn succeeded but its usage crosses the compaction
 *                 threshold; compaction runs without a retry.
 *
 * @param assistantMessage Last assistant message recorded before agent_end.
 */
private async _handleAgentEndCompaction(assistantMessage: AssistantMessage): Promise<void> {
	const settings = this.settingsManager.getCompactionSettings();
	const contextWindow = this.model?.contextWindow ?? 0;

	// Trigger 1: reactive overflow recovery
	const overflowed = isContextOverflow(assistantMessage, contextWindow);

	// Trigger 2: proactive abort. The flag is consumed here so it never leaks
	// into the next run.
	const abortedForCompaction = this._abortingForCompaction;
	this._abortingForCompaction = false;

	// Trigger 3: maintenance compaction. An errored message counts as 0 tokens.
	let usedTokens = 0;
	if (assistantMessage.stopReason !== "error") {
		usedTokens = calculateContextTokens(assistantMessage.usage);
	}
	const overThreshold = settings.enabled && shouldCompact(usedTokens, contextWindow, settings);

	// Drops the trailing assistant message from agent state, optionally only
	// when that message was aborted.
	const dropTrailingAssistant = (onlyIfAborted: boolean): void => {
		const current = this.agent.state.messages;
		if (current.length === 0) return;
		const last = current[current.length - 1];
		if (last.role !== "assistant") return;
		if (onlyIfAborted && (last as AssistantMessage).stopReason !== "aborted") return;
		this.agent.replaceMessages(current.slice(0, -1));
	};

	if (overflowed) {
		dropTrailingAssistant(false);
		await this._runAutoCompaction("overflow", true);
		return;
	}

	if (abortedForCompaction) {
		dropTrailingAssistant(true);
		await this._runAutoCompaction("threshold", true);
		return;
	}

	if (overThreshold) {
		// Turn succeeded, no retry needed
		await this._runAutoCompaction("threshold", false);
	}
}
|
||||
|
||||
/**
|
||||
* Internal: Run auto-compaction with events.
|
||||
*/
|
||||
private async _runAutoCompaction(reason: "overflow" | "threshold", willRetry: boolean): Promise<void> {
|
||||
const settings = this.settingsManager.getCompactionSettings();
|
||||
|
||||
this._emit({ type: "auto_compaction_start", reason });
|
||||
this._autoCompactionAbortController = new AbortController();
|
||||
|
||||
try {
|
||||
if (!this.model) {
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false });
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
|
||||
return;
|
||||
}
|
||||
|
||||
const apiKey = await getApiKeyForModel(this.model);
|
||||
if (!apiKey) {
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false });
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
|
||||
return;
|
||||
}
|
||||
|
||||
// Load entries (sync file read) then yield to let UI render
|
||||
const entries = this.sessionManager.loadEntries();
|
||||
const compactionEntry = await compact(
|
||||
entries,
|
||||
|
|
@ -633,7 +740,7 @@ export class AgentSession {
|
|||
);
|
||||
|
||||
if (this._autoCompactionAbortController.signal.aborted) {
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: true });
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: true, willRetry: false });
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -645,10 +752,24 @@ export class AgentSession {
|
|||
tokensBefore: compactionEntry.tokensBefore,
|
||||
summary: compactionEntry.summary,
|
||||
};
|
||||
this._emit({ type: "auto_compaction_end", result, aborted: false });
|
||||
} catch {
|
||||
// Silently fail auto-compaction but emit end event
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false });
|
||||
this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry });
|
||||
|
||||
// Auto-retry if needed
|
||||
if (willRetry && this._lastUserMessageText) {
|
||||
// Small delay to let UI update
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
await this.prompt(this._lastUserMessageText);
|
||||
}
|
||||
} catch (error) {
|
||||
// Compaction failed - emit end event without retry
|
||||
this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
|
||||
|
||||
// If this was overflow recovery and compaction failed, we have a hard stop
|
||||
if (reason === "overflow") {
|
||||
throw new Error(
|
||||
`Context overflow: ${error instanceof Error ? error.message : "compaction failed"}. Your input may be too large for the context window.`,
|
||||
);
|
||||
}
|
||||
} finally {
|
||||
this._autoCompactionAbortController = null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,26 +79,100 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett
|
|||
// ============================================================================
|
||||
|
||||
/**
 * Estimate the token count of a message using a chars/4 heuristic.
 *
 * Accepts any message shape (AppMessage, ToolResultMessage, etc.):
 * - `bashExecution` messages are measured by `command` + `output`.
 * - String content is measured by its length.
 * - Array content is measured by the `text` of "text" blocks and the
 *   `thinking` of "thinking" blocks. All other block types contribute
 *   nothing, so the result is a rough estimate rather than an upper bound.
 *
 * Malformed content (null blocks, blocks without a string payload) is
 * ignored instead of throwing.
 *
 * @param message Message-like object to measure.
 * @returns Estimated token count, rounded up; 0 when nothing is measurable.
 */
export function estimateTokens(message: {
	role: string;
	content?: unknown;
	command?: string;
	output?: string;
}): number {
	const CHARS_PER_TOKEN = 4;

	// Handle custom message types that don't have standard content
	if (message.role === "bashExecution") {
		const bashChars = (message.command?.length ?? 0) + (message.output?.length ?? 0);
		return Math.ceil(bashChars / CHARS_PER_TOKEN);
	}

	// Standard messages with content
	const content = message.content;
	let chars = 0;

	if (typeof content === "string") {
		chars = content.length;
	} else if (Array.isArray(content)) {
		for (const block of content as unknown[]) {
			// content is untyped, so every block is validated before use
			if (typeof block !== "object" || block === null) continue;
			const { type, text, thinking } = block as { type?: unknown; text?: unknown; thinking?: unknown };
			if (type === "text" && typeof text === "string") {
				chars += text.length;
			} else if (type === "thinking" && typeof thinking === "string") {
				chars += thinking.length;
			}
		}
	}

	return Math.ceil(chars / CHARS_PER_TOKEN);
}
|
||||
|
||||
/**
 * Find valid cut points: indices of user, assistant, or bashExecution messages
 * in the half-open range [startIndex, endIndex).
 *
 * Tool results are never cut points (they must follow their tool call). When
 * the cut lands on an assistant message with tool calls, its tool results
 * follow it and are kept. BashExecutionMessage is treated like a user message.
 *
 * The range is clamped to the bounds of `entries`, so an out-of-range
 * `startIndex` or `endIndex` yields fewer results instead of throwing.
 *
 * @param entries Session entries to scan.
 * @param startIndex First index to consider (inclusive).
 * @param endIndex Index to stop at (exclusive).
 * @returns Matching entry indices in ascending order.
 */
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
	const cutPoints: number[] = [];
	const from = Math.max(0, startIndex);
	const to = Math.min(endIndex, entries.length);

	for (let i = from; i < to; i++) {
		const entry = entries[i];
		if (entry.type !== "message") continue;

		// user, assistant, and bashExecution are valid cut points;
		// toolResult must stay with its preceding tool call
		const role = entry.message.role;
		if (role === "user" || role === "assistant" || role === "bashExecution") {
			cutPoints.push(i);
		}
	}

	return cutPoints;
}
|
||||
|
||||
/**
 * Find the user message (or bashExecution) that starts the turn containing the
 * given entry index.
 *
 * The search walks backwards from `entryIndex` (inclusive) down to `startIndex`
 * (inclusive), so an entry that is itself a user/bashExecution message is its
 * own turn start. BashExecutionMessage is treated like a user message for turn
 * boundaries.
 *
 * Both bounds are clamped to the array, so an `entryIndex` past the end or a
 * negative `startIndex` no longer dereferences a missing entry.
 *
 * @param entries Session entries to scan.
 * @param entryIndex Index of the entry whose turn start is wanted.
 * @param startIndex Lowest index that may be returned.
 * @returns Index of the turn-starting entry, or -1 if none exists in range.
 */
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
	const lowest = Math.max(0, startIndex);
	const highest = Math.min(entryIndex, entries.length - 1);

	for (let i = highest; i >= lowest; i--) {
		const entry = entries[i];
		if (entry.type !== "message") continue;

		const role = entry.message.role;
		if (role === "user" || role === "bashExecution") {
			return i;
		}
	}

	return -1;
}
|
||||
|
||||
/**
 * Result of findCutPoint: where the kept region of the session starts and
 * whether the cut falls inside a turn.
 */
export interface CutPointResult {
|
||||
/** Index of first entry to keep */
|
||||
firstKeptEntryIndex: number;
|
||||
/**
 * Index of the user (or bashExecution) message that starts the turn being split.
 * -1 when the first kept entry is itself a user message, or when
 * findTurnStartIndex finds no turn start before the cut.
 */
|
||||
turnStartIndex: number;
|
||||
/**
 * Whether this cut splits a turn (cut point is not a user message).
 * NOTE(review): findCutPoint's main path also requires turnStartIndex !== -1,
 * but its "no usage info" path sets this from the role check alone, so
 * confirm turnStartIndex >= 0 before using it as an index.
 */
|
||||
isSplitTurn: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the cut point in session entries that keeps approximately `keepRecentTokens`.
|
||||
* Returns the entry index of the first entry to keep.
|
||||
*
|
||||
* The cut point targets a user message (turn boundary), but then scans backwards
|
||||
* to include any preceding non-turn entries (bash executions, settings changes, etc.)
|
||||
* that should logically be part of the kept context.
|
||||
* Can cut at user OR assistant messages (never tool results). When cutting at an
|
||||
* assistant message with tool calls, its tool results come after and will be kept.
|
||||
*
|
||||
* Returns CutPointResult with:
|
||||
* - firstKeptEntryIndex: the entry index to start keeping from
|
||||
* - turnStartIndex: if cutting mid-turn, the user message that started that turn
|
||||
* - isSplitTurn: whether we're cutting in the middle of a turn
|
||||
*
|
||||
* Only considers entries between `startIndex` and `endIndex` (exclusive).
|
||||
*/
|
||||
|
|
@ -107,11 +181,11 @@ export function findCutPoint(
|
|||
startIndex: number,
|
||||
endIndex: number,
|
||||
keepRecentTokens: number,
|
||||
): number {
|
||||
const boundaries = findTurnBoundaries(entries, startIndex, endIndex);
|
||||
): CutPointResult {
|
||||
const cutPoints = findValidCutPoints(entries, startIndex, endIndex);
|
||||
|
||||
if (boundaries.length === 0) {
|
||||
return startIndex; // No user messages, keep everything in range
|
||||
if (cutPoints.length === 0) {
|
||||
return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
|
||||
}
|
||||
|
||||
// Collect assistant usages walking backwards from endIndex
|
||||
|
|
@ -130,8 +204,15 @@ export function findCutPoint(
|
|||
}
|
||||
|
||||
if (assistantUsages.length === 0) {
|
||||
// No usage info, keep last turn only
|
||||
return boundaries[boundaries.length - 1];
|
||||
// No usage info, keep from last cut point
|
||||
const lastCutPoint = cutPoints[cutPoints.length - 1];
|
||||
const entry = entries[lastCutPoint];
|
||||
const isUser = entry.type === "message" && entry.message.role === "user";
|
||||
return {
|
||||
firstKeptEntryIndex: lastCutPoint,
|
||||
turnStartIndex: isUser ? -1 : findTurnStartIndex(entries, lastCutPoint, startIndex),
|
||||
isSplitTurn: !isUser,
|
||||
};
|
||||
}
|
||||
|
||||
// Walk through and find where cumulative token difference exceeds keepRecentTokens
|
||||
|
|
@ -141,12 +222,13 @@ export function findCutPoint(
|
|||
for (let i = 1; i < assistantUsages.length; i++) {
|
||||
const tokenDiff = newestTokens - assistantUsages[i].tokens;
|
||||
if (tokenDiff >= keepRecentTokens) {
|
||||
// Find the turn boundary at or before the assistant we want to keep
|
||||
// Find the valid cut point at or before the last assistant we want to keep
|
||||
const lastKeptAssistantIndex = assistantUsages[i - 1].index;
|
||||
|
||||
for (let b = boundaries.length - 1; b >= 0; b--) {
|
||||
if (boundaries[b] <= lastKeptAssistantIndex) {
|
||||
cutIndex = boundaries[b];
|
||||
// Find closest valid cut point at or before lastKeptAssistantIndex
|
||||
for (let c = cutPoints.length - 1; c >= 0; c--) {
|
||||
if (cutPoints[c] <= lastKeptAssistantIndex) {
|
||||
cutIndex = cutPoints[c];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -154,8 +236,7 @@ export function findCutPoint(
|
|||
}
|
||||
}
|
||||
|
||||
// Scan backwards from cutIndex to include any non-turn entries (bash, settings, etc.)
|
||||
// that should logically be part of the kept context
|
||||
// Scan backwards from cutIndex to include any non-message entries (bash, settings, etc.)
|
||||
while (cutIndex > startIndex) {
|
||||
const prevEntry = entries[cutIndex - 1];
|
||||
// Stop at compaction boundaries
|
||||
|
|
@ -163,17 +244,23 @@ export function findCutPoint(
|
|||
break;
|
||||
}
|
||||
if (prevEntry.type === "message") {
|
||||
const role = prevEntry.message.role;
|
||||
// Stop if we hit an assistant, user, or tool result (all part of previous turn)
|
||||
if (role === "assistant" || role === "user" || role === "toolResult") {
|
||||
break;
|
||||
}
|
||||
// Stop if we hit any message
|
||||
break;
|
||||
}
|
||||
// Include this non-turn entry (bash, settings change, etc.)
|
||||
// Include this non-message entry (bash, settings change, etc.)
|
||||
cutIndex--;
|
||||
}
|
||||
|
||||
return cutIndex;
|
||||
// Determine if this is a split turn
|
||||
const cutEntry = entries[cutIndex];
|
||||
const isUserMessage = cutEntry.type === "message" && cutEntry.message.role === "user";
|
||||
const turnStartIndex = isUserMessage ? -1 : findTurnStartIndex(entries, cutIndex, startIndex);
|
||||
|
||||
return {
|
||||
firstKeptEntryIndex: cutIndex,
|
||||
turnStartIndex,
|
||||
isSplitTurn: !isUserMessage && turnStartIndex !== -1,
|
||||
};
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -234,6 +321,16 @@ export async function generateSummary(
|
|||
// Main compaction function
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Instruction text sent as the final user message by generateTurnPrefixSummary
 * when the discarded prefix of a split turn is summarized.
 */
const TURN_PREFIX_SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION for a split turn.
|
||||
This is the PREFIX of a turn that was too large to keep in full. The SUFFIX (recent work) is being kept.
|
||||
|
||||
Create a handoff summary that captures:
|
||||
- What the user originally asked for in this turn
|
||||
- Key decisions and progress made early in this turn
|
||||
- Important context needed to understand the kept suffix
|
||||
|
||||
Be concise. Focus on information needed to understand the retained recent work.`;
|
||||
|
||||
/**
|
||||
* Calculate compaction and generate summary.
|
||||
* Returns the CompactionEntry to append to the session file.
|
||||
|
|
@ -274,43 +371,101 @@ export async function compact(
|
|||
const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0;
|
||||
|
||||
// Find cut point (entry index) within the valid range
|
||||
const firstKeptEntryIndex = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);
|
||||
const cutResult = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);
|
||||
|
||||
// Extract messages to summarize (before the cut point)
|
||||
const messagesToSummarize: AppMessage[] = [];
|
||||
for (let i = boundaryStart; i < firstKeptEntryIndex; i++) {
|
||||
// Extract messages for history summary (before the turn that contains the cut point)
|
||||
const historyEnd = cutResult.isSplitTurn ? cutResult.turnStartIndex : cutResult.firstKeptEntryIndex;
|
||||
const historyMessages: AppMessage[] = [];
|
||||
for (let i = boundaryStart; i < historyEnd; i++) {
|
||||
const entry = entries[i];
|
||||
if (entry.type === "message") {
|
||||
messagesToSummarize.push(entry.message);
|
||||
historyMessages.push(entry.message);
|
||||
}
|
||||
}
|
||||
|
||||
// Also include the previous summary if there was a compaction
|
||||
// Include previous summary if there was a compaction
|
||||
if (prevCompactionIndex >= 0) {
|
||||
const prevCompaction = entries[prevCompactionIndex] as CompactionEntry;
|
||||
// Prepend the previous summary as context
|
||||
messagesToSummarize.unshift({
|
||||
historyMessages.unshift({
|
||||
role: "user",
|
||||
content: `Previous session summary:\n${prevCompaction.summary}`,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
// Generate summary from messages before the cut point
|
||||
const summary = await generateSummary(
|
||||
messagesToSummarize,
|
||||
model,
|
||||
settings.reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
customInstructions,
|
||||
);
|
||||
// Extract messages for turn prefix summary (if splitting a turn)
|
||||
const turnPrefixMessages: AppMessage[] = [];
|
||||
if (cutResult.isSplitTurn) {
|
||||
for (let i = cutResult.turnStartIndex; i < cutResult.firstKeptEntryIndex; i++) {
|
||||
const entry = entries[i];
|
||||
if (entry.type === "message") {
|
||||
turnPrefixMessages.push(entry.message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Generate summaries (can be parallel if both needed)
|
||||
let summary: string;
|
||||
let turnPrefixSummary: string | undefined;
|
||||
|
||||
if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) {
|
||||
// Generate both summaries in parallel
|
||||
const [historyResult, turnPrefixResult] = await Promise.all([
|
||||
historyMessages.length > 0
|
||||
? generateSummary(historyMessages, model, settings.reserveTokens, apiKey, signal, customInstructions)
|
||||
: Promise.resolve("No prior history."),
|
||||
generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal),
|
||||
]);
|
||||
summary = historyResult;
|
||||
turnPrefixSummary = turnPrefixResult;
|
||||
} else {
|
||||
// Just generate history summary
|
||||
summary = await generateSummary(
|
||||
historyMessages,
|
||||
model,
|
||||
settings.reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
customInstructions,
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
type: "compaction",
|
||||
timestamp: new Date().toISOString(),
|
||||
summary,
|
||||
firstKeptEntryIndex,
|
||||
turnPrefixSummary,
|
||||
firstKeptEntryIndex: cutResult.firstKeptEntryIndex,
|
||||
tokensBefore,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Generate a summary for a turn prefix (the part of a split turn that is not kept).
 *
 * The prefix messages are passed through `messageTransformer`, followed by
 * TURN_PREFIX_SUMMARIZATION_PROMPT as a final user message. The text blocks of
 * the model's response are joined with newlines.
 *
 * @param messages Messages from the turn start up to the first kept entry.
 * @param model Model used for summarization.
 * @param reserveTokens Reserve budget; half of it is used as the output limit.
 * @param apiKey API key for the model.
 * @param signal Optional abort signal forwarded to the completion call.
 * @returns The concatenated text of the response (may be empty).
 */
async function generateTurnPrefixSummary(
	messages: AppMessage[],
	model: Model<any>,
	reserveTokens: number,
	apiKey: string,
	signal?: AbortSignal,
): Promise<string> {
	// Smaller budget for turn prefix: half of the reserve
	const maxTokens = Math.floor(0.5 * reserveTokens);

	const priorMessages = messageTransformer(messages);
	const instruction = {
		role: "user" as const,
		content: [{ type: "text" as const, text: TURN_PREFIX_SUMMARIZATION_PROMPT }],
		timestamp: Date.now(),
	};

	const response = await complete(
		model,
		{ messages: [...priorMessages, instruction] },
		{ maxTokens, signal, apiKey },
	);

	// Keep only text blocks of the response
	const textParts: string[] = [];
	for (const block of response.content) {
		if (block.type === "text") {
			textParts.push(block.text);
		}
	}
	return textParts.join("\n");
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ export interface CompactionEntry {
|
|||
type: "compaction";
|
||||
timestamp: string;
|
||||
summary: string;
|
||||
/** Summary of turn prefix when a turn was split (user message to first kept message) */
|
||||
turnPrefixSummary?: string;
|
||||
firstKeptEntryIndex: number; // Index into session entries where we start keeping
|
||||
tokensBefore: number;
|
||||
}
|
||||
|
|
@ -178,9 +180,19 @@ export function loadSessionFromEntries(entries: SessionEntry[]): LoadedSession {
|
|||
}
|
||||
}
|
||||
|
||||
// Build final messages: summary + kept messages
|
||||
const summaryMessage = createSummaryMessage(compactionEvent.summary);
|
||||
const messages = [summaryMessage, ...keptMessages];
|
||||
// Build final messages: summaries + kept messages
|
||||
const messages: AppMessage[] = [];
|
||||
|
||||
// Add history summary
|
||||
messages.push(createSummaryMessage(compactionEvent.summary));
|
||||
|
||||
// Add turn prefix summary if present (when a turn was split)
|
||||
if (compactionEvent.turnPrefixSummary) {
|
||||
messages.push(createSummaryMessage(compactionEvent.turnPrefixSummary));
|
||||
}
|
||||
|
||||
// Add kept messages
|
||||
messages.push(...keptMessages);
|
||||
|
||||
return { messages, thinkingLevel, model };
|
||||
}
|
||||
|
|
|
|||
|
|
@ -560,25 +560,27 @@ export class InteractiveMode {
|
|||
this.ui.requestRender();
|
||||
break;
|
||||
|
||||
case "auto_compaction_start":
|
||||
case "auto_compaction_start": {
|
||||
// Set up escape to abort auto-compaction
|
||||
this.autoCompactionEscapeHandler = this.editor.onEscape;
|
||||
this.editor.onEscape = () => {
|
||||
this.session.abortCompaction();
|
||||
};
|
||||
// Show compacting indicator
|
||||
// Show compacting indicator with reason
|
||||
this.statusContainer.clear();
|
||||
const reasonText = event.reason === "overflow" ? "Context overflow detected, " : "";
|
||||
this.autoCompactionLoader = new Loader(
|
||||
this.ui,
|
||||
(spinner) => theme.fg("accent", spinner),
|
||||
(text) => theme.fg("muted", text),
|
||||
"Auto-compacting... (esc to cancel)",
|
||||
`${reasonText}Auto-compacting... (esc to cancel)`,
|
||||
);
|
||||
this.statusContainer.addChild(this.autoCompactionLoader);
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
|
||||
case "auto_compaction_end":
|
||||
case "auto_compaction_end": {
|
||||
// Restore escape handler
|
||||
if (this.autoCompactionEscapeHandler) {
|
||||
this.editor.onEscape = this.autoCompactionEscapeHandler;
|
||||
|
|
@ -602,9 +604,14 @@ export class InteractiveMode {
|
|||
compactionComponent.setExpanded(this.toolOutputExpanded);
|
||||
this.chatContainer.addChild(compactionComponent);
|
||||
this.footer.updateState(this.session.state);
|
||||
|
||||
if (event.willRetry) {
|
||||
this.showStatus("Compacted context, retrying...");
|
||||
}
|
||||
}
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -164,16 +164,18 @@ describe("findCutPoint", () => {
|
|||
|
||||
// 20 entries, last assistant has 10000 tokens
|
||||
// keepRecentTokens = 2500: keep entries where diff < 2500
|
||||
const cutPoint = findCutPoint(entries, 0, entries.length, 2500);
|
||||
const result = findCutPoint(entries, 0, entries.length, 2500);
|
||||
|
||||
// Should cut at a user message entry
|
||||
expect(entries[cutPoint].type).toBe("message");
|
||||
expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user");
|
||||
// Should cut at a valid cut point (user or assistant message)
|
||||
expect(entries[result.firstKeptEntryIndex].type).toBe("message");
|
||||
const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role;
|
||||
expect(role === "user" || role === "assistant").toBe(true);
|
||||
});
|
||||
|
||||
it("should return startIndex if no user messages in range", () => {
|
||||
it("should return startIndex if no valid cut points in range", () => {
|
||||
const entries: SessionEntry[] = [createMessageEntry(createAssistantMessage("a"))];
|
||||
expect(findCutPoint(entries, 0, entries.length, 1000)).toBe(0);
|
||||
const result = findCutPoint(entries, 0, entries.length, 1000);
|
||||
expect(result.firstKeptEntryIndex).toBe(0);
|
||||
});
|
||||
|
||||
it("should keep everything if all messages fit within budget", () => {
|
||||
|
|
@ -184,8 +186,30 @@ describe("findCutPoint", () => {
|
|||
createMessageEntry(createAssistantMessage("b", createMockUsage(0, 50, 1000, 0))),
|
||||
];
|
||||
|
||||
const cutPoint = findCutPoint(entries, 0, entries.length, 50000);
|
||||
expect(cutPoint).toBe(0);
|
||||
const result = findCutPoint(entries, 0, entries.length, 50000);
|
||||
expect(result.firstKeptEntryIndex).toBe(0);
|
||||
});
|
||||
|
||||
it("should indicate split turn when cutting at assistant message", () => {
|
||||
// Create a scenario where we cut at an assistant message mid-turn
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("Turn 1")),
|
||||
createMessageEntry(createAssistantMessage("A1", createMockUsage(0, 100, 1000, 0))),
|
||||
createMessageEntry(createUserMessage("Turn 2")), // index 2
|
||||
createMessageEntry(createAssistantMessage("A2-1", createMockUsage(0, 100, 5000, 0))), // index 3
|
||||
createMessageEntry(createAssistantMessage("A2-2", createMockUsage(0, 100, 8000, 0))), // index 4
|
||||
createMessageEntry(createAssistantMessage("A2-3", createMockUsage(0, 100, 10000, 0))), // index 5
|
||||
];
|
||||
|
||||
// With keepRecentTokens = 3000, should cut somewhere in Turn 2
|
||||
const result = findCutPoint(entries, 0, entries.length, 3000);
|
||||
|
||||
// If cut at assistant message (not user), should indicate split turn
|
||||
const cutEntry = entries[result.firstKeptEntryIndex] as SessionMessageEntry;
|
||||
if (cutEntry.message.role === "assistant") {
|
||||
expect(result.isSplitTurn).toBe(true);
|
||||
expect(result.turnStartIndex).toBe(2); // Turn 2 starts at index 2
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -348,11 +372,12 @@ describe("Large session fixture", () => {
|
|||
|
||||
it("should find cut point in large session", () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
const cutPoint = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens);
|
||||
const result = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens);
|
||||
|
||||
// Cut point should be at a message entry with user role
|
||||
expect(entries[cutPoint].type).toBe("message");
|
||||
expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user");
|
||||
// Cut point should be at a message entry (user or assistant)
|
||||
expect(entries[result.firstKeptEntryIndex].type).toBe("message");
|
||||
const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role;
|
||||
expect(role === "user" || role === "assistant").toBe(true);
|
||||
});
|
||||
|
||||
it("should load session correctly", () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue