diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index be31fc90..95f2789a 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -14,10 +14,11 @@ */ import type { Agent, AgentEvent, AgentState, AppMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core"; -import type { AssistantMessage, Model } from "@mariozechner/pi-ai"; +import type { AssistantMessage, Model, ToolResultMessage } from "@mariozechner/pi-ai"; +import { isContextOverflow } from "@mariozechner/pi-ai"; import { getModelsPath } from "../config.js"; import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js"; -import { calculateContextTokens, compact, shouldCompact } from "./compaction.js"; +import { calculateContextTokens, compact, estimateTokens, shouldCompact } from "./compaction.js"; import { exportSessionToHtml } from "./export-html.js"; import type { BashExecutionMessage } from "./messages.js"; import { getApiKeyForModel, getAvailableModels } from "./model-config.js"; @@ -28,8 +29,8 @@ import { expandSlashCommand, type FileSlashCommand } from "./slash-commands.js"; /** Session-specific events that extend the core AgentEvent */ export type AgentSessionEvent = | AgentEvent - | { type: "auto_compaction_start" } - | { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean }; + | { type: "auto_compaction_start"; reason: "threshold" | "overflow" } + | { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -111,6 +112,8 @@ export class AgentSession { // Compaction state private _compactionAbortController: AbortController | null = null; private _autoCompactionAbortController: AbortController | null = null; + private _abortingForCompaction = 
false; + private _lastUserMessageText: string | null = null; // Bash execution state private _bashAbortController: AbortController | null = null; @@ -145,24 +148,53 @@ export class AgentSession { // Handle session persistence if (event.type === "message_end") { - this.sessionManager.saveMessage(event.message); + // Skip saving aborted message if we're aborting for compaction + const isAbortedForCompaction = + this._abortingForCompaction && + event.message.role === "assistant" && + (event.message as AssistantMessage).stopReason === "aborted"; + + if (!isAbortedForCompaction) { + this.sessionManager.saveMessage(event.message); + } // Initialize session after first user+assistant exchange if (this.sessionManager.shouldInitializeSession(this.agent.state.messages)) { this.sessionManager.startSession(this.agent.state); } + // Track user message text for potential retry after overflow + if (event.message.role === "user") { + const content = (event.message as { content: unknown }).content; + if (typeof content === "string") { + this._lastUserMessageText = content; + } else if (Array.isArray(content)) { + this._lastUserMessageText = content + .filter((c): c is { type: "text"; text: string } => c.type === "text") + .map((c) => c.text) + .join("\n"); + } + } + // Track assistant message for auto-compaction (checked on agent_end) if (event.message.role === "assistant") { this._lastAssistantMessage = event.message as AssistantMessage; } } - // Check auto-compaction after agent completes (after agent_end clears UI) + // Handle turn_end for proactive compaction check + if (event.type === "turn_end") { + await this._checkProactiveCompaction( + event.message as AssistantMessage, + event.toolResults as ToolResultMessage[], + ); + } + + // Check auto-compaction after agent completes if (event.type === "agent_end" && this._lastAssistantMessage) { const msg = this._lastAssistantMessage; this._lastAssistantMessage = null; - this._runAutoCompaction(msg).catch(() => {}); + await 
/**
 * Proactive compaction check, run after a turn ends and before the next LLM call.
 *
 * Projects the size of the next request as the usage reported by the finished
 * assistant message plus an estimate for the tool results produced in the turn.
 * When `shouldCompact` reports that the projection crosses the configured
 * threshold, the agent is aborted and `_abortingForCompaction` is raised so the
 * abort can be told apart from a user-initiated one.
 *
 * Does nothing when auto-compaction is disabled, when the assistant message was
 * aborted or errored, or when the message contains no tool calls (no follow-up
 * turn will be issued).
 *
 * @param assistantMessage - Assistant message that closed the turn.
 * @param toolResults - Tool results produced during the turn.
 */
private async _checkProactiveCompaction(
	assistantMessage: AssistantMessage,
	toolResults: ToolResultMessage[],
): Promise<void> {
	const compactionSettings = this.settingsManager.getCompactionSettings();
	if (!compactionSettings.enabled) return;

	// Aborted or failed turns are not a basis for a projection.
	const { stopReason } = assistantMessage;
	if (stopReason === "aborted" || stopReason === "error") return;

	// Without tool calls the agent will not issue another request in this run.
	const requestsAnotherTurn = assistantMessage.content.some((block) => block.type === "toolCall");
	if (!requestsAnotherTurn) return;

	// Projection = tokens reported for the last request + estimated tool result tokens.
	const reportedTokens = calculateContextTokens(assistantMessage.usage);
	let toolResultTokens = 0;
	for (const toolResult of toolResults) {
		toolResultTokens += estimateTokens(toolResult);
	}
	const projectedTokens = reportedTokens + toolResultTokens;

	const windowSize = this.model?.contextWindow ?? 0;
	if (shouldCompact(projectedTokens, windowSize, compactionSettings)) {
		// Threshold crossed: flag the abort as compaction-driven, then stop the agent.
		this._abortingForCompaction = true;
		this.agent.abort();
	}
}
/**
 * Decide, once the agent has finished, whether auto-compaction must run.
 *
 * Three situations are checked, in priority order:
 * 1. Overflow - `isContextOverflow` flags the last assistant message. The
 *    trailing assistant message is removed from agent state and compaction
 *    runs with reason "overflow" and `willRetry = true`.
 * 2. Aborted for compaction - `_abortingForCompaction` was set. A trailing
 *    assistant message with stop reason "aborted" is removed and compaction
 *    runs with reason "threshold" and `willRetry = true`.
 * 3. Threshold crossed on a completed turn - compaction runs with reason
 *    "threshold" and `willRetry = false`.
 *
 * `_abortingForCompaction` is always reset here, whichever branch is taken.
 *
 * @param assistantMessage - Last assistant message of the finished agent run.
 */
private async _handleAgentEndCompaction(assistantMessage: AssistantMessage): Promise<void> {
	const compactionSettings = this.settingsManager.getCompactionSettings();
	const windowSize = this.model?.contextWindow ?? 0;

	// Reactive path: the provider rejected or truncated the request.
	const overflowed = isContextOverflow(assistantMessage, windowSize);

	// Proactive path: consume the flag so it cannot leak into a later run.
	const abortedForCompaction = this._abortingForCompaction;
	this._abortingForCompaction = false;

	// Maintenance path: an errored message contributes no usable usage figures.
	let usedTokens = 0;
	if (assistantMessage.stopReason !== "error") {
		usedTokens = calculateContextTokens(assistantMessage.usage);
	}
	const overThreshold = compactionSettings.enabled && shouldCompact(usedTokens, windowSize, compactionSettings);

	// Removes the last message from agent state when it is an assistant message accepted by `accepts`.
	const dropTrailingAssistant = (accepts: (message: AssistantMessage) => boolean): void => {
		const history = this.agent.state.messages;
		if (
			history.length > 0 &&
			history[history.length - 1].role === "assistant" &&
			accepts(history[history.length - 1] as AssistantMessage)
		) {
			this.agent.replaceMessages(history.slice(0, -1));
		}
	};

	if (overflowed) {
		// The overflow error message must not remain in agent state.
		dropTrailingAssistant(() => true);
		await this._runAutoCompaction("overflow", true);
		return;
	}

	if (abortedForCompaction) {
		// Only an aborted assistant message is discarded.
		dropTrailingAssistant((message) => message.stopReason === "aborted");
		await this._runAutoCompaction("threshold", true);
		return;
	}

	if (overThreshold) {
		// The turn itself succeeded, so nothing has to be retried.
		await this._runAutoCompaction("threshold", false);
	}
}
+ */ + private async _runAutoCompaction(reason: "overflow" | "threshold", willRetry: boolean): Promise { + const settings = this.settingsManager.getCompactionSettings(); + + this._emit({ type: "auto_compaction_start", reason }); this._autoCompactionAbortController = new AbortController(); try { if (!this.model) { - this._emit({ type: "auto_compaction_end", result: null, aborted: false }); + this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false }); return; } const apiKey = await getApiKeyForModel(this.model); if (!apiKey) { - this._emit({ type: "auto_compaction_end", result: null, aborted: false }); + this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false }); return; } - // Load entries (sync file read) then yield to let UI render const entries = this.sessionManager.loadEntries(); const compactionEntry = await compact( entries, @@ -633,7 +740,7 @@ export class AgentSession { ); if (this._autoCompactionAbortController.signal.aborted) { - this._emit({ type: "auto_compaction_end", result: null, aborted: true }); + this._emit({ type: "auto_compaction_end", result: null, aborted: true, willRetry: false }); return; } @@ -645,10 +752,24 @@ export class AgentSession { tokensBefore: compactionEntry.tokensBefore, summary: compactionEntry.summary, }; - this._emit({ type: "auto_compaction_end", result, aborted: false }); - } catch { - // Silently fail auto-compaction but emit end event - this._emit({ type: "auto_compaction_end", result: null, aborted: false }); + this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry }); + + // Auto-retry if needed + if (willRetry && this._lastUserMessageText) { + // Small delay to let UI update + await new Promise((resolve) => setTimeout(resolve, 100)); + await this.prompt(this._lastUserMessageText); + } + } catch (error) { + // Compaction failed - emit end event without retry + this._emit({ type: "auto_compaction_end", result: null, aborted: false, 
willRetry: false }); + + // If this was overflow recovery and compaction failed, we have a hard stop + if (reason === "overflow") { + throw new Error( + `Context overflow: ${error instanceof Error ? error.message : "compaction failed"}. Your input may be too large for the context window.`, + ); + } } finally { this._autoCompactionAbortController = null; } diff --git a/packages/coding-agent/src/core/compaction.ts b/packages/coding-agent/src/core/compaction.ts index 3037f88c..de7b6332 100644 --- a/packages/coding-agent/src/core/compaction.ts +++ b/packages/coding-agent/src/core/compaction.ts @@ -79,26 +79,100 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett // ============================================================================ /** - * Find indices of message entries that are user messages (turn boundaries). + * Estimate token count for a message using chars/4 heuristic. + * This is conservative (overestimates tokens). + * Accepts any message type (AppMessage, ToolResultMessage, etc.) 
/**
 * Estimate the token count of a message with a chars/4 heuristic.
 *
 * What is counted:
 * - `bashExecution` messages: `command` plus `output`.
 * - string `content`: its length.
 * - array `content`: the text of `text` blocks, the text of `thinking` blocks,
 *   and for `toolCall` blocks the tool name plus the JSON-serialized arguments.
 *
 * Any other block type (for example images) and any malformed block contributes
 * nothing, so the result is an approximation and not a guaranteed upper bound.
 *
 * @param message - Any message-like object (AppMessage, ToolResultMessage, etc.).
 * @returns `ceil(chars / 4)`; 0 when nothing countable is present.
 */
export function estimateTokens(message: {
	role: string;
	content?: unknown;
	command?: string;
	output?: string;
}): number {
	// Custom message type without standard content.
	if (message.role === "bashExecution") {
		const bashChars = (message.command?.length ?? 0) + (message.output?.length ?? 0);
		return Math.ceil(bashChars / 4);
	}

	const content = message.content;
	if (typeof content === "string") {
		return Math.ceil(content.length / 4);
	}
	if (!Array.isArray(content)) {
		return 0;
	}

	let chars = 0;
	for (const rawBlock of content as unknown[]) {
		// `content` is untyped here: skip anything that is not an object block.
		if (typeof rawBlock !== "object" || rawBlock === null) continue;
		const block = rawBlock as Record<string, unknown>;

		if (block.type === "text") {
			if (typeof block.text === "string") chars += block.text.length;
		} else if (block.type === "thinking") {
			if (typeof block.thinking === "string") chars += block.thinking.length;
		} else if (block.type === "toolCall") {
			// NOTE(review): assumes tool-call blocks carry `name` and `arguments` - confirm against pi-ai types.
			if (typeof block.name === "string") chars += block.name.length;
			// JSON.stringify yields undefined for values it cannot represent (e.g. undefined).
			chars += (JSON.stringify(block.arguments) ?? "").length;
		}
	}
	return Math.ceil(chars / 4);
}
/**
 * Collect the entry indices at which a compaction cut may start.
 *
 * An index qualifies when its entry is a message with role `user`, `assistant`
 * or `bashExecution`. Tool results never qualify: they must stay with the tool
 * call that precedes them. Cutting at an assistant message keeps the tool
 * results that follow it.
 *
 * The scanned range is `[startIndex, endIndex)`, clamped to the bounds of
 * `entries` so that out-of-range arguments yield fewer results instead of a crash.
 *
 * @param entries - Session entries to scan.
 * @param startIndex - First index to consider (inclusive).
 * @param endIndex - End of the range (exclusive).
 * @returns Qualifying indices in ascending order; empty when there are none.
 */
function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
	const cutPoints: number[] = [];
	const from = Math.max(startIndex, 0);
	const to = Math.min(endIndex, entries.length);
	for (let i = from; i < to; i++) {
		const entry = entries[i];
		if (entry === undefined || entry.type !== "message") continue;
		const role = entry.message.role;
		if (role === "user" || role === "assistant" || role === "bashExecution") {
			cutPoints.push(i);
		}
	}
	return cutPoints;
}

/**
 * Find the message that starts the turn containing `entryIndex`.
 *
 * Walks backwards from `entryIndex` (inclusive) down to `startIndex` (inclusive)
 * and returns the first `user` or `bashExecution` message met; a bash execution
 * is treated like a user message for turn boundaries. If the entry at
 * `entryIndex` is itself such a message, `entryIndex` is returned.
 *
 * Both bounds are clamped to the array, so an index past the end or a negative
 * `startIndex` does not throw.
 *
 * @param entries - Session entries to scan.
 * @param entryIndex - Index of an entry inside the turn of interest.
 * @param startIndex - Lowest index that may be inspected.
 * @returns Index of the turn-starting entry, or -1 when none exists in range.
 */
export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
	const lowest = Math.max(startIndex, 0);
	for (let i = Math.min(entryIndex, entries.length - 1); i >= lowest; i--) {
		const entry = entries[i];
		if (entry === undefined || entry.type !== "message") continue;
		const role = entry.message.role;
		if (role === "user" || role === "bashExecution") {
			return i;
		}
	}
	return -1;
}

/** Outcome of a cut point search. */
export interface CutPointResult {
	/** Index of first entry to keep */
	firstKeptEntryIndex: number;
	/** Index of the user (or bashExecution) message that starts the turn being split, or -1 if not splitting */
	turnStartIndex: number;
	/** Whether this cut splits a turn (cut point is not a user message) */
	isSplitTurn: boolean;
}
+ * + * Returns CutPointResult with: + * - firstKeptEntryIndex: the entry index to start keeping from + * - turnStartIndex: if cutting mid-turn, the user message that started that turn + * - isSplitTurn: whether we're cutting in the middle of a turn * * Only considers entries between `startIndex` and `endIndex` (exclusive). */ @@ -107,11 +181,11 @@ export function findCutPoint( startIndex: number, endIndex: number, keepRecentTokens: number, -): number { - const boundaries = findTurnBoundaries(entries, startIndex, endIndex); +): CutPointResult { + const cutPoints = findValidCutPoints(entries, startIndex, endIndex); - if (boundaries.length === 0) { - return startIndex; // No user messages, keep everything in range + if (cutPoints.length === 0) { + return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false }; } // Collect assistant usages walking backwards from endIndex @@ -130,8 +204,15 @@ export function findCutPoint( } if (assistantUsages.length === 0) { - // No usage info, keep last turn only - return boundaries[boundaries.length - 1]; + // No usage info, keep from last cut point + const lastCutPoint = cutPoints[cutPoints.length - 1]; + const entry = entries[lastCutPoint]; + const isUser = entry.type === "message" && entry.message.role === "user"; + return { + firstKeptEntryIndex: lastCutPoint, + turnStartIndex: isUser ? 
-1 : findTurnStartIndex(entries, lastCutPoint, startIndex), + isSplitTurn: !isUser, + }; } // Walk through and find where cumulative token difference exceeds keepRecentTokens @@ -141,12 +222,13 @@ export function findCutPoint( for (let i = 1; i < assistantUsages.length; i++) { const tokenDiff = newestTokens - assistantUsages[i].tokens; if (tokenDiff >= keepRecentTokens) { - // Find the turn boundary at or before the assistant we want to keep + // Find the valid cut point at or after the assistant we want to keep const lastKeptAssistantIndex = assistantUsages[i - 1].index; - for (let b = boundaries.length - 1; b >= 0; b--) { - if (boundaries[b] <= lastKeptAssistantIndex) { - cutIndex = boundaries[b]; + // Find closest valid cut point at or before lastKeptAssistantIndex + for (let c = cutPoints.length - 1; c >= 0; c--) { + if (cutPoints[c] <= lastKeptAssistantIndex) { + cutIndex = cutPoints[c]; break; } } @@ -154,8 +236,7 @@ export function findCutPoint( } } - // Scan backwards from cutIndex to include any non-turn entries (bash, settings, etc.) - // that should logically be part of the kept context + // Scan backwards from cutIndex to include any non-message entries (bash, settings, etc.) while (cutIndex > startIndex) { const prevEntry = entries[cutIndex - 1]; // Stop at compaction boundaries @@ -163,17 +244,23 @@ export function findCutPoint( break; } if (prevEntry.type === "message") { - const role = prevEntry.message.role; - // Stop if we hit an assistant, user, or tool result (all part of previous turn) - if (role === "assistant" || role === "user" || role === "toolResult") { - break; - } + // Stop if we hit any message + break; } - // Include this non-turn entry (bash, settings change, etc.) + // Include this non-message entry (bash, settings change, etc.) 
cutIndex--; } - return cutIndex; + // Determine if this is a split turn + const cutEntry = entries[cutIndex]; + const isUserMessage = cutEntry.type === "message" && cutEntry.message.role === "user"; + const turnStartIndex = isUserMessage ? -1 : findTurnStartIndex(entries, cutIndex, startIndex); + + return { + firstKeptEntryIndex: cutIndex, + turnStartIndex, + isSplitTurn: !isUserMessage && turnStartIndex !== -1, + }; } // ============================================================================ @@ -234,6 +321,16 @@ export async function generateSummary( // Main compaction function // ============================================================================ +const TURN_PREFIX_SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION for a split turn. +This is the PREFIX of a turn that was too large to keep in full. The SUFFIX (recent work) is being kept. + +Create a handoff summary that captures: +- What the user originally asked for in this turn +- Key decisions and progress made early in this turn +- Important context needed to understand the kept suffix + +Be concise. Focus on information needed to understand the retained recent work.`; + /** * Calculate compaction and generate summary. * Returns the CompactionEntry to append to the session file. @@ -274,43 +371,101 @@ export async function compact( const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0; // Find cut point (entry index) within the valid range - const firstKeptEntryIndex = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens); + const cutResult = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens); - // Extract messages to summarize (before the cut point) - const messagesToSummarize: AppMessage[] = []; - for (let i = boundaryStart; i < firstKeptEntryIndex; i++) { + // Extract messages for history summary (before the turn that contains the cut point) + const historyEnd = cutResult.isSplitTurn ? 
cutResult.turnStartIndex : cutResult.firstKeptEntryIndex; + const historyMessages: AppMessage[] = []; + for (let i = boundaryStart; i < historyEnd; i++) { const entry = entries[i]; if (entry.type === "message") { - messagesToSummarize.push(entry.message); + historyMessages.push(entry.message); } } - // Also include the previous summary if there was a compaction + // Include previous summary if there was a compaction if (prevCompactionIndex >= 0) { const prevCompaction = entries[prevCompactionIndex] as CompactionEntry; - // Prepend the previous summary as context - messagesToSummarize.unshift({ + historyMessages.unshift({ role: "user", content: `Previous session summary:\n${prevCompaction.summary}`, timestamp: Date.now(), }); } - // Generate summary from messages before the cut point - const summary = await generateSummary( - messagesToSummarize, - model, - settings.reserveTokens, - apiKey, - signal, - customInstructions, - ); + // Extract messages for turn prefix summary (if splitting a turn) + const turnPrefixMessages: AppMessage[] = []; + if (cutResult.isSplitTurn) { + for (let i = cutResult.turnStartIndex; i < cutResult.firstKeptEntryIndex; i++) { + const entry = entries[i]; + if (entry.type === "message") { + turnPrefixMessages.push(entry.message); + } + } + } + + // Generate summaries (can be parallel if both needed) + let summary: string; + let turnPrefixSummary: string | undefined; + + if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) { + // Generate both summaries in parallel + const [historyResult, turnPrefixResult] = await Promise.all([ + historyMessages.length > 0 + ? 
generateSummary(historyMessages, model, settings.reserveTokens, apiKey, signal, customInstructions) + : Promise.resolve("No prior history."), + generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal), + ]); + summary = historyResult; + turnPrefixSummary = turnPrefixResult; + } else { + // Just generate history summary + summary = await generateSummary( + historyMessages, + model, + settings.reserveTokens, + apiKey, + signal, + customInstructions, + ); + } return { type: "compaction", timestamp: new Date().toISOString(), summary, - firstKeptEntryIndex, + turnPrefixSummary, + firstKeptEntryIndex: cutResult.firstKeptEntryIndex, tokensBefore, }; } + +/** + * Generate a summary for a turn prefix (when splitting a turn). + */ +async function generateTurnPrefixSummary( + messages: AppMessage[], + model: Model, + reserveTokens: number, + apiKey: string, + signal?: AbortSignal, +): Promise { + const maxTokens = Math.floor(0.5 * reserveTokens); // Smaller budget for turn prefix + + const transformedMessages = messageTransformer(messages); + const summarizationMessages = [ + ...transformedMessages, + { + role: "user" as const, + content: [{ type: "text" as const, text: TURN_PREFIX_SUMMARIZATION_PROMPT }], + timestamp: Date.now(), + }, + ]; + + const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey }); + + return response.content + .filter((c): c is { type: "text"; text: string } => c.type === "text") + .map((c) => c.text) + .join("\n"); +} diff --git a/packages/coding-agent/src/core/session-manager.ts b/packages/coding-agent/src/core/session-manager.ts index 71291d54..05f492d6 100644 --- a/packages/coding-agent/src/core/session-manager.ts +++ b/packages/coding-agent/src/core/session-manager.ts @@ -50,6 +50,8 @@ export interface CompactionEntry { type: "compaction"; timestamp: string; summary: string; + /** Summary of turn prefix when a turn was split (user message to first kept message) */ + 
turnPrefixSummary?: string; firstKeptEntryIndex: number; // Index into session entries where we start keeping tokensBefore: number; } @@ -178,9 +180,19 @@ export function loadSessionFromEntries(entries: SessionEntry[]): LoadedSession { } } - // Build final messages: summary + kept messages - const summaryMessage = createSummaryMessage(compactionEvent.summary); - const messages = [summaryMessage, ...keptMessages]; + // Build final messages: summaries + kept messages + const messages: AppMessage[] = []; + + // Add history summary + messages.push(createSummaryMessage(compactionEvent.summary)); + + // Add turn prefix summary if present (when a turn was split) + if (compactionEvent.turnPrefixSummary) { + messages.push(createSummaryMessage(compactionEvent.turnPrefixSummary)); + } + + // Add kept messages + messages.push(...keptMessages); return { messages, thinkingLevel, model }; } diff --git a/packages/coding-agent/src/modes/interactive/interactive-mode.ts b/packages/coding-agent/src/modes/interactive/interactive-mode.ts index e73af0bb..f28e2b06 100644 --- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts +++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts @@ -560,25 +560,27 @@ export class InteractiveMode { this.ui.requestRender(); break; - case "auto_compaction_start": + case "auto_compaction_start": { // Set up escape to abort auto-compaction this.autoCompactionEscapeHandler = this.editor.onEscape; this.editor.onEscape = () => { this.session.abortCompaction(); }; - // Show compacting indicator + // Show compacting indicator with reason this.statusContainer.clear(); + const reasonText = event.reason === "overflow" ? "Context overflow detected, " : ""; this.autoCompactionLoader = new Loader( this.ui, (spinner) => theme.fg("accent", spinner), (text) => theme.fg("muted", text), - "Auto-compacting... (esc to cancel)", + `${reasonText}Auto-compacting... 
(esc to cancel)`, ); this.statusContainer.addChild(this.autoCompactionLoader); this.ui.requestRender(); break; + } - case "auto_compaction_end": + case "auto_compaction_end": { // Restore escape handler if (this.autoCompactionEscapeHandler) { this.editor.onEscape = this.autoCompactionEscapeHandler; @@ -602,9 +604,14 @@ export class InteractiveMode { compactionComponent.setExpanded(this.toolOutputExpanded); this.chatContainer.addChild(compactionComponent); this.footer.updateState(this.session.state); + + if (event.willRetry) { + this.showStatus("Compacted context, retrying..."); + } } this.ui.requestRender(); break; + } } } diff --git a/packages/coding-agent/test/compaction.test.ts b/packages/coding-agent/test/compaction.test.ts index afde511a..64eebcf0 100644 --- a/packages/coding-agent/test/compaction.test.ts +++ b/packages/coding-agent/test/compaction.test.ts @@ -164,16 +164,18 @@ describe("findCutPoint", () => { // 20 entries, last assistant has 10000 tokens // keepRecentTokens = 2500: keep entries where diff < 2500 - const cutPoint = findCutPoint(entries, 0, entries.length, 2500); + const result = findCutPoint(entries, 0, entries.length, 2500); - // Should cut at a user message entry - expect(entries[cutPoint].type).toBe("message"); - expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user"); + // Should cut at a valid cut point (user or assistant message) + expect(entries[result.firstKeptEntryIndex].type).toBe("message"); + const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role; + expect(role === "user" || role === "assistant").toBe(true); }); - it("should return startIndex if no user messages in range", () => { + it("should return startIndex if no valid cut points in range", () => { const entries: SessionEntry[] = [createMessageEntry(createAssistantMessage("a"))]; - expect(findCutPoint(entries, 0, entries.length, 1000)).toBe(0); + const result = findCutPoint(entries, 0, entries.length, 1000); + 
expect(result.firstKeptEntryIndex).toBe(0); }); it("should keep everything if all messages fit within budget", () => { @@ -184,8 +186,30 @@ describe("findCutPoint", () => { createMessageEntry(createAssistantMessage("b", createMockUsage(0, 50, 1000, 0))), ]; - const cutPoint = findCutPoint(entries, 0, entries.length, 50000); - expect(cutPoint).toBe(0); + const result = findCutPoint(entries, 0, entries.length, 50000); + expect(result.firstKeptEntryIndex).toBe(0); + }); + + it("should indicate split turn when cutting at assistant message", () => { + // Create a scenario where we cut at an assistant message mid-turn + const entries: SessionEntry[] = [ + createMessageEntry(createUserMessage("Turn 1")), + createMessageEntry(createAssistantMessage("A1", createMockUsage(0, 100, 1000, 0))), + createMessageEntry(createUserMessage("Turn 2")), // index 2 + createMessageEntry(createAssistantMessage("A2-1", createMockUsage(0, 100, 5000, 0))), // index 3 + createMessageEntry(createAssistantMessage("A2-2", createMockUsage(0, 100, 8000, 0))), // index 4 + createMessageEntry(createAssistantMessage("A2-3", createMockUsage(0, 100, 10000, 0))), // index 5 + ]; + + // With keepRecentTokens = 3000, should cut somewhere in Turn 2 + const result = findCutPoint(entries, 0, entries.length, 3000); + + // If cut at assistant message (not user), should indicate split turn + const cutEntry = entries[result.firstKeptEntryIndex] as SessionMessageEntry; + if (cutEntry.message.role === "assistant") { + expect(result.isSplitTurn).toBe(true); + expect(result.turnStartIndex).toBe(2); // Turn 2 starts at index 2 + } }); }); @@ -348,11 +372,12 @@ describe("Large session fixture", () => { it("should find cut point in large session", () => { const entries = loadLargeSessionEntries(); - const cutPoint = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens); + const result = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens); - // Cut point 
should be at a message entry with user role - expect(entries[cutPoint].type).toBe("message"); - expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user"); + // Cut point should be at a message entry (user or assistant) + expect(entries[result.firstKeptEntryIndex].type).toBe("message"); + const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role; + expect(role === "user" || role === "assistant").toBe(true); }); it("should load session correctly", () => {