feat(coding-agent): implement new compaction system with overflow recovery

Phase 1: Updated compaction.ts
- findCutPoint now returns CutPointResult with isSplitTurn and turnStartIndex
- Can cut at user, assistant, or bashExecution messages (never tool results)
- Added turnPrefixSummary support for split turns (parallel summarization)
- estimateTokens helper for context size estimation

Phase 2: Updated session-manager.ts
- CompactionEntry now has optional turnPrefixSummary field
- loadSessionFromEntries injects both summaries when turn was split

Phase 3: Updated agent-session.ts
- Overflow detection via isContextOverflow after agent_end
- Proactive compaction check on turn_end before next LLM call
- _abortingForCompaction flag to skip saving aborted messages
- Auto-retry after overflow recovery or proactive compaction
- New event fields: reason (overflow/threshold), willRetry

Phase 4: Updated interactive-mode.ts
- Shows reason in compaction status (Context overflow detected...)
- Shows retry status after compaction

Tests updated for new CutPointResult return type.
2026-04-16 20:01:24 +00:00 · 2025-12-09 17:18:53 +01:00 · 2025-12-09 17:18:53 +01:00 · a38e619095
commit a38e619095
parent ee9acdb49d
5 changed files with 411 additions and 91 deletions
--- a/packages/coding-agent/src/core/agent-session.ts
+++ b/packages/coding-agent/src/core/agent-session.ts
@ -14,10 +14,11 @@
 */

 import type { Agent, AgentEvent, AgentState, AppMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
-import type { AssistantMessage, Model } from "@mariozechner/pi-ai";
+import type { AssistantMessage, Model, ToolResultMessage } from "@mariozechner/pi-ai";
+import { isContextOverflow } from "@mariozechner/pi-ai";
 import { getModelsPath } from "../config.js";
 import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js";
-import { calculateContextTokens, compact, shouldCompact } from "./compaction.js";
+import { calculateContextTokens, compact, estimateTokens, shouldCompact } from "./compaction.js";
 import { exportSessionToHtml } from "./export-html.js";
 import type { BashExecutionMessage } from "./messages.js";
 import { getApiKeyForModel, getAvailableModels } from "./model-config.js";
@ -28,8 +29,8 @@ import { expandSlashCommand, type FileSlashCommand } from "./slash-commands.js";
 /** Session-specific events that extend the core AgentEvent */
 export type AgentSessionEvent =
 	| AgentEvent
-	| { type: "auto_compaction_start" }
-	| { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean };
+	| { type: "auto_compaction_start"; reason: "threshold" | "overflow" }
+	| { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean };

 /** Listener function for agent session events */
 export type AgentSessionEventListener = (event: AgentSessionEvent) => void;
@ -111,6 +112,8 @@ export class AgentSession {
 	// Compaction state
 	private _compactionAbortController: AbortController | null = null;
 	private _autoCompactionAbortController: AbortController | null = null;
+	private _abortingForCompaction = false;
+	private _lastUserMessageText: string | null = null;

 	// Bash execution state
 	private _bashAbortController: AbortController | null = null;
@ -145,24 +148,53 @@ export class AgentSession {

 		// Handle session persistence
 		if (event.type === "message_end") {
-			this.sessionManager.saveMessage(event.message);
+			// Skip saving aborted message if we're aborting for compaction
+			const isAbortedForCompaction =
+				this._abortingForCompaction &&
+				event.message.role === "assistant" &&
+				(event.message as AssistantMessage).stopReason === "aborted";
+
+			if (!isAbortedForCompaction) {
+				this.sessionManager.saveMessage(event.message);
+			}

 			// Initialize session after first user+assistant exchange
 			if (this.sessionManager.shouldInitializeSession(this.agent.state.messages)) {
 				this.sessionManager.startSession(this.agent.state);
 			}

+			// Track user message text for potential retry after overflow
+			if (event.message.role === "user") {
+				const content = (event.message as { content: unknown }).content;
+				if (typeof content === "string") {
+					this._lastUserMessageText = content;
+				} else if (Array.isArray(content)) {
+					this._lastUserMessageText = content
+						.filter((c): c is { type: "text"; text: string } => c.type === "text")
+						.map((c) => c.text)
+						.join("\n");
+				}
+			}
+
 			// Track assistant message for auto-compaction (checked on agent_end)
 			if (event.message.role === "assistant") {
 				this._lastAssistantMessage = event.message as AssistantMessage;
 			}
 		}

-		// Check auto-compaction after agent completes (after agent_end clears UI)
+		// Handle turn_end for proactive compaction check
+		if (event.type === "turn_end") {
+			await this._checkProactiveCompaction(
+				event.message as AssistantMessage,
+				event.toolResults as ToolResultMessage[],
+			);
+		}
+
+		// Check auto-compaction after agent completes
 		if (event.type === "agent_end" && this._lastAssistantMessage) {
 			const msg = this._lastAssistantMessage;
 			this._lastAssistantMessage = null;
-			this._runAutoCompaction(msg).catch(() => {});
+			await this._handleAgentEndCompaction(msg);
 		}
 	};

@ -591,38 +623,113 @@ export class AgentSession {
 	}

 	/**
-	 * Internal: Run auto-compaction with events.
-	 * Called after assistant messages complete.
+	 * Check for proactive compaction after turn_end (before next LLM call).
+	 * Estimates context size and aborts if threshold would be crossed.
 	 */
-	private async _runAutoCompaction(assistantMessage: AssistantMessage): Promise<void> {
+	private async _checkProactiveCompaction(
+		assistantMessage: AssistantMessage,
+		toolResults: ToolResultMessage[],
+	): Promise<void> {
 		const settings = this.settingsManager.getCompactionSettings();
 		if (!settings.enabled) return;

-		// Skip if message was aborted
-		if (assistantMessage.stopReason === "aborted") return;
+		// Skip if message was aborted or errored
+		if (assistantMessage.stopReason === "aborted" || assistantMessage.stopReason === "error") return;

+		// Only check if there are tool calls (meaning another turn will happen)
+		const hasToolCalls = assistantMessage.content.some((c) => c.type === "toolCall");
+		if (!hasToolCalls) return;
+
+		// Estimate context size: last usage + tool results
 		const contextTokens = calculateContextTokens(assistantMessage.usage);
+		const toolResultTokens = toolResults.reduce((sum, msg) => sum + estimateTokens(msg), 0);
+		const estimatedTotal = contextTokens + toolResultTokens;
+
 		const contextWindow = this.model?.contextWindow ?? 0;

-		if (!shouldCompact(contextTokens, contextWindow, settings)) return;
+		if (!shouldCompact(estimatedTotal, contextWindow, settings)) return;

-		// Emit start event
-		this._emit({ type: "auto_compaction_start" });
+		// Threshold crossed - abort for compaction
+		this._abortingForCompaction = true;
+		this.agent.abort();
+	}
+
+	/**
+	 * Decide whether auto-compaction must run after agent_end and, if so, run it.
+	 *
+	 * Cases, checked in priority order:
+	 * 1. Overflow (reactive): isContextOverflow flags the last assistant message.
+	 *    The trailing assistant message is dropped from agent state and
+	 *    compaction runs with retry.
+	 * 2. Aborted for compaction (proactive): _abortingForCompaction was set.
+	 *    The trailing aborted assistant message is dropped and compaction runs
+	 *    with retry.
+	 * 3. Threshold crossed on a completed turn (maintenance): compaction runs
+	 *    without retry.
+	 *
+	 * @param assistantMessage The last assistant message tracked before agent_end.
+	 */
+	private async _handleAgentEndCompaction(assistantMessage: AssistantMessage): Promise<void> {
+		const settings = this.settingsManager.getCompactionSettings();
+		const contextWindow = this.model?.contextWindow ?? 0;
+
+		// Check 1: Overflow detection (reactive recovery)
+		const isOverflow = isContextOverflow(assistantMessage, contextWindow);
+
+		// Check 2: Aborted for compaction (proactive).
+		// The flag is consumed here unconditionally so it cannot leak into a later run.
+		const wasAbortedForCompaction = this._abortingForCompaction;
+		this._abortingForCompaction = false;
+
+		// Check 3: Threshold crossed but turn succeeded (maintenance compaction).
+		// Usage on an errored or aborted message is not a trustworthy measure of the
+		// context size, so it is treated as zero (the previous auto-compaction code
+		// likewise skipped aborted messages).
+		const usageUnreliable =
+			assistantMessage.stopReason === "error" || assistantMessage.stopReason === "aborted";
+		const contextTokens = usageUnreliable ? 0 : calculateContextTokens(assistantMessage.usage);
+		const thresholdCrossed = settings.enabled && shouldCompact(contextTokens, contextWindow, settings);
+
+		// Determine which action to take
+		let reason: "overflow" | "threshold" | null = null;
+		let willRetry = false;
+
+		if (isOverflow) {
+			reason = "overflow";
+			willRetry = true;
+			// Remove the overflow error message from agent state
+			const messages = this.agent.state.messages;
+			if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
+				this.agent.replaceMessages(messages.slice(0, -1));
+			}
+		} else if (wasAbortedForCompaction) {
+			reason = "threshold";
+			willRetry = true;
+			// Remove the aborted message from agent state (only if it really is the aborted one)
+			const messages = this.agent.state.messages;
+			if (
+				messages.length > 0 &&
+				messages[messages.length - 1].role === "assistant" &&
+				(messages[messages.length - 1] as AssistantMessage).stopReason === "aborted"
+			) {
+				this.agent.replaceMessages(messages.slice(0, -1));
+			}
+		} else if (thresholdCrossed) {
+			reason = "threshold";
+			willRetry = false; // Turn succeeded, no retry needed
+		}
+
+		if (!reason) return;
+
+		// Run compaction
+		await this._runAutoCompaction(reason, willRetry);
+	}
+
+	/**
+	 * Internal: Run auto-compaction with events.
+	 */
+	private async _runAutoCompaction(reason: "overflow" | "threshold", willRetry: boolean): Promise<void> {
+		const settings = this.settingsManager.getCompactionSettings();
+
+		this._emit({ type: "auto_compaction_start", reason });
 		this._autoCompactionAbortController = new AbortController();

 		try {
 			if (!this.model) {
-				this._emit({ type: "auto_compaction_end", result: null, aborted: false });
+				this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
 				return;
 			}

 			const apiKey = await getApiKeyForModel(this.model);
 			if (!apiKey) {
-				this._emit({ type: "auto_compaction_end", result: null, aborted: false });
+				this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
 				return;
 			}

-			// Load entries (sync file read) then yield to let UI render
 			const entries = this.sessionManager.loadEntries();
 			const compactionEntry = await compact(
 				entries,
@ -633,7 +740,7 @@ export class AgentSession {
 			);

 			if (this._autoCompactionAbortController.signal.aborted) {
-				this._emit({ type: "auto_compaction_end", result: null, aborted: true });
+				this._emit({ type: "auto_compaction_end", result: null, aborted: true, willRetry: false });
 				return;
 			}

@ -645,10 +752,24 @@ export class AgentSession {
 				tokensBefore: compactionEntry.tokensBefore,
 				summary: compactionEntry.summary,
 			};
-			this._emit({ type: "auto_compaction_end", result, aborted: false });
-		} catch {
-			// Silently fail auto-compaction but emit end event
-			this._emit({ type: "auto_compaction_end", result: null, aborted: false });
+			this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry });
+
+			// Auto-retry if needed
+			if (willRetry && this._lastUserMessageText) {
+				// Small delay to let UI update
+				await new Promise((resolve) => setTimeout(resolve, 100));
+				await this.prompt(this._lastUserMessageText);
+			}
+		} catch (error) {
+			// Compaction failed - emit end event without retry
+			this._emit({ type: "auto_compaction_end", result: null, aborted: false, willRetry: false });
+
+			// If this was overflow recovery and compaction failed, we have a hard stop
+			if (reason === "overflow") {
+				throw new Error(
+					`Context overflow: ${error instanceof Error ? error.message : "compaction failed"}. Your input may be too large for the context window.`,
+				);
+			}
 		} finally {
 			this._autoCompactionAbortController = null;
 		}
--- a/packages/coding-agent/src/core/compaction.ts
+++ b/packages/coding-agent/src/core/compaction.ts
@ -79,26 +79,100 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett
 // ============================================================================

 /**
- * Find indices of message entries that are user messages (turn boundaries).
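+ * Estimate token count for a message using a chars/4 heuristic.
+ * Intended as a rough, conservative estimate for plain text. Only the
+ * command/output of bashExecution messages and the text and thinking blocks of
+ * standard messages are counted; any other content block (e.g. toolCall)
+ * contributes nothing, so such messages can be underestimated.
+ * Accepts any message type (AppMessage, ToolResultMessage, etc.)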
 */
-function findTurnBoundaries(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
-	const boundaries: number[] = [];
-	for (let i = startIndex; i < endIndex; i++) {
-		const entry = entries[i];
-		if (entry.type === "message" && entry.message.role === "user") {
-			boundaries.push(i);
+export function estimateTokens(message: {
+	role: string;
+	content?: unknown;
+	command?: string;
+	output?: string;
+}): number {
+	let chars = 0;
+
+	// Handle custom message types that don't have standard content
+	if (message.role === "bashExecution") {
+		chars = (message.command?.length || 0) + (message.output?.length || 0);
+		return Math.ceil(chars / 4);
+	}
+
+	// Standard messages with content
+	const content = message.content;
+	if (typeof content === "string") {
+		chars = content.length;
+	} else if (Array.isArray(content)) {
+		for (const block of content) {
+			if (block.type === "text") {
+				chars += block.text.length;
+			} else if (block.type === "thinking") {
+				chars += block.thinking.length;
+			}
 		}
 	}
-	return boundaries;
+	return Math.ceil(chars / 4);
+}
+
+/**
+ * Collect the indices in [startIndex, endIndex) at which a cut may be placed.
+ * A cut is allowed at user, assistant, and bashExecution messages only.
+ * Tool results are never cut points because they must follow their tool call;
+ * cutting at an assistant message keeps the tool results that come after it.
+ * BashExecutionMessage is treated like a user message (user-initiated context).
+ */
+function findValidCutPoints(entries: SessionEntry[], startIndex: number, endIndex: number): number[] {
+	const validIndices: number[] = [];
+	for (let idx = startIndex; idx < endIndex; idx++) {
+		const candidate = entries[idx];
+		// Non-message entries are never cut points
+		if (candidate.type !== "message") continue;
+		switch (candidate.message.role) {
+			case "user":
+			case "assistant":
+			case "bashExecution":
+				validIndices.push(idx);
+				break;
+			default:
+				// toolResult (and anything else) must stay with what precedes it
+				break;
+		}
+	}
+	return validIndices;
+}
+
+/**
+ * Walk backwards from entryIndex (inclusive) down to startIndex (inclusive) and
+ * return the index of the first user or bashExecution message encountered,
+ * i.e. the entry that starts the turn containing entryIndex.
+ * Returns -1 when no such message exists in that range.
+ * BashExecutionMessage counts as a turn start, like a user message.
+ */
+export function findTurnStartIndex(entries: SessionEntry[], entryIndex: number, startIndex: number): number {
+	let cursor = entryIndex;
+	while (cursor >= startIndex) {
+		const candidate = entries[cursor];
+		const startsTurn =
+			candidate.type === "message" &&
+			(candidate.message.role === "user" || candidate.message.role === "bashExecution");
+		if (startsTurn) return cursor;
+		cursor--;
+	}
+	return -1;
+}
+
+export interface CutPointResult {
+	/** Index of first entry to keep */
+	firstKeptEntryIndex: number;
+	/** Index of user message that starts the turn being split, or -1 if not splitting */
+	turnStartIndex: number;
+	/** Whether this cut splits a turn (cut point is not a user message) */
+	isSplitTurn: boolean;
 }

 /**
 * Find the cut point in session entries that keeps approximately `keepRecentTokens`.
- * Returns the entry index of the first entry to keep.
 *
- * The cut point targets a user message (turn boundary), but then scans backwards
- * to include any preceding non-turn entries (bash executions, settings changes, etc.)
- * that should logically be part of the kept context.
+ * Can cut at user OR assistant messages (never tool results). When cutting at an
+ * assistant message with tool calls, its tool results come after and will be kept.
+ *
+ * Returns CutPointResult with:
+ * - firstKeptEntryIndex: the entry index to start keeping from
+ * - turnStartIndex: if cutting mid-turn, the user message that started that turn
+ * - isSplitTurn: whether we're cutting in the middle of a turn
 *
 * Only considers entries between `startIndex` and `endIndex` (exclusive).
 */
@ -107,11 +181,11 @@ export function findCutPoint(
 	startIndex: number,
 	endIndex: number,
 	keepRecentTokens: number,
-): number {
-	const boundaries = findTurnBoundaries(entries, startIndex, endIndex);
+): CutPointResult {
+	const cutPoints = findValidCutPoints(entries, startIndex, endIndex);

-	if (boundaries.length === 0) {
-		return startIndex; // No user messages, keep everything in range
+	if (cutPoints.length === 0) {
+		return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
 	}

 	// Collect assistant usages walking backwards from endIndex
@ -130,8 +204,15 @@ export function findCutPoint(
 	}

 	if (assistantUsages.length === 0) {
-		// No usage info, keep last turn only
-		return boundaries[boundaries.length - 1];
+		// No usage info, keep from last cut point
+		const lastCutPoint = cutPoints[cutPoints.length - 1];
+		const entry = entries[lastCutPoint];
+		const isUser = entry.type === "message" && entry.message.role === "user";
+		return {
+			firstKeptEntryIndex: lastCutPoint,
+			turnStartIndex: isUser ? -1 : findTurnStartIndex(entries, lastCutPoint, startIndex),
+			isSplitTurn: !isUser,
+		};
 	}

 	// Walk through and find where cumulative token difference exceeds keepRecentTokens
@ -141,12 +222,13 @@ export function findCutPoint(
 	for (let i = 1; i < assistantUsages.length; i++) {
 		const tokenDiff = newestTokens - assistantUsages[i].tokens;
 		if (tokenDiff >= keepRecentTokens) {
-			// Find the turn boundary at or before the assistant we want to keep
+			// Find the valid cut point at or after the assistant we want to keep
 			const lastKeptAssistantIndex = assistantUsages[i - 1].index;

-			for (let b = boundaries.length - 1; b >= 0; b--) {
-				if (boundaries[b] <= lastKeptAssistantIndex) {
-					cutIndex = boundaries[b];
+			// Find closest valid cut point at or before lastKeptAssistantIndex
+			for (let c = cutPoints.length - 1; c >= 0; c--) {
+				if (cutPoints[c] <= lastKeptAssistantIndex) {
+					cutIndex = cutPoints[c];
 					break;
 				}
 			}
@ -154,8 +236,7 @@ export function findCutPoint(
 		}
 	}

-	// Scan backwards from cutIndex to include any non-turn entries (bash, settings, etc.)
-	// that should logically be part of the kept context
+	// Scan backwards from cutIndex to include any non-message entries (bash, settings, etc.)
 	while (cutIndex > startIndex) {
 		const prevEntry = entries[cutIndex - 1];
 		// Stop at compaction boundaries
@ -163,17 +244,23 @@ export function findCutPoint(
 			break;
 		}
 		if (prevEntry.type === "message") {
-			const role = prevEntry.message.role;
-			// Stop if we hit an assistant, user, or tool result (all part of previous turn)
-			if (role === "assistant" || role === "user" || role === "toolResult") {
-				break;
-			}
+			// Stop if we hit any message
+			break;
 		}
-		// Include this non-turn entry (bash, settings change, etc.)
+		// Include this non-message entry (bash, settings change, etc.)
 		cutIndex--;
 	}

-	return cutIndex;
+	// Determine if this is a split turn
+	const cutEntry = entries[cutIndex];
+	const isUserMessage = cutEntry.type === "message" && cutEntry.message.role === "user";
+	const turnStartIndex = isUserMessage ? -1 : findTurnStartIndex(entries, cutIndex, startIndex);
+
+	return {
+		firstKeptEntryIndex: cutIndex,
+		turnStartIndex,
+		isSplitTurn: !isUserMessage && turnStartIndex !== -1,
+	};
 }

 // ============================================================================
@ -234,6 +321,16 @@ export async function generateSummary(
 // Main compaction function
 // ============================================================================

+const TURN_PREFIX_SUMMARIZATION_PROMPT = `You are performing a CONTEXT CHECKPOINT COMPACTION for a split turn. 
+This is the PREFIX of a turn that was too large to keep in full. The SUFFIX (recent work) is being kept.
+
+Create a handoff summary that captures:
+- What the user originally asked for in this turn
+- Key decisions and progress made early in this turn
+- Important context needed to understand the kept suffix
+
+Be concise. Focus on information needed to understand the retained recent work.`;
+
 /**
 * Calculate compaction and generate summary.
 * Returns the CompactionEntry to append to the session file.
@ -274,43 +371,101 @@ export async function compact(
 	const tokensBefore = lastUsage ? calculateContextTokens(lastUsage) : 0;

 	// Find cut point (entry index) within the valid range
-	const firstKeptEntryIndex = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);
+	const cutResult = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens);

-	// Extract messages to summarize (before the cut point)
-	const messagesToSummarize: AppMessage[] = [];
-	for (let i = boundaryStart; i < firstKeptEntryIndex; i++) {
+	// Extract messages for history summary (before the turn that contains the cut point)
+	const historyEnd = cutResult.isSplitTurn ? cutResult.turnStartIndex : cutResult.firstKeptEntryIndex;
+	const historyMessages: AppMessage[] = [];
+	for (let i = boundaryStart; i < historyEnd; i++) {
 		const entry = entries[i];
 		if (entry.type === "message") {
-			messagesToSummarize.push(entry.message);
+			historyMessages.push(entry.message);
 		}
 	}

-	// Also include the previous summary if there was a compaction
+	// Include previous summary if there was a compaction
 	if (prevCompactionIndex >= 0) {
 		const prevCompaction = entries[prevCompactionIndex] as CompactionEntry;
-		// Prepend the previous summary as context
-		messagesToSummarize.unshift({
+		historyMessages.unshift({
 			role: "user",
 			content: `Previous session summary:\n${prevCompaction.summary}`,
 			timestamp: Date.now(),
 		});
 	}

-	// Generate summary from messages before the cut point
-	const summary = await generateSummary(
-		messagesToSummarize,
-		model,
-		settings.reserveTokens,
-		apiKey,
-		signal,
-		customInstructions,
-	);
+	// Extract messages for turn prefix summary (if splitting a turn)
+	const turnPrefixMessages: AppMessage[] = [];
+	if (cutResult.isSplitTurn) {
+		for (let i = cutResult.turnStartIndex; i < cutResult.firstKeptEntryIndex; i++) {
+			const entry = entries[i];
+			if (entry.type === "message") {
+				turnPrefixMessages.push(entry.message);
+			}
+		}
+	}
+
+	// Generate summaries (can be parallel if both needed)
+	let summary: string;
+	let turnPrefixSummary: string | undefined;
+
+	if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) {
+		// Generate both summaries in parallel
+		const [historyResult, turnPrefixResult] = await Promise.all([
+			historyMessages.length > 0
+				? generateSummary(historyMessages, model, settings.reserveTokens, apiKey, signal, customInstructions)
+				: Promise.resolve("No prior history."),
+			generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal),
+		]);
+		summary = historyResult;
+		turnPrefixSummary = turnPrefixResult;
+	} else {
+		// Just generate history summary
+		summary = await generateSummary(
+			historyMessages,
+			model,
+			settings.reserveTokens,
+			apiKey,
+			signal,
+			customInstructions,
+		);
+	}

 	return {
 		type: "compaction",
 		timestamp: new Date().toISOString(),
 		summary,
-		firstKeptEntryIndex,
+		turnPrefixSummary,
+		firstKeptEntryIndex: cutResult.firstKeptEntryIndex,
 		tokensBefore,
 	};
 }
+
+/**
+ * Summarize the prefix of a turn that is being split by compaction.
+ *
+ * The given messages are passed through messageTransformer, followed by a user
+ * message carrying TURN_PREFIX_SUMMARIZATION_PROMPT, and sent to the model.
+ * The output budget is half of reserveTokens.
+ *
+ * @returns The text blocks of the response joined with newlines.
+ */
+async function generateTurnPrefixSummary(
+	messages: AppMessage[],
+	model: Model<any>,
+	reserveTokens: number,
+	apiKey: string,
+	signal?: AbortSignal,
+): Promise<string> {
+	// Smaller budget for turn prefix
+	const maxTokens = Math.floor(0.5 * reserveTokens);
+
+	const priorMessages = messageTransformer(messages);
+	const promptMessage = {
+		role: "user" as const,
+		content: [{ type: "text" as const, text: TURN_PREFIX_SUMMARIZATION_PROMPT }],
+		timestamp: Date.now(),
+	};
+	const summarizationMessages = [...priorMessages, promptMessage];
+
+	const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey });
+
+	// Keep only the text blocks of the reply
+	const textParts: string[] = [];
+	for (const block of response.content) {
+		if (block.type === "text") {
+			textParts.push(block.text);
+		}
+	}
+	return textParts.join("\n");
+}
--- a/packages/coding-agent/src/core/session-manager.ts
+++ b/packages/coding-agent/src/core/session-manager.ts
@ -50,6 +50,8 @@ export interface CompactionEntry {
 	type: "compaction";
 	timestamp: string;
 	summary: string;
+	/** Summary of turn prefix when a turn was split (user message to first kept message) */
+	turnPrefixSummary?: string;
 	firstKeptEntryIndex: number; // Index into session entries where we start keeping
 	tokensBefore: number;
 }
@ -178,9 +180,19 @@ export function loadSessionFromEntries(entries: SessionEntry[]): LoadedSession {
 		}
 	}

-	// Build final messages: summary + kept messages
-	const summaryMessage = createSummaryMessage(compactionEvent.summary);
-	const messages = [summaryMessage, ...keptMessages];
+	// Build final messages: summaries + kept messages
+	const messages: AppMessage[] = [];
+
+	// Add history summary
+	messages.push(createSummaryMessage(compactionEvent.summary));
+
+	// Add turn prefix summary if present (when a turn was split)
+	if (compactionEvent.turnPrefixSummary) {
+		messages.push(createSummaryMessage(compactionEvent.turnPrefixSummary));
+	}
+
+	// Add kept messages
+	messages.push(...keptMessages);

 	return { messages, thinkingLevel, model };
 }
--- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts
+++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
@ -560,25 +560,27 @@ export class InteractiveMode {
 				this.ui.requestRender();
 				break;

-			case "auto_compaction_start":
+			case "auto_compaction_start": {
 				// Set up escape to abort auto-compaction
 				this.autoCompactionEscapeHandler = this.editor.onEscape;
 				this.editor.onEscape = () => {
 					this.session.abortCompaction();
 				};
-				// Show compacting indicator
+				// Show compacting indicator with reason
 				this.statusContainer.clear();
+				const reasonText = event.reason === "overflow" ? "Context overflow detected, " : "";
 				this.autoCompactionLoader = new Loader(
 					this.ui,
 					(spinner) => theme.fg("accent", spinner),
 					(text) => theme.fg("muted", text),
-					"Auto-compacting... (esc to cancel)",
+					`${reasonText}Auto-compacting... (esc to cancel)`,
 				);
 				this.statusContainer.addChild(this.autoCompactionLoader);
 				this.ui.requestRender();
 				break;
+			}

-			case "auto_compaction_end":
+			case "auto_compaction_end": {
 				// Restore escape handler
 				if (this.autoCompactionEscapeHandler) {
 					this.editor.onEscape = this.autoCompactionEscapeHandler;
@ -602,9 +604,14 @@ export class InteractiveMode {
 					compactionComponent.setExpanded(this.toolOutputExpanded);
 					this.chatContainer.addChild(compactionComponent);
 					this.footer.updateState(this.session.state);
+
+					if (event.willRetry) {
+						this.showStatus("Compacted context, retrying...");
+					}
 				}
 				this.ui.requestRender();
 				break;
+			}
 		}
 	}

--- a/packages/coding-agent/test/compaction.test.ts
+++ b/packages/coding-agent/test/compaction.test.ts
@ -164,16 +164,18 @@ describe("findCutPoint", () => {

 		// 20 entries, last assistant has 10000 tokens
 		// keepRecentTokens = 2500: keep entries where diff < 2500
-		const cutPoint = findCutPoint(entries, 0, entries.length, 2500);
+		const result = findCutPoint(entries, 0, entries.length, 2500);

-		// Should cut at a user message entry
-		expect(entries[cutPoint].type).toBe("message");
-		expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user");
+		// Should cut at a valid cut point (user or assistant message)
+		expect(entries[result.firstKeptEntryIndex].type).toBe("message");
+		const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role;
+		expect(role === "user" || role === "assistant").toBe(true);
 	});

-	it("should return startIndex if no user messages in range", () => {
+	it("should return startIndex if no valid cut points in range", () => {
 		const entries: SessionEntry[] = [createMessageEntry(createAssistantMessage("a"))];
-		expect(findCutPoint(entries, 0, entries.length, 1000)).toBe(0);
+		const result = findCutPoint(entries, 0, entries.length, 1000);
+		expect(result.firstKeptEntryIndex).toBe(0);
 	});

 	it("should keep everything if all messages fit within budget", () => {
@ -184,8 +186,30 @@ describe("findCutPoint", () => {
 			createMessageEntry(createAssistantMessage("b", createMockUsage(0, 50, 1000, 0))),
 		];

-		const cutPoint = findCutPoint(entries, 0, entries.length, 50000);
-		expect(cutPoint).toBe(0);
+		const result = findCutPoint(entries, 0, entries.length, 50000);
+		expect(result.firstKeptEntryIndex).toBe(0);
+	});
+
+	it("should indicate split turn when cutting at assistant message", () => {
+		// Create a scenario where we cut at an assistant message mid-turn
+		const entries: SessionEntry[] = [
+			createMessageEntry(createUserMessage("Turn 1")),
+			createMessageEntry(createAssistantMessage("A1", createMockUsage(0, 100, 1000, 0))),
+			createMessageEntry(createUserMessage("Turn 2")), // index 2
+			createMessageEntry(createAssistantMessage("A2-1", createMockUsage(0, 100, 5000, 0))), // index 3
+			createMessageEntry(createAssistantMessage("A2-2", createMockUsage(0, 100, 8000, 0))), // index 4
+			createMessageEntry(createAssistantMessage("A2-3", createMockUsage(0, 100, 10000, 0))), // index 5
+		];
+
+		// With keepRecentTokens = 3000, should cut somewhere in Turn 2
+		const result = findCutPoint(entries, 0, entries.length, 3000);
+
+		// If cut at assistant message (not user), should indicate split turn
+		const cutEntry = entries[result.firstKeptEntryIndex] as SessionMessageEntry;
+		if (cutEntry.message.role === "assistant") {
+			expect(result.isSplitTurn).toBe(true);
+			expect(result.turnStartIndex).toBe(2); // Turn 2 starts at index 2
+		}
 	});
 });

@ -348,11 +372,12 @@ describe("Large session fixture", () => {

 	it("should find cut point in large session", () => {
 		const entries = loadLargeSessionEntries();
-		const cutPoint = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens);
+		const result = findCutPoint(entries, 0, entries.length, DEFAULT_COMPACTION_SETTINGS.keepRecentTokens);

-		// Cut point should be at a message entry with user role
-		expect(entries[cutPoint].type).toBe("message");
-		expect((entries[cutPoint] as SessionMessageEntry).message.role).toBe("user");
+		// Cut point should be at a message entry (user or assistant)
+		expect(entries[result.firstKeptEntryIndex].type).toBe("message");
+		const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry).message.role;
+		expect(role === "user" || role === "assistant").toBe(true);
 	});

 	it("should load session correctly", () => {