Simplify compaction: remove proactive abort, use Agent.continue() for retry

- Add agentLoopContinue() to pi-ai for resuming from existing context
- Add Agent.continue() method and transport.continue() interface
- Simplify AgentSession compaction to two cases: overflow (auto-retry) and threshold (no retry)
- Remove proactive mid-turn compaction abort
- Merge turn prefix summary into main summary
- Add isCompacting property to AgentSession and RPC state
- Block input during compaction in interactive mode
- Show compaction count on session resume
- Rename RPC.md to rpc.md for consistency

Related to #128
2026-04-16 14:01:06 +00:00 · 2025-12-09 21:43:49 +01:00 · 2025-12-09 21:43:49 +01:00 · 5a9d844f9a
commit 5a9d844f9a
parent d67c69c6e9
27 changed files with 1261 additions and 1011 deletions
--- a/packages/coding-agent/src/core/agent-session.ts
+++ b/packages/coding-agent/src/core/agent-session.ts
@ -14,11 +14,11 @@
 */

 import type { Agent, AgentEvent, AgentState, AppMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
-import type { AssistantMessage, Model, ToolResultMessage } from "@mariozechner/pi-ai";
+import type { AssistantMessage, Model } from "@mariozechner/pi-ai";
 import { isContextOverflow } from "@mariozechner/pi-ai";
 import { getModelsPath } from "../config.js";
 import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js";
-import { calculateContextTokens, compact, estimateTokens, shouldCompact } from "./compaction.js";
+import { calculateContextTokens, compact, shouldCompact } from "./compaction.js";
 import { exportSessionToHtml } from "./export-html.js";
 import type { BashExecutionMessage } from "./messages.js";
 import { getApiKeyForModel, getAvailableModels } from "./model-config.js";
@ -112,8 +112,6 @@ export class AgentSession {
 	// Compaction state
 	private _compactionAbortController: AbortController | null = null;
 	private _autoCompactionAbortController: AbortController | null = null;
-	private _abortingForCompaction = false;
-	private _lastUserMessageText: string | null = null;

 	// Bash execution state
 	private _bashAbortController: AbortController | null = null;
@ -148,48 +146,19 @@ export class AgentSession {

 		// Handle session persistence
 		if (event.type === "message_end") {
-			// Skip saving aborted message if we're aborting for compaction
-			const isAbortedForCompaction =
-				this._abortingForCompaction &&
-				event.message.role === "assistant" &&
-				(event.message as AssistantMessage).stopReason === "aborted";
-
-			if (!isAbortedForCompaction) {
-				this.sessionManager.saveMessage(event.message);
-			}
+			this.sessionManager.saveMessage(event.message);

 			// Initialize session after first user+assistant exchange
 			if (this.sessionManager.shouldInitializeSession(this.agent.state.messages)) {
 				this.sessionManager.startSession(this.agent.state);
 			}

-			// Track user message text for potential retry after overflow
-			if (event.message.role === "user") {
-				const content = (event.message as { content: unknown }).content;
-				if (typeof content === "string") {
-					this._lastUserMessageText = content;
-				} else if (Array.isArray(content)) {
-					this._lastUserMessageText = content
-						.filter((c): c is { type: "text"; text: string } => c.type === "text")
-						.map((c) => c.text)
-						.join("\n");
-				}
-			}
-
 			// Track assistant message for auto-compaction (checked on agent_end)
 			if (event.message.role === "assistant") {
-				this._lastAssistantMessage = event.message as AssistantMessage;
+				this._lastAssistantMessage = event.message;
 			}
 		}

-		// Handle turn_end for proactive compaction check
-		if (event.type === "turn_end") {
-			await this._checkProactiveCompaction(
-				event.message as AssistantMessage,
-				event.toolResults as ToolResultMessage[],
-			);
-		}
-
 		// Check auto-compaction after agent completes
 		if (event.type === "agent_end" && this._lastAssistantMessage) {
 			const msg = this._lastAssistantMessage;
@ -274,6 +243,11 @@ export class AgentSession {
 		return this.agent.state.isStreaming;
 	}

+	/** Whether a compaction is currently running (auto-compaction or otherwise) */
+	get isCompacting(): boolean {
+		return this._autoCompactionAbortController !== null || this._compactionAbortController !== null;
+	}
+
 	/** All messages including custom types like BashExecutionMessage */
 	get messages(): AppMessage[] {
 		return this.agent.state.messages;
@ -622,91 +596,41 @@ export class AgentSession {
 		this._autoCompactionAbortController?.abort();
 	}

-	/**
-	 * Check for proactive compaction after turn_end (before next LLM call).
-	 * Estimates context size and aborts if threshold would be crossed.
-	 */
-	private async _checkProactiveCompaction(
-		assistantMessage: AssistantMessage,
-		toolResults: ToolResultMessage[],
-	): Promise<void> {
-		const settings = this.settingsManager.getCompactionSettings();
-		if (!settings.enabled) return;
-
-		// Skip if message was aborted or errored
-		if (assistantMessage.stopReason === "aborted" || assistantMessage.stopReason === "error") return;
-
-		// Only check if there are tool calls (meaning another turn will happen)
-		const hasToolCalls = assistantMessage.content.some((c) => c.type === "toolCall");
-		if (!hasToolCalls) return;
-
-		// Estimate context size: last usage + tool results
-		const contextTokens = calculateContextTokens(assistantMessage.usage);
-		const toolResultTokens = toolResults.reduce((sum, msg) => sum + estimateTokens(msg), 0);
-		const estimatedTotal = contextTokens + toolResultTokens;
-
-		const contextWindow = this.model?.contextWindow ?? 0;
-
-		if (!shouldCompact(estimatedTotal, contextWindow, settings)) return;
-
-		// Threshold crossed - abort for compaction
-		this._abortingForCompaction = true;
-		this.agent.abort();
-	}
-
 	/**
 	 * Handle compaction after agent_end.
-	 * Checks for overflow (reactive) or threshold (proactive after abort).
+	 * Two cases:
+	 * 1. Overflow: LLM returned context overflow error, remove error message from agent state, compact, auto-retry
+	 * 2. Threshold: Turn succeeded but context over threshold, compact, NO auto-retry (user continues manually)
 	 */
 	private async _handleAgentEndCompaction(assistantMessage: AssistantMessage): Promise<void> {
 		const settings = this.settingsManager.getCompactionSettings();
+		if (!settings.enabled) return;
+
+		// Skip if message was aborted (user cancelled)
+		if (assistantMessage.stopReason === "aborted") return;
+
 		const contextWindow = this.model?.contextWindow ?? 0;

-		// Check 1: Overflow detection (reactive recovery)
-		const isOverflow = isContextOverflow(assistantMessage, contextWindow);
-
-		// Check 2: Aborted for compaction (proactive)
-		const wasAbortedForCompaction = this._abortingForCompaction;
-		this._abortingForCompaction = false;
-
-		// Check 3: Threshold crossed but turn succeeded (maintenance compaction)
-		const contextTokens =
-			assistantMessage.stopReason === "error" ? 0 : calculateContextTokens(assistantMessage.usage);
-		const thresholdCrossed = settings.enabled && shouldCompact(contextTokens, contextWindow, settings);
-
-		// Determine which action to take
-		let reason: "overflow" | "threshold" | null = null;
-		let willRetry = false;
-
-		if (isOverflow) {
-			reason = "overflow";
-			willRetry = true;
-			// Remove the overflow error message from agent state
+		// Case 1: Overflow - LLM returned context overflow error
+		if (isContextOverflow(assistantMessage, contextWindow)) {
+			// Remove the error message from agent state (it IS saved to session for history,
+			// but we don't want it in context for the retry)
 			const messages = this.agent.state.messages;
 			if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
 				this.agent.replaceMessages(messages.slice(0, -1));
 			}
-		} else if (wasAbortedForCompaction) {
-			reason = "threshold";
-			willRetry = true;
-			// Remove the aborted message from agent state
-			const messages = this.agent.state.messages;
-			if (
-				messages.length > 0 &&
-				messages[messages.length - 1].role === "assistant" &&
-				(messages[messages.length - 1] as AssistantMessage).stopReason === "aborted"
-			) {
-				this.agent.replaceMessages(messages.slice(0, -1));
-			}
-		} else if (thresholdCrossed) {
-			reason = "threshold";
-			willRetry = false; // Turn succeeded, no retry needed
+			await this._runAutoCompaction("overflow", true);
+			return;
 		}

-		if (!reason) return;
+		// Case 2: Threshold - turn succeeded but context is getting large
+		// Skip if this was an error (non-overflow errors don't have usage data)
+		if (assistantMessage.stopReason === "error") return;

-		// Run compaction
-		await this._runAutoCompaction(reason, willRetry);
+		const contextTokens = calculateContextTokens(assistantMessage.usage);
+		if (shouldCompact(contextTokens, contextWindow, settings)) {
+			await this._runAutoCompaction("threshold", false);
+		}
 	}

 	/**
@ -754,11 +678,22 @@ export class AgentSession {
 			};
 			this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry });

-			// Auto-retry if needed
-			if (willRetry && this._lastUserMessageText) {
-				// Small delay to let UI update
-				await new Promise((resolve) => setTimeout(resolve, 100));
-				await this.prompt(this._lastUserMessageText);
+			// Auto-retry if needed - use continue() since user message is already in context
+			if (willRetry) {
+				// Remove trailing error message from agent state (it's kept in session file for history)
+				// This is needed because continue() requires last message to be user or toolResult
+				const messages = this.agent.state.messages;
+				const lastMsg = messages[messages.length - 1];
+				if (lastMsg?.role === "assistant" && (lastMsg as AssistantMessage).stopReason === "error") {
+					this.agent.replaceMessages(messages.slice(0, -1));
+				}
+
+				// Use setTimeout to break out of the event handler chain
+				setTimeout(() => {
+					this.agent.continue().catch(() => {
+						// Retry failed - silently ignore, user can manually retry
+					});
+				}, 100);
 			}
 		} catch (error) {
 			// Compaction failed - emit end event without retry
--- a/packages/coding-agent/src/core/compaction.ts
+++ b/packages/coding-agent/src/core/compaction.ts
@ -41,11 +41,12 @@ export function calculateContextTokens(usage: Usage): number {

 /**
 * Get usage from an assistant message if available.
+ * Skips aborted and error messages as they don't have valid usage data.
 */
 function getAssistantUsage(msg: AppMessage): Usage | null {
 	if (msg.role === "assistant" && "usage" in msg) {
 		const assistantMsg = msg as AssistantMessage;
-		if (assistantMsg.stopReason !== "aborted" && assistantMsg.usage) {
+		if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
 			return assistantMsg.usage;
 		}
 	}
@ -81,36 +82,59 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett
 /**
 * Estimate token count for a message using chars/4 heuristic.
 * This is conservative (overestimates tokens).
- * Accepts any message type (AppMessage, ToolResultMessage, etc.)
 */
-export function estimateTokens(message: {
-	role: string;
-	content?: unknown;
-	command?: string;
-	output?: string;
-}): number {
+export function estimateTokens(message: AppMessage): number {
 	let chars = 0;

-	// Handle custom message types that don't have standard content
+	// Handle bashExecution messages
 	if (message.role === "bashExecution") {
-		chars = (message.command?.length || 0) + (message.output?.length || 0);
+		const bash = message as unknown as { command: string; output: string };
+		chars = bash.command.length + bash.output.length;
 		return Math.ceil(chars / 4);
 	}

-	// Standard messages with content
-	const content = message.content;
-	if (typeof content === "string") {
-		chars = content.length;
-	} else if (Array.isArray(content)) {
-		for (const block of content) {
+	// Handle user messages
+	if (message.role === "user") {
+		const content = (message as { content: string | Array<{ type: string; text?: string }> }).content;
+		if (typeof content === "string") {
+			chars = content.length;
+		} else if (Array.isArray(content)) {
+			for (const block of content) {
+				if (block.type === "text" && block.text) {
+					chars += block.text.length;
+				}
+			}
+		}
+		return Math.ceil(chars / 4);
+	}
+
+	// Handle assistant messages
+	if (message.role === "assistant") {
+		const assistant = message as AssistantMessage;
+		for (const block of assistant.content) {
 			if (block.type === "text") {
 				chars += block.text.length;
 			} else if (block.type === "thinking") {
 				chars += block.thinking.length;
+			} else if (block.type === "toolCall") {
+				chars += block.name.length + JSON.stringify(block.arguments).length;
 			}
 		}
+		return Math.ceil(chars / 4);
 	}
-	return Math.ceil(chars / 4);
+
+	// Handle tool results
+	if (message.role === "toolResult") {
+		const toolResult = message as { content: Array<{ type: string; text?: string }> };
+		for (const block of toolResult.content) {
+			if (block.type === "text" && block.text) {
+				chars += block.text.length;
+			}
+		}
+		return Math.ceil(chars / 4);
+	}
+
+	return 0;
 }

 /**
@ -166,6 +190,9 @@ export interface CutPointResult {
 /**
 * Find the cut point in session entries that keeps approximately `keepRecentTokens`.
 *
+ * Algorithm: Walk backwards from newest, accumulating estimated message sizes.
+ * Stop when we've accumulated >= keepRecentTokens. Cut at that point.
+ *
 * Can cut at user OR assistant messages (never tool results). When cutting at an
 * assistant message with tool calls, its tool results come after and will be kept.
 *
@ -188,46 +215,23 @@ export function findCutPoint(
 		return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
 	}

-	// Collect assistant usages walking backwards from endIndex
-	const assistantUsages: Array<{ index: number; tokens: number }> = [];
-	for (let i = endIndex - 1; i >= startIndex; i--) {
-		const entry = entries[i];
-		if (entry.type === "message") {
-			const usage = getAssistantUsage(entry.message);
-			if (usage) {
-				assistantUsages.push({
-					index: i,
-					tokens: calculateContextTokens(usage),
-				});
-			}
-		}
-	}
-
-	if (assistantUsages.length === 0) {
-		// No usage info, keep from last cut point
-		const lastCutPoint = cutPoints[cutPoints.length - 1];
-		const entry = entries[lastCutPoint];
-		const isUser = entry.type === "message" && entry.message.role === "user";
-		return {
-			firstKeptEntryIndex: lastCutPoint,
-			turnStartIndex: isUser ? -1 : findTurnStartIndex(entries, lastCutPoint, startIndex),
-			isSplitTurn: !isUser,
-		};
-	}
-
-	// Walk through and find where cumulative token difference exceeds keepRecentTokens
-	const newestTokens = assistantUsages[0].tokens;
+	// Walk backwards from newest, accumulating estimated message sizes
+	let accumulatedTokens = 0;
 	let cutIndex = startIndex; // Default: keep everything in range

-	for (let i = 1; i < assistantUsages.length; i++) {
-		const tokenDiff = newestTokens - assistantUsages[i].tokens;
-		if (tokenDiff >= keepRecentTokens) {
-			// Find the valid cut point at or after the assistant we want to keep
-			const lastKeptAssistantIndex = assistantUsages[i - 1].index;
+	for (let i = endIndex - 1; i >= startIndex; i--) {
+		const entry = entries[i];
+		if (entry.type !== "message") continue;

-			// Find closest valid cut point at or before lastKeptAssistantIndex
-			for (let c = cutPoints.length - 1; c >= 0; c--) {
-				if (cutPoints[c] <= lastKeptAssistantIndex) {
+		// Estimate this message's size
+		const messageTokens = estimateTokens(entry.message);
+		accumulatedTokens += messageTokens;
+
+		// Check if we've exceeded the budget
+		if (accumulatedTokens >= keepRecentTokens) {
+			// Find the closest valid cut point at or after this entry
+			for (let c = 0; c < cutPoints.length; c++) {
+				if (cutPoints[c] >= i) {
 					cutIndex = cutPoints[c];
 					break;
 				}
@ -404,9 +408,8 @@ export async function compact(
 		}
 	}

-	// Generate summaries (can be parallel if both needed)
+	// Generate summaries (can be parallel if both needed) and merge into one
 	let summary: string;
-	let turnPrefixSummary: string | undefined;

 	if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) {
 		// Generate both summaries in parallel
@ -416,8 +419,8 @@ export async function compact(
 				: Promise.resolve("No prior history."),
 			generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal),
 		]);
-		summary = historyResult;
-		turnPrefixSummary = turnPrefixResult;
+		// Merge into single summary
+		summary = historyResult + "\n\n---\n\n**Turn Context (split turn):**\n\n" + turnPrefixResult;
 	} else {
 		// Just generate history summary
 		summary = await generateSummary(
@ -434,7 +437,6 @@ export async function compact(
 		type: "compaction",
 		timestamp: new Date().toISOString(),
 		summary,
-		turnPrefixSummary,
 		firstKeptEntryIndex: cutResult.firstKeptEntryIndex,
 		tokensBefore,
 	};
--- a/packages/coding-agent/src/core/session-manager.ts
+++ b/packages/coding-agent/src/core/session-manager.ts
@ -50,8 +50,6 @@ export interface CompactionEntry {
 	type: "compaction";
 	timestamp: string;
 	summary: string;
-	/** Summary of turn prefix when a turn was split (user message to first kept message) */
-	turnPrefixSummary?: string;
 	firstKeptEntryIndex: number; // Index into session entries where we start keeping
 	tokensBefore: number;
 }
@ -180,18 +178,9 @@ export function loadSessionFromEntries(entries: SessionEntry[]): LoadedSession {
 		}
 	}

-	// Build final messages: summaries + kept messages
+	// Build final messages: summary + kept messages
 	const messages: AppMessage[] = [];
-
-	// Add history summary
 	messages.push(createSummaryMessage(compactionEvent.summary));
-
-	// Add turn prefix summary if present (when a turn was split)
-	if (compactionEvent.turnPrefixSummary) {
-		messages.push(createSummaryMessage(compactionEvent.turnPrefixSummary));
-	}
-
-	// Add kept messages
 	messages.push(...keptMessages);

 	return { messages, thinkingLevel, model };
--- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts
+++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
@ -407,6 +407,11 @@ export class InteractiveMode {
 				}
 			}

+			// Ignore input while compaction is running (only overflow compaction auto-retries the turn)
+			if (this.session.isCompacting) {
+				return;
+			}
+
 			// Queue message if agent is streaming
 			if (this.session.isStreaming) {
 				await this.session.queueMessage(text);
@ -604,10 +609,6 @@ export class InteractiveMode {
 					compactionComponent.setExpanded(this.toolOutputExpanded);
 					this.chatContainer.addChild(compactionComponent);
 					this.footer.updateState(this.session.state);
-
-					if (event.willRetry) {
-						this.showStatus("Compacted context, retrying...");
-					}
 				}
 				this.ui.requestRender();
 				break;
@ -743,6 +744,14 @@ export class InteractiveMode {

 	renderInitialMessages(state: AgentState): void {
 		this.renderMessages(state.messages, { updateFooter: true, populateHistory: true });
+
+		// Show compaction info if session was compacted
+		const entries = this.sessionManager.loadEntries();
+		const compactionCount = entries.filter((e) => e.type === "compaction").length;
+		if (compactionCount > 0) {
+			const times = compactionCount === 1 ? "1 time" : `${compactionCount} times`;
+			this.showStatus(`Session compacted ${times}`);
+		}
 	}

 	async getUserInput(): Promise<string> {
--- a/packages/coding-agent/src/modes/rpc/rpc-mode.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-mode.ts
@ -90,6 +90,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
 					model: session.model,
 					thinkingLevel: session.thinkingLevel,
 					isStreaming: session.isStreaming,
+					isCompacting: session.isCompacting,
 					queueMode: session.queueMode,
 					sessionFile: session.sessionFile,
 					sessionId: session.sessionId,
--- a/packages/coding-agent/src/modes/rpc/rpc-types.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-types.ts
@ -63,6 +63,7 @@ export interface RpcSessionState {
 	model: Model<any> | null;
 	thinkingLevel: ThinkingLevel;
 	isStreaming: boolean;
+	isCompacting: boolean;
 	queueMode: "all" | "one-at-a-time";
 	sessionFile: string;
 	sessionId: string;