mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 14:01:06 +00:00
Simplify compaction: remove proactive abort, use Agent.continue() for retry
- Add agentLoopContinue() to pi-ai for resuming from existing context
- Add Agent.continue() method and transport.continue() interface
- Simplify AgentSession compaction to two cases: overflow (auto-retry) and threshold (no retry)
- Remove proactive mid-turn compaction abort
- Merge turn prefix summary into main summary
- Add isCompacting property to AgentSession and RPC state
- Block input during compaction in interactive mode
- Show compaction count on session resume
- Rename RPC.md to rpc.md for consistency

Related to #128
This commit is contained in:
parent
d67c69c6e9
commit
5a9d844f9a
27 changed files with 1261 additions and 1011 deletions
|
|
@ -14,11 +14,11 @@
|
|||
*/
|
||||
|
||||
import type { Agent, AgentEvent, AgentState, AppMessage, Attachment, ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage, Model, ToolResultMessage } from "@mariozechner/pi-ai";
|
||||
import type { AssistantMessage, Model } from "@mariozechner/pi-ai";
|
||||
import { isContextOverflow } from "@mariozechner/pi-ai";
|
||||
import { getModelsPath } from "../config.js";
|
||||
import { type BashResult, executeBash as executeBashCommand } from "./bash-executor.js";
|
||||
import { calculateContextTokens, compact, estimateTokens, shouldCompact } from "./compaction.js";
|
||||
import { calculateContextTokens, compact, shouldCompact } from "./compaction.js";
|
||||
import { exportSessionToHtml } from "./export-html.js";
|
||||
import type { BashExecutionMessage } from "./messages.js";
|
||||
import { getApiKeyForModel, getAvailableModels } from "./model-config.js";
|
||||
|
|
@ -112,8 +112,6 @@ export class AgentSession {
|
|||
// Compaction state
|
||||
private _compactionAbortController: AbortController | null = null;
|
||||
private _autoCompactionAbortController: AbortController | null = null;
|
||||
private _abortingForCompaction = false;
|
||||
private _lastUserMessageText: string | null = null;
|
||||
|
||||
// Bash execution state
|
||||
private _bashAbortController: AbortController | null = null;
|
||||
|
|
@ -148,48 +146,19 @@ export class AgentSession {
|
|||
|
||||
// Handle session persistence
|
||||
if (event.type === "message_end") {
|
||||
// Skip saving aborted message if we're aborting for compaction
|
||||
const isAbortedForCompaction =
|
||||
this._abortingForCompaction &&
|
||||
event.message.role === "assistant" &&
|
||||
(event.message as AssistantMessage).stopReason === "aborted";
|
||||
|
||||
if (!isAbortedForCompaction) {
|
||||
this.sessionManager.saveMessage(event.message);
|
||||
}
|
||||
this.sessionManager.saveMessage(event.message);
|
||||
|
||||
// Initialize session after first user+assistant exchange
|
||||
if (this.sessionManager.shouldInitializeSession(this.agent.state.messages)) {
|
||||
this.sessionManager.startSession(this.agent.state);
|
||||
}
|
||||
|
||||
// Track user message text for potential retry after overflow
|
||||
if (event.message.role === "user") {
|
||||
const content = (event.message as { content: unknown }).content;
|
||||
if (typeof content === "string") {
|
||||
this._lastUserMessageText = content;
|
||||
} else if (Array.isArray(content)) {
|
||||
this._lastUserMessageText = content
|
||||
.filter((c): c is { type: "text"; text: string } => c.type === "text")
|
||||
.map((c) => c.text)
|
||||
.join("\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Track assistant message for auto-compaction (checked on agent_end)
|
||||
if (event.message.role === "assistant") {
|
||||
this._lastAssistantMessage = event.message as AssistantMessage;
|
||||
this._lastAssistantMessage = event.message;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle turn_end for proactive compaction check
|
||||
if (event.type === "turn_end") {
|
||||
await this._checkProactiveCompaction(
|
||||
event.message as AssistantMessage,
|
||||
event.toolResults as ToolResultMessage[],
|
||||
);
|
||||
}
|
||||
|
||||
// Check auto-compaction after agent completes
|
||||
if (event.type === "agent_end" && this._lastAssistantMessage) {
|
||||
const msg = this._lastAssistantMessage;
|
||||
|
|
@ -274,6 +243,11 @@ export class AgentSession {
|
|||
return this.agent.state.isStreaming;
|
||||
}
|
||||
|
||||
/** Whether a compaction is currently running: true while either the auto-compaction or the other compaction abort controller is set (not only auto-compaction) */
|
||||
get isCompacting(): boolean {
|
||||
return this._autoCompactionAbortController !== null || this._compactionAbortController !== null;
|
||||
}
|
||||
|
||||
/** All messages including custom types like BashExecutionMessage */
|
||||
get messages(): AppMessage[] {
|
||||
return this.agent.state.messages;
|
||||
|
|
@ -622,91 +596,41 @@ export class AgentSession {
|
|||
this._autoCompactionAbortController?.abort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for proactive compaction after turn_end (before next LLM call).
|
||||
* Estimates context size and aborts if threshold would be crossed.
|
||||
*/
|
||||
private async _checkProactiveCompaction(
|
||||
assistantMessage: AssistantMessage,
|
||||
toolResults: ToolResultMessage[],
|
||||
): Promise<void> {
|
||||
const settings = this.settingsManager.getCompactionSettings();
|
||||
if (!settings.enabled) return;
|
||||
|
||||
// Skip if message was aborted or errored
|
||||
if (assistantMessage.stopReason === "aborted" || assistantMessage.stopReason === "error") return;
|
||||
|
||||
// Only check if there are tool calls (meaning another turn will happen)
|
||||
const hasToolCalls = assistantMessage.content.some((c) => c.type === "toolCall");
|
||||
if (!hasToolCalls) return;
|
||||
|
||||
// Estimate context size: last usage + tool results
|
||||
const contextTokens = calculateContextTokens(assistantMessage.usage);
|
||||
const toolResultTokens = toolResults.reduce((sum, msg) => sum + estimateTokens(msg), 0);
|
||||
const estimatedTotal = contextTokens + toolResultTokens;
|
||||
|
||||
const contextWindow = this.model?.contextWindow ?? 0;
|
||||
|
||||
if (!shouldCompact(estimatedTotal, contextWindow, settings)) return;
|
||||
|
||||
// Threshold crossed - abort for compaction
|
||||
this._abortingForCompaction = true;
|
||||
this.agent.abort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle compaction after agent_end.
|
||||
* Checks for overflow (reactive) or threshold (proactive after abort).
|
||||
* Two cases:
|
||||
* 1. Overflow: LLM returned context overflow error, remove error message from agent state, compact, auto-retry
|
||||
* 2. Threshold: Turn succeeded but context over threshold, compact, NO auto-retry (user continues manually)
|
||||
*/
|
||||
private async _handleAgentEndCompaction(assistantMessage: AssistantMessage): Promise<void> {
|
||||
const settings = this.settingsManager.getCompactionSettings();
|
||||
if (!settings.enabled) return;
|
||||
|
||||
// Skip if message was aborted (user cancelled)
|
||||
if (assistantMessage.stopReason === "aborted") return;
|
||||
|
||||
const contextWindow = this.model?.contextWindow ?? 0;
|
||||
|
||||
// Check 1: Overflow detection (reactive recovery)
|
||||
const isOverflow = isContextOverflow(assistantMessage, contextWindow);
|
||||
|
||||
// Check 2: Aborted for compaction (proactive)
|
||||
const wasAbortedForCompaction = this._abortingForCompaction;
|
||||
this._abortingForCompaction = false;
|
||||
|
||||
// Check 3: Threshold crossed but turn succeeded (maintenance compaction)
|
||||
const contextTokens =
|
||||
assistantMessage.stopReason === "error" ? 0 : calculateContextTokens(assistantMessage.usage);
|
||||
const thresholdCrossed = settings.enabled && shouldCompact(contextTokens, contextWindow, settings);
|
||||
|
||||
// Determine which action to take
|
||||
let reason: "overflow" | "threshold" | null = null;
|
||||
let willRetry = false;
|
||||
|
||||
if (isOverflow) {
|
||||
reason = "overflow";
|
||||
willRetry = true;
|
||||
// Remove the overflow error message from agent state
|
||||
// Case 1: Overflow - LLM returned context overflow error
|
||||
if (isContextOverflow(assistantMessage, contextWindow)) {
|
||||
// Remove the error message from agent state (it IS saved to session for history,
|
||||
// but we don't want it in context for the retry)
|
||||
const messages = this.agent.state.messages;
|
||||
if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
|
||||
this.agent.replaceMessages(messages.slice(0, -1));
|
||||
}
|
||||
} else if (wasAbortedForCompaction) {
|
||||
reason = "threshold";
|
||||
willRetry = true;
|
||||
// Remove the aborted message from agent state
|
||||
const messages = this.agent.state.messages;
|
||||
if (
|
||||
messages.length > 0 &&
|
||||
messages[messages.length - 1].role === "assistant" &&
|
||||
(messages[messages.length - 1] as AssistantMessage).stopReason === "aborted"
|
||||
) {
|
||||
this.agent.replaceMessages(messages.slice(0, -1));
|
||||
}
|
||||
} else if (thresholdCrossed) {
|
||||
reason = "threshold";
|
||||
willRetry = false; // Turn succeeded, no retry needed
|
||||
await this._runAutoCompaction("overflow", true);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!reason) return;
|
||||
// Case 2: Threshold - turn succeeded but context is getting large
|
||||
// Skip if this was an error (non-overflow errors don't have usage data)
|
||||
if (assistantMessage.stopReason === "error") return;
|
||||
|
||||
// Run compaction
|
||||
await this._runAutoCompaction(reason, willRetry);
|
||||
const contextTokens = calculateContextTokens(assistantMessage.usage);
|
||||
if (shouldCompact(contextTokens, contextWindow, settings)) {
|
||||
await this._runAutoCompaction("threshold", false);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -754,11 +678,22 @@ export class AgentSession {
|
|||
};
|
||||
this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry });
|
||||
|
||||
// Auto-retry if needed
|
||||
if (willRetry && this._lastUserMessageText) {
|
||||
// Small delay to let UI update
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
await this.prompt(this._lastUserMessageText);
|
||||
// Auto-retry if needed - use continue() since user message is already in context
|
||||
if (willRetry) {
|
||||
// Remove trailing error message from agent state (it's kept in session file for history)
|
||||
// This is needed because continue() requires last message to be user or toolResult
|
||||
const messages = this.agent.state.messages;
|
||||
const lastMsg = messages[messages.length - 1];
|
||||
if (lastMsg?.role === "assistant" && (lastMsg as AssistantMessage).stopReason === "error") {
|
||||
this.agent.replaceMessages(messages.slice(0, -1));
|
||||
}
|
||||
|
||||
// Use setTimeout to break out of the event handler chain
|
||||
setTimeout(() => {
|
||||
this.agent.continue().catch(() => {
|
||||
// Retry failed - silently ignore, user can manually retry
|
||||
});
|
||||
}, 100);
|
||||
}
|
||||
} catch (error) {
|
||||
// Compaction failed - emit end event without retry
|
||||
|
|
|
|||
|
|
@ -41,11 +41,12 @@ export function calculateContextTokens(usage: Usage): number {
|
|||
|
||||
/**
|
||||
* Get usage from an assistant message if available.
|
||||
* Skips aborted and error messages as they don't have valid usage data.
|
||||
*/
|
||||
function getAssistantUsage(msg: AppMessage): Usage | null {
|
||||
if (msg.role === "assistant" && "usage" in msg) {
|
||||
const assistantMsg = msg as AssistantMessage;
|
||||
if (assistantMsg.stopReason !== "aborted" && assistantMsg.usage) {
|
||||
if (assistantMsg.stopReason !== "aborted" && assistantMsg.stopReason !== "error" && assistantMsg.usage) {
|
||||
return assistantMsg.usage;
|
||||
}
|
||||
}
|
||||
|
|
@ -81,36 +82,59 @@ export function shouldCompact(contextTokens: number, contextWindow: number, sett
|
|||
/**
|
||||
* Estimate token count for a message using chars/4 heuristic.
|
||||
* This is conservative (overestimates tokens).
|
||||
* Accepts any message type (AppMessage, ToolResultMessage, etc.)
|
||||
*/
|
||||
export function estimateTokens(message: {
|
||||
role: string;
|
||||
content?: unknown;
|
||||
command?: string;
|
||||
output?: string;
|
||||
}): number {
|
||||
export function estimateTokens(message: AppMessage): number {
|
||||
let chars = 0;
|
||||
|
||||
// Handle custom message types that don't have standard content
|
||||
// Handle bashExecution messages
|
||||
if (message.role === "bashExecution") {
|
||||
chars = (message.command?.length || 0) + (message.output?.length || 0);
|
||||
const bash = message as unknown as { command: string; output: string };
|
||||
chars = bash.command.length + bash.output.length;
|
||||
return Math.ceil(chars / 4);
|
||||
}
|
||||
|
||||
// Standard messages with content
|
||||
const content = message.content;
|
||||
if (typeof content === "string") {
|
||||
chars = content.length;
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
// Handle user messages
|
||||
if (message.role === "user") {
|
||||
const content = (message as { content: string | Array<{ type: string; text?: string }> }).content;
|
||||
if (typeof content === "string") {
|
||||
chars = content.length;
|
||||
} else if (Array.isArray(content)) {
|
||||
for (const block of content) {
|
||||
if (block.type === "text" && block.text) {
|
||||
chars += block.text.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Math.ceil(chars / 4);
|
||||
}
|
||||
|
||||
// Handle assistant messages
|
||||
if (message.role === "assistant") {
|
||||
const assistant = message as AssistantMessage;
|
||||
for (const block of assistant.content) {
|
||||
if (block.type === "text") {
|
||||
chars += block.text.length;
|
||||
} else if (block.type === "thinking") {
|
||||
chars += block.thinking.length;
|
||||
} else if (block.type === "toolCall") {
|
||||
chars += block.name.length + JSON.stringify(block.arguments).length;
|
||||
}
|
||||
}
|
||||
return Math.ceil(chars / 4);
|
||||
}
|
||||
return Math.ceil(chars / 4);
|
||||
|
||||
// Handle tool results
|
||||
if (message.role === "toolResult") {
|
||||
const toolResult = message as { content: Array<{ type: string; text?: string }> };
|
||||
for (const block of toolResult.content) {
|
||||
if (block.type === "text" && block.text) {
|
||||
chars += block.text.length;
|
||||
}
|
||||
}
|
||||
return Math.ceil(chars / 4);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -166,6 +190,9 @@ export interface CutPointResult {
|
|||
/**
|
||||
* Find the cut point in session entries that keeps approximately `keepRecentTokens`.
|
||||
*
|
||||
* Algorithm: Walk backwards from newest, accumulating estimated message sizes.
|
||||
* Stop when we've accumulated >= keepRecentTokens. Cut at that point.
|
||||
*
|
||||
* Can cut at user OR assistant messages (never tool results). When cutting at an
|
||||
* assistant message with tool calls, its tool results come after and will be kept.
|
||||
*
|
||||
|
|
@ -188,46 +215,23 @@ export function findCutPoint(
|
|||
return { firstKeptEntryIndex: startIndex, turnStartIndex: -1, isSplitTurn: false };
|
||||
}
|
||||
|
||||
// Collect assistant usages walking backwards from endIndex
|
||||
const assistantUsages: Array<{ index: number; tokens: number }> = [];
|
||||
for (let i = endIndex - 1; i >= startIndex; i--) {
|
||||
const entry = entries[i];
|
||||
if (entry.type === "message") {
|
||||
const usage = getAssistantUsage(entry.message);
|
||||
if (usage) {
|
||||
assistantUsages.push({
|
||||
index: i,
|
||||
tokens: calculateContextTokens(usage),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (assistantUsages.length === 0) {
|
||||
// No usage info, keep from last cut point
|
||||
const lastCutPoint = cutPoints[cutPoints.length - 1];
|
||||
const entry = entries[lastCutPoint];
|
||||
const isUser = entry.type === "message" && entry.message.role === "user";
|
||||
return {
|
||||
firstKeptEntryIndex: lastCutPoint,
|
||||
turnStartIndex: isUser ? -1 : findTurnStartIndex(entries, lastCutPoint, startIndex),
|
||||
isSplitTurn: !isUser,
|
||||
};
|
||||
}
|
||||
|
||||
// Walk through and find where cumulative token difference exceeds keepRecentTokens
|
||||
const newestTokens = assistantUsages[0].tokens;
|
||||
// Walk backwards from newest, accumulating estimated message sizes
|
||||
let accumulatedTokens = 0;
|
||||
let cutIndex = startIndex; // Default: keep everything in range
|
||||
|
||||
for (let i = 1; i < assistantUsages.length; i++) {
|
||||
const tokenDiff = newestTokens - assistantUsages[i].tokens;
|
||||
if (tokenDiff >= keepRecentTokens) {
|
||||
// Find the valid cut point at or after the assistant we want to keep
|
||||
const lastKeptAssistantIndex = assistantUsages[i - 1].index;
|
||||
for (let i = endIndex - 1; i >= startIndex; i--) {
|
||||
const entry = entries[i];
|
||||
if (entry.type !== "message") continue;
|
||||
|
||||
// Find closest valid cut point at or before lastKeptAssistantIndex
|
||||
for (let c = cutPoints.length - 1; c >= 0; c--) {
|
||||
if (cutPoints[c] <= lastKeptAssistantIndex) {
|
||||
// Estimate this message's size
|
||||
const messageTokens = estimateTokens(entry.message);
|
||||
accumulatedTokens += messageTokens;
|
||||
|
||||
// Check if we've exceeded the budget
|
||||
if (accumulatedTokens >= keepRecentTokens) {
|
||||
// Find the closest valid cut point at or after this entry
|
||||
for (let c = 0; c < cutPoints.length; c++) {
|
||||
if (cutPoints[c] >= i) {
|
||||
cutIndex = cutPoints[c];
|
||||
break;
|
||||
}
|
||||
|
|
@ -404,9 +408,8 @@ export async function compact(
|
|||
}
|
||||
}
|
||||
|
||||
// Generate summaries (can be parallel if both needed)
|
||||
// Generate summaries (can be parallel if both needed) and merge into one
|
||||
let summary: string;
|
||||
let turnPrefixSummary: string | undefined;
|
||||
|
||||
if (cutResult.isSplitTurn && turnPrefixMessages.length > 0) {
|
||||
// Generate both summaries in parallel
|
||||
|
|
@ -416,8 +419,8 @@ export async function compact(
|
|||
: Promise.resolve("No prior history."),
|
||||
generateTurnPrefixSummary(turnPrefixMessages, model, settings.reserveTokens, apiKey, signal),
|
||||
]);
|
||||
summary = historyResult;
|
||||
turnPrefixSummary = turnPrefixResult;
|
||||
// Merge into single summary
|
||||
summary = historyResult + "\n\n---\n\n**Turn Context (split turn):**\n\n" + turnPrefixResult;
|
||||
} else {
|
||||
// Just generate history summary
|
||||
summary = await generateSummary(
|
||||
|
|
@ -434,7 +437,6 @@ export async function compact(
|
|||
type: "compaction",
|
||||
timestamp: new Date().toISOString(),
|
||||
summary,
|
||||
turnPrefixSummary,
|
||||
firstKeptEntryIndex: cutResult.firstKeptEntryIndex,
|
||||
tokensBefore,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -50,8 +50,6 @@ export interface CompactionEntry {
|
|||
type: "compaction";
|
||||
timestamp: string;
|
||||
summary: string;
|
||||
/** Summary of turn prefix when a turn was split (user message to first kept message) */
|
||||
turnPrefixSummary?: string;
|
||||
firstKeptEntryIndex: number; // Index into session entries where we start keeping
|
||||
tokensBefore: number;
|
||||
}
|
||||
|
|
@ -180,18 +178,9 @@ export function loadSessionFromEntries(entries: SessionEntry[]): LoadedSession {
|
|||
}
|
||||
}
|
||||
|
||||
// Build final messages: summaries + kept messages
|
||||
// Build final messages: summary + kept messages
|
||||
const messages: AppMessage[] = [];
|
||||
|
||||
// Add history summary
|
||||
messages.push(createSummaryMessage(compactionEvent.summary));
|
||||
|
||||
// Add turn prefix summary if present (when a turn was split)
|
||||
if (compactionEvent.turnPrefixSummary) {
|
||||
messages.push(createSummaryMessage(compactionEvent.turnPrefixSummary));
|
||||
}
|
||||
|
||||
// Add kept messages
|
||||
messages.push(...keptMessages);
|
||||
|
||||
return { messages, thinkingLevel, model };
|
||||
|
|
|
|||
|
|
@ -407,6 +407,11 @@ export class InteractiveMode {
|
|||
}
|
||||
}
|
||||
|
||||
// Block input during compaction (will retry automatically)
|
||||
if (this.session.isCompacting) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Queue message if agent is streaming
|
||||
if (this.session.isStreaming) {
|
||||
await this.session.queueMessage(text);
|
||||
|
|
@ -604,10 +609,6 @@ export class InteractiveMode {
|
|||
compactionComponent.setExpanded(this.toolOutputExpanded);
|
||||
this.chatContainer.addChild(compactionComponent);
|
||||
this.footer.updateState(this.session.state);
|
||||
|
||||
if (event.willRetry) {
|
||||
this.showStatus("Compacted context, retrying...");
|
||||
}
|
||||
}
|
||||
this.ui.requestRender();
|
||||
break;
|
||||
|
|
@ -743,6 +744,14 @@ export class InteractiveMode {
|
|||
|
||||
renderInitialMessages(state: AgentState): void {
|
||||
this.renderMessages(state.messages, { updateFooter: true, populateHistory: true });
|
||||
|
||||
// Show compaction info if session was compacted
|
||||
const entries = this.sessionManager.loadEntries();
|
||||
const compactionCount = entries.filter((e) => e.type === "compaction").length;
|
||||
if (compactionCount > 0) {
|
||||
const times = compactionCount === 1 ? "1 time" : `${compactionCount} times`;
|
||||
this.showStatus(`Session compacted ${times}`);
|
||||
}
|
||||
}
|
||||
|
||||
async getUserInput(): Promise<string> {
|
||||
|
|
|
|||
|
|
@ -90,6 +90,7 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
model: session.model,
|
||||
thinkingLevel: session.thinkingLevel,
|
||||
isStreaming: session.isStreaming,
|
||||
isCompacting: session.isCompacting,
|
||||
queueMode: session.queueMode,
|
||||
sessionFile: session.sessionFile,
|
||||
sessionId: session.sessionId,
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ export interface RpcSessionState {
|
|||
model: Model<any> | null;
|
||||
thinkingLevel: ThinkingLevel;
|
||||
isStreaming: boolean;
|
||||
isCompacting: boolean;
|
||||
queueMode: "all" | "one-at-a-time";
|
||||
sessionFile: string;
|
||||
sessionId: string;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue