mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 23:01:30 +00:00
Auto-retry on transient provider errors (overloaded, rate limit, 5xx)
- Add retry logic with exponential backoff (2s, 4s, 8s) in AgentSession
- Disable Anthropic SDK built-in retries (maxRetries: 0) to allow app-level handling
- TUI shows retry status with Escape to cancel
- RPC mode: add set_auto_retry, abort_retry commands and auto_retry_start/end events
- Configurable via settings.json: retry.enabled, retry.maxRetries, retry.baseDelayMs
- Exclude context overflow errors from retry (handled by compaction)

Fixes #157
This commit is contained in:
parent
79f5c6d22e
commit
bb445d24f1
11 changed files with 379 additions and 3 deletions
|
|
@ -31,7 +31,9 @@ import { expandSlashCommand, type FileSlashCommand } from "./slash-commands.js";
|
|||
// Session-level events: every underlying AgentEvent plus the auto-compaction
// and auto-retry lifecycle notifications. auto_retry_start carries the attempt
// number, the configured maximum and the backoff delay; auto_retry_end reports
// the outcome and, on failure, the final error text.
export type AgentSessionEvent =
	| AgentEvent
	| { type: "auto_compaction_start"; reason: "threshold" | "overflow" }
	| { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean }
	| { type: "auto_retry_start"; attempt: number; maxAttempts: number; delayMs: number; errorMessage: string }
	| { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string };
|
||||
|
||||
/** Listener function for agent session events */
|
||||
export type AgentSessionEventListener = (event: AgentSessionEvent) => void;
|
||||
|
|
@ -116,6 +118,12 @@ export class AgentSession {
|
|||
private _compactionAbortController: AbortController | null = null;
|
||||
private _autoCompactionAbortController: AbortController | null = null;
|
||||
|
||||
// Retry state
|
||||
private _retryAbortController: AbortController | null = null;
|
||||
private _retryAttempt = 0;
|
||||
private _retryPromise: Promise<void> | null = null;
|
||||
private _retryResolve: (() => void) | null = null;
|
||||
|
||||
// Bash execution state
|
||||
private _bashAbortController: AbortController | null = null;
|
||||
private _pendingBashMessages: BashExecutionMessage[] = [];
|
||||
|
|
@ -184,14 +192,40 @@ export class AgentSession {
|
|||
}
|
||||
}
|
||||
|
||||
// Check auto-compaction after agent completes
|
||||
// Check auto-retry and auto-compaction after agent completes
|
||||
if (event.type === "agent_end" && this._lastAssistantMessage) {
|
||||
const msg = this._lastAssistantMessage;
|
||||
this._lastAssistantMessage = null;
|
||||
|
||||
// Check for retryable errors first (overloaded, rate limit, server errors)
|
||||
if (this._isRetryableError(msg)) {
|
||||
const didRetry = await this._handleRetryableError(msg);
|
||||
if (didRetry) return; // Retry was initiated, don't proceed to compaction
|
||||
} else if (this._retryAttempt > 0) {
|
||||
// Previous retry succeeded - emit success event and reset counter
|
||||
this._emit({
|
||||
type: "auto_retry_end",
|
||||
success: true,
|
||||
attempt: this._retryAttempt,
|
||||
});
|
||||
this._retryAttempt = 0;
|
||||
// Resolve the retry promise so waitForRetry() completes
|
||||
this._resolveRetry();
|
||||
}
|
||||
|
||||
await this._handleAgentEndCompaction(msg);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Settle the promise that waitForRetry() awaits and clear the retry
 * promise bookkeeping. Does nothing when no resolver is stored.
 */
private _resolveRetry(): void {
	const settle = this._retryResolve;
	if (!settle) return;

	settle();
	this._retryResolve = null;
	this._retryPromise = null;
}
|
||||
|
||||
/** Extract text content from a message */
|
||||
private _getUserMessageText(message: Message): string {
|
||||
if (message.role !== "user") return "";
|
||||
|
|
@ -379,6 +413,7 @@ export class AgentSession {
|
|||
const expandedText = expandCommands ? expandSlashCommand(text, [...this._fileCommands]) : text;
|
||||
|
||||
await this.agent.prompt(expandedText, options?.attachments);
|
||||
await this.waitForRetry();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -419,6 +454,7 @@ export class AgentSession {
|
|||
* Abort current operation and wait for agent to become idle.
|
||||
*/
|
||||
async abort(): Promise<void> {
|
||||
this.abortRetry();
|
||||
this.agent.abort();
|
||||
await this.agent.waitForIdle();
|
||||
}
|
||||
|
|
@ -784,6 +820,159 @@ export class AgentSession {
|
|||
return this.settingsManager.getCompactionEnabled();
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Auto-Retry
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Decide whether a failed assistant message should trigger an automatic retry.
 *
 * Only messages with stopReason "error" and a non-empty errorMessage are
 * considered. Context overflow is excluded because compaction handles it.
 * Everything else is classified by matching the error text against known
 * transient failures: overload, rate limiting, HTTP 429/500/502/503/504,
 * service unavailable, server error and internal error.
 *
 * @param message - The assistant message produced by the failed turn.
 * @returns true if the error text looks transient and worth retrying.
 */
private _isRetryableError(message: AssistantMessage): boolean {
	if (message.stopReason !== "error" || !message.errorMessage) return false;

	// Context overflow is handled by compaction, not retry
	const contextWindow = this.model?.contextWindow ?? 0;
	if (isContextOverflow(message, contextWindow)) return false;

	// Status codes must stand alone as numbers. A bare /500/ would also match
	// unrelated digit runs such as "15000 tokens", so reject matches that are
	// preceded or followed by another digit.
	return /overloaded|rate.?limit|too many requests|(?<!\d)(?:429|500|502|503|504)(?!\d)|service.?unavailable|server error|internal error/i.test(
		message.errorMessage,
	);
}
|
||||
|
||||
/**
 * Handle a retryable error with exponential backoff.
 *
 * Increments the attempt counter, creates the promise awaited by
 * waitForRetry() on the first attempt, emits auto_retry_start, drops the
 * trailing assistant error message from the agent state, sleeps for
 * baseDelayMs * 2^(attempt - 1) and then schedules agent.continue().
 *
 * Emits auto_retry_end with success: false when the retry budget is
 * exhausted or when the backoff sleep is aborted.
 *
 * @param message - The assistant message carrying the retryable error.
 * @returns true if a retry was scheduled; false if retry is disabled,
 *          the maximum number of retries was exceeded, or the wait was cancelled.
 */
private async _handleRetryableError(message: AssistantMessage): Promise<boolean> {
	const settings = this.settingsManager.getRetrySettings();
	if (!settings.enabled) return false;

	this._retryAttempt++;
	// Snapshot the attempt number. abortRetry() zeroes this._retryAttempt
	// synchronously, before the rejected sleep below resumes, so reading the
	// field inside the catch block would always report attempt 0.
	const attempt = this._retryAttempt;

	// Create retry promise on first attempt so waitForRetry() can await it
	if (attempt === 1 && !this._retryPromise) {
		this._retryPromise = new Promise<void>((resolve) => {
			this._retryResolve = resolve;
		});
	}

	if (attempt > settings.maxRetries) {
		// Max retries exceeded, emit final failure and reset
		this._emit({
			type: "auto_retry_end",
			success: false,
			attempt: attempt - 1,
			finalError: message.errorMessage,
		});
		this._retryAttempt = 0;
		this._resolveRetry(); // Resolve so waitForRetry() completes
		return false;
	}

	const delayMs = settings.baseDelayMs * 2 ** (attempt - 1);

	this._emit({
		type: "auto_retry_start",
		attempt,
		maxAttempts: settings.maxRetries,
		delayMs,
		errorMessage: message.errorMessage || "Unknown error",
	});

	// Remove error message from agent state (keep in session for history)
	const messages = this.agent.state.messages;
	if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
		this.agent.replaceMessages(messages.slice(0, -1));
	}

	// Wait with exponential backoff (abortable)
	const controller = new AbortController();
	this._retryAbortController = controller;
	try {
		await this._sleep(delayMs, controller.signal);
	} catch {
		// Aborted during sleep - emit end event so UI can clean up
		this._retryAttempt = 0;
		this._retryAbortController = null;
		this._emit({
			type: "auto_retry_end",
			success: false,
			attempt,
			finalError: "Retry cancelled",
		});
		this._resolveRetry();
		return false;
	}
	this._retryAbortController = null;

	// Retry via continue() - use setTimeout to break out of event handler chain
	setTimeout(() => {
		this.agent.continue().catch(() => {
			// Retry failed - will be caught by next agent_end
		});
	}, 0);

	return true;
}
|
||||
|
||||
/**
 * Sleep helper that respects an abort signal.
 *
 * @param ms - Delay in milliseconds before the promise resolves.
 * @param signal - Optional signal; aborting it cancels the timer.
 * @returns A promise that resolves after the delay, or rejects with
 *          Error("Aborted") if the signal is already aborted or is aborted
 *          while waiting.
 */
private _sleep(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise<void>((resolve, reject) => {
		if (signal?.aborted) {
			reject(new Error("Aborted"));
			return;
		}

		const onAbort = (): void => {
			clearTimeout(timeout);
			reject(new Error("Aborted"));
		};

		const timeout = setTimeout(() => {
			// Detach the listener once the delay elapses so it does not stay
			// registered on the signal after this sleep has finished.
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);

		signal?.addEventListener("abort", onAbort, { once: true });
	});
}
|
||||
|
||||
/**
 * Cancel an in-progress retry: abort the backoff controller if one exists,
 * reset the attempt counter and resolve the promise awaited by waitForRetry().
 */
abortRetry(): void {
	const controller = this._retryAbortController;
	if (controller) controller.abort();

	this._retryAttempt = 0;
	this._resolveRetry();
}
|
||||
|
||||
/**
 * Block until the current retry promise, if any, has been resolved.
 * Returns without awaiting when no retry promise exists.
 */
private async waitForRetry(): Promise<void> {
	const pending = this._retryPromise;
	if (!pending) return;
	await pending;
}
|
||||
|
||||
/** True while a retry promise exists, i.e. an auto-retry sequence has started and not yet been resolved. */
get isRetrying(): boolean {
	const idle = this._retryPromise === null;
	return !idle;
}
|
||||
|
||||
/** Current value of the auto-retry setting, as reported by the settings manager. */
get autoRetryEnabled(): boolean {
	const enabled = this.settingsManager.getRetryEnabled();
	return enabled;
}
|
||||
|
||||
/**
 * Turn the auto-retry setting on or off by forwarding the value to the settings manager.
 *
 * @param enabled - New value for the auto-retry setting.
 */
setAutoRetryEnabled(enabled: boolean): void {
	const manager = this.settingsManager;
	manager.setRetryEnabled(enabled);
}
|
||||
|
||||
// =========================================================================
|
||||
// Bash Execution
|
||||
// =========================================================================
|
||||
|
|
|
|||
|
|
@ -8,6 +8,12 @@ export interface CompactionSettings {
|
|||
keepRecentTokens?: number; // default: 20000
|
||||
}
|
||||
|
||||
/** Optional auto-retry overrides; every field may be omitted. */
export interface RetrySettings {
	/** Whether auto-retry is on. default: true */
	enabled?: boolean;
	/** Maximum number of retries. default: 3 */
	maxRetries?: number;
	/** Base backoff delay in milliseconds. default: 2000 (exponential backoff: 2s, 4s, 8s) */
	baseDelayMs?: number;
}
|
||||
|
||||
export interface Settings {
|
||||
lastChangelogVersion?: string;
|
||||
defaultProvider?: string;
|
||||
|
|
@ -16,6 +22,7 @@ export interface Settings {
|
|||
queueMode?: "all" | "one-at-a-time";
|
||||
theme?: string;
|
||||
compaction?: CompactionSettings;
|
||||
retry?: RetrySettings;
|
||||
hideThinkingBlock?: boolean;
|
||||
shellPath?: string; // Custom shell path (e.g., for Cygwin users on Windows)
|
||||
collapseChangelog?: boolean; // Show condensed changelog after update (use /changelog for full)
|
||||
|
|
@ -149,6 +156,26 @@ export class SettingsManager {
|
|||
};
|
||||
}
|
||||
|
||||
/** Whether auto-retry is enabled; falls back to true when the setting is absent. */
getRetryEnabled(): boolean {
	const configured = this.settings.retry?.enabled;
	return configured ?? true;
}
|
||||
|
||||
/**
 * Store the auto-retry flag, creating the retry settings object if it does
 * not exist yet, then save the settings.
 *
 * @param enabled - New value for retry.enabled.
 */
setRetryEnabled(enabled: boolean): void {
	const retry = this.settings.retry || (this.settings.retry = {});
	retry.enabled = enabled;
	this.save();
}
|
||||
|
||||
/**
 * Retry settings with defaults applied.
 *
 * @returns enabled from getRetryEnabled(), maxRetries defaulting to 3 and
 *          baseDelayMs defaulting to 2000.
 */
getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number } {
	const enabled = this.getRetryEnabled();
	const overrides = this.settings.retry;
	const maxRetries = overrides?.maxRetries ?? 3;
	const baseDelayMs = overrides?.baseDelayMs ?? 2000;
	return { enabled, maxRetries, baseDelayMs };
}
|
||||
|
||||
/** Value of the hideThinkingBlock setting; false when it is absent. */
getHideThinkingBlock(): boolean {
	const hidden = this.settings.hideThinkingBlock;
	return hidden ?? false;
}
|
||||
|
|
|
|||
|
|
@ -102,6 +102,10 @@ export class InteractiveMode {
|
|||
private autoCompactionLoader: Loader | null = null;
|
||||
private autoCompactionEscapeHandler?: () => void;
|
||||
|
||||
// Auto-retry state
|
||||
private retryLoader: Loader | null = null;
|
||||
private retryEscapeHandler?: () => void;
|
||||
|
||||
// Hook UI state
|
||||
private hookSelector: HookSelectorComponent | null = null;
|
||||
private hookInput: HookInputComponent | null = null;
|
||||
|
|
@ -806,6 +810,46 @@ export class InteractiveMode {
|
|||
this.ui.requestRender();
|
||||
break;
|
||||
}
|
||||
|
||||
case "auto_retry_start": {
	// This event is emitted once per attempt, while auto_retry_end is emitted
	// once per retry sequence. retryLoader is non-null between the first
	// start and the end, so use it to tell a first attempt from a later one.
	if (this.retryLoader === null) {
		// First attempt: save the current Escape handler and set up escape to abort retry
		this.retryEscapeHandler = this.editor.onEscape;
		this.editor.onEscape = () => {
			this.session.abortRetry();
		};
	} else {
		// Later attempt: keep the saved handler (re-capturing here would save
		// the abort handler installed above) and stop the previous loader
		// before it is replaced.
		this.retryLoader.stop();
	}
	// Show retry indicator
	this.statusContainer.clear();
	const delaySeconds = Math.round(event.delayMs / 1000);
	this.retryLoader = new Loader(
		this.ui,
		(spinner) => theme.fg("warning", spinner),
		(text) => theme.fg("muted", text),
		`Retrying (${event.attempt}/${event.maxAttempts}) in ${delaySeconds}s... (esc to cancel)`,
	);
	this.statusContainer.addChild(this.retryLoader);
	this.ui.requestRender();
	break;
}
|
||||
|
||||
case "auto_retry_end": {
	// Put back the Escape handler that was saved when the retry started
	const savedEscape = this.retryEscapeHandler;
	if (savedEscape) {
		this.editor.onEscape = savedEscape;
		this.retryEscapeHandler = undefined;
	}
	// Tear down the retry indicator
	const loader = this.retryLoader;
	if (loader) {
		loader.stop();
		this.retryLoader = null;
		this.statusContainer.clear();
	}
	// Only a failed sequence is reported here; nothing is shown on success
	if (!event.success) {
		const reason = event.finalError || "Unknown error";
		this.showError(`Retry failed after ${event.attempt} attempts: ${reason}`);
	}
	this.ui.requestRender();
	break;
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -264,6 +264,20 @@ export class RpcClient {
|
|||
await this.send({ type: "set_auto_compaction", enabled });
|
||||
}
|
||||
|
||||
/**
 * Send a set_auto_retry command carrying the given flag.
 *
 * @param enabled - Whether auto-retry should be enabled.
 */
async setAutoRetry(enabled: boolean): Promise<void> {
	const command = { type: "set_auto_retry", enabled } as const;
	await this.send(command);
}
|
||||
|
||||
/**
 * Send an abort_retry command to cancel an in-progress retry.
 */
async abortRetry(): Promise<void> {
	const command = { type: "abort_retry" } as const;
	await this.send(command);
}
|
||||
|
||||
/**
|
||||
* Execute a bash command.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -270,6 +270,20 @@ export async function runRpcMode(session: AgentSession): Promise<never> {
|
|||
return success(id, "set_auto_compaction");
|
||||
}
|
||||
|
||||
// =================================================================
|
||||
// Retry
|
||||
// =================================================================
|
||||
|
||||
case "set_auto_retry": {
	// Apply the requested flag to the session, then acknowledge the command
	const { enabled } = command;
	session.setAutoRetryEnabled(enabled);
	return success(id, "set_auto_retry");
}
|
||||
|
||||
case "abort_retry": {
	// Cancel the session's in-progress retry, then acknowledge the command
	session.abortRetry();
	const response = success(id, "abort_retry");
	return response;
}
|
||||
|
||||
// =================================================================
|
||||
// Bash
|
||||
// =================================================================
|
||||
|
|
|
|||
|
|
@ -40,6 +40,10 @@ export type RpcCommand =
|
|||
| { id?: string; type: "compact"; customInstructions?: string }
|
||||
| { id?: string; type: "set_auto_compaction"; enabled: boolean }
|
||||
|
||||
// Retry
|
||||
| { id?: string; type: "set_auto_retry"; enabled: boolean }
|
||||
| { id?: string; type: "abort_retry" }
|
||||
|
||||
// Bash
|
||||
| { id?: string; type: "bash"; command: string }
|
||||
| { id?: string; type: "abort_bash" }
|
||||
|
|
@ -127,6 +131,10 @@ export type RpcResponse =
|
|||
| { id?: string; type: "response"; command: "compact"; success: true; data: CompactionResult }
|
||||
| { id?: string; type: "response"; command: "set_auto_compaction"; success: true }
|
||||
|
||||
// Retry
|
||||
| { id?: string; type: "response"; command: "set_auto_retry"; success: true }
|
||||
| { id?: string; type: "response"; command: "abort_retry"; success: true }
|
||||
|
||||
// Bash
|
||||
| { id?: string; type: "response"; command: "bash"; success: true; data: BashResult }
|
||||
| { id?: string; type: "response"; command: "abort_bash"; success: true }
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue