diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 492b5b05..7d766b74 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Changed + +- **Anthropic SDK retries disabled**: Set `maxRetries: 0` on Anthropic client to allow application-level retry handling. The SDK's built-in retries were interfering with coding-agent's retry logic. ([#157](https://github.com/badlogic/pi-mono/issues/157)) + ## [0.18.1] - 2025-12-10 ### Added diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index a1791b8a..08d4e4f6 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -295,6 +295,7 @@ function createClient( baseURL: model.baseUrl, defaultHeaders, dangerouslyAllowBrowser: true, + maxRetries: 0, // Disable SDK retries, handled by coding-agent }); return { client, isOAuthToken: true }; @@ -311,6 +312,7 @@ function createClient( baseURL: model.baseUrl, dangerouslyAllowBrowser: true, defaultHeaders, + maxRetries: 0, // Disable SDK retries, handled by coding-agent }); return { client, isOAuthToken: false }; diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 769ce7e5..116ff1af 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -4,6 +4,8 @@ ### Added +- **Auto-retry on transient errors**: Automatically retries requests when providers return overloaded, rate limit, or server errors (429, 500, 502, 503, 504). Uses exponential backoff (2s, 4s, 8s). Shows retry status in TUI with option to cancel via Escape. Configurable in `settings.json` via `retry.enabled`, `retry.maxRetries`, `retry.baseDelayMs`. RPC mode emits `auto_retry_start` and `auto_retry_end` events. 
([#157](https://github.com/badlogic/pi-mono/issues/157)) + - **HTML export line numbers**: Read tool calls in HTML exports now display line number ranges (e.g., `file.txt:10-20`) when offset/limit parameters are used, matching the TUI display format. Line numbers appear in yellow color for better visibility. ([#166](https://github.com/badlogic/pi-mono/issues/166)) ### Fixed diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index 2ac5aa5e..715d3b54 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -524,13 +524,23 @@ See [Hooks Documentation](docs/hooks.md) for full API reference. "shellPath": "C:\\path\\to\\bash.exe", "queueMode": "one-at-a-time", "compaction": { - "enabled": false, + "enabled": true, "reserveTokens": 16384, "keepRecentTokens": 20000 + }, + "retry": { + "enabled": true, + "maxRetries": 3, + "baseDelayMs": 2000 } } ``` +**Retry settings:** +- `enabled`: Auto-retry on transient errors (overloaded, rate limit, 5xx). Default: `true` +- `maxRetries`: Maximum retry attempts. Default: `3` +- `baseDelayMs`: Base delay for exponential backoff (2s, 4s, 8s). Default: `2000` + --- ## CLI Reference diff --git a/packages/coding-agent/docs/rpc.md b/packages/coding-agent/docs/rpc.md index 67c17942..95259d68 100644 --- a/packages/coding-agent/docs/rpc.md +++ b/packages/coding-agent/docs/rpc.md @@ -303,6 +303,34 @@ Response: {"type": "response", "command": "set_auto_compaction", "success": true} ``` +### Retry + +#### set_auto_retry + +Enable or disable automatic retry on transient errors (overloaded, rate limit, 5xx). + +```json +{"type": "set_auto_retry", "enabled": true} +``` + +Response: +```json +{"type": "response", "command": "set_auto_retry", "success": true} +``` + +#### abort_retry + +Abort an in-progress retry (cancel the delay and stop retrying). 
+ +```json +{"type": "abort_retry"} +``` + +Response: +```json +{"type": "response", "command": "abort_retry", "success": true} +``` + ### Bash #### bash @@ -528,6 +556,8 @@ Events are streamed to stdout as JSON lines during agent operation. Events do NO | `tool_execution_end` | Tool completes | | `auto_compaction_start` | Auto-compaction begins | | `auto_compaction_end` | Auto-compaction completes | +| `auto_retry_start` | Auto-retry begins (after transient error) | +| `auto_retry_end` | Auto-retry completes (success or final failure) | ### agent_start @@ -664,6 +694,38 @@ Emitted when automatic compaction runs (when context is nearly full). If compaction was aborted, `result` is `null` and `aborted` is `true`. +### auto_retry_start / auto_retry_end + +Emitted when automatic retry is triggered after a transient error (overloaded, rate limit, 5xx). + +```json +{ + "type": "auto_retry_start", + "attempt": 1, + "maxAttempts": 3, + "delayMs": 2000, + "errorMessage": "529 {\"type\":\"error\",\"error\":{\"type\":\"overloaded_error\",\"message\":\"Overloaded\"}}" +} +``` + +```json +{ + "type": "auto_retry_end", + "success": true, + "attempt": 2 +} +``` + +On final failure (max retries exceeded): +```json +{ + "type": "auto_retry_end", + "success": false, + "attempt": 3, + "finalError": "529 overloaded_error: Overloaded" +} +``` + ## Error Handling Failed commands return a response with `success: false`: diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index 91f624f1..1c55fba5 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -31,7 +31,9 @@ import { expandSlashCommand, type FileSlashCommand } from "./slash-commands.js"; export type AgentSessionEvent = | AgentEvent | { type: "auto_compaction_start"; reason: "threshold" | "overflow" } - | { type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean }; + | 
{ type: "auto_compaction_end"; result: CompactionResult | null; aborted: boolean; willRetry: boolean } + | { type: "auto_retry_start"; attempt: number; maxAttempts: number; delayMs: number; errorMessage: string } + | { type: "auto_retry_end"; success: boolean; attempt: number; finalError?: string }; /** Listener function for agent session events */ export type AgentSessionEventListener = (event: AgentSessionEvent) => void; @@ -116,6 +118,12 @@ export class AgentSession { private _compactionAbortController: AbortController | null = null; private _autoCompactionAbortController: AbortController | null = null; + // Retry state + private _retryAbortController: AbortController | null = null; + private _retryAttempt = 0; + private _retryPromise: Promise | null = null; + private _retryResolve: (() => void) | null = null; + // Bash execution state private _bashAbortController: AbortController | null = null; private _pendingBashMessages: BashExecutionMessage[] = []; @@ -184,14 +192,40 @@ export class AgentSession { } } - // Check auto-compaction after agent completes + // Check auto-retry and auto-compaction after agent completes if (event.type === "agent_end" && this._lastAssistantMessage) { const msg = this._lastAssistantMessage; this._lastAssistantMessage = null; + + // Check for retryable errors first (overloaded, rate limit, server errors) + if (this._isRetryableError(msg)) { + const didRetry = await this._handleRetryableError(msg); + if (didRetry) return; // Retry was initiated, don't proceed to compaction + } else if (this._retryAttempt > 0) { + // Previous retry succeeded - emit success event and reset counter + this._emit({ + type: "auto_retry_end", + success: true, + attempt: this._retryAttempt, + }); + this._retryAttempt = 0; + // Resolve the retry promise so waitForRetry() completes + this._resolveRetry(); + } + await this._handleAgentEndCompaction(msg); } }; + /** Resolve the pending retry promise */ + private _resolveRetry(): void { + if (this._retryResolve) { 
+			this._retryResolve();
+			this._retryResolve = null;
+			this._retryPromise = null;
+		}
+	}
+
 	/** Extract text content from a message */
 	private _getUserMessageText(message: Message): string {
 		if (message.role !== "user") return "";
@@ -379,6 +413,7 @@ export class AgentSession {
 		const expandedText = expandCommands ? expandSlashCommand(text, [...this._fileCommands]) : text;
 
 		await this.agent.prompt(expandedText, options?.attachments);
+		await this.waitForRetry();
 	}
 
 	/**
@@ -419,6 +454,7 @@ export class AgentSession {
 	 * Abort current operation and wait for agent to become idle.
 	 */
 	async abort(): Promise<void> {
+		this.abortRetry();
 		this.agent.abort();
 		await this.agent.waitForIdle();
 	}
@@ -784,6 +820,174 @@ export class AgentSession {
 		return this.settingsManager.getCompactionEnabled();
 	}
 
+	// =========================================================================
+	// Auto-Retry
+	// =========================================================================
+
+	/**
+	 * Check if an error is retryable (overloaded, rate limit, server errors).
+	 * Context overflow errors are NOT retryable (handled by compaction instead).
+	 *
+	 * Numeric status codes are matched as whole tokens so that unrelated numbers
+	 * in an error message (e.g. "1500 tokens") do not trigger a retry.
+	 */
+	private _isRetryableError(message: AssistantMessage): boolean {
+		if (message.stopReason !== "error" || !message.errorMessage) return false;
+
+		// Context overflow is handled by compaction, not retry
+		const contextWindow = this.model?.contextWindow ?? 0;
+		if (isContextOverflow(message, contextWindow)) return false;
+
+		const err = message.errorMessage;
+		// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable
+		return /overloaded|rate.?limit|too many requests|\b(?:429|500|502|503|504)\b|service.?unavailable|server error|internal error/i.test(
+			err,
+		);
+	}
+
+	/**
+	 * Handle retryable errors with exponential backoff.
+	 *
+	 * Emits `auto_retry_start` before waiting and `auto_retry_end` when retries are
+	 * exhausted or the wait is cancelled; the success case is reported by the
+	 * `agent_end` handler once a retried request completes without error.
+	 * @returns true if retry was initiated, false if max retries exceeded or disabled
+	 */
+	private async _handleRetryableError(message: AssistantMessage): Promise<boolean> {
+		const settings = this.settingsManager.getRetrySettings();
+		if (!settings.enabled) return false;
+
+		this._retryAttempt++;
+
+		// Create retry promise on first attempt so waitForRetry() can await it
+		if (this._retryAttempt === 1 && !this._retryPromise) {
+			this._retryPromise = new Promise((resolve) => {
+				this._retryResolve = resolve;
+			});
+		}
+
+		if (this._retryAttempt > settings.maxRetries) {
+			// Max retries exceeded, emit final failure and reset
+			this._emit({
+				type: "auto_retry_end",
+				success: false,
+				attempt: this._retryAttempt - 1,
+				finalError: message.errorMessage,
+			});
+			this._retryAttempt = 0;
+			this._resolveRetry(); // Resolve so waitForRetry() completes
+			return false;
+		}
+
+		// Capture the attempt number now: abortRetry() resets the counter synchronously,
+		// before the rejected sleep below resumes in the catch block.
+		const attempt = this._retryAttempt;
+		const delayMs = settings.baseDelayMs * 2 ** (attempt - 1);
+
+		this._emit({
+			type: "auto_retry_start",
+			attempt,
+			maxAttempts: settings.maxRetries,
+			delayMs,
+			errorMessage: message.errorMessage || "Unknown error",
+		});
+
+		// Remove error message from agent state (keep in session for history)
+		const messages = this.agent.state.messages;
+		if (messages.length > 0 && messages[messages.length - 1].role === "assistant") {
+			this.agent.replaceMessages(messages.slice(0, -1));
+		}
+
+		// Wait with exponential backoff (abortable)
+		this._retryAbortController = new AbortController();
+		try {
+			await this._sleep(delayMs, this._retryAbortController.signal);
+		} catch {
+			// Aborted during sleep - emit end event so UI can clean up
+			this._retryAttempt = 0;
+			this._retryAbortController = null;
+			this._emit({
+				type: "auto_retry_end",
+				success: false,
+				attempt,
+				finalError: "Retry cancelled",
+			});
+			this._resolveRetry();
+			return false;
+		}
+		this._retryAbortController = null;
+
+		// Retry via continue() - use setTimeout to break out of event handler chain
+		setTimeout(() => {
+			this.agent.continue().catch(() => {
+				// Retry failed - will be caught by next agent_end
+			});
+		}, 0);
+
+		return true;
+	}
+
+	/**
+	 * Sleep helper that respects abort signal.
+	 * Rejects with an "Aborted" error if the signal fires before the delay elapses.
+	 */
+	private _sleep(ms: number, signal?: AbortSignal): Promise<void> {
+		return new Promise((resolve, reject) => {
+			if (signal?.aborted) {
+				reject(new Error("Aborted"));
+				return;
+			}
+
+			const onAbort = () => {
+				clearTimeout(timeout);
+				reject(new Error("Aborted"));
+			};
+			const timeout = setTimeout(() => {
+				// Detach the listener so a completed sleep leaves nothing on the signal
+				signal?.removeEventListener("abort", onAbort);
+				resolve();
+			}, ms);
+
+			signal?.addEventListener("abort", onAbort, { once: true });
+		});
+	}
+
+	/**
+	 * Cancel in-progress retry.
+	 */
+	abortRetry(): void {
+		this._retryAbortController?.abort();
+		this._retryAttempt = 0;
+		this._resolveRetry();
+	}
+
+	/**
+	 * Wait for any in-progress retry to complete.
+	 * Returns immediately if no retry is in progress.
+	 */
+	private async waitForRetry(): Promise<void> {
+		if (this._retryPromise) {
+			await this._retryPromise;
+		}
+	}
+
+	/** Whether auto-retry is currently in progress */
+	get isRetrying(): boolean {
+		return this._retryPromise !== null;
+	}
+
+	/** Whether auto-retry is enabled */
+	get autoRetryEnabled(): boolean {
+		return this.settingsManager.getRetryEnabled();
+	}
+
+	/**
+	 * Toggle auto-retry setting.
+ */ + setAutoRetryEnabled(enabled: boolean): void { + this.settingsManager.setRetryEnabled(enabled); + } + // ========================================================================= // Bash Execution // ========================================================================= diff --git a/packages/coding-agent/src/core/settings-manager.ts b/packages/coding-agent/src/core/settings-manager.ts index 0582a99a..129f9c5f 100644 --- a/packages/coding-agent/src/core/settings-manager.ts +++ b/packages/coding-agent/src/core/settings-manager.ts @@ -8,6 +8,12 @@ export interface CompactionSettings { keepRecentTokens?: number; // default: 20000 } +export interface RetrySettings { + enabled?: boolean; // default: true + maxRetries?: number; // default: 3 + baseDelayMs?: number; // default: 2000 (exponential backoff: 2s, 4s, 8s) +} + export interface Settings { lastChangelogVersion?: string; defaultProvider?: string; @@ -16,6 +22,7 @@ export interface Settings { queueMode?: "all" | "one-at-a-time"; theme?: string; compaction?: CompactionSettings; + retry?: RetrySettings; hideThinkingBlock?: boolean; shellPath?: string; // Custom shell path (e.g., for Cygwin users on Windows) collapseChangelog?: boolean; // Show condensed changelog after update (use /changelog for full) @@ -149,6 +156,26 @@ export class SettingsManager { }; } + getRetryEnabled(): boolean { + return this.settings.retry?.enabled ?? true; + } + + setRetryEnabled(enabled: boolean): void { + if (!this.settings.retry) { + this.settings.retry = {}; + } + this.settings.retry.enabled = enabled; + this.save(); + } + + getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number } { + return { + enabled: this.getRetryEnabled(), + maxRetries: this.settings.retry?.maxRetries ?? 3, + baseDelayMs: this.settings.retry?.baseDelayMs ?? 2000, + }; + } + getHideThinkingBlock(): boolean { return this.settings.hideThinkingBlock ?? 
false;
 	}
diff --git a/packages/coding-agent/src/modes/interactive/interactive-mode.ts b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
index 1817b23a..9fae0e05 100644
--- a/packages/coding-agent/src/modes/interactive/interactive-mode.ts
+++ b/packages/coding-agent/src/modes/interactive/interactive-mode.ts
@@ -102,6 +102,10 @@ export class InteractiveMode {
 	private autoCompactionLoader: Loader | null = null;
 	private autoCompactionEscapeHandler?: () => void;
 
+	// Auto-retry state
+	private retryLoader: Loader | null = null;
+	private retryEscapeHandler?: () => void;
+
 	// Hook UI state
 	private hookSelector: HookSelectorComponent | null = null;
 	private hookInput: HookInputComponent | null = null;
@@ -806,6 +810,53 @@ export class InteractiveMode {
 				this.ui.requestRender();
 				break;
 			}
+
+			case "auto_retry_start": {
+				if (this.retryLoader) {
+					// No auto_retry_end is emitted between attempts, so the previous attempt's
+					// loader is still active: stop it and keep the handler saved on attempt 1
+					this.retryLoader.stop();
+				} else {
+					// First attempt: remember the current escape handler for restoring later
+					this.retryEscapeHandler = this.editor.onEscape;
+				}
+				// Set up escape to abort retry
+				this.editor.onEscape = () => {
+					this.session.abortRetry();
+				};
+				// Show retry indicator
+				this.statusContainer.clear();
+				const delaySeconds = Math.round(event.delayMs / 1000);
+				this.retryLoader = new Loader(
+					this.ui,
+					(spinner) => theme.fg("warning", spinner),
+					(text) => theme.fg("muted", text),
+					`Retrying (${event.attempt}/${event.maxAttempts}) in ${delaySeconds}s... (esc to cancel)`,
+				);
+				this.statusContainer.addChild(this.retryLoader);
+				this.ui.requestRender();
+				break;
+			}
+
+			case "auto_retry_end": {
+				// Restore escape handler
+				if (this.retryEscapeHandler) {
+					this.editor.onEscape = this.retryEscapeHandler;
+					this.retryEscapeHandler = undefined;
+				}
+				// Stop loader
+				if (this.retryLoader) {
+					this.retryLoader.stop();
+					this.retryLoader = null;
+					this.statusContainer.clear();
+				}
+				// Show error only on final failure (success shows normal response)
+				if (!event.success) {
+					this.showError(`Retry failed after ${event.attempt} attempts: ${event.finalError || "Unknown error"}`);
+				}
+				this.ui.requestRender();
+				break;
+			}
 		}
 	}
 
diff --git a/packages/coding-agent/src/modes/rpc/rpc-client.ts b/packages/coding-agent/src/modes/rpc/rpc-client.ts
index e269bea1..99b95f75 100644
--- a/packages/coding-agent/src/modes/rpc/rpc-client.ts
+++ b/packages/coding-agent/src/modes/rpc/rpc-client.ts
@@ -264,6 +264,20 @@ export class RpcClient {
 		await this.send({ type: "set_auto_compaction", enabled });
 	}
 
+	/**
+	 * Set auto-retry enabled/disabled.
+	 */
+	async setAutoRetry(enabled: boolean): Promise<void> {
+		await this.send({ type: "set_auto_retry", enabled });
+	}
+
+	/**
+	 * Abort in-progress retry.
+	 */
+	async abortRetry(): Promise<void> {
+		await this.send({ type: "abort_retry" });
+	}
+
 	/**
 	 * Execute a bash command.
*/ diff --git a/packages/coding-agent/src/modes/rpc/rpc-mode.ts b/packages/coding-agent/src/modes/rpc/rpc-mode.ts index 827dbb98..f4ecd23f 100644 --- a/packages/coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/coding-agent/src/modes/rpc/rpc-mode.ts @@ -270,6 +270,20 @@ export async function runRpcMode(session: AgentSession): Promise { return success(id, "set_auto_compaction"); } + // ================================================================= + // Retry + // ================================================================= + + case "set_auto_retry": { + session.setAutoRetryEnabled(command.enabled); + return success(id, "set_auto_retry"); + } + + case "abort_retry": { + session.abortRetry(); + return success(id, "abort_retry"); + } + // ================================================================= // Bash // ================================================================= diff --git a/packages/coding-agent/src/modes/rpc/rpc-types.ts b/packages/coding-agent/src/modes/rpc/rpc-types.ts index 4925a018..f70153d3 100644 --- a/packages/coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/coding-agent/src/modes/rpc/rpc-types.ts @@ -40,6 +40,10 @@ export type RpcCommand = | { id?: string; type: "compact"; customInstructions?: string } | { id?: string; type: "set_auto_compaction"; enabled: boolean } + // Retry + | { id?: string; type: "set_auto_retry"; enabled: boolean } + | { id?: string; type: "abort_retry" } + // Bash | { id?: string; type: "bash"; command: string } | { id?: string; type: "abort_bash" } @@ -127,6 +131,10 @@ export type RpcResponse = | { id?: string; type: "response"; command: "compact"; success: true; data: CompactionResult } | { id?: string; type: "response"; command: "set_auto_compaction"; success: true } + // Retry + | { id?: string; type: "response"; command: "set_auto_retry"; success: true } + | { id?: string; type: "response"; command: "abort_retry"; success: true } + // Bash | { id?: string; type: "response"; command: "bash"; 
success: true; data: BashResult } | { id?: string; type: "response"; command: "abort_bash"; success: true }