diff --git a/packages/agent/CHANGELOG.md b/packages/agent/CHANGELOG.md index e0035d25..ba439f2a 100644 --- a/packages/agent/CHANGELOG.md +++ b/packages/agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Added `maxRetryDelayMs` option to `AgentOptions` to cap server-requested retry delays. Passed through to the underlying stream function. ([#1123](https://github.com/badlogic/pi-mono/issues/1123)) + ## [0.50.7] - 2026-01-31 ## [0.50.6] - 2026-01-30 diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index c79f63e9..e99630da 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -77,6 +77,14 @@ export interface AgentOptions { * Custom token budgets for thinking levels (token-based providers only). */ thinkingBudgets?: ThinkingBudgets; + + /** + * Maximum delay in milliseconds to wait for a retry when the server requests a long wait. + * If the server's requested delay exceeds this value, the request fails immediately, + * allowing higher-level retry logic to handle it with user visibility. + * Default: 60000 (60 seconds). Set to 0 to disable the cap. + */ + maxRetryDelayMs?: number; } export class Agent { @@ -106,6 +114,7 @@ export class Agent { private runningPrompt?: Promise; private resolveRunningPrompt?: () => void; private _thinkingBudgets?: ThinkingBudgets; + private _maxRetryDelayMs?: number; constructor(opts: AgentOptions = {}) { this._state = { ...this._state, ...opts.initialState }; @@ -117,6 +126,7 @@ export class Agent { this._sessionId = opts.sessionId; this.getApiKey = opts.getApiKey; this._thinkingBudgets = opts.thinkingBudgets; + this._maxRetryDelayMs = opts.maxRetryDelayMs; } /** @@ -148,6 +158,21 @@ export class Agent { this._thinkingBudgets = value; } + /** + * Get the current max retry delay in milliseconds. + */ + get maxRetryDelayMs(): number | undefined { + return this._maxRetryDelayMs; + } + + /** + * Set the maximum delay to wait for server-requested retries. 
+ * Set to 0 to disable the cap. + */ + set maxRetryDelayMs(value: number | undefined) { + this._maxRetryDelayMs = value; + } + get state(): AgentState { return this._state; } @@ -333,6 +358,7 @@ export class Agent { reasoning, sessionId: this._sessionId, thinkingBudgets: this._thinkingBudgets, + maxRetryDelayMs: this._maxRetryDelayMs, convertToLlm: this.convertToLlm, transformContext: this.transformContext, getApiKey: this.getApiKey, diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 5ab1e9ca..9c725217 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- Added `maxRetryDelayMs` option to `StreamOptions` to cap server-requested retry delays. When a provider (e.g., Google Gemini CLI) requests a delay longer than this value, the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). Set to 0 to disable the cap. ([#1123](https://github.com/badlogic/pi-mono/issues/1123)) + ## [0.50.7] - 2026-01-31 ## [0.50.6] - 2026-01-30 diff --git a/packages/ai/src/providers/google-gemini-cli.ts b/packages/ai/src/providers/google-gemini-cli.ts index 5e6f5d58..7d655a21 100644 --- a/packages/ai/src/providers/google-gemini-cli.ts +++ b/packages/ai/src/providers/google-gemini-cli.ts @@ -473,6 +473,16 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli", GoogleGe // Use server-provided delay or exponential backoff const serverDelay = extractRetryDelay(errorText, response); const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt; + + // Check if server delay exceeds max allowed (default: 60s) + const maxDelayMs = options?.maxRetryDelayMs ?? 60000; + if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) { + const delaySeconds = Math.ceil(serverDelay / 1000); + throw new Error( + `Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). 
${extractErrorMessage(errorText)}`, + ); + } + await sleep(delayMs, options?.signal); continue; } diff --git a/packages/ai/src/providers/simple-options.ts b/packages/ai/src/providers/simple-options.ts index be90c7d9..f6cbfa65 100644 --- a/packages/ai/src/providers/simple-options.ts +++ b/packages/ai/src/providers/simple-options.ts @@ -9,6 +9,7 @@ export function buildBaseOptions(model: Model, options?: SimpleStreamOption sessionId: options?.sessionId, headers: options?.headers, onPayload: options?.onPayload, + maxRetryDelayMs: options?.maxRetryDelayMs, }; } diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 3b9822f2..8cdf4dc1 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -72,6 +72,14 @@ export interface StreamOptions { * Not supported by all providers (e.g., AWS Bedrock uses SDK auth). */ headers?: Record; + /** + * Maximum delay in milliseconds to wait for a retry when the server requests a long wait. + * If the server's requested delay exceeds this value, the request fails immediately + * with an error containing the requested delay, allowing higher-level retry logic + * to handle it with user visibility. + * Default: 60000 (60 seconds). Set to 0 to disable the cap. + */ + maxRetryDelayMs?: number; } export type ProviderStreamOptions = StreamOptions & Record; diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 5366ead3..75eed68a 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -5,6 +5,7 @@ ### Added - Added `newSession`, `tree`, and `fork` keybinding actions for `/new`, `/tree`, and `/fork` commands. All unbound by default. ([#1114](https://github.com/badlogic/pi-mono/pull/1114) by [@juanibiapina](https://github.com/juanibiapina)) +- Added `retry.maxDelayMs` setting to cap maximum server-requested retry delay. 
When a provider requests a longer delay (e.g., Google's "quota will reset after 5h"), the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). Set to 0 to disable the cap. ([#1123](https://github.com/badlogic/pi-mono/issues/1123)) ### Fixed diff --git a/packages/coding-agent/docs/settings.md b/packages/coding-agent/docs/settings.md index 1690dec6..7bcf58e0 100644 --- a/packages/coding-agent/docs/settings.md +++ b/packages/coding-agent/docs/settings.md @@ -77,13 +77,17 @@ Edit directly or use `/settings` for common options. | `retry.enabled` | boolean | `true` | Enable automatic retry on transient errors | | `retry.maxRetries` | number | `3` | Maximum retry attempts | | `retry.baseDelayMs` | number | `2000` | Base delay for exponential backoff (2s, 4s, 8s) | +| `retry.maxDelayMs` | number | `60000` | Max server-requested delay before failing (60s) | + +When a provider requests a retry delay longer than `retry.maxDelayMs` (e.g., Google's "quota will reset after 5h"), the request fails immediately with an informative error instead of waiting silently. Set to `0` to disable the cap. 
```json { "retry": { "enabled": true, "maxRetries": 3, - "baseDelayMs": 2000 + "baseDelayMs": 2000, + "maxDelayMs": 60000 } } ``` diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index f85ce35c..d01e44ce 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -1901,8 +1901,8 @@ export class AgentSession { if (isContextOverflow(message, contextWindow)) return false; const err = message.errorMessage; - // Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated - return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated/i.test( + // Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated, retry delay exceeded + return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay/i.test( err, ); } diff --git a/packages/coding-agent/src/core/sdk.ts b/packages/coding-agent/src/core/sdk.ts index 2f8c9959..3edac009 100644 --- a/packages/coding-agent/src/core/sdk.ts +++ b/packages/coding-agent/src/core/sdk.ts @@ -295,6 +295,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {} steeringMode: settingsManager.getSteeringMode(), followUpMode: settingsManager.getFollowUpMode(), thinkingBudgets: settingsManager.getThinkingBudgets(), + maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs, getApiKey: async (provider) => { // Use the provider argument from the in-flight request; // agent.state.model may already be switched 
mid-turn. diff --git a/packages/coding-agent/src/core/settings-manager.ts b/packages/coding-agent/src/core/settings-manager.ts index af9aa791..8419455f 100644 --- a/packages/coding-agent/src/core/settings-manager.ts +++ b/packages/coding-agent/src/core/settings-manager.ts @@ -16,6 +16,7 @@ export interface RetrySettings { enabled?: boolean; // default: true maxRetries?: number; // default: 3 baseDelayMs?: number; // default: 2000 (exponential backoff: 2s, 4s, 8s) + maxDelayMs?: number; // default: 60000 (max server-requested delay before failing) } export interface TerminalSettings { @@ -456,11 +457,12 @@ export class SettingsManager { this.save(); } - getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number } { + getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number; maxDelayMs: number } { return { enabled: this.getRetryEnabled(), maxRetries: this.settings.retry?.maxRetries ?? 3, baseDelayMs: this.settings.retry?.baseDelayMs ?? 2000, + maxDelayMs: this.settings.retry?.maxDelayMs ?? 60000, }; }