feat: add maxDelayMs setting to cap server-requested retry delays

When a provider (e.g., Google Gemini CLI) requests a retry delay longer than maxDelayMs (default: 60s), the request fails immediately with an informative error instead of waiting silently for hours. The error is then handled by agent-level auto-retry, which shows the delay to the user and allows aborting with Escape.

- Add maxRetryDelayMs to StreamOptions (packages/ai)
- Add maxRetryDelayMs to AgentOptions (packages/agent)
- Add retry.maxDelayMs to settings (packages/coding-agent)
- Update _isRetryableError to match 'retry delay' errors

Fixes #1123
2026-04-15 09:01:14 +00:00 · 2026-02-01 00:50:41 +01:00 · 2026-02-01 00:50:41 +01:00 · 030a61d88c
commit 030a61d88c
parent 1bd68327f3
11 changed files with 65 additions and 4 deletions
--- a/packages/agent/CHANGELOG.md
+++ b/packages/agent/CHANGELOG.md
@ -2,6 +2,10 @@

 ## [Unreleased]

+### Added
+
+- Added `maxRetryDelayMs` option to `AgentOptions` to cap server-requested retry delays. Passed through to the underlying stream function. ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
+
 ## [0.50.7] - 2026-01-31

 ## [0.50.6] - 2026-01-30
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@ -77,6 +77,14 @@ export interface AgentOptions {
 	 * Custom token budgets for thinking levels (token-based providers only).
 	 */
 	thinkingBudgets?: ThinkingBudgets;
+
+	/**
+	 * Maximum delay in milliseconds to wait for a retry when the server requests a long wait.
+	 * If the server's requested delay exceeds this value, the request fails immediately,
+	 * allowing higher-level retry logic to handle it with user visibility.
+	 * Default: 60000 (60 seconds). Set to 0 to disable the cap.
+	 */
+	maxRetryDelayMs?: number;
 }

 export class Agent {
@ -106,6 +114,7 @@ export class Agent {
 	private runningPrompt?: Promise<void>;
 	private resolveRunningPrompt?: () => void;
 	private _thinkingBudgets?: ThinkingBudgets;
+	private _maxRetryDelayMs?: number;

 	constructor(opts: AgentOptions = {}) {
 		this._state = { ...this._state, ...opts.initialState };
@ -117,6 +126,7 @@ export class Agent {
 		this._sessionId = opts.sessionId;
 		this.getApiKey = opts.getApiKey;
 		this._thinkingBudgets = opts.thinkingBudgets;
+		this._maxRetryDelayMs = opts.maxRetryDelayMs;
 	}

 	/**
@ -148,6 +158,21 @@ export class Agent {
 		this._thinkingBudgets = value;
 	}

+	/**
+	 * Current cap, in milliseconds, on server-requested retry delays; `undefined` if none was set on this agent.
+	 */
+	get maxRetryDelayMs(): number | undefined {
+		return this._maxRetryDelayMs;
+	}
+
+	/**
+	 * Set the maximum delay, in milliseconds, to wait for server-requested retries.
+	 * Set to 0 to disable the cap; the stored value is what the agent forwards as the `maxRetryDelayMs` option.
+	 */
+	set maxRetryDelayMs(value: number | undefined) {
+		this._maxRetryDelayMs = value;
+	}
+
 	get state(): AgentState {
 		return this._state;
 	}
@ -333,6 +358,7 @@ export class Agent {
 			reasoning,
 			sessionId: this._sessionId,
 			thinkingBudgets: this._thinkingBudgets,
+			maxRetryDelayMs: this._maxRetryDelayMs,
 			convertToLlm: this.convertToLlm,
 			transformContext: this.transformContext,
 			getApiKey: this.getApiKey,