feat: add maxDelayMs setting to cap server-requested retry delays

When a provider (e.g., Google Gemini CLI) requests a retry delay longer
than maxDelayMs (default: 60s), the request fails immediately with an
informative error instead of waiting silently for hours.

The error is then handled by agent-level auto-retry, which shows the
delay to the user and allows aborting with Escape.

- Add maxRetryDelayMs to StreamOptions (packages/ai)
- Add maxRetryDelayMs to AgentOptions (packages/agent)
- Add retry.maxDelayMs to settings (packages/coding-agent)
- Update _isRetryableError to match 'retry delay' errors

fixes #1123
This commit is contained in:
Mario Zechner 2026-02-01 00:50:41 +01:00
parent 1bd68327f3
commit 030a61d88c
11 changed files with 65 additions and 4 deletions

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- Added `maxRetryDelayMs` option to `AgentOptions` to cap server-requested retry delays. Passed through to the underlying stream function. ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
## [0.50.7] - 2026-01-31
## [0.50.6] - 2026-01-30

View file

@ -77,6 +77,14 @@ export interface AgentOptions {
* Custom token budgets for thinking levels (token-based providers only).
*/
thinkingBudgets?: ThinkingBudgets;
/**
* Maximum delay in milliseconds to wait for a retry when the server requests a long wait.
* If the server's requested delay exceeds this value, the request fails immediately,
* allowing higher-level retry logic to handle it with user visibility.
* Default: 60000 (60 seconds). Set to 0 to disable the cap.
*/
maxRetryDelayMs?: number;
}
export class Agent {
@ -106,6 +114,7 @@ export class Agent {
private runningPrompt?: Promise<void>;
private resolveRunningPrompt?: () => void;
private _thinkingBudgets?: ThinkingBudgets;
private _maxRetryDelayMs?: number;
constructor(opts: AgentOptions = {}) {
this._state = { ...this._state, ...opts.initialState };
@ -117,6 +126,7 @@ export class Agent {
this._sessionId = opts.sessionId;
this.getApiKey = opts.getApiKey;
this._thinkingBudgets = opts.thinkingBudgets;
this._maxRetryDelayMs = opts.maxRetryDelayMs;
}
/**
@ -148,6 +158,21 @@ export class Agent {
this._thinkingBudgets = value;
}
/**
* Get the current max retry delay in milliseconds.
* Returns `undefined` when no value is configured on this agent; 0 means the
* cap is disabled.
*/
get maxRetryDelayMs(): number | undefined {
return this._maxRetryDelayMs;
}
/**
* Set the maximum delay, in milliseconds, to wait for server-requested retries.
* Set to 0 to disable the cap. The value is stored as given, without
* validation; passing `undefined` removes the value configured on this agent.
*/
set maxRetryDelayMs(value: number | undefined) {
this._maxRetryDelayMs = value;
}
/**
* Get the agent's state object. The internal reference is returned directly,
* not a copy.
*/
get state(): AgentState {
return this._state;
}
@ -333,6 +358,7 @@ export class Agent {
reasoning,
sessionId: this._sessionId,
thinkingBudgets: this._thinkingBudgets,
maxRetryDelayMs: this._maxRetryDelayMs,
convertToLlm: this.convertToLlm,
transformContext: this.transformContext,
getApiKey: this.getApiKey,

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- Added `maxRetryDelayMs` option to `StreamOptions` to cap server-requested retry delays. When a provider (e.g., Google Gemini CLI) requests a delay longer than this value, the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). Set to 0 to disable the cap. ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
## [0.50.7] - 2026-01-31
## [0.50.6] - 2026-01-30

View file

@ -473,6 +473,16 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli", GoogleGe
// Use server-provided delay or exponential backoff
const serverDelay = extractRetryDelay(errorText, response);
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
// Check if server delay exceeds max allowed (default: 60s)
const maxDelayMs = options?.maxRetryDelayMs ?? 60000;
if (maxDelayMs > 0 && serverDelay && serverDelay > maxDelayMs) {
const delaySeconds = Math.ceil(serverDelay / 1000);
throw new Error(
`Server requested ${delaySeconds}s retry delay (max: ${Math.ceil(maxDelayMs / 1000)}s). ${extractErrorMessage(errorText)}`,
);
}
await sleep(delayMs, options?.signal);
continue;
}

View file

@ -9,6 +9,7 @@ export function buildBaseOptions(model: Model<Api>, options?: SimpleStreamOption
sessionId: options?.sessionId,
headers: options?.headers,
onPayload: options?.onPayload,
maxRetryDelayMs: options?.maxRetryDelayMs,
};
}

View file

@ -72,6 +72,14 @@ export interface StreamOptions {
* Not supported by all providers (e.g., AWS Bedrock uses SDK auth).
*/
headers?: Record<string, string>;
/**
* Maximum delay in milliseconds to wait for a retry when the server requests a long wait.
* If the server's requested delay exceeds this value, the request fails immediately
* with an error containing the requested delay, allowing higher-level retry logic
* to handle it with user visibility.
* Default: 60000 (60 seconds). Set to 0 to disable the cap.
*/
maxRetryDelayMs?: number;
}
/**
* `StreamOptions` widened with arbitrary additional string-keyed properties
* whose values are typed `unknown` and must be narrowed before use.
*/
export type ProviderStreamOptions = StreamOptions & Record<string, unknown>;

View file

@ -5,6 +5,7 @@
### Added
- Added `newSession`, `tree`, and `fork` keybinding actions for `/new`, `/tree`, and `/fork` commands. All unbound by default. ([#1114](https://github.com/badlogic/pi-mono/pull/1114) by [@juanibiapina](https://github.com/juanibiapina))
- Added `retry.maxDelayMs` setting to cap the maximum server-requested retry delay. When a provider requests a longer delay (e.g., Google's "quota will reset after 5h"), the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). Set to `0` to disable the cap. ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
### Fixed

View file

@ -77,13 +77,17 @@ Edit directly or use `/settings` for common options.
| `retry.enabled` | boolean | `true` | Enable automatic retry on transient errors |
| `retry.maxRetries` | number | `3` | Maximum retry attempts |
| `retry.baseDelayMs` | number | `2000` | Base delay for exponential backoff (2s, 4s, 8s) |
| `retry.maxDelayMs` | number | `60000` | Max server-requested delay before failing (60s) |
When a provider requests a retry delay longer than `retry.maxDelayMs` (e.g., Google's "quota will reset after 5h"), the request fails immediately with an informative error instead of waiting silently. Set to `0` to disable the cap.
```json
{
"retry": {
"enabled": true,
"maxRetries": 3,
"baseDelayMs": 2000
"baseDelayMs": 2000,
"maxDelayMs": 60000
}
}
```

View file

@ -1901,8 +1901,8 @@ export class AgentSession {
if (isContextOverflow(message, contextWindow)) return false;
const err = message.errorMessage;
// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated/i.test(
// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection errors, fetch failed, terminated, retry delay exceeded
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|connection.?refused|other side closed|fetch failed|upstream.?connect|reset before headers|terminated|retry delay/i.test(
err,
);
}

View file

@ -295,6 +295,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
steeringMode: settingsManager.getSteeringMode(),
followUpMode: settingsManager.getFollowUpMode(),
thinkingBudgets: settingsManager.getThinkingBudgets(),
maxRetryDelayMs: settingsManager.getRetrySettings().maxDelayMs,
getApiKey: async (provider) => {
// Use the provider argument from the in-flight request;
// agent.state.model may already be switched mid-turn.

View file

@ -16,6 +16,7 @@ export interface RetrySettings {
enabled?: boolean; // default: true
maxRetries?: number; // default: 3
baseDelayMs?: number; // default: 2000 (exponential backoff: 2s, 4s, 8s)
maxDelayMs?: number; // default: 60000 (max server-requested delay before failing)
}
export interface TerminalSettings {
@ -456,11 +457,12 @@ export class SettingsManager {
this.save();
}
getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number } {
/**
* Resolve the effective retry configuration, filling in defaults for any
* field that is not set.
*
* Defaults: `maxRetries` 3, `baseDelayMs` 2000, `maxDelayMs` 60000.
* `enabled` is taken from `getRetryEnabled()`.
*
* @returns A retry settings object with every field populated.
*/
getRetrySettings(): { enabled: boolean; maxRetries: number; baseDelayMs: number; maxDelayMs: number } {
const enabled = this.getRetryEnabled();
const retry = this.settings.retry;
const maxRetries = retry?.maxRetries ?? 3;
const baseDelayMs = retry?.baseDelayMs ?? 2000;
// `??` only replaces null/undefined, so an explicit 0 is preserved.
const maxDelayMs = retry?.maxDelayMs ?? 60000;
return { enabled, maxRetries, baseDelayMs, maxDelayMs };
}