mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-20 16:05:11 +00:00
fix(ai): add retry with server-provided delay for Gemini CLI rate limits, fixes #370
This commit is contained in:
parent
0d1424d8e5
commit
fd35d9188c
2 changed files with 136 additions and 14 deletions
|
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
|
||||||
|
- **Gemini CLI rate limit handling**: Added automatic retry with server-provided delay for 429 errors. Parses delay from error messages like "Your quota will reset after 39s" and waits accordingly. Falls back to exponential backoff for other transient errors. ([#370](https://github.com/badlogic/pi-mono/issues/370))
|
||||||
|
|
||||||
## [0.31.1] - 2026-01-02
|
## [0.31.1] - 2026-01-02
|
||||||
|
|
||||||
## [0.31.0] - 2026-01-02
|
## [0.31.0] - 2026-01-02
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,83 @@ const ANTIGRAVITY_HEADERS = {
|
||||||
// Counter for generating unique tool call IDs
|
// Counter for generating unique tool call IDs
|
||||||
let toolCallCounter = 0;
|
let toolCallCounter = 0;
|
||||||
|
|
||||||
|
// Retry configuration
|
||||||
|
const MAX_RETRIES = 3;
|
||||||
|
const BASE_DELAY_MS = 1000;
|
||||||
|
|
||||||
|
/**
 * Extract a server-suggested retry delay from a Gemini error response.
 *
 * Recognised patterns, tried in this order (first usable match wins):
 * 1. "Your quota will reset after 39s" / "... after 18h31m10s" (hours and minutes optional)
 * 2. "Please retry in Xs" or "Please retry in Xms"
 * 3. `"retryDelay": "34.074824224s"` (JSON field in error details)
 * 4. "Your quota will reset after 500ms" (sub-second form; only consulted when 1-3 yield nothing)
 *
 * @param errorText Raw error body returned by the API.
 * @returns Delay in milliseconds, rounded up and including a 1 second safety buffer,
 *          or `undefined` when no positive delay could be parsed.
 */
function extractRetryDelay(errorText: string): number | undefined {
	// Extra wait added on top of every parsed delay.
	const bufferMs = 1000;

	// Converts a captured "<number>" + "<ms|s>" pair into a buffered millisecond delay.
	// Returns undefined for missing captures, unparsable numbers and non-positive values.
	const toDelayMs = (rawValue: string | undefined, rawUnit: string | undefined): number | undefined => {
		if (rawValue === undefined || rawUnit === undefined) {
			return undefined;
		}
		const value = parseFloat(rawValue);
		if (Number.isNaN(value) || value <= 0) {
			return undefined;
		}
		const ms = rawUnit.toLowerCase() === "ms" ? value : value * 1000;
		return Math.ceil(ms + bufferMs);
	};

	// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
	const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
	if (durationMatch) {
		const hours = parseInt(durationMatch[1] ?? "0", 10);
		const minutes = parseInt(durationMatch[2] ?? "0", 10);
		const seconds = parseFloat(durationMatch[3] ?? "");
		if (!Number.isNaN(seconds)) {
			const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
			if (totalMs > 0) {
				return Math.ceil(totalMs + bufferMs);
			}
		}
	}

	// Pattern 2: "Please retry in X[ms|s]"
	const retryInMatch = errorText.match(/Please retry in ([0-9.]+)(ms|s)/i);
	const retryInDelay = toDelayMs(retryInMatch?.[1], retryInMatch?.[2]);
	if (retryInDelay !== undefined) {
		return retryInDelay;
	}

	// Pattern 3: "retryDelay": "34.074824224s" (JSON field in error details)
	const retryDelayMatch = errorText.match(/"retryDelay":\s*"([0-9.]+)(ms|s)"/i);
	const retryDelayField = toDelayMs(retryDelayMatch?.[1], retryDelayMatch?.[2]);
	if (retryDelayField !== undefined) {
		return retryDelayField;
	}

	// Pattern 4: sub-second "reset after 500ms". Pattern 1 requires a trailing seconds
	// component and therefore never matches this form. Checked last so that every input
	// which already produced a delay above keeps producing exactly the same value.
	const resetMsMatch = errorText.match(/reset after (\d+(?:\.\d+)?)(ms)/i);
	return toDelayMs(resetMsMatch?.[1], resetMsMatch?.[2]);
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a failed request is worth retrying.
 *
 * @param status HTTP status code of the response.
 * @param errorText Response body text, inspected when the status alone is not conclusive.
 * @returns `true` for status 429/500/502/503/504, or when the body mentions resource
 *          exhaustion, rate limiting, overload or service unavailability; `false` otherwise.
 */
function isRetryableError(status: number, errorText: string): boolean {
	const transientStatuses = [429, 500, 502, 503, 504];
	if (transientStatuses.includes(status)) {
		return true;
	}

	// Other statuses can still carry a transient-failure message in the body.
	const transientMessage = /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i;
	return transientMessage.test(errorText);
}
|
||||||
|
|
||||||
|
/**
 * Sleep for a given number of milliseconds, respecting an optional abort signal.
 *
 * @param ms Time to wait, in milliseconds.
 * @param signal Optional signal; aborting it cancels the wait.
 * @returns A promise that resolves once `ms` has elapsed. It rejects with
 *          `Error("Request was aborted")` if the signal is already aborted or
 *          becomes aborted before the timer fires.
 */
function sleep(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise((resolve, reject) => {
		if (signal?.aborted) {
			reject(new Error("Request was aborted"));
			return;
		}

		// Only ever invoked asynchronously, after `timeout` below has been initialised.
		const onAbort = (): void => {
			clearTimeout(timeout);
			reject(new Error("Request was aborted"));
		};

		const timeout = setTimeout(() => {
			// Detach the handler on normal completion; otherwise every finished sleep
			// would leave one more listener attached to a signal shared across retries.
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);

		signal?.addEventListener("abort", onAbort, { once: true });
	});
}
|
||||||
|
|
||||||
interface CloudCodeAssistRequest {
|
interface CloudCodeAssistRequest {
|
||||||
project: string;
|
project: string;
|
||||||
model: string;
|
model: string;
|
||||||
|
|
@ -181,7 +258,17 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
|
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
|
||||||
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
||||||
|
|
||||||
const response = await fetch(url, {
|
// Fetch with retry logic for rate limits and transient errors
|
||||||
|
let response: Response | undefined;
|
||||||
|
let lastError: Error | undefined;
|
||||||
|
|
||||||
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||||
|
if (options?.signal?.aborted) {
|
||||||
|
throw new Error("Request was aborted");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
response = await fetch(url, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
Authorization: `Bearer ${accessToken}`,
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
|
@ -193,9 +280,40 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
signal: options?.signal,
|
signal: options?.signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!response.ok) {
|
if (response.ok) {
|
||||||
|
break; // Success, exit retry loop
|
||||||
|
}
|
||||||
|
|
||||||
const errorText = await response.text();
|
const errorText = await response.text();
|
||||||
|
|
||||||
|
// Check if retryable
|
||||||
|
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
||||||
|
// Use server-provided delay or exponential backoff
|
||||||
|
const serverDelay = extractRetryDelay(errorText);
|
||||||
|
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
||||||
|
await sleep(delayMs, options?.signal);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not retryable or max retries exceeded
|
||||||
throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
|
throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`);
|
||||||
|
} catch (error) {
|
||||||
|
if (error instanceof Error && error.message === "Request was aborted") {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
lastError = error instanceof Error ? error : new Error(String(error));
|
||||||
|
// Network errors are retryable
|
||||||
|
if (attempt < MAX_RETRIES) {
|
||||||
|
const delayMs = BASE_DELAY_MS * 2 ** attempt;
|
||||||
|
await sleep(delayMs, options?.signal);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw lastError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!response || !response.ok) {
|
||||||
|
throw lastError ?? new Error("Failed to get response after retries");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!response.body) {
|
if (!response.body) {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue