Improve Gemini CLI provider retries and headers (#670)

Improve Gemini CLI provider retries and headers

- Add Antigravity endpoint fallback (tries daily sandbox then prod when baseUrl is unset)
- Parse retry delays from headers (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after) before body parsing
- Derive stable sessionId from first user message for cache affinity
- Retry empty SSE streams with backoff without duplicate start/done events
- Add anthropic-beta header for Claude thinking models only
This commit is contained in:
Ahmed Kamal 2026-01-13 02:04:53 +02:00 committed by GitHub
parent 9e4ae98358
commit ff15414258
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 693 additions and 189 deletions

View file

@ -0,0 +1,103 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const originalFetch = global.fetch;
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
const createSseResponse = () => {
const sse = `${[
`data: ${JSON.stringify({
response: {
candidates: [
{
content: { role: "model", parts: [{ text: "Hello" }] },
finishReason: "STOP",
},
],
},
})}`,
].join("\n\n")}\n\n`;
const encoder = new TextEncoder();
const stream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(sse));
controller.close();
},
});
return new Response(stream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
};
afterEach(() => {
global.fetch = originalFetch;
vi.restoreAllMocks();
});
describe("google-gemini-cli Claude thinking header", () => {
const context: Context = {
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
};
it("adds anthropic-beta for Claude thinking models", async () => {
const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
const headers = new Headers(init?.headers);
expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
return createSseResponse();
});
global.fetch = fetchMock as typeof fetch;
const model: Model<"google-gemini-cli"> = {
id: "claude-opus-4-5-thinking",
name: "Claude Opus 4.5 Thinking",
api: "google-gemini-cli",
provider: "google-antigravity",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
const stream = streamGoogleGeminiCli(model, context, { apiKey });
for await (const _event of stream) {
// exhaust stream
}
await stream.result();
});
it("does not add anthropic-beta for Gemini models", async () => {
const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
const headers = new Headers(init?.headers);
expect(headers.has("anthropic-beta")).toBe(false);
return createSseResponse();
});
global.fetch = fetchMock as typeof fetch;
const model: Model<"google-gemini-cli"> = {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
const stream = streamGoogleGeminiCli(model, context, { apiKey });
for await (const _event of stream) {
// exhaust stream
}
await stream.result();
});
});

View file

@ -0,0 +1,108 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const originalFetch = global.fetch;
afterEach(() => {
global.fetch = originalFetch;
vi.restoreAllMocks();
});
describe("google-gemini-cli empty stream retry", () => {
it("retries empty SSE responses without duplicate start", async () => {
const emptyStream = new ReadableStream<Uint8Array>({
start(controller) {
controller.close();
},
});
const sse = `${[
`data: ${JSON.stringify({
response: {
candidates: [
{
content: { role: "model", parts: [{ text: "Hello" }] },
finishReason: "STOP",
},
],
usageMetadata: {
promptTokenCount: 1,
candidatesTokenCount: 1,
totalTokenCount: 2,
},
},
})}`,
].join("\n\n")}\n\n`;
const encoder = new TextEncoder();
const dataStream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(sse));
controller.close();
},
});
let callCount = 0;
const fetchMock = vi.fn(async () => {
callCount += 1;
if (callCount === 1) {
return new Response(emptyStream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
}
return new Response(dataStream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
});
global.fetch = fetchMock as typeof fetch;
const model: Model<"google-gemini-cli"> = {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
const context: Context = {
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
};
const stream = streamGoogleGeminiCli(model, context, {
apiKey: JSON.stringify({ token: "token", projectId: "project" }),
});
let startCount = 0;
let doneCount = 0;
let text = "";
for await (const event of stream) {
if (event.type === "start") {
startCount += 1;
}
if (event.type === "done") {
doneCount += 1;
}
if (event.type === "text_delta") {
text += event.delta;
}
}
const result = await stream.result();
expect(text).toBe("Hello");
expect(result.stopReason).toBe("stop");
expect(startCount).toBe(1);
expect(doneCount).toBe(1);
expect(fetchMock).toHaveBeenCalledTimes(2);
});
});

View file

@ -0,0 +1,53 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
describe("extractRetryDelay header parsing", () => {
afterEach(() => {
vi.useRealTimers();
});
it("prefers Retry-After seconds header", () => {
vi.useFakeTimers();
vi.setSystemTime(new Date("2025-01-01T00:00:00Z"));
const response = new Response("", { headers: { "Retry-After": "5" } });
const delay = extractRetryDelay("Please retry in 1s", response);
expect(delay).toBe(6000);
});
it("parses Retry-After HTTP date header", () => {
vi.useFakeTimers();
const now = new Date("2025-01-01T00:00:00Z");
vi.setSystemTime(now);
const retryAt = new Date(now.getTime() + 12000).toUTCString();
const response = new Response("", { headers: { "Retry-After": retryAt } });
const delay = extractRetryDelay("", response);
expect(delay).toBe(13000);
});
it("parses x-ratelimit-reset header", () => {
vi.useFakeTimers();
const now = new Date("2025-01-01T00:00:00Z");
vi.setSystemTime(now);
const resetAtMs = now.getTime() + 20000;
const resetSeconds = Math.floor(resetAtMs / 1000).toString();
const response = new Response("", { headers: { "x-ratelimit-reset": resetSeconds } });
const delay = extractRetryDelay("", response);
expect(delay).toBe(21000);
});
it("parses x-ratelimit-reset-after header", () => {
vi.useFakeTimers();
vi.setSystemTime(new Date("2025-01-01T00:00:00Z"));
const response = new Response("", { headers: { "x-ratelimit-reset-after": "30" } });
const delay = extractRetryDelay("", response);
expect(delay).toBe(31000);
});
});

View file

@ -0,0 +1,50 @@
import { createHash } from "node:crypto";
import { describe, expect, it } from "vitest";
import { buildRequest } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const model: Model<"google-gemini-cli"> = {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
describe("buildRequest sessionId", () => {
it("derives sessionId from the first user message", () => {
const context: Context = {
messages: [
{ role: "user", content: "First message", timestamp: Date.now() },
{ role: "user", content: "Second message", timestamp: Date.now() },
],
};
const result = buildRequest(model, context, "project-id");
const expected = createHash("sha256").update("First message").digest("hex").slice(0, 32);
expect(result.request.sessionId).toBe(expected);
});
it("omits sessionId when the first user message has no text", () => {
const context: Context = {
messages: [
{
role: "user",
content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
timestamp: Date.now(),
},
{ role: "user", content: "Later text", timestamp: Date.now() },
],
};
const result = buildRequest(model, context, "project-id");
expect(result.request.sessionId).toBeUndefined();
});
});