Improve Gemini CLI provider retries and headers (#670)

Improve Gemini CLI provider retries and headers

- Add Antigravity endpoint fallback (tries daily sandbox then prod when baseUrl is unset)
- Parse retry delays from headers (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after) before body parsing
- Derive stable sessionId from first user message for cache affinity
- Retry empty SSE streams with backoff without duplicate start/done events
- Add anthropic-beta header for Claude thinking models only
This commit is contained in:
Ahmed Kamal 2026-01-13 02:04:53 +02:00 committed by GitHub
parent 9e4ae98358
commit ff15414258
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 693 additions and 189 deletions

View file

@ -4,6 +4,7 @@
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
*/
import { createHash } from "node:crypto";
import type { Content, ThinkingConfig } from "@google/genai";
import { calculateCost } from "../models.js";
import type {
@ -54,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
}
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
// Headers for Gemini CLI (prod endpoint)
const GEMINI_CLI_HEADERS = {
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@ -163,16 +166,66 @@ let toolCallCounter = 0;
// Retry configuration
const MAX_RETRIES = 3;
const BASE_DELAY_MS = 1000;
const MAX_EMPTY_STREAM_RETRIES = 2;
const EMPTY_STREAM_BASE_DELAY_MS = 500;
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
/**
* Extract retry delay from Gemini error response (in milliseconds).
* Parses patterns like:
* Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
* then parses body patterns like:
* - "Your quota will reset after 39s"
* - "Your quota will reset after 18h31m10s"
* - "Please retry in Xs" or "Please retry in Xms"
* - "retryDelay": "34.074824224s" (JSON field)
*/
function extractRetryDelay(errorText: string): number | undefined {
export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
const headers = response instanceof Headers ? response : response?.headers;
if (headers) {
const retryAfter = headers.get("retry-after");
if (retryAfter) {
const retryAfterSeconds = Number(retryAfter);
if (Number.isFinite(retryAfterSeconds)) {
const delay = normalizeDelay(retryAfterSeconds * 1000);
if (delay !== undefined) {
return delay;
}
}
const retryAfterDate = new Date(retryAfter);
const retryAfterMs = retryAfterDate.getTime();
if (!Number.isNaN(retryAfterMs)) {
const delay = normalizeDelay(retryAfterMs - Date.now());
if (delay !== undefined) {
return delay;
}
}
}
const rateLimitReset = headers.get("x-ratelimit-reset");
if (rateLimitReset) {
const resetSeconds = Number.parseInt(rateLimitReset, 10);
if (!Number.isNaN(resetSeconds)) {
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
if (delay !== undefined) {
return delay;
}
}
}
const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
if (rateLimitResetAfter) {
const resetAfterSeconds = Number(rateLimitResetAfter);
if (Number.isFinite(resetAfterSeconds)) {
const delay = normalizeDelay(resetAfterSeconds * 1000);
if (delay !== undefined) {
return delay;
}
}
}
}
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
if (durationMatch) {
@ -181,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
const seconds = parseFloat(durationMatch[3]);
if (!Number.isNaN(seconds)) {
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
if (totalMs > 0) {
return Math.ceil(totalMs + 1000); // Add 1s buffer
const delay = normalizeDelay(totalMs);
if (delay !== undefined) {
return delay;
}
}
}
@ -193,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
const value = parseFloat(retryInMatch[1]);
if (!Number.isNaN(value) && value > 0) {
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
return Math.ceil(ms + 1000);
const delay = normalizeDelay(ms);
if (delay !== undefined) {
return delay;
}
}
}
@ -203,13 +260,21 @@ function extractRetryDelay(errorText: string): number | undefined {
const value = parseFloat(retryDelayMatch[1]);
if (!Number.isNaN(value) && value > 0) {
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
return Math.ceil(ms + 1000);
const delay = normalizeDelay(ms);
if (delay !== undefined) {
return delay;
}
}
}
return undefined;
}
/**
 * Whether a model id refers to a Claude model with extended thinking enabled,
 * e.g. "claude-opus-4-5-thinking". Matching is case-insensitive; these models
 * get the anthropic-beta interleaved-thinking header on requests.
 */
function isClaudeThinkingModel(modelId: string): boolean {
  const id = modelId.toLowerCase();
  return ["claude", "thinking"].every((needle) => id.includes(needle));
}
/**
* Check if an error is retryable (rate limit, server error, network error, etc.)
*/
@ -258,6 +323,7 @@ interface CloudCodeAssistRequest {
model: string;
request: {
contents: Content[];
sessionId?: string;
systemInstruction?: { role?: string; parts: { text: string }[] };
generationConfig?: {
maxOutputTokens?: number;
@ -355,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
}
const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
const isAntigravity = model.provider === "google-antigravity";
const baseUrl = model.baseUrl?.trim();
const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
// Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
const requestHeaders = {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
Accept: "text/event-stream",
...headers,
...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
};
const requestBodyJson = JSON.stringify(requestBody);
// Fetch with retry logic for rate limits and transient errors
let response: Response | undefined;
let lastError: Error | undefined;
let requestUrl: string | undefined;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
if (options?.signal?.aborted) {
@ -373,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
}
try {
response = await fetch(url, {
const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
response = await fetch(requestUrl, {
method: "POST",
headers: {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
Accept: "text/event-stream",
...headers,
},
body: JSON.stringify(requestBody),
headers: requestHeaders,
body: requestBodyJson,
signal: options?.signal,
});
@ -394,7 +466,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
// Check if retryable
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
// Use server-provided delay or exponential backoff
const serverDelay = extractRetryDelay(errorText);
const serverDelay = extractRetryDelay(errorText, response);
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
await sleep(delayMs, options?.signal);
continue;
@ -428,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
throw lastError ?? new Error("Failed to get response after retries");
}
if (!response.body) {
throw new Error("No response body");
}
stream.push({ type: "start", partial: output });
let currentBlock: TextContent | ThinkingContent | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
// Read SSE stream
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
// Set up abort handler to cancel reader when signal fires
const abortHandler = () => {
void reader.cancel().catch(() => {});
let started = false;
const ensureStarted = () => {
if (!started) {
stream.push({ type: "start", partial: output });
started = true;
}
};
options?.signal?.addEventListener("abort", abortHandler);
try {
while (true) {
// Check abort signal before each read
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
const resetOutput = () => {
output.content = [];
output.usage = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
output.stopReason = "stop";
output.errorMessage = undefined;
output.timestamp = Date.now();
started = false;
};
const { done, value } = await reader.read();
if (done) break;
const streamResponse = async (activeResponse: Response): Promise<boolean> => {
if (!activeResponse.body) {
throw new Error("No response body");
}
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split("\n");
buffer = lines.pop() || "";
let hasContent = false;
let currentBlock: TextContent | ThinkingContent | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
for (const line of lines) {
if (!line.startsWith("data:")) continue;
// Read SSE stream
const reader = activeResponse.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
const jsonStr = line.slice(5).trim();
if (!jsonStr) continue;
// Set up abort handler to cancel reader when signal fires
const abortHandler = () => {
void reader.cancel().catch(() => {});
};
options?.signal?.addEventListener("abort", abortHandler);
let chunk: CloudCodeAssistResponseChunk;
try {
chunk = JSON.parse(jsonStr);
} catch {
continue;
try {
while (true) {
// Check abort signal before each read
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
// Unwrap the response
const responseData = chunk.response;
if (!responseData) continue;
const { done, value } = await reader.read();
if (done) break;
const candidate = responseData.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.text !== undefined) {
const isThinking = isThinkingPart(part);
if (
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") ||
(!isThinking && currentBlock.type !== "text")
) {
buffer += decoder.decode(value, { stream: true });
const lines = buffer.split("\n");
buffer = lines.pop() || "";
for (const line of lines) {
if (!line.startsWith("data:")) continue;
const jsonStr = line.slice(5).trim();
if (!jsonStr) continue;
let chunk: CloudCodeAssistResponseChunk;
try {
chunk = JSON.parse(jsonStr);
} catch {
continue;
}
// Unwrap the response
const responseData = chunk.response;
if (!responseData) continue;
const candidate = responseData.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.text !== undefined) {
hasContent = true;
const isThinking = isThinkingPart(part);
if (
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") ||
(!isThinking && currentBlock.type !== "text")
) {
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blocks.length - 1,
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
}
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
output.content.push(currentBlock);
ensureStarted();
stream.push({
type: "thinking_start",
contentIndex: blockIndex(),
partial: output,
});
} else {
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
ensureStarted();
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
}
}
if (currentBlock.type === "thinking") {
currentBlock.thinking += part.text;
currentBlock.thinkingSignature = retainThoughtSignature(
currentBlock.thinkingSignature,
part.thoughtSignature,
);
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: part.text,
partial: output,
});
} else {
currentBlock.text += part.text;
currentBlock.textSignature = retainThoughtSignature(
currentBlock.textSignature,
part.thoughtSignature,
);
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: part.text,
partial: output,
});
}
}
if (part.functionCall) {
hasContent = true;
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blocks.length - 1,
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
@ -506,143 +665,142 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
partial: output,
});
}
currentBlock = null;
}
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
} else {
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
}
}
if (currentBlock.type === "thinking") {
currentBlock.thinking += part.text;
currentBlock.thinkingSignature = retainThoughtSignature(
currentBlock.thinkingSignature,
part.thoughtSignature,
);
const providedId = part.functionCall.id;
const needsNewId =
!providedId ||
output.content.some((b) => b.type === "toolCall" && b.id === providedId);
const toolCallId = needsNewId
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
: providedId;
const toolCall: ToolCall = {
type: "toolCall",
id: toolCallId,
name: part.functionCall.name || "",
arguments: part.functionCall.args as Record<string, unknown>,
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
output.content.push(toolCall);
ensureStarted();
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "thinking_delta",
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: part.text,
delta: JSON.stringify(toolCall.arguments),
partial: output,
});
} else {
currentBlock.text += part.text;
currentBlock.textSignature = retainThoughtSignature(
currentBlock.textSignature,
part.thoughtSignature,
);
stream.push({
type: "text_delta",
type: "toolcall_end",
contentIndex: blockIndex(),
delta: part.text,
toolCall,
partial: output,
});
}
}
}
if (part.functionCall) {
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
currentBlock = null;
}
const providedId = part.functionCall.id;
const needsNewId =
!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
const toolCallId = needsNewId
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
: providedId;
const toolCall: ToolCall = {
type: "toolCall",
id: toolCallId,
name: part.functionCall.name || "",
arguments: part.functionCall.args as Record<string, unknown>,
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
output.content.push(toolCall);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: JSON.stringify(toolCall.arguments),
partial: output,
});
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
if (candidate?.finishReason) {
output.stopReason = mapStopReasonString(candidate.finishReason);
if (output.content.some((b) => b.type === "toolCall")) {
output.stopReason = "toolUse";
}
}
}
if (candidate?.finishReason) {
output.stopReason = mapStopReasonString(candidate.finishReason);
if (output.content.some((b) => b.type === "toolCall")) {
output.stopReason = "toolUse";
}
}
if (responseData.usageMetadata) {
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
output.usage = {
input: promptTokens - cacheReadTokens,
output:
(responseData.usageMetadata.candidatesTokenCount || 0) +
(responseData.usageMetadata.thoughtsTokenCount || 0),
cacheRead: cacheReadTokens,
cacheWrite: 0,
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
if (responseData.usageMetadata) {
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
output.usage = {
input: promptTokens - cacheReadTokens,
output:
(responseData.usageMetadata.candidatesTokenCount || 0) +
(responseData.usageMetadata.thoughtsTokenCount || 0),
cacheRead: cacheReadTokens,
cacheWrite: 0,
total: 0,
},
};
calculateCost(model, output.usage);
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
},
};
calculateCost(model, output.usage);
}
}
}
} finally {
options?.signal?.removeEventListener("abort", abortHandler);
}
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
}
return hasContent;
};
let receivedContent = false;
let currentResponse = response;
for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (emptyAttempt > 0) {
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
await sleep(backoffMs, options?.signal);
if (!requestUrl) {
throw new Error("Missing request URL");
}
currentResponse = await fetch(requestUrl, {
method: "POST",
headers: requestHeaders,
body: requestBodyJson,
signal: options?.signal,
});
if (!currentResponse.ok) {
const retryErrorText = await currentResponse.text();
throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
}
}
const streamed = await streamResponse(currentResponse);
if (streamed) {
receivedContent = true;
break;
}
if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
resetOutput();
}
} finally {
options?.signal?.removeEventListener("abort", abortHandler);
}
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
if (!receivedContent) {
throw new Error("Cloud Code Assist API returned an empty response");
}
if (options?.signal?.aborted) {
@ -671,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
return stream;
};
function buildRequest(
/**
 * Derive a stable session id from the first user message so repeated requests
 * for the same conversation map to the same server-side cache entry.
 *
 * Only the FIRST user message is considered: if it carries no text (e.g. an
 * image-only message), no session id is derived at all — we do not fall
 * through to later user messages.
 */
function deriveSessionId(context: Context): string | undefined {
  const firstUserMessage = context.messages.find((message) => message.role === "user");
  if (!firstUserMessage) {
    return undefined;
  }
  const { content } = firstUserMessage;
  let text = "";
  if (typeof content === "string") {
    text = content;
  } else if (Array.isArray(content)) {
    // Concatenate only the text parts; images/other parts contribute nothing.
    const textParts = content.filter((item): item is TextContent => item.type === "text");
    text = textParts.map((item) => item.text).join("\n");
  }
  if (text.trim().length === 0) {
    return undefined;
  }
  // 32 hex chars (128 bits) of SHA-256 is plenty for cache affinity.
  return createHash("sha256").update(text).digest("hex").slice(0, 32);
}
export function buildRequest(
model: Model<"google-gemini-cli">,
context: Context,
projectId: string,
@ -706,6 +891,11 @@ function buildRequest(
contents,
};
const sessionId = deriveSessionId(context);
if (sessionId) {
request.sessionId = sessionId;
}
// System instruction must be object with parts, not plain string
if (context.systemPrompt) {
request.systemInstruction = {

View file

@ -0,0 +1,103 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Captured once so afterEach can restore the real fetch after each mocked test.
const originalFetch = global.fetch;
// Credentials blob in the JSON shape the provider parses (token + projectId).
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
/**
 * Build a minimal SSE Response containing a single chunk in the Cloud Code
 * Assist stream shape: one "Hello" text part with finishReason STOP.
 */
const createSseResponse = () => {
  const payload = {
    response: {
      candidates: [
        {
          content: { role: "model", parts: [{ text: "Hello" }] },
          finishReason: "STOP",
        },
      ],
    },
  };
  // One SSE event: "data: <json>" terminated by a blank line.
  const bytes = new TextEncoder().encode(`data: ${JSON.stringify(payload)}\n\n`);
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      controller.enqueue(bytes);
      controller.close();
    },
  });
  return new Response(stream, {
    status: 200,
    headers: { "content-type": "text/event-stream" },
  });
};
// Undo per-test fetch stubbing and clear all vitest mocks between tests.
afterEach(() => {
  global.fetch = originalFetch;
  vi.restoreAllMocks();
});
describe("google-gemini-cli Claude thinking header", () => {
  // Single-turn conversation reused by both tests.
  const context: Context = {
    messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
  };
  it("adds anthropic-beta for Claude thinking models", async () => {
    // The assertion lives inside the fetch mock so the header is checked on
    // the actual outgoing request, not reconstructed afterwards.
    const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
      const headers = new Headers(init?.headers);
      expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
      return createSseResponse();
    });
    global.fetch = fetchMock as typeof fetch;
    // Model id contains both "claude" and "thinking", which is what triggers
    // the beta header in the provider.
    const model: Model<"google-gemini-cli"> = {
      id: "claude-opus-4-5-thinking",
      name: "Claude Opus 4.5 Thinking",
      api: "google-gemini-cli",
      provider: "google-antigravity",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: true,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const stream = streamGoogleGeminiCli(model, context, { apiKey });
    for await (const _event of stream) {
      // exhaust stream
    }
    await stream.result();
  });
  it("does not add anthropic-beta for Gemini models", async () => {
    // Mirror of the test above: a non-Claude model must NOT carry the header.
    const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
      const headers = new Headers(init?.headers);
      expect(headers.has("anthropic-beta")).toBe(false);
      return createSseResponse();
    });
    global.fetch = fetchMock as typeof fetch;
    const model: Model<"google-gemini-cli"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const stream = streamGoogleGeminiCli(model, context, { apiKey });
    for await (const _event of stream) {
      // exhaust stream
    }
    await stream.result();
  });
});

View file

@ -0,0 +1,108 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Captured once so afterEach can restore the real fetch after each mocked test.
const originalFetch = global.fetch;
afterEach(() => {
  global.fetch = originalFetch;
  vi.restoreAllMocks();
});
describe("google-gemini-cli empty stream retry", () => {
  it("retries empty SSE responses without duplicate start", async () => {
    // First response: a 200 with an immediately-closed body (no SSE events).
    const emptyStream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.close();
      },
    });
    // Second response: a single valid chunk ("Hello", STOP, usage metadata).
    const sse = `${[
      `data: ${JSON.stringify({
        response: {
          candidates: [
            {
              content: { role: "model", parts: [{ text: "Hello" }] },
              finishReason: "STOP",
            },
          ],
          usageMetadata: {
            promptTokenCount: 1,
            candidatesTokenCount: 1,
            totalTokenCount: 2,
          },
        },
      })}`,
    ].join("\n\n")}\n\n`;
    const encoder = new TextEncoder();
    const dataStream = new ReadableStream<Uint8Array>({
      start(controller) {
        controller.enqueue(encoder.encode(sse));
        controller.close();
      },
    });
    // Serve the empty body on call 1, real data on call 2, so the provider's
    // empty-stream retry path is exercised exactly once.
    let callCount = 0;
    const fetchMock = vi.fn(async () => {
      callCount += 1;
      if (callCount === 1) {
        return new Response(emptyStream, {
          status: 200,
          headers: { "content-type": "text/event-stream" },
        });
      }
      return new Response(dataStream, {
        status: 200,
        headers: { "content-type": "text/event-stream" },
      });
    });
    global.fetch = fetchMock as typeof fetch;
    const model: Model<"google-gemini-cli"> = {
      id: "gemini-2.5-flash",
      name: "Gemini 2.5 Flash",
      api: "google-gemini-cli",
      provider: "google-gemini-cli",
      baseUrl: "https://cloudcode-pa.googleapis.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 128000,
      maxTokens: 8192,
    };
    const context: Context = {
      messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
    };
    const stream = streamGoogleGeminiCli(model, context, {
      apiKey: JSON.stringify({ token: "token", projectId: "project" }),
    });
    // Count lifecycle events while draining: the retry must not emit a second
    // "start" or "done", and all text must come from the second response.
    let startCount = 0;
    let doneCount = 0;
    let text = "";
    for await (const event of stream) {
      if (event.type === "start") {
        startCount += 1;
      }
      if (event.type === "done") {
        doneCount += 1;
      }
      if (event.type === "text_delta") {
        text += event.delta;
      }
    }
    const result = await stream.result();
    expect(text).toBe("Hello");
    expect(result.stopReason).toBe("stop");
    expect(startCount).toBe(1);
    expect(doneCount).toBe(1);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});

View file

@ -0,0 +1,53 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
describe("extractRetryDelay header parsing", () => {
  afterEach(() => {
    vi.useRealTimers();
  });

  /** Freeze the clock at a fixed instant and return that instant. */
  const freezeClock = (): Date => {
    vi.useFakeTimers();
    const now = new Date("2025-01-01T00:00:00Z");
    vi.setSystemTime(now);
    return now;
  };

  // All expected values include the provider's 1s safety buffer.

  it("prefers Retry-After seconds header", () => {
    freezeClock();
    const response = new Response("", { headers: { "Retry-After": "5" } });
    // The body says 1s, but the header wins.
    expect(extractRetryDelay("Please retry in 1s", response)).toBe(6000);
  });

  it("parses Retry-After HTTP date header", () => {
    const now = freezeClock();
    const retryAt = new Date(now.getTime() + 12000).toUTCString();
    const response = new Response("", { headers: { "Retry-After": retryAt } });
    expect(extractRetryDelay("", response)).toBe(13000);
  });

  it("parses x-ratelimit-reset header", () => {
    const now = freezeClock();
    // Header carries an absolute unix timestamp in seconds.
    const resetSeconds = Math.floor((now.getTime() + 20000) / 1000).toString();
    const response = new Response("", { headers: { "x-ratelimit-reset": resetSeconds } });
    expect(extractRetryDelay("", response)).toBe(21000);
  });

  it("parses x-ratelimit-reset-after header", () => {
    freezeClock();
    // Header carries a relative delay in seconds.
    const response = new Response("", { headers: { "x-ratelimit-reset-after": "30" } });
    expect(extractRetryDelay("", response)).toBe(31000);
  });
});

View file

@ -0,0 +1,50 @@
import { createHash } from "node:crypto";
import { describe, expect, it } from "vitest";
import { buildRequest } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Minimal Gemini CLI model fixture shared by the tests below; zero-cost
// pricing keeps cost accounting out of the assertions.
const model: Model<"google-gemini-cli"> = {
  id: "gemini-2.5-flash",
  name: "Gemini 2.5 Flash",
  api: "google-gemini-cli",
  provider: "google-gemini-cli",
  baseUrl: "https://cloudcode-pa.googleapis.com",
  reasoning: false,
  input: ["text"],
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
  contextWindow: 128000,
  maxTokens: 8192,
};
describe("buildRequest sessionId", () => {
  // Session ids are the first 32 hex chars of SHA-256 over the text of the
  // first user message (mirrors deriveSessionId in the provider).
  const expectedSessionId = (text: string): string =>
    createHash("sha256").update(text).digest("hex").slice(0, 32);

  it("derives sessionId from the first user message", () => {
    const context: Context = {
      messages: [
        { role: "user", content: "First message", timestamp: Date.now() },
        { role: "user", content: "Second message", timestamp: Date.now() },
      ],
    };
    const built = buildRequest(model, context, "project-id");
    // Only the FIRST user message feeds the hash.
    expect(built.request.sessionId).toBe(expectedSessionId("First message"));
  });

  it("omits sessionId when the first user message has no text", () => {
    const context: Context = {
      messages: [
        {
          role: "user",
          // Image-only first message — no text to hash, so no sessionId at
          // all, even though a later user message has text.
          content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
          timestamp: Date.now(),
        },
        { role: "user", content: "Later text", timestamp: Date.now() },
      ],
    };
    const built = buildRequest(model, context, "project-id");
    expect(built.request.sessionId).toBeUndefined();
  });
});