mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 14:03:49 +00:00
Improve Gemini CLI provider retries and headers (#670)
Improve Gemini CLI provider retries and headers - Add Antigravity endpoint fallback (tries daily sandbox then prod when baseUrl is unset) - Parse retry delays from headers (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after) before body parsing - Derive stable sessionId from first user message for cache affinity - Retry empty SSE streams with backoff without duplicate start/done events - Add anthropic-beta header for Claude thinking models only
This commit is contained in:
parent
9e4ae98358
commit
ff15414258
5 changed files with 693 additions and 189 deletions
|
|
@ -4,6 +4,7 @@
|
|||
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
||||
*/
|
||||
|
||||
import { createHash } from "node:crypto";
|
||||
import type { Content, ThinkingConfig } from "@google/genai";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
|
|
@ -54,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
|||
}
|
||||
|
||||
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
|
||||
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
||||
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
|
||||
// Headers for Gemini CLI (prod endpoint)
|
||||
const GEMINI_CLI_HEADERS = {
|
||||
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
|
||||
|
|
@ -163,16 +166,66 @@ let toolCallCounter = 0;
|
|||
// Retry configuration
|
||||
const MAX_RETRIES = 3;
|
||||
const BASE_DELAY_MS = 1000;
|
||||
const MAX_EMPTY_STREAM_RETRIES = 2;
|
||||
const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
||||
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
|
||||
|
||||
/**
|
||||
* Extract retry delay from Gemini error response (in milliseconds).
|
||||
* Parses patterns like:
|
||||
* Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
|
||||
* then parses body patterns like:
|
||||
* - "Your quota will reset after 39s"
|
||||
* - "Your quota will reset after 18h31m10s"
|
||||
* - "Please retry in Xs" or "Please retry in Xms"
|
||||
* - "retryDelay": "34.074824224s" (JSON field)
|
||||
*/
|
||||
function extractRetryDelay(errorText: string): number | undefined {
|
||||
export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
|
||||
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
||||
|
||||
const headers = response instanceof Headers ? response : response?.headers;
|
||||
if (headers) {
|
||||
const retryAfter = headers.get("retry-after");
|
||||
if (retryAfter) {
|
||||
const retryAfterSeconds = Number(retryAfter);
|
||||
if (Number.isFinite(retryAfterSeconds)) {
|
||||
const delay = normalizeDelay(retryAfterSeconds * 1000);
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
const retryAfterDate = new Date(retryAfter);
|
||||
const retryAfterMs = retryAfterDate.getTime();
|
||||
if (!Number.isNaN(retryAfterMs)) {
|
||||
const delay = normalizeDelay(retryAfterMs - Date.now());
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const rateLimitReset = headers.get("x-ratelimit-reset");
|
||||
if (rateLimitReset) {
|
||||
const resetSeconds = Number.parseInt(rateLimitReset, 10);
|
||||
if (!Number.isNaN(resetSeconds)) {
|
||||
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
|
||||
if (rateLimitResetAfter) {
|
||||
const resetAfterSeconds = Number(rateLimitResetAfter);
|
||||
if (Number.isFinite(resetAfterSeconds)) {
|
||||
const delay = normalizeDelay(resetAfterSeconds * 1000);
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
|
||||
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
|
||||
if (durationMatch) {
|
||||
|
|
@ -181,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|||
const seconds = parseFloat(durationMatch[3]);
|
||||
if (!Number.isNaN(seconds)) {
|
||||
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
|
||||
if (totalMs > 0) {
|
||||
return Math.ceil(totalMs + 1000); // Add 1s buffer
|
||||
const delay = normalizeDelay(totalMs);
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -193,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|||
const value = parseFloat(retryInMatch[1]);
|
||||
if (!Number.isNaN(value) && value > 0) {
|
||||
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
||||
return Math.ceil(ms + 1000);
|
||||
const delay = normalizeDelay(ms);
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -203,13 +260,21 @@ function extractRetryDelay(errorText: string): number | undefined {
|
|||
const value = parseFloat(retryDelayMatch[1]);
|
||||
if (!Number.isNaN(value) && value > 0) {
|
||||
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
||||
return Math.ceil(ms + 1000);
|
||||
const delay = normalizeDelay(ms);
|
||||
if (delay !== undefined) {
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
function isClaudeThinkingModel(modelId: string): boolean {
|
||||
const normalized = modelId.toLowerCase();
|
||||
return normalized.includes("claude") && normalized.includes("thinking");
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an error is retryable (rate limit, server error, network error, etc.)
|
||||
*/
|
||||
|
|
@ -258,6 +323,7 @@ interface CloudCodeAssistRequest {
|
|||
model: string;
|
||||
request: {
|
||||
contents: Content[];
|
||||
sessionId?: string;
|
||||
systemInstruction?: { role?: string; parts: { text: string }[] };
|
||||
generationConfig?: {
|
||||
maxOutputTokens?: number;
|
||||
|
|
@ -355,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
|
||||
}
|
||||
|
||||
const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
|
||||
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
||||
const isAntigravity = model.provider === "google-antigravity";
|
||||
const baseUrl = model.baseUrl?.trim();
|
||||
const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
|
||||
|
||||
// Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
|
||||
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
|
||||
const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
|
||||
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
||||
|
||||
const requestHeaders = {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
"Content-Type": "application/json",
|
||||
Accept: "text/event-stream",
|
||||
...headers,
|
||||
...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
|
||||
};
|
||||
const requestBodyJson = JSON.stringify(requestBody);
|
||||
|
||||
// Fetch with retry logic for rate limits and transient errors
|
||||
let response: Response | undefined;
|
||||
let lastError: Error | undefined;
|
||||
let requestUrl: string | undefined;
|
||||
|
||||
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||
if (options?.signal?.aborted) {
|
||||
|
|
@ -373,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
}
|
||||
|
||||
try {
|
||||
response = await fetch(url, {
|
||||
const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
|
||||
requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
||||
response = await fetch(requestUrl, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
"Content-Type": "application/json",
|
||||
Accept: "text/event-stream",
|
||||
...headers,
|
||||
},
|
||||
body: JSON.stringify(requestBody),
|
||||
headers: requestHeaders,
|
||||
body: requestBodyJson,
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
|
|
@ -394,7 +466,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
// Check if retryable
|
||||
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
||||
// Use server-provided delay or exponential backoff
|
||||
const serverDelay = extractRetryDelay(errorText);
|
||||
const serverDelay = extractRetryDelay(errorText, response);
|
||||
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
||||
await sleep(delayMs, options?.signal);
|
||||
continue;
|
||||
|
|
@ -428,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
throw lastError ?? new Error("Failed to get response after retries");
|
||||
}
|
||||
|
||||
if (!response.body) {
|
||||
throw new Error("No response body");
|
||||
}
|
||||
|
||||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
|
||||
// Read SSE stream
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = "";
|
||||
|
||||
// Set up abort handler to cancel reader when signal fires
|
||||
const abortHandler = () => {
|
||||
void reader.cancel().catch(() => {});
|
||||
let started = false;
|
||||
const ensureStarted = () => {
|
||||
if (!started) {
|
||||
stream.push({ type: "start", partial: output });
|
||||
started = true;
|
||||
}
|
||||
};
|
||||
options?.signal?.addEventListener("abort", abortHandler);
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
// Check abort signal before each read
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
}
|
||||
const resetOutput = () => {
|
||||
output.content = [];
|
||||
output.usage = {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
output.stopReason = "stop";
|
||||
output.errorMessage = undefined;
|
||||
output.timestamp = Date.now();
|
||||
started = false;
|
||||
};
|
||||
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
const streamResponse = async (activeResponse: Response): Promise<boolean> => {
|
||||
if (!activeResponse.body) {
|
||||
throw new Error("No response body");
|
||||
}
|
||||
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split("\n");
|
||||
buffer = lines.pop() || "";
|
||||
let hasContent = false;
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith("data:")) continue;
|
||||
// Read SSE stream
|
||||
const reader = activeResponse.body.getReader();
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = "";
|
||||
|
||||
const jsonStr = line.slice(5).trim();
|
||||
if (!jsonStr) continue;
|
||||
// Set up abort handler to cancel reader when signal fires
|
||||
const abortHandler = () => {
|
||||
void reader.cancel().catch(() => {});
|
||||
};
|
||||
options?.signal?.addEventListener("abort", abortHandler);
|
||||
|
||||
let chunk: CloudCodeAssistResponseChunk;
|
||||
try {
|
||||
chunk = JSON.parse(jsonStr);
|
||||
} catch {
|
||||
continue;
|
||||
try {
|
||||
while (true) {
|
||||
// Check abort signal before each read
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
}
|
||||
|
||||
// Unwrap the response
|
||||
const responseData = chunk.response;
|
||||
if (!responseData) continue;
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
|
||||
const candidate = responseData.candidates?.[0];
|
||||
if (candidate?.content?.parts) {
|
||||
for (const part of candidate.content.parts) {
|
||||
if (part.text !== undefined) {
|
||||
const isThinking = isThinkingPart(part);
|
||||
if (
|
||||
!currentBlock ||
|
||||
(isThinking && currentBlock.type !== "thinking") ||
|
||||
(!isThinking && currentBlock.type !== "text")
|
||||
) {
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
const lines = buffer.split("\n");
|
||||
buffer = lines.pop() || "";
|
||||
|
||||
for (const line of lines) {
|
||||
if (!line.startsWith("data:")) continue;
|
||||
|
||||
const jsonStr = line.slice(5).trim();
|
||||
if (!jsonStr) continue;
|
||||
|
||||
let chunk: CloudCodeAssistResponseChunk;
|
||||
try {
|
||||
chunk = JSON.parse(jsonStr);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Unwrap the response
|
||||
const responseData = chunk.response;
|
||||
if (!responseData) continue;
|
||||
|
||||
const candidate = responseData.candidates?.[0];
|
||||
if (candidate?.content?.parts) {
|
||||
for (const part of candidate.content.parts) {
|
||||
if (part.text !== undefined) {
|
||||
hasContent = true;
|
||||
const isThinking = isThinkingPart(part);
|
||||
if (
|
||||
!currentBlock ||
|
||||
(isThinking && currentBlock.type !== "thinking") ||
|
||||
(!isThinking && currentBlock.type !== "text")
|
||||
) {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blocks.length - 1,
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (isThinking) {
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
||||
output.content.push(currentBlock);
|
||||
ensureStarted();
|
||||
stream.push({
|
||||
type: "thinking_start",
|
||||
contentIndex: blockIndex(),
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
ensureStarted();
|
||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
}
|
||||
if (currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += part.text;
|
||||
currentBlock.thinkingSignature = retainThoughtSignature(
|
||||
currentBlock.thinkingSignature,
|
||||
part.thoughtSignature,
|
||||
);
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
currentBlock.text += part.text;
|
||||
currentBlock.textSignature = retainThoughtSignature(
|
||||
currentBlock.textSignature,
|
||||
part.thoughtSignature,
|
||||
);
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (part.functionCall) {
|
||||
hasContent = true;
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blocks.length - 1,
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -506,143 +665,142 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
if (isThinking) {
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
||||
} else {
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
}
|
||||
if (currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += part.text;
|
||||
currentBlock.thinkingSignature = retainThoughtSignature(
|
||||
currentBlock.thinkingSignature,
|
||||
part.thoughtSignature,
|
||||
);
|
||||
|
||||
const providedId = part.functionCall.id;
|
||||
const needsNewId =
|
||||
!providedId ||
|
||||
output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
||||
const toolCallId = needsNewId
|
||||
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
||||
: providedId;
|
||||
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: toolCallId,
|
||||
name: part.functionCall.name || "",
|
||||
arguments: part.functionCall.args as Record<string, unknown>,
|
||||
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
||||
};
|
||||
|
||||
output.content.push(toolCall);
|
||||
ensureStarted();
|
||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
delta: JSON.stringify(toolCall.arguments),
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
currentBlock.text += part.text;
|
||||
currentBlock.textSignature = retainThoughtSignature(
|
||||
currentBlock.textSignature,
|
||||
part.thoughtSignature,
|
||||
);
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
type: "toolcall_end",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
toolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (part.functionCall) {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
|
||||
const providedId = part.functionCall.id;
|
||||
const needsNewId =
|
||||
!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
||||
const toolCallId = needsNewId
|
||||
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
||||
: providedId;
|
||||
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: toolCallId,
|
||||
name: part.functionCall.name || "",
|
||||
arguments: part.functionCall.args as Record<string, unknown>,
|
||||
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
||||
};
|
||||
|
||||
output.content.push(toolCall);
|
||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: JSON.stringify(toolCall.arguments),
|
||||
partial: output,
|
||||
});
|
||||
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
|
||||
if (candidate?.finishReason) {
|
||||
output.stopReason = mapStopReasonString(candidate.finishReason);
|
||||
if (output.content.some((b) => b.type === "toolCall")) {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (candidate?.finishReason) {
|
||||
output.stopReason = mapStopReasonString(candidate.finishReason);
|
||||
if (output.content.some((b) => b.type === "toolCall")) {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
}
|
||||
|
||||
if (responseData.usageMetadata) {
|
||||
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
|
||||
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
|
||||
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
|
||||
output.usage = {
|
||||
input: promptTokens - cacheReadTokens,
|
||||
output:
|
||||
(responseData.usageMetadata.candidatesTokenCount || 0) +
|
||||
(responseData.usageMetadata.thoughtsTokenCount || 0),
|
||||
cacheRead: cacheReadTokens,
|
||||
cacheWrite: 0,
|
||||
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
if (responseData.usageMetadata) {
|
||||
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
|
||||
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
|
||||
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
|
||||
output.usage = {
|
||||
input: promptTokens - cacheReadTokens,
|
||||
output:
|
||||
(responseData.usageMetadata.candidatesTokenCount || 0) +
|
||||
(responseData.usageMetadata.thoughtsTokenCount || 0),
|
||||
cacheRead: cacheReadTokens,
|
||||
cacheWrite: 0,
|
||||
total: 0,
|
||||
},
|
||||
};
|
||||
calculateCost(model, output.usage);
|
||||
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
total: 0,
|
||||
},
|
||||
};
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
options?.signal?.removeEventListener("abort", abortHandler);
|
||||
}
|
||||
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return hasContent;
|
||||
};
|
||||
|
||||
let receivedContent = false;
|
||||
let currentResponse = response;
|
||||
|
||||
for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
}
|
||||
|
||||
if (emptyAttempt > 0) {
|
||||
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
|
||||
await sleep(backoffMs, options?.signal);
|
||||
|
||||
if (!requestUrl) {
|
||||
throw new Error("Missing request URL");
|
||||
}
|
||||
|
||||
currentResponse = await fetch(requestUrl, {
|
||||
method: "POST",
|
||||
headers: requestHeaders,
|
||||
body: requestBodyJson,
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
if (!currentResponse.ok) {
|
||||
const retryErrorText = await currentResponse.text();
|
||||
throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
|
||||
}
|
||||
}
|
||||
|
||||
const streamed = await streamResponse(currentResponse);
|
||||
if (streamed) {
|
||||
receivedContent = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
|
||||
resetOutput();
|
||||
}
|
||||
} finally {
|
||||
options?.signal?.removeEventListener("abort", abortHandler);
|
||||
}
|
||||
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
if (!receivedContent) {
|
||||
throw new Error("Cloud Code Assist API returned an empty response");
|
||||
}
|
||||
|
||||
if (options?.signal?.aborted) {
|
||||
|
|
@ -671,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|||
return stream;
|
||||
};
|
||||
|
||||
function buildRequest(
|
||||
function deriveSessionId(context: Context): string | undefined {
|
||||
for (const message of context.messages) {
|
||||
if (message.role !== "user") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let text = "";
|
||||
if (typeof message.content === "string") {
|
||||
text = message.content;
|
||||
} else if (Array.isArray(message.content)) {
|
||||
text = message.content
|
||||
.filter((item): item is TextContent => item.type === "text")
|
||||
.map((item) => item.text)
|
||||
.join("\n");
|
||||
}
|
||||
|
||||
if (!text || text.trim().length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const hash = createHash("sha256").update(text).digest("hex");
|
||||
return hash.slice(0, 32);
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function buildRequest(
|
||||
model: Model<"google-gemini-cli">,
|
||||
context: Context,
|
||||
projectId: string,
|
||||
|
|
@ -706,6 +891,11 @@ function buildRequest(
|
|||
contents,
|
||||
};
|
||||
|
||||
const sessionId = deriveSessionId(context);
|
||||
if (sessionId) {
|
||||
request.sessionId = sessionId;
|
||||
}
|
||||
|
||||
// System instruction must be object with parts, not plain string
|
||||
if (context.systemPrompt) {
|
||||
request.systemInstruction = {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,103 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
|
||||
import type { Context, Model } from "../src/types.js";
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
|
||||
|
||||
const createSseResponse = () => {
|
||||
const sse = `${[
|
||||
`data: ${JSON.stringify({
|
||||
response: {
|
||||
candidates: [
|
||||
{
|
||||
content: { role: "model", parts: [{ text: "Hello" }] },
|
||||
finishReason: "STOP",
|
||||
},
|
||||
],
|
||||
},
|
||||
})}`,
|
||||
].join("\n\n")}\n\n`;
|
||||
|
||||
const encoder = new TextEncoder();
|
||||
const stream = new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
controller.enqueue(encoder.encode(sse));
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
|
||||
return new Response(stream, {
|
||||
status: 200,
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
});
|
||||
};
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("google-gemini-cli Claude thinking header", () => {
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
|
||||
};
|
||||
|
||||
it("adds anthropic-beta for Claude thinking models", async () => {
|
||||
const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
|
||||
const headers = new Headers(init?.headers);
|
||||
expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
|
||||
return createSseResponse();
|
||||
});
|
||||
|
||||
global.fetch = fetchMock as typeof fetch;
|
||||
|
||||
const model: Model<"google-gemini-cli"> = {
|
||||
id: "claude-opus-4-5-thinking",
|
||||
name: "Claude Opus 4.5 Thinking",
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-antigravity",
|
||||
baseUrl: "https://cloudcode-pa.googleapis.com",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
|
||||
const stream = streamGoogleGeminiCli(model, context, { apiKey });
|
||||
for await (const _event of stream) {
|
||||
// exhaust stream
|
||||
}
|
||||
await stream.result();
|
||||
});
|
||||
|
||||
it("does not add anthropic-beta for Gemini models", async () => {
|
||||
const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
|
||||
const headers = new Headers(init?.headers);
|
||||
expect(headers.has("anthropic-beta")).toBe(false);
|
||||
return createSseResponse();
|
||||
});
|
||||
|
||||
global.fetch = fetchMock as typeof fetch;
|
||||
|
||||
const model: Model<"google-gemini-cli"> = {
|
||||
id: "gemini-2.5-flash",
|
||||
name: "Gemini 2.5 Flash",
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-gemini-cli",
|
||||
baseUrl: "https://cloudcode-pa.googleapis.com",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
|
||||
const stream = streamGoogleGeminiCli(model, context, { apiKey });
|
||||
for await (const _event of stream) {
|
||||
// exhaust stream
|
||||
}
|
||||
await stream.result();
|
||||
});
|
||||
});
|
||||
108
packages/ai/test/google-gemini-cli-empty-stream.test.ts
Normal file
108
packages/ai/test/google-gemini-cli-empty-stream.test.ts
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
|
||||
import type { Context, Model } from "../src/types.js";
|
||||
|
||||
const originalFetch = global.fetch;
|
||||
|
||||
afterEach(() => {
|
||||
global.fetch = originalFetch;
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
describe("google-gemini-cli empty stream retry", () => {
|
||||
it("retries empty SSE responses without duplicate start", async () => {
|
||||
const emptyStream = new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
|
||||
const sse = `${[
|
||||
`data: ${JSON.stringify({
|
||||
response: {
|
||||
candidates: [
|
||||
{
|
||||
content: { role: "model", parts: [{ text: "Hello" }] },
|
||||
finishReason: "STOP",
|
||||
},
|
||||
],
|
||||
usageMetadata: {
|
||||
promptTokenCount: 1,
|
||||
candidatesTokenCount: 1,
|
||||
totalTokenCount: 2,
|
||||
},
|
||||
},
|
||||
})}`,
|
||||
].join("\n\n")}\n\n`;
|
||||
|
||||
const encoder = new TextEncoder();
|
||||
const dataStream = new ReadableStream<Uint8Array>({
|
||||
start(controller) {
|
||||
controller.enqueue(encoder.encode(sse));
|
||||
controller.close();
|
||||
},
|
||||
});
|
||||
|
||||
let callCount = 0;
|
||||
const fetchMock = vi.fn(async () => {
|
||||
callCount += 1;
|
||||
if (callCount === 1) {
|
||||
return new Response(emptyStream, {
|
||||
status: 200,
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
});
|
||||
}
|
||||
return new Response(dataStream, {
|
||||
status: 200,
|
||||
headers: { "content-type": "text/event-stream" },
|
||||
});
|
||||
});
|
||||
|
||||
global.fetch = fetchMock as typeof fetch;
|
||||
|
||||
const model: Model<"google-gemini-cli"> = {
|
||||
id: "gemini-2.5-flash",
|
||||
name: "Gemini 2.5 Flash",
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-gemini-cli",
|
||||
baseUrl: "https://cloudcode-pa.googleapis.com",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
|
||||
};
|
||||
|
||||
const stream = streamGoogleGeminiCli(model, context, {
|
||||
apiKey: JSON.stringify({ token: "token", projectId: "project" }),
|
||||
});
|
||||
|
||||
let startCount = 0;
|
||||
let doneCount = 0;
|
||||
let text = "";
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === "start") {
|
||||
startCount += 1;
|
||||
}
|
||||
if (event.type === "done") {
|
||||
doneCount += 1;
|
||||
}
|
||||
if (event.type === "text_delta") {
|
||||
text += event.delta;
|
||||
}
|
||||
}
|
||||
|
||||
const result = await stream.result();
|
||||
|
||||
expect(text).toBe("Hello");
|
||||
expect(result.stopReason).toBe("stop");
|
||||
expect(startCount).toBe(1);
|
||||
expect(doneCount).toBe(1);
|
||||
expect(fetchMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
});
|
||||
packages/ai/test/google-gemini-cli-retry-delay.test.ts — new file, 53 lines
@ -0,0 +1,53 @@
|
|||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
|
||||
|
||||
describe("extractRetryDelay header parsing", () => {
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
});
|
||||
|
||||
it("prefers Retry-After seconds header", () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date("2025-01-01T00:00:00Z"));
|
||||
|
||||
const response = new Response("", { headers: { "Retry-After": "5" } });
|
||||
const delay = extractRetryDelay("Please retry in 1s", response);
|
||||
|
||||
expect(delay).toBe(6000);
|
||||
});
|
||||
|
||||
it("parses Retry-After HTTP date header", () => {
|
||||
vi.useFakeTimers();
|
||||
const now = new Date("2025-01-01T00:00:00Z");
|
||||
vi.setSystemTime(now);
|
||||
|
||||
const retryAt = new Date(now.getTime() + 12000).toUTCString();
|
||||
const response = new Response("", { headers: { "Retry-After": retryAt } });
|
||||
const delay = extractRetryDelay("", response);
|
||||
|
||||
expect(delay).toBe(13000);
|
||||
});
|
||||
|
||||
it("parses x-ratelimit-reset header", () => {
|
||||
vi.useFakeTimers();
|
||||
const now = new Date("2025-01-01T00:00:00Z");
|
||||
vi.setSystemTime(now);
|
||||
|
||||
const resetAtMs = now.getTime() + 20000;
|
||||
const resetSeconds = Math.floor(resetAtMs / 1000).toString();
|
||||
const response = new Response("", { headers: { "x-ratelimit-reset": resetSeconds } });
|
||||
const delay = extractRetryDelay("", response);
|
||||
|
||||
expect(delay).toBe(21000);
|
||||
});
|
||||
|
||||
it("parses x-ratelimit-reset-after header", () => {
|
||||
vi.useFakeTimers();
|
||||
vi.setSystemTime(new Date("2025-01-01T00:00:00Z"));
|
||||
|
||||
const response = new Response("", { headers: { "x-ratelimit-reset-after": "30" } });
|
||||
const delay = extractRetryDelay("", response);
|
||||
|
||||
expect(delay).toBe(31000);
|
||||
});
|
||||
});
|
||||
packages/ai/test/google-gemini-cli-session-id.test.ts — new file, 50 lines
@ -0,0 +1,50 @@
|
|||
import { createHash } from "node:crypto";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildRequest } from "../src/providers/google-gemini-cli.js";
|
||||
import type { Context, Model } from "../src/types.js";
|
||||
|
||||
const model: Model<"google-gemini-cli"> = {
|
||||
id: "gemini-2.5-flash",
|
||||
name: "Gemini 2.5 Flash",
|
||||
api: "google-gemini-cli",
|
||||
provider: "google-gemini-cli",
|
||||
baseUrl: "https://cloudcode-pa.googleapis.com",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
|
||||
describe("buildRequest sessionId", () => {
|
||||
it("derives sessionId from the first user message", () => {
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{ role: "user", content: "First message", timestamp: Date.now() },
|
||||
{ role: "user", content: "Second message", timestamp: Date.now() },
|
||||
],
|
||||
};
|
||||
|
||||
const result = buildRequest(model, context, "project-id");
|
||||
const expected = createHash("sha256").update("First message").digest("hex").slice(0, 32);
|
||||
|
||||
expect(result.request.sessionId).toBe(expected);
|
||||
});
|
||||
|
||||
it("omits sessionId when the first user message has no text", () => {
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
{ role: "user", content: "Later text", timestamp: Date.now() },
|
||||
],
|
||||
};
|
||||
|
||||
const result = buildRequest(model, context, "project-id");
|
||||
|
||||
expect(result.request.sessionId).toBeUndefined();
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue