Mirror of https://github.com/getcompanion-ai/co-mono.git (synced 2026-04-19 22:01:38 +00:00)
Improve Gemini CLI provider retries and headers (#670)
Improve Gemini CLI provider retries and headers

- Add Antigravity endpoint fallback (tries the daily sandbox endpoint, then prod, when baseUrl is unset)
- Parse retry delays from headers (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after) before body parsing
- Derive a stable sessionId from the first user message for cache affinity
- Retry empty SSE streams with backoff, without emitting duplicate start/done events
- Add the anthropic-beta header for Claude thinking models only
This commit is contained in:
parent 9e4ae98358
commit ff15414258
5 changed files with 693 additions and 189 deletions
|
|
@ -4,6 +4,7 @@
|
||||||
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
import type { Content, ThinkingConfig } from "@google/genai";
|
import type { Content, ThinkingConfig } from "@google/genai";
|
||||||
import { calculateCost } from "../models.js";
|
import { calculateCost } from "../models.js";
|
||||||
import type {
|
import type {
|
||||||
|
|
@ -54,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
||||||
}
|
}
|
||||||
|
|
||||||
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
|
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
|
||||||
|
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
|
||||||
|
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
|
||||||
// Headers for Gemini CLI (prod endpoint)
|
// Headers for Gemini CLI (prod endpoint)
|
||||||
const GEMINI_CLI_HEADERS = {
|
const GEMINI_CLI_HEADERS = {
|
||||||
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
|
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
|
||||||
|
|
@ -163,16 +166,66 @@ let toolCallCounter = 0;
|
||||||
// Retry configuration
|
// Retry configuration
|
||||||
const MAX_RETRIES = 3;
|
const MAX_RETRIES = 3;
|
||||||
const BASE_DELAY_MS = 1000;
|
const BASE_DELAY_MS = 1000;
|
||||||
|
const MAX_EMPTY_STREAM_RETRIES = 2;
|
||||||
|
const EMPTY_STREAM_BASE_DELAY_MS = 500;
|
||||||
|
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract retry delay from Gemini error response (in milliseconds).
|
* Extract retry delay from Gemini error response (in milliseconds).
|
||||||
* Parses patterns like:
|
* Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
|
||||||
|
* then parses body patterns like:
|
||||||
* - "Your quota will reset after 39s"
|
* - "Your quota will reset after 39s"
|
||||||
* - "Your quota will reset after 18h31m10s"
|
* - "Your quota will reset after 18h31m10s"
|
||||||
* - "Please retry in Xs" or "Please retry in Xms"
|
* - "Please retry in Xs" or "Please retry in Xms"
|
||||||
* - "retryDelay": "34.074824224s" (JSON field)
|
* - "retryDelay": "34.074824224s" (JSON field)
|
||||||
*/
|
*/
|
||||||
function extractRetryDelay(errorText: string): number | undefined {
|
export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
|
||||||
|
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
|
||||||
|
|
||||||
|
const headers = response instanceof Headers ? response : response?.headers;
|
||||||
|
if (headers) {
|
||||||
|
const retryAfter = headers.get("retry-after");
|
||||||
|
if (retryAfter) {
|
||||||
|
const retryAfterSeconds = Number(retryAfter);
|
||||||
|
if (Number.isFinite(retryAfterSeconds)) {
|
||||||
|
const delay = normalizeDelay(retryAfterSeconds * 1000);
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const retryAfterDate = new Date(retryAfter);
|
||||||
|
const retryAfterMs = retryAfterDate.getTime();
|
||||||
|
if (!Number.isNaN(retryAfterMs)) {
|
||||||
|
const delay = normalizeDelay(retryAfterMs - Date.now());
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const rateLimitReset = headers.get("x-ratelimit-reset");
|
||||||
|
if (rateLimitReset) {
|
||||||
|
const resetSeconds = Number.parseInt(rateLimitReset, 10);
|
||||||
|
if (!Number.isNaN(resetSeconds)) {
|
||||||
|
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
|
||||||
|
if (rateLimitResetAfter) {
|
||||||
|
const resetAfterSeconds = Number(rateLimitResetAfter);
|
||||||
|
if (Number.isFinite(resetAfterSeconds)) {
|
||||||
|
const delay = normalizeDelay(resetAfterSeconds * 1000);
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
|
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
|
||||||
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
|
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
|
||||||
if (durationMatch) {
|
if (durationMatch) {
|
||||||
|
|
@ -181,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
|
||||||
const seconds = parseFloat(durationMatch[3]);
|
const seconds = parseFloat(durationMatch[3]);
|
||||||
if (!Number.isNaN(seconds)) {
|
if (!Number.isNaN(seconds)) {
|
||||||
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
|
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
|
||||||
if (totalMs > 0) {
|
const delay = normalizeDelay(totalMs);
|
||||||
return Math.ceil(totalMs + 1000); // Add 1s buffer
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -193,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
|
||||||
const value = parseFloat(retryInMatch[1]);
|
const value = parseFloat(retryInMatch[1]);
|
||||||
if (!Number.isNaN(value) && value > 0) {
|
if (!Number.isNaN(value) && value > 0) {
|
||||||
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
||||||
return Math.ceil(ms + 1000);
|
const delay = normalizeDelay(ms);
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -203,13 +260,21 @@ function extractRetryDelay(errorText: string): number | undefined {
|
||||||
const value = parseFloat(retryDelayMatch[1]);
|
const value = parseFloat(retryDelayMatch[1]);
|
||||||
if (!Number.isNaN(value) && value > 0) {
|
if (!Number.isNaN(value) && value > 0) {
|
||||||
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
|
||||||
return Math.ceil(ms + 1000);
|
const delay = normalizeDelay(ms);
|
||||||
|
if (delay !== undefined) {
|
||||||
|
return delay;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isClaudeThinkingModel(modelId: string): boolean {
|
||||||
|
const normalized = modelId.toLowerCase();
|
||||||
|
return normalized.includes("claude") && normalized.includes("thinking");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if an error is retryable (rate limit, server error, network error, etc.)
|
* Check if an error is retryable (rate limit, server error, network error, etc.)
|
||||||
*/
|
*/
|
||||||
|
|
@ -258,6 +323,7 @@ interface CloudCodeAssistRequest {
|
||||||
model: string;
|
model: string;
|
||||||
request: {
|
request: {
|
||||||
contents: Content[];
|
contents: Content[];
|
||||||
|
sessionId?: string;
|
||||||
systemInstruction?: { role?: string; parts: { text: string }[] };
|
systemInstruction?: { role?: string; parts: { text: string }[] };
|
||||||
generationConfig?: {
|
generationConfig?: {
|
||||||
maxOutputTokens?: number;
|
maxOutputTokens?: number;
|
||||||
|
|
@ -355,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
|
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
|
||||||
}
|
}
|
||||||
|
|
||||||
const endpoint = model.baseUrl || DEFAULT_ENDPOINT;
|
const isAntigravity = model.provider === "google-antigravity";
|
||||||
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
const baseUrl = model.baseUrl?.trim();
|
||||||
|
const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
|
||||||
|
|
||||||
// Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
|
|
||||||
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
|
|
||||||
const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
|
const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
|
||||||
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
|
||||||
|
|
||||||
|
const requestHeaders = {
|
||||||
|
Authorization: `Bearer ${accessToken}`,
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
Accept: "text/event-stream",
|
||||||
|
...headers,
|
||||||
|
...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
|
||||||
|
};
|
||||||
|
const requestBodyJson = JSON.stringify(requestBody);
|
||||||
|
|
||||||
// Fetch with retry logic for rate limits and transient errors
|
// Fetch with retry logic for rate limits and transient errors
|
||||||
let response: Response | undefined;
|
let response: Response | undefined;
|
||||||
let lastError: Error | undefined;
|
let lastError: Error | undefined;
|
||||||
|
let requestUrl: string | undefined;
|
||||||
|
|
||||||
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
||||||
if (options?.signal?.aborted) {
|
if (options?.signal?.aborted) {
|
||||||
|
|
@ -373,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
response = await fetch(url, {
|
const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
|
||||||
|
requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
|
||||||
|
response = await fetch(requestUrl, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: requestHeaders,
|
||||||
Authorization: `Bearer ${accessToken}`,
|
body: requestBodyJson,
|
||||||
"Content-Type": "application/json",
|
|
||||||
Accept: "text/event-stream",
|
|
||||||
...headers,
|
|
||||||
},
|
|
||||||
body: JSON.stringify(requestBody),
|
|
||||||
signal: options?.signal,
|
signal: options?.signal,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
@ -394,7 +466,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
// Check if retryable
|
// Check if retryable
|
||||||
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
|
||||||
// Use server-provided delay or exponential backoff
|
// Use server-provided delay or exponential backoff
|
||||||
const serverDelay = extractRetryDelay(errorText);
|
const serverDelay = extractRetryDelay(errorText, response);
|
||||||
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
|
||||||
await sleep(delayMs, options?.signal);
|
await sleep(delayMs, options?.signal);
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -428,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
throw lastError ?? new Error("Failed to get response after retries");
|
throw lastError ?? new Error("Failed to get response after retries");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!response.body) {
|
let started = false;
|
||||||
throw new Error("No response body");
|
const ensureStarted = () => {
|
||||||
}
|
if (!started) {
|
||||||
|
stream.push({ type: "start", partial: output });
|
||||||
stream.push({ type: "start", partial: output });
|
started = true;
|
||||||
|
}
|
||||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
|
||||||
const blocks = output.content;
|
|
||||||
const blockIndex = () => blocks.length - 1;
|
|
||||||
|
|
||||||
// Read SSE stream
|
|
||||||
const reader = response.body.getReader();
|
|
||||||
const decoder = new TextDecoder();
|
|
||||||
let buffer = "";
|
|
||||||
|
|
||||||
// Set up abort handler to cancel reader when signal fires
|
|
||||||
const abortHandler = () => {
|
|
||||||
void reader.cancel().catch(() => {});
|
|
||||||
};
|
};
|
||||||
options?.signal?.addEventListener("abort", abortHandler);
|
|
||||||
|
|
||||||
try {
|
const resetOutput = () => {
|
||||||
while (true) {
|
output.content = [];
|
||||||
// Check abort signal before each read
|
output.usage = {
|
||||||
if (options?.signal?.aborted) {
|
input: 0,
|
||||||
throw new Error("Request was aborted");
|
output: 0,
|
||||||
}
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
totalTokens: 0,
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||||
|
};
|
||||||
|
output.stopReason = "stop";
|
||||||
|
output.errorMessage = undefined;
|
||||||
|
output.timestamp = Date.now();
|
||||||
|
started = false;
|
||||||
|
};
|
||||||
|
|
||||||
const { done, value } = await reader.read();
|
const streamResponse = async (activeResponse: Response): Promise<boolean> => {
|
||||||
if (done) break;
|
if (!activeResponse.body) {
|
||||||
|
throw new Error("No response body");
|
||||||
|
}
|
||||||
|
|
||||||
buffer += decoder.decode(value, { stream: true });
|
let hasContent = false;
|
||||||
const lines = buffer.split("\n");
|
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||||
buffer = lines.pop() || "";
|
const blocks = output.content;
|
||||||
|
const blockIndex = () => blocks.length - 1;
|
||||||
|
|
||||||
for (const line of lines) {
|
// Read SSE stream
|
||||||
if (!line.startsWith("data:")) continue;
|
const reader = activeResponse.body.getReader();
|
||||||
|
const decoder = new TextDecoder();
|
||||||
|
let buffer = "";
|
||||||
|
|
||||||
const jsonStr = line.slice(5).trim();
|
// Set up abort handler to cancel reader when signal fires
|
||||||
if (!jsonStr) continue;
|
const abortHandler = () => {
|
||||||
|
void reader.cancel().catch(() => {});
|
||||||
|
};
|
||||||
|
options?.signal?.addEventListener("abort", abortHandler);
|
||||||
|
|
||||||
let chunk: CloudCodeAssistResponseChunk;
|
try {
|
||||||
try {
|
while (true) {
|
||||||
chunk = JSON.parse(jsonStr);
|
// Check abort signal before each read
|
||||||
} catch {
|
if (options?.signal?.aborted) {
|
||||||
continue;
|
throw new Error("Request was aborted");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unwrap the response
|
const { done, value } = await reader.read();
|
||||||
const responseData = chunk.response;
|
if (done) break;
|
||||||
if (!responseData) continue;
|
|
||||||
|
|
||||||
const candidate = responseData.candidates?.[0];
|
buffer += decoder.decode(value, { stream: true });
|
||||||
if (candidate?.content?.parts) {
|
const lines = buffer.split("\n");
|
||||||
for (const part of candidate.content.parts) {
|
buffer = lines.pop() || "";
|
||||||
if (part.text !== undefined) {
|
|
||||||
const isThinking = isThinkingPart(part);
|
for (const line of lines) {
|
||||||
if (
|
if (!line.startsWith("data:")) continue;
|
||||||
!currentBlock ||
|
|
||||||
(isThinking && currentBlock.type !== "thinking") ||
|
const jsonStr = line.slice(5).trim();
|
||||||
(!isThinking && currentBlock.type !== "text")
|
if (!jsonStr) continue;
|
||||||
) {
|
|
||||||
|
let chunk: CloudCodeAssistResponseChunk;
|
||||||
|
try {
|
||||||
|
chunk = JSON.parse(jsonStr);
|
||||||
|
} catch {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unwrap the response
|
||||||
|
const responseData = chunk.response;
|
||||||
|
if (!responseData) continue;
|
||||||
|
|
||||||
|
const candidate = responseData.candidates?.[0];
|
||||||
|
if (candidate?.content?.parts) {
|
||||||
|
for (const part of candidate.content.parts) {
|
||||||
|
if (part.text !== undefined) {
|
||||||
|
hasContent = true;
|
||||||
|
const isThinking = isThinkingPart(part);
|
||||||
|
if (
|
||||||
|
!currentBlock ||
|
||||||
|
(isThinking && currentBlock.type !== "thinking") ||
|
||||||
|
(!isThinking && currentBlock.type !== "text")
|
||||||
|
) {
|
||||||
|
if (currentBlock) {
|
||||||
|
if (currentBlock.type === "text") {
|
||||||
|
stream.push({
|
||||||
|
type: "text_end",
|
||||||
|
contentIndex: blocks.length - 1,
|
||||||
|
content: currentBlock.text,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
stream.push({
|
||||||
|
type: "thinking_end",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
content: currentBlock.thinking,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isThinking) {
|
||||||
|
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
||||||
|
output.content.push(currentBlock);
|
||||||
|
ensureStarted();
|
||||||
|
stream.push({
|
||||||
|
type: "thinking_start",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
currentBlock = { type: "text", text: "" };
|
||||||
|
output.content.push(currentBlock);
|
||||||
|
ensureStarted();
|
||||||
|
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (currentBlock.type === "thinking") {
|
||||||
|
currentBlock.thinking += part.text;
|
||||||
|
currentBlock.thinkingSignature = retainThoughtSignature(
|
||||||
|
currentBlock.thinkingSignature,
|
||||||
|
part.thoughtSignature,
|
||||||
|
);
|
||||||
|
stream.push({
|
||||||
|
type: "thinking_delta",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
delta: part.text,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
currentBlock.text += part.text;
|
||||||
|
currentBlock.textSignature = retainThoughtSignature(
|
||||||
|
currentBlock.textSignature,
|
||||||
|
part.thoughtSignature,
|
||||||
|
);
|
||||||
|
stream.push({
|
||||||
|
type: "text_delta",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
delta: part.text,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (part.functionCall) {
|
||||||
|
hasContent = true;
|
||||||
if (currentBlock) {
|
if (currentBlock) {
|
||||||
if (currentBlock.type === "text") {
|
if (currentBlock.type === "text") {
|
||||||
stream.push({
|
stream.push({
|
||||||
type: "text_end",
|
type: "text_end",
|
||||||
contentIndex: blocks.length - 1,
|
contentIndex: blockIndex(),
|
||||||
content: currentBlock.text,
|
content: currentBlock.text,
|
||||||
partial: output,
|
partial: output,
|
||||||
});
|
});
|
||||||
|
|
@ -506,143 +665,142 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
partial: output,
|
partial: output,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
currentBlock = null;
|
||||||
}
|
}
|
||||||
if (isThinking) {
|
|
||||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
const providedId = part.functionCall.id;
|
||||||
output.content.push(currentBlock);
|
const needsNewId =
|
||||||
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
!providedId ||
|
||||||
} else {
|
output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
||||||
currentBlock = { type: "text", text: "" };
|
const toolCallId = needsNewId
|
||||||
output.content.push(currentBlock);
|
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
||||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
: providedId;
|
||||||
}
|
|
||||||
}
|
const toolCall: ToolCall = {
|
||||||
if (currentBlock.type === "thinking") {
|
type: "toolCall",
|
||||||
currentBlock.thinking += part.text;
|
id: toolCallId,
|
||||||
currentBlock.thinkingSignature = retainThoughtSignature(
|
name: part.functionCall.name || "",
|
||||||
currentBlock.thinkingSignature,
|
arguments: part.functionCall.args as Record<string, unknown>,
|
||||||
part.thoughtSignature,
|
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
||||||
);
|
};
|
||||||
|
|
||||||
|
output.content.push(toolCall);
|
||||||
|
ensureStarted();
|
||||||
|
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||||
stream.push({
|
stream.push({
|
||||||
type: "thinking_delta",
|
type: "toolcall_delta",
|
||||||
contentIndex: blockIndex(),
|
contentIndex: blockIndex(),
|
||||||
delta: part.text,
|
delta: JSON.stringify(toolCall.arguments),
|
||||||
partial: output,
|
partial: output,
|
||||||
});
|
});
|
||||||
} else {
|
|
||||||
currentBlock.text += part.text;
|
|
||||||
currentBlock.textSignature = retainThoughtSignature(
|
|
||||||
currentBlock.textSignature,
|
|
||||||
part.thoughtSignature,
|
|
||||||
);
|
|
||||||
stream.push({
|
stream.push({
|
||||||
type: "text_delta",
|
type: "toolcall_end",
|
||||||
contentIndex: blockIndex(),
|
contentIndex: blockIndex(),
|
||||||
delta: part.text,
|
toolCall,
|
||||||
partial: output,
|
partial: output,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (part.functionCall) {
|
if (candidate?.finishReason) {
|
||||||
if (currentBlock) {
|
output.stopReason = mapStopReasonString(candidate.finishReason);
|
||||||
if (currentBlock.type === "text") {
|
if (output.content.some((b) => b.type === "toolCall")) {
|
||||||
stream.push({
|
output.stopReason = "toolUse";
|
||||||
type: "text_end",
|
|
||||||
contentIndex: blockIndex(),
|
|
||||||
content: currentBlock.text,
|
|
||||||
partial: output,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
stream.push({
|
|
||||||
type: "thinking_end",
|
|
||||||
contentIndex: blockIndex(),
|
|
||||||
content: currentBlock.thinking,
|
|
||||||
partial: output,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
currentBlock = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
const providedId = part.functionCall.id;
|
|
||||||
const needsNewId =
|
|
||||||
!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
|
|
||||||
const toolCallId = needsNewId
|
|
||||||
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
|
|
||||||
: providedId;
|
|
||||||
|
|
||||||
const toolCall: ToolCall = {
|
|
||||||
type: "toolCall",
|
|
||||||
id: toolCallId,
|
|
||||||
name: part.functionCall.name || "",
|
|
||||||
arguments: part.functionCall.args as Record<string, unknown>,
|
|
||||||
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
|
|
||||||
};
|
|
||||||
|
|
||||||
output.content.push(toolCall);
|
|
||||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
|
||||||
stream.push({
|
|
||||||
type: "toolcall_delta",
|
|
||||||
contentIndex: blockIndex(),
|
|
||||||
delta: JSON.stringify(toolCall.arguments),
|
|
||||||
partial: output,
|
|
||||||
});
|
|
||||||
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (candidate?.finishReason) {
|
if (responseData.usageMetadata) {
|
||||||
output.stopReason = mapStopReasonString(candidate.finishReason);
|
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
|
||||||
if (output.content.some((b) => b.type === "toolCall")) {
|
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
|
||||||
output.stopReason = "toolUse";
|
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
|
||||||
}
|
output.usage = {
|
||||||
}
|
input: promptTokens - cacheReadTokens,
|
||||||
|
output:
|
||||||
if (responseData.usageMetadata) {
|
(responseData.usageMetadata.candidatesTokenCount || 0) +
|
||||||
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
|
(responseData.usageMetadata.thoughtsTokenCount || 0),
|
||||||
const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
|
cacheRead: cacheReadTokens,
|
||||||
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
|
|
||||||
output.usage = {
|
|
||||||
input: promptTokens - cacheReadTokens,
|
|
||||||
output:
|
|
||||||
(responseData.usageMetadata.candidatesTokenCount || 0) +
|
|
||||||
(responseData.usageMetadata.thoughtsTokenCount || 0),
|
|
||||||
cacheRead: cacheReadTokens,
|
|
||||||
cacheWrite: 0,
|
|
||||||
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
|
|
||||||
cost: {
|
|
||||||
input: 0,
|
|
||||||
output: 0,
|
|
||||||
cacheRead: 0,
|
|
||||||
cacheWrite: 0,
|
cacheWrite: 0,
|
||||||
total: 0,
|
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
|
||||||
},
|
cost: {
|
||||||
};
|
input: 0,
|
||||||
calculateCost(model, output.usage);
|
output: 0,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
total: 0,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
calculateCost(model, output.usage);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} finally {
|
||||||
|
options?.signal?.removeEventListener("abort", abortHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentBlock) {
|
||||||
|
if (currentBlock.type === "text") {
|
||||||
|
stream.push({
|
||||||
|
type: "text_end",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
content: currentBlock.text,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
stream.push({
|
||||||
|
type: "thinking_end",
|
||||||
|
contentIndex: blockIndex(),
|
||||||
|
content: currentBlock.thinking,
|
||||||
|
partial: output,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return hasContent;
|
||||||
|
};
|
||||||
|
|
||||||
|
let receivedContent = false;
|
||||||
|
let currentResponse = response;
|
||||||
|
|
||||||
|
for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
|
||||||
|
if (options?.signal?.aborted) {
|
||||||
|
throw new Error("Request was aborted");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (emptyAttempt > 0) {
|
||||||
|
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
|
||||||
|
await sleep(backoffMs, options?.signal);
|
||||||
|
|
||||||
|
if (!requestUrl) {
|
||||||
|
throw new Error("Missing request URL");
|
||||||
|
}
|
||||||
|
|
||||||
|
currentResponse = await fetch(requestUrl, {
|
||||||
|
method: "POST",
|
||||||
|
headers: requestHeaders,
|
||||||
|
body: requestBodyJson,
|
||||||
|
signal: options?.signal,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!currentResponse.ok) {
|
||||||
|
const retryErrorText = await currentResponse.text();
|
||||||
|
throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const streamed = await streamResponse(currentResponse);
|
||||||
|
if (streamed) {
|
||||||
|
receivedContent = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
|
||||||
|
resetOutput();
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
options?.signal?.removeEventListener("abort", abortHandler);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (currentBlock) {
|
if (!receivedContent) {
|
||||||
if (currentBlock.type === "text") {
|
throw new Error("Cloud Code Assist API returned an empty response");
|
||||||
stream.push({
|
|
||||||
type: "text_end",
|
|
||||||
contentIndex: blockIndex(),
|
|
||||||
content: currentBlock.text,
|
|
||||||
partial: output,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
stream.push({
|
|
||||||
type: "thinking_end",
|
|
||||||
contentIndex: blockIndex(),
|
|
||||||
content: currentBlock.thinking,
|
|
||||||
partial: output,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options?.signal?.aborted) {
|
if (options?.signal?.aborted) {
|
||||||
|
|
@ -671,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
||||||
return stream;
|
return stream;
|
||||||
};
|
};
|
||||||
|
|
||||||
function buildRequest(
|
function deriveSessionId(context: Context): string | undefined {
|
||||||
|
for (const message of context.messages) {
|
||||||
|
if (message.role !== "user") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let text = "";
|
||||||
|
if (typeof message.content === "string") {
|
||||||
|
text = message.content;
|
||||||
|
} else if (Array.isArray(message.content)) {
|
||||||
|
text = message.content
|
||||||
|
.filter((item): item is TextContent => item.type === "text")
|
||||||
|
.map((item) => item.text)
|
||||||
|
.join("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!text || text.trim().length === 0) {
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
const hash = createHash("sha256").update(text).digest("hex");
|
||||||
|
return hash.slice(0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function buildRequest(
|
||||||
model: Model<"google-gemini-cli">,
|
model: Model<"google-gemini-cli">,
|
||||||
context: Context,
|
context: Context,
|
||||||
projectId: string,
|
projectId: string,
|
||||||
|
|
@ -706,6 +891,11 @@ function buildRequest(
|
||||||
contents,
|
contents,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const sessionId = deriveSessionId(context);
|
||||||
|
if (sessionId) {
|
||||||
|
request.sessionId = sessionId;
|
||||||
|
}
|
||||||
|
|
||||||
// System instruction must be object with parts, not plain string
|
// System instruction must be object with parts, not plain string
|
||||||
if (context.systemPrompt) {
|
if (context.systemPrompt) {
|
||||||
request.systemInstruction = {
|
request.systemInstruction = {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,103 @@
|
||||||
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
|
||||||
|
import type { Context, Model } from "../src/types.js";
|
||||||
|
|
||||||
|
const originalFetch = global.fetch;
|
||||||
|
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
|
||||||
|
|
||||||
|
const createSseResponse = () => {
|
||||||
|
const sse = `${[
|
||||||
|
`data: ${JSON.stringify({
|
||||||
|
response: {
|
||||||
|
candidates: [
|
||||||
|
{
|
||||||
|
content: { role: "model", parts: [{ text: "Hello" }] },
|
||||||
|
finishReason: "STOP",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
})}`,
|
||||||
|
].join("\n\n")}\n\n`;
|
||||||
|
|
||||||
|
const encoder = new TextEncoder();
|
||||||
|
const stream = new ReadableStream<Uint8Array>({
|
||||||
|
start(controller) {
|
||||||
|
controller.enqueue(encoder.encode(sse));
|
||||||
|
controller.close();
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
return new Response(stream, {
|
||||||
|
status: 200,
|
||||||
|
headers: { "content-type": "text/event-stream" },
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
global.fetch = originalFetch;
|
||||||
|
vi.restoreAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("google-gemini-cli Claude thinking header", () => {
	const context: Context = {
		messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
	};

	/**
	 * Streams one request for `model` through a mocked `fetch` and returns the
	 * headers of every request the provider issued.
	 *
	 * Headers are recorded rather than asserted inside the mock: an assertion
	 * placed in the mock never runs if `fetch` is not called, and a throw from
	 * inside the mock may be absorbed if the provider handles fetch failures
	 * itself (NOTE(review): provider error handling is not visible from this
	 * test file). Asserting after the stream is drained avoids both problems.
	 */
	const collectRequestHeaders = async (model: Model<"google-gemini-cli">): Promise<Headers[]> => {
		const seen: Headers[] = [];
		const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
			seen.push(new Headers(init?.headers));
			return createSseResponse();
		});

		global.fetch = fetchMock as typeof fetch;

		const stream = streamGoogleGeminiCli(model, context, { apiKey });
		for await (const _event of stream) {
			// exhaust stream
		}
		await stream.result();

		return seen;
	};

	it("adds anthropic-beta for Claude thinking models", async () => {
		const model: Model<"google-gemini-cli"> = {
			id: "claude-opus-4-5-thinking",
			name: "Claude Opus 4.5 Thinking",
			api: "google-gemini-cli",
			provider: "google-antigravity",
			baseUrl: "https://cloudcode-pa.googleapis.com",
			reasoning: true,
			input: ["text"],
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			contextWindow: 128000,
			maxTokens: 8192,
		};

		const seen = await collectRequestHeaders(model);

		// Guard against a vacuous pass when no request is made at all.
		expect(seen.length).toBeGreaterThan(0);
		for (const headers of seen) {
			expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
		}
	});

	it("does not add anthropic-beta for Gemini models", async () => {
		const model: Model<"google-gemini-cli"> = {
			id: "gemini-2.5-flash",
			name: "Gemini 2.5 Flash",
			api: "google-gemini-cli",
			provider: "google-gemini-cli",
			baseUrl: "https://cloudcode-pa.googleapis.com",
			reasoning: false,
			input: ["text"],
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			contextWindow: 128000,
			maxTokens: 8192,
		};

		const seen = await collectRequestHeaders(model);

		// Guard against a vacuous pass when no request is made at all.
		expect(seen.length).toBeGreaterThan(0);
		for (const headers of seen) {
			expect(headers.has("anthropic-beta")).toBe(false);
		}
	});
});
|
||||||
packages/ai/test/google-gemini-cli-empty-stream.test.ts
Normal file (108 lines)
|
|
@ -0,0 +1,108 @@
|
||||||
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
|
||||||
|
import type { Context, Model } from "../src/types.js";
|
||||||
|
|
||||||
|
// Handle on the real fetch so each test's mock can be undone afterwards.
const originalFetch = global.fetch;

// Undo per-test global state: put the real fetch back and restore vitest mocks.
afterEach(() => {
	global.fetch = originalFetch;
	vi.restoreAllMocks();
});
|
||||||
|
|
||||||
|
describe("google-gemini-cli empty stream retry", () => {
	/** Wraps a body stream in a 200 `text/event-stream` response. */
	const sseResponse = (body: ReadableStream<Uint8Array>): Response =>
		new Response(body, {
			status: 200,
			headers: { "content-type": "text/event-stream" },
		});

	it("retries empty SSE responses without duplicate start", async () => {
		// First attempt: a body that closes without emitting a single byte.
		const emptyStream = new ReadableStream<Uint8Array>({
			start(controller) {
				controller.close();
			},
		});

		// Second attempt: one complete SSE event carrying text and usage.
		const payload = {
			response: {
				candidates: [
					{
						content: { role: "model", parts: [{ text: "Hello" }] },
						finishReason: "STOP",
					},
				],
				usageMetadata: {
					promptTokenCount: 1,
					candidatesTokenCount: 1,
					totalTokenCount: 2,
				},
			},
		};
		const sse = `data: ${JSON.stringify(payload)}\n\n`;

		const dataStream = new ReadableStream<Uint8Array>({
			start(controller) {
				controller.enqueue(new TextEncoder().encode(sse));
				controller.close();
			},
		});

		// Only the first call gets the empty body; every later call gets the data body.
		const fetchMock = vi.fn(async () => sseResponse(fetchMock.mock.calls.length === 1 ? emptyStream : dataStream));

		global.fetch = fetchMock as typeof fetch;

		const model: Model<"google-gemini-cli"> = {
			id: "gemini-2.5-flash",
			name: "Gemini 2.5 Flash",
			api: "google-gemini-cli",
			provider: "google-gemini-cli",
			baseUrl: "https://cloudcode-pa.googleapis.com",
			reasoning: false,
			input: ["text"],
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			contextWindow: 128000,
			maxTokens: 8192,
		};

		const context: Context = {
			messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
		};

		const stream = streamGoogleGeminiCli(model, context, {
			apiKey: JSON.stringify({ token: "token", projectId: "project" }),
		});

		// Tally lifecycle events so a retry that re-emits start/done is caught.
		let startCount = 0;
		let doneCount = 0;
		let text = "";

		for await (const event of stream) {
			if (event.type === "start") {
				startCount += 1;
			}
			if (event.type === "done") {
				doneCount += 1;
			}
			if (event.type === "text_delta") {
				text += event.delta;
			}
		}

		const result = await stream.result();

		expect(text).toBe("Hello");
		expect(result.stopReason).toBe("stop");
		expect(startCount).toBe(1);
		expect(doneCount).toBe(1);
		expect(fetchMock).toHaveBeenCalledTimes(2);
	});
});
|
||||||
packages/ai/test/google-gemini-cli-retry-delay.test.ts
Normal file (53 lines)
|
|
@ -0,0 +1,53 @@
|
||||||
|
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||||
|
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
|
||||||
|
|
||||||
|
describe("extractRetryDelay header parsing", () => {
	/**
	 * Switches to fake timers pinned at 2025-01-01T00:00:00Z and returns that
	 * instant, so delays derived from absolute timestamps are deterministic.
	 */
	const freezeClock = (): Date => {
		const now = new Date("2025-01-01T00:00:00Z");
		vi.useFakeTimers();
		vi.setSystemTime(now);
		return now;
	};

	afterEach(() => {
		vi.useRealTimers();
	});

	// Every expectation below is the header-derived wait plus 1000 ms.

	it("prefers Retry-After seconds header", () => {
		freezeClock();

		// The body text suggests 1s; the 5s header value must win.
		const response = new Response("", { headers: { "Retry-After": "5" } });
		const delay = extractRetryDelay("Please retry in 1s", response);

		expect(delay).toBe(6000);
	});

	it("parses Retry-After HTTP date header", () => {
		const now = freezeClock();

		const retryAt = new Date(now.getTime() + 12000).toUTCString();
		const response = new Response("", { headers: { "Retry-After": retryAt } });
		const delay = extractRetryDelay("", response);

		expect(delay).toBe(13000);
	});

	it("parses x-ratelimit-reset header", () => {
		const now = freezeClock();

		// Header value is an absolute time in whole epoch seconds, 20s ahead.
		const resetAtMs = now.getTime() + 20000;
		const resetSeconds = Math.floor(resetAtMs / 1000).toString();
		const response = new Response("", { headers: { "x-ratelimit-reset": resetSeconds } });
		const delay = extractRetryDelay("", response);

		expect(delay).toBe(21000);
	});

	it("parses x-ratelimit-reset-after header", () => {
		freezeClock();

		const response = new Response("", { headers: { "x-ratelimit-reset-after": "30" } });
		const delay = extractRetryDelay("", response);

		expect(delay).toBe(31000);
	});
});
|
||||||
packages/ai/test/google-gemini-cli-session-id.test.ts
Normal file (50 lines)
|
|
@ -0,0 +1,50 @@
|
||||||
|
import { createHash } from "node:crypto";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import { buildRequest } from "../src/providers/google-gemini-cli.js";
|
||||||
|
import type { Context, Model } from "../src/types.js";
|
||||||
|
|
||||||
|
// Model fixture passed to every buildRequest call in this file.
const model: Model<"google-gemini-cli"> = {
	id: "gemini-2.5-flash",
	name: "Gemini 2.5 Flash",
	api: "google-gemini-cli",
	provider: "google-gemini-cli",
	baseUrl: "https://cloudcode-pa.googleapis.com",
	reasoning: false,
	input: ["text"],
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
	contextWindow: 128000,
	maxTokens: 8192,
};
|
||||||
|
|
||||||
|
describe("buildRequest sessionId", () => {
	it("derives sessionId from the first user message", () => {
		const context: Context = {
			messages: [
				{ role: "user", content: "First message", timestamp: Date.now() },
				{ role: "user", content: "Second message", timestamp: Date.now() },
			],
		};

		const result = buildRequest(model, context, "project-id");

		// Expected id: first 32 hex characters of the SHA-256 of the first message's text.
		const expected = createHash("sha256").update("First message").digest("hex").slice(0, 32);

		expect(result.request.sessionId).toBe(expected);
	});

	it("omits sessionId when the first user message has no text", () => {
		// The first user message is image-only; the text in the second message
		// must not be used as a substitute.
		const context: Context = {
			messages: [
				{
					role: "user",
					content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
					timestamp: Date.now(),
				},
				{ role: "user", content: "Later text", timestamp: Date.now() },
			],
		};

		const result = buildRequest(model, context, "project-id");

		expect(result.request.sessionId).toBeUndefined();
	});
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue