co-mono/packages/ai/src/providers/google-shared.ts

305 lines
11 KiB
TypeScript

/**
* Shared utilities for Google Generative AI and Google Cloud Code Assist providers.
*/
import { type Content, FinishReason, FunctionCallingConfigMode, type Part, type Schema } from "@google/genai";
import type { Context, ImageContent, Model, StopReason, TextContent, Tool } from "../types.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
type GoogleApiType = "google-generative-ai" | "google-gemini-cli" | "google-vertex";
/**
* Determines whether a streamed Gemini `Part` should be treated as "thinking".
*
* Protocol note (Gemini / Vertex AI thought signatures):
* - `thought: true` is the definitive marker for thinking content (thought summaries).
* - `thoughtSignature` is an encrypted representation of the model's internal thought process
* used to preserve reasoning context across multi-turn interactions.
* - `thoughtSignature` can appear on ANY part type (text, functionCall, etc.) - it does NOT
* indicate the part itself is thinking content.
* - For non-functionCall responses, the signature appears on the last part for context replay.
* - When persisting/replaying model outputs, signature-bearing parts must be preserved as-is;
* do not merge/move signatures across parts.
*
* See: https://ai.google.dev/gemini-api/docs/thought-signatures
*/
export function isThinkingPart(part: Pick<Part, "thought" | "thoughtSignature">): boolean {
return part.thought === true;
}
/**
* Retain thought signatures during streaming.
*
* Some backends only send `thoughtSignature` on the first delta for a given part/block; later deltas may omit it.
* This helper preserves the last non-empty signature for the current block.
*
* Note: this does NOT merge or move signatures across distinct response parts. It only prevents
* a signature from being overwritten with `undefined` within the same streamed block.
*/
export function retainThoughtSignature(existing: string | undefined, incoming: string | undefined): string | undefined {
if (typeof incoming === "string" && incoming.length > 0) return incoming;
return existing;
}
// Thought signatures must be base64 for Google APIs (TYPE_BYTES).
const base64SignaturePattern = /^[A-Za-z0-9+/]+={0,2}$/;
function isValidThoughtSignature(signature: string | undefined): boolean {
if (!signature) return false;
if (signature.length % 4 !== 0) return false;
return base64SignaturePattern.test(signature);
}
/**
* Only keep signatures from the same provider/model and with valid base64.
*/
function resolveThoughtSignature(isSameProviderAndModel: boolean, signature: string | undefined): string | undefined {
return isSameProviderAndModel && isValidThoughtSignature(signature) ? signature : undefined;
}
/**
* Claude models via Google APIs require explicit tool call IDs in function calls/responses.
*/
export function requiresToolCallId(modelId: string): boolean {
return modelId.startsWith("claude-");
}
/**
* Convert internal messages to Gemini Content[] format.
*/
export function convertMessages<T extends GoogleApiType>(model: Model<T>, context: Context): Content[] {
const contents: Content[] = [];
const transformedMessages = transformMessages(context.messages, model);
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [{ text: sanitizeSurrogates(msg.content) }],
});
} else {
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: sanitizeSurrogates(item.text) };
} else {
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
};
}
});
const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
if (filteredParts.length === 0) continue;
contents.push({
role: "user",
parts: filteredParts,
});
}
} else if (msg.role === "assistant") {
const parts: Part[] = [];
// Check if message is from same provider and model - only then keep thinking blocks
const isSameProviderAndModel = msg.provider === model.provider && msg.model === model.id;
for (const block of msg.content) {
if (block.type === "text") {
// Skip empty text blocks - they can cause issues with some models (e.g. Claude via Antigravity)
if (!block.text || block.text.trim() === "") continue;
const thoughtSignature = resolveThoughtSignature(isSameProviderAndModel, block.textSignature);
parts.push({
text: sanitizeSurrogates(block.text),
...(thoughtSignature && { thoughtSignature }),
});
} else if (block.type === "thinking") {
// Skip empty thinking blocks
if (!block.thinking || block.thinking.trim() === "") continue;
// Only keep as thinking block if same provider AND same model
// Otherwise convert to plain text (no tags to avoid model mimicking them)
if (isSameProviderAndModel) {
const thoughtSignature = resolveThoughtSignature(isSameProviderAndModel, block.thinkingSignature);
parts.push({
thought: true,
text: sanitizeSurrogates(block.thinking),
...(thoughtSignature && { thoughtSignature }),
});
} else {
parts.push({
text: sanitizeSurrogates(block.thinking),
});
}
} else if (block.type === "toolCall") {
const thoughtSignature = resolveThoughtSignature(isSameProviderAndModel, block.thoughtSignature);
// Gemini 3 requires thoughtSignature on all function calls when thinking mode is enabled.
// When replaying history from providers without thought signatures (e.g. Claude via Antigravity),
// convert unsigned function calls to text to avoid API validation errors.
const isGemini3 = model.id.toLowerCase().includes("gemini-3");
if (isGemini3 && !thoughtSignature) {
const argsStr = JSON.stringify(block.arguments, null, 2);
parts.push({
text: `[Tool Call: ${block.name}]\nArguments: ${argsStr}`,
});
} else {
const part: Part = {
functionCall: {
name: block.name,
args: block.arguments,
...(requiresToolCallId(model.id) ? { id: block.id } : {}),
},
};
if (thoughtSignature) {
part.thoughtSignature = thoughtSignature;
}
parts.push(part);
}
}
}
if (parts.length === 0) continue;
contents.push({
role: "model",
parts,
});
} else if (msg.role === "toolResult") {
// Extract text and image content
const textContent = msg.content.filter((c): c is TextContent => c.type === "text");
const textResult = textContent.map((c) => c.text).join("\n");
const imageContent = model.input.includes("image")
? msg.content.filter((c): c is ImageContent => c.type === "image")
: [];
const hasText = textResult.length > 0;
const hasImages = imageContent.length > 0;
// Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
// See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
// Older models don't support this, so we put images in a separate user message.
const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");
// Use "output" key for success, "error" key for errors as per SDK documentation
const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "";
const imageParts: Part[] = imageContent.map((imageBlock) => ({
inlineData: {
mimeType: imageBlock.mimeType,
data: imageBlock.data,
},
}));
const includeId = requiresToolCallId(model.id);
const functionResponsePart: Part = {
functionResponse: {
name: msg.toolName,
response: msg.isError ? { error: responseValue } : { output: responseValue },
// Nest images inside functionResponse.parts for Gemini 3
...(hasImages && supportsMultimodalFunctionResponse && { parts: imageParts }),
...(includeId ? { id: msg.toolCallId } : {}),
},
};
// Cloud Code Assist API requires all function responses to be in a single user turn.
// Check if the last content is already a user turn with function responses and merge.
const lastContent = contents[contents.length - 1];
if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
lastContent.parts.push(functionResponsePart);
} else {
contents.push({
role: "user",
parts: [functionResponsePart],
});
}
// For older models, add images in a separate user message
if (hasImages && !supportsMultimodalFunctionResponse) {
contents.push({
role: "user",
parts: [{ text: "Tool result image:" }, ...imageParts],
});
}
}
}
return contents;
}
/**
* Convert tools to Gemini function declarations format.
*/
export function convertTools(
tools: Tool[],
): { functionDeclarations: { name: string; description?: string; parameters: Schema }[] }[] | undefined {
if (tools.length === 0) return undefined;
return [
{
functionDeclarations: tools.map((tool) => ({
name: tool.name,
description: tool.description,
parameters: tool.parameters as Schema,
})),
},
];
}
/**
* Map tool choice string to Gemini FunctionCallingConfigMode.
*/
export function mapToolChoice(choice: string): FunctionCallingConfigMode {
switch (choice) {
case "auto":
return FunctionCallingConfigMode.AUTO;
case "none":
return FunctionCallingConfigMode.NONE;
case "any":
return FunctionCallingConfigMode.ANY;
default:
return FunctionCallingConfigMode.AUTO;
}
}
/**
* Map Gemini FinishReason to our StopReason.
*/
export function mapStopReason(reason: FinishReason): StopReason {
switch (reason) {
case FinishReason.STOP:
return "stop";
case FinishReason.MAX_TOKENS:
return "length";
case FinishReason.BLOCKLIST:
case FinishReason.PROHIBITED_CONTENT:
case FinishReason.SPII:
case FinishReason.SAFETY:
case FinishReason.IMAGE_SAFETY:
case FinishReason.IMAGE_PROHIBITED_CONTENT:
case FinishReason.IMAGE_RECITATION:
case FinishReason.IMAGE_OTHER:
case FinishReason.RECITATION:
case FinishReason.FINISH_REASON_UNSPECIFIED:
case FinishReason.OTHER:
case FinishReason.LANGUAGE:
case FinishReason.MALFORMED_FUNCTION_CALL:
case FinishReason.UNEXPECTED_TOOL_CALL:
case FinishReason.NO_IMAGE:
return "error";
default: {
const _exhaustive: never = reason;
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
}
}
/**
* Map string finish reason to our StopReason (for raw API responses).
*/
export function mapStopReasonString(reason: string): StopReason {
switch (reason) {
case "STOP":
return "stop";
case "MAX_TOKENS":
return "length";
default:
return "error";
}
}