diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index 135d32ef..1d3e0ff6 100644 --- a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -16,6 +16,8 @@ import type { Message, Model, StopReason, + TextContent, + ThinkingContent, Tool, ToolCall, Usage, @@ -96,10 +98,8 @@ export class GoogleLLM implements LLM { const stream = await this.client.models.generateContentStream(params); - let content = ""; - let thinking = ""; - let thoughtSignature: string | undefined; - const toolCalls: ToolCall[] = []; + const blocks: AssistantMessage["content"] = []; + let currentBlock: TextContent | ThinkingContent | null = null; let usage: Usage = { input: 0, output: 0, @@ -108,8 +108,6 @@ export class GoogleLLM implements LLM { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; let stopReason: StopReason = "stop"; - let inTextBlock = false; - let inThinkingBlock = false; // Process the stream for await (const chunk of stream) { @@ -117,52 +115,69 @@ export class GoogleLLM implements LLM { const candidate = chunk.candidates?.[0]; if (candidate?.content?.parts) { for (const part of candidate.content.parts) { - // Cast to any to access thinking properties not yet in SDK types - const partWithThinking = part; - if (partWithThinking.text !== undefined) { - // Check if it's thinking content using the thought boolean flag - if (partWithThinking.thought === true) { - if (inTextBlock) { - options?.onText?.("", true); - inTextBlock = false; + if (part.text !== undefined) { + const isThinking = part.thought === true; + + // Check if we need to switch blocks + if ( + !currentBlock || + (isThinking && currentBlock.type !== "thinking") || + (!isThinking && currentBlock.type !== "text") + ) { + // Save and finalize current block + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } + blocks.push(currentBlock); } - thinking += partWithThinking.text; - options?.onThinking?.(partWithThinking.text, false); - inThinkingBlock = true; - // Capture thought signature if present - if (partWithThinking.thoughtSignature) { - thoughtSignature = partWithThinking.thoughtSignature; + + // Start new block + if (isThinking) { + currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined }; + options?.onEvent?.({ type: "thinking_start" }); + } else { + currentBlock = { type: "text", text: "" }; + options?.onEvent?.({ type: "text_start" }); } + } + + // Append content to current block + if (currentBlock.type === "thinking") { + currentBlock.thinking += part.text; + currentBlock.thinkingSignature = part.thoughtSignature; + options?.onEvent?.({type: "thinking_delta", content: currentBlock.thinking, delta: part.text }); } else { - if (inThinkingBlock) { - options?.onThinking?.("", true); - inThinkingBlock = false; - } - content += partWithThinking.text; - options?.onText?.(partWithThinking.text, false); - inTextBlock = true; + currentBlock.text += part.text; + options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: part.text }); } } // Handle function calls if (part.functionCall) { - if (inTextBlock) { - options?.onText?.("", true); - inTextBlock = false; - } - if (inThinkingBlock) { - options?.onThinking?.("", true); - inThinkingBlock = false; + // Save current block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } + blocks.push(currentBlock); + currentBlock = null; } - // Gemini doesn't provide tool call IDs, so we need to generate them - // Use the function name as part of the ID for better debugging - const toolCallId = `${part.functionCall.name}_${Date.now()}`; - toolCalls.push({ + // Add tool call + const toolCallId = part.functionCall.id || `${part.functionCall.name}_${Date.now()}`; + const toolCall: ToolCall = { + type: "toolCall", id: toolCallId, name: part.functionCall.name || "", arguments: part.functionCall.args as Record, - }); + }; + blocks.push(toolCall); + options?.onEvent?.({ type: "toolCall", toolCall }); } } } @@ -170,7 +185,8 @@ export class GoogleLLM implements LLM { // Map finish reason if (candidate?.finishReason) { stopReason = this.mapStopReason(candidate.finishReason); - if (toolCalls.length > 0) { + // Check if we have tool calls in blocks + if (blocks.some((b) => b.type === "toolCall")) { stopReason = "toolUse"; } } @@ -194,46 +210,32 @@ export class GoogleLLM implements LLM { } } - // Signal end of blocks - if (inTextBlock) { - options?.onText?.("", true); - } - if (inThinkingBlock) { - options?.onThinking?.("", true); + // Save final block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } + blocks.push(currentBlock); } - // Generate a thinking signature if we have thinking content but no signature from API - // This is needed for proper multi-turn conversations with thinking - if (thinking && !thoughtSignature) { - // Create a base64-encoded signature as Gemini expects - // In production, Gemini API should provide this - const encoder = new TextEncoder(); - const data = encoder.encode(thinking); - // Create a simple hash-like signature and encode to base64 - const signature = `gemini_thinking_${data.length}_${Date.now()}`; - thoughtSignature = Buffer.from(signature).toString("base64"); - } - - // Calculate cost calculateCost(this.model, usage); - // Usage metadata is in the last chunk - // Already captured during streaming - - return { + const output = { role: "assistant", - content: content || undefined, - thinking: thinking || undefined, - thinkingSignature: thoughtSignature, - toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + content: blocks, provider: this.model.provider, model: this.model.id, usage, stopReason, - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "done", reason: stopReason, message: output }); + return output; } catch (error) { - return { + const output = { role: "assistant", + content: [], provider: this.model.provider, model: this.model.id, usage: { @@ -244,8 +246,10 @@ export class GoogleLLM implements LLM { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }, stopReason: "error", - error: error instanceof Error ? error.message : String(error), - }; + error: error instanceof Error ? error.message : JSON.stringify(error), + } satisfies AssistantMessage; + options?.onEvent?.({ type: "error", error: output.error }); + return output; } } @@ -283,28 +287,23 @@ export class GoogleLLM implements LLM { } else if (msg.role === "assistant") { const parts: Part[] = []; - // Add thinking if present - // Note: We include thinkingSignature in our response for multi-turn context, - // but don't send it back to Gemini API as it may cause errors - if (msg.thinking) { - parts.push({ - text: msg.thinking, - thought: true, - // Don't include thoughtSignature when sending back to API - // thoughtSignature: msg.thinkingSignature, - }); - } - - if (msg.content) { - parts.push({ text: msg.content }); - } - - if (msg.toolCalls) { - for (const toolCall of msg.toolCalls) { + // Process content blocks + for (const block of msg.content) { + if (block.type === "text") { + parts.push({ text: block.text }); + } else if (block.type === "thinking") { + const thinkingPart: Part = { + thought: true, + thoughtSignature: block.thinkingSignature, + text: block.thinking, + }; + parts.push(thinkingPart); + } else if (block.type === "toolCall") { parts.push({ functionCall: { - name: toolCall.name, - args: toolCall.arguments, + id: block.id, + name: block.name, + args: block.arguments, }, }); } @@ -317,18 +316,16 @@ export class GoogleLLM implements LLM { }); } } else if (msg.role === "toolResult") { - // Tool results are sent as function responses - // Extract function name from the tool call ID (format: "functionName_timestamp") - const functionName = msg.toolCallId.substring(0, msg.toolCallId.lastIndexOf("_")); contents.push({ role: "user", parts: [ { functionResponse: { - name: functionName, + id: msg.toolCallId, + name: msg.toolName, response: { result: msg.content, - isError: msg.isError || false, + isError: msg.isError, }, }, },