diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index 8fcd9ef3..3b1dc894 100644 --- a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -6,6 +6,7 @@ import { type GenerateContentParameters, GoogleGenAI, type Part, + type Schema, type ThinkingConfig, type ThinkingLevel, } from "@google/genai"; @@ -14,6 +15,7 @@ import type { Api, AssistantMessage, Context, + ImageContent, Model, StopReason, StreamFunction, @@ -238,7 +240,12 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = ( stream.push({ type: "done", reason: output.stopReason, message: output }); stream.end(); } catch (error) { - for (const block of output.content) delete (block as any).index; + // Remove internal index property used during streaming + for (const block of output.content) { + if ("index" in block) { + delete (block as { index?: number }).index; + } + } output.stopReason = options?.signal?.aborted ? "aborted" : "error"; output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); stream.push({ type: "error", reason: output.stopReason, error: output }); @@ -389,33 +396,33 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context) const parts: Part[] = []; // Extract text and image content - const textResult = msg.content - .filter((c) => c.type === "text") - .map((c) => (c as any).text) - .join("\n"); - const imageBlocks = model.input.includes("image") ? msg.content.filter((c) => c.type === "image") : []; + const textContent = msg.content.filter((c): c is TextContent => c.type === "text"); + const textResult = textContent.map((c) => c.text).join("\n"); + const imageContent = model.input.includes("image") + ? msg.content.filter((c): c is ImageContent => c.type === "image") + : []; // Always add functionResponse with text result (or placeholder if only images) const hasText = textResult.length > 0; - const hasImages = imageBlocks.length > 0; + const hasImages = imageContent.length > 0; + + // Use "output" key for success, "error" key for errors as per SDK documentation + const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : ""; parts.push({ functionResponse: { id: msg.toolCallId, name: msg.toolName, - response: { - result: hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "", - isError: msg.isError, - }, + response: msg.isError ? { error: responseValue } : { output: responseValue }, }, }); // Add any images as inlineData parts - for (const imageBlock of imageBlocks) { + for (const imageBlock of imageContent) { parts.push({ inlineData: { - mimeType: (imageBlock as any).mimeType, - data: (imageBlock as any).data, + mimeType: imageBlock.mimeType, + data: imageBlock.data, }, }); } @@ -430,14 +437,16 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context) return contents; } -function convertTools(tools: Tool[]): any[] | undefined { +function convertTools( + tools: Tool[], +): { functionDeclarations: { name: string; description?: string; parameters: Schema }[] }[] | undefined { if (tools.length === 0) return undefined; return [ { functionDeclarations: tools.map((tool) => ({ name: tool.name, description: tool.description, - parameters: tool.parameters as any, // TypeBox already generates JSON Schema + parameters: tool.parameters as Schema, // TypeBox generates JSON Schema compatible with SDK Schema type })), }, ]; diff --git a/packages/ai/test/gemini-3-flash-tool-calling.test.ts b/packages/ai/test/gemini-3-flash-tool-calling.test.ts new file mode 100644 index 00000000..a0d9d370 --- /dev/null +++ b/packages/ai/test/gemini-3-flash-tool-calling.test.ts @@ -0,0 +1,166 @@ +import { Type } from "@sinclair/typebox"; +import { describe, expect, it } from "vitest"; +import { getModel } from "../src/models.js"; +import { complete } from "../src/stream.js"; +import type { Context, Tool, ToolResultMessage } from "../src/types.js"; +import { StringEnum } from "../src/utils/typebox-helpers.js"; + +/** + * Test for Gemini 3 Flash Preview tool calling compatibility. + * + * Issue #213: The model works and tool calling works, but the problem is how pi-ai + * formats the tool result message when sending it back to Gemini 3 Flash Preview. + * + * The SDK documentation states: + * "Use 'output' key to specify function output and 'error' key to specify error details" + * + * But the code was using `result` and `isError` keys, which Gemini 3 Flash Preview + * rejects (older models were more lenient). + */ + +// Calculator tool definition +const calculatorSchema = Type.Object({ + a: Type.Number({ description: "First number" }), + b: Type.Number({ description: "Second number" }), + operation: StringEnum(["add", "subtract", "multiply", "divide"], { + description: "The operation to perform. One of 'add', 'subtract', 'multiply', 'divide'.", + }), +}); + +const calculatorTool: Tool = { + name: "calculator", + description: "Perform basic arithmetic operations", + parameters: calculatorSchema, +}; + +describe("Gemini 3 Flash Preview Tool Calling", () => { + it("should handle tool calls and tool results with correct format", async () => { + if (!process.env.GEMINI_API_KEY) { + console.log("Skipping test - GEMINI_API_KEY not set"); + return; + } + + const model = getModel("google", "gemini-3-flash-preview"); + + const context: Context = { + systemPrompt: "You are a helpful assistant that uses tools when asked.", + messages: [ + { + role: "user", + content: "Calculate 15 + 27 using the calculator tool.", + timestamp: Date.now(), + }, + ], + tools: [calculatorTool], + }; + + // First call - model should request tool call + const firstResponse = await complete(model, context); + + expect(firstResponse.role).toBe("assistant"); + expect(firstResponse.stopReason).toBe("toolUse"); + expect(firstResponse.errorMessage).toBeFalsy(); + + const toolCall = firstResponse.content.find((b) => b.type === "toolCall"); + expect(toolCall).toBeTruthy(); + expect(toolCall?.type).toBe("toolCall"); + + if (toolCall?.type === "toolCall") { + expect(toolCall.name).toBe("calculator"); + expect(toolCall.id).toBeTruthy(); + expect(toolCall.arguments).toBeTruthy(); + + const { a, b, operation } = toolCall.arguments; + expect(a).toBe(15); + expect(b).toBe(27); + expect(operation).toBe("add"); + + // Execute the tool + const result = 15 + 27; + + // Add tool result to context - this is where the bug was + // The SDK expects { output: value } for success, not { result: value, isError: false } + context.messages.push(firstResponse); + const toolResult: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [{ type: "text", text: `${result}` }], + isError: false, + timestamp: Date.now(), + }; + context.messages.push(toolResult); + + // Second call - model should process the tool result and respond + // This is where Gemini 3 Flash Preview would fail with the old format + const secondResponse = await complete(model, context); + + expect(secondResponse.role).toBe("assistant"); + expect(secondResponse.stopReason).toBe("stop"); + expect(secondResponse.errorMessage).toBeFalsy(); + + const textContent = secondResponse.content + .filter((b) => b.type === "text") + .map((b) => (b.type === "text" ? b.text : "")) + .join(""); + + expect(textContent).toBeTruthy(); + // Should mention the result 42 + expect(textContent.toLowerCase()).toMatch(/42/); + } + }, 30000); // 30 second timeout + + it("should handle tool errors with correct format", async () => { + if (!process.env.GEMINI_API_KEY) { + console.log("Skipping test - GEMINI_API_KEY not set"); + return; + } + + const model = getModel("google", "gemini-3-flash-preview"); + + const context: Context = { + systemPrompt: "You are a helpful assistant that uses tools when asked.", + messages: [ + { + role: "user", + content: "Calculate 10 divided by 0 using the calculator tool.", + timestamp: Date.now(), + }, + ], + tools: [calculatorTool], + }; + + const firstResponse = await complete(model, context); + expect(firstResponse.stopReason).toBe("toolUse"); + + const toolCall = firstResponse.content.find((b) => b.type === "toolCall"); + if (toolCall?.type === "toolCall") { + // Add error result - should use { error: message } format + context.messages.push(firstResponse); + const errorResult: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [{ type: "text", text: "Error: Division by zero" }], + isError: true, + timestamp: Date.now(), + }; + context.messages.push(errorResult); + + // Model should handle the error response + const secondResponse = await complete(model, context); + + expect(secondResponse.role).toBe("assistant"); + expect(secondResponse.errorMessage).toBeFalsy(); + + const textContent = secondResponse.content + .filter((b) => b.type === "text") + .map((b) => (b.type === "text" ? b.text : "")) + .join(""); + + expect(textContent).toBeTruthy(); + // Should acknowledge the error + expect(textContent.toLowerCase()).toMatch(/error|cannot|division|zero/); + } + }, 30000); +});