co-mono/packages/ai/test/gemini-3-flash-tool-calling.test.ts
Mario Zechner 84018b0707 fix(ai): correct Gemini tool result format and improve type safety
- Fix tool result format for Gemini 3 Flash Preview compatibility
  - Use 'output' key for successful results (not 'result')
  - Use 'error' key for error results (not 'isError')
  - Per Google SDK documentation for FunctionResponse.response

- Improve type safety in google.ts provider
  - Add ImageContent import and use proper type guards
  - Replace 'as any' casts with proper typing
  - Import and use Schema type for tool parameters
  - Add proper typing for index deletion in error handler

- Add comprehensive test for Gemini 3 Flash tool calling
  - Tests successful tool call and result handling
  - Tests error tool result handling
  - Verifies fix for issue #213

Fixes #213
2025-12-18 13:43:39 +00:00

166 lines
5.3 KiB
TypeScript

import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context, Tool, ToolResultMessage } from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js";
/**
* Test for Gemini 3 Flash Preview tool calling compatibility.
*
* Issue #213: The model works and tool calling works, but the problem is how pi-ai
* formats the tool result message when sending it back to Gemini 3 Flash Preview.
*
* The SDK documentation states:
* "Use 'output' key to specify function output and 'error' key to specify error details"
*
* But the code was using `result` and `isError` keys, which Gemini 3 Flash Preview
* rejects (older models were more lenient).
*/
// Calculator tool definition
const calculatorSchema = Type.Object({
a: Type.Number({ description: "First number" }),
b: Type.Number({ description: "Second number" }),
operation: StringEnum(["add", "subtract", "multiply", "divide"], {
description: "The operation to perform. One of 'add', 'subtract', 'multiply', 'divide'.",
}),
});
const calculatorTool: Tool<typeof calculatorSchema> = {
name: "calculator",
description: "Perform basic arithmetic operations",
parameters: calculatorSchema,
};
describe("Gemini 3 Flash Preview Tool Calling", () => {
it("should handle tool calls and tool results with correct format", async () => {
if (!process.env.GEMINI_API_KEY) {
console.log("Skipping test - GEMINI_API_KEY not set");
return;
}
const model = getModel("google", "gemini-3-flash-preview");
const context: Context = {
systemPrompt: "You are a helpful assistant that uses tools when asked.",
messages: [
{
role: "user",
content: "Calculate 15 + 27 using the calculator tool.",
timestamp: Date.now(),
},
],
tools: [calculatorTool],
};
// First call - model should request tool call
const firstResponse = await complete(model, context);
expect(firstResponse.role).toBe("assistant");
expect(firstResponse.stopReason).toBe("toolUse");
expect(firstResponse.errorMessage).toBeFalsy();
const toolCall = firstResponse.content.find((b) => b.type === "toolCall");
expect(toolCall).toBeTruthy();
expect(toolCall?.type).toBe("toolCall");
if (toolCall?.type === "toolCall") {
expect(toolCall.name).toBe("calculator");
expect(toolCall.id).toBeTruthy();
expect(toolCall.arguments).toBeTruthy();
const { a, b, operation } = toolCall.arguments;
expect(a).toBe(15);
expect(b).toBe(27);
expect(operation).toBe("add");
// Execute the tool
const result = 15 + 27;
// Add tool result to context - this is where the bug was
// The SDK expects { output: value } for success, not { result: value, isError: false }
context.messages.push(firstResponse);
const toolResult: ToolResultMessage = {
role: "toolResult",
toolCallId: toolCall.id,
toolName: toolCall.name,
content: [{ type: "text", text: `${result}` }],
isError: false,
timestamp: Date.now(),
};
context.messages.push(toolResult);
// Second call - model should process the tool result and respond
// This is where Gemini 3 Flash Preview would fail with the old format
const secondResponse = await complete(model, context);
expect(secondResponse.role).toBe("assistant");
expect(secondResponse.stopReason).toBe("stop");
expect(secondResponse.errorMessage).toBeFalsy();
const textContent = secondResponse.content
.filter((b) => b.type === "text")
.map((b) => (b.type === "text" ? b.text : ""))
.join("");
expect(textContent).toBeTruthy();
// Should mention the result 42
expect(textContent.toLowerCase()).toMatch(/42/);
}
}, 30000); // 30 second timeout
it("should handle tool errors with correct format", async () => {
if (!process.env.GEMINI_API_KEY) {
console.log("Skipping test - GEMINI_API_KEY not set");
return;
}
const model = getModel("google", "gemini-3-flash-preview");
const context: Context = {
systemPrompt: "You are a helpful assistant that uses tools when asked.",
messages: [
{
role: "user",
content: "Calculate 10 divided by 0 using the calculator tool.",
timestamp: Date.now(),
},
],
tools: [calculatorTool],
};
const firstResponse = await complete(model, context);
expect(firstResponse.stopReason).toBe("toolUse");
const toolCall = firstResponse.content.find((b) => b.type === "toolCall");
if (toolCall?.type === "toolCall") {
// Add error result - should use { error: message } format
context.messages.push(firstResponse);
const errorResult: ToolResultMessage = {
role: "toolResult",
toolCallId: toolCall.id,
toolName: toolCall.name,
content: [{ type: "text", text: "Error: Division by zero" }],
isError: true,
timestamp: Date.now(),
};
context.messages.push(errorResult);
// Model should handle the error response
const secondResponse = await complete(model, context);
expect(secondResponse.role).toBe("assistant");
expect(secondResponse.errorMessage).toBeFalsy();
const textContent = secondResponse.content
.filter((b) => b.type === "text")
.map((b) => (b.type === "text" ? b.text : ""))
.join("");
expect(textContent).toBeTruthy();
// Should acknowledge the error
expect(textContent.toLowerCase()).toMatch(/error|cannot|division|zero/);
}
}, 30000);
});