diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 8fd65a81..a9ecf1a4 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## [Unreleased] +## [0.25.1] - 2025-12-21 ### Added @@ -8,6 +8,8 @@ ### Fixed +- **Gemini multimodal tool results**: Fixed images in tool results causing flaky/broken responses with Gemini models. For Gemini 3, images are now nested inside `functionResponse.parts` per the [docs](https://ai.google.dev/gemini-api/docs/function-calling#multimodal). For older models (which don't support multimodal function responses), images are sent in a separate user message. + - **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results). - **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl which already included the version path. ([#251](https://github.com/badlogic/pi-mono/pull/251) by [@shellfyred](https://github.com/shellfyred)) diff --git a/packages/ai/src/providers/google-shared.ts b/packages/ai/src/providers/google-shared.ts index d32d171e..e8b00d2a 100644 --- a/packages/ai/src/providers/google-shared.ts +++ b/packages/ai/src/providers/google-shared.ts @@ -86,9 +86,6 @@ export function convertMessages(model: Model, contex parts, }); } else if (msg.role === "toolResult") { - // Build parts array with functionResponse and/or images - const parts: Part[] = []; - // Extract text and image content const textContent = msg.content.filter((c): c is TextContent => c.type === "text"); const textResult = textContent.map((c) => c.text).join("\n"); @@ -96,40 +93,51 @@ export function convertMessages(model: Model, contex ? msg.content.filter((c): c is ImageContent => c.type === "image") : []; - // Always add functionResponse with text result (or placeholder if only images) const hasText = textResult.length > 0; const hasImages = imageContent.length > 0; + // Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts + // See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal + // Older models don't support this, so we put images in a separate user message. + const supportsMultimodalFunctionResponse = model.id.includes("gemini-3"); + // Use "output" key for success, "error" key for errors as per SDK documentation const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : ""; - parts.push({ + const imageParts: Part[] = imageContent.map((imageBlock) => ({ + inlineData: { + mimeType: imageBlock.mimeType, + data: imageBlock.data, + }, + })); + + const functionResponsePart: Part = { functionResponse: { id: msg.toolCallId, name: msg.toolName, response: msg.isError ? { error: responseValue } : { output: responseValue }, + // Nest images inside functionResponse.parts for Gemini 3 + ...(hasImages && supportsMultimodalFunctionResponse && { parts: imageParts }), }, - }); - - // Add any images as inlineData parts - for (const imageBlock of imageContent) { - parts.push({ - inlineData: { - mimeType: imageBlock.mimeType, - data: imageBlock.data, - }, - }); - } + }; // Cloud Code Assist API requires all function responses to be in a single user turn. // Check if the last content is already a user turn with function responses and merge. const lastContent = contents[contents.length - 1]; if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) { - lastContent.parts.push(...parts); + lastContent.parts.push(functionResponsePart); } else { contents.push({ role: "user", - parts, + parts: [functionResponsePart], + }); + } + + // For older models, add images in a separate user message + if (hasImages && !supportsMultimodalFunctionResponse) { + contents.push({ + role: "user", + parts: [{ text: "Tool result image:" }, ...imageParts], }); } }