Fix Gemini multimodal tool results causing flaky responses

For Gemini 3, images are now nested inside functionResponse.parts per the docs.
For older models, which do not support multimodal function responses, images are
sent in a separate user message.

See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
This commit is contained in:
Mario Zechner 2025-12-21 02:41:27 +01:00
parent ce950ae96e
commit bf51dd4126
2 changed files with 29 additions and 19 deletions

View file

@ -1,6 +1,6 @@
# Changelog
## [Unreleased]
## [0.25.1] - 2025-12-21
### Added
@ -8,6 +8,8 @@
### Fixed
- **Gemini multimodal tool results**: Fixed images in tool results causing flaky/broken responses with Gemini models. For Gemini 3, images are now nested inside `functionResponse.parts` per the [docs](https://ai.google.dev/gemini-api/docs/function-calling#multimodal). For older models (which don't support multimodal function responses), images are sent in a separate user message.
- **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results).
- **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl, which already included the version path. ([#251](https://github.com/badlogic/pi-mono/pull/251) by [@shellfyred](https://github.com/shellfyred))

View file

@ -86,9 +86,6 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
parts,
});
} else if (msg.role === "toolResult") {
// Build parts array with functionResponse and/or images
const parts: Part[] = [];
// Extract text and image content
const textContent = msg.content.filter((c): c is TextContent => c.type === "text");
const textResult = textContent.map((c) => c.text).join("\n");
@ -96,40 +93,51 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
? msg.content.filter((c): c is ImageContent => c.type === "image")
: [];
// Always add functionResponse with text result (or placeholder if only images)
const hasText = textResult.length > 0;
const hasImages = imageContent.length > 0;
// Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
// See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
// Older models don't support this, so we put images in a separate user message.
const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");
// Use "output" key for success, "error" key for errors as per SDK documentation
const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "";
parts.push({
const imageParts: Part[] = imageContent.map((imageBlock) => ({
inlineData: {
mimeType: imageBlock.mimeType,
data: imageBlock.data,
},
}));
const functionResponsePart: Part = {
functionResponse: {
id: msg.toolCallId,
name: msg.toolName,
response: msg.isError ? { error: responseValue } : { output: responseValue },
// Nest images inside functionResponse.parts for Gemini 3
...(hasImages && supportsMultimodalFunctionResponse && { parts: imageParts }),
},
});
// Add any images as inlineData parts
for (const imageBlock of imageContent) {
parts.push({
inlineData: {
mimeType: imageBlock.mimeType,
data: imageBlock.data,
},
});
}
};
// Cloud Code Assist API requires all function responses to be in a single user turn.
// Check if the last content is already a user turn with function responses and merge.
const lastContent = contents[contents.length - 1];
if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
lastContent.parts.push(...parts);
lastContent.parts.push(functionResponsePart);
} else {
contents.push({
role: "user",
parts,
parts: [functionResponsePart],
});
}
// For older models, add images in a separate user message
if (hasImages && !supportsMultimodalFunctionResponse) {
contents.push({
role: "user",
parts: [{ text: "Tool result image:" }, ...imageParts],
});
}
}