mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 02:04:32 +00:00
Fix Gemini multimodal tool results causing flaky responses
For Gemini 3, images in tool results are now nested inside functionResponse.parts, as described in the docs. Older models don't support multimodal function responses, so for them the images are sent in a separate user message instead. See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
This commit is contained in:
parent
ce950ae96e
commit
bf51dd4126
2 changed files with 29 additions and 19 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
## [Unreleased]
|
## [0.25.1] - 2025-12-21
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
|
|
@ -8,6 +8,8 @@
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
- **Gemini multimodal tool results**: Fixed images in tool results causing flaky/broken responses with Gemini models. For Gemini 3, images are now nested inside `functionResponse.parts` per the [docs](https://ai.google.dev/gemini-api/docs/function-calling#multimodal). For older models (which don't support multimodal function responses), images are sent in a separate user message.
|
||||||
|
|
||||||
- **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results).
|
- **Queued message steering**: When `getQueuedMessages` is provided, the agent loop now checks for queued user messages after each tool call and skips remaining tool calls in the current assistant message when a queued message arrives (emitting error tool results).
|
||||||
|
|
||||||
- **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl which already included the version path. ([#251](https://github.com/badlogic/pi-mono/pull/251) by [@shellfyred](https://github.com/shellfyred))
|
- **Double API version path in Google provider URL**: Fixed Gemini API calls returning 404 after baseUrl support was added. The SDK was appending its default apiVersion to baseUrl which already included the version path. ([#251](https://github.com/badlogic/pi-mono/pull/251) by [@shellfyred](https://github.com/shellfyred))
|
||||||
|
|
|
||||||
|
|
@ -86,9 +86,6 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
|
||||||
parts,
|
parts,
|
||||||
});
|
});
|
||||||
} else if (msg.role === "toolResult") {
|
} else if (msg.role === "toolResult") {
|
||||||
// Build parts array with functionResponse and/or images
|
|
||||||
const parts: Part[] = [];
|
|
||||||
|
|
||||||
// Extract text and image content
|
// Extract text and image content
|
||||||
const textContent = msg.content.filter((c): c is TextContent => c.type === "text");
|
const textContent = msg.content.filter((c): c is TextContent => c.type === "text");
|
||||||
const textResult = textContent.map((c) => c.text).join("\n");
|
const textResult = textContent.map((c) => c.text).join("\n");
|
||||||
|
|
@ -96,40 +93,51 @@ export function convertMessages<T extends GoogleApiType>(model: Model<T>, contex
|
||||||
? msg.content.filter((c): c is ImageContent => c.type === "image")
|
? msg.content.filter((c): c is ImageContent => c.type === "image")
|
||||||
: [];
|
: [];
|
||||||
|
|
||||||
// Always add functionResponse with text result (or placeholder if only images)
|
|
||||||
const hasText = textResult.length > 0;
|
const hasText = textResult.length > 0;
|
||||||
const hasImages = imageContent.length > 0;
|
const hasImages = imageContent.length > 0;
|
||||||
|
|
||||||
|
// Gemini 3 supports multimodal function responses with images nested inside functionResponse.parts
|
||||||
|
// See: https://ai.google.dev/gemini-api/docs/function-calling#multimodal
|
||||||
|
// Older models don't support this, so we put images in a separate user message.
|
||||||
|
const supportsMultimodalFunctionResponse = model.id.includes("gemini-3");
|
||||||
|
|
||||||
// Use "output" key for success, "error" key for errors as per SDK documentation
|
// Use "output" key for success, "error" key for errors as per SDK documentation
|
||||||
const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "";
|
const responseValue = hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "";
|
||||||
|
|
||||||
parts.push({
|
const imageParts: Part[] = imageContent.map((imageBlock) => ({
|
||||||
|
inlineData: {
|
||||||
|
mimeType: imageBlock.mimeType,
|
||||||
|
data: imageBlock.data,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
|
||||||
|
const functionResponsePart: Part = {
|
||||||
functionResponse: {
|
functionResponse: {
|
||||||
id: msg.toolCallId,
|
id: msg.toolCallId,
|
||||||
name: msg.toolName,
|
name: msg.toolName,
|
||||||
response: msg.isError ? { error: responseValue } : { output: responseValue },
|
response: msg.isError ? { error: responseValue } : { output: responseValue },
|
||||||
|
// Nest images inside functionResponse.parts for Gemini 3
|
||||||
|
...(hasImages && supportsMultimodalFunctionResponse && { parts: imageParts }),
|
||||||
},
|
},
|
||||||
});
|
};
|
||||||
|
|
||||||
// Add any images as inlineData parts
|
|
||||||
for (const imageBlock of imageContent) {
|
|
||||||
parts.push({
|
|
||||||
inlineData: {
|
|
||||||
mimeType: imageBlock.mimeType,
|
|
||||||
data: imageBlock.data,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cloud Code Assist API requires all function responses to be in a single user turn.
|
// Cloud Code Assist API requires all function responses to be in a single user turn.
|
||||||
// Check if the last content is already a user turn with function responses and merge.
|
// Check if the last content is already a user turn with function responses and merge.
|
||||||
const lastContent = contents[contents.length - 1];
|
const lastContent = contents[contents.length - 1];
|
||||||
if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
|
if (lastContent?.role === "user" && lastContent.parts?.some((p) => p.functionResponse)) {
|
||||||
lastContent.parts.push(...parts);
|
lastContent.parts.push(functionResponsePart);
|
||||||
} else {
|
} else {
|
||||||
contents.push({
|
contents.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
parts,
|
parts: [functionResponsePart],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// For older models, add images in a separate user message
|
||||||
|
if (hasImages && !supportsMultimodalFunctionResponse) {
|
||||||
|
contents.push({
|
||||||
|
role: "user",
|
||||||
|
parts: [{ text: "Tool result image:" }, ...imageParts],
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue