fix(ai): batch tool-result images after consecutive tool results (#902)

Fixes 400 errors when reading multiple images via GitHub Copilot's
Claude models. Claude requires tool_use -> tool_result adjacency with
no user messages interleaved.

Before: assistant(tool_calls) -> tool -> user(images) -> tool -> user(images)
After:  assistant(tool_calls) -> tool -> tool -> user(all images)
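
In OpenAI chat-completions terms, the two shapes look roughly like this (a simplified sketch; message contents abbreviated, "tool-1"/"tool-2" are illustrative tool_call ids):

// Before: each tool result was followed by its own user message carrying
// that result's images, splitting the tool_use -> tool_result run.
const before = [
  { role: "assistant", tool_calls: ["tool-1", "tool-2"] },
  { role: "tool", tool_call_id: "tool-1", content: "(see attached image)" },
  { role: "user", content: ["<image from tool-1>"] }, // breaks adjacency -> 400
  { role: "tool", tool_call_id: "tool-2", content: "(see attached image)" },
  { role: "user", content: ["<image from tool-2>"] },
];

// After: tool results stay adjacent; all images land in one trailing user message.
const after = [
  { role: "assistant", tool_calls: ["tool-1", "tool-2"] },
  { role: "tool", tool_call_id: "tool-1", content: "(see attached image)" },
  { role: "tool", tool_call_id: "tool-2", content: "(see attached image)" },
  { role: "user", content: ["<images from tool-1 and tool-2>"] },
];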
commit 6289c144bf
parent c083e195ad
Author: Michael Renner (committed by GitHub)
Date:   2026-01-22 13:10:10 +01:00
2 changed files with 153 additions and 40 deletions

src/providers/openai-completions.ts

@@ -23,6 +23,7 @@ import type {
   ThinkingContent,
   Tool,
   ToolCall,
+  ToolResultMessage,
 } from "../types.js";
 import { AssistantMessageEventStream } from "../utils/event-stream.js";
 import { parseStreamingJson } from "../utils/json-parse.js";
@@ -459,7 +460,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
   }
 }
 
-function convertMessages(
+export function convertMessages(
   model: Model<"openai-completions">,
   context: Context,
   compat: Required<OpenAICompletionsCompat>,
@@ -486,7 +487,8 @@ function convertMessages(
   let lastRole: string | null = null;
-  for (const msg of transformedMessages) {
+  for (let i = 0; i < transformedMessages.length; i++) {
+    const msg = transformedMessages[i];
     // Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
     // Insert a synthetic assistant message to bridge the gap
     if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
@@ -610,12 +612,18 @@ function convertMessages(
       }
       params.push(assistantMsg);
     } else if (msg.role === "toolResult") {
+      const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
+      let j = i;
+      for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
+        const toolMsg = transformedMessages[j] as ToolResultMessage;
        // Extract text and image content
-      const textResult = msg.content
+        const textResult = toolMsg.content
          .filter((c) => c.type === "text")
          .map((c) => (c as any).text)
          .join("\n");
-      const hasImages = msg.content.some((c) => c.type === "image");
+        const hasImages = toolMsg.content.some((c) => c.type === "image");
 
        // Always send tool result with text (or placeholder if only images)
        const hasText = textResult.length > 0;
@@ -623,29 +631,17 @@ function convertMessages(
        const toolResultMsg: ChatCompletionToolMessageParam = {
          role: "tool",
          content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
-         tool_call_id: msg.toolCallId,
+         tool_call_id: toolMsg.toolCallId,
        };
-       if (compat.requiresToolResultName && msg.toolName) {
-         (toolResultMsg as any).name = msg.toolName;
+       if (compat.requiresToolResultName && toolMsg.toolName) {
+         (toolResultMsg as any).name = toolMsg.toolName;
        }
        params.push(toolResultMsg);
 
-      // If there are images and model supports them, send a follow-up user message with images
        if (hasImages && model.input.includes("image")) {
-        const contentBlocks: Array<
-          { type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
-        > = [];
-
-        // Add text prefix
-        contentBlocks.push({
-          type: "text",
-          text: "Attached image(s) from tool result:",
-        });
-
-        // Add images
-        for (const block of msg.content) {
+          for (const block of toolMsg.content) {
            if (block.type === "image") {
-            contentBlocks.push({
+              imageBlocks.push({
                type: "image_url",
                image_url: {
                  url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
@@ -653,12 +649,34 @@ function convertMessages(
              });
            }
          }
+        }
+      }
+      i = j - 1;
+
+      if (imageBlocks.length > 0) {
+        if (compat.requiresAssistantAfterToolResult) {
+          params.push({
+            role: "assistant",
+            content: "I have processed the tool results.",
+          });
+        }
         params.push({
           role: "user",
-          content: contentBlocks,
+          content: [
+            {
+              type: "text",
+              text: "Attached image(s) from tool result:",
+            },
+            ...imageBlocks,
+          ],
         });
+        lastRole = "user";
+      } else {
+        lastRole = "toolResult";
       }
+      continue;
     }
     lastRole = msg.role;
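
Taken together, the new toolResult branch scans a whole run of consecutive tool results before emitting any images. A standalone sketch of that pattern, with simplified types (not the verbatim code above; batchToolResults and Msg are illustrative):

// Simplified sketch of the batching pattern (types reduced to essentials).
type Msg =
  | { role: "toolResult"; toolCallId: string; text: string; images: string[] }
  | { role: "user" | "assistant"; content: string };

function batchToolResults(messages: Msg[]): Array<Record<string, unknown>> {
  const params: Array<Record<string, unknown>> = [];
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg.role !== "toolResult") {
      params.push({ role: msg.role, content: msg.content });
      continue;
    }
    // Consume the entire run of consecutive tool results first...
    const imageBlocks: string[] = [];
    let j = i;
    for (; j < messages.length && messages[j].role === "toolResult"; j++) {
      const toolMsg = messages[j] as Extract<Msg, { role: "toolResult" }>;
      params.push({ role: "tool", tool_call_id: toolMsg.toolCallId, content: toolMsg.text });
      imageBlocks.push(...toolMsg.images); // defer images instead of emitting them inline
    }
    i = j - 1; // resync the outer loop past the consumed run
    // ...then emit one batched user message carrying every collected image.
    if (imageBlocks.length > 0) {
      params.push({ role: "user", content: imageBlocks });
    }
  }
  return params;
}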

New test file:

@@ -0,0 +1,95 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { convertMessages } from "../src/providers/openai-completions.js";
import type {
  AssistantMessage,
  Context,
  Model,
  OpenAICompletionsCompat,
  ToolResultMessage,
  Usage,
} from "../src/types.js";

const emptyUsage: Usage = {
  input: 0,
  output: 0,
  cacheRead: 0,
  cacheWrite: 0,
  totalTokens: 0,
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};

const compat: Required<OpenAICompletionsCompat> = {
  supportsStore: true,
  supportsDeveloperRole: true,
  supportsReasoningEffort: true,
  supportsUsageInStreaming: true,
  maxTokensField: "max_completion_tokens",
  requiresToolResultName: false,
  requiresAssistantAfterToolResult: false,
  requiresThinkingAsText: false,
  requiresMistralToolIds: false,
  thinkingFormat: "openai",
};

function buildToolResult(toolCallId: string, timestamp: number): ToolResultMessage {
  return {
    role: "toolResult",
    toolCallId,
    toolName: "read",
    content: [
      { type: "text", text: "Read image file [image/png]" },
      { type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
    ],
    isError: false,
    timestamp,
  };
}

describe("openai-completions convertMessages", () => {
  it("batches tool-result images after consecutive tool results", () => {
    const baseModel = getModel("openai", "gpt-4o-mini");
    const model: Model<"openai-completions"> = {
      ...baseModel,
      api: "openai-completions",
      input: ["text", "image"],
    };

    const now = Date.now();
    const assistantMessage: AssistantMessage = {
      role: "assistant",
      content: [
        { type: "toolCall", id: "tool-1", name: "read", arguments: { path: "img-1.png" } },
        { type: "toolCall", id: "tool-2", name: "read", arguments: { path: "img-2.png" } },
      ],
      api: model.api,
      provider: model.provider,
      model: model.id,
      usage: emptyUsage,
      stopReason: "toolUse",
      timestamp: now,
    };

    const context: Context = {
      messages: [
        { role: "user", content: "Read the images", timestamp: now - 2 },
        assistantMessage,
        buildToolResult("tool-1", now + 1),
        buildToolResult("tool-2", now + 2),
      ],
    };

    const messages = convertMessages(model, context, compat);
    const roles = messages.map((message) => message.role);
    expect(roles).toEqual(["user", "assistant", "tool", "tool", "user"]);

    const imageMessage = messages[messages.length - 1];
    expect(imageMessage.role).toBe("user");
    expect(Array.isArray(imageMessage.content)).toBe(true);

    const imageParts = (imageMessage.content as Array<{ type?: string }>).filter(
      (part) => part?.type === "image_url",
    );
    expect(imageParts.length).toBe(2);
  });
});
});