fix(ai): batch tool-result images after consecutive tool results (#902)

Fixes 400 errors when reading multiple images via GitHub Copilot's
Claude models. Claude requires tool_use -> tool_result adjacency with
no user messages interleaved.

Before: assistant(tool_calls) -> tool -> user(images) -> tool -> user(images)
After:  assistant(tool_calls) -> tool -> tool -> user(all images)
This commit is contained in:
Michael Renner 2026-01-22 13:10:10 +01:00 committed by GitHub
parent c083e195ad
commit 6289c144bf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 153 additions and 40 deletions

View file

@ -23,6 +23,7 @@ import type {
ThinkingContent,
Tool,
ToolCall,
ToolResultMessage,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
@ -459,7 +460,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
}
}
function convertMessages(
export function convertMessages(
model: Model<"openai-completions">,
context: Context,
compat: Required<OpenAICompletionsCompat>,
@ -486,7 +487,8 @@ function convertMessages(
let lastRole: string | null = null;
for (const msg of transformedMessages) {
for (let i = 0; i < transformedMessages.length; i++) {
const msg = transformedMessages[i];
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
// Insert a synthetic assistant message to bridge the gap
if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
@ -610,55 +612,71 @@ function convertMessages(
}
params.push(assistantMsg);
} else if (msg.role === "toolResult") {
// Extract text and image content
const textResult = msg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.join("\n");
const hasImages = msg.content.some((c) => c.type === "image");
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
let j = i;
// Always send tool result with text (or placeholder if only images)
const hasText = textResult.length > 0;
// Some providers (e.g. Mistral) require the 'name' field in tool results
const toolResultMsg: ChatCompletionToolMessageParam = {
role: "tool",
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
tool_call_id: msg.toolCallId,
};
if (compat.requiresToolResultName && msg.toolName) {
(toolResultMsg as any).name = msg.toolName;
}
params.push(toolResultMsg);
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
const toolMsg = transformedMessages[j] as ToolResultMessage;
// If there are images and model supports them, send a follow-up user message with images
if (hasImages && model.input.includes("image")) {
const contentBlocks: Array<
{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
> = [];
// Extract text and image content
const textResult = toolMsg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.join("\n");
const hasImages = toolMsg.content.some((c) => c.type === "image");
// Add text prefix
contentBlocks.push({
type: "text",
text: "Attached image(s) from tool result:",
});
// Always send tool result with text (or placeholder if only images)
const hasText = textResult.length > 0;
// Some providers (e.g. Mistral) require the 'name' field in tool results
const toolResultMsg: ChatCompletionToolMessageParam = {
role: "tool",
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
tool_call_id: toolMsg.toolCallId,
};
if (compat.requiresToolResultName && toolMsg.toolName) {
(toolResultMsg as any).name = toolMsg.toolName;
}
params.push(toolResultMsg);
// Add images
for (const block of msg.content) {
if (block.type === "image") {
contentBlocks.push({
type: "image_url",
image_url: {
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
},
});
if (hasImages && model.input.includes("image")) {
for (const block of toolMsg.content) {
if (block.type === "image") {
imageBlocks.push({
type: "image_url",
image_url: {
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
},
});
}
}
}
}
i = j - 1;
if (imageBlocks.length > 0) {
if (compat.requiresAssistantAfterToolResult) {
params.push({
role: "assistant",
content: "I have processed the tool results.",
});
}
params.push({
role: "user",
content: contentBlocks,
content: [
{
type: "text",
text: "Attached image(s) from tool result:",
},
...imageBlocks,
],
});
lastRole = "user";
} else {
lastRole = "toolResult";
}
continue;
}
lastRole = msg.role;

View file

@ -0,0 +1,95 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { convertMessages } from "../src/providers/openai-completions.js";
import type {
AssistantMessage,
Context,
Model,
OpenAICompletionsCompat,
ToolResultMessage,
Usage,
} from "../src/types.js";
/**
 * All-zero usage record attached to the hand-built assistant message below.
 * Every token counter and every cost component is 0.
 */
const emptyUsage: Usage = {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 0,
	cost: {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		total: 0,
	},
};
/**
 * Fully-specified compat settings handed to `convertMessages` in this suite.
 * All `requires*` provider workarounds are switched off.
 */
const compat: Required<OpenAICompletionsCompat> = {
	// Capabilities reported as supported.
	supportsStore: true,
	supportsDeveloperRole: true,
	supportsReasoningEffort: true,
	supportsUsageInStreaming: true,

	maxTokensField: "max_completion_tokens",

	// Provider-specific workarounds: all disabled.
	requiresToolResultName: false,
	requiresAssistantAfterToolResult: false,
	requiresThinkingAsText: false,
	requiresMistralToolIds: false,

	thinkingFormat: "openai",
};
/**
 * Creates a successful "read" tool result carrying one text part and one
 * PNG image part.
 *
 * @param toolCallId - Id of the tool call this result answers.
 * @param timestamp - Timestamp stored on the message.
 * @returns A fresh `ToolResultMessage` with `isError` set to false.
 */
function buildToolResult(toolCallId: string, timestamp: number): ToolResultMessage {
	// One text part plus one image part, so the result exercises both paths.
	const content: ToolResultMessage["content"] = [
		{ type: "text", text: "Read image file [image/png]" },
		{ type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
	];

	return {
		role: "toolResult",
		toolCallId,
		toolName: "read",
		content,
		isError: false,
		timestamp,
	};
}
describe("openai-completions convertMessages", () => {
	// Two consecutive image-bearing tool results must stay adjacent as `tool`
	// messages, followed by a single user message carrying all of their images.
	it("batches tool-result images after consecutive tool results", () => {
		// Start from a known model and force image input support on it.
		const model: Model<"openai-completions"> = {
			...getModel("openai", "gpt-4o-mini"),
			api: "openai-completions",
			input: ["text", "image"],
		};
		const timestamp = Date.now();

		// Assistant turn that issues two tool calls in one message.
		const toolCallTurn: AssistantMessage = {
			role: "assistant",
			content: [
				{ type: "toolCall", id: "tool-1", name: "read", arguments: { path: "img-1.png" } },
				{ type: "toolCall", id: "tool-2", name: "read", arguments: { path: "img-2.png" } },
			],
			api: model.api,
			provider: model.provider,
			model: model.id,
			usage: emptyUsage,
			stopReason: "toolUse",
			timestamp,
		};

		const context: Context = {
			messages: [
				{ role: "user", content: "Read the images", timestamp: timestamp - 2 },
				toolCallTurn,
				buildToolResult("tool-1", timestamp + 1),
				buildToolResult("tool-2", timestamp + 2),
			],
		};

		const converted = convertMessages(model, context, compat);

		// Both tool results come first; exactly one user message follows them.
		expect(converted.map(({ role }) => role)).toEqual(["user", "assistant", "tool", "tool", "user"]);

		const trailing = converted[converted.length - 1];
		expect(trailing.role).toBe("user");
		expect(Array.isArray(trailing.content)).toBe(true);

		// The trailing user message holds one image part per tool result.
		const parts = trailing.content as Array<{ type?: string }>;
		const imageParts = parts.filter((part) => part?.type === "image_url");
		expect(imageParts.length).toBe(2);
	});
});