From 6289c144bf7b4b45810ca15bf3e07f229338ca6a Mon Sep 17 00:00:00 2001
From: Michael Renner
Date: Thu, 22 Jan 2026 13:10:10 +0100
Subject: [PATCH] fix(ai): batch tool-result images after consecutive tool
 results (#902)

Fixes 400 errors when reading multiple images via GitHub Copilot's
Claude models. Claude requires tool_use -> tool_result adjacency with
no user messages interleaved.

Before: assistant(tool_calls) -> tool -> user(images) -> tool -> user(images)
After:  assistant(tool_calls) -> tool -> tool -> user(all images)
---
 .../ai/src/providers/openai-completions.ts    | 98 +++++++++++--------
 ...nai-completions-tool-result-images.test.ts | 95 ++++++++++++++++++
 2 files changed, 153 insertions(+), 40 deletions(-)
 create mode 100644 packages/ai/test/openai-completions-tool-result-images.test.ts

diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 42576f0f..e410e43f 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -23,6 +23,7 @@ import type {
 	ThinkingContent,
 	Tool,
 	ToolCall,
+	ToolResultMessage,
 } from "../types.js";
 import { AssistantMessageEventStream } from "../utils/event-stream.js";
 import { parseStreamingJson } from "../utils/json-parse.js";
@@ -459,7 +460,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
 	}
 }
 
-function convertMessages(
+export function convertMessages(
 	model: Model<"openai-completions">,
 	context: Context,
 	compat: Required<OpenAICompletionsCompat>,
@@ -486,7 +487,8 @@ function convertMessages(
 
 	let lastRole: string | null = null;
 
-	for (const msg of transformedMessages) {
+	for (let i = 0; i < transformedMessages.length; i++) {
+		const msg = transformedMessages[i];
 		// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
 		// Insert a synthetic assistant message to bridge the gap
 		if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
@@ -610,55 +612,71 @@ function convertMessages(
 			}
 			params.push(assistantMsg);
 		} else if (msg.role === "toolResult") {
-			// Extract text and image content
-			const textResult = msg.content
-				.filter((c) => c.type === "text")
-				.map((c) => (c as any).text)
-				.join("\n");
-			const hasImages = msg.content.some((c) => c.type === "image");
+			const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
+			let j = i;
 
-			// Always send tool result with text (or placeholder if only images)
-			const hasText = textResult.length > 0;
-			// Some providers (e.g. Mistral) require the 'name' field in tool results
-			const toolResultMsg: ChatCompletionToolMessageParam = {
-				role: "tool",
-				content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
-				tool_call_id: msg.toolCallId,
-			};
-			if (compat.requiresToolResultName && msg.toolName) {
-				(toolResultMsg as any).name = msg.toolName;
-			}
-			params.push(toolResultMsg);
+			for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
+				const toolMsg = transformedMessages[j] as ToolResultMessage;
 
-			// If there are images and model supports them, send a follow-up user message with images
-			if (hasImages && model.input.includes("image")) {
-				const contentBlocks: Array<
-					{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
-				> = [];
+				// Extract text and image content
+				const textResult = toolMsg.content
+					.filter((c) => c.type === "text")
+					.map((c) => (c as any).text)
+					.join("\n");
+				const hasImages = toolMsg.content.some((c) => c.type === "image");
 
-				// Add text prefix
-				contentBlocks.push({
-					type: "text",
-					text: "Attached image(s) from tool result:",
-				});
+				// Always send tool result with text (or placeholder if only images)
+				const hasText = textResult.length > 0;
+				// Some providers (e.g. Mistral) require the 'name' field in tool results
+				const toolResultMsg: ChatCompletionToolMessageParam = {
+					role: "tool",
+					content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
+					tool_call_id: toolMsg.toolCallId,
+				};
+				if (compat.requiresToolResultName && toolMsg.toolName) {
+					(toolResultMsg as any).name = toolMsg.toolName;
+				}
+				params.push(toolResultMsg);
 
-				// Add images
-				for (const block of msg.content) {
-					if (block.type === "image") {
-						contentBlocks.push({
-							type: "image_url",
-							image_url: {
-								url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
-							},
-						});
+				if (hasImages && model.input.includes("image")) {
+					for (const block of toolMsg.content) {
+						if (block.type === "image") {
+							imageBlocks.push({
+								type: "image_url",
+								image_url: {
+									url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
+								},
+							});
+						}
 					}
 				}
+			}
+
+			i = j - 1;
+
+			if (imageBlocks.length > 0) {
+				if (compat.requiresAssistantAfterToolResult) {
+					params.push({
+						role: "assistant",
+						content: "I have processed the tool results.",
+					});
+				}
 
 				params.push({
 					role: "user",
-					content: contentBlocks,
+					content: [
+						{
+							type: "text",
+							text: "Attached image(s) from tool result:",
+						},
+						...imageBlocks,
+					],
 				});
+				lastRole = "user";
+			} else {
+				lastRole = "toolResult";
 			}
+			continue;
 		}
 
 		lastRole = msg.role;
diff --git a/packages/ai/test/openai-completions-tool-result-images.test.ts b/packages/ai/test/openai-completions-tool-result-images.test.ts
new file mode 100644
index 00000000..3a909022
--- /dev/null
+++ b/packages/ai/test/openai-completions-tool-result-images.test.ts
@@ -0,0 +1,95 @@
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { convertMessages } from "../src/providers/openai-completions.js";
+import type {
+	AssistantMessage,
+	Context,
+	Model,
+	OpenAICompletionsCompat,
+	ToolResultMessage,
+	Usage,
+} from "../src/types.js";
+
+const emptyUsage: Usage = {
+	input: 0,
+	output: 0,
+	cacheRead: 0,
+	cacheWrite: 0,
+	totalTokens: 0,
+	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+};
+
+const compat: Required<OpenAICompletionsCompat> = {
+	supportsStore: true,
+	supportsDeveloperRole: true,
+	supportsReasoningEffort: true,
+	supportsUsageInStreaming: true,
+	maxTokensField: "max_completion_tokens",
+	requiresToolResultName: false,
+	requiresAssistantAfterToolResult: false,
+	requiresThinkingAsText: false,
+	requiresMistralToolIds: false,
+	thinkingFormat: "openai",
+};
+
+function buildToolResult(toolCallId: string, timestamp: number): ToolResultMessage {
+	return {
+		role: "toolResult",
+		toolCallId,
+		toolName: "read",
+		content: [
+			{ type: "text", text: "Read image file [image/png]" },
+			{ type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
+		],
+		isError: false,
+		timestamp,
+	};
+}
+
+describe("openai-completions convertMessages", () => {
+	it("batches tool-result images after consecutive tool results", () => {
+		const baseModel = getModel("openai", "gpt-4o-mini");
+		const model: Model<"openai-completions"> = {
+			...baseModel,
+			api: "openai-completions",
+			input: ["text", "image"],
+		};
+
+		const now = Date.now();
+		const assistantMessage: AssistantMessage = {
+			role: "assistant",
+			content: [
+				{ type: "toolCall", id: "tool-1", name: "read", arguments: { path: "img-1.png" } },
+				{ type: "toolCall", id: "tool-2", name: "read", arguments: { path: "img-2.png" } },
+			],
+			api: model.api,
+			provider: model.provider,
+			model: model.id,
+			usage: emptyUsage,
+			stopReason: "toolUse",
+			timestamp: now,
+		};
+
+		const context: Context = {
+			messages: [
+				{ role: "user", content: "Read the images", timestamp: now - 2 },
+				assistantMessage,
+				buildToolResult("tool-1", now + 1),
+				buildToolResult("tool-2", now + 2),
+			],
+		};
+
+		const messages = convertMessages(model, context, compat);
+		const roles = messages.map((message) => message.role);
+		expect(roles).toEqual(["user", "assistant", "tool", "tool", "user"]);
+
+		const imageMessage = messages[messages.length - 1];
+		expect(imageMessage.role).toBe("user");
+		expect(Array.isArray(imageMessage.content)).toBe(true);
+
+		const imageParts = (imageMessage.content as Array<{ type?: string }>).filter(
+			(part) => part?.type === "image_url",
+		);
+		expect(imageParts.length).toBe(2);
+	});
+});