mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 06:04:40 +00:00
fix(ai): batch tool-result images after consecutive tool results (#902)
Fixes 400 errors when reading multiple images via GitHub Copilot's Claude models. Claude requires every tool_use to be followed directly by its tool_result, with no user messages interleaved.
Before: assistant(tool_calls) -> tool -> user(images) -> tool -> user(images)
After: assistant(tool_calls) -> tool -> tool -> user(all images)
This commit is contained in:
parent
c083e195ad
commit
6289c144bf
2 changed files with 153 additions and 40 deletions
|
|
@ -23,6 +23,7 @@ import type {
|
|||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||
|
|
@ -459,7 +460,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
|
|||
}
|
||||
}
|
||||
|
||||
function convertMessages(
|
||||
export function convertMessages(
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
compat: Required<OpenAICompletionsCompat>,
|
||||
|
|
@ -486,7 +487,8 @@ function convertMessages(
|
|||
|
||||
let lastRole: string | null = null;
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
|
||||
// Insert a synthetic assistant message to bridge the gap
|
||||
if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
|
||||
|
|
@ -610,55 +612,71 @@ function convertMessages(
|
|||
}
|
||||
params.push(assistantMsg);
|
||||
} else if (msg.role === "toolResult") {
|
||||
// Extract text and image content
|
||||
const textResult = msg.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => (c as any).text)
|
||||
.join("\n");
|
||||
const hasImages = msg.content.some((c) => c.type === "image");
|
||||
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
|
||||
let j = i;
|
||||
|
||||
// Always send tool result with text (or placeholder if only images)
|
||||
const hasText = textResult.length > 0;
|
||||
// Some providers (e.g. Mistral) require the 'name' field in tool results
|
||||
const toolResultMsg: ChatCompletionToolMessageParam = {
|
||||
role: "tool",
|
||||
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
|
||||
tool_call_id: msg.toolCallId,
|
||||
};
|
||||
if (compat.requiresToolResultName && msg.toolName) {
|
||||
(toolResultMsg as any).name = msg.toolName;
|
||||
}
|
||||
params.push(toolResultMsg);
|
||||
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
|
||||
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
||||
|
||||
// If there are images and model supports them, send a follow-up user message with images
|
||||
if (hasImages && model.input.includes("image")) {
|
||||
const contentBlocks: Array<
|
||||
{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
|
||||
> = [];
|
||||
// Extract text and image content
|
||||
const textResult = toolMsg.content
|
||||
.filter((c) => c.type === "text")
|
||||
.map((c) => (c as any).text)
|
||||
.join("\n");
|
||||
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
||||
|
||||
// Add text prefix
|
||||
contentBlocks.push({
|
||||
type: "text",
|
||||
text: "Attached image(s) from tool result:",
|
||||
});
|
||||
// Always send tool result with text (or placeholder if only images)
|
||||
const hasText = textResult.length > 0;
|
||||
// Some providers (e.g. Mistral) require the 'name' field in tool results
|
||||
const toolResultMsg: ChatCompletionToolMessageParam = {
|
||||
role: "tool",
|
||||
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
|
||||
tool_call_id: toolMsg.toolCallId,
|
||||
};
|
||||
if (compat.requiresToolResultName && toolMsg.toolName) {
|
||||
(toolResultMsg as any).name = toolMsg.toolName;
|
||||
}
|
||||
params.push(toolResultMsg);
|
||||
|
||||
// Add images
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "image") {
|
||||
contentBlocks.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
||||
},
|
||||
});
|
||||
if (hasImages && model.input.includes("image")) {
|
||||
for (const block of toolMsg.content) {
|
||||
if (block.type === "image") {
|
||||
imageBlocks.push({
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
i = j - 1;
|
||||
|
||||
if (imageBlocks.length > 0) {
|
||||
if (compat.requiresAssistantAfterToolResult) {
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: "I have processed the tool results.",
|
||||
});
|
||||
}
|
||||
|
||||
params.push({
|
||||
role: "user",
|
||||
content: contentBlocks,
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "Attached image(s) from tool result:",
|
||||
},
|
||||
...imageBlocks,
|
||||
],
|
||||
});
|
||||
lastRole = "user";
|
||||
} else {
|
||||
lastRole = "toolResult";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
lastRole = msg.role;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,95 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { convertMessages } from "../src/providers/openai-completions.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
Model,
|
||||
OpenAICompletionsCompat,
|
||||
ToolResultMessage,
|
||||
Usage,
|
||||
} from "../src/types.js";
|
||||
|
||||
/** All-zero usage record attached to the assistant message fixture. */
const emptyUsage: Usage = {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 0,
	cost: {
		input: 0,
		output: 0,
		cacheRead: 0,
		cacheWrite: 0,
		total: 0,
	},
};
|
||||
|
||||
/**
 * Fully-specified compat settings passed to `convertMessages` in this test.
 * Every `supports*` flag is on and every `requires*` workaround is off.
 */
const compat: Required<OpenAICompletionsCompat> = {
	// Capabilities the endpoint is declared to support.
	supportsStore: true,
	supportsDeveloperRole: true,
	supportsReasoningEffort: true,
	supportsUsageInStreaming: true,
	maxTokensField: "max_completion_tokens",
	// Workarounds: all disabled here.
	requiresToolResultName: false,
	requiresAssistantAfterToolResult: false,
	requiresThinkingAsText: false,
	requiresMistralToolIds: false,
	thinkingFormat: "openai",
};
|
||||
|
||||
/**
 * Builds a non-error `read` tool result whose content is one text block
 * followed by one PNG image block (placeholder base64 payload).
 *
 * @param toolCallId - Id of the tool call this result answers.
 * @param timestamp - Timestamp stored on the returned message.
 * @returns The tool-result message fixture.
 */
function buildToolResult(toolCallId: string, timestamp: number): ToolResultMessage {
	const content: ToolResultMessage["content"] = [
		{ type: "text", text: "Read image file [image/png]" },
		{ type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
	];

	return {
		role: "toolResult",
		toolCallId,
		toolName: "read",
		content,
		isError: false,
		timestamp,
	};
}
|
||||
|
||||
describe("openai-completions convertMessages", () => {
	// Two consecutive image-bearing tool results must be emitted as two adjacent
	// `tool` messages followed by ONE user message holding both images.
	it("batches tool-result images after consecutive tool results", () => {
		// Take a registered model entry and force the api / image-input fields.
		const model: Model<"openai-completions"> = {
			...getModel("openai", "gpt-4o-mini"),
			api: "openai-completions",
			input: ["text", "image"],
		};

		const now = Date.now();

		// Assistant turn that issues two `read` tool calls.
		const toolCallTurn: AssistantMessage = {
			role: "assistant",
			content: [
				{ type: "toolCall", id: "tool-1", name: "read", arguments: { path: "img-1.png" } },
				{ type: "toolCall", id: "tool-2", name: "read", arguments: { path: "img-2.png" } },
			],
			api: model.api,
			provider: model.provider,
			model: model.id,
			usage: emptyUsage,
			stopReason: "toolUse",
			timestamp: now,
		};

		const context: Context = {
			messages: [
				{ role: "user", content: "Read the images", timestamp: now - 2 },
				toolCallTurn,
				buildToolResult("tool-1", now + 1),
				buildToolResult("tool-2", now + 2),
			],
		};

		const converted = convertMessages(model, context, compat);

		// No user message may sit between the two tool messages.
		expect(converted.map(({ role }) => role)).toEqual(["user", "assistant", "tool", "tool", "user"]);

		const trailing = converted[converted.length - 1];
		expect(trailing.role).toBe("user");
		expect(Array.isArray(trailing.content)).toBe(true);

		// Both tool results contributed exactly one image each.
		const parts = trailing.content as Array<{ type?: string }>;
		const imageParts = parts.filter((part) => part?.type === "image_url");
		expect(imageParts.length).toBe(2);
	});
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue