mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 23:04:41 +00:00
fix(ai): batch tool-result images after consecutive tool results (#902)
Fixes 400 errors when reading multiple images via GitHub Copilot's Claude models. Claude requires tool_use -> tool_result adjacency, with no user messages interleaved. Before: assistant(tool_calls) -> tool -> user(images) -> tool -> user(images). After: assistant(tool_calls) -> tool -> tool -> user(all images).
This commit is contained in:
parent
c083e195ad
commit
6289c144bf
2 changed files with 153 additions and 40 deletions
|
|
@ -23,6 +23,7 @@ import type {
|
||||||
ThinkingContent,
|
ThinkingContent,
|
||||||
Tool,
|
Tool,
|
||||||
ToolCall,
|
ToolCall,
|
||||||
|
ToolResultMessage,
|
||||||
} from "../types.js";
|
} from "../types.js";
|
||||||
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
||||||
import { parseStreamingJson } from "../utils/json-parse.js";
|
import { parseStreamingJson } from "../utils/json-parse.js";
|
||||||
|
|
@ -459,7 +460,7 @@ function maybeAddOpenRouterAnthropicCacheControl(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function convertMessages(
|
export function convertMessages(
|
||||||
model: Model<"openai-completions">,
|
model: Model<"openai-completions">,
|
||||||
context: Context,
|
context: Context,
|
||||||
compat: Required<OpenAICompletionsCompat>,
|
compat: Required<OpenAICompletionsCompat>,
|
||||||
|
|
@ -486,7 +487,8 @@ function convertMessages(
|
||||||
|
|
||||||
let lastRole: string | null = null;
|
let lastRole: string | null = null;
|
||||||
|
|
||||||
for (const msg of transformedMessages) {
|
for (let i = 0; i < transformedMessages.length; i++) {
|
||||||
|
const msg = transformedMessages[i];
|
||||||
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
|
// Some providers (e.g. Mistral/Devstral) don't allow user messages directly after tool results
|
||||||
// Insert a synthetic assistant message to bridge the gap
|
// Insert a synthetic assistant message to bridge the gap
|
||||||
if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
|
if (compat.requiresAssistantAfterToolResult && lastRole === "toolResult" && msg.role === "user") {
|
||||||
|
|
@ -610,12 +612,18 @@ function convertMessages(
|
||||||
}
|
}
|
||||||
params.push(assistantMsg);
|
params.push(assistantMsg);
|
||||||
} else if (msg.role === "toolResult") {
|
} else if (msg.role === "toolResult") {
|
||||||
|
const imageBlocks: Array<{ type: "image_url"; image_url: { url: string } }> = [];
|
||||||
|
let j = i;
|
||||||
|
|
||||||
|
for (; j < transformedMessages.length && transformedMessages[j].role === "toolResult"; j++) {
|
||||||
|
const toolMsg = transformedMessages[j] as ToolResultMessage;
|
||||||
|
|
||||||
// Extract text and image content
|
// Extract text and image content
|
||||||
const textResult = msg.content
|
const textResult = toolMsg.content
|
||||||
.filter((c) => c.type === "text")
|
.filter((c) => c.type === "text")
|
||||||
.map((c) => (c as any).text)
|
.map((c) => (c as any).text)
|
||||||
.join("\n");
|
.join("\n");
|
||||||
const hasImages = msg.content.some((c) => c.type === "image");
|
const hasImages = toolMsg.content.some((c) => c.type === "image");
|
||||||
|
|
||||||
// Always send tool result with text (or placeholder if only images)
|
// Always send tool result with text (or placeholder if only images)
|
||||||
const hasText = textResult.length > 0;
|
const hasText = textResult.length > 0;
|
||||||
|
|
@ -623,29 +631,17 @@ function convertMessages(
|
||||||
const toolResultMsg: ChatCompletionToolMessageParam = {
|
const toolResultMsg: ChatCompletionToolMessageParam = {
|
||||||
role: "tool",
|
role: "tool",
|
||||||
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
|
content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
|
||||||
tool_call_id: msg.toolCallId,
|
tool_call_id: toolMsg.toolCallId,
|
||||||
};
|
};
|
||||||
if (compat.requiresToolResultName && msg.toolName) {
|
if (compat.requiresToolResultName && toolMsg.toolName) {
|
||||||
(toolResultMsg as any).name = msg.toolName;
|
(toolResultMsg as any).name = toolMsg.toolName;
|
||||||
}
|
}
|
||||||
params.push(toolResultMsg);
|
params.push(toolResultMsg);
|
||||||
|
|
||||||
// If there are images and model supports them, send a follow-up user message with images
|
|
||||||
if (hasImages && model.input.includes("image")) {
|
if (hasImages && model.input.includes("image")) {
|
||||||
const contentBlocks: Array<
|
for (const block of toolMsg.content) {
|
||||||
{ type: "text"; text: string } | { type: "image_url"; image_url: { url: string } }
|
|
||||||
> = [];
|
|
||||||
|
|
||||||
// Add text prefix
|
|
||||||
contentBlocks.push({
|
|
||||||
type: "text",
|
|
||||||
text: "Attached image(s) from tool result:",
|
|
||||||
});
|
|
||||||
|
|
||||||
// Add images
|
|
||||||
for (const block of msg.content) {
|
|
||||||
if (block.type === "image") {
|
if (block.type === "image") {
|
||||||
contentBlocks.push({
|
imageBlocks.push({
|
||||||
type: "image_url",
|
type: "image_url",
|
||||||
image_url: {
|
image_url: {
|
||||||
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
|
||||||
|
|
@ -653,12 +649,34 @@ function convertMessages(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
i = j - 1;
|
||||||
|
|
||||||
|
if (imageBlocks.length > 0) {
|
||||||
|
if (compat.requiresAssistantAfterToolResult) {
|
||||||
|
params.push({
|
||||||
|
role: "assistant",
|
||||||
|
content: "I have processed the tool results.",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
params.push({
|
params.push({
|
||||||
role: "user",
|
role: "user",
|
||||||
content: contentBlocks,
|
content: [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
text: "Attached image(s) from tool result:",
|
||||||
|
},
|
||||||
|
...imageBlocks,
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
lastRole = "user";
|
||||||
|
} else {
|
||||||
|
lastRole = "toolResult";
|
||||||
}
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
lastRole = msg.role;
|
lastRole = msg.role;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import { getModel } from "../src/models.js";
|
||||||
|
import { convertMessages } from "../src/providers/openai-completions.js";
|
||||||
|
import type {
|
||||||
|
AssistantMessage,
|
||||||
|
Context,
|
||||||
|
Model,
|
||||||
|
OpenAICompletionsCompat,
|
||||||
|
ToolResultMessage,
|
||||||
|
Usage,
|
||||||
|
} from "../src/types.js";
|
||||||
|
|
||||||
|
const emptyUsage: Usage = {
|
||||||
|
input: 0,
|
||||||
|
output: 0,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
totalTokens: 0,
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||||
|
};
|
||||||
|
|
||||||
|
const compat: Required<OpenAICompletionsCompat> = {
|
||||||
|
supportsStore: true,
|
||||||
|
supportsDeveloperRole: true,
|
||||||
|
supportsReasoningEffort: true,
|
||||||
|
supportsUsageInStreaming: true,
|
||||||
|
maxTokensField: "max_completion_tokens",
|
||||||
|
requiresToolResultName: false,
|
||||||
|
requiresAssistantAfterToolResult: false,
|
||||||
|
requiresThinkingAsText: false,
|
||||||
|
requiresMistralToolIds: false,
|
||||||
|
thinkingFormat: "openai",
|
||||||
|
};
|
||||||
|
|
||||||
|
function buildToolResult(toolCallId: string, timestamp: number): ToolResultMessage {
|
||||||
|
return {
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId,
|
||||||
|
toolName: "read",
|
||||||
|
content: [
|
||||||
|
{ type: "text", text: "Read image file [image/png]" },
|
||||||
|
{ type: "image", data: "ZmFrZQ==", mimeType: "image/png" },
|
||||||
|
],
|
||||||
|
isError: false,
|
||||||
|
timestamp,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("openai-completions convertMessages", () => {
|
||||||
|
it("batches tool-result images after consecutive tool results", () => {
|
||||||
|
const baseModel = getModel("openai", "gpt-4o-mini");
|
||||||
|
const model: Model<"openai-completions"> = {
|
||||||
|
...baseModel,
|
||||||
|
api: "openai-completions",
|
||||||
|
input: ["text", "image"],
|
||||||
|
};
|
||||||
|
|
||||||
|
const now = Date.now();
|
||||||
|
const assistantMessage: AssistantMessage = {
|
||||||
|
role: "assistant",
|
||||||
|
content: [
|
||||||
|
{ type: "toolCall", id: "tool-1", name: "read", arguments: { path: "img-1.png" } },
|
||||||
|
{ type: "toolCall", id: "tool-2", name: "read", arguments: { path: "img-2.png" } },
|
||||||
|
],
|
||||||
|
api: model.api,
|
||||||
|
provider: model.provider,
|
||||||
|
model: model.id,
|
||||||
|
usage: emptyUsage,
|
||||||
|
stopReason: "toolUse",
|
||||||
|
timestamp: now,
|
||||||
|
};
|
||||||
|
|
||||||
|
const context: Context = {
|
||||||
|
messages: [
|
||||||
|
{ role: "user", content: "Read the images", timestamp: now - 2 },
|
||||||
|
assistantMessage,
|
||||||
|
buildToolResult("tool-1", now + 1),
|
||||||
|
buildToolResult("tool-2", now + 2),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
const messages = convertMessages(model, context, compat);
|
||||||
|
const roles = messages.map((message) => message.role);
|
||||||
|
expect(roles).toEqual(["user", "assistant", "tool", "tool", "user"]);
|
||||||
|
|
||||||
|
const imageMessage = messages[messages.length - 1];
|
||||||
|
expect(imageMessage.role).toBe("user");
|
||||||
|
expect(Array.isArray(imageMessage.content)).toBe(true);
|
||||||
|
|
||||||
|
const imageParts = (imageMessage.content as Array<{ type?: string }>).filter(
|
||||||
|
(part) => part?.type === "image_url",
|
||||||
|
);
|
||||||
|
expect(imageParts.length).toBe(2);
|
||||||
|
});
|
||||||
|
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue