feat(ai): Add image input tests for vision-capable models

- Added image tests to OpenAI Completions (gpt-4o-mini)
- Added image tests to Anthropic (claude-sonnet-4-0)
- Added image tests to Google (gemini-2.5-flash)
- Tests verify models can process and describe the red circle test image
This commit is contained in:
Mario Zechner 2025-08-30 18:37:17 +02:00
parent 4ac0c6ea28
commit 796e48b80e
10 changed files with 692 additions and 27 deletions

View file

@@ -3,6 +3,9 @@ import type {
Tool as OpenAITool,
ResponseCreateParamsStreaming,
ResponseInput,
ResponseInputContent,
ResponseInputImage,
ResponseInputText,
ResponseReasoningItem,
} from "openai/resources/responses/responses.js";
import type {
@@ -205,10 +208,34 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
// Convert messages
for (const msg of messages) {
if (msg.role === "user") {
input.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
// Handle both string and array content
if (typeof msg.content === "string") {
input.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
} else {
// Convert array content to OpenAI Responses format
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: item.text,
} satisfies ResponseInputText;
} else {
// Image content - OpenAI Responses uses data URLs
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
input.push({
role: "user",
content,
});
}
} else if (msg.role === "assistant") {
// Assistant messages - add both content and tool calls to output
const output: ResponseInput = [];