feat(ai): Add image input tests for vision-capable models

- Added image tests to OpenAI Completions (gpt-4o-mini)
- Added image tests to Anthropic (claude-sonnet-4-0)
- Added image tests to Google (gemini-2.5-flash)
- Tests verify that each model can process and describe the red-circle test image
This commit is contained in:
Mario Zechner 2025-08-30 18:37:17 +02:00
parent 4ac0c6ea28
commit 796e48b80e
10 changed files with 692 additions and 27 deletions

View file

@ -1,9 +1,11 @@
import {
type Content,
type FinishReason,
FunctionCallingConfigMode,
type GenerateContentConfig,
type GenerateContentParameters,
GoogleGenAI,
type Part,
} from "@google/genai";
import { calculateCost } from "../models.js";
import type {
@ -247,17 +249,39 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
}
}
private convertMessages(messages: Message[]): any[] {
const contents: any[] = [];
private convertMessages(messages: Message[]): Content[] {
const contents: Content[] = [];
for (const msg of messages) {
if (msg.role === "user") {
contents.push({
role: "user",
parts: [{ text: msg.content }],
});
// Handle both string and array content
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [{ text: msg.content }],
});
} else {
// Convert array content to Google format
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: item.text };
} else {
// Image content - Google uses inlineData
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
};
}
});
contents.push({
role: "user",
parts,
});
}
} else if (msg.role === "assistant") {
const parts: any[] = [];
const parts: Part[] = [];
// Add thinking if present
// Note: We include thinkingSignature in our response for multi-turn context,