feat(ai): Add image input tests for vision-capable models

- Added image tests to OpenAI Completions (gpt-4o-mini)
- Added image tests to Anthropic (claude-sonnet-4-0)
- Added image tests to Google (gemini-2.5-flash)
- Tests verify models can process and describe the red circle test image
2026-04-20 18:02:11 +00:00 · 2025-08-30 18:37:17 +02:00 · 2025-08-30 18:37:17 +02:00 · 796e48b80e
commit 796e48b80e
parent 4ac0c6ea28
10 changed files with 692 additions and 27 deletions
--- a/packages/ai/src/providers/google.ts
+++ b/packages/ai/src/providers/google.ts
@@ -1,9 +1,11 @@
 import {
+	type Content,
 	type FinishReason,
 	FunctionCallingConfigMode,
 	type GenerateContentConfig,
 	type GenerateContentParameters,
 	GoogleGenAI,
+	type Part,
 } from "@google/genai";
 import { calculateCost } from "../models.js";
 import type {
@@ -247,17 +249,39 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 		}
 	}

-	private convertMessages(messages: Message[]): any[] {
-		const contents: any[] = [];
+	private convertMessages(messages: Message[]): Content[] {
+		const contents: Content[] = [];

 		for (const msg of messages) {
 			if (msg.role === "user") {
-				contents.push({
-					role: "user",
-					parts: [{ text: msg.content }],
-				});
+				// Handle both string and array content
+				if (typeof msg.content === "string") {
+					contents.push({
+						role: "user",
+						parts: [{ text: msg.content }],
+					});
+				} else {
+					// Convert array content to Google format
+					const parts: Part[] = msg.content.map((item) => {
+						if (item.type === "text") {
+							return { text: item.text };
+						} else {
+							// Image content - Google uses inlineData
+							return {
+								inlineData: {
+									mimeType: item.mimeType,
+									data: item.data,
+								},
+							};
+						}
+					});
+					contents.push({
+						role: "user",
+						parts,
+					});
+				}
 			} else if (msg.role === "assistant") {
-				const parts: any[] = [];
+				const parts: Part[] = [];

 				// Add thinking if present
 				// Note: We include thinkingSignature in our response for multi-turn context,