mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-17 08:00:59 +00:00
feat(ai): Add image input tests for vision-capable models
- Added image tests to OpenAI Completions (gpt-4o-mini)
- Added image tests to Anthropic (claude-sonnet-4-0)
- Added image tests to Google (gemini-2.5-flash)
- Tests verify models can process and describe the red circle test image
This commit is contained in:
parent
4ac0c6ea28
commit
796e48b80e
10 changed files with 692 additions and 27 deletions
BIN
packages/ai/test/data/red-circle.png
Normal file
BIN
packages/ai/test/data/red-circle.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.5 KiB |
|
|
@ -3,9 +3,15 @@ import { GoogleLLM } from "../src/providers/google.js";
|
|||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context, Tool, AssistantMessage, Model } from "../src/types.js";
|
||||
import type { LLM, LLMOptions, Context, Tool, AssistantMessage, Model, ImageContent } from "../src/types.js";
|
||||
import { spawn, ChildProcess, execSync } from "child_process";
|
||||
import { createLLM, getModel } from "../src/models.js";
|
||||
import { readFileSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// Calculator tool definition (same as examples)
|
||||
const calculatorTool: Tool = {
|
||||
|
|
@ -105,6 +111,46 @@ async function handleThinking<T extends LLMOptions>(llm: LLM<T>, options: T, req
|
|||
}
|
||||
}
|
||||
|
||||
async function handleImage<T extends LLMOptions>(llm: LLM<T>) {
|
||||
// Check if the model supports images
|
||||
const model = llm.getModel();
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What do you see in this image? Please describe the shape and color." },
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.complete(context);
|
||||
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content).toBeTruthy();
|
||||
const lowerContent = response.content?.toLowerCase() || "";
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
|
||||
async function multiTurn<T extends LLMOptions>(llm: LLM<T>, thinkingOptions: T) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
|
|
@ -259,6 +305,10 @@ describe("AI Providers E2E Tests", () => {
|
|||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
|
||||
|
|
@ -287,6 +337,10 @@ describe("AI Providers E2E Tests", () => {
|
|||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, {reasoningEffort: "medium"});
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => {
|
||||
|
|
@ -315,6 +369,10 @@ describe("AI Providers E2E Tests", () => {
|
|||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (via OpenAI Completions)", () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue