mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 04:00:10 +00:00
feat(ai): Add image input tests for vision-capable models
- Added image tests to OpenAI Completions (gpt-4o-mini)
- Added image tests to Anthropic (claude-sonnet-4-0)
- Added image tests to Google (gemini-2.5-flash)
- Tests verify models can process and describe the red circle test image
This commit is contained in:
parent
4ac0c6ea28
commit
796e48b80e
10 changed files with 692 additions and 27 deletions
|
|
@@ -236,10 +236,37 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "user") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
} else {
|
||||
// Convert array content to Anthropic format
|
||||
const blocks: ContentBlockParam[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
};
|
||||
} else {
|
||||
// Image content
|
||||
return {
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
params.push({
|
||||
role: "user",
|
||||
content: blocks,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const blocks: ContentBlockParam[] = [];
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,9 +1,11 @@
|
|||
import {
|
||||
type Content,
|
||||
type FinishReason,
|
||||
FunctionCallingConfigMode,
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type Part,
|
||||
} from "@google/genai";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
|
|
@@ -247,17 +249,39 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
|||
}
|
||||
}
|
||||
|
||||
private convertMessages(messages: Message[]): any[] {
|
||||
const contents: any[] = [];
|
||||
private convertMessages(messages: Message[]): Content[] {
|
||||
const contents: Content[] = [];
|
||||
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "user") {
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: [{ text: msg.content }],
|
||||
});
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: [{ text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
// Convert array content to Google format
|
||||
const parts: Part[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return { text: item.text };
|
||||
} else {
|
||||
// Image content - Google uses inlineData
|
||||
return {
|
||||
inlineData: {
|
||||
mimeType: item.mimeType,
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const parts: any[] = [];
|
||||
const parts: Part[] = [];
|
||||
|
||||
// Add thinking if present
|
||||
// Note: We include thinkingSignature in our response for multi-turn context,
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,11 @@
|
|||
import OpenAI from "openai";
|
||||
import type { ChatCompletionChunk, ChatCompletionMessageParam } from "openai/resources/chat/completions.js";
|
||||
import type {
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionContentPart,
|
||||
ChatCompletionContentPartImage,
|
||||
ChatCompletionContentPartText,
|
||||
ChatCompletionMessageParam,
|
||||
} from "openai/resources/chat/completions.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
|
|
@@ -264,10 +270,35 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
// Convert messages
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "user") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
} else {
|
||||
// Convert array content to OpenAI format
|
||||
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
} else {
|
||||
// Image content - OpenAI uses data URLs
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${item.mimeType};base64,${item.data}`,
|
||||
},
|
||||
} satisfies ChatCompletionContentPartImage;
|
||||
}
|
||||
});
|
||||
params.push({
|
||||
role: "user",
|
||||
content,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const assistantMsg: ChatCompletionMessageParam = {
|
||||
role: "assistant",
|
||||
|
|
|
|||
|
|
@@ -3,6 +3,9 @@ import type {
|
|||
Tool as OpenAITool,
|
||||
ResponseCreateParamsStreaming,
|
||||
ResponseInput,
|
||||
ResponseInputContent,
|
||||
ResponseInputImage,
|
||||
ResponseInputText,
|
||||
ResponseReasoningItem,
|
||||
} from "openai/resources/responses/responses.js";
|
||||
import type {
|
||||
|
|
@@ -205,10 +208,34 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
// Convert messages
|
||||
for (const msg of messages) {
|
||||
if (msg.role === "user") {
|
||||
input.push({
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: msg.content }],
|
||||
});
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
input.push({
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
// Convert array content to OpenAI Responses format
|
||||
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "input_text",
|
||||
text: item.text,
|
||||
} satisfies ResponseInputText;
|
||||
} else {
|
||||
// Image content - OpenAI Responses uses data URLs
|
||||
return {
|
||||
type: "input_image",
|
||||
detail: "auto",
|
||||
image_url: `data:${item.mimeType};base64,${item.data}`,
|
||||
} satisfies ResponseInputImage;
|
||||
}
|
||||
});
|
||||
input.push({
|
||||
role: "user",
|
||||
content,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
// Assistant messages - add both content and tool calls to output
|
||||
const output: ResponseInput = [];
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue