mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-22 02:03:42 +00:00
Massive refactor of API
- Switch to function based API - Anthropic SDK style async generator - Fully typed with escape hatches for custom models
This commit is contained in:
parent
004de3c9d0
commit
66cefb236e
29 changed files with 5835 additions and 6225 deletions
|
|
@ -1,128 +1,103 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context } from "../src/types.js";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
import { complete, stream } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
|
||||
|
||||
async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "What is 15 + 27? Think step by step. Then list 50 first names."
|
||||
}]
|
||||
};
|
||||
async function testAbortSignal<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "What is 15 + 27? Think step by step. Then list 50 first names.",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let abortFired = false;
|
||||
const controller = new AbortController();
|
||||
const response = await llm.generate(context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
onEvent: (event) => {
|
||||
// console.log(JSON.stringify(event, null, 2));
|
||||
if (abortFired) return;
|
||||
setTimeout(() => controller.abort(), 2000);
|
||||
abortFired = true;
|
||||
}
|
||||
});
|
||||
let abortFired = false;
|
||||
const controller = new AbortController();
|
||||
const response = await stream(llm, context, { ...options, signal: controller.signal });
|
||||
for await (const event of response) {
|
||||
if (abortFired) return;
|
||||
setTimeout(() => controller.abort(), 3000);
|
||||
abortFired = true;
|
||||
break;
|
||||
}
|
||||
const msg = await response.finalMessage();
|
||||
|
||||
// If we get here without throwing, the abort didn't work
|
||||
expect(response.stopReason).toBe("error");
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
// If we get here without throwing, the abort didn't work
|
||||
expect(msg.stopReason).toBe("error");
|
||||
expect(msg.content.length).toBeGreaterThan(0);
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Please continue, but only generate 5 names." });
|
||||
context.messages.push(msg);
|
||||
context.messages.push({ role: "user", content: "Please continue, but only generate 5 names." });
|
||||
|
||||
// Ensure we can still make requests after abort
|
||||
const followUp = await llm.generate(context, options);
|
||||
expect(followUp.stopReason).toBe("stop");
|
||||
expect(followUp.content.length).toBeGreaterThan(0);
|
||||
const followUp = await complete(llm, context, options);
|
||||
expect(followUp.stopReason).toBe("stop");
|
||||
expect(followUp.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
|
||||
async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
const controller = new AbortController();
|
||||
async function testImmediateAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
const controller = new AbortController();
|
||||
|
||||
// Abort immediately
|
||||
controller.abort();
|
||||
controller.abort();
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Hello" }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
...options,
|
||||
signal: controller.signal
|
||||
});
|
||||
expect(response.stopReason).toBe("error");
|
||||
const response = await complete(llm, context, { ...options, signal: controller.signal });
|
||||
expect(response.stopReason).toBe("error");
|
||||
}
|
||||
|
||||
describe("AI Providers Abort Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
|
||||
let llm: GoogleLLM;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Abort", () => {
|
||||
const llm: Model<"openai-completions"> = {
|
||||
...getModel("openai", "gpt-4o-mini")!,
|
||||
api: "openai-completions",
|
||||
};
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Abort", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Abort", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Abort", () => {
|
||||
let llm: OpenAIResponsesLLM;
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (!model) {
|
||||
throw new Error("Model not found");
|
||||
}
|
||||
llm = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
|
||||
const llm = getModel("anthropic", "claude-opus-4-1-20250805");
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, {});
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, {});
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
|
||||
let llm: AnthropicLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new AnthropicLLM(getModel("anthropic", "claude-opus-4-1-20250805")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,313 +1,265 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context, UserMessage, AssistantMessage } from "../src/types.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { complete } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
|
||||
|
||||
async function testEmptyMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with completely empty content array
|
||||
const emptyMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: []
|
||||
};
|
||||
async function testEmptyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with completely empty content array
|
||||
const emptyMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: [],
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [emptyMessage]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [emptyMessage],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
// Should either handle gracefully or return an error
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Most providers should return an error or empty response
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
// If it didn't error, it should have some content or gracefully handle empty
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
// Should either handle gracefully or return an error
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testEmptyStringMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with empty string content
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: ""
|
||||
}]
|
||||
};
|
||||
async function testEmptyStringMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with empty string content
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testWhitespaceOnlyMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with whitespace-only content
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: " \n\t "
|
||||
}]
|
||||
};
|
||||
async function testWhitespaceOnlyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with whitespace-only content
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: " \n\t ",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle whitespace-only gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle whitespace-only gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testEmptyAssistantMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with empty assistant message in conversation flow
|
||||
// User -> Empty Assistant -> User
|
||||
const emptyAssistant: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: llm.getApi(),
|
||||
provider: llm.getModel().provider,
|
||||
model: llm.getModel().id,
|
||||
usage: {
|
||||
input: 10,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }
|
||||
},
|
||||
stopReason: "stop"
|
||||
};
|
||||
async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with empty assistant message in conversation flow
|
||||
// User -> Empty Assistant -> User
|
||||
const emptyAssistant: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: llm.api,
|
||||
provider: llm.provider,
|
||||
model: llm.id,
|
||||
usage: {
|
||||
input: 10,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Hello, how are you?"
|
||||
},
|
||||
emptyAssistant,
|
||||
{
|
||||
role: "user",
|
||||
content: "Please respond this time."
|
||||
}
|
||||
]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Hello, how are you?",
|
||||
},
|
||||
emptyAssistant,
|
||||
{
|
||||
role: "user",
|
||||
content: "Please respond this time.",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty assistant message in context gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty assistant message in context gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
|
||||
describe("AI Providers Empty Message Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
|
||||
let llm: GoogleLLM;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
|
||||
const llm = getModel("openai", "gpt-4o-mini");
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
|
||||
let llm: OpenAIResponsesLLM;
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (!model) {
|
||||
throw new Error("Model gpt-5-mini not found");
|
||||
}
|
||||
llm = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
|
||||
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
|
||||
let llm: AnthropicLLM;
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new AnthropicLLM(getModel("anthropic", "claude-3-5-haiku-20241022")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
|
||||
const llm = getModel("xai", "grok-3");
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
// Test with xAI/Grok if available
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("xai", "grok-3");
|
||||
if (!model) {
|
||||
throw new Error("Model grok-3 not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.XAI_API_KEY!);
|
||||
});
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
|
||||
const llm = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Test with Groq if available
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
|
||||
const llm = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("groq", "llama-3.3-70b-versatile");
|
||||
if (!model) {
|
||||
throw new Error("Model llama-3.3-70b-versatile not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.GROQ_API_KEY!);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Test with Cerebras if available
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("cerebras", "gpt-oss-120b");
|
||||
if (!model) {
|
||||
throw new Error("Model gpt-oss-120b not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.CEREBRAS_API_KEY!);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,311 +1,612 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { generate, generateComplete } from "../src/generate.js";
|
||||
import type { Context, Tool, GenerateOptionsUnified, Model, ImageContent, GenerateStream, GenerateOptions } from "../src/types.js";
|
||||
import { type ChildProcess, execSync, spawn } from "child_process";
|
||||
import { readFileSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
||||
import { complete, stream } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool } from "../src/types.js";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// Calculator tool definition (same as examples)
|
||||
const calculatorTool: Tool = {
|
||||
name: "calculator",
|
||||
description: "Perform basic arithmetic operations",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
a: { type: "number", description: "First number" },
|
||||
b: { type: "number", description: "Second number" },
|
||||
operation: {
|
||||
type: "string",
|
||||
enum: ["add", "subtract", "multiply", "divide"],
|
||||
description: "The operation to perform"
|
||||
}
|
||||
},
|
||||
required: ["a", "b", "operation"]
|
||||
}
|
||||
name: "calculator",
|
||||
description: "Perform basic arithmetic operations",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
a: { type: "number", description: "First number" },
|
||||
b: { type: "number", description: "Second number" },
|
||||
operation: {
|
||||
type: "string",
|
||||
enum: ["add", "subtract", "multiply", "divide"],
|
||||
description: "The operation to perform",
|
||||
},
|
||||
},
|
||||
required: ["a", "b", "operation"],
|
||||
},
|
||||
};
|
||||
|
||||
async function basicTextGeneration<P extends GenerateOptions>(model: Model, options?: P) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [
|
||||
{ role: "user", content: "Reply with exactly: 'Hello test successful'" }
|
||||
]
|
||||
};
|
||||
async function basicTextGeneration<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [{ role: "user", content: "Reply with exactly: 'Hello test successful'" }],
|
||||
};
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map((b) => (b.type === "text" ? b.text : "")).join("")).toContain("Hello test successful");
|
||||
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Hello test successful");
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
const secondResponse = await complete(model, context, options);
|
||||
|
||||
const secondResponse = await generateComplete(model, context, options);
|
||||
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Goodbye test successful");
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map((b) => (b.type === "text" ? b.text : "")).join("")).toContain(
|
||||
"Goodbye test successful",
|
||||
);
|
||||
}
|
||||
|
||||
async function handleToolCall(model: Model, options?: GenerateOptionsUnified) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool."
|
||||
}],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
async function handleToolCall<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool.",
|
||||
},
|
||||
],
|
||||
tools: [calculatorTool],
|
||||
};
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some(b => b.type == "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find(b => b.type == "toolCall");
|
||||
if (toolCall && toolCall.type === "toolCall") {
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
const response = await complete(model, context, options);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some((b) => b.type === "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find((b) => b.type === "toolCall");
|
||||
if (toolCall && toolCall.type === "toolCall") {
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
}
|
||||
|
||||
async function handleStreaming(model: Model, options?: GenerateOptionsUnified) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
async function handleStreaming<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }],
|
||||
};
|
||||
|
||||
const stream = generate(model, context, options);
|
||||
const s = stream(model, context, options);
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
for await (const event of s) {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
const response = await stream.finalMessage();
|
||||
const response = await s.finalMessage();
|
||||
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "text")).toBeTruthy();
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some((b) => b.type === "text")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleThinking(model: Model, options: GenerateOptionsUnified) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
async function handleThinking<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.` }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.`,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const stream = generate(model, context, options);
|
||||
const s = stream(model, context, options);
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
}
|
||||
for await (const event of s) {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
const response = await stream.finalMessage();
|
||||
const response = await s.finalMessage();
|
||||
|
||||
expect(response.stopReason, `Error: ${response.error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "thinking")).toBeTruthy();
|
||||
expect(response.stopReason, `Error: ${response.error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some((b) => b.type === "thinking")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleImage(model: Model, options?: GenerateOptionsUnified) {
|
||||
// Check if the model supports images
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
async function handleImage<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
// Check if the model supports images
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...)." },
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...).",
|
||||
},
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const textContent = response.content.find(b => b.type == "text");
|
||||
if (textContent && textContent.type === "text") {
|
||||
const lowerContent = textContent.text.toLowerCase();
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const textContent = response.content.find((b) => b.type === "text");
|
||||
if (textContent && textContent.type === "text") {
|
||||
const lowerContent = textContent.text.toLowerCase();
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
}
|
||||
|
||||
async function multiTurn(model: Model, options?: GenerateOptionsUnified) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool."
|
||||
}
|
||||
],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
async function multiTurn<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool.",
|
||||
},
|
||||
],
|
||||
tools: [calculatorTool],
|
||||
};
|
||||
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await generateComplete(model, context, options);
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add": result = a + b; break;
|
||||
case "multiply": result = a * b; break;
|
||||
default: result = 0;
|
||||
}
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add":
|
||||
result = a + b;
|
||||
break;
|
||||
case "multiply":
|
||||
result = a * b;
|
||||
break;
|
||||
default:
|
||||
result = 0;
|
||||
}
|
||||
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false
|
||||
});
|
||||
}
|
||||
}
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
}
|
||||
|
||||
describe("Generate E2E Tests", () => {
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
|
||||
let model: Model;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
it("should handle ", async () => {
|
||||
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
|
||||
let model: Model;
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
model = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
|
||||
const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(model, { reasoning: "low" });
|
||||
});
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(model, { reasoning: "medium" });
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle ", { retry: 2 }, async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle thinking", async () => {
|
||||
await handleThinking(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
|
||||
const llm = getModel("xai", "grok-code-fast-1");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
|
||||
const llm = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
|
||||
const llm = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
|
||||
const llm = getModel("openrouter", "z-ai/glm-4.5v");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", { retry: 2 }, async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Check if ollama is installed
|
||||
let ollamaInstalled = false;
|
||||
try {
|
||||
execSync("which ollama", { stdio: "ignore" });
|
||||
ollamaInstalled = true;
|
||||
} catch {
|
||||
ollamaInstalled = false;
|
||||
}
|
||||
|
||||
describe.skipIf(!ollamaInstalled)("Ollama Provider (gpt-oss-20b via OpenAI Completions)", () => {
|
||||
let llm: Model<"openai-completions">;
|
||||
let ollamaProcess: ChildProcess | null = null;
|
||||
|
||||
beforeAll(async () => {
|
||||
// Check if model is available, if not pull it
|
||||
try {
|
||||
execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
|
||||
} catch {
|
||||
console.log("Pulling gpt-oss:20b model for Ollama tests...");
|
||||
try {
|
||||
execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
|
||||
} catch (e) {
|
||||
console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Start ollama server
|
||||
ollamaProcess = spawn("ollama", ["serve"], {
|
||||
detached: false,
|
||||
stdio: "ignore",
|
||||
});
|
||||
|
||||
// Wait for server to be ready
|
||||
await new Promise<void>((resolve) => {
|
||||
const checkServer = async () => {
|
||||
try {
|
||||
const response = await fetch("http://localhost:11434/api/tags");
|
||||
if (response.ok) {
|
||||
resolve();
|
||||
} else {
|
||||
setTimeout(checkServer, 500);
|
||||
}
|
||||
} catch {
|
||||
setTimeout(checkServer, 500);
|
||||
}
|
||||
};
|
||||
setTimeout(checkServer, 1000); // Initial delay
|
||||
});
|
||||
|
||||
llm = {
|
||||
id: "gpt-oss:20b",
|
||||
api: "openai-completions",
|
||||
provider: "ollama",
|
||||
baseUrl: "http://localhost:11434/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16000,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
name: "Ollama GPT-OSS 20B",
|
||||
};
|
||||
}, 30000); // 30 second timeout for setup
|
||||
|
||||
afterAll(() => {
|
||||
// Kill ollama server
|
||||
if (ollamaProcess) {
|
||||
ollamaProcess.kill("SIGTERM");
|
||||
ollamaProcess = null;
|
||||
}
|
||||
});
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { apiKey: "test", reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { apiKey: "test", reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,503 +1,489 @@
|
|||
import { describe, it, expect, beforeAll } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, Context, AssistantMessage, Tool, Message } from "../src/types.js";
|
||||
import { createLLM, getModel } from "../src/models.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { complete } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, AssistantMessage, Context, Message, Model, Tool } from "../src/types.js";
|
||||
|
||||
// Tool for testing
|
||||
const weatherTool: Tool = {
|
||||
name: "get_weather",
|
||||
description: "Get the weather for a location",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
location: { type: "string", description: "City name" }
|
||||
},
|
||||
required: ["location"]
|
||||
}
|
||||
name: "get_weather",
|
||||
description: "Get the weather for a location",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
location: { type: "string", description: "City name" },
|
||||
},
|
||||
required: ["location"],
|
||||
},
|
||||
};
|
||||
|
||||
// Pre-built contexts representing typical outputs from each provider
|
||||
const providerContexts = {
|
||||
// Anthropic-style message with thinking block
|
||||
anthropic: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 17 * 23. That's 17 * 20 + 17 * 3 = 340 + 51 = 391",
|
||||
thinkingSignature: "signature_abc123"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I'll help you with the calculation and check the weather. The result of 17 × 23 is 391. The capital of Austria is Vienna. Now let me check the weather for you."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "toolu_01abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Tokyo" }
|
||||
}
|
||||
],
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-haiku-latest",
|
||||
usage: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "toolu_01abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Tokyo: 18°C, partly cloudy",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 391,
|
||||
city: "Tokyo",
|
||||
temperature: 18,
|
||||
capital: "Vienna"
|
||||
}
|
||||
},
|
||||
// Anthropic-style message with thinking block
|
||||
anthropic: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 17 * 23. That's 17 * 20 + 17 * 3 = 340 + 51 = 391",
|
||||
thinkingSignature: "signature_abc123",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I'll help you with the calculation and check the weather. The result of 17 × 23 is 391. The capital of Austria is Vienna. Now let me check the weather for you.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "toolu_01abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Tokyo" },
|
||||
},
|
||||
],
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-haiku-latest",
|
||||
usage: {
|
||||
input: 100,
|
||||
output: 50,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "toolu_01abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Tokyo: 18°C, partly cloudy",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 391,
|
||||
city: "Tokyo",
|
||||
temperature: 18,
|
||||
capital: "Vienna",
|
||||
},
|
||||
},
|
||||
|
||||
// Google-style message with thinking
|
||||
google: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "I need to multiply 19 * 24. Let me work through this: 19 * 24 = 19 * 20 + 19 * 4 = 380 + 76 = 456",
|
||||
thinkingSignature: undefined
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The multiplication of 19 × 24 equals 456. The capital of France is Paris. Let me check the weather in Berlin for you."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_gemini_123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Berlin" }
|
||||
}
|
||||
],
|
||||
provider: "google",
|
||||
model: "gemini-2.5-flash",
|
||||
usage: { input: 120, output: 60, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_gemini_123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Berlin: 22°C, sunny",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 456,
|
||||
city: "Berlin",
|
||||
temperature: 22,
|
||||
capital: "Paris"
|
||||
}
|
||||
},
|
||||
// Google-style message with thinking
|
||||
google: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking:
|
||||
"I need to multiply 19 * 24. Let me work through this: 19 * 24 = 19 * 20 + 19 * 4 = 380 + 76 = 456",
|
||||
thinkingSignature: undefined,
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The multiplication of 19 × 24 equals 456. The capital of France is Paris. Let me check the weather in Berlin for you.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_gemini_123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Berlin" },
|
||||
},
|
||||
],
|
||||
provider: "google",
|
||||
model: "gemini-2.5-flash",
|
||||
usage: {
|
||||
input: 120,
|
||||
output: 60,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_gemini_123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Berlin: 22°C, sunny",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 456,
|
||||
city: "Berlin",
|
||||
temperature: 22,
|
||||
capital: "Paris",
|
||||
},
|
||||
},
|
||||
|
||||
// OpenAI Completions style (with reasoning_content)
|
||||
openaiCompletions: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 21 * 25. That's 21 * 25 = 525",
|
||||
thinkingSignature: "reasoning_content"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The result of 21 × 25 is 525. The capital of Spain is Madrid. I'll check the weather in London now."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "London" }
|
||||
}
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini",
|
||||
usage: { input: 110, output: 55, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in London: 15°C, rainy",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 525,
|
||||
city: "London",
|
||||
temperature: 15,
|
||||
capital: "Madrid"
|
||||
}
|
||||
},
|
||||
// OpenAI Completions style (with reasoning_content)
|
||||
openaiCompletions: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 21 * 25. That's 21 * 25 = 525",
|
||||
thinkingSignature: "reasoning_content",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The result of 21 × 25 is 525. The capital of Spain is Madrid. I'll check the weather in London now.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "London" },
|
||||
},
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini",
|
||||
usage: {
|
||||
input: 110,
|
||||
output: 55,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in London: 15°C, rainy",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 525,
|
||||
city: "London",
|
||||
temperature: 15,
|
||||
capital: "Madrid",
|
||||
},
|
||||
},
|
||||
|
||||
// OpenAI Responses style (with complex tool call IDs)
|
||||
openaiResponses: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Calculating 18 * 27: 18 * 27 = 486",
|
||||
thinkingSignature: '{"type":"reasoning","id":"rs_2b2342acdde","summary":[{"type":"summary_text","text":"Calculating 18 * 27: 18 * 27 = 486"}]}'
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The calculation of 18 × 27 gives us 486. The capital of Italy is Rome. Let me check Sydney's weather.",
|
||||
textSignature: "msg_response_456"
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_789_item_012", // Anthropic requires alphanumeric, dash, and underscore only
|
||||
name: "get_weather",
|
||||
arguments: { location: "Sydney" }
|
||||
}
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
usage: { input: 115, output: 58, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_789_item_012", // Match the updated ID format
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Sydney: 25°C, clear",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 486,
|
||||
city: "Sydney",
|
||||
temperature: 25,
|
||||
capital: "Rome"
|
||||
}
|
||||
},
|
||||
// OpenAI Responses style (with complex tool call IDs)
|
||||
openaiResponses: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Calculating 18 * 27: 18 * 27 = 486",
|
||||
thinkingSignature:
|
||||
'{"type":"reasoning","id":"rs_2b2342acdde","summary":[{"type":"summary_text","text":"Calculating 18 * 27: 18 * 27 = 486"}]}',
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The calculation of 18 × 27 gives us 486. The capital of Italy is Rome. Let me check Sydney's weather.",
|
||||
textSignature: "msg_response_456",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_789_item_012", // Anthropic requires alphanumeric, dash, and underscore only
|
||||
name: "get_weather",
|
||||
arguments: { location: "Sydney" },
|
||||
},
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
usage: {
|
||||
input: 115,
|
||||
output: 58,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_789_item_012", // Match the updated ID format
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Sydney: 25°C, clear",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 486,
|
||||
city: "Sydney",
|
||||
temperature: 25,
|
||||
capital: "Rome",
|
||||
},
|
||||
},
|
||||
|
||||
// Aborted message (stopReason: 'error')
|
||||
aborted: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me start calculating 20 * 30...",
|
||||
thinkingSignature: "partial_sig"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I was about to calculate 20 × 30 which is"
|
||||
}
|
||||
],
|
||||
provider: "test",
|
||||
model: "test-model",
|
||||
usage: { input: 50, output: 25, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "error",
|
||||
error: "Request was aborted"
|
||||
} as AssistantMessage,
|
||||
toolResult: null,
|
||||
facts: {
|
||||
calculation: 600,
|
||||
city: "none",
|
||||
temperature: 0,
|
||||
capital: "none"
|
||||
}
|
||||
}
|
||||
// Aborted message (stopReason: 'error')
|
||||
aborted: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me start calculating 20 * 30...",
|
||||
thinkingSignature: "partial_sig",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I was about to calculate 20 × 30 which is",
|
||||
},
|
||||
],
|
||||
provider: "test",
|
||||
model: "test-model",
|
||||
usage: {
|
||||
input: 50,
|
||||
output: 25,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
error: "Request was aborted",
|
||||
} as AssistantMessage,
|
||||
toolResult: null,
|
||||
facts: {
|
||||
calculation: 600,
|
||||
city: "none",
|
||||
temperature: 0,
|
||||
capital: "none",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Test that a provider can handle contexts from different sources
|
||||
*/
|
||||
async function testProviderHandoff(
|
||||
targetProvider: LLM<any>,
|
||||
sourceLabel: string,
|
||||
sourceContext: typeof providerContexts[keyof typeof providerContexts]
|
||||
async function testProviderHandoff<TApi extends Api>(
|
||||
targetModel: Model<TApi>,
|
||||
sourceLabel: string,
|
||||
sourceContext: (typeof providerContexts)[keyof typeof providerContexts],
|
||||
): Promise<boolean> {
|
||||
// Build conversation context
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Please do some calculations, tell me about capitals, and check the weather."
|
||||
},
|
||||
sourceContext.message
|
||||
];
|
||||
// Build conversation context
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Please do some calculations, tell me about capitals, and check the weather.",
|
||||
},
|
||||
sourceContext.message,
|
||||
];
|
||||
|
||||
// Add tool result if present
|
||||
if (sourceContext.toolResult) {
|
||||
messages.push(sourceContext.toolResult);
|
||||
}
|
||||
// Add tool result if present
|
||||
if (sourceContext.toolResult) {
|
||||
messages.push(sourceContext.toolResult);
|
||||
}
|
||||
|
||||
// Ask follow-up question
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: `Based on our conversation, please answer:
|
||||
// Ask follow-up question
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: `Based on our conversation, please answer:
|
||||
1) What was the multiplication result?
|
||||
2) Which city's weather did we check?
|
||||
3) What was the temperature?
|
||||
4) What capital city was mentioned?
|
||||
Please include the specific numbers and names.`
|
||||
});
|
||||
Please include the specific numbers and names.`,
|
||||
});
|
||||
|
||||
const context: Context = {
|
||||
messages,
|
||||
tools: [weatherTool]
|
||||
};
|
||||
const context: Context = {
|
||||
messages,
|
||||
tools: [weatherTool],
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await targetProvider.generate(context, {});
|
||||
try {
|
||||
const response = await complete(targetModel, context, {});
|
||||
|
||||
// Check for error
|
||||
if (response.stopReason === "error") {
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Failed with error: ${response.error}`);
|
||||
return false;
|
||||
}
|
||||
// Check for error
|
||||
if (response.stopReason === "error") {
|
||||
console.log(`[${sourceLabel} → ${targetModel.provider}] Failed with error: ${response.error}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extract text from response
|
||||
const responseText = response.content
|
||||
.filter(b => b.type === "text")
|
||||
.map(b => b.text)
|
||||
.join(" ")
|
||||
.toLowerCase();
|
||||
// Extract text from response
|
||||
const responseText = response.content
|
||||
.filter((b) => b.type === "text")
|
||||
.map((b) => b.text)
|
||||
.join(" ")
|
||||
.toLowerCase();
|
||||
|
||||
// For aborted messages, we don't expect to find the facts
|
||||
if (sourceContext.message.stopReason === "error") {
|
||||
const hasToolCalls = response.content.some(b => b.type === "toolCall");
|
||||
const hasThinking = response.content.some(b => b.type === "thinking");
|
||||
const hasText = response.content.some(b => b.type === "text");
|
||||
// For aborted messages, we don't expect to find the facts
|
||||
if (sourceContext.message.stopReason === "error") {
|
||||
const hasToolCalls = response.content.some((b) => b.type === "toolCall");
|
||||
const hasThinking = response.content.some((b) => b.type === "thinking");
|
||||
const hasText = response.content.some((b) => b.type === "text");
|
||||
|
||||
expect(response.stopReason === "stop" || response.stopReason === "toolUse").toBe(true);
|
||||
expect(hasThinking || hasText || hasToolCalls).toBe(true);
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Handled aborted message successfully, tool calls: ${hasToolCalls}, thinking: ${hasThinking}, text: ${hasText}`);
|
||||
return true;
|
||||
}
|
||||
expect(response.stopReason === "stop" || response.stopReason === "toolUse").toBe(true);
|
||||
expect(hasThinking || hasText || hasToolCalls).toBe(true);
|
||||
console.log(
|
||||
`[${sourceLabel} → ${targetModel.provider}] Handled aborted message successfully, tool calls: ${hasToolCalls}, thinking: ${hasThinking}, text: ${hasText}`,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if response contains our facts
|
||||
const hasCalculation = responseText.includes(sourceContext.facts.calculation.toString());
|
||||
const hasCity = sourceContext.facts.city !== "none" && responseText.includes(sourceContext.facts.city.toLowerCase());
|
||||
const hasTemperature = sourceContext.facts.temperature > 0 && responseText.includes(sourceContext.facts.temperature.toString());
|
||||
const hasCapital = sourceContext.facts.capital !== "none" && responseText.includes(sourceContext.facts.capital.toLowerCase());
|
||||
// Check if response contains our facts
|
||||
const hasCalculation = responseText.includes(sourceContext.facts.calculation.toString());
|
||||
const hasCity =
|
||||
sourceContext.facts.city !== "none" && responseText.includes(sourceContext.facts.city.toLowerCase());
|
||||
const hasTemperature =
|
||||
sourceContext.facts.temperature > 0 && responseText.includes(sourceContext.facts.temperature.toString());
|
||||
const hasCapital =
|
||||
sourceContext.facts.capital !== "none" && responseText.includes(sourceContext.facts.capital.toLowerCase());
|
||||
|
||||
const success = hasCalculation && hasCity && hasTemperature && hasCapital;
|
||||
const success = hasCalculation && hasCity && hasTemperature && hasCapital;
|
||||
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Handoff test:`);
|
||||
if (!success) {
|
||||
console.log(` Calculation (${sourceContext.facts.calculation}): ${hasCalculation ? '✓' : '✗'}`);
|
||||
console.log(` City (${sourceContext.facts.city}): ${hasCity ? '✓' : '✗'}`);
|
||||
console.log(` Temperature (${sourceContext.facts.temperature}): ${hasTemperature ? '✓' : '✗'}`);
|
||||
console.log(` Capital (${sourceContext.facts.capital}): ${hasCapital ? '✓' : '✗'}`);
|
||||
} else {
|
||||
console.log(` ✓ All facts found`);
|
||||
}
|
||||
console.log(`[${sourceLabel} → ${targetModel.provider}] Handoff test:`);
|
||||
if (!success) {
|
||||
console.log(` Calculation (${sourceContext.facts.calculation}): ${hasCalculation ? "✓" : "✗"}`);
|
||||
console.log(` City (${sourceContext.facts.city}): ${hasCity ? "✓" : "✗"}`);
|
||||
console.log(` Temperature (${sourceContext.facts.temperature}): ${hasTemperature ? "✓" : "✗"}`);
|
||||
console.log(` Capital (${sourceContext.facts.capital}): ${hasCapital ? "✓" : "✗"}`);
|
||||
} else {
|
||||
console.log(` ✓ All facts found`);
|
||||
}
|
||||
|
||||
return success;
|
||||
} catch (error) {
|
||||
console.error(`[${sourceLabel} → ${targetProvider.getModel().provider}] Exception:`, error);
|
||||
return false;
|
||||
}
|
||||
return success;
|
||||
} catch (error) {
|
||||
console.error(`[${sourceLabel} → ${targetModel.provider}] Exception:`, error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
describe("Cross-Provider Handoff Tests", () => {
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Handoff", () => {
|
||||
let provider: AnthropicLLM;
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Handoff", () => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
if (model) {
|
||||
provider = new AnthropicLLM(model, process.env.ANTHROPIC_API_KEY!);
|
||||
}
|
||||
});
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting Anthropic with pre-built contexts:\n");
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("Anthropic provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
console.log("\nTesting Anthropic with pre-built contexts:\n");
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nAnthropic success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nAnthropic success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Handoff", () => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting Google with pre-built contexts:\n");
|
||||
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Handoff", () => {
|
||||
let provider: GoogleLLM;
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
if (model) {
|
||||
provider = new GoogleLLM(model, process.env.GEMINI_API_KEY!);
|
||||
}
|
||||
});
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("Google provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
console.log("\nTesting Google with pre-built contexts:\n");
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nGoogle success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Handoff", () => {
|
||||
const model: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting OpenAI Completions with pre-built contexts:\n");
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nGoogle success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Handoff", () => {
|
||||
let provider: OpenAICompletionsLLM;
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-4o-mini");
|
||||
if (model) {
|
||||
provider = new OpenAICompletionsLLM(model, process.env.OPENAI_API_KEY!);
|
||||
}
|
||||
});
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Completions success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("OpenAI Completions provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
console.log("\nTesting OpenAI Completions with pre-built contexts:\n");
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Handoff", () => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting OpenAI Responses with pre-built contexts:\n");
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Completions success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Responses success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Handoff", () => {
|
||||
let provider: OpenAIResponsesLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (model) {
|
||||
provider = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
}
|
||||
});
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("OpenAI Responses provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("\nTesting OpenAI Responses with pre-built contexts:\n");
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Responses success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
});
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
import { GoogleGenAI } from "@google/genai";
|
||||
import OpenAI from "openai";
|
||||
|
||||
const ai = new GoogleGenAI({});
|
||||
|
||||
async function main() {
|
||||
/*let pager = await ai.models.list();
|
||||
do {
|
||||
for (const model of pager.page) {
|
||||
console.log(JSON.stringify(model, null, 2));
|
||||
console.log("---");
|
||||
}
|
||||
if (!pager.hasNextPage()) break;
|
||||
await pager.nextPage();
|
||||
} while (true);*/
|
||||
|
||||
const openai = new OpenAI();
|
||||
const response = await openai.models.list();
|
||||
do {
|
||||
const page = response.data;
|
||||
for (const model of page) {
|
||||
const info = await openai.models.retrieve(model.id);
|
||||
console.log(JSON.stringify(model, null, 2));
|
||||
console.log("---");
|
||||
}
|
||||
if (!response.hasNextPage()) break;
|
||||
await response.getNextPage();
|
||||
} while (true);
|
||||
}
|
||||
|
||||
await main();
|
||||
|
|
@ -1,618 +0,0 @@
|
|||
import { describe, it, beforeAll, afterAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context, Tool, AssistantMessage, Model, ImageContent } from "../src/types.js";
|
||||
import { spawn, ChildProcess, execSync } from "child_process";
|
||||
import { createLLM, getModel } from "../src/models.js";
|
||||
import { readFileSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
// ESM modules have no built-in __filename/__dirname; reconstruct them from
// import.meta.url so fixture files (e.g. tests/data/red-circle.png) can be
// located relative to this test file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
||||
|
||||
// Calculator tool definition (same as examples).
// Shared by the tool-calling and multi-turn scenarios below; the multi-turn
// test executes these operations locally and feeds the results back.
const calculatorTool: Tool = {
  name: "calculator",
  description: "Perform basic arithmetic operations",
  // JSON Schema describing the tool's arguments.
  parameters: {
    type: "object",
    properties: {
      a: { type: "number", description: "First number" },
      b: { type: "number", description: "Second number" },
      operation: {
        type: "string",
        enum: ["add", "subtract", "multiply", "divide"],
        description: "The operation to perform"
      }
    },
    required: ["a", "b", "operation"]
  }
};
|
||||
|
||||
async function basicTextGeneration<T extends LLMOptions>(llm: LLM<T>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [
|
||||
{ role: "user", content: "Reply with exactly: 'Hello test successful'" }
|
||||
]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Hello test successful");
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
|
||||
const secondResponse = await llm.generate(context);
|
||||
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Goodbye test successful");
|
||||
}
|
||||
|
||||
async function handleToolCall<T extends LLMOptions>(llm: LLM<T>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool."
|
||||
}],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some(b => b.type == "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find(b => b.type == "toolCall")!;
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleStreaming<T extends LLMOptions>(llm: LLM<T>) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
onEvent: (event) => {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
} as T);
|
||||
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "text")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleThinking<T extends LLMOptions>(llm: LLM<T>, options: T) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.` }]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
onEvent: (event) => {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
expect(event.content.endsWith(event.delta)).toBe(true);
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
},
|
||||
...options
|
||||
});
|
||||
|
||||
|
||||
expect(response.stopReason, `Error: ${(response as any).error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "thinking")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleImage<T extends LLMOptions>(llm: LLM<T>) {
|
||||
// Check if the model supports images
|
||||
const model = llm.getModel();
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...)." },
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const lowerContent = response.content.find(b => b.type == "text")?.text || "";
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
|
||||
async function multiTurn<T extends LLMOptions>(llm: LLM<T>, thinkingOptions: T) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool."
|
||||
}
|
||||
],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await llm.generate(context, thinkingOptions);
|
||||
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add": result = a + b; break;
|
||||
case "multiply": result = a * b; break;
|
||||
default: result = 0;
|
||||
}
|
||||
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
}
|
||||
|
||||
// End-to-end suites. Each provider suite is gated on its API key (or, for
// Ollama, on a local install) via describe.skipIf, constructs its LLM once in
// beforeAll, and runs the shared scenario helpers defined above. Thinking
// options differ per provider: Google/Anthropic take a `thinking` object,
// OpenAI-Responses and OpenAI-compatible providers take `reasoningEffort`.
describe("AI Providers E2E Tests", () => {
  describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
    let llm: GoogleLLM;

    beforeAll(() => {
      llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {thinking: { enabled: true, budgetTokens: 1024 }});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Chat Completions API; no thinking/multi-turn tests for gpt-4o-mini.
  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
    let llm: OpenAIResponsesLLM;

    beforeAll(() => {
      llm = new OpenAIResponsesLLM(getModel("openai", "gpt-5-mini")!, process.env.OPENAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    // Thinking events are occasionally omitted by this model; retry twice.
    it("should handle thinking mode", {retry: 2}, async () => {
      await handleThinking(llm, {reasoningEffort: "high"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "high"});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Note: gated on an OAuth token, not ANTHROPIC_API_KEY.
  describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
    let llm: AnthropicLLM;

    beforeAll(() => {
      llm = new AnthropicLLM(getModel("anthropic", "claude-sonnet-4-20250514")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {thinking: { enabled: true } });
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // OpenAI-compatible providers below reuse OpenAICompletionsLLM with a
  // provider-specific model entry; none of them run the image test except
  // OpenRouter's glm-4.5v.
  describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("xai", "grok-code-fast-1")!, process.env.XAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("groq", "openai/gpt-oss-20b")!, process.env.GROQ_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("cerebras", "gpt-oss-120b")!, process.env.CEREBRAS_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("openrouter", "z-ai/glm-4.5v")!, process.env.OPENROUTER_API_KEY!);;
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", { retry: 2 }, async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Check if ollama is installed (evaluated at collection time, so the suite
  // below is skipped entirely on machines without the binary).
  let ollamaInstalled = false;
  try {
    execSync("which ollama", { stdio: "ignore" });
    ollamaInstalled = true;
  } catch {
    ollamaInstalled = false;
  }

  // Local-server suite: pulls the model if missing, spawns `ollama serve`,
  // polls until it answers, and tears the process down in afterAll.
  describe.skipIf(!ollamaInstalled)("Ollama Provider (gpt-oss-20b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;
    let ollamaProcess: ChildProcess | null = null;

    beforeAll(async () => {
      // Check if model is available, if not pull it
      try {
        execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
      } catch {
        console.log("Pulling gpt-oss:20b model for Ollama tests...");
        try {
          execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
        } catch (e) {
          // NOTE(review): returning here leaves `llm` undefined, so the tests
          // below will fail rather than be skipped — confirm this is intended.
          console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
          return;
        }
      }

      // Start ollama server
      ollamaProcess = spawn("ollama", ["serve"], {
        detached: false,
        stdio: "ignore"
      });

      // Wait for server to be ready by polling the tags endpoint every 500ms.
      await new Promise<void>((resolve) => {
        const checkServer = async () => {
          try {
            const response = await fetch("http://localhost:11434/api/tags");
            if (response.ok) {
              resolve();
            } else {
              setTimeout(checkServer, 500);
            }
          } catch {
            setTimeout(checkServer, 500);
          }
        };
        setTimeout(checkServer, 1000); // Initial delay
      });

      // Hand-built model entry: Ollama is not in the static model registry,
      // so describe it inline (free local inference => zero cost).
      const model: Model = {
        id: "gpt-oss:20b",
        provider: "ollama",
        baseUrl: "http://localhost:11434/v1",
        reasoning: true,
        input: ["text"],
        contextWindow: 128000,
        maxTokens: 16000,
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        name: "Ollama GPT-OSS 20B"
      }
      // Ollama ignores the API key; any non-empty string works.
      llm = new OpenAICompletionsLLM(model, "dummy");
    }, 30000); // 30 second timeout for setup

    afterAll(() => {
      // Kill ollama server
      if (ollamaProcess) {
        ollamaProcess.kill("SIGTERM");
        ollamaProcess = null;
      }
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  /*
  describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (Haiku 3.5)", () => {
    let llm: AnthropicLLM;

    beforeAll(() => {
      llm = createLLM("anthropic", "claude-3-5-haiku-latest");
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: {enabled: true}});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });
  */
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue