mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-22 02:03:42 +00:00
Massive refactor of API
- Switch to function based API - Anthropic SDK style async generator - Fully typed with escape hatches for custom models
This commit is contained in:
parent
004de3c9d0
commit
66cefb236e
29 changed files with 5835 additions and 6225 deletions
|
|
@ -1,128 +1,103 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context } from "../src/types.js";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
import { complete, stream } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
|
||||
|
||||
async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "What is 15 + 27? Think step by step. Then list 50 first names."
|
||||
}]
|
||||
};
|
||||
async function testAbortSignal<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "What is 15 + 27? Think step by step. Then list 50 first names.",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
let abortFired = false;
|
||||
const controller = new AbortController();
|
||||
const response = await llm.generate(context, {
|
||||
...options,
|
||||
signal: controller.signal,
|
||||
onEvent: (event) => {
|
||||
// console.log(JSON.stringify(event, null, 2));
|
||||
if (abortFired) return;
|
||||
setTimeout(() => controller.abort(), 2000);
|
||||
abortFired = true;
|
||||
}
|
||||
});
|
||||
let abortFired = false;
|
||||
const controller = new AbortController();
|
||||
const response = await stream(llm, context, { ...options, signal: controller.signal });
|
||||
for await (const event of response) {
|
||||
if (abortFired) return;
|
||||
setTimeout(() => controller.abort(), 3000);
|
||||
abortFired = true;
|
||||
break;
|
||||
}
|
||||
const msg = await response.finalMessage();
|
||||
|
||||
// If we get here without throwing, the abort didn't work
|
||||
expect(response.stopReason).toBe("error");
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
// If we get here without throwing, the abort didn't work
|
||||
expect(msg.stopReason).toBe("error");
|
||||
expect(msg.content.length).toBeGreaterThan(0);
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Please continue, but only generate 5 names." });
|
||||
context.messages.push(msg);
|
||||
context.messages.push({ role: "user", content: "Please continue, but only generate 5 names." });
|
||||
|
||||
// Ensure we can still make requests after abort
|
||||
const followUp = await llm.generate(context, options);
|
||||
expect(followUp.stopReason).toBe("stop");
|
||||
expect(followUp.content.length).toBeGreaterThan(0);
|
||||
const followUp = await complete(llm, context, options);
|
||||
expect(followUp.stopReason).toBe("stop");
|
||||
expect(followUp.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
|
||||
async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
const controller = new AbortController();
|
||||
async function testImmediateAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
const controller = new AbortController();
|
||||
|
||||
// Abort immediately
|
||||
controller.abort();
|
||||
controller.abort();
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Hello" }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Hello" }],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
...options,
|
||||
signal: controller.signal
|
||||
});
|
||||
expect(response.stopReason).toBe("error");
|
||||
const response = await complete(llm, context, { ...options, signal: controller.signal });
|
||||
expect(response.stopReason).toBe("error");
|
||||
}
|
||||
|
||||
describe("AI Providers Abort Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
|
||||
let llm: GoogleLLM;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinking: { enabled: true } });
|
||||
});
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Abort", () => {
|
||||
const llm: Model<"openai-completions"> = {
|
||||
...getModel("openai", "gpt-4o-mini")!,
|
||||
api: "openai-completions",
|
||||
};
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Abort", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Abort", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Abort", () => {
|
||||
let llm: OpenAIResponsesLLM;
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (!model) {
|
||||
throw new Error("Model not found");
|
||||
}
|
||||
llm = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
|
||||
const llm = getModel("anthropic", "claude-opus-4-1-20250805");
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, {});
|
||||
});
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, {});
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
|
||||
let llm: AnthropicLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new AnthropicLLM(getModel("anthropic", "claude-opus-4-1-20250805")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
|
||||
});
|
||||
|
||||
it("should abort mid-stream", async () => {
|
||||
await testAbortSignal(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
|
||||
});
|
||||
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should handle immediate abort", async () => {
|
||||
await testImmediateAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,313 +1,265 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context, UserMessage, AssistantMessage } from "../src/types.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { complete } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
|
||||
|
||||
async function testEmptyMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with completely empty content array
|
||||
const emptyMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: []
|
||||
};
|
||||
async function testEmptyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with completely empty content array
|
||||
const emptyMessage: UserMessage = {
|
||||
role: "user",
|
||||
content: [],
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [emptyMessage]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [emptyMessage],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
// Should either handle gracefully or return an error
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Most providers should return an error or empty response
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
// If it didn't error, it should have some content or gracefully handle empty
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
// Should either handle gracefully or return an error
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testEmptyStringMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with empty string content
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: ""
|
||||
}]
|
||||
};
|
||||
async function testEmptyStringMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with empty string content
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty string gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testWhitespaceOnlyMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with whitespace-only content
|
||||
const context: Context = {
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: " \n\t "
|
||||
}]
|
||||
};
|
||||
async function testWhitespaceOnlyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with whitespace-only content
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: " \n\t ",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle whitespace-only gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle whitespace-only gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
}
|
||||
}
|
||||
|
||||
async function testEmptyAssistantMessage<T extends LLMOptions>(llm: LLM<T>, options: T = {} as T) {
|
||||
// Test with empty assistant message in conversation flow
|
||||
// User -> Empty Assistant -> User
|
||||
const emptyAssistant: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: llm.getApi(),
|
||||
provider: llm.getModel().provider,
|
||||
model: llm.getModel().id,
|
||||
usage: {
|
||||
input: 10,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }
|
||||
},
|
||||
stopReason: "stop"
|
||||
};
|
||||
async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Test with empty assistant message in conversation flow
|
||||
// User -> Empty Assistant -> User
|
||||
const emptyAssistant: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: llm.api,
|
||||
provider: llm.provider,
|
||||
model: llm.id,
|
||||
usage: {
|
||||
input: 10,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Hello, how are you?"
|
||||
},
|
||||
emptyAssistant,
|
||||
{
|
||||
role: "user",
|
||||
content: "Please respond this time."
|
||||
}
|
||||
]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Hello, how are you?",
|
||||
},
|
||||
emptyAssistant,
|
||||
{
|
||||
role: "user",
|
||||
content: "Please respond this time.",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty assistant message in context gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
const response = await complete(llm, context, options);
|
||||
|
||||
expect(response).toBeDefined();
|
||||
expect(response.role).toBe("assistant");
|
||||
|
||||
// Should handle empty assistant message in context gracefully
|
||||
if (response.stopReason === "error") {
|
||||
expect(response.error).toBeDefined();
|
||||
} else {
|
||||
expect(response.content).toBeDefined();
|
||||
expect(response.content.length).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
|
||||
describe("AI Providers Empty Message Tests", () => {
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
|
||||
let llm: GoogleLLM;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
|
||||
const llm = getModel("openai", "gpt-4o-mini");
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
|
||||
let llm: OpenAIResponsesLLM;
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (!model) {
|
||||
throw new Error("Model gpt-5-mini not found");
|
||||
}
|
||||
llm = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
|
||||
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
|
||||
let llm: AnthropicLLM;
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new AnthropicLLM(getModel("anthropic", "claude-3-5-haiku-20241022")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
|
||||
const llm = getModel("xai", "grok-3");
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
// Test with xAI/Grok if available
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("xai", "grok-3");
|
||||
if (!model) {
|
||||
throw new Error("Model grok-3 not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.XAI_API_KEY!);
|
||||
});
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
|
||||
const llm = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Test with Groq if available
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
|
||||
const llm = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("groq", "llama-3.3-70b-versatile");
|
||||
if (!model) {
|
||||
throw new Error("Model llama-3.3-70b-versatile not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.GROQ_API_KEY!);
|
||||
});
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Test with Cerebras if available
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
|
||||
let llm: OpenAICompletionsLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("cerebras", "gpt-oss-120b");
|
||||
if (!model) {
|
||||
throw new Error("Model gpt-oss-120b not found");
|
||||
}
|
||||
llm = new OpenAICompletionsLLM(model, process.env.CEREBRAS_API_KEY!);
|
||||
});
|
||||
|
||||
it("should handle empty content array", async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should handle empty assistant message in conversation", async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,311 +1,612 @@
|
|||
import { describe, it, beforeAll, expect } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { generate, generateComplete } from "../src/generate.js";
|
||||
import type { Context, Tool, GenerateOptionsUnified, Model, ImageContent, GenerateStream, GenerateOptions } from "../src/types.js";
|
||||
import { type ChildProcess, execSync, spawn } from "child_process";
|
||||
import { readFileSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { afterAll, beforeAll, describe, expect, it } from "vitest";
|
||||
import { complete, stream } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool } from "../src/types.js";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dirname = dirname(__filename);
|
||||
|
||||
// Calculator tool definition (same as examples)
|
||||
const calculatorTool: Tool = {
|
||||
name: "calculator",
|
||||
description: "Perform basic arithmetic operations",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
a: { type: "number", description: "First number" },
|
||||
b: { type: "number", description: "Second number" },
|
||||
operation: {
|
||||
type: "string",
|
||||
enum: ["add", "subtract", "multiply", "divide"],
|
||||
description: "The operation to perform"
|
||||
}
|
||||
},
|
||||
required: ["a", "b", "operation"]
|
||||
}
|
||||
name: "calculator",
|
||||
description: "Perform basic arithmetic operations",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
a: { type: "number", description: "First number" },
|
||||
b: { type: "number", description: "Second number" },
|
||||
operation: {
|
||||
type: "string",
|
||||
enum: ["add", "subtract", "multiply", "divide"],
|
||||
description: "The operation to perform",
|
||||
},
|
||||
},
|
||||
required: ["a", "b", "operation"],
|
||||
},
|
||||
};
|
||||
|
||||
async function basicTextGeneration<P extends GenerateOptions>(model: Model, options?: P) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [
|
||||
{ role: "user", content: "Reply with exactly: 'Hello test successful'" }
|
||||
]
|
||||
};
|
||||
async function basicTextGeneration<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [{ role: "user", content: "Reply with exactly: 'Hello test successful'" }],
|
||||
};
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map((b) => (b.type === "text" ? b.text : "")).join("")).toContain("Hello test successful");
|
||||
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Hello test successful");
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
const secondResponse = await complete(model, context, options);
|
||||
|
||||
const secondResponse = await generateComplete(model, context, options);
|
||||
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Goodbye test successful");
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map((b) => (b.type === "text" ? b.text : "")).join("")).toContain(
|
||||
"Goodbye test successful",
|
||||
);
|
||||
}
|
||||
|
||||
async function handleToolCall(model: Model, options?: GenerateOptionsUnified) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool."
|
||||
}],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
async function handleToolCall<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool.",
|
||||
},
|
||||
],
|
||||
tools: [calculatorTool],
|
||||
};
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some(b => b.type == "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find(b => b.type == "toolCall");
|
||||
if (toolCall && toolCall.type === "toolCall") {
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
const response = await complete(model, context, options);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some((b) => b.type === "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find((b) => b.type === "toolCall");
|
||||
if (toolCall && toolCall.type === "toolCall") {
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
}
|
||||
|
||||
async function handleStreaming(model: Model, options?: GenerateOptionsUnified) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
async function handleStreaming<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }],
|
||||
};
|
||||
|
||||
const stream = generate(model, context, options);
|
||||
const s = stream(model, context, options);
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
for await (const event of s) {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
const response = await stream.finalMessage();
|
||||
const response = await s.finalMessage();
|
||||
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "text")).toBeTruthy();
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some((b) => b.type === "text")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleThinking(model: Model, options: GenerateOptionsUnified) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
async function handleThinking<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.` }]
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.`,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const stream = generate(model, context, options);
|
||||
const s = stream(model, context, options);
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
}
|
||||
for await (const event of s) {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
}
|
||||
|
||||
const response = await stream.finalMessage();
|
||||
const response = await s.finalMessage();
|
||||
|
||||
expect(response.stopReason, `Error: ${response.error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "thinking")).toBeTruthy();
|
||||
expect(response.stopReason, `Error: ${response.error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some((b) => b.type === "thinking")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleImage(model: Model, options?: GenerateOptionsUnified) {
|
||||
// Check if the model supports images
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
async function handleImage<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
// Check if the model supports images
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...)." },
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...).",
|
||||
},
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await generateComplete(model, context, options);
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const textContent = response.content.find(b => b.type == "text");
|
||||
if (textContent && textContent.type === "text") {
|
||||
const lowerContent = textContent.text.toLowerCase();
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const textContent = response.content.find((b) => b.type === "text");
|
||||
if (textContent && textContent.type === "text") {
|
||||
const lowerContent = textContent.text.toLowerCase();
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
}
|
||||
|
||||
async function multiTurn(model: Model, options?: GenerateOptionsUnified) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool."
|
||||
}
|
||||
],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
async function multiTurn<TApi extends Api>(model: Model<TApi>, options?: OptionsForApi<TApi>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool.",
|
||||
},
|
||||
],
|
||||
tools: [calculatorTool],
|
||||
};
|
||||
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await generateComplete(model, context, options);
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await complete(model, context, options);
|
||||
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add": result = a + b; break;
|
||||
case "multiply": result = a * b; break;
|
||||
default: result = 0;
|
||||
}
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add":
|
||||
result = a + b;
|
||||
break;
|
||||
case "multiply":
|
||||
result = a * b;
|
||||
break;
|
||||
default:
|
||||
result = 0;
|
||||
}
|
||||
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false
|
||||
});
|
||||
}
|
||||
}
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
}
|
||||
|
||||
describe("Generate E2E Tests", () => {
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
|
||||
let model: Model;
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
beforeAll(() => {
|
||||
model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
it("should handle ", async () => {
|
||||
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
|
||||
let model: Model;
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
model = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
|
||||
const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(model, { reasoning: "low" });
|
||||
});
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(model, { reasoning: "medium" });
|
||||
});
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
});
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle ", { retry: 2 }, async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle thinking", async () => {
|
||||
await handleThinking(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(model, { thinkingEnabled: true });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(model);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(model);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(model);
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
|
||||
const llm = getModel("xai", "grok-code-fast-1");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
|
||||
const llm = getModel("groq", "openai/gpt-oss-20b");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
|
||||
const llm = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
|
||||
const llm = getModel("openrouter", "z-ai/glm-4.5v");
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", { retry: 2 }, async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle image input", async () => {
|
||||
await handleImage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
// Check if ollama is installed
|
||||
let ollamaInstalled = false;
|
||||
try {
|
||||
execSync("which ollama", { stdio: "ignore" });
|
||||
ollamaInstalled = true;
|
||||
} catch {
|
||||
ollamaInstalled = false;
|
||||
}
|
||||
|
||||
describe.skipIf(!ollamaInstalled)("Ollama Provider (gpt-oss-20b via OpenAI Completions)", () => {
|
||||
let llm: Model<"openai-completions">;
|
||||
let ollamaProcess: ChildProcess | null = null;
|
||||
|
||||
beforeAll(async () => {
|
||||
// Check if model is available, if not pull it
|
||||
try {
|
||||
execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
|
||||
} catch {
|
||||
console.log("Pulling gpt-oss:20b model for Ollama tests...");
|
||||
try {
|
||||
execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
|
||||
} catch (e) {
|
||||
console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Start ollama server
|
||||
ollamaProcess = spawn("ollama", ["serve"], {
|
||||
detached: false,
|
||||
stdio: "ignore",
|
||||
});
|
||||
|
||||
// Wait for server to be ready
|
||||
await new Promise<void>((resolve) => {
|
||||
const checkServer = async () => {
|
||||
try {
|
||||
const response = await fetch("http://localhost:11434/api/tags");
|
||||
if (response.ok) {
|
||||
resolve();
|
||||
} else {
|
||||
setTimeout(checkServer, 500);
|
||||
}
|
||||
} catch {
|
||||
setTimeout(checkServer, 500);
|
||||
}
|
||||
};
|
||||
setTimeout(checkServer, 1000); // Initial delay
|
||||
});
|
||||
|
||||
llm = {
|
||||
id: "gpt-oss:20b",
|
||||
api: "openai-completions",
|
||||
provider: "ollama",
|
||||
baseUrl: "http://localhost:11434/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16000,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
name: "Ollama GPT-OSS 20B",
|
||||
};
|
||||
}, 30000); // 30 second timeout for setup
|
||||
|
||||
afterAll(() => {
|
||||
// Kill ollama server
|
||||
if (ollamaProcess) {
|
||||
ollamaProcess.kill("SIGTERM");
|
||||
ollamaProcess = null;
|
||||
}
|
||||
});
|
||||
|
||||
it("should complete basic text generation", async () => {
|
||||
await basicTextGeneration(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle tool calling", async () => {
|
||||
await handleToolCall(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle streaming", async () => {
|
||||
await handleStreaming(llm, { apiKey: "test" });
|
||||
});
|
||||
|
||||
it("should handle thinking mode", async () => {
|
||||
await handleThinking(llm, { apiKey: "test", reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", async () => {
|
||||
await multiTurn(llm, { apiKey: "test", reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,503 +1,489 @@
|
|||
import { describe, it, expect, beforeAll } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, Context, AssistantMessage, Tool, Message } from "../src/types.js";
|
||||
import { createLLM, getModel } from "../src/models.js";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { complete } from "../src/generate.js";
|
||||
import { getModel } from "../src/models.js";
|
||||
import type { Api, AssistantMessage, Context, Message, Model, Tool } from "../src/types.js";
|
||||
|
||||
// Tool for testing
|
||||
const weatherTool: Tool = {
|
||||
name: "get_weather",
|
||||
description: "Get the weather for a location",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
location: { type: "string", description: "City name" }
|
||||
},
|
||||
required: ["location"]
|
||||
}
|
||||
name: "get_weather",
|
||||
description: "Get the weather for a location",
|
||||
parameters: {
|
||||
type: "object",
|
||||
properties: {
|
||||
location: { type: "string", description: "City name" },
|
||||
},
|
||||
required: ["location"],
|
||||
},
|
||||
};
|
||||
|
||||
// Pre-built contexts representing typical outputs from each provider
|
||||
const providerContexts = {
|
||||
// Anthropic-style message with thinking block
|
||||
anthropic: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 17 * 23. That's 17 * 20 + 17 * 3 = 340 + 51 = 391",
|
||||
thinkingSignature: "signature_abc123"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I'll help you with the calculation and check the weather. The result of 17 × 23 is 391. The capital of Austria is Vienna. Now let me check the weather for you."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "toolu_01abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Tokyo" }
|
||||
}
|
||||
],
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-haiku-latest",
|
||||
usage: { input: 100, output: 50, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "toolu_01abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Tokyo: 18°C, partly cloudy",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 391,
|
||||
city: "Tokyo",
|
||||
temperature: 18,
|
||||
capital: "Vienna"
|
||||
}
|
||||
},
|
||||
// Anthropic-style message with thinking block
|
||||
anthropic: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 17 * 23. That's 17 * 20 + 17 * 3 = 340 + 51 = 391",
|
||||
thinkingSignature: "signature_abc123",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I'll help you with the calculation and check the weather. The result of 17 × 23 is 391. The capital of Austria is Vienna. Now let me check the weather for you.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "toolu_01abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Tokyo" },
|
||||
},
|
||||
],
|
||||
provider: "anthropic",
|
||||
model: "claude-3-5-haiku-latest",
|
||||
usage: {
|
||||
input: 100,
|
||||
output: 50,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "toolu_01abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Tokyo: 18°C, partly cloudy",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 391,
|
||||
city: "Tokyo",
|
||||
temperature: 18,
|
||||
capital: "Vienna",
|
||||
},
|
||||
},
|
||||
|
||||
// Google-style message with thinking
|
||||
google: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "I need to multiply 19 * 24. Let me work through this: 19 * 24 = 19 * 20 + 19 * 4 = 380 + 76 = 456",
|
||||
thinkingSignature: undefined
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The multiplication of 19 × 24 equals 456. The capital of France is Paris. Let me check the weather in Berlin for you."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_gemini_123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Berlin" }
|
||||
}
|
||||
],
|
||||
provider: "google",
|
||||
model: "gemini-2.5-flash",
|
||||
usage: { input: 120, output: 60, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_gemini_123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Berlin: 22°C, sunny",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 456,
|
||||
city: "Berlin",
|
||||
temperature: 22,
|
||||
capital: "Paris"
|
||||
}
|
||||
},
|
||||
// Google-style message with thinking
|
||||
google: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking:
|
||||
"I need to multiply 19 * 24. Let me work through this: 19 * 24 = 19 * 20 + 19 * 4 = 380 + 76 = 456",
|
||||
thinkingSignature: undefined,
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The multiplication of 19 × 24 equals 456. The capital of France is Paris. Let me check the weather in Berlin for you.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_gemini_123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "Berlin" },
|
||||
},
|
||||
],
|
||||
provider: "google",
|
||||
model: "gemini-2.5-flash",
|
||||
usage: {
|
||||
input: 120,
|
||||
output: 60,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_gemini_123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Berlin: 22°C, sunny",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 456,
|
||||
city: "Berlin",
|
||||
temperature: 22,
|
||||
capital: "Paris",
|
||||
},
|
||||
},
|
||||
|
||||
// OpenAI Completions style (with reasoning_content)
|
||||
openaiCompletions: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 21 * 25. That's 21 * 25 = 525",
|
||||
thinkingSignature: "reasoning_content"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The result of 21 × 25 is 525. The capital of Spain is Madrid. I'll check the weather in London now."
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "London" }
|
||||
}
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini",
|
||||
usage: { input: 110, output: 55, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in London: 15°C, rainy",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 525,
|
||||
city: "London",
|
||||
temperature: 15,
|
||||
capital: "Madrid"
|
||||
}
|
||||
},
|
||||
// OpenAI Completions style (with reasoning_content)
|
||||
openaiCompletions: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me calculate 21 * 25. That's 21 * 25 = 525",
|
||||
thinkingSignature: "reasoning_content",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The result of 21 × 25 is 525. The capital of Spain is Madrid. I'll check the weather in London now.",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_abc123",
|
||||
name: "get_weather",
|
||||
arguments: { location: "London" },
|
||||
},
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-4o-mini",
|
||||
usage: {
|
||||
input: 110,
|
||||
output: 55,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_abc123",
|
||||
toolName: "get_weather",
|
||||
content: "Weather in London: 15°C, rainy",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 525,
|
||||
city: "London",
|
||||
temperature: 15,
|
||||
capital: "Madrid",
|
||||
},
|
||||
},
|
||||
|
||||
// OpenAI Responses style (with complex tool call IDs)
|
||||
openaiResponses: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Calculating 18 * 27: 18 * 27 = 486",
|
||||
thinkingSignature: '{"type":"reasoning","id":"rs_2b2342acdde","summary":[{"type":"summary_text","text":"Calculating 18 * 27: 18 * 27 = 486"}]}'
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The calculation of 18 × 27 gives us 486. The capital of Italy is Rome. Let me check Sydney's weather.",
|
||||
textSignature: "msg_response_456"
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_789_item_012", // Anthropic requires alphanumeric, dash, and underscore only
|
||||
name: "get_weather",
|
||||
arguments: { location: "Sydney" }
|
||||
}
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
usage: { input: 115, output: 58, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "toolUse"
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_789_item_012", // Match the updated ID format
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Sydney: 25°C, clear",
|
||||
isError: false
|
||||
},
|
||||
facts: {
|
||||
calculation: 486,
|
||||
city: "Sydney",
|
||||
temperature: 25,
|
||||
capital: "Rome"
|
||||
}
|
||||
},
|
||||
// OpenAI Responses style (with complex tool call IDs)
|
||||
openaiResponses: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Calculating 18 * 27: 18 * 27 = 486",
|
||||
thinkingSignature:
|
||||
'{"type":"reasoning","id":"rs_2b2342acdde","summary":[{"type":"summary_text","text":"Calculating 18 * 27: 18 * 27 = 486"}]}',
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "The calculation of 18 × 27 gives us 486. The capital of Italy is Rome. Let me check Sydney's weather.",
|
||||
textSignature: "msg_response_456",
|
||||
},
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "call_789_item_012", // Anthropic requires alphanumeric, dash, and underscore only
|
||||
name: "get_weather",
|
||||
arguments: { location: "Sydney" },
|
||||
},
|
||||
],
|
||||
provider: "openai",
|
||||
model: "gpt-5-mini",
|
||||
usage: {
|
||||
input: 115,
|
||||
output: 58,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
} as AssistantMessage,
|
||||
toolResult: {
|
||||
role: "toolResult" as const,
|
||||
toolCallId: "call_789_item_012", // Match the updated ID format
|
||||
toolName: "get_weather",
|
||||
content: "Weather in Sydney: 25°C, clear",
|
||||
isError: false,
|
||||
},
|
||||
facts: {
|
||||
calculation: 486,
|
||||
city: "Sydney",
|
||||
temperature: 25,
|
||||
capital: "Rome",
|
||||
},
|
||||
},
|
||||
|
||||
// Aborted message (stopReason: 'error')
|
||||
aborted: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me start calculating 20 * 30...",
|
||||
thinkingSignature: "partial_sig"
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I was about to calculate 20 × 30 which is"
|
||||
}
|
||||
],
|
||||
provider: "test",
|
||||
model: "test-model",
|
||||
usage: { input: 50, output: 25, cacheRead: 0, cacheWrite: 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 } },
|
||||
stopReason: "error",
|
||||
error: "Request was aborted"
|
||||
} as AssistantMessage,
|
||||
toolResult: null,
|
||||
facts: {
|
||||
calculation: 600,
|
||||
city: "none",
|
||||
temperature: 0,
|
||||
capital: "none"
|
||||
}
|
||||
}
|
||||
// Aborted message (stopReason: 'error')
|
||||
aborted: {
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "Let me start calculating 20 * 30...",
|
||||
thinkingSignature: "partial_sig",
|
||||
},
|
||||
{
|
||||
type: "text",
|
||||
text: "I was about to calculate 20 × 30 which is",
|
||||
},
|
||||
],
|
||||
provider: "test",
|
||||
model: "test-model",
|
||||
usage: {
|
||||
input: 50,
|
||||
output: 25,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
error: "Request was aborted",
|
||||
} as AssistantMessage,
|
||||
toolResult: null,
|
||||
facts: {
|
||||
calculation: 600,
|
||||
city: "none",
|
||||
temperature: 0,
|
||||
capital: "none",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/**
|
||||
* Test that a provider can handle contexts from different sources
|
||||
*/
|
||||
async function testProviderHandoff(
|
||||
targetProvider: LLM<any>,
|
||||
sourceLabel: string,
|
||||
sourceContext: typeof providerContexts[keyof typeof providerContexts]
|
||||
async function testProviderHandoff<TApi extends Api>(
|
||||
targetModel: Model<TApi>,
|
||||
sourceLabel: string,
|
||||
sourceContext: (typeof providerContexts)[keyof typeof providerContexts],
|
||||
): Promise<boolean> {
|
||||
// Build conversation context
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Please do some calculations, tell me about capitals, and check the weather."
|
||||
},
|
||||
sourceContext.message
|
||||
];
|
||||
// Build conversation context
|
||||
const messages: Message[] = [
|
||||
{
|
||||
role: "user",
|
||||
content: "Please do some calculations, tell me about capitals, and check the weather.",
|
||||
},
|
||||
sourceContext.message,
|
||||
];
|
||||
|
||||
// Add tool result if present
|
||||
if (sourceContext.toolResult) {
|
||||
messages.push(sourceContext.toolResult);
|
||||
}
|
||||
// Add tool result if present
|
||||
if (sourceContext.toolResult) {
|
||||
messages.push(sourceContext.toolResult);
|
||||
}
|
||||
|
||||
// Ask follow-up question
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: `Based on our conversation, please answer:
|
||||
// Ask follow-up question
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: `Based on our conversation, please answer:
|
||||
1) What was the multiplication result?
|
||||
2) Which city's weather did we check?
|
||||
3) What was the temperature?
|
||||
4) What capital city was mentioned?
|
||||
Please include the specific numbers and names.`
|
||||
});
|
||||
Please include the specific numbers and names.`,
|
||||
});
|
||||
|
||||
const context: Context = {
|
||||
messages,
|
||||
tools: [weatherTool]
|
||||
};
|
||||
const context: Context = {
|
||||
messages,
|
||||
tools: [weatherTool],
|
||||
};
|
||||
|
||||
try {
|
||||
const response = await targetProvider.generate(context, {});
|
||||
try {
|
||||
const response = await complete(targetModel, context, {});
|
||||
|
||||
// Check for error
|
||||
if (response.stopReason === "error") {
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Failed with error: ${response.error}`);
|
||||
return false;
|
||||
}
|
||||
// Check for error
|
||||
if (response.stopReason === "error") {
|
||||
console.log(`[${sourceLabel} → ${targetModel.provider}] Failed with error: ${response.error}`);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Extract text from response
|
||||
const responseText = response.content
|
||||
.filter(b => b.type === "text")
|
||||
.map(b => b.text)
|
||||
.join(" ")
|
||||
.toLowerCase();
|
||||
// Extract text from response
|
||||
const responseText = response.content
|
||||
.filter((b) => b.type === "text")
|
||||
.map((b) => b.text)
|
||||
.join(" ")
|
||||
.toLowerCase();
|
||||
|
||||
// For aborted messages, we don't expect to find the facts
|
||||
if (sourceContext.message.stopReason === "error") {
|
||||
const hasToolCalls = response.content.some(b => b.type === "toolCall");
|
||||
const hasThinking = response.content.some(b => b.type === "thinking");
|
||||
const hasText = response.content.some(b => b.type === "text");
|
||||
// For aborted messages, we don't expect to find the facts
|
||||
if (sourceContext.message.stopReason === "error") {
|
||||
const hasToolCalls = response.content.some((b) => b.type === "toolCall");
|
||||
const hasThinking = response.content.some((b) => b.type === "thinking");
|
||||
const hasText = response.content.some((b) => b.type === "text");
|
||||
|
||||
expect(response.stopReason === "stop" || response.stopReason === "toolUse").toBe(true);
|
||||
expect(hasThinking || hasText || hasToolCalls).toBe(true);
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Handled aborted message successfully, tool calls: ${hasToolCalls}, thinking: ${hasThinking}, text: ${hasText}`);
|
||||
return true;
|
||||
}
|
||||
expect(response.stopReason === "stop" || response.stopReason === "toolUse").toBe(true);
|
||||
expect(hasThinking || hasText || hasToolCalls).toBe(true);
|
||||
console.log(
|
||||
`[${sourceLabel} → ${targetModel.provider}] Handled aborted message successfully, tool calls: ${hasToolCalls}, thinking: ${hasThinking}, text: ${hasText}`,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if response contains our facts
|
||||
const hasCalculation = responseText.includes(sourceContext.facts.calculation.toString());
|
||||
const hasCity = sourceContext.facts.city !== "none" && responseText.includes(sourceContext.facts.city.toLowerCase());
|
||||
const hasTemperature = sourceContext.facts.temperature > 0 && responseText.includes(sourceContext.facts.temperature.toString());
|
||||
const hasCapital = sourceContext.facts.capital !== "none" && responseText.includes(sourceContext.facts.capital.toLowerCase());
|
||||
// Check if response contains our facts
|
||||
const hasCalculation = responseText.includes(sourceContext.facts.calculation.toString());
|
||||
const hasCity =
|
||||
sourceContext.facts.city !== "none" && responseText.includes(sourceContext.facts.city.toLowerCase());
|
||||
const hasTemperature =
|
||||
sourceContext.facts.temperature > 0 && responseText.includes(sourceContext.facts.temperature.toString());
|
||||
const hasCapital =
|
||||
sourceContext.facts.capital !== "none" && responseText.includes(sourceContext.facts.capital.toLowerCase());
|
||||
|
||||
const success = hasCalculation && hasCity && hasTemperature && hasCapital;
|
||||
const success = hasCalculation && hasCity && hasTemperature && hasCapital;
|
||||
|
||||
console.log(`[${sourceLabel} → ${targetProvider.getModel().provider}] Handoff test:`);
|
||||
if (!success) {
|
||||
console.log(` Calculation (${sourceContext.facts.calculation}): ${hasCalculation ? '✓' : '✗'}`);
|
||||
console.log(` City (${sourceContext.facts.city}): ${hasCity ? '✓' : '✗'}`);
|
||||
console.log(` Temperature (${sourceContext.facts.temperature}): ${hasTemperature ? '✓' : '✗'}`);
|
||||
console.log(` Capital (${sourceContext.facts.capital}): ${hasCapital ? '✓' : '✗'}`);
|
||||
} else {
|
||||
console.log(` ✓ All facts found`);
|
||||
}
|
||||
console.log(`[${sourceLabel} → ${targetModel.provider}] Handoff test:`);
|
||||
if (!success) {
|
||||
console.log(` Calculation (${sourceContext.facts.calculation}): ${hasCalculation ? "✓" : "✗"}`);
|
||||
console.log(` City (${sourceContext.facts.city}): ${hasCity ? "✓" : "✗"}`);
|
||||
console.log(` Temperature (${sourceContext.facts.temperature}): ${hasTemperature ? "✓" : "✗"}`);
|
||||
console.log(` Capital (${sourceContext.facts.capital}): ${hasCapital ? "✓" : "✗"}`);
|
||||
} else {
|
||||
console.log(` ✓ All facts found`);
|
||||
}
|
||||
|
||||
return success;
|
||||
} catch (error) {
|
||||
console.error(`[${sourceLabel} → ${targetProvider.getModel().provider}] Exception:`, error);
|
||||
return false;
|
||||
}
|
||||
return success;
|
||||
} catch (error) {
|
||||
console.error(`[${sourceLabel} → ${targetModel.provider}] Exception:`, error);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
describe("Cross-Provider Handoff Tests", () => {
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Handoff", () => {
|
||||
let provider: AnthropicLLM;
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Handoff", () => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
if (model) {
|
||||
provider = new AnthropicLLM(model, process.env.ANTHROPIC_API_KEY!);
|
||||
}
|
||||
});
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting Anthropic with pre-built contexts:\n");
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("Anthropic provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
console.log("\nTesting Anthropic with pre-built contexts:\n");
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nAnthropic success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nAnthropic success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Handoff", () => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting Google with pre-built contexts:\n");
|
||||
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Handoff", () => {
|
||||
let provider: GoogleLLM;
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("google", "gemini-2.5-flash");
|
||||
if (model) {
|
||||
provider = new GoogleLLM(model, process.env.GEMINI_API_KEY!);
|
||||
}
|
||||
});
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("Google provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
console.log("\nTesting Google with pre-built contexts:\n");
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nGoogle success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Handoff", () => {
|
||||
const model: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting OpenAI Completions with pre-built contexts:\n");
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nGoogle success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Handoff", () => {
|
||||
let provider: OpenAICompletionsLLM;
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-4o-mini");
|
||||
if (model) {
|
||||
provider = new OpenAICompletionsLLM(model, process.env.OPENAI_API_KEY!);
|
||||
}
|
||||
});
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Completions success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("OpenAI Completions provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
|
||||
console.log("\nTesting OpenAI Completions with pre-built contexts:\n");
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Handoff", () => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
it("should handle contexts from all providers", async () => {
|
||||
console.log("\nTesting OpenAI Responses with pre-built contexts:\n");
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null },
|
||||
];
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Completions success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === model.id) {
|
||||
console.log(`[${label} → ${model.provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(model, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Responses success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Handoff", () => {
|
||||
let provider: OpenAIResponsesLLM;
|
||||
|
||||
beforeAll(() => {
|
||||
const model = getModel("openai", "gpt-5-mini");
|
||||
if (model) {
|
||||
provider = new OpenAIResponsesLLM(model, process.env.OPENAI_API_KEY!);
|
||||
}
|
||||
});
|
||||
|
||||
it("should handle contexts from all providers", async () => {
|
||||
if (!provider) {
|
||||
console.log("OpenAI Responses provider not available, skipping");
|
||||
return;
|
||||
}
|
||||
|
||||
console.log("\nTesting OpenAI Responses with pre-built contexts:\n");
|
||||
|
||||
const contextTests = [
|
||||
{ label: "Anthropic-style", context: providerContexts.anthropic, sourceModel: "claude-3-5-haiku-20241022" },
|
||||
{ label: "Google-style", context: providerContexts.google, sourceModel: "gemini-2.5-flash" },
|
||||
{ label: "OpenAI-Completions", context: providerContexts.openaiCompletions, sourceModel: "gpt-4o-mini" },
|
||||
{ label: "OpenAI-Responses", context: providerContexts.openaiResponses, sourceModel: "gpt-5-mini" },
|
||||
{ label: "Aborted", context: providerContexts.aborted, sourceModel: null }
|
||||
];
|
||||
|
||||
let successCount = 0;
|
||||
let skippedCount = 0;
|
||||
|
||||
for (const { label, context, sourceModel } of contextTests) {
|
||||
// Skip testing same model against itself
|
||||
if (sourceModel && sourceModel === provider.getModel().id) {
|
||||
console.log(`[${label} → ${provider.getModel().provider}] Skipping same-model test`);
|
||||
skippedCount++;
|
||||
continue;
|
||||
}
|
||||
const success = await testProviderHandoff(provider, label, context);
|
||||
if (success) successCount++;
|
||||
}
|
||||
|
||||
const totalTests = contextTests.length - skippedCount;
|
||||
console.log(`\nOpenAI Responses success rate: ${successCount}/${totalTests} (${skippedCount} skipped)\n`);
|
||||
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
});
|
||||
// All non-skipped handoffs should succeed
|
||||
expect(successCount).toBe(totalTests);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,31 +0,0 @@
|
|||
import { GoogleGenAI } from "@google/genai";
|
||||
import OpenAI from "openai";
|
||||
|
||||
const ai = new GoogleGenAI({});
|
||||
|
||||
async function main() {
|
||||
/*let pager = await ai.models.list();
|
||||
do {
|
||||
for (const model of pager.page) {
|
||||
console.log(JSON.stringify(model, null, 2));
|
||||
console.log("---");
|
||||
}
|
||||
if (!pager.hasNextPage()) break;
|
||||
await pager.nextPage();
|
||||
} while (true);*/
|
||||
|
||||
const openai = new OpenAI();
|
||||
const response = await openai.models.list();
|
||||
do {
|
||||
const page = response.data;
|
||||
for (const model of page) {
|
||||
const info = await openai.models.retrieve(model.id);
|
||||
console.log(JSON.stringify(model, null, 2));
|
||||
console.log("---");
|
||||
}
|
||||
if (!response.hasNextPage()) break;
|
||||
await response.getNextPage();
|
||||
} while (true);
|
||||
}
|
||||
|
||||
await main();
|
||||
|
|
@ -1,618 +0,0 @@
|
|||
import { describe, it, beforeAll, afterAll, expect } from "vitest";
|
||||
import { GoogleLLM } from "../src/providers/google.js";
|
||||
import { OpenAICompletionsLLM } from "../src/providers/openai-completions.js";
|
||||
import { OpenAIResponsesLLM } from "../src/providers/openai-responses.js";
|
||||
import { AnthropicLLM } from "../src/providers/anthropic.js";
|
||||
import type { LLM, LLMOptions, Context, Tool, AssistantMessage, Model, ImageContent } from "../src/types.js";
|
||||
import { spawn, ChildProcess, execSync } from "child_process";
|
||||
import { createLLM, getModel } from "../src/models.js";
|
||||
import { readFileSync } from "fs";
|
||||
import { join, dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
|
||||
// ESM modules have no built-in __filename/__dirname; reconstruct them from
// import.meta.url so fixture files (e.g. tests/data/red-circle.png) can be
// located relative to this test file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
||||
|
||||
// Calculator tool definition (same as examples).
// Shared by the tool-calling and multi-turn scenarios below; the multi-turn
// test executes these operations locally and feeds the results back.
const calculatorTool: Tool = {
  name: "calculator",
  description: "Perform basic arithmetic operations",
  // JSON Schema describing the tool's arguments.
  parameters: {
    type: "object",
    properties: {
      a: { type: "number", description: "First number" },
      b: { type: "number", description: "Second number" },
      operation: {
        type: "string",
        enum: ["add", "subtract", "multiply", "divide"],
        description: "The operation to perform"
      }
    },
    required: ["a", "b", "operation"]
  }
};
|
||||
|
||||
async function basicTextGeneration<T extends LLMOptions>(llm: LLM<T>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
messages: [
|
||||
{ role: "user", content: "Reply with exactly: 'Hello test successful'" }
|
||||
]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
|
||||
expect(response.role).toBe("assistant");
|
||||
expect(response.content).toBeTruthy();
|
||||
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(response.usage.output).toBeGreaterThan(0);
|
||||
expect(response.error).toBeFalsy();
|
||||
expect(response.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Hello test successful");
|
||||
|
||||
context.messages.push(response);
|
||||
context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" });
|
||||
|
||||
const secondResponse = await llm.generate(context);
|
||||
|
||||
expect(secondResponse.role).toBe("assistant");
|
||||
expect(secondResponse.content).toBeTruthy();
|
||||
expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0);
|
||||
expect(secondResponse.usage.output).toBeGreaterThan(0);
|
||||
expect(secondResponse.error).toBeFalsy();
|
||||
expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Goodbye test successful");
|
||||
}
|
||||
|
||||
async function handleToolCall<T extends LLMOptions>(llm: LLM<T>) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that uses tools when asked.",
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: "Calculate 15 + 27 using the calculator tool."
|
||||
}],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
expect(response.stopReason).toBe("toolUse");
|
||||
expect(response.content.some(b => b.type == "toolCall")).toBeTruthy();
|
||||
const toolCall = response.content.find(b => b.type == "toolCall")!;
|
||||
expect(toolCall.name).toBe("calculator");
|
||||
expect(toolCall.id).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleStreaming<T extends LLMOptions>(llm: LLM<T>) {
|
||||
let textStarted = false;
|
||||
let textChunks = "";
|
||||
let textCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: "Count from 1 to 3" }]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
onEvent: (event) => {
|
||||
if (event.type === "text_start") {
|
||||
textStarted = true;
|
||||
} else if (event.type === "text_delta") {
|
||||
textChunks += event.delta;
|
||||
} else if (event.type === "text_end") {
|
||||
textCompleted = true;
|
||||
}
|
||||
}
|
||||
} as T);
|
||||
|
||||
expect(textStarted).toBe(true);
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
expect(textCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "text")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleThinking<T extends LLMOptions>(llm: LLM<T>, options: T) {
|
||||
let thinkingStarted = false;
|
||||
let thinkingChunks = "";
|
||||
let thinkingCompleted = false;
|
||||
|
||||
const context: Context = {
|
||||
messages: [{ role: "user", content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.` }]
|
||||
};
|
||||
|
||||
const response = await llm.generate(context, {
|
||||
onEvent: (event) => {
|
||||
if (event.type === "thinking_start") {
|
||||
thinkingStarted = true;
|
||||
} else if (event.type === "thinking_delta") {
|
||||
expect(event.content.endsWith(event.delta)).toBe(true);
|
||||
thinkingChunks += event.delta;
|
||||
} else if (event.type === "thinking_end") {
|
||||
thinkingCompleted = true;
|
||||
}
|
||||
},
|
||||
...options
|
||||
});
|
||||
|
||||
|
||||
expect(response.stopReason, `Error: ${(response as any).error}`).toBe("stop");
|
||||
expect(thinkingStarted).toBe(true);
|
||||
expect(thinkingChunks.length).toBeGreaterThan(0);
|
||||
expect(thinkingCompleted).toBe(true);
|
||||
expect(response.content.some(b => b.type == "thinking")).toBeTruthy();
|
||||
}
|
||||
|
||||
async function handleImage<T extends LLMOptions>(llm: LLM<T>) {
|
||||
// Check if the model supports images
|
||||
const model = llm.getModel();
|
||||
if (!model.input.includes("image")) {
|
||||
console.log(`Skipping image test - model ${model.id} doesn't support images`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Read the test image
|
||||
const imagePath = join(__dirname, "data", "red-circle.png");
|
||||
const imageBuffer = readFileSync(imagePath);
|
||||
const base64Image = imageBuffer.toString("base64");
|
||||
|
||||
const imageContent: ImageContent = {
|
||||
type: "image",
|
||||
data: base64Image,
|
||||
mimeType: "image/png",
|
||||
};
|
||||
|
||||
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...)." },
|
||||
imageContent,
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response = await llm.generate(context);
|
||||
|
||||
// Check the response mentions red and circle
|
||||
expect(response.content.length > 0).toBeTruthy();
|
||||
const lowerContent = response.content.find(b => b.type == "text")?.text || "";
|
||||
expect(lowerContent).toContain("red");
|
||||
expect(lowerContent).toContain("circle");
|
||||
}
|
||||
|
||||
async function multiTurn<T extends LLMOptions>(llm: LLM<T>, thinkingOptions: T) {
|
||||
const context: Context = {
|
||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Think about this briefly, then calculate 42 * 17 and 453 + 434 using the calculator tool."
|
||||
}
|
||||
],
|
||||
tools: [calculatorTool]
|
||||
};
|
||||
|
||||
// Collect all text content from all assistant responses
|
||||
let allTextContent = "";
|
||||
let hasSeenThinking = false;
|
||||
let hasSeenToolCalls = false;
|
||||
const maxTurns = 5; // Prevent infinite loops
|
||||
|
||||
for (let turn = 0; turn < maxTurns; turn++) {
|
||||
const response = await llm.generate(context, thinkingOptions);
|
||||
|
||||
// Add the assistant response to context
|
||||
context.messages.push(response);
|
||||
|
||||
// Process content blocks
|
||||
for (const block of response.content) {
|
||||
if (block.type === "text") {
|
||||
allTextContent += block.text;
|
||||
} else if (block.type === "thinking") {
|
||||
hasSeenThinking = true;
|
||||
} else if (block.type === "toolCall") {
|
||||
hasSeenToolCalls = true;
|
||||
|
||||
// Process the tool call
|
||||
expect(block.name).toBe("calculator");
|
||||
expect(block.id).toBeTruthy();
|
||||
expect(block.arguments).toBeTruthy();
|
||||
|
||||
const { a, b, operation } = block.arguments;
|
||||
let result: number;
|
||||
switch (operation) {
|
||||
case "add": result = a + b; break;
|
||||
case "multiply": result = a * b; break;
|
||||
default: result = 0;
|
||||
}
|
||||
|
||||
// Add tool result to context
|
||||
context.messages.push({
|
||||
role: "toolResult",
|
||||
toolCallId: block.id,
|
||||
toolName: block.name,
|
||||
content: `${result}`,
|
||||
isError: false
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// If we got a stop response with text content, we're likely done
|
||||
expect(response.stopReason).not.toBe("error");
|
||||
if (response.stopReason === "stop") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Verify we got either thinking content or tool calls (or both)
|
||||
expect(hasSeenThinking || hasSeenToolCalls).toBe(true);
|
||||
|
||||
// The accumulated text should reference both calculations
|
||||
expect(allTextContent).toBeTruthy();
|
||||
expect(allTextContent.includes("714")).toBe(true);
|
||||
expect(allTextContent.includes("887")).toBe(true);
|
||||
}
|
||||
|
||||
// End-to-end suites. Each provider suite is gated on its API key (or, for
// Ollama, on a local install) via describe.skipIf, constructs its LLM once in
// beforeAll, and runs the shared scenario helpers defined above. Thinking
// options differ per provider: Google/Anthropic take a `thinking` object,
// OpenAI-Responses and OpenAI-compatible providers take `reasoningEffort`.
describe("AI Providers E2E Tests", () => {
  describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
    let llm: GoogleLLM;

    beforeAll(() => {
      llm = new GoogleLLM(getModel("google", "gemini-2.5-flash")!, process.env.GEMINI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {thinking: { enabled: true, budgetTokens: 1024 }});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Chat Completions API; no thinking/multi-turn tests for gpt-4o-mini.
  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
    let llm: OpenAIResponsesLLM;

    beforeAll(() => {
      llm = new OpenAIResponsesLLM(getModel("openai", "gpt-5-mini")!, process.env.OPENAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    // Thinking events are occasionally omitted by this model; retry twice.
    it("should handle thinking mode", {retry: 2}, async () => {
      await handleThinking(llm, {reasoningEffort: "high"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "high"});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Note: gated on an OAuth token, not ANTHROPIC_API_KEY.
  describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
    let llm: AnthropicLLM;

    beforeAll(() => {
      llm = new AnthropicLLM(getModel("anthropic", "claude-sonnet-4-20250514")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {thinking: { enabled: true } });
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // OpenAI-compatible providers below reuse OpenAICompletionsLLM with a
  // provider-specific model entry; none of them run the image test except
  // OpenRouter's glm-4.5v.
  describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("xai", "grok-code-fast-1")!, process.env.XAI_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("groq", "openai/gpt-oss-20b")!, process.env.GROQ_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("cerebras", "gpt-oss-120b")!, process.env.CEREBRAS_API_KEY!);
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;

    beforeAll(() => {
      llm = new OpenAICompletionsLLM(getModel("openrouter", "z-ai/glm-4.5v")!, process.env.OPENROUTER_API_KEY!);;
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", { retry: 2 }, async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });

  // Check if ollama is installed (evaluated at collection time, so the suite
  // below is skipped entirely on machines without the binary).
  let ollamaInstalled = false;
  try {
    execSync("which ollama", { stdio: "ignore" });
    ollamaInstalled = true;
  } catch {
    ollamaInstalled = false;
  }

  // Local-server suite: pulls the model if missing, spawns `ollama serve`,
  // polls until it answers, and tears the process down in afterAll.
  describe.skipIf(!ollamaInstalled)("Ollama Provider (gpt-oss-20b via OpenAI Completions)", () => {
    let llm: OpenAICompletionsLLM;
    let ollamaProcess: ChildProcess | null = null;

    beforeAll(async () => {
      // Check if model is available, if not pull it
      try {
        execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
      } catch {
        console.log("Pulling gpt-oss:20b model for Ollama tests...");
        try {
          execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
        } catch (e) {
          // NOTE(review): returning here leaves `llm` undefined, so the tests
          // below will fail rather than be skipped — confirm this is intended.
          console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
          return;
        }
      }

      // Start ollama server
      ollamaProcess = spawn("ollama", ["serve"], {
        detached: false,
        stdio: "ignore"
      });

      // Wait for server to be ready by polling the tags endpoint every 500ms.
      await new Promise<void>((resolve) => {
        const checkServer = async () => {
          try {
            const response = await fetch("http://localhost:11434/api/tags");
            if (response.ok) {
              resolve();
            } else {
              setTimeout(checkServer, 500);
            }
          } catch {
            setTimeout(checkServer, 500);
          }
        };
        setTimeout(checkServer, 1000); // Initial delay
      });

      // Hand-built model entry: Ollama is not in the static model registry,
      // so describe it inline (free local inference => zero cost).
      const model: Model = {
        id: "gpt-oss:20b",
        provider: "ollama",
        baseUrl: "http://localhost:11434/v1",
        reasoning: true,
        input: ["text"],
        contextWindow: 128000,
        maxTokens: 16000,
        cost: {
          input: 0,
          output: 0,
          cacheRead: 0,
          cacheWrite: 0,
        },
        name: "Ollama GPT-OSS 20B"
      }
      // Ollama ignores the API key; any non-empty string works.
      llm = new OpenAICompletionsLLM(model, "dummy");
    }, 30000); // 30 second timeout for setup

    afterAll(() => {
      // Kill ollama server
      if (ollamaProcess) {
        ollamaProcess.kill("SIGTERM");
        ollamaProcess = null;
      }
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle thinking mode", async () => {
      await handleThinking(llm, {reasoningEffort: "medium"});
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {reasoningEffort: "medium"});
    });
  });

  /*
  describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (Haiku 3.5)", () => {
    let llm: AnthropicLLM;

    beforeAll(() => {
      llm = createLLM("anthropic", "claude-3-5-haiku-latest");
    });

    it("should complete basic text generation", async () => {
      await basicTextGeneration(llm);
    });

    it("should handle tool calling", async () => {
      await handleToolCall(llm);
    });

    it("should handle streaming", async () => {
      await handleStreaming(llm);
    });

    it("should handle multi-turn with thinking and tools", async () => {
      await multiTurn(llm, {thinking: {enabled: true}});
    });

    it("should handle image input", async () => {
      await handleImage(llm);
    });
  });
  */
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue