Massive refactor of API

- Switch to function based API
- Anthropic SDK style async generator
- Fully typed with escape hatches for custom models
This commit is contained in:
Mario Zechner 2025-09-02 23:59:36 +02:00
parent 004de3c9d0
commit 66cefb236e
29 changed files with 5835 additions and 6225 deletions

View file

@ -1,47 +1,43 @@
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
import { type GoogleOptions, streamGoogle } from "./providers/google.js";
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions.js";
import { type OpenAIResponsesOptions, streamOpenAIResponses } from "./providers/openai-responses.js";
import type {
Api,
AssistantMessage,
AssistantMessageEvent,
Context,
GenerateFunction,
GenerateOptionsUnified,
GenerateStream,
KnownProvider,
Model,
OptionsForApi,
ReasoningEffort,
SimpleGenerateOptions,
} from "./types.js";
export class QueuedGenerateStream implements GenerateStream {
private queue: AssistantMessageEvent[] = [];
private waiting: ((value: IteratorResult<AssistantMessageEvent>) => void)[] = [];
private done = false;
private error?: Error;
private finalMessagePromise: Promise<AssistantMessage>;
private resolveFinalMessage!: (message: AssistantMessage) => void;
private rejectFinalMessage!: (error: Error) => void;
constructor() {
this.finalMessagePromise = new Promise((resolve, reject) => {
this.finalMessagePromise = new Promise((resolve) => {
this.resolveFinalMessage = resolve;
this.rejectFinalMessage = reject;
});
}
push(event: AssistantMessageEvent): void {
if (this.done) return;
// If it's the done event, resolve the final message
if (event.type === "done") {
this.done = true;
this.resolveFinalMessage(event.message);
}
// If it's an error event, reject the final message
if (event.type === "error") {
this.error = new Error(event.error);
if (!this.done) {
this.rejectFinalMessage(this.error);
}
this.done = true;
this.resolveFinalMessage(event.partial);
}
// Deliver to waiting consumer or queue it
@ -86,31 +82,14 @@ export class QueuedGenerateStream implements GenerateStream {
}
}
// API implementations registry
const apiImplementations: Map<Api | string, GenerateFunction> = new Map();
/**
* Register a custom API implementation
*/
export function registerApi(api: string, impl: GenerateFunction): void {
apiImplementations.set(api, impl);
}
// API key storage
const apiKeys: Map<string, string> = new Map();
/**
* Set an API key for a provider
*/
export function setApiKey(provider: KnownProvider, key: string): void;
export function setApiKey(provider: string, key: string): void;
export function setApiKey(provider: any, key: string): void {
apiKeys.set(provider, key);
}
/**
* Get API key for a provider
*/
export function getApiKey(provider: KnownProvider): string | undefined;
export function getApiKey(provider: string): string | undefined;
export function getApiKey(provider: any): string | undefined {
@ -133,45 +112,76 @@ export function getApiKey(provider: any): string | undefined {
return envVar ? process.env[envVar] : undefined;
}
/**
* Main generate function
*/
export function generate(model: Model, context: Context, options?: GenerateOptionsUnified): GenerateStream {
// Get implementation
const impl = apiImplementations.get(model.api);
if (!impl) {
throw new Error(`Unsupported API: ${model.api}`);
export function stream<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: OptionsForApi<TApi>,
): GenerateStream {
const apiKey = options?.apiKey || getApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
const providerOptions = { ...options, apiKey };
// Get API key from options or environment
const api: Api = model.api;
switch (api) {
case "anthropic-messages":
return streamAnthropic(model as Model<"anthropic-messages">, context, providerOptions);
case "openai-completions":
return streamOpenAICompletions(model as Model<"openai-completions">, context, providerOptions as any);
case "openai-responses":
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
case "google-generative-ai":
return streamGoogle(model as Model<"google-generative-ai">, context, providerOptions);
default: {
// This should never be reached if all Api cases are handled
const _exhaustive: never = api;
throw new Error(`Unhandled API: ${_exhaustive}`);
}
}
}
export async function complete<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: OptionsForApi<TApi>,
): Promise<AssistantMessage> {
const s = stream(model, context, options);
return s.finalMessage();
}
export function streamSimple<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: SimpleGenerateOptions,
): GenerateStream {
const apiKey = options?.apiKey || getApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
// Map generic options to provider-specific
const providerOptions = mapOptionsForApi(model.api, model, options, apiKey);
// Return the GenerateStream from implementation
return impl(model, context, providerOptions);
const providerOptions = mapOptionsForApi(model, options, apiKey);
return stream(model, context, providerOptions);
}
/**
* Helper to generate and get complete response (no streaming)
*/
export async function generateComplete(
model: Model,
export async function completeSimple<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: GenerateOptionsUnified,
options?: SimpleGenerateOptions,
): Promise<AssistantMessage> {
const stream = generate(model, context, options);
return stream.finalMessage();
const s = streamSimple(model, context, options);
return s.finalMessage();
}
/**
* Map generic options to provider-specific options
*/
function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOptionsUnified, apiKey?: string): any {
function mapOptionsForApi<TApi extends Api>(
model: Model<TApi>,
options?: SimpleGenerateOptions,
apiKey?: string,
): OptionsForApi<TApi> {
const base = {
temperature: options?.temperature,
maxTokens: options?.maxTokens,
@ -179,18 +189,10 @@ function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOpt
apiKey: apiKey || options?.apiKey,
};
switch (api) {
case "openai-responses":
case "openai-completions":
return {
...base,
reasoning_effort: options?.reasoning,
};
switch (model.api) {
case "anthropic-messages": {
if (!options?.reasoning) return base;
if (!options?.reasoning) return base satisfies AnthropicOptions;
// Map effort to token budget
const anthropicBudgets = {
minimal: 1024,
low: 2048,
@ -200,55 +202,60 @@ function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOpt
return {
...base,
thinking: {
enabled: true,
budgetTokens: anthropicBudgets[options.reasoning],
},
};
thinkingEnabled: true,
thinkingBudgetTokens: anthropicBudgets[options.reasoning],
} satisfies AnthropicOptions;
}
case "google-generative-ai": {
if (!options?.reasoning) return { ...base, thinking_budget: -1 };
// Model-specific mapping for Google
const googleBudget = getGoogleBudget(model, options.reasoning);
case "openai-completions":
return {
...base,
thinking_budget: googleBudget,
};
reasoningEffort: options?.reasoning,
} satisfies OpenAICompletionsOptions;
case "openai-responses":
return {
...base,
reasoningEffort: options?.reasoning,
} satisfies OpenAIResponsesOptions;
case "google-generative-ai": {
if (!options?.reasoning) return base as any;
const googleBudget = getGoogleBudget(model as Model<"google-generative-ai">, options.reasoning);
return {
...base,
thinking: {
enabled: true,
budgetTokens: googleBudget,
},
} satisfies GoogleOptions;
}
default: {
// Exhaustiveness check
const _exhaustive: never = model.api;
throw new Error(`Unhandled API in mapOptionsForApi: ${_exhaustive}`);
}
default:
return base;
}
}
/**
* Get Google thinking budget based on model and effort
*/
function getGoogleBudget(model: Model, effort: ReasoningEffort): number {
// Model-specific logic
if (model.id.includes("flash-lite")) {
const budgets = {
minimal: 512,
low: 2048,
medium: 8192,
high: 24576,
};
return budgets[effort];
}
if (model.id.includes("pro")) {
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ReasoningEffort): number {
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
if (model.id.includes("2.5-pro")) {
const budgets = {
minimal: 128,
low: 2048,
medium: 8192,
high: Math.min(25000, 32768),
high: 32768,
};
return budgets[effort];
}
if (model.id.includes("flash")) {
if (model.id.includes("2.5-flash")) {
// Covers 2.5-flash-lite as well
const budgets = {
minimal: 0, // Disable thinking
minimal: 128,
low: 2048,
medium: 8192,
high: 24576,
@ -259,10 +266,3 @@ function getGoogleBudget(model: Model, effort: ReasoningEffort): number {
// Unknown model - use dynamic
return -1;
}
// Register built-in API implementations
// Import the new function-based implementations
import { generateAnthropic } from "./providers/anthropic-generate.js";
// Register Anthropic implementation
apiImplementations.set("anthropic-messages", generateAnthropic);

View file

@ -1,37 +1,8 @@
// @mariozechner/pi-ai - Unified LLM API with automatic model discovery
// This package provides a common interface for working with multiple LLM providers
export const version = "0.5.8";
// Export generate API
export {
generate,
generateComplete,
getApiKey,
QueuedGenerateStream,
registerApi,
setApiKey,
} from "./generate.js";
// Export generated models data
export { PROVIDERS } from "./models.generated.js";
// Export model utilities
export {
calculateCost,
getModel,
type KnownProvider,
registerModel,
} from "./models.js";
// Legacy providers (to be deprecated)
export { AnthropicLLM } from "./providers/anthropic.js";
export { GoogleLLM } from "./providers/google.js";
export { OpenAICompletionsLLM } from "./providers/openai-completions.js";
export { OpenAIResponsesLLM } from "./providers/openai-responses.js";
// Export types
export type * from "./types.js";
// TODO: Remove these legacy exports once consumers are updated
export function createLLM(): never {
throw new Error("createLLM is deprecated. Use generate() with getModel() instead.");
}
export * from "./generate.js";
export * from "./models.generated.js";
export * from "./models.js";
export * from "./providers/anthropic.js";
export * from "./providers/google.js";
export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./types.js";

File diff suppressed because it is too large Load diff

View file

@ -1,44 +1,39 @@
import { PROVIDERS } from "./models.generated.js";
import type { KnownProvider, Model, Usage } from "./types.js";
import type { Api, KnownProvider, Model, Usage } from "./types.js";
// Re-export Model type
export type { KnownProvider, Model } from "./types.js";
// Dynamic model registry initialized from PROVIDERS
const modelRegistry: Map<string, Map<string, Model>> = new Map();
const modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();
// Initialize registry from PROVIDERS on module load
for (const [provider, models] of Object.entries(PROVIDERS)) {
const providerModels = new Map<string, Model>();
const providerModels = new Map<string, Model<Api>>();
for (const [id, model] of Object.entries(models)) {
providerModels.set(id, model as Model);
providerModels.set(id, model as Model<Api>);
}
modelRegistry.set(provider, providerModels);
}
/**
* Get a model from the registry - typed overload for known providers
*/
export function getModel<P extends KnownProvider>(provider: P, modelId: keyof (typeof PROVIDERS)[P]): Model;
export function getModel(provider: string, modelId: string): Model | undefined;
export function getModel(provider: any, modelId: any): Model | undefined {
return modelRegistry.get(provider)?.get(modelId);
type ModelApi<
TProvider extends KnownProvider,
TModelId extends keyof (typeof PROVIDERS)[TProvider],
> = (typeof PROVIDERS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof PROVIDERS)[TProvider]>(
provider: TProvider,
modelId: TModelId,
): Model<ModelApi<TProvider, TModelId>>;
export function getModel<TApi extends Api>(provider: string, modelId: string): Model<TApi> | undefined;
export function getModel<TApi extends Api>(provider: any, modelId: any): Model<TApi> | undefined {
return modelRegistry.get(provider)?.get(modelId) as Model<TApi> | undefined;
}
/**
* Register a custom model
*/
export function registerModel(model: Model): void {
export function registerModel<TApi extends Api>(model: Model<TApi>): void {
if (!modelRegistry.has(model.provider)) {
modelRegistry.set(model.provider, new Map());
}
modelRegistry.get(model.provider)!.set(model.id, model);
}
/**
* Calculate cost for token usage
*/
export function calculateCost(model: Model, usage: Usage): Usage["cost"] {
export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage["cost"] {
usage.cost.input = (model.cost.input / 1000000) * usage.input;
usage.cost.output = (model.cost.output / 1000000) * usage.output;
usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;

View file

@ -1,425 +0,0 @@
import Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
Tool,
} from "@anthropic-ai/sdk/resources/messages.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
// Anthropic-specific options layered on top of the shared GenerateOptions.
export interface AnthropicOptions extends GenerateOptions {
	// Extended-thinking configuration; budgetTokens caps thinking tokens
	// (a fallback of 1024 is applied in buildAnthropicParams when omitted).
	thinking?: {
		enabled: boolean;
		budgetTokens?: number;
	};
	// Mirrors Anthropic's tool_choice parameter; the string forms are wrapped
	// into `{ type: ... }` objects when the request params are built.
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
/**
 * Generate function for the Anthropic Messages API.
 *
 * Returns a GenerateStream immediately and feeds it from a detached async
 * task: Anthropic stream events are translated into the library's unified
 * AssistantMessageEvent sequence (start / text_* / thinking_* / toolCall /
 * done / error), while `output` is built up incrementally and attached to
 * every event as the partial message. Errors never reject — they are
 * surfaced as an "error" event with stopReason "error" on the message.
 */
export const generateAnthropic: GenerateFunction<AnthropicOptions> = (
	model: Model,
	context: Context,
	options: AnthropicOptions,
): GenerateStream => {
	const stream = new QueuedGenerateStream();

	// Start async processing; the caller consumes `stream` while this runs.
	(async () => {
		// Accumulated assistant message; pushed as `partial` with each event.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "anthropic-messages" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
		};
		try {
			// Create Anthropic client (handles OAuth-token vs API-key auth).
			const client = createAnthropicClient(model, options.apiKey!);

			// Convert messages to Anthropic's wire format.
			const messages = convertMessages(context.messages, model, "anthropic-messages");

			// Build params (OAuth tokens require the Claude Code identity).
			const params = buildAnthropicParams(model, context, options, messages, client.isOAuthToken);

			// Create Anthropic stream
			const anthropicStream = client.client.messages.stream(
				{
					...params,
					stream: true,
				},
				{
					signal: options.signal,
				},
			);

			// Emit start event
			stream.push({
				type: "start",
				partial: output,
			});

			// Block currently being streamed. Text/thinking blocks are appended
			// to output.content as soon as they start; tool calls are held back
			// (arguments arrive as raw JSON fragments in `partialJson`) and only
			// appended once complete and parsed.
			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;

			for await (const event of anthropicStream) {
				if (event.type === "content_block_start") {
					if (event.content_block.type === "text") {
						currentBlock = {
							type: "text",
							text: "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "text_start", partial: output });
					} else if (event.content_block.type === "thinking") {
						currentBlock = {
							type: "thinking",
							thinking: "",
							thinkingSignature: "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "thinking_start", partial: output });
					} else if (event.content_block.type === "tool_use") {
						// We wait for the full tool use to be streamed before
						// emitting any event or appending to output.content.
						currentBlock = {
							type: "toolCall",
							id: event.content_block.id,
							name: event.content_block.name,
							arguments: event.content_block.input as Record<string, any>,
							partialJson: "",
						};
					}
				} else if (event.type === "content_block_delta") {
					if (event.delta.type === "text_delta") {
						if (currentBlock && currentBlock.type === "text") {
							currentBlock.text += event.delta.text;
							stream.push({
								type: "text_delta",
								delta: event.delta.text,
								partial: output,
							});
						}
					} else if (event.delta.type === "thinking_delta") {
						if (currentBlock && currentBlock.type === "thinking") {
							currentBlock.thinking += event.delta.thinking;
							stream.push({
								type: "thinking_delta",
								delta: event.delta.thinking,
								partial: output,
							});
						}
					} else if (event.delta.type === "input_json_delta") {
						// Buffer raw JSON; parsed in one go at content_block_stop.
						if (currentBlock && currentBlock.type === "toolCall") {
							currentBlock.partialJson += event.delta.partial_json;
						}
					} else if (event.delta.type === "signature_delta") {
						// Signature needed to replay thinking blocks back to the API.
						if (currentBlock && currentBlock.type === "thinking") {
							currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
							currentBlock.thinkingSignature += event.delta.signature;
						}
					}
				} else if (event.type === "content_block_stop") {
					if (currentBlock) {
						if (currentBlock.type === "text") {
							stream.push({ type: "text_end", content: currentBlock.text, partial: output });
						} else if (currentBlock.type === "thinking") {
							stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
						} else if (currentBlock.type === "toolCall") {
							// Arguments complete: parse and emit the finished tool call.
							const finalToolCall: ToolCall = {
								type: "toolCall",
								id: currentBlock.id,
								name: currentBlock.name,
								arguments: JSON.parse(currentBlock.partialJson),
							};
							output.content.push(finalToolCall);
							stream.push({ type: "toolCall", toolCall: finalToolCall, partial: output });
						}
						currentBlock = null;
					}
				} else if (event.type === "message_delta") {
					if (event.delta.stop_reason) {
						output.stopReason = mapStopReason(event.delta.stop_reason);
					}
					// Accumulate token usage and recompute cost on each delta.
					output.usage.input += event.usage.input_tokens || 0;
					output.usage.output += event.usage.output_tokens || 0;
					output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
					output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
					calculateCost(model, output.usage);
				}
			}

			// Emit done event with final message
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Report failures as an "error" event carrying the partial message.
			output.stopReason = "error";
			output.error = error instanceof Error ? error.message : JSON.stringify(error);
			stream.push({ type: "error", error: output.error, partial: output });
			stream.end();
		}
	})();

	return stream;
};
// Result of createAnthropicClient: the SDK client plus whether the key was
// an OAuth token (which forces the Claude Code system-prompt identity).
interface AnthropicClientWrapper {
	client: Anthropic;
	isOAuthToken: boolean;
}
/**
 * Create an Anthropic SDK client for the given model.
 *
 * Keys containing "sk-ant-oat" are OAuth access tokens (Claude Code flow):
 * they are sent as a bearer `authToken` with the oauth beta header, and the
 * caller must inject the Claude Code identity into the system prompt (see
 * buildAnthropicParams). All other keys use normal `apiKey` auth.
 */
function createAnthropicClient(model: Model, apiKey: string): AnthropicClientWrapper {
	if (apiKey.includes("sk-ant-oat")) {
		const defaultHeaders = {
			accept: "application/json",
			"anthropic-dangerous-direct-browser-access": "true",
			"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
		};
		// Remove the env var in Node.js so the SDK cannot fall back to it.
		// Assigning `undefined` would NOT clear it: process.env coerces values
		// to strings, leaving the literal string "undefined" as the key, so
		// the property must be deleted instead.
		if (typeof process !== "undefined" && process.env) {
			delete process.env.ANTHROPIC_API_KEY;
		}
		const client = new Anthropic({
			apiKey: null,
			authToken: apiKey,
			baseURL: model.baseUrl,
			defaultHeaders,
			dangerouslyAllowBrowser: true,
		});
		return { client, isOAuthToken: true };
	} else {
		const defaultHeaders = {
			accept: "application/json",
			"anthropic-dangerous-direct-browser-access": "true",
			"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
		};
		const client = new Anthropic({
			apiKey,
			baseURL: model.baseUrl,
			dangerouslyAllowBrowser: true,
			defaultHeaders,
		});
		return { client, isOAuthToken: false };
	}
}
// Assemble the streaming request parameters for the Anthropic Messages API.
// OAuth (Claude Code) tokens MUST lead the system prompt with the Claude Code
// identity; all system blocks are marked ephemeral for prompt caching.
function buildAnthropicParams(
	model: Model,
	context: Context,
	options: AnthropicOptions,
	messages: MessageParam[],
	isOAuthToken: boolean,
): MessageCreateParamsStreaming {
	const params: MessageCreateParamsStreaming = {
		model: model.id,
		messages,
		max_tokens: options.maxTokens || model.maxTokens,
		stream: true,
	};

	// System prompt handling depends on the auth mode.
	if (isOAuthToken) {
		const systemBlocks = [
			{
				type: "text" as const,
				text: "You are Claude Code, Anthropic's official CLI for Claude.",
				cache_control: { type: "ephemeral" as const },
			},
		];
		if (context.systemPrompt) {
			systemBlocks.push({
				type: "text" as const,
				text: context.systemPrompt,
				cache_control: { type: "ephemeral" as const },
			});
		}
		params.system = systemBlocks;
	} else if (context.systemPrompt) {
		params.system = context.systemPrompt;
	}

	if (options.temperature !== undefined) {
		params.temperature = options.temperature;
	}
	if (context.tools) {
		params.tools = convertTools(context.tools);
	}

	// Extended thinking: only when requested AND the model supports reasoning.
	if (options.thinking?.enabled && model.reasoning) {
		params.thinking = {
			type: "enabled",
			budget_tokens: options.thinking.budgetTokens || 1024,
		};
	}

	// String tool choices are wrapped into the object form the API expects.
	const choice = options.toolChoice;
	if (choice) {
		params.tool_choice = typeof choice === "string" ? { type: choice } : choice;
	}

	return params;
}
// Convert unified messages into Anthropic MessageParam format.
// - user: string content passes through; array content becomes text/image
//   blocks, with image blocks dropped when the model lacks image input.
// - assistant: text / thinking / toolCall blocks map to their Anthropic
//   counterparts (thinking keeps its signature so it can be replayed).
// - toolResult: re-sent as a user message wrapping a tool_result block.
function convertMessages(messages: Message[], model: Model, api: Api): MessageParam[] {
	const params: MessageParam[] = [];

	// Transform messages for cross-provider compatibility (e.g. history that
	// was produced by a different API) before mapping to Anthropic shapes.
	const transformedMessages = transformMessages(messages, model, api);

	for (const msg of transformedMessages) {
		if (msg.role === "user") {
			// Handle both string and array content
			if (typeof msg.content === "string") {
				params.push({
					role: "user",
					content: msg.content,
				});
			} else {
				// Convert array content to Anthropic format
				const blocks: ContentBlockParam[] = msg.content.map((item) => {
					if (item.type === "text") {
						return {
							type: "text",
							text: item.text,
						};
					} else {
						// Image content — always sent base64-encoded.
						return {
							type: "image",
							source: {
								type: "base64",
								media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
								data: item.data,
							},
						};
					}
				});
				// Strip images for models that do not accept image input.
				const filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
				params.push({
					role: "user",
					content: filteredBlocks,
				});
			}
		} else if (msg.role === "assistant") {
			const blocks: ContentBlockParam[] = [];
			for (const block of msg.content) {
				if (block.type === "text") {
					blocks.push({
						type: "text",
						text: block.text,
					});
				} else if (block.type === "thinking") {
					// Signature is required by the API when replaying thinking.
					blocks.push({
						type: "thinking",
						thinking: block.thinking,
						signature: block.thinkingSignature || "",
					});
				} else if (block.type === "toolCall") {
					blocks.push({
						type: "tool_use",
						id: block.id,
						name: block.name,
						input: block.arguments,
					});
				}
			}
			params.push({
				role: "assistant",
				content: blocks,
			});
		} else if (msg.role === "toolResult") {
			// Tool results go back to the API as user messages.
			params.push({
				role: "user",
				content: [
					{
						type: "tool_result",
						tool_use_id: msg.toolCallId,
						content: msg.content,
						is_error: msg.isError,
					},
				],
			});
		}
	}
	return params;
}
// Convert the context's tool definitions into Anthropic's Tool format,
// wrapping each JSON-schema fragment in an object-typed input_schema.
function convertTools(tools: Context["tools"]): Tool[] {
	if (!tools) return [];
	const converted: Tool[] = [];
	for (const tool of tools) {
		converted.push({
			name: tool.name,
			description: tool.description,
			input_schema: {
				type: "object" as const,
				properties: tool.parameters.properties || {},
				required: tool.parameters.required || [],
			},
		});
	}
	return converted;
}
// Translate an Anthropic stop reason into the library's StopReason.
// pause_turn (resubmittable), stop_sequence (we never supply stop sequences),
// null and anything unrecognized all collapse to a plain "stop".
function mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
	if (reason === "max_tokens") return "length";
	if (reason === "tool_use") return "toolUse";
	if (reason === "refusal") return "safety";
	// end_turn / pause_turn / stop_sequence / null / unknown.
	return "stop";
}

View file

@ -3,91 +3,46 @@ import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
Tool,
} from "@anthropic-ai/sdk/resources/messages.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
Tool,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface AnthropicLLMOptions extends LLMOptions {
thinking?: {
enabled: boolean;
budgetTokens?: number;
};
export interface AnthropicOptions extends GenerateOptions {
thinkingEnabled?: boolean;
thinkingBudgetTokens?: number;
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
private client: Anthropic;
private modelInfo: Model;
private isOAuthToken: boolean = false;
export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
model: Model<"anthropic-messages">,
context: Context,
options?: AnthropicOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.ANTHROPIC_API_KEY) {
throw new Error(
"Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.ANTHROPIC_API_KEY;
}
if (apiKey.includes("sk-ant-oat")) {
const defaultHeaders = {
accept: "application/json",
"anthropic-dangerous-direct-browser-access": "true",
"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
};
// Clear the env var if we're in Node.js to prevent SDK from using it
if (typeof process !== "undefined" && process.env) {
process.env.ANTHROPIC_API_KEY = undefined;
}
this.client = new Anthropic({
apiKey: null,
authToken: apiKey,
baseURL: model.baseUrl,
defaultHeaders,
dangerouslyAllowBrowser: true,
});
this.isOAuthToken = true;
} else {
const defaultHeaders = {
accept: "application/json",
"anthropic-dangerous-direct-browser-access": "true",
"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
};
this.client = new Anthropic({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true, defaultHeaders });
this.isOAuthToken = false;
}
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "anthropic-messages";
}
async generate(context: Context, options?: AnthropicLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "anthropic-messages" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -99,77 +54,14 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
};
try {
const messages = this.convertMessages(context.messages);
const params: MessageCreateParamsStreaming = {
model: this.modelInfo.id,
messages,
max_tokens: options?.maxTokens || 4096,
stream: true,
};
// For OAuth tokens, we MUST include Claude Code identity
if (this.isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
},
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: context.systemPrompt,
cache_control: {
type: "ephemeral",
},
});
}
} else if (context.systemPrompt) {
params.system = context.systemPrompt;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (context.tools) {
params.tools = this.convertTools(context.tools);
}
// Only enable thinking if the model supports it
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
params.thinking = {
type: "enabled",
budget_tokens: options.thinking.budgetTokens || 1024,
};
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
const stream = this.client.messages.stream(
{
...params,
stream: true,
},
{
signal: options?.signal,
},
);
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const { client, isOAuthToken } = createClient(model, options?.apiKey!);
const params = buildParams(model, context, isOAuthToken, options);
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
stream.push({ type: "start", partial: output });
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
for await (const event of stream) {
for await (const event of anthropicStream) {
if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
currentBlock = {
@ -177,7 +69,7 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
text: "",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
} else if (event.content_block.type === "thinking") {
currentBlock = {
type: "thinking",
@ -185,9 +77,9 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
thinkingSignature: "",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
} else if (event.content_block.type === "tool_use") {
// We wait for the full tool use to be streamed to send the event
// We wait for the full tool use to be streamed
currentBlock = {
type: "toolCall",
id: event.content_block.id,
@ -200,15 +92,19 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
if (event.delta.type === "text_delta") {
if (currentBlock && currentBlock.type === "text") {
currentBlock.text += event.delta.text;
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: event.delta.text });
stream.push({
type: "text_delta",
delta: event.delta.text,
partial: output,
});
}
} else if (event.delta.type === "thinking_delta") {
if (currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking += event.delta.thinking;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentBlock.thinking,
delta: event.delta.thinking,
partial: output,
});
}
} else if (event.delta.type === "input_json_delta") {
@ -224,9 +120,17 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
} else if (event.type === "content_block_stop") {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
const finalToolCall: ToolCall = {
type: "toolCall",
@ -235,150 +139,274 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
arguments: JSON.parse(currentBlock.partialJson),
};
output.content.push(finalToolCall);
options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
stream.push({
type: "toolCall",
toolCall: finalToolCall,
partial: output,
});
}
currentBlock = null;
}
} else if (event.type === "message_delta") {
if (event.delta.stop_reason) {
output.stopReason = this.mapStopReason(event.delta.stop_reason);
output.stopReason = mapStopReason(event.delta.stop_reason);
}
output.usage.input += event.usage.input_tokens || 0;
output.usage.output += event.usage.output_tokens || 0;
output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Creates an Anthropic SDK client for the given model and API key.
 *
 * Keys containing "sk-ant-oat" are treated as OAuth access tokens and are sent
 * via `authToken` (Bearer auth) with the OAuth beta header; all other keys use
 * standard `apiKey` auth. Returns the client plus a flag so callers can adjust
 * request building (OAuth requires the Claude Code system identity).
 *
 * @param model  Model descriptor; only `baseUrl` is read here.
 * @param apiKey Anthropic API key or OAuth access token.
 * @returns The configured client and whether OAuth-token auth is in effect.
 */
function createClient(
    model: Model<"anthropic-messages">,
    apiKey: string,
): { client: Anthropic; isOAuthToken: boolean } {
    if (apiKey.includes("sk-ant-oat")) {
        const defaultHeaders = {
            accept: "application/json",
            "anthropic-dangerous-direct-browser-access": "true",
            "anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
        };
        // Clear the env var if we're in Node.js to prevent SDK from using it.
        // NOTE: `process.env.X = undefined` would coerce to the string "undefined"
        // (still truthy for the SDK), so we must `delete` the property instead.
        if (typeof process !== "undefined" && process.env) {
            delete process.env.ANTHROPIC_API_KEY;
        }
        const client = new Anthropic({
            apiKey: null,
            authToken: apiKey,
            baseURL: model.baseUrl,
            defaultHeaders,
            dangerouslyAllowBrowser: true,
        });
        return { client, isOAuthToken: true };
    } else {
        const defaultHeaders = {
            accept: "application/json",
            "anthropic-dangerous-direct-browser-access": "true",
            "anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
        };
        const client = new Anthropic({
            apiKey,
            baseURL: model.baseUrl,
            dangerouslyAllowBrowser: true,
            defaultHeaders,
        });
        return { client, isOAuthToken: false };
    }
}
function buildParams(
model: Model<"anthropic-messages">,
context: Context,
isOAuthToken: boolean,
options?: AnthropicOptions,
): MessageCreateParamsStreaming {
const params: MessageCreateParamsStreaming = {
model: model.id,
messages: convertMessages(context.messages, model),
max_tokens: options?.maxTokens || model.maxTokens,
stream: true,
};
// For OAuth tokens, we MUST include Claude Code identity
if (isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
},
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: context.systemPrompt,
cache_control: {
type: "ephemeral",
},
});
}
} else if (context.systemPrompt) {
params.system = context.systemPrompt;
}
if (options?.temperature !== undefined) {
params.temperature = options.temperature;
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
if (options?.thinkingEnabled && model.reasoning) {
params.thinking = {
type: "enabled",
budget_tokens: options.thinkingBudgetTokens || 1024,
};
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
private convertMessages(messages: Message[]): MessageParam[] {
const params: MessageParam[] = [];
return params;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
function convertMessages(messages: Message[], model: Model<"anthropic-messages">): MessageParam[] {
const params: MessageParam[] = [];
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, model);
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
if (msg.content.trim().length > 0) {
params.push({
role: "user",
content: msg.content,
});
} else {
// Convert array content to Anthropic format
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
};
} else {
// Image content
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
});
const filteredBlocks = !this.modelInfo?.input.includes("image")
? blocks.filter((b) => b.type !== "image")
: blocks;
params.push({
role: "user",
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
blocks.push({
} else {
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: block.text,
});
} else if (block.type === "thinking") {
blocks.push({
type: "thinking",
thinking: block.thinking,
signature: block.thinkingSignature || "",
});
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: block.name,
input: block.arguments,
});
text: item.text,
};
} else {
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
}
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
filteredBlocks = filteredBlocks.filter((b) => {
if (b.type === "text") {
return b.text.trim().length > 0;
}
return true;
});
if (filteredBlocks.length === 0) continue;
params.push({
role: "user",
content: [
{
type: "tool_result",
tool_use_id: msg.toolCallId,
content: msg.content,
is_error: msg.isError,
},
],
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
if (block.text.trim().length === 0) continue;
blocks.push({
type: "text",
text: block.text,
});
} else if (block.type === "thinking") {
if (block.thinking.trim().length === 0) continue;
blocks.push({
type: "thinking",
thinking: block.thinking,
signature: block.thinkingSignature || "",
});
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: block.name,
input: block.arguments,
});
}
}
if (blocks.length === 0) continue;
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
params.push({
role: "user",
content: [
{
type: "tool_result",
tool_use_id: msg.toolCallId,
content: msg.content,
is_error: msg.isError,
},
],
});
}
return params;
}
return params;
}
private convertTools(tools: Context["tools"]): Tool[] {
if (!tools) return [];
function convertTools(tools: Tool[]): Anthropic.Messages.Tool[] {
if (!tools) return [];
return tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: {
type: "object" as const,
properties: tool.parameters.properties || {},
required: tool.parameters.required || [],
},
}));
}
return tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: {
type: "object" as const,
properties: tool.parameters.properties || {},
required: tool.parameters.required || [],
},
}));
}
private mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
switch (reason) {
case "end_turn":
return "stop";
case "max_tokens":
return "length";
case "tool_use":
return "toolUse";
case "refusal":
return "safety";
case "pause_turn": // Stop is good enough -> resubmit
return "stop";
case "stop_sequence":
return "stop"; // We don't supply stop sequences, so this should never happen
default:
return "stop";
/**
 * Maps an Anthropic stop reason onto the library's unified StopReason.
 *
 * The Record type forces the table to cover every Anthropic stop reason at
 * compile time (same guarantee as the original exhaustive switch); the runtime
 * guard only fires if an unknown value slips past the type system.
 *
 * @param reason Stop reason reported by the Anthropic Messages API.
 * @returns The unified stop reason.
 * @throws Error if the reason is not in the mapping table.
 */
function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
    const table: Record<Anthropic.Messages.StopReason, StopReason> = {
        end_turn: "stop",
        max_tokens: "length",
        tool_use: "toolUse",
        refusal: "safety",
        pause_turn: "stop", // Stop is good enough -> resubmit
        stop_sequence: "stop", // We don't supply stop sequences, so this should never happen
    };
    const mapped = table[reason];
    if (mapped === undefined) {
        throw new Error(`Unhandled stop reason: ${reason}`);
    }
    return mapped;
}

View file

@ -1,19 +1,21 @@
import {
type Content,
type FinishReason,
FinishReason,
FunctionCallingConfigMode,
type GenerateContentConfig,
type GenerateContentParameters,
GoogleGenAI,
type Part,
} from "@google/genai";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
Message,
GenerateFunction,
GenerateOptions,
GenerateStream,
Model,
StopReason,
TextContent,
@ -23,7 +25,7 @@ import type {
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface GoogleLLMOptions extends LLMOptions {
export interface GoogleOptions extends GenerateOptions {
toolChoice?: "auto" | "none" | "any";
thinking?: {
enabled: boolean;
@ -31,38 +33,20 @@ export interface GoogleLLMOptions extends LLMOptions {
};
}
export class GoogleLLM implements LLM<GoogleLLMOptions> {
private client: GoogleGenAI;
private modelInfo: Model;
export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
model: Model<"google-generative-ai">,
context: Context,
options?: GoogleOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.GEMINI_API_KEY) {
throw new Error(
"Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.GEMINI_API_KEY;
}
this.client = new GoogleGenAI({ apiKey });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "google-generative-ai";
}
async generate(context: Context, options?: GoogleLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "google-generative-ai" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -72,70 +56,20 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
},
stopReason: "stop",
};
try {
const contents = this.convertMessages(context.messages);
const client = createClient(options?.apiKey);
const params = buildParams(model, context, options);
const googleStream = await client.models.generateContentStream(params);
// Build generation config
const generationConfig: GenerateContentConfig = {};
if (options?.temperature !== undefined) {
generationConfig.temperature = options.temperature;
}
if (options?.maxTokens !== undefined) {
generationConfig.maxOutputTokens = options.maxTokens;
}
// Build the config object
const config: GenerateContentConfig = {
...(Object.keys(generationConfig).length > 0 && generationConfig),
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
...(context.tools && { tools: this.convertTools(context.tools) }),
};
// Add tool config if needed
if (context.tools && options?.toolChoice) {
config.toolConfig = {
functionCallingConfig: {
mode: this.mapToolChoice(options.toolChoice),
},
};
}
// Add thinking config if enabled and model supports it
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
config.thinkingConfig = {
includeThoughts: true,
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
};
}
// Abort signal
if (options?.signal) {
if (options.signal.aborted) {
throw new Error("Request aborted");
}
config.abortSignal = options.signal;
}
// Build the request parameters
const params: GenerateContentParameters = {
model: this.modelInfo.id,
contents,
config,
};
const stream = await this.client.models.generateContentStream(params);
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
stream.push({ type: "start", partial: output });
let currentBlock: TextContent | ThinkingContent | null = null;
for await (const chunk of stream) {
// Extract parts from the chunk
for await (const chunk of googleStream) {
const candidate = chunk.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.text !== undefined) {
const isThinking = part.thought === true;
// Check if we need to switch blocks
if (
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") ||
@ -143,50 +77,60 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
) {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
}
}
// Start new block
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
} else {
currentBlock = { type: "text", text: "" };
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
}
output.content.push(currentBlock);
}
// Append content to current block
if (currentBlock.type === "thinking") {
currentBlock.thinking += part.text;
currentBlock.thinkingSignature = part.thoughtSignature;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentBlock.thinking,
delta: part.text,
partial: output,
});
} else {
currentBlock.text += part.text;
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: part.text });
stream.push({ type: "text_delta", delta: part.text, partial: output });
}
}
// Handle function calls
if (part.functionCall) {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
}
currentBlock = null;
}
// Add tool call
const toolCallId = part.functionCall.id || `${part.functionCall.name}_${Date.now()}`;
const toolCall: ToolCall = {
type: "toolCall",
@ -195,21 +139,18 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
arguments: part.functionCall.args as Record<string, any>,
};
output.content.push(toolCall);
options?.onEvent?.({ type: "toolCall", toolCall });
stream.push({ type: "toolCall", toolCall, partial: output });
}
}
}
// Map finish reason
if (candidate?.finishReason) {
output.stopReason = this.mapStopReason(candidate.finishReason);
// Check if we have tool calls in blocks
output.stopReason = mapStopReason(candidate.finishReason);
if (output.content.some((b) => b.type === "toolCall")) {
output.stopReason = "toolUse";
}
}
// Capture usage metadata if available
if (chunk.usageMetadata) {
output.usage = {
input: chunk.usageMetadata.promptTokenCount || 0,
@ -225,166 +166,223 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
total: 0,
},
};
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
}
// Finalize last block
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({ type: "text_end", content: currentBlock.text, partial: output });
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
}
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Creates a Google GenAI client.
 *
 * Resolves the key from the explicit argument first, falling back to the
 * GEMINI_API_KEY environment variable; empty strings count as missing.
 *
 * @param apiKey Optional explicit Gemini API key.
 * @returns A configured GoogleGenAI client.
 * @throws Error when neither an argument nor the env variable supplies a key.
 */
function createClient(apiKey?: string): GoogleGenAI {
    const resolvedKey = apiKey || process.env.GEMINI_API_KEY;
    if (!resolvedKey) {
        throw new Error(
            "Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new GoogleGenAI({ apiKey: resolvedKey });
}
function buildParams(
model: Model<"google-generative-ai">,
context: Context,
options: GoogleOptions = {},
): GenerateContentParameters {
const contents = convertMessages(model, context);
const generationConfig: GenerateContentConfig = {};
if (options.temperature !== undefined) {
generationConfig.temperature = options.temperature;
}
if (options.maxTokens !== undefined) {
generationConfig.maxOutputTokens = options.maxTokens;
}
private convertMessages(messages: Message[]): Content[] {
const contents: Content[] = [];
const config: GenerateContentConfig = {
...(Object.keys(generationConfig).length > 0 && generationConfig),
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
...(context.tools && { tools: convertTools(context.tools) }),
};
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (context.tools && options.toolChoice) {
config.toolConfig = {
functionCallingConfig: {
mode: mapToolChoice(options.toolChoice),
},
};
}
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [{ text: msg.content }],
});
} else {
// Convert array content to Google format
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: item.text };
} else {
// Image content - Google uses inlineData
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
};
}
});
const filteredParts = !this.modelInfo?.input.includes("image")
? parts.filter((p) => p.text !== undefined)
: parts;
contents.push({
role: "user",
parts: filteredParts,
});
}
} else if (msg.role === "assistant") {
const parts: Part[] = [];
if (options.thinking?.enabled && model.reasoning) {
config.thinkingConfig = {
includeThoughts: true,
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
};
}
// Process content blocks
for (const block of msg.content) {
if (block.type === "text") {
parts.push({ text: block.text });
} else if (block.type === "thinking") {
const thinkingPart: Part = {
thought: true,
thoughtSignature: block.thinkingSignature,
text: block.thinking,
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
}
}
if (options.signal) {
if (options.signal.aborted) {
throw new Error("Request aborted");
}
config.abortSignal = options.signal;
}
if (parts.length > 0) {
contents.push({
role: "model",
parts,
});
}
} else if (msg.role === "toolResult") {
const params: GenerateContentParameters = {
model: model.id,
contents,
config,
};
return params;
}
function convertMessages(model: Model<"google-generative-ai">, context: Context): Content[] {
const contents: Content[] = [];
const transformedMessages = transformMessages(context.messages, model);
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [
{
functionResponse: {
id: msg.toolCallId,
name: msg.toolName,
response: {
result: msg.content,
isError: msg.isError,
},
parts: [{ text: msg.content }],
});
} else {
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: item.text };
} else {
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
},
],
};
}
});
const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
if (filteredParts.length === 0) continue;
contents.push({
role: "user",
parts: filteredParts,
});
}
}
} else if (msg.role === "assistant") {
const parts: Part[] = [];
return contents;
}
for (const block of msg.content) {
if (block.type === "text") {
parts.push({ text: block.text });
} else if (block.type === "thinking") {
const thinkingPart: Part = {
thought: true,
thoughtSignature: block.thinkingSignature,
text: block.thinking,
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
}
}
private convertTools(tools: Tool[]): any[] {
return [
{
functionDeclarations: tools.map((tool) => ({
name: tool.name,
description: tool.description,
parameters: tool.parameters,
})),
},
];
}
private mapToolChoice(choice: string): FunctionCallingConfigMode {
switch (choice) {
case "auto":
return FunctionCallingConfigMode.AUTO;
case "none":
return FunctionCallingConfigMode.NONE;
case "any":
return FunctionCallingConfigMode.ANY;
default:
return FunctionCallingConfigMode.AUTO;
if (parts.length === 0) continue;
contents.push({
role: "model",
parts,
});
} else if (msg.role === "toolResult") {
contents.push({
role: "user",
parts: [
{
functionResponse: {
id: msg.toolCallId,
name: msg.toolName,
response: {
result: msg.content,
isError: msg.isError,
},
},
},
],
});
}
}
private mapStopReason(reason: FinishReason): StopReason {
switch (reason) {
case "STOP":
return "stop";
case "MAX_TOKENS":
return "length";
case "BLOCKLIST":
case "PROHIBITED_CONTENT":
case "SPII":
case "SAFETY":
case "IMAGE_SAFETY":
return "safety";
case "RECITATION":
return "safety";
case "FINISH_REASON_UNSPECIFIED":
case "OTHER":
case "LANGUAGE":
case "MALFORMED_FUNCTION_CALL":
case "UNEXPECTED_TOOL_CALL":
return "error";
default:
return "stop";
return contents;
}
/**
 * Converts internal tool definitions to Google GenAI's tools config shape:
 * a single-element array holding all function declarations.
 *
 * @param tools Internal tool definitions.
 * @returns Google-formatted tools array with one functionDeclarations entry.
 */
function convertTools(tools: Tool[]): any[] {
    const functionDeclarations = tools.map((tool) => ({
        name: tool.name,
        description: tool.description,
        parameters: tool.parameters,
    }));
    return [{ functionDeclarations }];
}
/**
 * Maps the library's tool-choice string to Google's function calling mode.
 * Unknown values fall back to AUTO, matching the original default branch.
 *
 * @param choice "auto" | "none" | "any" (anything else -> AUTO).
 * @returns The corresponding FunctionCallingConfigMode.
 */
function mapToolChoice(choice: string): FunctionCallingConfigMode {
    const modes: Record<string, FunctionCallingConfigMode> = {
        auto: FunctionCallingConfigMode.AUTO,
        none: FunctionCallingConfigMode.NONE,
        any: FunctionCallingConfigMode.ANY,
    };
    return modes[choice] ?? FunctionCallingConfigMode.AUTO;
}
/**
 * Maps a Google GenAI finish reason onto the library's unified StopReason.
 *
 * Safety-related reasons (blocklist, prohibited content, SPII, safety,
 * image safety, recitation) collapse to "safety"; unspecified/other/
 * malformed reasons collapse to "error". The Record type keeps the table
 * exhaustive at compile time, like the original switch's never-check.
 *
 * @param reason Finish reason reported by the Google GenAI API.
 * @returns The unified stop reason.
 * @throws Error if the reason is not in the mapping table.
 */
function mapStopReason(reason: FinishReason): StopReason {
    const table: Record<FinishReason, StopReason> = {
        [FinishReason.STOP]: "stop",
        [FinishReason.MAX_TOKENS]: "length",
        [FinishReason.BLOCKLIST]: "safety",
        [FinishReason.PROHIBITED_CONTENT]: "safety",
        [FinishReason.SPII]: "safety",
        [FinishReason.SAFETY]: "safety",
        [FinishReason.IMAGE_SAFETY]: "safety",
        [FinishReason.RECITATION]: "safety",
        [FinishReason.FINISH_REASON_UNSPECIFIED]: "error",
        [FinishReason.OTHER]: "error",
        [FinishReason.LANGUAGE]: "error",
        [FinishReason.MALFORMED_FUNCTION_CALL]: "error",
        [FinishReason.UNEXPECTED_TOOL_CALL]: "error",
    };
    const mapped = table[reason];
    if (mapped === undefined) {
        throw new Error(`Unhandled stop reason: ${reason}`);
    }
    return mapped;
}

View file

@ -1,18 +1,20 @@
import OpenAI from "openai";
import type {
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
ChatCompletionContentPart,
ChatCompletionContentPartImage,
ChatCompletionContentPartText,
ChatCompletionMessageParam,
} from "openai/resources/chat/completions.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
AssistantMessage,
Context,
LLM,
LLMOptions,
Message,
GenerateFunction,
GenerateOptions,
GenerateStream,
Model,
StopReason,
TextContent,
@ -22,43 +24,25 @@ import type {
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface OpenAICompletionsLLMOptions extends LLMOptions {
export interface OpenAICompletionsOptions extends GenerateOptions {
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
reasoningEffort?: "low" | "medium" | "high";
reasoningEffort?: "minimal" | "low" | "medium" | "high";
}
export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
private client: OpenAI;
private modelInfo: Model;
export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
model: Model<"openai-completions">,
context: Context,
options?: OpenAICompletionsOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.OPENAI_API_KEY) {
throw new Error(
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.OPENAI_API_KEY;
}
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "openai-completions";
}
async generate(request: Context, options?: OpenAICompletionsLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -70,52 +54,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
};
try {
const messages = this.convertMessages(request.messages, request.systemPrompt);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: this.modelInfo.id,
messages,
stream: true,
stream_options: { include_usage: true },
};
// Cerebras/xAI don't like the "store" field
if (!this.modelInfo.baseUrl?.includes("cerebras.ai") && !this.modelInfo.baseUrl?.includes("api.x.ai")) {
params.store = false;
}
if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (request.tools) {
params.tools = this.convertTools(request.tools);
}
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
if (
options?.reasoningEffort &&
this.modelInfo.reasoning &&
!this.modelInfo.id.toLowerCase().includes("grok")
) {
params.reasoning_effort = options.reasoningEffort;
}
const stream = await this.client.chat.completions.create(params, {
signal: options?.signal,
});
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const client = createClient(model, options?.apiKey);
const params = buildParams(model, context, options);
const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
stream.push({ type: "start", partial: output });
let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
for await (const chunk of stream) {
for await (const chunk of openaiStream) {
if (chunk.usage) {
output.usage = {
input: chunk.usage.prompt_tokens || 0,
@ -132,137 +77,170 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
total: 0,
},
};
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
const choice = chunk.choices[0];
if (!choice) continue;
// Capture finish reason
if (choice.finish_reason) {
output.stopReason = this.mapStopReason(choice.finish_reason);
output.stopReason = mapStopReason(choice.finish_reason);
}
if (choice.delta) {
// Handle text content
if (
choice.delta.content !== null &&
choice.delta.content !== undefined &&
choice.delta.content.length > 0
) {
// Check if we need to switch to text block
if (!currentBlock || currentBlock.type !== "text") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new text block
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
}
// Append to text block
if (currentBlock.type === "text") {
currentBlock.text += choice.delta.content;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentBlock.text,
delta: choice.delta.content,
partial: output,
});
}
}
// Handle reasoning_content field
// Some endpoints return reasoning in reasoning_content (llama.cpp)
if (
(choice.delta as any).reasoning_content !== null &&
(choice.delta as any).reasoning_content !== undefined &&
(choice.delta as any).reasoning_content.length > 0
) {
// Check if we need to switch to thinking block
if (!currentBlock || currentBlock.type !== "thinking") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new thinking block
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning_content" };
currentBlock = {
type: "thinking",
thinking: "",
thinkingSignature: "reasoning_content",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
}
// Append to thinking block
if (currentBlock.type === "thinking") {
const delta = (choice.delta as any).reasoning_content;
currentBlock.thinking += delta;
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
stream.push({
type: "thinking_delta",
delta,
partial: output,
});
}
}
// Handle reasoning field
// Some endpoints return reasoning in "reasoning" (ollama, xAI, ...)
if (
(choice.delta as any).reasoning !== null &&
(choice.delta as any).reasoning !== undefined &&
(choice.delta as any).reasoning.length > 0
) {
// Check if we need to switch to thinking block
if (!currentBlock || currentBlock.type !== "thinking") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new thinking block
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning" };
currentBlock = {
type: "thinking",
thinking: "",
thinkingSignature: "reasoning",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
}
// Append to thinking block
if (currentBlock.type === "thinking") {
const delta = (choice.delta as any).reasoning;
currentBlock.thinking += delta;
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
stream.push({ type: "thinking_delta", delta, partial: output });
}
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
// Check if we need a new tool call block
if (
!currentBlock ||
currentBlock.type !== "toolCall" ||
(toolCall.id && currentBlock.id !== toolCall.id)
) {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new tool call block
currentBlock = {
type: "toolCall",
id: toolCall.id || "",
@ -273,7 +251,6 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
output.content.push(currentBlock);
}
// Accumulate tool call data
if (currentBlock.type === "toolCall") {
if (toolCall.id) currentBlock.id = toolCall.id;
if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
@ -286,16 +263,27 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
}
}
// Save final block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
@ -303,141 +291,188 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
throw new Error("Request was aborted");
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
return output;
} catch (error) {
// Update output with error information
output.stopReason = "error";
output.error = error instanceof Error ? error.message : String(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Build an OpenAI SDK client for the Chat Completions API.
 *
 * Resolves the key from the explicit argument first, then from the
 * OPENAI_API_KEY environment variable; throws if neither is set.
 * `dangerouslyAllowBrowser` is enabled so the client also works in
 * browser-like environments.
 */
function createClient(model: Model<"openai-completions">, apiKey?: string) {
    const key = apiKey || process.env.OPENAI_API_KEY;
    if (!key) {
        throw new Error(
            "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new OpenAI({ apiKey: key, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
}
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
const messages = convertMessages(model, context);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: model.id,
messages,
stream: true,
stream_options: { include_usage: true },
};
// Cerebras/xAI dont like the "store" field
if (!model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai")) {
params.store = false;
}
private convertMessages(messages: Message[], systemPrompt?: string): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
// Add system prompt if provided
if (systemPrompt) {
// Cerebras/xAi don't like the "developer" role
const useDeveloperRole =
this.modelInfo.reasoning &&
!this.modelInfo.baseUrl?.includes("cerebras.ai") &&
!this.modelInfo.baseUrl?.includes("api.x.ai");
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: systemPrompt });
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
// Convert messages
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
params.push({
role: "user",
content: msg.content,
});
} else {
// Convert array content to OpenAI format
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
} satisfies ChatCompletionContentPartText;
} else {
// Image content - OpenAI uses data URLs
return {
type: "image_url",
image_url: {
url: `data:${item.mimeType};base64,${item.data}`,
},
} satisfies ChatCompletionContentPartImage;
}
});
const filteredContent = !this.modelInfo?.input.includes("image")
? content.filter((c) => c.type !== "image_url")
: content;
params.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const assistantMsg: ChatCompletionMessageParam = {
role: "assistant",
content: null,
};
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
// Build content from blocks
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
if (textBlocks.length > 0) {
assistantMsg.content = textBlocks.map((b) => b.text).join("");
}
// Grok models don't like reasoning_effort
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
params.reasoning_effort = options.reasoningEffort;
}
// Handle thinking blocks for llama.cpp server + gpt-oss
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
if (thinkingBlocks.length > 0) {
// Use the signature from the first thinking block if available
const signature = thinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
}
}
return params;
}
// Handle tool calls
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
type: "function" as const,
function: {
name: tc.name,
arguments: JSON.stringify(tc.arguments),
},
}));
}
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
params.push(assistantMsg);
} else if (msg.role === "toolResult") {
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
// Cerebras/xAi don't like the "developer" role
const useDeveloperRole =
model.reasoning && !model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai");
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: context.systemPrompt });
}
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
params.push({
role: "tool",
role: "user",
content: msg.content,
tool_call_id: msg.toolCallId,
});
} else {
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
} satisfies ChatCompletionContentPartText;
} else {
return {
type: "image_url",
image_url: {
url: `data:${item.mimeType};base64,${item.data}`,
},
} satisfies ChatCompletionContentPartImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "image_url")
: content;
if (filteredContent.length === 0) continue;
params.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const assistantMsg: ChatCompletionAssistantMessageParam = {
role: "assistant",
content: null,
};
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
if (textBlocks.length > 0) {
assistantMsg.content = textBlocks.map((b) => b.text).join("");
}
// Handle thinking blocks for llama.cpp server + gpt-oss
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
if (thinkingBlocks.length > 0) {
// Use the signature from the first thinking block if available
const signature = thinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
}
}
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
type: "function" as const,
function: {
name: tc.name,
arguments: JSON.stringify(tc.arguments),
},
}));
}
params.push(assistantMsg);
} else if (msg.role === "toolResult") {
params.push({
role: "tool",
content: msg.content,
tool_call_id: msg.toolCallId,
});
}
return params;
}
private convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
return tools.map((tool) => ({
type: "function",
function: {
name: tool.name,
description: tool.description,
parameters: tool.parameters,
},
}));
}
return params;
}
private mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | null): StopReason {
switch (reason) {
case "stop":
return "stop";
case "length":
return "length";
case "function_call":
case "tool_calls":
return "toolUse";
case "content_filter":
return "safety";
default:
return "stop";
/**
 * Translate internal Tool definitions into the Chat Completions
 * function-tool wire format expected by the OpenAI SDK.
 */
function convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
    const converted: OpenAI.Chat.Completions.ChatCompletionTool[] = [];
    for (const { name, description, parameters } of tools) {
        converted.push({
            type: "function",
            function: { name, description, parameters },
        });
    }
    return converted;
}
/**
 * Map an OpenAI Chat Completions finish_reason onto the unified StopReason.
 *
 * null (stream still open / no reason reported) is treated as a normal stop.
 * The trailing `never` assignment keeps the mapping exhaustive at compile
 * time: adding a new finish_reason to the SDK union breaks the build here.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): StopReason {
    if (reason === null) return "stop";
    if (reason === "stop") return "stop";
    if (reason === "length") return "length";
    if (reason === "function_call" || reason === "tool_calls") return "toolUse";
    if (reason === "content_filter") return "safety";
    const _exhaustive: never = reason;
    throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}

View file

@ -10,58 +10,49 @@ import type {
ResponseOutputMessage,
ResponseReasoningItem,
} from "openai/resources/responses/responses.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
Tool,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface OpenAIResponsesLLMOptions extends LLMOptions {
// OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends GenerateOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high";
reasoningSummary?: "auto" | "detailed" | "concise" | null;
}
export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
private client: OpenAI;
private modelInfo: Model;
/**
* Generate function for OpenAI Responses API
*/
export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
model: Model<"openai-responses">,
context: Context,
options?: OpenAIResponsesOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.OPENAI_API_KEY) {
throw new Error(
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.OPENAI_API_KEY;
}
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "openai-responses";
}
async generate(request: Context, options?: OpenAIResponsesLLMOptions): Promise<AssistantMessage> {
// Start async processing
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "openai-responses" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -71,77 +62,31 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
},
stopReason: "stop",
};
try {
const input = this.convertToInput(request.messages, request.systemPrompt);
// Create OpenAI client
const client = createClient(model, options?.apiKey);
const params = buildParams(model, context, options);
const openaiStream = await client.responses.create(params, { signal: options?.signal });
stream.push({ type: "start", partial: output });
const params: ResponseCreateParamsStreaming = {
model: this.modelInfo.id,
input,
stream: true,
};
if (options?.maxTokens) {
params.max_output_tokens = options?.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (request.tools) {
params.tools = this.convertTools(request.tools);
}
// Add reasoning options for models that support it
if (this.modelInfo?.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary || "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
params.reasoning = {
effort: this.modelInfo.name.startsWith("gpt-5") ? "minimal" : null,
summary: null,
};
if (this.modelInfo.name.startsWith("gpt-5")) {
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
input.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
}
const stream = await this.client.responses.create(params, {
signal: options?.signal,
});
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = [];
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
let currentBlock: ThinkingContent | TextContent | ToolCall | null = null;
for await (const event of stream) {
for await (const event of openaiStream) {
// Handle output item start
if (event.type === "response.output_item.added") {
const item = event.item;
if (item.type === "reasoning") {
options?.onEvent?.({ type: "thinking_start" });
outputItems.push(item);
currentItem = item;
currentBlock = { type: "thinking", thinking: "" };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", partial: output });
} else if (item.type === "message") {
options?.onEvent?.({ type: "text_start" });
outputItems.push(item);
currentItem = item;
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", partial: output });
}
}
// Handle reasoning summary deltas
@ -151,30 +96,42 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
currentItem.summary.push(event.part);
}
} else if (event.type === "response.reasoning_summary_text.delta") {
if (currentItem && currentItem.type === "reasoning") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += event.delta;
lastPart.text += event.delta;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentItem.summary.map((s) => s.text).join("\n\n"),
delta: event.delta,
partial: output,
});
}
}
}
// Add a new line between summary parts (hack...)
else if (event.type === "response.reasoning_summary_part.done") {
if (currentItem && currentItem.type === "reasoning") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += "\n\n";
lastPart.text += "\n\n";
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentItem.summary.map((s) => s.text).join("\n\n"),
delta: "\n\n",
partial: output,
});
}
}
@ -186,30 +143,28 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
currentItem.content.push(event.part);
}
} else if (event.type === "response.output_text.delta") {
if (currentItem && currentItem.type === "message") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "output_text") {
currentBlock.text += event.delta;
lastPart.text += event.delta;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentItem.content
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
.join(""),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.refusal.delta") {
if (currentItem && currentItem.type === "message") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "refusal") {
currentBlock.text += event.delta;
lastPart.refusal += event.delta;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentItem.content
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
.join(""),
delta: event.delta,
partial: output,
});
}
}
@ -218,14 +173,24 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
else if (event.type === "response.output_item.done") {
const item = event.item;
if (item.type === "reasoning") {
outputItems[outputItems.length - 1] = item; // Update with final item
const thinkingContent = item.summary?.map((s) => s.text).join("\n\n") || "";
options?.onEvent?.({ type: "thinking_end", content: thinkingContent });
} else if (item.type === "message") {
outputItems[outputItems.length - 1] = item; // Update with final item
const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
options?.onEvent?.({ type: "text_end", content: textContent });
if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
currentBlock.thinkingSignature = JSON.stringify(item);
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
currentBlock = null;
} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
currentBlock.textSignature = item.id;
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
currentBlock = null;
} else if (item.type === "function_call") {
const toolCall: ToolCall = {
type: "toolCall",
@ -233,8 +198,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
name: item.name,
arguments: JSON.parse(item.arguments),
};
options?.onEvent?.({ type: "toolCall", toolCall });
outputItems.push(item);
output.content.push(toolCall);
stream.push({ type: "toolCall", toolCall, partial: output });
}
}
// Handle completion
@ -249,10 +214,10 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
// Map status to stop reason
output.stopReason = this.mapStopReason(response?.status);
if (outputItems.some((b) => b.type === "function_call") && output.stopReason === "stop") {
output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
output.stopReason = "toolUse";
}
}
@ -260,173 +225,215 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
else if (event.type === "error") {
output.stopReason = "error";
output.error = `Code ${event.code}: ${event.message}` || "Unknown error";
options?.onEvent?.({ type: "error", error: output.error });
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
return output;
} else if (event.type === "response.failed") {
output.stopReason = "error";
output.error = "Unknown error";
options?.onEvent?.({ type: "error", error: output.error });
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
return output;
}
}
// Convert output items to blocks
for (const item of outputItems) {
if (item.type === "reasoning") {
output.content.push({
type: "thinking",
thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "",
thinkingSignature: JSON.stringify(item), // Full item for resubmission
});
} else if (item.type === "message") {
output.content.push({
type: "text",
text: item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""),
textSignature: item.id, // ID for resubmission
});
} else if (item.type === "function_call") {
output.content.push({
type: "toolCall",
id: item.call_id + "|" + item.id,
name: item.name,
arguments: JSON.parse(item.arguments),
});
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Construct an OpenAI SDK client for the Responses API.
 *
 * An explicit key wins; otherwise OPENAI_API_KEY is used. If neither is
 * available the function fails fast with a descriptive error rather than
 * letting the SDK fail on the first request.
 */
function createClient(model: Model<"openai-responses">, apiKey?: string) {
    if (!apiKey && !process.env.OPENAI_API_KEY) {
        throw new Error(
            "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new OpenAI({
        apiKey: apiKey || process.env.OPENAI_API_KEY,
        baseURL: model.baseUrl,
        dangerouslyAllowBrowser: true,
    });
}
function buildParams(model: Model<"openai-responses">, context: Context, options?: OpenAIResponsesOptions) {
const messages = convertMessages(model, context);
const params: ResponseCreateParamsStreaming = {
model: model.id,
input: messages,
stream: true,
};
if (options?.maxTokens) {
params.max_output_tokens = options?.maxTokens;
}
private convertToInput(messages: Message[], systemPrompt?: string): ResponseInput {
const input: ResponseInput = [];
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (context.tools) {
params.tools = convertTools(context.tools);
}
// Add system prompt if provided
if (systemPrompt) {
const role = this.modelInfo?.reasoning ? "developer" : "system";
input.push({
role,
content: systemPrompt,
});
}
if (model.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary || "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
params.reasoning = {
effort: model.name.startsWith("gpt-5") ? "minimal" : null,
summary: null,
};
// Convert messages
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
input.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
} else {
// Convert array content to OpenAI Responses format
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: item.text,
} satisfies ResponseInputText;
} else {
// Image content - OpenAI Responses uses data URLs
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !this.modelInfo?.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
input.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
// Process content blocks in order
const output: ResponseInput = [];
for (const block of msg.content) {
// Do not submit thinking blocks if the completion had an error (i.e. abort)
if (block.type === "thinking" && msg.stopReason !== "error") {
// Push the full reasoning item(s) from signature
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
status: "completed",
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
} satisfies ResponseOutputMessage);
// Do not submit thinking blocks if the completion had an error (i.e. abort)
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1], // Extract original ID
call_id: toolCall.id.split("|")[0], // Extract call session ID
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
// Add all output items to input
input.push(...output);
} else if (msg.role === "toolResult") {
// Tool results are sent as function_call_output
input.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0], // Extract call session ID
output: msg.content,
if (model.name.startsWith("gpt-5")) {
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
messages.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
return input;
}
private convertTools(tools: Tool[]): OpenAITool[] {
return tools.map((tool) => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.parameters,
strict: null,
}));
return params;
}
function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
const messages: ResponseInput = [];
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
const role = model.reasoning ? "developer" : "system";
messages.push({
role,
content: context.systemPrompt,
});
}
private mapStopReason(status: string | undefined): StopReason {
switch (status) {
case "completed":
return "stop";
case "incomplete":
return "length";
case "failed":
case "cancelled":
return "error";
default:
return "stop";
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
messages.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
} else {
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: item.text,
} satisfies ResponseInputText;
} else {
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
if (filteredContent.length === 0) continue;
messages.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const output: ResponseInput = [];
for (const block of msg.content) {
// Do not submit thinking blocks if the completion had an error (i.e. abort)
if (block.type === "thinking" && msg.stopReason !== "error") {
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
status: "completed",
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
} satisfies ResponseOutputMessage);
// Do not submit toolcall blocks if the completion had an error (i.e. abort)
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1],
call_id: toolCall.id.split("|")[0],
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
if (output.length === 0) continue;
messages.push(...output);
} else if (msg.role === "toolResult") {
messages.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0],
output: msg.content,
});
}
}
return messages;
}
/**
 * Translate internal Tool definitions into the flat Responses-API tool
 * shape (name/description/parameters at the top level, strict disabled).
 */
function convertTools(tools: Tool[]): OpenAITool[] {
    return tools.map(({ name, description, parameters }) => ({
        type: "function",
        name,
        description,
        parameters,
        strict: null,
    }));
}
/**
 * Map a Responses-API response status onto the unified StopReason.
 *
 * A missing status defaults to a normal stop. The in-flight states
 * ("in_progress", "queued") should not appear on a finished response but
 * are mapped to "stop" defensively. The `never` assignment at the end keeps
 * the mapping exhaustive against the SDK's ResponseStatus union.
 */
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
    if (!status) return "stop";
    if (status === "incomplete") return "length";
    if (status === "failed" || status === "cancelled") return "error";
    // "completed" plus the wonky in-flight states all count as a clean stop
    if (status === "completed" || status === "in_progress" || status === "queued") return "stop";
    const _exhaustive: never = status;
    throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}

View file

@ -1,18 +1,6 @@
import type { AssistantMessage, Message, Model } from "../types.js";
import type { Api, AssistantMessage, Message, Model } from "../types.js";
/**
* Transform messages for cross-provider compatibility.
*
* - User and toolResult messages are copied verbatim
* - Assistant messages:
* - If from the same provider/model, copied as-is
* - If from different provider/model, thinking blocks are converted to text blocks with <thinking></thinking> tags
*
* @param messages The messages to transform
* @param model The target model that will process these messages
* @returns A copy of the messages array with transformations applied
*/
export function transformMessages(messages: Message[], model: Model, api: string): Message[] {
export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
return messages.map((msg) => {
// User and toolResult messages pass through unchanged
if (msg.role === "user" || msg.role === "toolResult") {
@ -24,7 +12,7 @@ export function transformMessages(messages: Message[], model: Model, api: string
const assistantMsg = msg as AssistantMessage;
// If message is from the same provider and API, keep as is
if (assistantMsg.provider === model.provider && assistantMsg.api === api) {
if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
return msg;
}
@ -47,8 +35,6 @@ export function transformMessages(messages: Message[], model: Model, api: string
content: transformedContent,
};
}
// Should not reach here, but return as-is for safety
return msg;
});
}

View file

@ -1,5 +1,27 @@
export type KnownApi = "openai-completions" | "openai-responses" | "anthropic-messages" | "google-generative-ai";
export type Api = KnownApi | string;
import type { AnthropicOptions } from "./providers/anthropic";
import type { GoogleOptions } from "./providers/google";
import type { OpenAICompletionsOptions } from "./providers/openai-completions";
import type { OpenAIResponsesOptions } from "./providers/openai-responses";
export type Api = "openai-completions" | "openai-responses" | "anthropic-messages" | "google-generative-ai";
export interface ApiOptionsMap {
"anthropic-messages": AnthropicOptions;
"openai-completions": OpenAICompletionsOptions;
"openai-responses": OpenAIResponsesOptions;
"google-generative-ai": GoogleOptions;
}
// Compile-time exhaustiveness check - this will fail if ApiOptionsMap doesn't have all KnownApi keys
type _CheckExhaustive = ApiOptionsMap extends Record<Api, GenerateOptions>
? Record<Api, GenerateOptions> extends ApiOptionsMap
? true
: ["ApiOptionsMap is missing some KnownApi values", Exclude<Api, keyof ApiOptionsMap>]
: ["ApiOptionsMap doesn't extend Record<KnownApi, GenerateOptions>"];
const _exhaustive: _CheckExhaustive = true;
// Helper type to get options for a specific API
export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi];
export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter";
export type Provider = KnownProvider | string;
@ -21,31 +43,17 @@ export interface GenerateOptions {
}
// Unified options with reasoning (what public generate() accepts)
export interface GenerateOptionsUnified extends GenerateOptions {
export interface SimpleGenerateOptions extends GenerateOptions {
reasoning?: ReasoningEffort;
}
// Generic GenerateFunction with typed options
export type GenerateFunction<TOptions extends GenerateOptions = GenerateOptions> = (
model: Model,
export type GenerateFunction<TApi extends Api> = (
model: Model<TApi>,
context: Context,
options: TOptions,
options: OptionsForApi<TApi>,
) => GenerateStream;
// Legacy LLM interface (to be removed)
export interface LLMOptions {
temperature?: number;
maxTokens?: number;
onEvent?: (event: AssistantMessageEvent) => void;
signal?: AbortSignal;
}
export interface LLM<T extends LLMOptions> {
generate(request: Context, options?: T): Promise<AssistantMessage>;
getModel(): Model;
getApi(): string;
}
export interface TextContent {
type: "text";
text: string;
@ -100,7 +108,7 @@ export interface AssistantMessage {
model: string;
usage: Usage;
stopReason: StopReason;
error?: string | Error;
error?: string;
}
export interface ToolResultMessage {
@ -138,10 +146,10 @@ export type AssistantMessageEvent =
| { type: "error"; error: string; partial: AssistantMessage };
// Model interface for the unified model system
export interface Model {
export interface Model<TApi extends Api> {
id: string;
name: string;
api: Api;
api: TApi;
provider: Provider;
baseUrl: string;
reasoning: boolean;