From 4cee070bdd2e392f2bbd3869a1be382c91825daf Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Wed, 3 Sep 2025 01:25:19 +0200
Subject: [PATCH] refactor(ai): Simplify API with new streaming interface and
 model management

- Replace createLLM with getModel/getModels/getProviders functions
- Rename PROVIDERS to MODELS (internal only, not exposed)
- Add streamSimple/completeSimple for unified reasoning interface
- Update README with new API examples and comprehensive documentation
- Remove model registration (models are now fixed at build time)
- Add proper TypeScript typing for provider-specific options
- Document context serialization, cross-provider handoffs, and browser usage
---
 packages/ai/README.md                  | 656 ++++++++++++++-----------
 packages/ai/scripts/generate-models.ts |   2 +-
 packages/ai/src/index.ts               |   1 -
 packages/ai/src/models.generated.ts    | 104 ++--
 packages/ai/src/models.ts              |  32 +-
 5 files changed, 438 insertions(+), 357 deletions(-)

diff --git a/packages/ai/README.md b/packages/ai/README.md
index 09a51d4f..942506c9 100644
--- a/packages/ai/README.md
+++ b/packages/ai/README.md
@@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai
 ## Quick Start
 
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai';
 
-const llm = createLLM('openai', 'gpt-4o-mini');
+// Fully typed with auto-complete support for both providers and models
+const model = getModel('openai', 'gpt-4o-mini');
 
-const response = await llm.generate({
-  messages: [{ role: 'user', content: 'Hello!' }]
-});
+// Define tools
+const tools: Tool[] = [{
+  name: 'get_time',
+  description: 'Get the current time',
+  parameters: {
+    type: 'object',
+    properties: {},
+    required: []
+  }
+}];
+
+// Build a conversation context (easily serializable and transferable between models)
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [{ role: 'user', content: 'What time is it?' }],
+  tools
+};
+
+// Option 1: Streaming with all event types
+const s = stream(model, context);
+
+for await (const event of s) {
+  switch (event.type) {
+    case 'start':
+      console.log(`Starting with ${event.partial.model}`);
+      break;
+    case 'text_start':
+      console.log('\n[Text started]');
+      break;
+    case 'text_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'text_end':
+      console.log('\n[Text ended]');
+      break;
+    case 'thinking_start':
+      console.log('[Model is thinking...]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta);
+      break;
+    case 'thinking_end':
+      console.log('[Thinking complete]');
+      break;
+    case 'toolCall':
+      console.log(`\nTool called: ${event.toolCall.name}`);
+      break;
+    case 'done':
+      console.log(`\nFinished: ${event.reason}`);
+      break;
+    case 'error':
+      console.error(`Error: ${event.error}`);
+      break;
+  }
+}
+
+// Get the final message after streaming, add it to the context
+const finalMessage = await s.finalMessage();
+context.messages.push(finalMessage);
+
+// Handle tool calls if any
+const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall');
+for (const call of toolCalls) {
+  // Execute the tool
+  const result = call.name === 'get_time'
+    ? new Date().toISOString()
+    : 'Unknown tool';
+
+  // Add tool result to context
+  context.messages.push({
+    role: 'toolResult',
+    toolCallId: call.id,
+    toolName: call.name,
+    content: result,
+    isError: false
+  });
+}
+
+// Continue if there were tool calls
+if (toolCalls.length > 0) {
+  const continuation = await complete(model, context);
+  context.messages.push(continuation);
+  console.log('After tool execution:', continuation.content);
+}
+
+console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
+console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
+
+// Option 2: Get complete response without streaming
+const response = await complete(model, context);
 
-// response.content is an array of content blocks
 for (const block of response.content) {
   if (block.type === 'text') {
     console.log(block.text);
+  } else if (block.type === 'toolCall') {
+    console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
   }
 }
 ```
 
 ## Image Input
 
+Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
+
 ```typescript
 import { readFileSync } from 'fs';
+import { getModel, complete } from '@mariozechner/pi-ai';
+
+const model = getModel('openai', 'gpt-4o-mini');
+
+// Check if model supports images
+if (model.input.includes('image')) {
+  console.log('Model supports vision');
+}
 
 const imageBuffer = readFileSync('image.png');
 const base64Image = imageBuffer.toString('base64');
 
-const response = await llm.generate({
+const response = await complete(model, {
   messages: [{
     role: 'user',
     content: [
@@ -57,166 +156,151 @@ const response = await llm.generate({
     ]
   }]
 });
-```
 
-## Tool Calling
-
-```typescript
-const tools = [{
-  name: 'get_weather',
-  description: 'Get current weather for a location',
-  parameters: {
-    type: 'object',
-    properties: {
-      location: { type: 'string' }
-    },
-    required: ['location']
-  }
-}];
-
-const messages = [];
-messages.push({ role: 'user', content: 'What is the weather in Paris?' });
-
-const response = await llm.generate({ messages, tools });
-messages.push(response);
-
-// Check for tool calls in the content blocks
-const toolCalls = response.content.filter(block => block.type === 'toolCall');
-
-for (const call of toolCalls) {
-  // Call your actual function
-  const result = await getWeather(call.arguments.location);
-
-  // Add tool result to context
-  messages.push({
-    role: 'toolResult',
-    content: JSON.stringify(result),
-    toolCallId: call.id,
-    toolName: call.name,
-    isError: false
-  });
-}
-
-if (toolCalls.length > 0) {
-  // Continue conversation with tool results
-  const followUp = await llm.generate({ messages, tools });
-  messages.push(followUp);
-
-  // Print text blocks from the response
-  for (const block of followUp.content) {
-    if (block.type === 'text') {
-      console.log(block.text);
-    }
+// Access the response
+for (const block of response.content) {
+  if (block.type === 'text') {
+    console.log(block.text);
   }
 }
 ```
 
-## Streaming
+## Thinking/Reasoning
+
+Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
+
+### Unified Interface (streamSimple/completeSimple)
 
 ```typescript
-const response = await llm.generate({
-  messages: [{ role: 'user', content: 'Write a story' }]
+import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai';
+
+// Many models across providers support thinking/reasoning
+const model = getModel('anthropic', 'claude-sonnet-4-20250514');
+// or getModel('openai', 'gpt-5-mini');
+// or getModel('google', 'gemini-2.5-flash');
+// or getModel('xai', 'grok-code-fast-1');
+// or getModel('groq', 'openai/gpt-oss-20b');
+// or getModel('cerebras', 'gpt-oss-120b');
+// or getModel('openrouter', 'z-ai/glm-4.5v');
+
+// Check if model supports reasoning
+if (model.reasoning) {
+  console.log('Model supports reasoning/thinking');
+}
+
+// Use the simplified reasoning option
+const response = await completeSimple(model, {
+  messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
 }, {
-  onEvent: (event) => {
-    switch (event.type) {
-      case 'start':
-        console.log(`Starting ${event.provider} ${event.model}`);
-        break;
-      case 'text_start':
-        console.log('[Starting text block]');
-        break;
-      case 'text_delta':
-        process.stdout.write(event.delta);
-        break;
-      case 'text_end':
-        console.log(`\n[Text block complete: ${event.content.length} chars]`);
-        break;
-      case 'thinking_start':
-        console.error('[Starting thinking]');
-        break;
-      case 'thinking_delta':
-        process.stderr.write(event.delta);
-        break;
-      case 'thinking_end':
-        console.error(`\n[Thinking complete: ${event.content.length} chars]`);
-        break;
-      case 'toolCall':
-        console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`);
-        break;
-      case 'done':
-        console.log(`Completed with reason: ${event.reason}`);
-        console.log(`Tokens: ${event.message.usage.input} in, ${event.message.usage.output} out`);
-        break;
-      case 'error':
-        console.error('Error:', event.error);
-        break;
-    }
+  reasoning: 'medium'  // 'minimal' | 'low' | 'medium' | 'high'
+});
+
+// Access thinking and text blocks
+for (const block of response.content) {
+  if (block.type === 'thinking') {
+    console.log('Thinking:', block.thinking);
+  } else if (block.type === 'text') {
+    console.log('Response:', block.text);
+  }
+}
+```
+
+### Provider-Specific Options (stream/complete)
+
+For fine-grained control, use the provider-specific options:
+
+```typescript
+import { getModel, complete } from '@mariozechner/pi-ai';
+
+// OpenAI Reasoning (o1, o3, gpt-5)
+const openaiModel = getModel('openai', 'gpt-5-mini');
+await complete(openaiModel, context, {
+  reasoningEffort: 'medium',
+  reasoningSummary: 'detailed'  // OpenAI Responses API only
+});
+
+// Anthropic Thinking (Claude Sonnet 4)
+const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514');
+await complete(anthropicModel, context, {
+  thinkingEnabled: true,
+  thinkingBudgetTokens: 8192  // Optional token limit
+});
+
+// Google Gemini Thinking
+const googleModel = getModel('google', 'gemini-2.5-flash');
+await complete(googleModel, context, {
+  thinking: {
+    enabled: true,
+    budgetTokens: 8192  // -1 for dynamic, 0 to disable
   }
 });
 ```
 
-## Abort Signal
+### Streaming Thinking Content
 
-The abort signal allows you to cancel in-progress requests. When aborted, providers return partial results accumulated up to the cancellation point, including accurate token counts and cost estimates.
-
-### Basic Usage
+When streaming, thinking content is delivered through specific events:
 
 ```typescript
+const s = streamSimple(model, context, { reasoning: 'high' });
+
+for await (const event of s) {
+  switch (event.type) {
+    case 'thinking_start':
+      console.log('[Model started thinking]');
+      break;
+    case 'thinking_delta':
+      process.stdout.write(event.delta);  // Stream thinking content
+      break;
+    case 'thinking_end':
+      console.log('\n[Thinking complete]');
+      break;
+  }
+}
+```
+
+## Errors & Abort Signal
+
+When a request ends with an error (including aborts), the API returns an `AssistantMessage` with:
+- `stopReason: 'error'` - Indicates the request ended with an error
+- `error: string` - Error message describing what happened
+- `content: array` - **Partial content** accumulated before the error
+- `usage: Usage` - **Token counts and costs** (may be incomplete depending on when error occurred)
+
+### Aborting
+The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`.
+
+```typescript
+import { getModel, stream } from '@mariozechner/pi-ai';
+
+const model = getModel('openai', 'gpt-4o-mini');
 const controller = new AbortController();
 
 // Abort after 2 seconds
 setTimeout(() => controller.abort(), 2000);
 
-const response = await llm.generate({
+const s = stream(model, {
   messages: [{ role: 'user', content: 'Write a long story' }]
 }, {
-  signal: controller.signal,
-  onEvent: (event) => {
-    if (event.type === 'text_delta') {
-      process.stdout.write(event.delta);
-    }
-  }
+  signal: controller.signal
 });
 
-// Check if the request was aborted
-if (response.stopReason === 'error' && response.error) {
-  console.log('Request was aborted:', response.error);
+for await (const event of s) {
+  if (event.type === 'text_delta') {
+    process.stdout.write(event.delta);
+  } else if (event.type === 'error') {
+    console.log('Error:', event.error);
+  }
+}
+
+// Get results (may be partial if aborted)
+const response = await s.finalMessage();
+if (response.stopReason === 'error') {
+  console.log('Error:', response.error);
   console.log('Partial content received:', response.content);
   console.log('Tokens used:', response.usage);
-} else {
-  console.log('Request completed successfully');
 }
 ```
 
-### Partial Results and Token Tracking
-
-When a request is aborted, the API returns an `AssistantMessage` with:
-- `stopReason: 'error'` - Indicates the request was aborted
-- `error: string` - Error message describing the abort
-- `content: array` - **Partial content** accumulated before the abort
-- `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred)
-
-```typescript
-// Example: User interrupts a long-running request
-const controller = new AbortController();
-document.getElementById('stop-button').onclick = () => controller.abort();
-
-const response = await llm.generate(context, {
-  signal: controller.signal,
-  onEvent: (e) => {
-    if (e.type === 'text_delta') updateUI(e.delta);
-  }
-});
-
-// Even if aborted, you get:
-// - Partial text that was streamed
-// - Token count (may be partial/estimated)
-// - Cost calculations (may be incomplete)
-console.log(`Generated ${response.content.length} content blocks`);
-console.log(`Estimated ${response.usage.output} output tokens`);
-console.log(`Estimated cost: $${response.usage.cost.total}`);
-```
-
 ### Continuing After Abort
 
 Aborted messages can be added to the conversation context and continued in subsequent requests:
@@ -232,19 +316,99 @@ const context = {
 const controller1 = new AbortController();
 setTimeout(() => controller1.abort(), 2000);
 
-const partial = await llm.generate(context, { signal: controller1.signal });
+const partial = await complete(model, context, { signal: controller1.signal });
 
 // Add the partial response to context
 context.messages.push(partial);
 context.messages.push({ role: 'user', content: 'Please continue' });
 
 // Continue the conversation
-const continuation = await llm.generate(context);
+const continuation = await complete(model, context);
 ```
 
-When an aborted message (with `stopReason: 'error'`) is resubmitted in the context:
-- **OpenAI Responses**: Filters out thinking blocks and tool calls from aborted messages, as API call will fail if incomplete thinking and tool calls are submitted
-- **Anthropic, Google, OpenAI Completions**: Send all blocks as-is (text, thinking, tool calls)
+## APIs, Models, and Providers
+
+The library implements 4 API interfaces, each with its own streaming function and options:
+
+- **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
+- **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
+- **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
+- **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
+
+### Providers and Models
+
+A **provider** offers models through a specific API. For example:
+- **Anthropic** models use the `anthropic-messages` API
+- **Google** models use the `google-generative-ai` API
+- **OpenAI** models use the `openai-responses` API
+- **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
+
+### Querying Providers and Models
+
+```typescript
+import { getProviders, getModels, getModel } from '@mariozechner/pi-ai';
+
+// Get all available providers
+const providers = getProviders();
+console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
+
+// Get all models from a provider (fully typed)
+const anthropicModels = getModels('anthropic');
+for (const model of anthropicModels) {
+  console.log(`${model.id}: ${model.name}`);
+  console.log(`  API: ${model.api}`); // 'anthropic-messages'
+  console.log(`  Context: ${model.contextWindow} tokens`);
+  console.log(`  Vision: ${model.input.includes('image')}`);
+  console.log(`  Reasoning: ${model.reasoning}`);
+}
+
+// Get a specific model (both provider and model ID are auto-completed in IDEs)
+const model = getModel('openai', 'gpt-4o-mini');
+console.log(`Using ${model.name} via ${model.api} API`);
+```
+
+### Custom Models
+
+You can create custom models for local inference servers or custom endpoints:
+
+```typescript
+import { Model, stream } from '@mariozechner/pi-ai';
+
+// Example: Ollama using OpenAI-compatible API
+const ollamaModel: Model<'openai-completions'> = {
+  id: 'llama-3.1-8b',
+  name: 'Llama 3.1 8B (Ollama)',
+  api: 'openai-completions',
+  provider: 'ollama',
+  baseUrl: 'http://localhost:11434/v1',
+  reasoning: false,
+  input: ['text'],
+  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+  contextWindow: 128000,
+  maxTokens: 32000
+};
+
+// Use the custom model
+const s = stream(ollamaModel, context, {
+  apiKey: 'dummy' // Ollama doesn't need a real key
+});
+```
+
+### Type Safety
+
+Models are typed by their API, ensuring type-safe options:
+
+```typescript
+// TypeScript knows this is an Anthropic model
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+
+// So these options are type-checked for AnthropicOptions
+await complete(claude, context, {
+  thinkingEnabled: true,      // ✓ Valid for anthropic-messages
+  thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
+  // reasoningEffort: 'high'  // ✗ TypeScript error: not valid for anthropic-messages
+});
+```
 
 ## Cross-Provider Handoffs
 
@@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th
 When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
 
 - **User and tool result messages** are passed through unchanged
-- **Assistant messages from the same provider/model** are preserved as-is
+- **Assistant messages from the same provider/API** are preserved as-is
 - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
 - **Tool calls and regular text** are preserved unchanged
 
 ### Example: Multi-Provider Conversation
 
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, complete, Context } from '@mariozechner/pi-ai';
 
 // Start with Claude
-const claude = createLLM('anthropic', 'claude-sonnet-4-0');
-const messages = [];
+const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
+const context: Context = {
+  messages: []
+};
 
-messages.push({ role: 'user', content: 'What is 25 * 18?' });
-const claudeResponse = await claude.generate({ messages }, {
-  thinking: { enabled: true }
+context.messages.push({ role: 'user', content: 'What is 25 * 18?' });
+const claudeResponse = await complete(claude, context, {
+  thinkingEnabled: true
 });
-messages.push(claudeResponse);
+context.messages.push(claudeResponse);
 
 // Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
-const gpt5 = createLLM('openai', 'gpt-5-mini');
-messages.push({ role: 'user', content: 'Is that calculation correct?' });
-const gptResponse = await gpt5.generate({ messages });
-messages.push(gptResponse);
+const gpt5 = getModel('openai', 'gpt-5-mini');
+context.messages.push({ role: 'user', content: 'Is that calculation correct?' });
+const gptResponse = await complete(gpt5, context);
+context.messages.push(gptResponse);
 
 // Switch to Gemini
-const gemini = createLLM('google', 'gemini-2.5-flash');  
-messages.push({ role: 'user', content: 'What was the original question?' });
-const geminiResponse = await gemini.generate({ messages });
+const gemini = getModel('google', 'gemini-2.5-flash');
+context.messages.push({ role: 'user', content: 'What was the original question?' });
+const geminiResponse = await complete(gemini, context);
 ```
 
 ### Provider Compatibility
@@ -300,155 +466,65 @@ This enables flexible workflows where you can:
 - Use specialized models for specific tasks
 - Maintain conversation continuity across provider outages
 
-## Provider-Specific Options
+## Context Serialization
+
+The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
 
-### OpenAI Reasoning (o1, o3)
 ```typescript
-const llm = createLLM('openai', 'o1-mini');
+import { Context, getModel, complete } from '@mariozechner/pi-ai';
 
-await llm.generate(context, {
-  reasoningEffort: 'medium'  // 'minimal' | 'low' | 'medium' | 'high'
-});
-```
-
-### Anthropic Thinking
-```typescript
-const llm = createLLM('anthropic', 'claude-3-5-sonnet-20241022');
-
-await llm.generate(context, {
-  thinking: {
-    enabled: true,
-    budgetTokens: 2048  // Optional thinking token limit
-  }
-});
-```
-
-### Google Gemini Thinking
-```typescript
-const llm = createLLM('google', 'gemini-2.5-pro');
-
-await llm.generate(context, {
-  thinking: { enabled: true }
-});
-```
-
-## Custom Models
-
-### Local Models (Ollama, vLLM, etc.)
-```typescript
-import { OpenAICompletionsLLM } from '@mariozechner/pi-ai';
-
-const model = {
-  id: 'gpt-oss:20b',
-  provider: 'ollama',
-  baseUrl: 'http://localhost:11434/v1',
-  reasoning: false,
-  input: ['text'],
-  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 126000,
-  maxTokens: 32000,
-  name: 'Llama 3.1 8B'
+// Create and use a context
+const context: Context = {
+  systemPrompt: 'You are a helpful assistant.',
+  messages: [
+    { role: 'user', content: 'What is TypeScript?' }
+  ]
 };
 
-const llm = new OpenAICompletionsLLM(model, 'dummy-key');
-```
-
-### Custom OpenAI-Compatible Endpoints
-```typescript
-const model = {
-  id: 'custom-model',
-  provider: 'custom',
-  baseUrl: 'https://your-api.com/v1',
-  reasoning: true,
-  input: ['text', 'image'],
-  cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
-  contextWindow: 32768,
-  maxTokens: 8192,
-  name: 'Custom Model'
-};
-
-const llm = new OpenAICompletionsLLM(model, 'your-api-key');
-```
-
-## Model Discovery
-
-All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time.
-
-### List Available Models
-```typescript
-import { PROVIDERS } from '@mariozechner/pi-ai';
-
-// List all OpenAI models (all support tool calling)
-for (const [modelId, model] of Object.entries(PROVIDERS.openai.models)) {
-  console.log(`${modelId}: ${model.name}`);
-  console.log(`  Context: ${model.contextWindow} tokens`);
-  console.log(`  Reasoning: ${model.reasoning}`);
-  console.log(`  Vision: ${model.input.includes('image')}`);
-  console.log(`  Cost: $${model.cost.input}/$${model.cost.output} per million tokens`);
-}
-
-// Find all models with reasoning support
-const reasoningModels = [];
-for (const provider of Object.values(PROVIDERS)) {
-  for (const model of Object.values(provider.models)) {
-    if (model.reasoning) {
-      reasoningModels.push(model);
-    }
-  }
-}
-
-// Find all vision-capable models
-const visionModels = [];
-for (const provider of Object.values(PROVIDERS)) {
-  for (const model of Object.values(provider.models)) {
-    if (model.input.includes('image')) {
-      visionModels.push(model);
-    }
-  }
-}
-```
-
-### Check Model Capabilities
-```typescript
-import { getModel } from '@mariozechner/pi-ai';
-
 const model = getModel('openai', 'gpt-4o-mini');
-if (model) {
-  console.log(`Model: ${model.name}`);
-  console.log(`Provider: ${model.provider}`);
-  console.log(`Context window: ${model.contextWindow} tokens`);
-  console.log(`Max output: ${model.maxTokens} tokens`);
-  console.log(`Supports reasoning: ${model.reasoning}`);
-  console.log(`Supports images: ${model.input.includes('image')}`);
-  console.log(`Input cost: $${model.cost.input} per million tokens`);
-  console.log(`Output cost: $${model.cost.output} per million tokens`);
-  console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`);
-  console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`);
-}
+const response = await complete(model, context);
+context.messages.push(response);
+
+// Serialize the entire context
+const serialized = JSON.stringify(context);
+console.log('Serialized context size:', serialized.length, 'characters');
+
+// Save to database, localStorage, file, etc.
+localStorage.setItem('conversation', serialized);
+
+// Later: deserialize and continue the conversation
+const restored: Context = JSON.parse(localStorage.getItem('conversation')!);
+restored.messages.push({ role: 'user', content: 'Tell me more about its type system' });
+
+// Continue with any model
+const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022');
+const continuation = await complete(newModel, restored);
 ```
 
+> **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
+
 ## Browser Usage
 
 The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
 
 ```typescript
-import { createLLM } from '@mariozechner/pi-ai';
+import { getModel, complete } from '@mariozechner/pi-ai';
 
 // API key must be passed explicitly in browser
-const llm = createLLM('anthropic', 'claude-3-5-haiku-20241022', {
-  apiKey: 'your-api-key'
-});
+const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
 
-const response = await llm.generate({
+const response = await complete(model, {
   messages: [{ role: 'user', content: 'Hello!' }]
+}, {
+  apiKey: 'your-api-key'
 });
 ```
 
 > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
 
-## Environment Variables
+### Environment Variables (Node.js only)
 
-Set these environment variables to use `createLLM` without passing API keys:
+In Node.js environments, you can set environment variables to avoid passing API keys:
 
 ```bash
 OPENAI_API_KEY=sk-...
@@ -460,13 +536,17 @@ XAI_API_KEY=xai-...
 OPENROUTER_API_KEY=sk-or-...
 ```
 
-When set, you can omit the API key parameter:
+When set, the library automatically uses these keys:
+
 ```typescript
 // Uses OPENAI_API_KEY from environment
-const llm = createLLM('openai', 'gpt-4o-mini');
+const model = getModel('openai', 'gpt-4o-mini');
+const response = await complete(model, context);
 
-// Or pass explicitly
-const llm = createLLM('openai', 'gpt-4o-mini', 'sk-...');
+// Or override with explicit key
+const overridden = await complete(model, context, {
+  apiKey: 'sk-different-key'
+});
 ```
 
 ## License
diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts
index c567ff1a..1ddfa16b 100644
--- a/packages/ai/scripts/generate-models.ts
+++ b/packages/ai/scripts/generate-models.ts
@@ -338,7 +338,7 @@ async function generateModels() {
 
 import type { Model } from "./types.js";
 
-export const PROVIDERS = {
+export const MODELS = {
 `;
 
 	// Generate provider sections
diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts
index bc07efae..d163aad6 100644
--- a/packages/ai/src/index.ts
+++ b/packages/ai/src/index.ts
@@ -1,5 +1,4 @@
 export * from "./generate.js";
-export * from "./models.generated.js";
 export * from "./models.js";
 export * from "./providers/anthropic.js";
 export * from "./providers/google.js";
diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts
index 974a901c..3b2263f4 100644
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@@ -3,7 +3,7 @@
 
 import type { Model } from "./types.js";
 
-export const PROVIDERS = {
+export const MODELS = {
 	anthropic: {
 		"claude-3-7-sonnet-20250219": {
 			id: "claude-3-7-sonnet-20250219",
@@ -2652,23 +2652,6 @@ export const PROVIDERS = {
 			contextWindow: 32768,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"cohere/command-r-08-2024": {
-			id: "cohere/command-r-08-2024",
-			name: "Cohere: Command R (08-2024)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.15,
-				output: 0.6,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4000,
-		} satisfies Model<"openai-completions">,
 		"cohere/command-r-plus-08-2024": {
 			id: "cohere/command-r-plus-08-2024",
 			name: "Cohere: Command R+ (08-2024)",
@@ -2686,6 +2669,23 @@ export const PROVIDERS = {
 			contextWindow: 128000,
 			maxTokens: 4000,
 		} satisfies Model<"openai-completions">,
+		"cohere/command-r-08-2024": {
+			id: "cohere/command-r-08-2024",
+			name: "Cohere: Command R (08-2024)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.15,
+				output: 0.6,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 4000,
+		} satisfies Model<"openai-completions">,
 		"microsoft/phi-3.5-mini-128k-instruct": {
 			id: "microsoft/phi-3.5-mini-128k-instruct",
 			name: "Microsoft: Phi-3.5 Mini 128K Instruct",
@@ -2720,23 +2720,6 @@ export const PROVIDERS = {
 			contextWindow: 131072,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.1-405b-instruct": {
-			id: "meta-llama/llama-3.1-405b-instruct",
-			name: "Meta: Llama 3.1 405B Instruct",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.7999999999999999,
-				output: 0.7999999999999999,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 32768,
-			maxTokens: 16384,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-8b-instruct": {
 			id: "meta-llama/llama-3.1-8b-instruct",
 			name: "Meta: Llama 3.1 8B Instruct",
@@ -2754,6 +2737,23 @@ export const PROVIDERS = {
 			contextWindow: 131072,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3.1-405b-instruct": {
+			id: "meta-llama/llama-3.1-405b-instruct",
+			name: "Meta: Llama 3.1 405B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.7999999999999999,
+				output: 0.7999999999999999,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 32768,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-70b-instruct": {
 			id: "meta-llama/llama-3.1-70b-instruct",
 			name: "Meta: Llama 3.1 70B Instruct",
@@ -2873,23 +2873,6 @@ export const PROVIDERS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3-70b-instruct": {
-			id: "meta-llama/llama-3-70b-instruct",
-			name: "Meta: Llama 3 70B Instruct",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.3,
-				output: 0.39999999999999997,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 8192,
-			maxTokens: 16384,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3-8b-instruct": {
 			id: "meta-llama/llama-3-8b-instruct",
 			name: "Meta: Llama 3 8B Instruct",
@@ -2907,6 +2890,23 @@ export const PROVIDERS = {
 			contextWindow: 8192,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3-70b-instruct": {
+			id: "meta-llama/llama-3-70b-instruct",
+			name: "Meta: Llama 3 70B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 0.39999999999999997,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 8192,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
 		"mistralai/mixtral-8x22b-instruct": {
 			id: "mistralai/mixtral-8x22b-instruct",
 			name: "Mistral: Mixtral 8x22B Instruct",
diff --git a/packages/ai/src/models.ts b/packages/ai/src/models.ts
index d701bdd6..7acc7684 100644
--- a/packages/ai/src/models.ts
+++ b/packages/ai/src/models.ts
@@ -1,10 +1,10 @@
-import { PROVIDERS } from "./models.generated.js";
+import { MODELS } from "./models.generated.js";
 import type { Api, KnownProvider, Model, Usage } from "./types.js";
 
 const modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();
 
-// Initialize registry from PROVIDERS on module load
-for (const [provider, models] of Object.entries(PROVIDERS)) {
+// Initialize registry from MODELS on module load
+for (const [provider, models] of Object.entries(MODELS)) {
 	const providerModels = new Map<string, Model<Api>>();
 	for (const [id, model] of Object.entries(models)) {
 		providerModels.set(id, model as Model<Api>);
@@ -14,23 +14,25 @@ for (const [provider, models] of Object.entries(PROVIDERS)) {
 
 type ModelApi<
 	TProvider extends KnownProvider,
-	TModelId extends keyof (typeof PROVIDERS)[TProvider],
-> = (typeof PROVIDERS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
+	TModelId extends keyof (typeof MODELS)[TProvider],
+> = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
 
-export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof PROVIDERS)[TProvider]>(
+export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof MODELS)[TProvider]>(
 	provider: TProvider,
 	modelId: TModelId,
-): Model<ModelApi<TProvider, TModelId>>;
-export function getModel<TApi extends Api>(provider: string, modelId: string): Model<TApi> | undefined;
-export function getModel<TApi extends Api>(provider: any, modelId: any): Model<TApi> | undefined {
-	return modelRegistry.get(provider)?.get(modelId) as Model<TApi> | undefined;
+): Model<ModelApi<TProvider, TModelId>> {
+	return modelRegistry.get(provider)?.get(modelId as string) as Model<ModelApi<TProvider, TModelId>>;
 }
 
-export function registerModel<TApi extends Api>(model: Model<TApi>): void {
-	if (!modelRegistry.has(model.provider)) {
-		modelRegistry.set(model.provider, new Map());
-	}
-	modelRegistry.get(model.provider)!.set(model.id, model);
+export function getProviders(): KnownProvider[] {
+	return Array.from(modelRegistry.keys()) as KnownProvider[];
+}
+
+export function getModels<TProvider extends KnownProvider>(
+	provider: TProvider,
+): Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[] {
+	const models = modelRegistry.get(provider);
+	return models ? (Array.from(models.values()) as Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[]) : [];
 }
 
 export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage["cost"] {