From 4cee070bdd2e392f2bbd3869a1be382c91825daf Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Wed, 3 Sep 2025 01:25:19 +0200 Subject: [PATCH] refactor(ai): Simplify API with new streaming interface and model management - Replace createLLM with getModel/getModels/getProviders functions - Rename PROVIDERS to MODELS (internal only, not exposed) - Add streamSimple/completeSimple for unified reasoning interface - Update README with new API examples and comprehensive documentation - Remove model registration (models are now fixed from build time) - Add proper TypeScript typing for provider-specific options - Document context serialization, cross-provider handoffs, and browser usage --- packages/ai/README.md | 656 ++++++++++++++----------- packages/ai/scripts/generate-models.ts | 2 +- packages/ai/src/index.ts | 1 - packages/ai/src/models.generated.ts | 104 ++-- packages/ai/src/models.ts | 32 +- 5 files changed, 438 insertions(+), 357 deletions(-) diff --git a/packages/ai/README.md b/packages/ai/README.md index 09a51d4f..942506c9 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai ## Quick Start ```typescript -import { createLLM } from '@mariozechner/pi-ai'; +import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai'; -const llm = createLLM('openai', 'gpt-4o-mini'); +// Fully typed with auto-complete support for both providers and models +const model = getModel('openai', 'gpt-4o-mini'); -const response = await llm.generate({ - messages: [{ role: 'user', content: 'Hello!' }] -}); +// Define tools +const tools: Tool[] = [{ + name: 'get_time', + description: 'Get the current time', + parameters: { + type: 'object', + properties: {}, + required: [] + } +}]; + +// Build a conversation context (easily serializable and transferable between models) +const context: Context = { + systemPrompt: 'You are a helpful assistant.', + messages: [{ role: 'user', content: 'What time is it?' 
}], + tools +}; + +// Option 1: Streaming with all event types +const s = stream(model, context); + +for await (const event of s) { + switch (event.type) { + case 'start': + console.log(`Starting with ${event.partial.model}`); + break; + case 'text_start': + console.log('\n[Text started]'); + break; + case 'text_delta': + process.stdout.write(event.delta); + break; + case 'text_end': + console.log('\n[Text ended]'); + break; + case 'thinking_start': + console.log('[Model is thinking...]'); + break; + case 'thinking_delta': + process.stdout.write(event.delta); + break; + case 'thinking_end': + console.log('[Thinking complete]'); + break; + case 'toolCall': + console.log(`\nTool called: ${event.toolCall.name}`); + break; + case 'done': + console.log(`\nFinished: ${event.reason}`); + break; + case 'error': + console.error(`Error: ${event.error}`); + break; + } +} + +// Get the final message after streaming, add it to the context +const finalMessage = await s.finalMessage(); +context.messages.push(finalMessage); + +// Handle tool calls if any +const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall'); +for (const call of toolCalls) { + // Execute the tool + const result = call.name === 'get_time' + ? 
new Date().toISOString() + : 'Unknown tool'; + + // Add tool result to context + context.messages.push({ + role: 'toolResult', + toolCallId: call.id, + toolName: call.name, + content: result, + isError: false + }); +} + +// Continue if there were tool calls +if (toolCalls.length > 0) { + const continuation = await complete(model, context); + context.messages.push(continuation); + console.log('After tool execution:', continuation.content); +} + +console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`); +console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`); + +// Option 2: Get complete response without streaming +const response = await complete(model, context); -// response.content is an array of content blocks for (const block of response.content) { if (block.type === 'text') { console.log(block.text); + } else if (block.type === 'toolCall') { + console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`); } } ``` ## Image Input +Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored. 
+ ```typescript import { readFileSync } from 'fs'; +import { getModel, complete } from '@mariozechner/pi-ai'; + +const model = getModel('openai', 'gpt-4o-mini'); + +// Check if model supports images +if (model.input.includes('image')) { + console.log('Model supports vision'); +} const imageBuffer = readFileSync('image.png'); const base64Image = imageBuffer.toString('base64'); -const response = await llm.generate({ +const response = await complete(model, { messages: [{ role: 'user', content: [ @@ -57,166 +156,151 @@ const response = await llm.generate({ ] }] }); -``` -## Tool Calling - -```typescript -const tools = [{ - name: 'get_weather', - description: 'Get current weather for a location', - parameters: { - type: 'object', - properties: { - location: { type: 'string' } - }, - required: ['location'] - } -}]; - -const messages = []; -messages.push({ role: 'user', content: 'What is the weather in Paris?' }); - -const response = await llm.generate({ messages, tools }); -messages.push(response); - -// Check for tool calls in the content blocks -const toolCalls = response.content.filter(block => block.type === 'toolCall'); - -for (const call of toolCalls) { - // Call your actual function - const result = await getWeather(call.arguments.location); - - // Add tool result to context - messages.push({ - role: 'toolResult', - content: JSON.stringify(result), - toolCallId: call.id, - toolName: call.name, - isError: false - }); -} - -if (toolCalls.length > 0) { - // Continue conversation with tool results - const followUp = await llm.generate({ messages, tools }); - messages.push(followUp); - - // Print text blocks from the response - for (const block of followUp.content) { - if (block.type === 'text') { - console.log(block.text); - } +// Access the response +for (const block of response.content) { + if (block.type === 'text') { + console.log(block.text); } } ``` -## Streaming +## Thinking/Reasoning + +Many models support thinking/reasoning capabilities where they can show 
their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored. + +### Unified Interface (streamSimple/completeSimple) ```typescript -const response = await llm.generate({ - messages: [{ role: 'user', content: 'Write a story' }] +import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai'; + +// Many models across providers support thinking/reasoning +const model = getModel('anthropic', 'claude-sonnet-4-20250514'); +// or getModel('openai', 'gpt-5-mini'); +// or getModel('google', 'gemini-2.5-flash'); +// or getModel('xai', 'grok-code-fast-1'); +// or getModel('groq', 'openai/gpt-oss-20b'); +// or getModel('cerebras', 'gpt-oss-120b'); +// or getModel('openrouter', 'z-ai/glm-4.5v'); + +// Check if model supports reasoning +if (model.reasoning) { + console.log('Model supports reasoning/thinking'); +} + +// Use the simplified reasoning option +const response = await completeSimple(model, { + messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }] }, { - onEvent: (event) => { - switch (event.type) { - case 'start': - console.log(`Starting ${event.provider} ${event.model}`); - break; - case 'text_start': - console.log('[Starting text block]'); - break; - case 'text_delta': - process.stdout.write(event.delta); - break; - case 'text_end': - console.log(`\n[Text block complete: ${event.content.length} chars]`); - break; - case 'thinking_start': - console.error('[Starting thinking]'); - break; - case 'thinking_delta': - process.stderr.write(event.delta); - break; - case 'thinking_end': - console.error(`\n[Thinking complete: ${event.content.length} chars]`); - break; - case 'toolCall': - console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`); - break; - case 'done': - console.log(`Completed with reason: ${event.reason}`); - console.log(`Tokens: ${event.message.usage.input} in, 
${event.message.usage.output} out`); - break; - case 'error': - console.error('Error:', event.error); - break; - } + reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high' +}); + +// Access thinking and text blocks +for (const block of response.content) { + if (block.type === 'thinking') { + console.log('Thinking:', block.thinking); + } else if (block.type === 'text') { + console.log('Response:', block.text); + } +} +``` + +### Provider-Specific Options (stream/complete) + +For fine-grained control, use the provider-specific options: + +```typescript +import { getModel, complete } from '@mariozechner/pi-ai'; + +// OpenAI Reasoning (o1, o3, gpt-5) +const openaiModel = getModel('openai', 'gpt-5-mini'); +await complete(openaiModel, context, { + reasoningEffort: 'medium', + reasoningSummary: 'detailed' // OpenAI Responses API only +}); + +// Anthropic Thinking (Claude Sonnet 4) +const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514'); +await complete(anthropicModel, context, { + thinkingEnabled: true, + thinkingBudgetTokens: 8192 // Optional token limit +}); + +// Google Gemini Thinking +const googleModel = getModel('google', 'gemini-2.5-flash'); +await complete(googleModel, context, { + thinking: { + enabled: true, + budgetTokens: 8192 // -1 for dynamic, 0 to disable } }); ``` -## Abort Signal +### Streaming Thinking Content -The abort signal allows you to cancel in-progress requests. When aborted, providers return partial results accumulated up to the cancellation point, including accurate token counts and cost estimates. 
- -### Basic Usage +When streaming, thinking content is delivered through specific events: ```typescript +const s = streamSimple(model, context, { reasoning: 'high' }); + +for await (const event of s) { + switch (event.type) { + case 'thinking_start': + console.log('[Model started thinking]'); + break; + case 'thinking_delta': + process.stdout.write(event.delta); // Stream thinking content + break; + case 'thinking_end': + console.log('\n[Thinking complete]'); + break; + } +} +``` + +## Errors & Abort Signal + +When a request ends with an error (including aborts), the API returns an `AssistantMessage` with: +- `stopReason: 'error'` - Indicates the request ended with an error +- `error: string` - Error message describing what happened +- `content: array` - **Partial content** accumulated before the error +- `usage: Usage` - **Token counts and costs** (may be incomplete depending on when error occurred) + +### Aborting +The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`. 
+ +```typescript +import { getModel, stream } from '@mariozechner/pi-ai'; + +const model = getModel('openai', 'gpt-4o-mini'); const controller = new AbortController(); // Abort after 2 seconds setTimeout(() => controller.abort(), 2000); -const response = await llm.generate({ +const s = stream(model, { messages: [{ role: 'user', content: 'Write a long story' }] }, { - signal: controller.signal, - onEvent: (event) => { - if (event.type === 'text_delta') { - process.stdout.write(event.delta); - } - } + signal: controller.signal }); -// Check if the request was aborted -if (response.stopReason === 'error' && response.error) { - console.log('Request was aborted:', response.error); +for await (const event of s) { + if (event.type === 'text_delta') { + process.stdout.write(event.delta); + } else if (event.type === 'error') { + console.log('Error:', event.error); + } +} + +// Get results (may be partial if aborted) +const response = await s.finalMessage(); +if (response.stopReason === 'error') { + console.log('Error:', response.error); console.log('Partial content received:', response.content); console.log('Tokens used:', response.usage); -} else { - console.log('Request completed successfully'); } ``` -### Partial Results and Token Tracking - -When a request is aborted, the API returns an `AssistantMessage` with: -- `stopReason: 'error'` - Indicates the request was aborted -- `error: string` - Error message describing the abort -- `content: array` - **Partial content** accumulated before the abort -- `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred) - -```typescript -// Example: User interrupts a long-running request -const controller = new AbortController(); -document.getElementById('stop-button').onclick = () => controller.abort(); - -const response = await llm.generate(context, { - signal: controller.signal, - onEvent: (e) => { - if (e.type === 'text_delta') updateUI(e.delta); - } -}); - -// Even if aborted, you get: -// 
- Partial text that was streamed -// - Token count (may be partial/estimated) -// - Cost calculations (may be incomplete) -console.log(`Generated ${response.content.length} content blocks`); -console.log(`Estimated ${response.usage.output} output tokens`); -console.log(`Estimated cost: $${response.usage.cost.total}`); -``` - ### Continuing After Abort Aborted messages can be added to the conversation context and continued in subsequent requests: @@ -232,19 +316,99 @@ const context = { const controller1 = new AbortController(); setTimeout(() => controller1.abort(), 2000); -const partial = await llm.generate(context, { signal: controller1.signal }); +const partial = await complete(model, context, { signal: controller1.signal }); // Add the partial response to context context.messages.push(partial); context.messages.push({ role: 'user', content: 'Please continue' }); // Continue the conversation -const continuation = await llm.generate(context); +const continuation = await complete(model, context); ``` -When an aborted message (with `stopReason: 'error'`) is resubmitted in the context: -- **OpenAI Responses**: Filters out thinking blocks and tool calls from aborted messages, as API call will fail if incomplete thinking and tool calls are submitted -- **Anthropic, Google, OpenAI Completions**: Send all blocks as-is (text, thinking, tool calls) +## APIs, Models, and Providers + +The library implements 4 API interfaces, each with its own streaming function and options: + +- **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`) +- **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`) +- **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`) +- **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`) + +### Providers and Models + +A **provider** offers models through a specific API. 
For example: +- **Anthropic** models use the `anthropic-messages` API +- **Google** models use the `google-generative-ai` API +- **OpenAI** models use the `openai-responses` API +- **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible) + +### Querying Providers and Models + +```typescript +import { getProviders, getModels, getModel } from '@mariozechner/pi-ai'; + +// Get all available providers +const providers = getProviders(); +console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...] + +// Get all models from a provider (fully typed) +const anthropicModels = getModels('anthropic'); +for (const model of anthropicModels) { + console.log(`${model.id}: ${model.name}`); + console.log(` API: ${model.api}`); // 'anthropic-messages' + console.log(` Context: ${model.contextWindow} tokens`); + console.log(` Vision: ${model.input.includes('image')}`); + console.log(` Reasoning: ${model.reasoning}`); +} + +// Get a specific model (both provider and model ID are auto-completed in IDEs) +const model = getModel('openai', 'gpt-4o-mini'); +console.log(`Using ${model.name} via ${model.api} API`); +``` + +### Custom Models + +You can create custom models for local inference servers or custom endpoints: + +```typescript +import { Model, stream } from '@mariozechner/pi-ai'; + +// Example: Ollama using OpenAI-compatible API +const ollamaModel: Model<'openai-completions'> = { + id: 'llama-3.1-8b', + name: 'Llama 3.1 8B (Ollama)', + api: 'openai-completions', + provider: 'ollama', + baseUrl: 'http://localhost:11434/v1', + reasoning: false, + input: ['text'], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 32000 +}; + +// Use the custom model +const response = await stream(ollamaModel, context, { + apiKey: 'dummy' // Ollama doesn't need a real key +}); +``` + +### Type Safety + +Models are typed by their API, ensuring type-safe options: + +```typescript +// TypeScript knows this 
is an Anthropic model +const claude = getModel('anthropic', 'claude-sonnet-4-20250514'); + +// So these options are type-checked for AnthropicOptions +await stream(claude, context, { + thinkingEnabled: true, // ✓ Valid for anthropic-messages + thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages + // reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages +}); +``` ## Cross-Provider Handoffs @@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility: - **User and tool result messages** are passed through unchanged -- **Assistant messages from the same provider/model** are preserved as-is +- **Assistant messages from the same provider/API** are preserved as-is - **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags - **Tool calls and regular text** are preserved unchanged ### Example: Multi-Provider Conversation ```typescript -import { createLLM } from '@mariozechner/pi-ai'; +import { getModel, complete, Context } from '@mariozechner/pi-ai'; // Start with Claude -const claude = createLLM('anthropic', 'claude-sonnet-4-0'); -const messages = []; +const claude = getModel('anthropic', 'claude-sonnet-4-20250514'); +const context: Context = { + messages: [] +}; -messages.push({ role: 'user', content: 'What is 25 * 18?' }); -const claudeResponse = await claude.generate({ messages }, { - thinking: { enabled: true } +context.messages.push({ role: 'user', content: 'What is 25 * 18?' }); +const claudeResponse = await complete(claude, context, { + thinkingEnabled: true }); -messages.push(claudeResponse); +context.messages.push(claudeResponse); // Switch to GPT-5 - it will see Claude's thinking as tagged text -const gpt5 = createLLM('openai', 'gpt-5-mini'); -messages.push({ role: 'user', content: 'Is that calculation correct?' 
}); -const gptResponse = await gpt5.generate({ messages }); -messages.push(gptResponse); +const gpt5 = getModel('openai', 'gpt-5-mini'); +context.messages.push({ role: 'user', content: 'Is that calculation correct?' }); +const gptResponse = await complete(gpt5, context); +context.messages.push(gptResponse); // Switch to Gemini -const gemini = createLLM('google', 'gemini-2.5-flash'); -messages.push({ role: 'user', content: 'What was the original question?' }); -const geminiResponse = await gemini.generate({ messages }); +const gemini = getModel('google', 'gemini-2.5-flash'); +context.messages.push({ role: 'user', content: 'What was the original question?' }); +const geminiResponse = await complete(gemini, context); ``` ### Provider Compatibility @@ -300,155 +466,65 @@ This enables flexible workflows where you can: - Use specialized models for specific tasks - Maintain conversation continuity across provider outages -## Provider-Specific Options +## Context Serialization + +The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services: -### OpenAI Reasoning (o1, o3) ```typescript -const llm = createLLM('openai', 'o1-mini'); +import { Context, getModel, complete } from '@mariozechner/pi-ai'; -await llm.generate(context, { - reasoningEffort: 'medium' // 'minimal' | 'low' | 'medium' | 'high' -}); -``` - -### Anthropic Thinking -```typescript -const llm = createLLM('anthropic', 'claude-3-5-sonnet-20241022'); - -await llm.generate(context, { - thinking: { - enabled: true, - budgetTokens: 2048 // Optional thinking token limit - } -}); -``` - -### Google Gemini Thinking -```typescript -const llm = createLLM('google', 'gemini-2.5-pro'); - -await llm.generate(context, { - thinking: { enabled: true } -}); -``` - -## Custom Models - -### Local Models (Ollama, vLLM, etc.) 
-```typescript -import { OpenAICompletionsLLM } from '@mariozechner/pi-ai'; - -const model = { - id: 'gpt-oss:20b', - provider: 'ollama', - baseUrl: 'http://localhost:11434/v1', - reasoning: false, - input: ['text'], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 126000, - maxTokens: 32000, - name: 'Llama 3.1 8B' +// Create and use a context +const context: Context = { + systemPrompt: 'You are a helpful assistant.', + messages: [ + { role: 'user', content: 'What is TypeScript?' } + ] }; -const llm = new OpenAICompletionsLLM(model, 'dummy-key'); -``` - -### Custom OpenAI-Compatible Endpoints -```typescript -const model = { - id: 'custom-model', - provider: 'custom', - baseUrl: 'https://your-api.com/v1', - reasoning: true, - input: ['text', 'image'], - cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 }, - contextWindow: 32768, - maxTokens: 8192, - name: 'Custom Model' -}; - -const llm = new OpenAICompletionsLLM(model, 'your-api-key'); -``` - -## Model Discovery - -All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time. 
- -### List Available Models -```typescript -import { PROVIDERS } from '@mariozechner/pi-ai'; - -// List all OpenAI models (all support tool calling) -for (const [modelId, model] of Object.entries(PROVIDERS.openai.models)) { - console.log(`${modelId}: ${model.name}`); - console.log(` Context: ${model.contextWindow} tokens`); - console.log(` Reasoning: ${model.reasoning}`); - console.log(` Vision: ${model.input.includes('image')}`); - console.log(` Cost: $${model.cost.input}/$${model.cost.output} per million tokens`); -} - -// Find all models with reasoning support -const reasoningModels = []; -for (const provider of Object.values(PROVIDERS)) { - for (const model of Object.values(provider.models)) { - if (model.reasoning) { - reasoningModels.push(model); - } - } -} - -// Find all vision-capable models -const visionModels = []; -for (const provider of Object.values(PROVIDERS)) { - for (const model of Object.values(provider.models)) { - if (model.input.includes('image')) { - visionModels.push(model); - } - } -} -``` - -### Check Model Capabilities -```typescript -import { getModel } from '@mariozechner/pi-ai'; - const model = getModel('openai', 'gpt-4o-mini'); -if (model) { - console.log(`Model: ${model.name}`); - console.log(`Provider: ${model.provider}`); - console.log(`Context window: ${model.contextWindow} tokens`); - console.log(`Max output: ${model.maxTokens} tokens`); - console.log(`Supports reasoning: ${model.reasoning}`); - console.log(`Supports images: ${model.input.includes('image')}`); - console.log(`Input cost: $${model.cost.input} per million tokens`); - console.log(`Output cost: $${model.cost.output} per million tokens`); - console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`); - console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`); -} +const response = await complete(model, context); +context.messages.push(response); + +// Serialize the entire context +const serialized = JSON.stringify(context); 
+console.log('Serialized context size:', serialized.length, 'bytes'); + +// Save to database, localStorage, file, etc. +localStorage.setItem('conversation', serialized); + +// Later: deserialize and continue the conversation +const restored: Context = JSON.parse(localStorage.getItem('conversation')!); +restored.messages.push({ role: 'user', content: 'Tell me more about its type system' }); + +// Continue with any model +const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022'); +const continuation = await complete(newModel, restored); ``` +> **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized. + ## Browser Usage The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers: ```typescript -import { createLLM } from '@mariozechner/pi-ai'; +import { getModel, complete } from '@mariozechner/pi-ai'; // API key must be passed explicitly in browser -const llm = createLLM('anthropic', 'claude-3-5-haiku-20241022', { - apiKey: 'your-api-key' -}); +const model = getModel('anthropic', 'claude-3-5-haiku-20241022'); -const response = await llm.generate({ +const response = await complete(model, { messages: [{ role: 'user', content: 'Hello!' }] +}, { + apiKey: 'your-api-key' }); ``` > **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure. -## Environment Variables +### Environment Variables (Node.js only) -Set these environment variables to use `createLLM` without passing API keys: +In Node.js environments, you can set environment variables to avoid passing API keys: ```bash OPENAI_API_KEY=sk-... @@ -460,13 +536,17 @@ XAI_API_KEY=xai-... OPENROUTER_API_KEY=sk-or-... 
``` -When set, you can omit the API key parameter: +When set, the library automatically uses these keys: + ```typescript // Uses OPENAI_API_KEY from environment -const llm = createLLM('openai', 'gpt-4o-mini'); +const model = getModel('openai', 'gpt-4o-mini'); +const response = await complete(model, context); -// Or pass explicitly -const llm = createLLM('openai', 'gpt-4o-mini', 'sk-...'); +// Or override with explicit key +const response = await complete(model, context, { + apiKey: 'sk-different-key' +}); ``` ## License diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index c567ff1a..1ddfa16b 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -338,7 +338,7 @@ async function generateModels() { import type { Model } from "./types.js"; -export const PROVIDERS = { +export const MODELS = { `; // Generate provider sections diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts index bc07efae..d163aad6 100644 --- a/packages/ai/src/index.ts +++ b/packages/ai/src/index.ts @@ -1,5 +1,4 @@ export * from "./generate.js"; -export * from "./models.generated.js"; export * from "./models.js"; export * from "./providers/anthropic.js"; export * from "./providers/google.js"; diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 974a901c..3b2263f4 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -3,7 +3,7 @@ import type { Model } from "./types.js"; -export const PROVIDERS = { +export const MODELS = { anthropic: { "claude-3-7-sonnet-20250219": { id: "claude-3-7-sonnet-20250219", @@ -2652,23 +2652,6 @@ export const PROVIDERS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, - "cohere/command-r-08-2024": { - id: "cohere/command-r-08-2024", - name: "Cohere: Command R (08-2024)", - api: "openai-completions", - provider: "openrouter", - baseUrl: 
"https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4000, - } satisfies Model<"openai-completions">, "cohere/command-r-plus-08-2024": { id: "cohere/command-r-plus-08-2024", name: "Cohere: Command R+ (08-2024)", @@ -2686,6 +2669,23 @@ export const PROVIDERS = { contextWindow: 128000, maxTokens: 4000, } satisfies Model<"openai-completions">, + "cohere/command-r-08-2024": { + id: "cohere/command-r-08-2024", + name: "Cohere: Command R (08-2024)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4000, + } satisfies Model<"openai-completions">, "microsoft/phi-3.5-mini-128k-instruct": { id: "microsoft/phi-3.5-mini-128k-instruct", name: "Microsoft: Phi-3.5 Mini 128K Instruct", @@ -2720,23 +2720,6 @@ export const PROVIDERS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.7999999999999999, - output: 0.7999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-8b-instruct": { id: "meta-llama/llama-3.1-8b-instruct", name: "Meta: Llama 3.1 8B Instruct", @@ -2754,6 +2737,23 @@ export const PROVIDERS = { contextWindow: 131072, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-405b-instruct": { + id: "meta-llama/llama-3.1-405b-instruct", + name: "Meta: Llama 3.1 405B Instruct", + api: 
"openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.7999999999999999, + output: 0.7999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -2873,23 +2873,6 @@ export const PROVIDERS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3-70b-instruct": { - id: "meta-llama/llama-3-70b-instruct", - name: "Meta: Llama 3 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3-8b-instruct": { id: "meta-llama/llama-3-8b-instruct", name: "Meta: Llama 3 8B Instruct", @@ -2907,6 +2890,23 @@ export const PROVIDERS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3-70b-instruct": { + id: "meta-llama/llama-3-70b-instruct", + name: "Meta: Llama 3 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x22b-instruct": { id: "mistralai/mixtral-8x22b-instruct", name: "Mistral: Mixtral 8x22B Instruct", diff --git a/packages/ai/src/models.ts b/packages/ai/src/models.ts index d701bdd6..7acc7684 100644 --- a/packages/ai/src/models.ts +++ b/packages/ai/src/models.ts @@ -1,10 +1,10 @@ -import { PROVIDERS 
} from "./models.generated.js"; +import { MODELS } from "./models.generated.js"; import type { Api, KnownProvider, Model, Usage } from "./types.js"; const modelRegistry: Map>> = new Map(); -// Initialize registry from PROVIDERS on module load -for (const [provider, models] of Object.entries(PROVIDERS)) { +// Initialize registry from MODELS on module load +for (const [provider, models] of Object.entries(MODELS)) { const providerModels = new Map>(); for (const [id, model] of Object.entries(models)) { providerModels.set(id, model as Model); @@ -14,23 +14,25 @@ for (const [provider, models] of Object.entries(PROVIDERS)) { type ModelApi< TProvider extends KnownProvider, - TModelId extends keyof (typeof PROVIDERS)[TProvider], -> = (typeof PROVIDERS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never; + TModelId extends keyof (typeof MODELS)[TProvider], +> = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never; -export function getModel( +export function getModel( provider: TProvider, modelId: TModelId, -): Model>; -export function getModel(provider: string, modelId: string): Model | undefined; -export function getModel(provider: any, modelId: any): Model | undefined { - return modelRegistry.get(provider)?.get(modelId) as Model | undefined; +): Model> { + return modelRegistry.get(provider)?.get(modelId as string) as Model>; } -export function registerModel(model: Model): void { - if (!modelRegistry.has(model.provider)) { - modelRegistry.set(model.provider, new Map()); - } - modelRegistry.get(model.provider)!.set(model.id, model); +export function getProviders(): KnownProvider[] { + return Array.from(modelRegistry.keys()) as KnownProvider[]; +} + +export function getModels( + provider: TProvider, +): Model>[] { + const models = modelRegistry.get(provider); + return models ? 
(Array.from(models.values()) as Model>[]) : []; } export function calculateCost(model: Model, usage: Usage): Usage["cost"] {