mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-20 06:04:15 +00:00
refactor(ai): Simplify API with new streaming interface and model management
- Replace createLLM with getModel/getModels/getProviders functions
- Rename PROVIDERS to MODELS (internal only, not exposed)
- Add streamSimple/completeSimple for unified reasoning interface
- Update README with new API examples and comprehensive documentation
- Remove model registration (models are now fixed from build time)
- Add proper TypeScript typing for provider-specific options
- Document context serialization, cross-provider handoffs, and browser usage
This commit is contained in:
parent
21750c230a
commit
4cee070bdd
5 changed files with 438 additions and 357 deletions
|
|
@ -24,31 +24,130 @@ npm install @mariozechner/pi-ai
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import { createLLM } from '@mariozechner/pi-ai';
|
import { getModel, stream, complete, Context, Tool } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
const llm = createLLM('openai', 'gpt-4o-mini');
|
// Fully typed with auto-complete support for both providers and models
|
||||||
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
|
|
||||||
const response = await llm.generate({
|
// Define tools
|
||||||
messages: [{ role: 'user', content: 'Hello!' }]
|
const tools: Tool[] = [{
|
||||||
});
|
name: 'get_time',
|
||||||
|
description: 'Get the current time',
|
||||||
|
parameters: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {},
|
||||||
|
required: []
|
||||||
|
}
|
||||||
|
}];
|
||||||
|
|
||||||
|
// Build a conversation context (easily serializable and transferable between models)
|
||||||
|
const context: Context = {
|
||||||
|
systemPrompt: 'You are a helpful assistant.',
|
||||||
|
messages: [{ role: 'user', content: 'What time is it?' }],
|
||||||
|
tools
|
||||||
|
};
|
||||||
|
|
||||||
|
// Option 1: Streaming with all event types
|
||||||
|
const s = stream(model, context);
|
||||||
|
|
||||||
|
for await (const event of s) {
|
||||||
|
switch (event.type) {
|
||||||
|
case 'start':
|
||||||
|
console.log(`Starting with ${event.partial.model}`);
|
||||||
|
break;
|
||||||
|
case 'text_start':
|
||||||
|
console.log('\n[Text started]');
|
||||||
|
break;
|
||||||
|
case 'text_delta':
|
||||||
|
process.stdout.write(event.delta);
|
||||||
|
break;
|
||||||
|
case 'text_end':
|
||||||
|
console.log('\n[Text ended]');
|
||||||
|
break;
|
||||||
|
case 'thinking_start':
|
||||||
|
console.log('[Model is thinking...]');
|
||||||
|
break;
|
||||||
|
case 'thinking_delta':
|
||||||
|
process.stdout.write(event.delta);
|
||||||
|
break;
|
||||||
|
case 'thinking_end':
|
||||||
|
console.log('[Thinking complete]');
|
||||||
|
break;
|
||||||
|
case 'toolCall':
|
||||||
|
console.log(`\nTool called: ${event.toolCall.name}`);
|
||||||
|
break;
|
||||||
|
case 'done':
|
||||||
|
console.log(`\nFinished: ${event.reason}`);
|
||||||
|
break;
|
||||||
|
case 'error':
|
||||||
|
console.error(`Error: ${event.error}`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the final message after streaming, add it to the context
|
||||||
|
const finalMessage = await s.finalMessage();
|
||||||
|
context.messages.push(finalMessage);
|
||||||
|
|
||||||
|
// Handle tool calls if any
|
||||||
|
const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall');
|
||||||
|
for (const call of toolCalls) {
|
||||||
|
// Execute the tool
|
||||||
|
const result = call.name === 'get_time'
|
||||||
|
? new Date().toISOString()
|
||||||
|
: 'Unknown tool';
|
||||||
|
|
||||||
|
// Add tool result to context
|
||||||
|
context.messages.push({
|
||||||
|
role: 'toolResult',
|
||||||
|
toolCallId: call.id,
|
||||||
|
toolName: call.name,
|
||||||
|
content: result,
|
||||||
|
isError: false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Continue if there were tool calls
|
||||||
|
if (toolCalls.length > 0) {
|
||||||
|
const continuation = await complete(model, context);
|
||||||
|
context.messages.push(continuation);
|
||||||
|
console.log('After tool execution:', continuation.content);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Total tokens: ${finalMessage.usage.input} in, ${finalMessage.usage.output} out`);
|
||||||
|
console.log(`Cost: $${finalMessage.usage.cost.total.toFixed(4)}`);
|
||||||
|
|
||||||
|
// Option 2: Get complete response without streaming
|
||||||
|
const response = await complete(model, context);
|
||||||
|
|
||||||
// response.content is an array of content blocks
|
|
||||||
for (const block of response.content) {
|
for (const block of response.content) {
|
||||||
if (block.type === 'text') {
|
if (block.type === 'text') {
|
||||||
console.log(block.text);
|
console.log(block.text);
|
||||||
|
} else if (block.type === 'toolCall') {
|
||||||
|
console.log(`Tool: ${block.name}(${JSON.stringify(block.arguments)})`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Image Input
|
## Image Input
|
||||||
|
|
||||||
|
Models with vision capabilities can process images. You can check if a model supports images via the `input` property. If you pass images to a non-vision model, they are silently ignored.
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import { readFileSync } from 'fs';
|
import { readFileSync } from 'fs';
|
||||||
|
import { getModel, complete } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
|
|
||||||
|
// Check if model supports images
|
||||||
|
if (model.input.includes('image')) {
|
||||||
|
console.log('Model supports vision');
|
||||||
|
}
|
||||||
|
|
||||||
const imageBuffer = readFileSync('image.png');
|
const imageBuffer = readFileSync('image.png');
|
||||||
const base64Image = imageBuffer.toString('base64');
|
const base64Image = imageBuffer.toString('base64');
|
||||||
|
|
||||||
const response = await llm.generate({
|
const response = await complete(model, {
|
||||||
messages: [{
|
messages: [{
|
||||||
role: 'user',
|
role: 'user',
|
||||||
content: [
|
content: [
|
||||||
|
|
@ -57,166 +156,151 @@ const response = await llm.generate({
|
||||||
]
|
]
|
||||||
}]
|
}]
|
||||||
});
|
});
|
||||||
```
|
|
||||||
|
|
||||||
## Tool Calling
|
// Access the response
|
||||||
|
for (const block of response.content) {
|
||||||
```typescript
|
if (block.type === 'text') {
|
||||||
const tools = [{
|
console.log(block.text);
|
||||||
name: 'get_weather',
|
|
||||||
description: 'Get current weather for a location',
|
|
||||||
parameters: {
|
|
||||||
type: 'object',
|
|
||||||
properties: {
|
|
||||||
location: { type: 'string' }
|
|
||||||
},
|
|
||||||
required: ['location']
|
|
||||||
}
|
|
||||||
}];
|
|
||||||
|
|
||||||
const messages = [];
|
|
||||||
messages.push({ role: 'user', content: 'What is the weather in Paris?' });
|
|
||||||
|
|
||||||
const response = await llm.generate({ messages, tools });
|
|
||||||
messages.push(response);
|
|
||||||
|
|
||||||
// Check for tool calls in the content blocks
|
|
||||||
const toolCalls = response.content.filter(block => block.type === 'toolCall');
|
|
||||||
|
|
||||||
for (const call of toolCalls) {
|
|
||||||
// Call your actual function
|
|
||||||
const result = await getWeather(call.arguments.location);
|
|
||||||
|
|
||||||
// Add tool result to context
|
|
||||||
messages.push({
|
|
||||||
role: 'toolResult',
|
|
||||||
content: JSON.stringify(result),
|
|
||||||
toolCallId: call.id,
|
|
||||||
toolName: call.name,
|
|
||||||
isError: false
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (toolCalls.length > 0) {
|
|
||||||
// Continue conversation with tool results
|
|
||||||
const followUp = await llm.generate({ messages, tools });
|
|
||||||
messages.push(followUp);
|
|
||||||
|
|
||||||
// Print text blocks from the response
|
|
||||||
for (const block of followUp.content) {
|
|
||||||
if (block.type === 'text') {
|
|
||||||
console.log(block.text);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## Streaming
|
## Thinking/Reasoning
|
||||||
|
|
||||||
|
Many models support thinking/reasoning capabilities where they can show their internal thought process. You can check if a model supports reasoning via the `reasoning` property. If you pass reasoning options to a non-reasoning model, they are silently ignored.
|
||||||
|
|
||||||
|
### Unified Interface (streamSimple/completeSimple)
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
const response = await llm.generate({
|
import { getModel, streamSimple, completeSimple } from '@mariozechner/pi-ai';
|
||||||
messages: [{ role: 'user', content: 'Write a story' }]
|
|
||||||
|
// Many models across providers support thinking/reasoning
|
||||||
|
const model = getModel('anthropic', 'claude-sonnet-4-20250514');
|
||||||
|
// or getModel('openai', 'gpt-5-mini');
|
||||||
|
// or getModel('google', 'gemini-2.5-flash');
|
||||||
|
// or getModel('xai', 'grok-code-fast-1');
|
||||||
|
// or getModel('groq', 'openai/gpt-oss-20b');
|
||||||
|
// or getModel('cerebras', 'gpt-oss-120b');
|
||||||
|
// or getModel('openrouter', 'z-ai/glm-4.5v');
|
||||||
|
|
||||||
|
// Check if model supports reasoning
|
||||||
|
if (model.reasoning) {
|
||||||
|
console.log('Model supports reasoning/thinking');
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the simplified reasoning option
|
||||||
|
const response = await completeSimple(model, {
|
||||||
|
messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
|
||||||
}, {
|
}, {
|
||||||
onEvent: (event) => {
|
reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
|
||||||
switch (event.type) {
|
});
|
||||||
case 'start':
|
|
||||||
console.log(`Starting ${event.provider} ${event.model}`);
|
// Access thinking and text blocks
|
||||||
break;
|
for (const block of response.content) {
|
||||||
case 'text_start':
|
if (block.type === 'thinking') {
|
||||||
console.log('[Starting text block]');
|
console.log('Thinking:', block.thinking);
|
||||||
break;
|
} else if (block.type === 'text') {
|
||||||
case 'text_delta':
|
console.log('Response:', block.text);
|
||||||
process.stdout.write(event.delta);
|
}
|
||||||
break;
|
}
|
||||||
case 'text_end':
|
```
|
||||||
console.log(`\n[Text block complete: ${event.content.length} chars]`);
|
|
||||||
break;
|
### Provider-Specific Options (stream/complete)
|
||||||
case 'thinking_start':
|
|
||||||
console.error('[Starting thinking]');
|
For fine-grained control, use the provider-specific options:
|
||||||
break;
|
|
||||||
case 'thinking_delta':
|
```typescript
|
||||||
process.stderr.write(event.delta);
|
import { getModel, complete } from '@mariozechner/pi-ai';
|
||||||
break;
|
|
||||||
case 'thinking_end':
|
// OpenAI Reasoning (o1, o3, gpt-5)
|
||||||
console.error(`\n[Thinking complete: ${event.content.length} chars]`);
|
const openaiModel = getModel('openai', 'gpt-5-mini');
|
||||||
break;
|
await complete(openaiModel, context, {
|
||||||
case 'toolCall':
|
reasoningEffort: 'medium',
|
||||||
console.log(`Tool called: ${event.toolCall.name}(${JSON.stringify(event.toolCall.arguments)})`);
|
reasoningSummary: 'detailed' // OpenAI Responses API only
|
||||||
break;
|
});
|
||||||
case 'done':
|
|
||||||
console.log(`Completed with reason: ${event.reason}`);
|
// Anthropic Thinking (Claude Sonnet 4)
|
||||||
console.log(`Tokens: ${event.message.usage.input} in, ${event.message.usage.output} out`);
|
const anthropicModel = getModel('anthropic', 'claude-sonnet-4-20250514');
|
||||||
break;
|
await complete(anthropicModel, context, {
|
||||||
case 'error':
|
thinkingEnabled: true,
|
||||||
console.error('Error:', event.error);
|
thinkingBudgetTokens: 8192 // Optional token limit
|
||||||
break;
|
});
|
||||||
}
|
|
||||||
|
// Google Gemini Thinking
|
||||||
|
const googleModel = getModel('google', 'gemini-2.5-flash');
|
||||||
|
await complete(googleModel, context, {
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
budgetTokens: 8192 // -1 for dynamic, 0 to disable
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
## Abort Signal
|
### Streaming Thinking Content
|
||||||
|
|
||||||
The abort signal allows you to cancel in-progress requests. When aborted, providers return partial results accumulated up to the cancellation point, including accurate token counts and cost estimates.
|
When streaming, thinking content is delivered through specific events:
|
||||||
|
|
||||||
### Basic Usage
|
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
|
const s = streamSimple(model, context, { reasoning: 'high' });
|
||||||
|
|
||||||
|
for await (const event of s) {
|
||||||
|
switch (event.type) {
|
||||||
|
case 'thinking_start':
|
||||||
|
console.log('[Model started thinking]');
|
||||||
|
break;
|
||||||
|
case 'thinking_delta':
|
||||||
|
process.stdout.write(event.delta); // Stream thinking content
|
||||||
|
break;
|
||||||
|
case 'thinking_end':
|
||||||
|
console.log('\n[Thinking complete]');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Errors & Abort Signal
|
||||||
|
|
||||||
|
When a request ends with an error (including aborts), the API returns an `AssistantMessage` with:
|
||||||
|
- `stopReason: 'error'` - Indicates the request ended with an error
|
||||||
|
- `error: string` - Error message describing what happened
|
||||||
|
- `content: array` - **Partial content** accumulated before the error
|
||||||
|
- `usage: Usage` - **Token counts and costs** (may be incomplete depending on when the error occurred)
|
||||||
|
|
||||||
|
### Aborting
|
||||||
|
The abort signal allows you to cancel in-progress requests. Aborted requests return an `AssistantMessage` with `stopReason === 'error'`.
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { getModel, stream } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
|
|
||||||
// Abort after 2 seconds
|
// Abort after 2 seconds
|
||||||
setTimeout(() => controller.abort(), 2000);
|
setTimeout(() => controller.abort(), 2000);
|
||||||
|
|
||||||
const response = await llm.generate({
|
const s = stream(model, {
|
||||||
messages: [{ role: 'user', content: 'Write a long story' }]
|
messages: [{ role: 'user', content: 'Write a long story' }]
|
||||||
}, {
|
}, {
|
||||||
signal: controller.signal,
|
signal: controller.signal
|
||||||
onEvent: (event) => {
|
|
||||||
if (event.type === 'text_delta') {
|
|
||||||
process.stdout.write(event.delta);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Check if the request was aborted
|
for await (const event of s) {
|
||||||
if (response.stopReason === 'error' && response.error) {
|
if (event.type === 'text_delta') {
|
||||||
console.log('Request was aborted:', response.error);
|
process.stdout.write(event.delta);
|
||||||
|
} else if (event.type === 'error') {
|
||||||
|
console.log('Error:', event.error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get results (may be partial if aborted)
|
||||||
|
const response = await s.finalMessage();
|
||||||
|
if (response.stopReason === 'error') {
|
||||||
|
console.log('Error:', response.error);
|
||||||
console.log('Partial content received:', response.content);
|
console.log('Partial content received:', response.content);
|
||||||
console.log('Tokens used:', response.usage);
|
console.log('Tokens used:', response.usage);
|
||||||
} else {
|
|
||||||
console.log('Request completed successfully');
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Partial Results and Token Tracking
|
|
||||||
|
|
||||||
When a request is aborted, the API returns an `AssistantMessage` with:
|
|
||||||
- `stopReason: 'error'` - Indicates the request was aborted
|
|
||||||
- `error: string` - Error message describing the abort
|
|
||||||
- `content: array` - **Partial content** accumulated before the abort
|
|
||||||
- `usage: object` - **Token counts and costs** (may be incomplete depending on when abort occurred)
|
|
||||||
|
|
||||||
```typescript
|
|
||||||
// Example: User interrupts a long-running request
|
|
||||||
const controller = new AbortController();
|
|
||||||
document.getElementById('stop-button').onclick = () => controller.abort();
|
|
||||||
|
|
||||||
const response = await llm.generate(context, {
|
|
||||||
signal: controller.signal,
|
|
||||||
onEvent: (e) => {
|
|
||||||
if (e.type === 'text_delta') updateUI(e.delta);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Even if aborted, you get:
|
|
||||||
// - Partial text that was streamed
|
|
||||||
// - Token count (may be partial/estimated)
|
|
||||||
// - Cost calculations (may be incomplete)
|
|
||||||
console.log(`Generated ${response.content.length} content blocks`);
|
|
||||||
console.log(`Estimated ${response.usage.output} output tokens`);
|
|
||||||
console.log(`Estimated cost: $${response.usage.cost.total}`);
|
|
||||||
```
|
|
||||||
|
|
||||||
### Continuing After Abort
|
### Continuing After Abort
|
||||||
|
|
||||||
Aborted messages can be added to the conversation context and continued in subsequent requests:
|
Aborted messages can be added to the conversation context and continued in subsequent requests:
|
||||||
|
|
@ -232,19 +316,99 @@ const context = {
|
||||||
const controller1 = new AbortController();
|
const controller1 = new AbortController();
|
||||||
setTimeout(() => controller1.abort(), 2000);
|
setTimeout(() => controller1.abort(), 2000);
|
||||||
|
|
||||||
const partial = await llm.generate(context, { signal: controller1.signal });
|
const partial = await complete(model, context, { signal: controller1.signal });
|
||||||
|
|
||||||
// Add the partial response to context
|
// Add the partial response to context
|
||||||
context.messages.push(partial);
|
context.messages.push(partial);
|
||||||
context.messages.push({ role: 'user', content: 'Please continue' });
|
context.messages.push({ role: 'user', content: 'Please continue' });
|
||||||
|
|
||||||
// Continue the conversation
|
// Continue the conversation
|
||||||
const continuation = await llm.generate(context);
|
const continuation = await complete(model, context);
|
||||||
```
|
```
|
||||||
|
|
||||||
When an aborted message (with `stopReason: 'error'`) is resubmitted in the context:
|
## APIs, Models, and Providers
|
||||||
- **OpenAI Responses**: Filters out thinking blocks and tool calls from aborted messages, as the API call will fail if incomplete thinking blocks or tool calls are submitted
|
|
||||||
- **Anthropic, Google, OpenAI Completions**: Send all blocks as-is (text, thinking, tool calls)
|
The library implements 4 API interfaces, each with its own streaming function and options:
|
||||||
|
|
||||||
|
- **`anthropic-messages`**: Anthropic's Messages API (`streamAnthropic`, `AnthropicOptions`)
|
||||||
|
- **`google-generative-ai`**: Google's Generative AI API (`streamGoogle`, `GoogleOptions`)
|
||||||
|
- **`openai-completions`**: OpenAI's Chat Completions API (`streamOpenAICompletions`, `OpenAICompletionsOptions`)
|
||||||
|
- **`openai-responses`**: OpenAI's Responses API (`streamOpenAIResponses`, `OpenAIResponsesOptions`)
|
||||||
|
|
||||||
|
### Providers and Models
|
||||||
|
|
||||||
|
A **provider** offers models through a specific API. For example:
|
||||||
|
- **Anthropic** models use the `anthropic-messages` API
|
||||||
|
- **Google** models use the `google-generative-ai` API
|
||||||
|
- **OpenAI** models use the `openai-responses` API
|
||||||
|
- **xAI, Cerebras, Groq, etc.** models use the `openai-completions` API (OpenAI-compatible)
|
||||||
|
|
||||||
|
### Querying Providers and Models
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { getProviders, getModels, getModel } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
|
// Get all available providers
|
||||||
|
const providers = getProviders();
|
||||||
|
console.log(providers); // ['openai', 'anthropic', 'google', 'xai', 'groq', ...]
|
||||||
|
|
||||||
|
// Get all models from a provider (fully typed)
|
||||||
|
const anthropicModels = getModels('anthropic');
|
||||||
|
for (const model of anthropicModels) {
|
||||||
|
console.log(`${model.id}: ${model.name}`);
|
||||||
|
console.log(` API: ${model.api}`); // 'anthropic-messages'
|
||||||
|
console.log(` Context: ${model.contextWindow} tokens`);
|
||||||
|
console.log(` Vision: ${model.input.includes('image')}`);
|
||||||
|
console.log(` Reasoning: ${model.reasoning}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get a specific model (both provider and model ID are auto-completed in IDEs)
|
||||||
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
|
console.log(`Using ${model.name} via ${model.api} API`);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Custom Models
|
||||||
|
|
||||||
|
You can create custom models for local inference servers or custom endpoints:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Model, stream } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
|
// Example: Ollama using OpenAI-compatible API
|
||||||
|
const ollamaModel: Model<'openai-completions'> = {
|
||||||
|
id: 'llama-3.1-8b',
|
||||||
|
name: 'Llama 3.1 8B (Ollama)',
|
||||||
|
api: 'openai-completions',
|
||||||
|
provider: 'ollama',
|
||||||
|
baseUrl: 'http://localhost:11434/v1',
|
||||||
|
reasoning: false,
|
||||||
|
input: ['text'],
|
||||||
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
contextWindow: 128000,
|
||||||
|
maxTokens: 32000
|
||||||
|
};
|
||||||
|
|
||||||
|
// Use the custom model
|
||||||
|
const response = await stream(ollamaModel, context, {
|
||||||
|
apiKey: 'dummy' // Ollama doesn't need a real key
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Type Safety
|
||||||
|
|
||||||
|
Models are typed by their API, ensuring type-safe options:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// TypeScript knows this is an Anthropic model
|
||||||
|
const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
|
||||||
|
|
||||||
|
// So these options are type-checked for AnthropicOptions
|
||||||
|
await stream(claude, context, {
|
||||||
|
thinkingEnabled: true, // ✓ Valid for anthropic-messages
|
||||||
|
thinkingBudgetTokens: 2048, // ✓ Valid for anthropic-messages
|
||||||
|
// reasoningEffort: 'high' // ✗ TypeScript error: not valid for anthropic-messages
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
## Cross-Provider Handoffs
|
## Cross-Provider Handoffs
|
||||||
|
|
||||||
|
|
@ -255,35 +419,37 @@ The library supports seamless handoffs between different LLM providers within th
|
||||||
When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
|
When messages from one provider are sent to a different provider, the library automatically transforms them for compatibility:
|
||||||
|
|
||||||
- **User and tool result messages** are passed through unchanged
|
- **User and tool result messages** are passed through unchanged
|
||||||
- **Assistant messages from the same provider/model** are preserved as-is
|
- **Assistant messages from the same provider/API** are preserved as-is
|
||||||
- **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
|
- **Assistant messages from different providers** have their thinking blocks converted to text with `<thinking>` tags
|
||||||
- **Tool calls and regular text** are preserved unchanged
|
- **Tool calls and regular text** are preserved unchanged
|
||||||
|
|
||||||
### Example: Multi-Provider Conversation
|
### Example: Multi-Provider Conversation
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import { createLLM } from '@mariozechner/pi-ai';
|
import { getModel, complete, Context } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
// Start with Claude
|
// Start with Claude
|
||||||
const claude = createLLM('anthropic', 'claude-sonnet-4-0');
|
const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
|
||||||
const messages = [];
|
const context: Context = {
|
||||||
|
messages: []
|
||||||
|
};
|
||||||
|
|
||||||
messages.push({ role: 'user', content: 'What is 25 * 18?' });
|
context.messages.push({ role: 'user', content: 'What is 25 * 18?' });
|
||||||
const claudeResponse = await claude.generate({ messages }, {
|
const claudeResponse = await complete(claude, context, {
|
||||||
thinking: { enabled: true }
|
thinkingEnabled: true
|
||||||
});
|
});
|
||||||
messages.push(claudeResponse);
|
context.messages.push(claudeResponse);
|
||||||
|
|
||||||
// Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
|
// Switch to GPT-5 - it will see Claude's thinking as <thinking> tagged text
|
||||||
const gpt5 = createLLM('openai', 'gpt-5-mini');
|
const gpt5 = getModel('openai', 'gpt-5-mini');
|
||||||
messages.push({ role: 'user', content: 'Is that calculation correct?' });
|
context.messages.push({ role: 'user', content: 'Is that calculation correct?' });
|
||||||
const gptResponse = await gpt5.generate({ messages });
|
const gptResponse = await complete(gpt5, context);
|
||||||
messages.push(gptResponse);
|
context.messages.push(gptResponse);
|
||||||
|
|
||||||
// Switch to Gemini
|
// Switch to Gemini
|
||||||
const gemini = createLLM('google', 'gemini-2.5-flash');
|
const gemini = getModel('google', 'gemini-2.5-flash');
|
||||||
messages.push({ role: 'user', content: 'What was the original question?' });
|
context.messages.push({ role: 'user', content: 'What was the original question?' });
|
||||||
const geminiResponse = await gemini.generate({ messages });
|
const geminiResponse = await complete(gemini, context);
|
||||||
```
|
```
|
||||||
|
|
||||||
### Provider Compatibility
|
### Provider Compatibility
|
||||||
|
|
@ -300,155 +466,65 @@ This enables flexible workflows where you can:
|
||||||
- Use specialized models for specific tasks
|
- Use specialized models for specific tasks
|
||||||
- Maintain conversation continuity across provider outages
|
- Maintain conversation continuity across provider outages
|
||||||
|
|
||||||
## Provider-Specific Options
|
## Context Serialization
|
||||||
|
|
||||||
|
The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
|
||||||
|
|
||||||
### OpenAI Reasoning (o1, o3)
|
|
||||||
```typescript
|
```typescript
|
||||||
const llm = createLLM('openai', 'o1-mini');
|
import { Context, getModel, complete } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
await llm.generate(context, {
|
// Create and use a context
|
||||||
reasoningEffort: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
|
const context: Context = {
|
||||||
});
|
systemPrompt: 'You are a helpful assistant.',
|
||||||
```
|
messages: [
|
||||||
|
{ role: 'user', content: 'What is TypeScript?' }
|
||||||
### Anthropic Thinking
|
]
|
||||||
```typescript
|
|
||||||
const llm = createLLM('anthropic', 'claude-3-5-sonnet-20241022');
|
|
||||||
|
|
||||||
await llm.generate(context, {
|
|
||||||
thinking: {
|
|
||||||
enabled: true,
|
|
||||||
budgetTokens: 2048 // Optional thinking token limit
|
|
||||||
}
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
### Google Gemini Thinking
|
|
||||||
```typescript
|
|
||||||
const llm = createLLM('google', 'gemini-2.5-pro');
|
|
||||||
|
|
||||||
await llm.generate(context, {
|
|
||||||
thinking: { enabled: true }
|
|
||||||
});
|
|
||||||
```
|
|
||||||
|
|
||||||
## Custom Models
|
|
||||||
|
|
||||||
### Local Models (Ollama, vLLM, etc.)
|
|
||||||
```typescript
|
|
||||||
import { OpenAICompletionsLLM } from '@mariozechner/pi-ai';
|
|
||||||
|
|
||||||
const model = {
|
|
||||||
id: 'gpt-oss:20b',
|
|
||||||
provider: 'ollama',
|
|
||||||
baseUrl: 'http://localhost:11434/v1',
|
|
||||||
reasoning: false,
|
|
||||||
input: ['text'],
|
|
||||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
||||||
contextWindow: 126000,
|
|
||||||
maxTokens: 32000,
|
|
||||||
name: 'Llama 3.1 8B'
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const llm = new OpenAICompletionsLLM(model, 'dummy-key');
|
|
||||||
```
|
|
||||||
|
|
||||||
### Custom OpenAI-Compatible Endpoints
|
|
||||||
```typescript
|
|
||||||
const model = {
|
|
||||||
id: 'custom-model',
|
|
||||||
provider: 'custom',
|
|
||||||
baseUrl: 'https://your-api.com/v1',
|
|
||||||
reasoning: true,
|
|
||||||
input: ['text', 'image'],
|
|
||||||
cost: { input: 0.5, output: 1.5, cacheRead: 0, cacheWrite: 0 },
|
|
||||||
contextWindow: 32768,
|
|
||||||
maxTokens: 8192,
|
|
||||||
name: 'Custom Model'
|
|
||||||
};
|
|
||||||
|
|
||||||
const llm = new OpenAICompletionsLLM(model, 'your-api-key');
|
|
||||||
```
|
|
||||||
|
|
||||||
## Model Discovery
|
|
||||||
|
|
||||||
All models in this library support tool calling. Models are automatically fetched from OpenRouter and models.dev APIs at build time.
|
|
||||||
|
|
||||||
### List Available Models
|
|
||||||
```typescript
|
|
||||||
import { PROVIDERS } from '@mariozechner/pi-ai';
|
|
||||||
|
|
||||||
// List all OpenAI models (all support tool calling)
|
|
||||||
for (const [modelId, model] of Object.entries(PROVIDERS.openai.models)) {
|
|
||||||
console.log(`${modelId}: ${model.name}`);
|
|
||||||
console.log(` Context: ${model.contextWindow} tokens`);
|
|
||||||
console.log(` Reasoning: ${model.reasoning}`);
|
|
||||||
console.log(` Vision: ${model.input.includes('image')}`);
|
|
||||||
console.log(` Cost: $${model.cost.input}/$${model.cost.output} per million tokens`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all models with reasoning support
|
|
||||||
const reasoningModels = [];
|
|
||||||
for (const provider of Object.values(PROVIDERS)) {
|
|
||||||
for (const model of Object.values(provider.models)) {
|
|
||||||
if (model.reasoning) {
|
|
||||||
reasoningModels.push(model);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find all vision-capable models
|
|
||||||
const visionModels = [];
|
|
||||||
for (const provider of Object.values(PROVIDERS)) {
|
|
||||||
for (const model of Object.values(provider.models)) {
|
|
||||||
if (model.input.includes('image')) {
|
|
||||||
visionModels.push(model);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Check Model Capabilities
|
|
||||||
```typescript
|
|
||||||
import { getModel } from '@mariozechner/pi-ai';
|
|
||||||
|
|
||||||
const model = getModel('openai', 'gpt-4o-mini');
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
if (model) {
|
const response = await complete(model, context);
|
||||||
console.log(`Model: ${model.name}`);
|
context.messages.push(response);
|
||||||
console.log(`Provider: ${model.provider}`);
|
|
||||||
console.log(`Context window: ${model.contextWindow} tokens`);
|
// Serialize the entire context
|
||||||
console.log(`Max output: ${model.maxTokens} tokens`);
|
const serialized = JSON.stringify(context);
|
||||||
console.log(`Supports reasoning: ${model.reasoning}`);
|
console.log('Serialized context size:', serialized.length, 'bytes');
|
||||||
console.log(`Supports images: ${model.input.includes('image')}`);
|
|
||||||
console.log(`Input cost: $${model.cost.input} per million tokens`);
|
// Save to database, localStorage, file, etc.
|
||||||
console.log(`Output cost: $${model.cost.output} per million tokens`);
|
localStorage.setItem('conversation', serialized);
|
||||||
console.log(`Cache read cost: $${model.cost.cacheRead} per million tokens`);
|
|
||||||
console.log(`Cache write cost: $${model.cost.cacheWrite} per million tokens`);
|
// Later: deserialize and continue the conversation
|
||||||
}
|
const restored: Context = JSON.parse(localStorage.getItem('conversation')!);
|
||||||
|
restored.messages.push({ role: 'user', content: 'Tell me more about its type system' });
|
||||||
|
|
||||||
|
// Continue with any model
|
||||||
|
const newModel = getModel('anthropic', 'claude-3-5-haiku-20241022');
|
||||||
|
const continuation = await complete(newModel, restored);
|
||||||
```
|
```
|
||||||
|
|
||||||
|
> **Note**: If the context contains images (encoded as base64 as shown in the Image Input section), those will also be serialized.
|
||||||
|
|
||||||
## Browser Usage
|
## Browser Usage
|
||||||
|
|
||||||
The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
|
The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
import { createLLM } from '@mariozechner/pi-ai';
|
import { getModel, complete } from '@mariozechner/pi-ai';
|
||||||
|
|
||||||
// API key must be passed explicitly in browser
|
// API key must be passed explicitly in browser
|
||||||
const llm = createLLM('anthropic', 'claude-3-5-haiku-20241022', {
|
const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
|
||||||
apiKey: 'your-api-key'
|
|
||||||
});
|
|
||||||
|
|
||||||
const response = await llm.generate({
|
const response = await complete(model, {
|
||||||
messages: [{ role: 'user', content: 'Hello!' }]
|
messages: [{ role: 'user', content: 'Hello!' }]
|
||||||
|
}, {
|
||||||
|
apiKey: 'your-api-key'
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
|
> **Security Warning**: Exposing API keys in frontend code is dangerous. Anyone can extract and abuse your keys. Only use this approach for internal tools or demos. For production applications, use a backend proxy that keeps your API keys secure.
|
||||||
|
|
||||||
## Environment Variables
|
### Environment Variables (Node.js only)
|
||||||
|
|
||||||
Set these environment variables to use `createLLM` without passing API keys:
|
In Node.js environments, you can set environment variables to avoid passing API keys:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
OPENAI_API_KEY=sk-...
|
OPENAI_API_KEY=sk-...
|
||||||
|
|
@ -460,13 +536,17 @@ XAI_API_KEY=xai-...
|
||||||
OPENROUTER_API_KEY=sk-or-...
|
OPENROUTER_API_KEY=sk-or-...
|
||||||
```
|
```
|
||||||
|
|
||||||
When set, you can omit the API key parameter:
|
When set, the library automatically uses these keys:
|
||||||
|
|
||||||
```typescript
|
```typescript
|
||||||
// Uses OPENAI_API_KEY from environment
|
// Uses OPENAI_API_KEY from environment
|
||||||
const llm = createLLM('openai', 'gpt-4o-mini');
|
const model = getModel('openai', 'gpt-4o-mini');
|
||||||
|
const response = await complete(model, context);
|
||||||
|
|
||||||
// Or pass explicitly
|
// Or override with explicit key
|
||||||
const llm = createLLM('openai', 'gpt-4o-mini', 'sk-...');
|
const response = await complete(model, context, {
|
||||||
|
apiKey: 'sk-different-key'
|
||||||
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
|
||||||
|
|
@ -338,7 +338,7 @@ async function generateModels() {
|
||||||
|
|
||||||
import type { Model } from "./types.js";
|
import type { Model } from "./types.js";
|
||||||
|
|
||||||
export const PROVIDERS = {
|
export const MODELS = {
|
||||||
`;
|
`;
|
||||||
|
|
||||||
// Generate provider sections
|
// Generate provider sections
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
export * from "./generate.js";
|
export * from "./generate.js";
|
||||||
export * from "./models.generated.js";
|
|
||||||
export * from "./models.js";
|
export * from "./models.js";
|
||||||
export * from "./providers/anthropic.js";
|
export * from "./providers/anthropic.js";
|
||||||
export * from "./providers/google.js";
|
export * from "./providers/google.js";
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
import type { Model } from "./types.js";
|
import type { Model } from "./types.js";
|
||||||
|
|
||||||
export const PROVIDERS = {
|
export const MODELS = {
|
||||||
anthropic: {
|
anthropic: {
|
||||||
"claude-3-7-sonnet-20250219": {
|
"claude-3-7-sonnet-20250219": {
|
||||||
id: "claude-3-7-sonnet-20250219",
|
id: "claude-3-7-sonnet-20250219",
|
||||||
|
|
@ -2652,23 +2652,6 @@ export const PROVIDERS = {
|
||||||
contextWindow: 32768,
|
contextWindow: 32768,
|
||||||
maxTokens: 4096,
|
maxTokens: 4096,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
"cohere/command-r-08-2024": {
|
|
||||||
id: "cohere/command-r-08-2024",
|
|
||||||
name: "Cohere: Command R (08-2024)",
|
|
||||||
api: "openai-completions",
|
|
||||||
provider: "openrouter",
|
|
||||||
baseUrl: "https://openrouter.ai/api/v1",
|
|
||||||
reasoning: false,
|
|
||||||
input: ["text"],
|
|
||||||
cost: {
|
|
||||||
input: 0.15,
|
|
||||||
output: 0.6,
|
|
||||||
cacheRead: 0,
|
|
||||||
cacheWrite: 0,
|
|
||||||
},
|
|
||||||
contextWindow: 128000,
|
|
||||||
maxTokens: 4000,
|
|
||||||
} satisfies Model<"openai-completions">,
|
|
||||||
"cohere/command-r-plus-08-2024": {
|
"cohere/command-r-plus-08-2024": {
|
||||||
id: "cohere/command-r-plus-08-2024",
|
id: "cohere/command-r-plus-08-2024",
|
||||||
name: "Cohere: Command R+ (08-2024)",
|
name: "Cohere: Command R+ (08-2024)",
|
||||||
|
|
@ -2686,6 +2669,23 @@ export const PROVIDERS = {
|
||||||
contextWindow: 128000,
|
contextWindow: 128000,
|
||||||
maxTokens: 4000,
|
maxTokens: 4000,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
|
"cohere/command-r-08-2024": {
|
||||||
|
id: "cohere/command-r-08-2024",
|
||||||
|
name: "Cohere: Command R (08-2024)",
|
||||||
|
api: "openai-completions",
|
||||||
|
provider: "openrouter",
|
||||||
|
baseUrl: "https://openrouter.ai/api/v1",
|
||||||
|
reasoning: false,
|
||||||
|
input: ["text"],
|
||||||
|
cost: {
|
||||||
|
input: 0.15,
|
||||||
|
output: 0.6,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
contextWindow: 128000,
|
||||||
|
maxTokens: 4000,
|
||||||
|
} satisfies Model<"openai-completions">,
|
||||||
"microsoft/phi-3.5-mini-128k-instruct": {
|
"microsoft/phi-3.5-mini-128k-instruct": {
|
||||||
id: "microsoft/phi-3.5-mini-128k-instruct",
|
id: "microsoft/phi-3.5-mini-128k-instruct",
|
||||||
name: "Microsoft: Phi-3.5 Mini 128K Instruct",
|
name: "Microsoft: Phi-3.5 Mini 128K Instruct",
|
||||||
|
|
@ -2720,23 +2720,6 @@ export const PROVIDERS = {
|
||||||
contextWindow: 131072,
|
contextWindow: 131072,
|
||||||
maxTokens: 4096,
|
maxTokens: 4096,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
"meta-llama/llama-3.1-405b-instruct": {
|
|
||||||
id: "meta-llama/llama-3.1-405b-instruct",
|
|
||||||
name: "Meta: Llama 3.1 405B Instruct",
|
|
||||||
api: "openai-completions",
|
|
||||||
provider: "openrouter",
|
|
||||||
baseUrl: "https://openrouter.ai/api/v1",
|
|
||||||
reasoning: false,
|
|
||||||
input: ["text"],
|
|
||||||
cost: {
|
|
||||||
input: 0.7999999999999999,
|
|
||||||
output: 0.7999999999999999,
|
|
||||||
cacheRead: 0,
|
|
||||||
cacheWrite: 0,
|
|
||||||
},
|
|
||||||
contextWindow: 32768,
|
|
||||||
maxTokens: 16384,
|
|
||||||
} satisfies Model<"openai-completions">,
|
|
||||||
"meta-llama/llama-3.1-8b-instruct": {
|
"meta-llama/llama-3.1-8b-instruct": {
|
||||||
id: "meta-llama/llama-3.1-8b-instruct",
|
id: "meta-llama/llama-3.1-8b-instruct",
|
||||||
name: "Meta: Llama 3.1 8B Instruct",
|
name: "Meta: Llama 3.1 8B Instruct",
|
||||||
|
|
@ -2754,6 +2737,23 @@ export const PROVIDERS = {
|
||||||
contextWindow: 131072,
|
contextWindow: 131072,
|
||||||
maxTokens: 16384,
|
maxTokens: 16384,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
|
"meta-llama/llama-3.1-405b-instruct": {
|
||||||
|
id: "meta-llama/llama-3.1-405b-instruct",
|
||||||
|
name: "Meta: Llama 3.1 405B Instruct",
|
||||||
|
api: "openai-completions",
|
||||||
|
provider: "openrouter",
|
||||||
|
baseUrl: "https://openrouter.ai/api/v1",
|
||||||
|
reasoning: false,
|
||||||
|
input: ["text"],
|
||||||
|
cost: {
|
||||||
|
input: 0.7999999999999999,
|
||||||
|
output: 0.7999999999999999,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
contextWindow: 32768,
|
||||||
|
maxTokens: 16384,
|
||||||
|
} satisfies Model<"openai-completions">,
|
||||||
"meta-llama/llama-3.1-70b-instruct": {
|
"meta-llama/llama-3.1-70b-instruct": {
|
||||||
id: "meta-llama/llama-3.1-70b-instruct",
|
id: "meta-llama/llama-3.1-70b-instruct",
|
||||||
name: "Meta: Llama 3.1 70B Instruct",
|
name: "Meta: Llama 3.1 70B Instruct",
|
||||||
|
|
@ -2873,23 +2873,6 @@ export const PROVIDERS = {
|
||||||
contextWindow: 128000,
|
contextWindow: 128000,
|
||||||
maxTokens: 4096,
|
maxTokens: 4096,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
"meta-llama/llama-3-70b-instruct": {
|
|
||||||
id: "meta-llama/llama-3-70b-instruct",
|
|
||||||
name: "Meta: Llama 3 70B Instruct",
|
|
||||||
api: "openai-completions",
|
|
||||||
provider: "openrouter",
|
|
||||||
baseUrl: "https://openrouter.ai/api/v1",
|
|
||||||
reasoning: false,
|
|
||||||
input: ["text"],
|
|
||||||
cost: {
|
|
||||||
input: 0.3,
|
|
||||||
output: 0.39999999999999997,
|
|
||||||
cacheRead: 0,
|
|
||||||
cacheWrite: 0,
|
|
||||||
},
|
|
||||||
contextWindow: 8192,
|
|
||||||
maxTokens: 16384,
|
|
||||||
} satisfies Model<"openai-completions">,
|
|
||||||
"meta-llama/llama-3-8b-instruct": {
|
"meta-llama/llama-3-8b-instruct": {
|
||||||
id: "meta-llama/llama-3-8b-instruct",
|
id: "meta-llama/llama-3-8b-instruct",
|
||||||
name: "Meta: Llama 3 8B Instruct",
|
name: "Meta: Llama 3 8B Instruct",
|
||||||
|
|
@ -2907,6 +2890,23 @@ export const PROVIDERS = {
|
||||||
contextWindow: 8192,
|
contextWindow: 8192,
|
||||||
maxTokens: 16384,
|
maxTokens: 16384,
|
||||||
} satisfies Model<"openai-completions">,
|
} satisfies Model<"openai-completions">,
|
||||||
|
"meta-llama/llama-3-70b-instruct": {
|
||||||
|
id: "meta-llama/llama-3-70b-instruct",
|
||||||
|
name: "Meta: Llama 3 70B Instruct",
|
||||||
|
api: "openai-completions",
|
||||||
|
provider: "openrouter",
|
||||||
|
baseUrl: "https://openrouter.ai/api/v1",
|
||||||
|
reasoning: false,
|
||||||
|
input: ["text"],
|
||||||
|
cost: {
|
||||||
|
input: 0.3,
|
||||||
|
output: 0.39999999999999997,
|
||||||
|
cacheRead: 0,
|
||||||
|
cacheWrite: 0,
|
||||||
|
},
|
||||||
|
contextWindow: 8192,
|
||||||
|
maxTokens: 16384,
|
||||||
|
} satisfies Model<"openai-completions">,
|
||||||
"mistralai/mixtral-8x22b-instruct": {
|
"mistralai/mixtral-8x22b-instruct": {
|
||||||
id: "mistralai/mixtral-8x22b-instruct",
|
id: "mistralai/mixtral-8x22b-instruct",
|
||||||
name: "Mistral: Mixtral 8x22B Instruct",
|
name: "Mistral: Mixtral 8x22B Instruct",
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
import { PROVIDERS } from "./models.generated.js";
|
import { MODELS } from "./models.generated.js";
|
||||||
import type { Api, KnownProvider, Model, Usage } from "./types.js";
|
import type { Api, KnownProvider, Model, Usage } from "./types.js";
|
||||||
|
|
||||||
const modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();
|
const modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();
|
||||||
|
|
||||||
// Initialize registry from PROVIDERS on module load
|
// Initialize registry from MODELS on module load
|
||||||
for (const [provider, models] of Object.entries(PROVIDERS)) {
|
for (const [provider, models] of Object.entries(MODELS)) {
|
||||||
const providerModels = new Map<string, Model<Api>>();
|
const providerModels = new Map<string, Model<Api>>();
|
||||||
for (const [id, model] of Object.entries(models)) {
|
for (const [id, model] of Object.entries(models)) {
|
||||||
providerModels.set(id, model as Model<Api>);
|
providerModels.set(id, model as Model<Api>);
|
||||||
|
|
@ -14,23 +14,25 @@ for (const [provider, models] of Object.entries(PROVIDERS)) {
|
||||||
|
|
||||||
type ModelApi<
|
type ModelApi<
|
||||||
TProvider extends KnownProvider,
|
TProvider extends KnownProvider,
|
||||||
TModelId extends keyof (typeof PROVIDERS)[TProvider],
|
TModelId extends keyof (typeof MODELS)[TProvider],
|
||||||
> = (typeof PROVIDERS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
|
> = (typeof MODELS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
|
||||||
|
|
||||||
export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof PROVIDERS)[TProvider]>(
|
export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof MODELS)[TProvider]>(
|
||||||
provider: TProvider,
|
provider: TProvider,
|
||||||
modelId: TModelId,
|
modelId: TModelId,
|
||||||
): Model<ModelApi<TProvider, TModelId>>;
|
): Model<ModelApi<TProvider, TModelId>> {
|
||||||
export function getModel<TApi extends Api>(provider: string, modelId: string): Model<TApi> | undefined;
|
return modelRegistry.get(provider)?.get(modelId as string) as Model<ModelApi<TProvider, TModelId>>;
|
||||||
export function getModel<TApi extends Api>(provider: any, modelId: any): Model<TApi> | undefined {
|
|
||||||
return modelRegistry.get(provider)?.get(modelId) as Model<TApi> | undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export function registerModel<TApi extends Api>(model: Model<TApi>): void {
|
export function getProviders(): KnownProvider[] {
|
||||||
if (!modelRegistry.has(model.provider)) {
|
return Array.from(modelRegistry.keys()) as KnownProvider[];
|
||||||
modelRegistry.set(model.provider, new Map());
|
}
|
||||||
}
|
|
||||||
modelRegistry.get(model.provider)!.set(model.id, model);
|
export function getModels<TProvider extends KnownProvider>(
|
||||||
|
provider: TProvider,
|
||||||
|
): Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[] {
|
||||||
|
const models = modelRegistry.get(provider);
|
||||||
|
return models ? (Array.from(models.values()) as Model<ModelApi<TProvider, keyof (typeof MODELS)[TProvider]>>[]) : [];
|
||||||
}
|
}
|
||||||
|
|
||||||
export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage["cost"] {
|
export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage["cost"] {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue