More clean-up

This commit is contained in:
Mario Zechner 2025-09-03 00:01:56 +02:00
parent acf0f5aee2
commit 21750c230a
6 changed files with 0 additions and 6587 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@@ -1,322 +0,0 @@
# Image Input Support for LLM Providers
This document describes how to submit images to different LLM provider APIs and proposes an abstraction layer for unified image handling.
## Provider-Specific Image Support
### 1. Anthropic (Claude)
**Supported Models**: Claude 3 and Claude 4 families (Sonnet, Haiku, Opus)
**Image Formats**: JPEG, PNG, GIF, WebP
**Methods**:
1. **Base64 Encoding**:
```json
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/jpeg",
"data": "<base64_encoded_image_data>"
}
},
{
"type": "text",
"text": "What's in this image?"
}
]
}
```
2. **URL Support**:
```json
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "url",
"url": "https://example.com/image.jpg"
}
}
]
}
```
**Limitations**:
- Maximum 20 images per request
- Each image max 3.75 MB
- Maximum dimensions: 8,000px × 8,000px
- Images are ephemeral (not stored beyond request duration)
### 2. Google GenAI (Gemini)
**Supported Models**: Gemini Pro Vision, Gemini 1.5, Gemini 2.0
**Image Formats**: JPEG, PNG, GIF, WebP
**Methods**:
1. **Inline Base64 Data** (for files < 20MB):
```json
{
"contents": [{
"parts": [
{
"inline_data": {
"mime_type": "image/jpeg",
"data": "BASE64_ENCODED_IMAGE_DATA"
}
},
{
"text": "Describe this image"
}
]
}]
}
```
2. **File API** (for larger files or reuse):
- Upload file first using File API
- Reference by file URI in subsequent requests
**Limitations**:
- Inline data: Total request size (text + images) < 20MB
- Base64 encoding increases size in transit
- Returns HTTP 413 if request too large
### 3. OpenAI Chat Completions (GPT-4o, GPT-4o-mini)
**Supported Models**: GPT-4o, GPT-4o-mini, GPT-4-turbo with vision
**Image Formats**: JPEG, PNG, GIF, WebP
**Methods**:
1. **URL Reference**:
```json
{
"role": "user",
"content": [
{
"type": "text",
"text": "What's in this image?"
},
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
}
}
]
}
```
2. **Base64 Data URL**:
```json
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": {
"url": "data:image/jpeg;base64,<base64_encoded_image>"
}
}
]
}
```
**Note**: Despite the field name `image_url`, base64 data URLs are supported.
### 4. OpenAI Responses API (o1, o3, o4-mini)
**Vision Support by Model**:
- ✅ **o1**: Full vision support
- ✅ **o3**: Vision support + image generation
- ✅ **o4-mini**: Vision support + image generation
- ❌ **o3-mini**: No vision capabilities
- ✅ **o3-pro**: Vision analysis (no generation)
**Methods**: Same as Chat Completions API
- URL references
- Base64 data URLs
**Note**: Vision capabilities integrated into reasoning chain-of-thought for more contextually rich responses.
## Proposed Unified Abstraction
### Image Content Type
```typescript
interface ImageContent {
type: "image";
data: string; // base64 encoded image data
mimeType: string; // e.g., "image/jpeg", "image/png"
}
```
### Unified Message Structure
```typescript
interface UserMessage {
role: "user";
content: string | (TextContent | ImageContent)[];
}
interface TextContent {
type: "text";
text: string;
}
```
### Provider Adapter Implementation
Each provider adapter would:
1. **Check Model Capabilities**:
```typescript
if (model.input.includes("image")) {
// Process image content
} else {
// Throw error or ignore images
}
```
2. **Convert to Provider Format**:
```typescript
// Anthropic converter
function toAnthropicContent(content: (TextContent | ImageContent)[]) {
return content.map(item => {
if (item.type === "image") {
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType,
data: item.data
}
};
}
return { type: "text", text: item.text };
});
}
// OpenAI converter
function toOpenAIContent(content: (TextContent | ImageContent)[]) {
return content.map(item => {
if (item.type === "image") {
return {
type: "image_url",
image_url: {
url: `data:${item.mimeType};base64,${item.data}`
}
};
}
return { type: "text", text: item.text };
});
}
// Google converter
function toGoogleContent(content: (TextContent | ImageContent)[]) {
return content.map(item => {
if (item.type === "image") {
return {
inline_data: {
mime_type: item.mimeType,
data: item.data
}
};
}
return { text: item.text };
});
}
```
### Size and Format Validation
```typescript
interface ImageConstraints {
maxSizeMB: number;
maxWidth: number;
maxHeight: number;
maxCount: number;
supportedFormats: string[];
}
const PROVIDER_CONSTRAINTS: Record<string, ImageConstraints> = {
anthropic: {
maxSizeMB: 3.75,
maxWidth: 8000,
maxHeight: 8000,
maxCount: 20,
supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"]
},
google: {
maxSizeMB: 20, // for inline data
maxWidth: Infinity,
maxHeight: Infinity,
maxCount: Infinity,
supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"]
},
openai: {
maxSizeMB: 20,
maxWidth: Infinity,
maxHeight: Infinity,
maxCount: Infinity,
supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"]
}
};
async function validateImage(
image: ImageContent,
provider: string
): Promise<void> {
const constraints = PROVIDER_CONSTRAINTS[provider];
// Check MIME type
if (!constraints.supportedFormats.includes(image.mimeType)) {
throw new Error(`Unsupported image format: ${image.mimeType}`);
}
// Check size
const imageBuffer = Buffer.from(image.data, 'base64');
const sizeMB = imageBuffer.length / (1024 * 1024);
if (sizeMB > constraints.maxSizeMB) {
throw new Error(`Image exceeds ${constraints.maxSizeMB}MB limit`);
}
// Could add dimension checks using image processing library
}
```
## Implementation Considerations
1. **Preprocessing**:
- User is responsible for converting images to base64 before passing to API
- Utility functions could be provided for common conversions (file to base64, URL to base64)
- Image optimization (resize/compress) should happen before encoding
2. **Error Handling**:
- Validate MIME types and sizes before sending
- Check model capabilities (via `model.input.includes("image")`)
- Provide clear error messages for unsupported features
3. **Performance**:
- Base64 encoding increases payload size by ~33%
- Consider image compression before encoding
- For Google GenAI, be aware of 20MB total request limit
4. **Token Counting**:
- Images consume tokens (varies by provider and image size)
- Include image token estimates in usage calculations
- Anthropic: ~1 token per ~3-4 bytes of base64 data
- OpenAI: Detailed images consume more tokens than low-detail
5. **Fallback Strategies**:
- If model doesn't support images, throw error or ignore images
- Consider offering text-only fallback for non-vision models

View file

@@ -1,56 +0,0 @@
# OpenAI Models
## All Models
- [ ] [GPT-5](https://platform.openai.com/docs/models/gpt-5)
- [ ] [GPT-5 mini](https://platform.openai.com/docs/models/gpt-5-mini)
- [ ] [GPT-5 nano](https://platform.openai.com/docs/models/gpt-5-nano)
- [ ] [o3-deep-research](https://platform.openai.com/docs/models/o3-deep-research)
- [ ] [o4-mini-deep-research](https://platform.openai.com/docs/models/o4-mini-deep-research)
- [ ] [o3-pro](https://platform.openai.com/docs/models/o3-pro)
- [ ] [GPT-4o Audio](https://platform.openai.com/docs/models/gpt-4o-audio-preview)
- [ ] [GPT-4o Realtime](https://platform.openai.com/docs/models/gpt-4o-realtime-preview)
- [ ] [o3](https://platform.openai.com/docs/models/o3)
- [ ] [o4-mini](https://platform.openai.com/docs/models/o4-mini)
- [ ] [GPT-4.1](https://platform.openai.com/docs/models/gpt-4.1)
- [ ] [GPT-4.1 mini](https://platform.openai.com/docs/models/gpt-4.1-mini)
- [ ] [GPT-4.1 nano](https://platform.openai.com/docs/models/gpt-4.1-nano)
- [ ] [o1-pro](https://platform.openai.com/docs/models/o1-pro)
- [ ] [computer-use-preview](https://platform.openai.com/docs/models/computer-use-preview)
- [ ] [GPT-4o mini Search Preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview)
- [ ] [GPT-4o Search Preview](https://platform.openai.com/docs/models/gpt-4o-search-preview)
- [ ] [GPT-4.5 Preview (Deprecated)](https://platform.openai.com/docs/models/gpt-4.5-preview)
- [ ] [o3-mini](https://platform.openai.com/docs/models/o3-mini)
- [ ] [GPT-4o mini Audio](https://platform.openai.com/docs/models/gpt-4o-mini-audio-preview)
- [ ] [GPT-4o mini Realtime](https://platform.openai.com/docs/models/gpt-4o-mini-realtime-preview)
- [ ] [o1](https://platform.openai.com/docs/models/o1)
- [ ] [omni-moderation](https://platform.openai.com/docs/models/omni-moderation-latest)
- [ ] [o1-mini](https://platform.openai.com/docs/models/o1-mini)
- [ ] [o1 Preview](https://platform.openai.com/docs/models/o1-preview)
- [ ] [GPT-4o](https://platform.openai.com/docs/models/gpt-4o)
- [ ] [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini)
- [ ] [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo)
- [ ] [babbage-002](https://platform.openai.com/docs/models/babbage-002)
- [ ] [ChatGPT-4o](https://platform.openai.com/docs/models/chatgpt-4o-latest)
- [ ] [codex-mini-latest](https://platform.openai.com/docs/models/codex-mini-latest)
- [ ] [DALL·E 2](https://platform.openai.com/docs/models/dall-e-2)
- [ ] [DALL·E 3](https://platform.openai.com/docs/models/dall-e-3)
- [ ] [davinci-002](https://platform.openai.com/docs/models/davinci-002)
- [ ] [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3.5-turbo)
- [ ] [GPT-4](https://platform.openai.com/docs/models/gpt-4)
- [ ] [GPT-4 Turbo Preview](https://platform.openai.com/docs/models/gpt-4-turbo-preview)
- [ ] [GPT-4o mini Transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe)
- [ ] [GPT-4o mini TTS](https://platform.openai.com/docs/models/gpt-4o-mini-tts)
- [ ] [GPT-4o Transcribe](https://platform.openai.com/docs/models/gpt-4o-transcribe)
- [ ] [GPT-5 Chat](https://platform.openai.com/docs/models/gpt-5-chat-latest)
- [ ] [GPT Image 1](https://platform.openai.com/docs/models/gpt-image-1)
- [ ] [gpt-oss-120b](https://platform.openai.com/docs/models/gpt-oss-120b)
- [ ] [gpt-oss-20b](https://platform.openai.com/docs/models/gpt-oss-20b)
- [ ] [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large)
- [ ] [text-embedding-3-small](https://platform.openai.com/docs/models/text-embedding-3-small)
- [ ] [text-embedding-ada-002](https://platform.openai.com/docs/models/text-embedding-ada-002)
- [ ] [text-moderation](https://platform.openai.com/docs/models/text-moderation-latest)
- [ ] [text-moderation-stable](https://platform.openai.com/docs/models/text-moderation-stable)
- [ ] [TTS-1](https://platform.openai.com/docs/models/tts-1)
- [ ] [TTS-1 HD](https://platform.openai.com/docs/models/tts-1-hd)
- [ ] [Whisper](https://platform.openai.com/docs/models/whisper-1)

File diff suppressed because it is too large Load diff

View file

@@ -1,950 +0,0 @@
# Unified AI API Design Plan
Based on comprehensive investigation of OpenAI, Anthropic, and Gemini SDKs with actual implementation examples.
## Key API Differences Summary
### OpenAI
- **Dual APIs**: Chat Completions (broad support) vs Responses API (o1/o3 thinking content)
- **Thinking**: Only Responses API gives actual content, Chat Completions only gives counts
- **Roles**: `system`, `user`, `assistant`, `tool` (o1/o3 use `developer` instead of `system`)
- **Streaming**: Deltas in chunks with `stream_options.include_usage` for token usage
### Anthropic
- **Single API**: Messages API with comprehensive streaming
- **Content Blocks**: Always arrays, even for simple text
- **System**: Separate parameter, not in messages array
- **Tool Use**: Content blocks, not separate message role
- **Thinking**: Explicit budget allocation, appears as content blocks
- **Caching**: Per-block cache control with TTL options
### Gemini
- **Parts System**: All content split into typed parts
- **System**: Separate `systemInstruction` parameter
- **Roles**: Uses `model` instead of `assistant`
- **Thinking**: `part.thought: true` flag identifies reasoning
- **Streaming**: Returns complete responses, not deltas
- **Function Calls**: Embedded in parts array
## Unified API Design
### Core Client
```typescript
interface AIConfig {
provider: 'openai' | 'anthropic' | 'gemini';
apiKey: string;
model: string;
baseURL?: string; // For OpenAI-compatible endpoints
}
interface ModelInfo {
id: string;
name: string;
provider: string;
capabilities: {
reasoning: boolean;
toolCall: boolean;
vision: boolean;
audio?: boolean;
};
cost: {
input: number; // per million tokens
output: number; // per million tokens
cacheRead?: number;
cacheWrite?: number;
};
limits: {
context: number;
output: number;
};
knowledge?: string; // Knowledge cutoff date
}
class AI {
constructor(config: AIConfig);
// Main streaming interface - everything else builds on this
async *stream(request: Request): AsyncGenerator<Event>;
// Convenience method for non-streaming
async complete(request: Request): Promise<Response>;
// Get model information
getModelInfo(): ModelInfo;
// Abort current request
abort(): void;
}
```
### Message Format
```typescript
type Message =
| {
role: 'user';
content: string | Content[];
}
| {
role: 'assistant';
content: string | Content[];
model: string;
usage: TokenUsage;
toolCalls?: {
id: string;
name: string;
arguments: Record<string, any>;
}[];
}
| {
role: 'tool';
content: string | Content[];
toolCallId: string;
};
interface Content {
type: 'text' | 'image';
text?: string;
image?: {
data: string; // base64
mimeType: string;
};
}
```
### Request Format
```typescript
interface Request {
messages: Message[];
// System prompt (separated for Anthropic/Gemini compatibility)
systemPrompt?: string;
// Common parameters
temperature?: number;
maxTokens?: number;
stopSequences?: string[];
// Tools
tools?: {
name: string;
description: string;
parameters: Record<string, any>; // JSON Schema
}[];
toolChoice?: 'auto' | 'none' | 'required' | { name: string };
// Thinking/reasoning
reasoning?: {
enabled: boolean;
effort?: 'low' | 'medium' | 'high'; // OpenAI reasoning_effort
maxTokens?: number; // Anthropic thinking budget
};
// Abort signal
signal?: AbortSignal;
}
```
### Event Stream
```typescript
type Event =
| { type: 'start'; model: string; provider: string }
| { type: 'text'; content: string; delta: string }
| { type: 'thinking'; content: string; delta: string }
| { type: 'toolCall'; toolCall: ToolCall }
| { type: 'usage'; usage: TokenUsage }
| { type: 'done'; reason: StopReason; message: Message } // message includes model and usage
| { type: 'error'; error: Error };
interface TokenUsage {
input: number;
output: number;
total: number;
thinking?: number;
cacheRead?: number;
cacheWrite?: number;
cost?: {
input: number;
output: number;
cache?: number;
total: number;
};
}
type StopReason = 'stop' | 'length' | 'toolUse' | 'safety' | 'error';
```
## Caching Strategy
Caching is handled automatically by each provider adapter:
- **OpenAI**: Automatic prompt caching (no configuration needed)
- **Gemini**: Automatic context caching (no configuration needed)
- **Anthropic**: We automatically add cache_control to the system prompt and older messages
```typescript
class AnthropicAdapter {
private addCaching(messages: Message[]): any[] {
const anthropicMessages = [];
// Automatically cache older messages (assuming incremental context)
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
const isOld = i < messages.length - 2; // Cache all but last 2 messages
// Convert to Anthropic format with automatic caching
const blocks = this.toContentBlocks(msg);
if (isOld && blocks.length > 0) {
blocks[0].cache_control = { type: 'ephemeral' };
}
anthropicMessages.push({
role: msg.role === 'assistant' ? 'assistant' : 'user',
content: blocks
});
}
return anthropicMessages;
}
}
```
## Provider Adapter Implementation
### OpenAI Adapter
```typescript
class OpenAIAdapter {
private client: OpenAI;
private useResponsesAPI: boolean = false;
async *stream(request: Request): AsyncGenerator<Event> {
// Determine which API to use
if (request.reasoning?.enabled && this.isReasoningModel()) {
yield* this.streamResponsesAPI(request);
} else {
yield* this.streamChatCompletions(request);
}
}
private async *streamChatCompletions(request: Request) {
const stream = await this.client.chat.completions.create({
model: this.model,
messages: this.toOpenAIMessages(request),
tools: this.toOpenAITools(request.tools),
reasoning_effort: request.reasoning?.effort,
stream: true,
stream_options: { include_usage: true }
});
let content = '';
let toolCalls: any[] = [];
for await (const chunk of stream) {
if (chunk.choices[0]?.delta?.content) {
const delta = chunk.choices[0].delta.content;
content += delta;
yield { type: 'text', content, delta };
}
if (chunk.choices[0]?.delta?.tool_calls) {
// Accumulate tool calls
this.mergeToolCalls(toolCalls, chunk.choices[0].delta.tool_calls);
for (const tc of toolCalls) {
yield { type: 'toolCall', toolCall: tc, partial: true };
}
}
if (chunk.usage) {
yield {
type: 'usage',
usage: {
input: chunk.usage.prompt_tokens,
output: chunk.usage.completion_tokens,
total: chunk.usage.total_tokens,
thinking: chunk.usage.completion_tokens_details?.reasoning_tokens
}
};
}
}
}
private async *streamResponsesAPI(request: Request) {
// Use Responses API for actual thinking content
const response = await this.client.responses.create({
model: this.model,
input: this.toResponsesInput(request),
tools: this.toResponsesTools(request.tools),
stream: true
});
for await (const event of response) {
if (event.type === 'response.reasoning_text.delta') {
yield {
type: 'thinking',
content: event.text,
delta: event.delta
};
}
// Handle other event types...
}
}
private toOpenAIMessages(request: Request): any[] {
const messages: any[] = [];
// Handle system prompt
if (request.systemPrompt) {
const role = this.isReasoningModel() ? 'developer' : 'system';
messages.push({ role, content: request.systemPrompt });
}
// Convert unified messages
for (const msg of request.messages) {
if (msg.role === 'tool') {
messages.push({
role: 'tool',
content: msg.content,
tool_call_id: msg.toolCallId
});
} else {
messages.push({
role: msg.role,
content: this.contentToString(msg.content),
tool_calls: msg.toolCalls
});
}
}
return messages;
}
}
```
### Anthropic Adapter
```typescript
class AnthropicAdapter {
private client: Anthropic;
async *stream(request: Request): AsyncGenerator<Event> {
const stream = this.client.messages.stream({
model: this.model,
max_tokens: request.maxTokens || 1024,
messages: this.addCaching(request.messages),
system: request.systemPrompt,
tools: this.toAnthropicTools(request.tools),
thinking: request.reasoning?.enabled ? {
type: 'enabled',
budget_tokens: request.reasoning.maxTokens || 2000
} : undefined
});
let content = '';
let thinking = '';
stream.on('text', (delta, snapshot) => {
content = snapshot;
// Note: Can't yield from callback, need different approach
});
stream.on('thinking', (delta, snapshot) => {
thinking = snapshot;
});
// Use raw streaming instead for proper async generator
const rawStream = await this.client.messages.create({
...params,
stream: true
});
for await (const chunk of rawStream) {
switch (chunk.type) {
case 'content_block_delta':
if (chunk.delta.type === 'text_delta') {
content += chunk.delta.text;
yield {
type: 'text',
content,
delta: chunk.delta.text
};
}
break;
case 'message_delta':
if (chunk.usage) {
yield {
type: 'usage',
usage: {
input: chunk.usage.input_tokens,
output: chunk.usage.output_tokens,
total: chunk.usage.input_tokens + chunk.usage.output_tokens,
cacheRead: chunk.usage.cache_read_input_tokens,
cacheWrite: chunk.usage.cache_creation_input_tokens
}
};
}
break;
}
}
}
private toAnthropicMessages(request: Request): any[] {
return request.messages.map(msg => {
if (msg.role === 'tool') {
// Tool results go as user messages with tool_result blocks
return {
role: 'user',
content: [{
type: 'tool_result',
tool_use_id: msg.toolCallId,
content: msg.content
}]
};
}
// Always use content blocks
const blocks: any[] = [];
if (typeof msg.content === 'string') {
blocks.push({
type: 'text',
text: msg.content,
cache_control: msg.cacheControl
});
} else {
// Convert unified content to blocks
for (const part of msg.content) {
if (part.type === 'text') {
blocks.push({ type: 'text', text: part.text });
} else if (part.type === 'image') {
blocks.push({
type: 'image',
source: {
type: 'base64',
media_type: part.image.mimeType,
data: part.image.data
}
});
}
}
}
// Add tool calls as blocks
if (msg.toolCalls) {
for (const tc of msg.toolCalls) {
blocks.push({
type: 'tool_use',
id: tc.id,
name: tc.name,
input: tc.arguments
});
}
}
return {
role: msg.role === 'assistant' ? 'assistant' : 'user',
content: blocks
};
});
}
}
```
### Gemini Adapter
```typescript
class GeminiAdapter {
private client: GoogleGenAI;
async *stream(request: Request): AsyncGenerator<Event> {
const stream = await this.client.models.generateContentStream({
model: this.model,
systemInstruction: request.systemPrompt ? {
parts: [{ text: request.systemPrompt }]
} : undefined,
contents: this.toGeminiContents(request),
tools: this.toGeminiTools(request.tools),
abortSignal: request.signal
});
let content = '';
let thinking = '';
for await (const chunk of stream) {
const candidate = chunk.candidates?.[0];
if (!candidate?.content?.parts) continue;
for (const part of candidate.content.parts) {
if (part.text && !part.thought) {
content += part.text;
yield {
type: 'text',
content,
delta: part.text
};
} else if (part.text && part.thought) {
thinking += part.text;
yield {
type: 'thinking',
content: thinking,
delta: part.text
};
} else if (part.functionCall) {
yield {
type: 'toolCall',
toolCall: {
id: part.functionCall.id || crypto.randomUUID(),
name: part.functionCall.name,
arguments: part.functionCall.args
}
};
}
}
if (chunk.usageMetadata) {
yield {
type: 'usage',
usage: {
input: chunk.usageMetadata.promptTokenCount || 0,
output: chunk.usageMetadata.candidatesTokenCount || 0,
total: chunk.usageMetadata.totalTokenCount || 0,
thinking: chunk.usageMetadata.thoughtsTokenCount,
cacheRead: chunk.usageMetadata.cachedContentTokenCount
}
};
}
}
}
private toGeminiContents(request: Request): any[] {
return request.messages.map(msg => {
const parts: any[] = [];
if (typeof msg.content === 'string') {
parts.push({ text: msg.content });
} else {
for (const part of msg.content) {
if (part.type === 'text') {
parts.push({ text: part.text });
} else if (part.type === 'image') {
parts.push({
inlineData: {
mimeType: part.image.mimeType,
data: part.image.data
}
});
}
}
}
// Add function calls as parts
if (msg.toolCalls) {
for (const tc of msg.toolCalls) {
parts.push({
functionCall: {
name: tc.name,
args: tc.arguments
}
});
}
}
// Add tool results as function responses
if (msg.role === 'tool') {
parts.push({
functionResponse: {
name: msg.toolCallId,
response: { result: msg.content }
}
});
}
return {
role: msg.role === 'assistant' ? 'model' : msg.role === 'tool' ? 'user' : msg.role,
parts
};
});
}
}
```
## Usage Examples
### Basic Streaming
```typescript
const ai = new AI({
provider: 'openai',
apiKey: process.env.OPENAI_API_KEY,
model: 'gpt-4'
});
const stream = ai.stream({
messages: [
{ role: 'user', content: 'Write a haiku about coding' }
],
systemPrompt: 'You are a poetic programmer'
});
for await (const event of stream) {
switch (event.type) {
case 'text':
process.stdout.write(event.delta);
break;
case 'usage':
console.log(`\nTokens: ${event.usage.total}`);
break;
case 'done':
console.log(`\nFinished: ${event.reason}`);
break;
}
}
```
### Cross-Provider Tool Calling
```typescript
async function callWithTools(provider: 'openai' | 'anthropic' | 'gemini') {
const ai = new AI({
provider,
apiKey: process.env[`${provider.toUpperCase()}_API_KEY`],
model: getDefaultModel(provider)
});
const messages: Message[] = [{
role: 'user',
content: 'What is the weather in Paris and calculate 15 * 23?'
}];
const stream = ai.stream({
messages,
tools: [
{
name: 'weather',
description: 'Get weather for a location',
parameters: {
type: 'object',
properties: {
location: { type: 'string' }
},
required: ['location']
}
},
{
name: 'calculator',
description: 'Calculate math expressions',
parameters: {
type: 'object',
properties: {
expression: { type: 'string' }
},
required: ['expression']
}
}
]
});
const toolCalls: any[] = [];
for await (const event of stream) {
if (event.type === 'toolCall') {
toolCalls.push(event.toolCall);
// Execute tool
const result = await executeToolCall(event.toolCall);
// Add tool result to conversation
messages.push({
role: 'assistant',
toolCalls: [event.toolCall]
});
messages.push({
role: 'tool',
content: JSON.stringify(result),
toolCallId: event.toolCall.id
});
}
}
// Continue conversation with tool results
if (toolCalls.length > 0) {
const finalStream = ai.stream({ messages });
for await (const event of finalStream) {
if (event.type === 'text') {
process.stdout.write(event.delta);
}
}
}
}
```
### Thinking/Reasoning
```typescript
async function withThinking() {
// OpenAI o1
const openai = new AI({
provider: 'openai',
model: 'o1-preview'
});
// Anthropic Claude
const anthropic = new AI({
provider: 'anthropic',
model: 'claude-3-opus-20240229'
});
// Gemini thinking model
const gemini = new AI({
provider: 'gemini',
model: 'gemini-2.0-flash-thinking-exp-1219'
});
for (const ai of [openai, anthropic, gemini]) {
const stream = ai.stream({
messages: [{
role: 'user',
content: 'Solve this step by step: If a tree falls in a forest...'
}],
reasoning: {
enabled: true,
effort: 'high', // OpenAI reasoning_effort
maxTokens: 2000 // Anthropic budget
}
});
for await (const event of stream) {
if (event.type === 'thinking') {
console.log('[THINKING]', event.delta);
} else if (event.type === 'text') {
console.log('[RESPONSE]', event.delta);
} else if (event.type === 'done') {
// Final message includes model and usage with cost
console.log('Model:', event.message.model);
console.log('Tokens:', event.message.usage?.total);
console.log('Cost: $', event.message.usage?.cost?.total);
}
}
}
}
```
## Implementation Notes
### Critical Decisions
1. **Streaming First**: All providers support streaming, non-streaming is just collected events
2. **Unified Events**: Same event types across all providers for consistent handling
3. **Separate System Prompt**: Required for Anthropic/Gemini compatibility
4. **Tool Role**: Unified way to handle tool responses across providers
5. **Content Arrays**: Support both string and structured content
6. **Thinking Extraction**: Normalize reasoning across different provider formats
### Provider-Specific Handling
**OpenAI**:
- Choose between Chat Completions and Responses API based on model and thinking needs
- Map `developer` role for o1/o3 models
- Handle streaming tool call deltas
**Anthropic**:
- Convert to content blocks (always arrays)
- Tool results as user messages with tool_result blocks
- Handle MessageStream events or raw streaming
**Gemini**:
- Convert to parts system
- Extract thinking from `part.thought` flag
- Map `assistant` to `model` role
- Handle function calls/responses in parts
### Error Handling
```typescript
class AIError extends Error {
constructor(
message: string,
public code: string,
public provider: string,
public retryable: boolean,
public statusCode?: number
) {
super(message);
}
}
// In adapters
try {
// API call
} catch (error) {
if (error instanceof RateLimitError) {
throw new AIError(
'Rate limit exceeded',
'rate_limit',
this.provider,
true,
429
);
}
// Map other errors...
}
```
## Model Information & Cost Tracking
### Models Database
We cache the models.dev API data at build time for fast, offline access:
```typescript
// scripts/update-models.ts - Run during build or manually
async function updateModels() {
const response = await fetch('https://models.dev/api.json');
const data = await response.json();
// Transform to our format
const models: ModelsDatabase = transformModelsData(data);
// Generate TypeScript file
const content = `// Auto-generated from models.dev API
// Last updated: ${new Date().toISOString()}
// Run 'npm run update-models' to refresh
export const MODELS_DATABASE: ModelsDatabase = ${JSON.stringify(models, null, 2)};
`;
await fs.writeFile('src/models-data.ts', content);
}
// src/models.ts - Runtime model lookup
import { MODELS_DATABASE } from './models-data.js';
// Simple lookup with fallback
export function getModelInfo(provider: string, model: string): ModelInfo {
const info = MODELS_DATABASE.providers[provider]?.models[model];
if (!info) {
// Fallback for unknown models
return {
id: model,
name: model,
provider,
capabilities: {
reasoning: false,
toolCall: true,
vision: false
},
cost: { input: 0, output: 0 },
limits: { context: 128000, output: 4096 }
};
}
return info;
}
// Optional: Runtime override for testing new models
const runtimeOverrides = new Map<string, ModelInfo>();
export function registerModel(provider: string, model: string, info: ModelInfo) {
runtimeOverrides.set(`${provider}:${model}`, info);
}
```
### Cost Calculation
```typescript
class CostTracker {
private usage: TokenUsage = {
input: 0,
output: 0,
total: 0,
cacheRead: 0,
cacheWrite: 0
};
private modelInfo: ModelInfo;
constructor(modelInfo: ModelInfo) {
this.modelInfo = modelInfo;
}
addUsage(tokens: Partial<TokenUsage>): TokenUsage {
this.usage.input += tokens.input || 0;
this.usage.output += tokens.output || 0;
this.usage.thinking += tokens.thinking || 0;
this.usage.cacheRead += tokens.cacheRead || 0;
this.usage.cacheWrite += tokens.cacheWrite || 0;
this.usage.total = this.usage.input + this.usage.output + (this.usage.thinking || 0);
// Calculate costs (per million tokens)
const cost = this.modelInfo.cost;
this.usage.cost = {
input: (this.usage.input / 1_000_000) * cost.input,
output: (this.usage.output / 1_000_000) * cost.output,
cache:
((this.usage.cacheRead || 0) / 1_000_000) * (cost.cacheRead || 0) +
((this.usage.cacheWrite || 0) / 1_000_000) * (cost.cacheWrite || 0),
total: 0
};
this.usage.cost.total =
this.usage.cost.input +
this.usage.cost.output +
this.usage.cost.cache;
return { ...this.usage };
}
getTotalCost(): number {
return this.usage.cost?.total || 0;
}
getUsageSummary(): string {
return `Tokens: ${this.usage.total} (${this.usage.input}→${this.usage.output}) | Cost: $${this.getTotalCost().toFixed(4)}`;
}
}
```
### Integration in Adapters
```typescript
class OpenAIAdapter {
private costTracker: CostTracker;
constructor(config: AIConfig) {
const modelInfo = getModelInfo('openai', config.model);
this.costTracker = new CostTracker(modelInfo);
}
async *stream(request: Request): AsyncGenerator<Event> {
// ... streaming logic ...
if (chunk.usage) {
const usage = this.costTracker.addUsage({
input: chunk.usage.prompt_tokens,
output: chunk.usage.completion_tokens,
thinking: chunk.usage.completion_tokens_details?.reasoning_tokens,
cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens
});
yield { type: 'usage', usage };
}
}
}
```
## Next Steps
1. Create models.ts with models.dev integration
2. Implement base `AI` class with adapter pattern
3. Create three provider adapters with full streaming support
4. Add comprehensive error mapping
5. Implement token counting and cost tracking
6. Add test suite for each provider
7. Create migration guide from native SDKs