diff --git a/packages/ai/docs/anthropic-api.md b/packages/ai/docs/anthropic-api.md deleted file mode 100644 index 7ede2cb2..00000000 --- a/packages/ai/docs/anthropic-api.md +++ /dev/null @@ -1,1706 +0,0 @@ -# Anthropic SDK Implementation Guide - -This document provides a comprehensive guide for implementing the required features using the Anthropic SDK. All examples use TypeScript and include actual code that works with the SDK. - -## Table of Contents - -1. [Basic Client Setup](#basic-client-setup) -2. [Streaming Responses](#streaming-responses) -3. [Request Abortion](#request-abortion) -4. [Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Context and Message History](#context-and-message-history) -7. [Token Counting](#token-counting) -8. [Prompt Caching](#prompt-caching) -9. [Tool Use (Function Calling)](#tool-use-function-calling) -10. [System Prompts](#system-prompts) -11. [Content Block System](#content-block-system) -12. [MessageStream Helper Class](#messagestream-helper-class) -13. [Thinking Tokens and Extended Reasoning](#thinking-tokens-and-extended-reasoning) -14. 
[Complete Implementation Example](#complete-implementation-example) - -## Basic Client Setup - -```typescript -import Anthropic from '@anthropic-ai/sdk'; - -// Create client with configuration -const anthropic = new Anthropic({ - apiKey: process.env.ANTHROPIC_API_KEY, // Required - baseURL: 'https://api.anthropic.com', // Optional, this is the default - timeout: 60000, // Optional, in milliseconds - maxRetries: 3, // Optional, default is 2 -}); -``` - -### Environment Variables - -The SDK automatically reads from these environment variables: -- `ANTHROPIC_API_KEY` - Your API key -- `ANTHROPIC_BASE_URL` - Custom base URL (optional) - -## Streaming Responses - -### Basic Streaming with MessageStream - -```typescript -import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream'; - -async function basicStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello, Claude!' }], - }); - - // Listen to different event types - stream.on('text', (text, snapshot) => { - process.stdout.write(text); // text is the delta, snapshot is accumulated - }); - - stream.on('message', (message) => { - console.log('\nFinal message:', message); - }); - - stream.on('error', (error) => { - console.error('Error:', error); - }); - - // Wait for completion - const finalMessage = await stream.finalMessage(); - return finalMessage; -} -``` - -### Raw Streaming with create() - -```typescript -import { RawMessageStreamEvent } from '@anthropic-ai/sdk'; - -async function rawStreaming() { - const stream = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - stream: true, - }); - - let content = ''; - let usage: any = null; - - for await (const chunk of stream) { - switch (chunk.type) { - case 'message_start': - console.log('Message started:', chunk.message); - break; - - case 'content_block_delta': - if (chunk.delta.type === 'text_delta') { - content += chunk.delta.text; - process.stdout.write(chunk.delta.text); - } - break; - - case 'message_delta': - if (chunk.usage) { - usage = chunk.usage; - } - console.log('\nStop reason:', chunk.delta.stop_reason); - break; - - case 'message_stop': - console.log('\nStream ended'); - break; - } - } - - return { content, usage }; -} -``` - -### Handling Thinking Tokens in Streams - -```typescript -async function streamWithThinking() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 4000, - thinking: { - type: 'enabled', - budget_tokens: 2000, - }, - messages: [{ role: 'user', content: 'Solve this complex math problem: ...' }], - }); - - stream.on('thinking', (thinking, snapshot) => { - console.log('[Thinking]', thinking); // Delta thinking content - }); - - stream.on('text', (text, snapshot) => { - process.stdout.write(text); // Regular response text - }); - - const message = await stream.finalMessage(); - - // Access thinking content from final message - for (const block of message.content) { - if (block.type === 'thinking') { - console.log('Final thinking:', block.thinking); - } - } -} -``` - -## Request Abortion - -### AbortController Integration - -```typescript -async function abortableRequest() { - const controller = new AbortController(); - - // Abort after 5 seconds - const timeoutId = setTimeout(() => controller.abort(), 5000); - - try { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Long task...' 
}], - }, { - // Pass abort signal in request options - signal: controller.signal, - }); - - stream.on('error', (error) => { - if (error.name === 'AbortError') { - console.log('Request was aborted'); - } else { - console.error('Other error:', error); - } - }); - - const result = await stream.finalMessage(); - clearTimeout(timeoutId); - return result; - - } catch (error) { - clearTimeout(timeoutId); - - if (error.name === 'AbortError') { - console.log('Request aborted by user'); - } else { - throw error; - } - } -} - -// Manual abort from MessageStream -async function manualAbort() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Start a story...' }], - }); - - // Abort after receiving some content - stream.on('text', (text, snapshot) => { - if (snapshot.length > 100) { - stream.abort(); // Built-in abort method - } - }); - - try { - await stream.finalMessage(); - } catch (error) { - if (stream.aborted) { - console.log('Stream was manually aborted'); - } - } -} -``` - -## Error Handling - -### Comprehensive Error Types - -```typescript -import { - AnthropicError, - APIError, - APIConnectionError, - APIConnectionTimeoutError, - APIUserAbortError, - NotFoundError, - ConflictError, - RateLimitError, - BadRequestError, - AuthenticationError, - InternalServerError, - PermissionDeniedError, - UnprocessableEntityError, -} from '@anthropic-ai/sdk'; - -async function handleErrors() { - try { - const message = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - return message; - - } catch (error) { - // Handle specific error types - if (error instanceof RateLimitError) { - console.error('Rate limit exceeded:', { - status: error.status, - headers: error.headers, - retryAfter: error.headers.get('retry-after'), - }); - - // Wait and retry logic - const retryAfter = parseInt(error.headers.get('retry-after') || '60'); - await new Promise(resolve => setTimeout(resolve, retryAfter * 1000)); - - } else if (error instanceof AuthenticationError) { - console.error('Authentication failed:', error.status); - throw new Error('Invalid API key'); - - } else if (error instanceof BadRequestError) { - console.error('Bad request:', { - status: error.status, - error: error.error, - message: error.message, - }); - - } else if (error instanceof APIConnectionTimeoutError) { - console.error('Request timed out'); - // Retry with longer timeout - - } else if (error instanceof APIConnectionError) { - console.error('Network error:', error.message); - // Retry with backoff - - } else if (error instanceof APIUserAbortError) { - console.log('Request was aborted by user'); - - } else if (error instanceof InternalServerError) { - console.error('Server error:', error.status); - // Retry with exponential backoff - - } else if (error instanceof APIError) { - console.error('API error:', { - status: error.status, - error: error.error, - requestId: error.requestID, - }); - - } else { - console.error('Unexpected error:', error); - throw error; - } - } -} - -// Error handling in streams -function handleStreamErrors() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - stream.on('error', (error) => { - if (error instanceof RateLimitError) { - console.log('Rate limited during stream'); - } else if (error instanceof APIConnectionError) { - console.log('Connection lost during stream'); - } else { - console.error('Stream error:', error); - } - }); - - return stream; -} -``` - -## Stop Reasons - -### Understanding Stop Reasons - -```typescript -import { StopReason } from '@anthropic-ai/sdk'; - -async function handleStopReasons() { - const message = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 100, // Intentionally low to trigger max_tokens - messages: [{ role: 'user', content: 'Write a long story...' }], - stop_sequences: ['THE END'], // Custom stop sequence - }); - - // Extract and handle stop reason - const stopReason: StopReason = message.stop_reason; - - switch (stopReason) { - case 'end_turn': - console.log('Model completed naturally'); - break; - - case 'max_tokens': - console.log('Hit token limit, response may be incomplete'); - // Consider continuing with a follow-up request - break; - - case 'stop_sequence': - console.log('Hit custom stop sequence:', message.stop_sequence); - break; - - case 'tool_use': - console.log('Model wants to use tools'); - // Handle tool calls (see Tool Use section) - break; - - case 'pause_turn': - console.log('Long turn paused, can continue'); - // Continue with the partial response as context - break; - - case 'refusal': - console.log('Model refused to respond due to safety'); - break; - - default: - console.log('Unknown stop reason:', stopReason); - } - - return { message, stopReason }; -} - -// In streaming mode -function handleStopReasonsInStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - stream.on('message', (message) => { - const stopReason = message.stop_reason; - console.log('Final stop reason:', stopReason); - - if (stopReason === 'max_tokens') { - console.log('Response was truncated'); - } - }); - - return stream; -} -``` - -## Context and Message History - -### Message Format and Serialization - -```typescript -import { MessageParam, Message } from '@anthropic-ai/sdk'; - -interface ConversationState { - messages: MessageParam[]; - totalTokens: number; - model: string; - systemPrompt?: string; -} - -class ConversationManager { - private state: ConversationState; - - constructor(model: string, systemPrompt?: string) { - this.state = { - messages: [], - totalTokens: 0, - model, - systemPrompt, - }; - } - - // Add user message - addUserMessage(content: string | any[]) { - this.state.messages.push({ - role: 'user', - content, - }); - } - - // Add assistant message from API response - addAssistantMessage(message: Message) { - this.state.messages.push({ - role: 'assistant', - content: message.content, - }); - - // Update token count - this.state.totalTokens += message.usage.input_tokens + message.usage.output_tokens; - } - - // Add tool results - addToolResult(toolUseId: string, result: string, isError = false) { - // Find the last message and ensure it has tool use - const lastMessage = this.state.messages[this.state.messages.length - 1]; - if (lastMessage?.role === 'assistant') { - // Add tool result as new user message - this.state.messages.push({ - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: toolUseId, - content: result, - is_error: isError, - }], - }); - } - } - - // Get messages for API call - getMessages(): MessageParam[] { - return [...this.state.messages]; - } - - // Serialize for persistence - serialize(): string { - return JSON.stringify(this.state); - } - - // Deserialize from storage - static deserialize(json: string): ConversationManager { - const state = JSON.parse(json); - const manager = new 
ConversationManager(state.model, state.systemPrompt); - manager.state = state; - return manager; - } - - // Create request parameters - createRequestParams(newMessage?: string): any { - if (newMessage) { - this.addUserMessage(newMessage); - } - - const params: any = { - model: this.state.model, - max_tokens: 4000, - messages: this.getMessages(), - }; - - if (this.state.systemPrompt) { - params.system = this.state.systemPrompt; - } - - return params; - } - - // Get conversation stats - getStats() { - return { - messageCount: this.state.messages.length, - totalTokens: this.state.totalTokens, - userMessages: this.state.messages.filter(m => m.role === 'user').length, - assistantMessages: this.state.messages.filter(m => m.role === 'assistant').length, - }; - } -} - -// Usage example -async function conversationExample() { - const conversation = new ConversationManager( - 'claude-sonnet-4-20250514', - 'You are a helpful coding assistant.' - ); - - // First exchange - const params1 = conversation.createRequestParams('Hello, can you help me with Python?'); - const response1 = await anthropic.messages.create(params1); - conversation.addAssistantMessage(response1); - - // Second exchange - const params2 = conversation.createRequestParams('Show me a simple function.'); - const response2 = await anthropic.messages.create(params2); - conversation.addAssistantMessage(response2); - - // Save conversation - const saved = conversation.serialize(); - localStorage.setItem('conversation', saved); - - // Later: restore conversation - const restored = ConversationManager.deserialize(saved); - console.log('Conversation stats:', restored.getStats()); -} -``` - -## Token Counting - -### Using the Count Tokens API - -```typescript -import { MessageCountTokensParams, MessageTokensCount } from '@anthropic-ai/sdk'; - -async function countTokens() { - const messages = [ - { role: 'user', content: 'Hello, how are you?' }, - { role: 'assistant', content: 'I am doing well, thank you for asking!' 
}, - { role: 'user', content: 'Can you help me write some code?' }, - ] as const; - - // Count tokens for messages - const tokenCount: MessageTokensCount = await anthropic.messages.countTokens({ - model: 'claude-sonnet-4-20250514', - messages, - system: 'You are a helpful coding assistant.', - }); - - console.log('Input tokens:', tokenCount.input_tokens); - return tokenCount.input_tokens; -} - -// Count tokens with tools -async function countTokensWithTools() { - const tools = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { type: 'string' }, - }, - required: ['expression'], - }, - }, - ]; - - const tokenCount = await anthropic.messages.countTokens({ - model: 'claude-sonnet-4-20250514', - messages: [{ role: 'user', content: 'Calculate 2+2' }], - tools, - }); - - return tokenCount.input_tokens; -} - -// Extract usage from responses -function extractUsageFromResponse(message: Message) { - const usage = message.usage; - - return { - inputTokens: usage.input_tokens, - outputTokens: usage.output_tokens, - cacheReadTokens: usage.cache_read_input_tokens || 0, - cacheWriteTokens: usage.cache_creation_input_tokens || 0, - totalTokens: usage.input_tokens + usage.output_tokens, - serviceTier: usage.service_tier, - cacheCreation: usage.cache_creation, - }; -} - -// Token usage in streaming -function trackTokensInStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - let finalUsage: any = null; - - stream.on('message', (message) => { - finalUsage = extractUsageFromResponse(message); - console.log('Final usage:', finalUsage); - }); - - return stream; -} -``` - -## Prompt Caching - -### Basic Caching Implementation - -```typescript -import { CacheControlEphemeral } from '@anthropic-ai/sdk'; - -async function usePromptCaching() { - // Cache control for system prompt - const systemPrompt = [ - { - type: 'text', - text: 'You are an expert software engineer with deep knowledge of...', - cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, - }, - ]; - - // Cache control for large document - const messages = [ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Here is a large codebase to analyze:', - }, - { - type: 'document', - source: { - type: 'text', - data: '// Large codebase content...', - media_type: 'text/plain', - }, - cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, - }, - { - type: 'text', - text: 'Please analyze this code for bugs.', - }, - ], - }, - ] as const; - - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages, - }); - - // Check cache usage - const usage = response.usage; - console.log('Cache read tokens:', usage.cache_read_input_tokens); - console.log('Cache write tokens:', usage.cache_creation_input_tokens); - - return response; -} - -// Caching with different TTL options -async function cachingWithTTL() { - const shortCache = { - type: 'ephemeral', - ttl: '5m', // 5 minutes - } as CacheControlEphemeral; - - const longCache = { - type: 'ephemeral', - ttl: '1h', // 1 hour (default) - } as CacheControlEphemeral; - - const messages = [ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Short-lived context', - cache_control: shortCache, - }, - { - type: 'text', - text: 'Long-lived context that should be cached longer', - cache_control: longCache, - 
}, - { - type: 'text', - text: 'What can you tell me about this?', - }, - ], - }, - ] as const; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages, - }); -} -``` - -## Tool Use (Function Calling) - -### Complete Tool Implementation - -```typescript -import { Tool, ToolUseBlock, ToolChoice } from '@anthropic-ai/sdk'; - -// Define tools -const tools: Tool[] = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { - type: 'string', - description: 'Mathematical expression to evaluate', - }, - }, - required: ['expression'], - }, - }, - { - name: 'weather', - description: 'Get weather information for a location', - input_schema: { - type: 'object', - properties: { - location: { - type: 'string', - description: 'City name or coordinates', - }, - units: { - type: 'string', - enum: ['celsius', 'fahrenheit'], - description: 'Temperature units', - }, - }, - required: ['location'], - }, - }, -]; - -// Tool implementations -const toolImplementations = { - calculator: (args: { expression: string }) => { - try { - // Simple eval - in production, use a safe math parser - const result = eval(args.expression); - return `Result: ${result}`; - } catch (error) { - return `Error: Invalid expression - ${error.message}`; - } - }, - - weather: async (args: { location: string; units?: string }) => { - // Mock weather API call - return `Weather in ${args.location}: 22°C, sunny with light clouds`; - }, -}; - -async function toolUseExample() { - const conversation = new ConversationManager('claude-sonnet-4-20250514'); - - // Send initial message with tools - conversation.addUserMessage('What is 15 * 23 and what is the weather in Paris?'); - - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: conversation.getMessages(), - tools, - tool_choice: { type: 'auto' } as 
ToolChoice, - }); - - conversation.addAssistantMessage(response); - - // Handle tool calls - const toolCalls: ToolUseBlock[] = response.content.filter( - (block): block is ToolUseBlock => block.type === 'tool_use' - ); - - // Execute each tool call - for (const toolCall of toolCalls) { - const toolName = toolCall.name; - const toolArgs = toolCall.input; - const toolId = toolCall.id; - - console.log(`Executing tool: ${toolName} with args:`, toolArgs); - - try { - let result: string; - - if (toolName in toolImplementations) { - result = await toolImplementations[toolName](toolArgs as any); - } else { - result = `Error: Unknown tool "${toolName}"`; - } - - // Add tool result to conversation - conversation.addToolResult(toolId, result); - - } catch (error) { - // Add error result - conversation.addToolResult(toolId, `Error: ${error.message}`, true); - } - } - - // Get final response after tool execution - if (toolCalls.length > 0) { - const finalResponse = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: conversation.getMessages(), - tools, - }); - - conversation.addAssistantMessage(finalResponse); - return finalResponse; - } - - return response; -} - -// Streaming with tools -async function streamingWithTools() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Calculate 42 * 17' }], - tools, - }); - - const toolCalls: ToolUseBlock[] = []; - - stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - toolCalls.push(block); - } - }); - - stream.on('message', async (message) => { - if (message.stop_reason === 'tool_use') { - console.log('Tool calls detected:', toolCalls); - // Handle tools... 
- } - }); - - return stream; -} - -// Force specific tool usage -async function forceToolUsage() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'I need to do some math' }], - tools, - tool_choice: { - type: 'tool', - name: 'calculator', - } as ToolChoice, - }); -} -``` - -## System Prompts - -### System Prompt Variations - -```typescript -// Simple string system prompt -async function basicSystemPrompt() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: 'You are a helpful coding assistant specialized in Python.', - messages: [{ role: 'user', content: 'Help me write a function' }], - }); -} - -// Complex system prompt with caching -async function complexSystemPrompt() { - const systemPrompt = [ - { - type: 'text', - text: `You are an expert software engineer with the following expertise: - -1. Python development and best practices -2. Web frameworks like Django and FastAPI -3. Database design and optimization -4. Testing strategies and TDD -5. 
Code review and refactoring - -Guidelines for your responses: -- Always write clean, readable code -- Include proper error handling -- Add type hints when using Python -- Explain your reasoning -- Suggest improvements when applicable - -When reviewing code: -- Focus on functionality, performance, and maintainability -- Point out potential bugs or edge cases -- Suggest more pythonic approaches when relevant`, - cache_control: { type: 'ephemeral', ttl: '1h' }, - }, - ] as const; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages: [{ role: 'user', content: 'Review this Python function for me' }], - }); -} - -// Dynamic system prompt based on context -function buildSystemPrompt(userRole: string, expertise: string[]): string { - const basePrompt = `You are an AI assistant helping a ${userRole}.`; - - const expertisePrompt = expertise.length > 0 - ? `\n\nYour areas of expertise include: ${expertise.join(', ')}.` - : ''; - - const guidelines = ` - -Guidelines: -- Be helpful and accurate -- Explain complex concepts clearly -- Provide practical examples -- Ask for clarification when needed`; - - return basePrompt + expertisePrompt + guidelines; -} - -async function dynamicSystemPrompt() { - const systemPrompt = buildSystemPrompt('software developer', [ - 'JavaScript', 'TypeScript', 'React', 'Node.js' - ]); - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages: [{ role: 'user', content: 'Help me optimize this React component' }], - }); -} -``` - -## Content Block System - -### Understanding Content Blocks - -The Anthropic API uses a content block system where message content is always an array, even for simple text. 
- -```typescript -import { - ContentBlockParam, - TextBlockParam, - ImageBlockParam, - DocumentBlockParam, - ToolUseBlockParam, - ToolResultBlockParam -} from '@anthropic-ai/sdk'; - -// Text content (most common) -const textContent: TextBlockParam = { - type: 'text', - text: 'Hello, Claude!', -}; - -// Image content -const imageContent: ImageBlockParam = { - type: 'image', - source: { - type: 'base64', - media_type: 'image/jpeg', - data: '/9j/4AAQSkZJRg...', // base64 encoded image - }, -}; - -// Document content with caching -const documentContent: DocumentBlockParam = { - type: 'document', - source: { - type: 'text', - data: 'Large document content...', - media_type: 'text/plain', - }, - cache_control: { type: 'ephemeral', ttl: '1h' }, - title: 'Important Document', - context: 'This document contains key information for the project', -}; - -// Tool use block (from assistant) -const toolUseContent: ToolUseBlockParam = { - type: 'tool_use', - id: 'tool_123', - name: 'calculator', - input: { expression: '2 + 2' }, -}; - -// Tool result block (from user) -const toolResultContent: ToolResultBlockParam = { - type: 'tool_result', - tool_use_id: 'tool_123', - content: 'Result: 4', -}; - -// Mixed content message -async function mixedContentExample() { - const mixedMessage: ContentBlockParam[] = [ - { - type: 'text', - text: 'Here is an image and a document to analyze:', - }, - { - type: 'image', - source: { - type: 'base64', - media_type: 'image/png', - data: 'iVBORw0KGgoAAAANSUhEUgA...', // base64 image - }, - }, - { - type: 'document', - source: { - type: 'text', - data: 'Document content here...', - media_type: 'text/plain', - }, - title: 'Analysis Document', - }, - { - type: 'text', - text: 'What insights can you provide from these?', - }, - ]; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: mixedMessage }], - }); -} - -// Helper functions for content manipulation -function 
createTextBlock(text: string, cached = false): TextBlockParam { - const block: TextBlockParam = { - type: 'text', - text, - }; - - if (cached) { - block.cache_control = { type: 'ephemeral', ttl: '1h' }; - } - - return block; -} - -function createImageBlock(base64Data: string, mimeType: string): ImageBlockParam { - return { - type: 'image', - source: { - type: 'base64', - media_type: mimeType as any, - data: base64Data, - }, - }; -} - -// Extract text from response content blocks -function extractTextFromResponse(content: any[]): string { - return content - .filter(block => block.type === 'text') - .map(block => block.text) - .join('\n'); -} - -// Extract thinking content -function extractThinkingFromResponse(content: any[]): string | null { - const thinkingBlock = content.find(block => block.type === 'thinking'); - return thinkingBlock?.thinking || null; -} -``` - -## MessageStream Helper Class - -### Advanced MessageStream Usage - -```typescript -import { MessageStream, MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream'; - -class AdvancedMessageHandler { - private stream: MessageStream; - private content = ''; - private thinking = ''; - private toolCalls: any[] = []; - private citations: any[] = []; - - constructor(stream: MessageStream) { - this.stream = stream; - this.setupEventHandlers(); - } - - private setupEventHandlers() { - // Connection established - this.stream.on('connect', () => { - console.log('Stream connected'); - }); - - // Text content (delta and snapshot) - this.stream.on('text', (delta: string, snapshot: string) => { - process.stdout.write(delta); - this.content = snapshot; - }); - - // Thinking content (Claude's internal reasoning) - this.stream.on('thinking', (delta: string, snapshot: string) => { - console.log('[Thinking]', delta); - this.thinking = snapshot; - }); - - // Citations (when referencing documents) - this.stream.on('citation', (citation, citations) => { - console.log('Citation:', citation); - this.citations = 
citations; - }); - - // Content blocks (including tool calls) - this.stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - console.log('Tool call:', block); - this.toolCalls.push(block); - } - }); - - // Raw stream events - this.stream.on('streamEvent', (event, snapshot) => { - // Handle any stream event - console.log('Stream event:', event.type); - }); - - // Final message - this.stream.on('finalMessage', (message) => { - console.log('\nFinal message received'); - this.handleFinalMessage(message); - }); - - // Error handling - this.stream.on('error', (error) => { - console.error('Stream error:', error); - }); - - // Stream end - this.stream.on('end', () => { - console.log('\nStream ended'); - }); - - // User abort - this.stream.on('abort', (error) => { - console.log('Stream aborted by user'); - }); - } - - private handleFinalMessage(message: any) { - console.log('Stop reason:', message.stop_reason); - console.log('Token usage:', message.usage); - - // Process thinking content if available - for (const block of message.content) { - if (block.type === 'thinking') { - console.log('Final thinking content:', block.thinking); - } - } - } - - async waitForCompletion() { - try { - const finalMessage = await this.stream.finalMessage(); - return { - message: finalMessage, - content: this.content, - thinking: this.thinking, - toolCalls: this.toolCalls, - citations: this.citations, - }; - } catch (error) { - if (this.stream.aborted) { - console.log('Stream was aborted'); - } else { - throw error; - } - } - } - - abort() { - this.stream.abort(); - } - - // Get request ID for debugging - getRequestId() { - return this.stream.request_id; - } - - // Access the underlying Response object - async getResponse() { - const { response } = await this.stream.withResponse(); - return response; - } -} - -// Usage example -async function advancedStreamExample() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 2000, - 
thinking: { - type: 'enabled', - budget_tokens: 1000, - }, - messages: [{ - role: 'user', - content: 'Analyze this complex problem and show your reasoning...' - }], - }); - - const handler = new AdvancedMessageHandler(stream); - - // Optional: abort after 30 seconds - const timeoutId = setTimeout(() => { - handler.abort(); - }, 30000); - - try { - const result = await handler.waitForCompletion(); - clearTimeout(timeoutId); - - console.log('Final result:', { - contentLength: result.content.length, - thinkingLength: result.thinking.length, - toolCallCount: result.toolCalls.length, - citationCount: result.citations.length, - }); - - return result; - } catch (error) { - clearTimeout(timeoutId); - throw error; - } -} -``` - -## Thinking Tokens and Extended Reasoning - -### Enabling Extended Thinking - -```typescript -async function extendedThinkingExample() { - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 4000, - thinking: { - type: 'enabled', - budget_tokens: 2000, // Minimum 1024, must be < max_tokens - }, - messages: [{ - role: 'user', - content: `Solve this complex problem step by step: - -A company has 3 factories. Factory A produces 100 units/day, -Factory B produces 150 units/day, and Factory C produces 200 units/day. 
-If the company needs to fulfill an order of 10,000 units in the most -cost-efficient way, and the costs per unit are $5, $4, and $6 respectively, -what's the optimal production strategy?` - }], - }); - - // Extract thinking content - for (const block of response.content) { - if (block.type === 'thinking') { - console.log('Claude\'s thinking process:'); - console.log(block.thinking); - console.log('Signature:', block.signature); - } else if (block.type === 'text') { - console.log('\nFinal answer:'); - console.log(block.text); - } - } - - return response; -} - -// Disable thinking -async function disableThinking() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - thinking: { - type: 'disabled', - }, - messages: [{ role: 'user', content: 'Quick answer please' }], - }); -} - -// Streaming with thinking -async function streamThinking() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 3000, - thinking: { - type: 'enabled', - budget_tokens: 1500, - }, - messages: [{ - role: 'user', - content: 'Think through this carefully: How would you design a distributed cache?' 
- }], - }); - - let thinkingContent = ''; - let responseContent = ''; - - stream.on('thinking', (delta, snapshot) => { - // Stream thinking content as it comes - process.stdout.write(`[THINKING] ${delta}`); - thinkingContent = snapshot; - }); - - stream.on('text', (delta, snapshot) => { - // Stream final response - process.stdout.write(delta); - responseContent = snapshot; - }); - - const finalMessage = await stream.finalMessage(); - - return { - thinking: thinkingContent, - response: responseContent, - usage: finalMessage.usage, - }; -} -``` - -## Complete Implementation Example - -Here's a comprehensive example that combines all the features: - -```typescript -import Anthropic, { - MessageParam, - Message, - Tool, - ToolUseBlock, - AnthropicError -} from '@anthropic-ai/sdk'; - -class AnthropicClient { - private client: Anthropic; - private conversation: MessageParam[] = []; - private totalTokens = 0; - - constructor(apiKey: string) { - this.client = new Anthropic({ apiKey }); - } - - async sendMessage( - content: string, - options: { - stream?: boolean; - tools?: Tool[]; - thinking?: boolean; - systemPrompt?: string; - maxTokens?: number; - temperature?: number; - cached?: boolean; - } = {} - ) { - const { - stream = false, - tools = [], - thinking = false, - systemPrompt, - maxTokens = 1024, - temperature = 1.0, - cached = false, - } = options; - - // Add user message - this.conversation.push({ - role: 'user', - content: cached - ? 
[{ type: 'text', text: content, cache_control: { type: 'ephemeral', ttl: '1h' } }] - : content, - }); - - const params: any = { - model: 'claude-sonnet-4-20250514', - max_tokens: maxTokens, - temperature, - messages: [...this.conversation], - }; - - if (systemPrompt) { - params.system = systemPrompt; - } - - if (tools.length > 0) { - params.tools = tools; - params.tool_choice = { type: 'auto' }; - } - - if (thinking) { - params.thinking = { - type: 'enabled', - budget_tokens: Math.min(maxTokens / 2, 2000), - }; - } - - try { - if (stream) { - return await this.handleStreamingResponse(params, tools); - } else { - return await this.handleSingleResponse(params, tools); - } - } catch (error) { - return this.handleError(error); - } - } - - private async handleSingleResponse(params: any, tools: Tool[]) { - const response = await this.client.messages.create(params); - - // Track tokens - this.totalTokens += response.usage.input_tokens + response.usage.output_tokens; - - // Add assistant response - this.conversation.push({ - role: 'assistant', - content: response.content, - }); - - // Handle tool calls - const toolCalls = response.content.filter( - (block): block is ToolUseBlock => block.type === 'tool_use' - ); - - if (toolCalls.length > 0 && tools.length > 0) { - await this.handleToolCalls(toolCalls, params, tools); - } - - return { - content: this.extractText(response.content), - thinking: this.extractThinking(response.content), - toolCalls, - usage: response.usage, - stopReason: response.stop_reason, - }; - } - - private async handleStreamingResponse(params: any, tools: Tool[]) { - const stream = this.client.messages.stream(params); - - let content = ''; - let thinking = ''; - const toolCalls: ToolUseBlock[] = []; - let finalMessage: Message; - - return new Promise((resolve, reject) => { - stream.on('text', (delta, snapshot) => { - process.stdout.write(delta); - content = snapshot; - }); - - stream.on('thinking', (delta, snapshot) => { - console.log(`[THINKING] 
${delta}`); - thinking = snapshot; - }); - - stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - toolCalls.push(block); - } - }); - - stream.on('finalMessage', async (message) => { - finalMessage = message; - this.totalTokens += message.usage.input_tokens + message.usage.output_tokens; - - this.conversation.push({ - role: 'assistant', - content: message.content, - }); - - if (toolCalls.length > 0 && tools.length > 0) { - try { - await this.handleToolCalls(toolCalls, params, tools); - } catch (error) { - reject(error); - return; - } - } - - resolve({ - content, - thinking, - toolCalls, - usage: message.usage, - stopReason: message.stop_reason, - }); - }); - - stream.on('error', reject); - }); - } - - private async handleToolCalls(toolCalls: ToolUseBlock[], params: any, tools: Tool[]) { - // Execute tool calls - for (const toolCall of toolCalls) { - const result = await this.executeToolCall(toolCall); - - this.conversation.push({ - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: toolCall.id, - content: result.content, - is_error: result.isError, - }], - }); - } - - // Get response after tool execution - const followUpResponse = await this.client.messages.create({ - ...params, - messages: [...this.conversation], - }); - - this.conversation.push({ - role: 'assistant', - content: followUpResponse.content, - }); - - this.totalTokens += followUpResponse.usage.input_tokens + followUpResponse.usage.output_tokens; - } - - private async executeToolCall(toolCall: ToolUseBlock): Promise<{ content: string; isError: boolean }> { - // Mock tool implementations - const tools = { - calculator: (args: any) => { - try { - const result = eval(args.expression); - return { content: `Result: ${result}`, isError: false }; - } catch (error) { - return { content: `Error: ${error.message}`, isError: true }; - } - }, - weather: (args: any) => { - return { content: `Weather in ${args.location}: 22°C, sunny`, isError: false }; - }, - }; - - const 
toolName = toolCall.name; - if (toolName in tools) { - return tools[toolName](toolCall.input); - } else { - return { content: `Unknown tool: ${toolName}`, isError: true }; - } - } - - private extractText(content: any[]): string { - return content - .filter(block => block.type === 'text') - .map(block => block.text) - .join('\n'); - } - - private extractThinking(content: any[]): string { - const thinkingBlock = content.find(block => block.type === 'thinking'); - return thinkingBlock?.thinking || ''; - } - - private handleError(error: any) { - if (error instanceof AnthropicError) { - console.error('Anthropic API error:', error.message); - - if (error.status === 429) { - console.log('Rate limited - should retry with backoff'); - } else if (error.status === 401) { - console.log('Authentication failed - check API key'); - } - } else { - console.error('Unexpected error:', error); - } - - throw error; - } - - // Utility methods - getConversationHistory(): MessageParam[] { - return [...this.conversation]; - } - - getTotalTokens(): number { - return this.totalTokens; - } - - clearConversation(): void { - this.conversation = []; - this.totalTokens = 0; - } - - async countTokens(messages: MessageParam[], systemPrompt?: string): Promise { - const params: any = { - model: 'claude-sonnet-4-20250514', - messages, - }; - - if (systemPrompt) { - params.system = systemPrompt; - } - - const result = await this.client.messages.countTokens(params); - return result.input_tokens; - } -} - -// Usage example -async function completeExample() { - const client = new AnthropicClient(process.env.ANTHROPIC_API_KEY!); - - const tools: Tool[] = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { type: 'string' }, - }, - required: ['expression'], - }, - }, - ]; - - // Simple message - let result = await client.sendMessage('Hello, Claude!'); - console.log('Response:', result.content); - - // Message with 
thinking - result = await client.sendMessage( - 'Solve this complex math problem: What is the optimal way to arrange 10 people around a circular table?', - { thinking: true, maxTokens: 2000 } - ); - console.log('Thinking:', result.thinking); - console.log('Response:', result.content); - - // Streaming with tools - result = await client.sendMessage( - 'Calculate 15 * 23 and explain the steps', - { stream: true, tools, thinking: true } - ); - - console.log('Total tokens used:', client.getTotalTokens()); -} -``` - -## Key Implementation Notes - -1. **Content is Always an Array**: Even simple text messages use the content block system -2. **Error Handling**: The SDK provides specific error types for different HTTP status codes -3. **Streaming Events**: Use MessageStream for easier event handling, or raw streaming for more control -4. **Token Counting**: Use the dedicated countTokens API for accurate estimates -5. **Caching**: Add cache_control to content blocks, not to the message level -6. **Tool Calls**: Always check stop_reason for 'tool_use' and handle the tool execution flow -7. **Thinking**: Requires explicit configuration and sufficient token budget -8. **Abort**: Use AbortController for request cancellation, or MessageStream.abort() for streams - -This guide covers all the essential patterns for working with the Anthropic SDK effectively. \ No newline at end of file diff --git a/packages/ai/docs/gemini-api.md b/packages/ai/docs/gemini-api.md deleted file mode 100644 index 6b8ff549..00000000 --- a/packages/ai/docs/gemini-api.md +++ /dev/null @@ -1,1233 +0,0 @@ -# Google Gemini SDK Implementation Guide - -This document provides comprehensive implementation guidance for the Google Gemini SDK (`@google/genai`) showing exactly how to implement all required features for our unified AI API. - -## Table of Contents - -1. [Setup and Basic Usage](#setup-and-basic-usage) -2. [Streaming Responses](#streaming-responses) -3. [Aborting Requests](#aborting-requests) -4. 
[Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Message History and Serialization](#message-history-and-serialization) -7. [Token Counting](#token-counting) -8. [Context Caching](#context-caching) -9. [Function Calling (Tools)](#function-calling-tools) -10. [System Instructions](#system-instructions) -11. [Parts System for Content](#parts-system-for-content) -12. [Thinking Tokens](#thinking-tokens) -13. [Peculiarities and Gotchas](#peculiarities-and-gotchas) - -## Setup and Basic Usage - -### Installation and Initialization - -```typescript -import { GoogleGenAI, type GenerateContentResponse } from '@google/genai'; - -// Initialize client -const client = new GoogleGenAI({ - apiKey: process.env.GEMINI_API_KEY, - // Optional: Use Vertex AI instead - // vertexai: true, - // project: 'your-project-id', - // location: 'us-central1', -}); - -// Basic non-streaming request -const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: 'Hello, how are you?' 
-}); - -console.log(response.text); -``` - -### Key Types and Interfaces - -```typescript -// Core types from the SDK -interface GoogleGenAIOptions { - apiKey?: string; - vertexai?: boolean; - project?: string; - location?: string; - apiVersion?: string; -} - -interface Content { - parts?: Part[]; - role?: string; // 'user' | 'model' -} - -interface Part { - text?: string; - thought?: boolean; // For thinking content - functionCall?: FunctionCall; - functionResponse?: FunctionResponse; - inlineData?: Blob; - fileData?: FileData; -} - -interface GenerateContentResponse { - candidates?: Candidate[]; - usageMetadata?: GenerateContentResponseUsageMetadata; - promptFeedback?: GenerateContentResponsePromptFeedback; - text: string | undefined; // Convenience getter -} -``` - -## Streaming Responses - -Gemini supports streaming via `generateContentStream` which returns an `AsyncGenerator`: - -```typescript -async function streamContent() { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: 'Write a short story about a robot.' 
- }); - - let fullText = ''; - for await (const chunk of stream) { - // Each chunk is a GenerateContentResponse - const chunkText = chunk.text; - if (chunkText) { - fullText += chunkText; - process.stdout.write(chunkText); // Stream to output - } - - // Check for function calls in streaming - if (chunk.candidates?.[0]?.content?.parts) { - for (const part of chunk.candidates[0].content.parts) { - if (part.functionCall) { - console.log('Function call:', part.functionCall); - } - if (part.thought) { - console.log('Thinking:', part.text); - } - } - } - } - - return fullText; -} -``` - -### Streaming with Thinking Tokens - -```typescript -async function streamWithThinking() { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-thinking-exp-1219', - contents: 'Solve this math problem: 2x + 5 = 13' - }); - - let thinking = ''; - let response = ''; - - for await (const chunk of stream) { - if (chunk.candidates?.[0]?.content?.parts) { - for (const part of chunk.candidates[0].content.parts) { - if (part.thought && part.text) { - thinking += part.text; - console.log('[THINKING]', part.text); - } else if (part.text && !part.thought) { - response += part.text; - console.log('[RESPONSE]', part.text); - } - } - } - } - - return { thinking, response }; -} -``` - -## Aborting Requests - -Gemini supports request cancellation via `AbortSignal`: - -```typescript -class GeminiClient { - private currentController: AbortController | null = null; - - async generateWithCancellation(prompt: string): Promise { - // Create new abort controller - this.currentController = new AbortController(); - - try { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: prompt, - abortSignal: this.currentController.signal - }); - - return response.text || ''; - } catch (error) { - if (error.name === 'AbortError') { - console.log('Request was cancelled'); - throw new Error('Request cancelled by user'); - } - throw error; - } 
finally { - this.currentController = null; - } - } - - async generateStreamWithCancellation(prompt: string): Promise> { - this.currentController = new AbortController(); - - try { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: prompt, - abortSignal: this.currentController.signal - }); - - return this.processStream(stream); - } catch (error) { - if (error.name === 'AbortError') { - throw new Error('Request cancelled by user'); - } - throw error; - } - } - - private async* processStream(stream: AsyncGenerator): AsyncGenerator { - try { - for await (const chunk of stream) { - if (chunk.text) { - yield chunk.text; - } - } - } catch (error) { - if (error.name === 'AbortError') { - return; // Exit generator cleanly - } - throw error; - } finally { - this.currentController = null; - } - } - - // Cancel current request - cancel(): void { - if (this.currentController) { - this.currentController.abort(); - } - } -} -``` - -## Error Handling - -### Error Types and Handling - -```typescript -import { ApiError } from '@google/genai'; - -interface GeminiErrorInfo { - type: 'rate_limit' | 'auth' | 'invalid_request' | 'network' | 'server' | 'unknown'; - message: string; - statusCode?: number; - retryable: boolean; -} - -function handleGeminiError(error: unknown): GeminiErrorInfo { - if (error instanceof ApiError) { - const statusCode = error.status; - - switch (statusCode) { - case 401: - case 403: - return { - type: 'auth', - message: 'Authentication failed - check API key', - statusCode, - retryable: false - }; - - case 429: - return { - type: 'rate_limit', - message: 'Rate limit exceeded', - statusCode, - retryable: true - }; - - case 400: - return { - type: 'invalid_request', - message: error.message || 'Invalid request parameters', - statusCode, - retryable: false - }; - - case 500: - case 502: - case 503: - case 504: - return { - type: 'server', - message: 'Server error - try again later', - statusCode, - retryable: 
true - }; - - default: - return { - type: 'unknown', - message: error.message || 'Unknown API error', - statusCode, - retryable: false - }; - } - } - - if (error instanceof Error) { - if (error.name === 'AbortError') { - return { - type: 'network', - message: 'Request was cancelled', - retryable: false - }; - } - - return { - type: 'network', - message: error.message, - retryable: true - }; - } - - return { - type: 'unknown', - message: 'Unknown error occurred', - retryable: false - }; -} - -// Usage with retry logic -async function generateWithRetry(prompt: string, maxRetries = 3): Promise { - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: prompt - }); - - return response.text || ''; - } catch (error) { - const errorInfo = handleGeminiError(error); - - if (!errorInfo.retryable || attempt === maxRetries) { - throw new Error(`${errorInfo.type}: ${errorInfo.message}`); - } - - // Exponential backoff for retryable errors - const delay = Math.pow(2, attempt - 1) * 1000; - await new Promise(resolve => setTimeout(resolve, delay)); - } - } - - throw new Error('Max retries exceeded'); -} -``` - -## Stop Reasons - -Gemini provides finish reasons in the response candidates: - -```typescript -enum FinishReason { - FINISH_REASON_UNSPECIFIED = 'FINISH_REASON_UNSPECIFIED', - STOP = 'STOP', // Natural stop - MAX_TOKENS = 'MAX_TOKENS', // Hit token limit - SAFETY = 'SAFETY', // Safety filter triggered - RECITATION = 'RECITATION', // Recitation filter - LANGUAGE = 'LANGUAGE', // Language not supported - OTHER = 'OTHER' -} - -function extractStopReason(response: GenerateContentResponse): string | null { - const candidate = response.candidates?.[0]; - if (!candidate) return null; - - return candidate.finishReason || null; -} - -// Handle different stop reasons -function handleStopReason(response: GenerateContentResponse): void { - const reason = 
extractStopReason(response); - - switch (reason) { - case 'STOP': - console.log('Response completed naturally'); - break; - - case 'MAX_TOKENS': - console.log('Response truncated due to token limit'); - break; - - case 'SAFETY': - console.log('Response blocked by safety filters'); - // Check promptFeedback for details - if (response.promptFeedback?.blockReason) { - console.log('Block reason:', response.promptFeedback.blockReason); - } - break; - - case 'RECITATION': - console.log('Response blocked due to recitation concerns'); - break; - - default: - if (reason) { - console.log('Unexpected finish reason:', reason); - } - } -} -``` - -## Message History and Serialization - -### Managing Conversation History - -```typescript -interface SerializableMessage { - role: 'user' | 'model'; - content: string; - functionCalls?: FunctionCall[]; - functionResponses?: FunctionResponse[]; - thinking?: string; -} - -interface SerializableSession { - messages: SerializableMessage[]; - totalUsage: { - promptTokens: number; - candidatesTokens: number; - totalTokens: number; - thoughtsTokens?: number; - }; -} - -class GeminiConversation { - private messages: Content[] = []; - private totalUsage = { - promptTokens: 0, - candidatesTokens: 0, - totalTokens: 0, - thoughtsTokens: 0 - }; - - addUserMessage(text: string): void { - this.messages.push({ - role: 'user', - parts: [{ text }] - }); - } - - addAssistantMessage(response: GenerateContentResponse): void { - const candidate = response.candidates?.[0]; - if (!candidate?.content) return; - - this.messages.push(candidate.content); - - // Update usage - if (response.usageMetadata) { - this.totalUsage.promptTokens += response.usageMetadata.promptTokenCount || 0; - this.totalUsage.candidatesTokens += response.usageMetadata.candidatesTokenCount || 0; - this.totalUsage.totalTokens += response.usageMetadata.totalTokenCount || 0; - this.totalUsage.thoughtsTokens += response.usageMetadata.thoughtsTokenCount || 0; - } - } - - async 
sendMessage(text: string): Promise { - this.addUserMessage(text); - - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: this.messages - }); - - this.addAssistantMessage(response); - return response.text || ''; - } - - // Serialize for persistence - serialize(): SerializableSession { - const messages: SerializableMessage[] = []; - - for (const content of this.messages) { - const message: SerializableMessage = { - role: (content.role as 'user' | 'model') || 'user', - content: '', - functionCalls: [], - functionResponses: [], - thinking: '' - }; - - for (const part of content.parts || []) { - if (part.text) { - if (part.thought) { - message.thinking += part.text; - } else { - message.content += part.text; - } - } - if (part.functionCall) { - message.functionCalls!.push(part.functionCall); - } - if (part.functionResponse) { - message.functionResponses!.push(part.functionResponse); - } - } - - messages.push(message); - } - - return { - messages, - totalUsage: { ...this.totalUsage } - }; - } - - // Deserialize from storage - static fromSerialized(session: SerializableSession): GeminiConversation { - const conversation = new GeminiConversation(); - conversation.totalUsage = { ...session.totalUsage }; - - for (const msg of session.messages) { - const parts: Part[] = []; - - if (msg.content) { - parts.push({ text: msg.content }); - } - - if (msg.thinking) { - parts.push({ text: msg.thinking, thought: true }); - } - - for (const funcCall of msg.functionCalls || []) { - parts.push({ functionCall: funcCall }); - } - - for (const funcResp of msg.functionResponses || []) { - parts.push({ functionResponse: funcResp }); - } - - conversation.messages.push({ - role: msg.role, - parts - }); - } - - return conversation; - } -} -``` - -## Token Counting - -### Understanding Gemini Token Usage - -```typescript -interface TokenUsage { - promptTokens: number; - candidatesTokens: number; // Output tokens - totalTokens: number; - 
thoughtsTokens?: number; // Thinking tokens (reasoning models) - cachedContentTokens?: number; // Cache read tokens -} - -function extractTokenUsage(response: GenerateContentResponse): TokenUsage { - const usage = response.usageMetadata; - - return { - promptTokens: usage?.promptTokenCount || 0, - candidatesTokens: usage?.candidatesTokenCount || 0, - totalTokens: usage?.totalTokenCount || 0, - thoughtsTokens: usage?.thoughtsTokenCount || 0, - cachedContentTokens: usage?.cachedContentTokenCount || 0 - }; -} - -// Count tokens before sending (estimation) -async function countTokens(content: string | Content[]): Promise { - const response = await client.models.computeTokens({ - model: 'gemini-2.0-flash-exp', - contents: typeof content === 'string' - ? [{ parts: [{ text: content }] }] - : content - }); - - return response.totalTokens || 0; -} - -// Token usage accumulation -class TokenTracker { - private usage = { - totalPromptTokens: 0, - totalCandidatesTokens: 0, - totalThoughtsTokens: 0, - totalCachedTokens: 0, - totalRequests: 0 - }; - - addUsage(response: GenerateContentResponse): void { - const tokenUsage = extractTokenUsage(response); - - this.usage.totalPromptTokens += tokenUsage.promptTokens; - this.usage.totalCandidatesTokens += tokenUsage.candidatesTokens; - this.usage.totalThoughtsTokens += tokenUsage.thoughtsTokens || 0; - this.usage.totalCachedTokens += tokenUsage.cachedContentTokens || 0; - this.usage.totalRequests++; - } - - getStats() { - return { - ...this.usage, - totalTokens: this.usage.totalPromptTokens + this.usage.totalCandidatesTokens, - averageTokensPerRequest: this.usage.totalRequests > 0 - ? 
(this.usage.totalPromptTokens + this.usage.totalCandidatesTokens) / this.usage.totalRequests - : 0 - }; - } -} -``` - -## Context Caching - -Gemini supports context caching to reduce costs for repeated large prompts: - -```typescript -import { type CachedContent } from '@google/genai'; - -class GeminiCache { - async createCache( - systemInstruction: string, - contents: Content[], - ttlHours = 1 - ): Promise { - const cache = await client.caches.create({ - model: 'gemini-2.0-flash-exp', - systemInstruction: { parts: [{ text: systemInstruction }] }, - contents, - ttl: `${ttlHours * 3600}s` // Convert hours to seconds - }); - - return cache; - } - - async generateWithCache( - cachedContent: CachedContent, - userMessage: string - ): Promise { - return await client.models.generateContent({ - model: cachedContent.model || 'gemini-2.0-flash-exp', - cachedContent: cachedContent.name, - contents: [{ - role: 'user', - parts: [{ text: userMessage }] - }] - }); - } - - async listCaches(): Promise { - const caches = []; - for await (const cache of client.caches.list()) { - caches.push(cache); - } - return caches; - } - - async deleteCache(cacheName: string): Promise { - await client.caches.delete({ name: cacheName }); - } - - // Example: Cache a large document for repeated analysis - async createDocumentCache(document: string): Promise { - const systemInstruction = ` - You are a document analysis assistant. The user will provide a large document, - and you should be ready to answer questions about it, summarize it, or extract - information from it. - `; - - const contents = [{ - role: 'user' as const, - parts: [{ text: `Please analyze this document:\n\n${document}` }] - }]; - - return this.createCache(systemInstruction, contents, 24); // Cache for 24 hours - } -} - -// Usage example -async function demonstrateCache() { - const cache = new GeminiCache(); - - // Create cache with large document - const document = "... 
very large document content ..."; - const cachedContent = await cache.createDocumentCache(document); - - // Now ask questions using the cache (saves tokens!) - const response1 = await cache.generateWithCache( - cachedContent, - "What are the key points in this document?" - ); - - const response2 = await cache.generateWithCache( - cachedContent, - "Can you summarize the conclusions?" - ); - - // Clean up when done - await cache.deleteCache(cachedContent.name!); -} -``` - -## Function Calling (Tools) - -### Basic Function Calling Setup - -```typescript -interface ToolDefinition { - name: string; - description: string; - parameters: { - type: 'object'; - properties: Record; - required: string[]; - }; -} - -// Define tools -const tools: ToolDefinition[] = [{ - name: 'get_weather', - description: 'Get current weather for a location', - parameters: { - type: 'object', - properties: { - location: { - type: 'string', - description: 'City name or location' - }, - units: { - type: 'string', - enum: ['celsius', 'fahrenheit'], - description: 'Temperature units' - } - }, - required: ['location'] - } -}]; - -// Convert to Gemini format -function createGeminiTools(tools: ToolDefinition[]) { - return [{ - functionDeclarations: tools.map(tool => ({ - name: tool.name, - description: tool.description, - parametersJsonSchema: tool.parameters - })) - }]; -} - -// Function call handler -async function executeFunction(functionCall: FunctionCall): Promise { - const { name, args } = functionCall; - const params = typeof args === 'string' ? 
JSON.parse(args) : args; - - switch (name) { - case 'get_weather': - return await getWeatherData(params.location, params.units); - default: - throw new Error(`Unknown function: ${name}`); - } -} - -// Mock weather function -async function getWeatherData(location: string, units = 'celsius') { - return { - location, - temperature: 22, - conditions: 'sunny', - units - }; -} -``` - -### Complete Function Calling Flow - -```typescript -class GeminiFunctionCalling { - private tools: ToolDefinition[]; - - constructor(tools: ToolDefinition[]) { - this.tools = tools; - } - - async processWithTools(messages: Content[]): Promise { - let currentMessages = [...messages]; - let iterations = 0; - const maxIterations = 5; - - while (iterations < maxIterations) { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: currentMessages, - tools: createGeminiTools(this.tools), - toolConfig: { - functionCallingConfig: { - mode: 'AUTO' // Let model decide when to call functions - } - } - }); - - const candidate = response.candidates?.[0]; - if (!candidate?.content) break; - - // Add assistant response to conversation - currentMessages.push(candidate.content); - - // Check for function calls - const functionCalls = this.extractFunctionCalls(candidate.content); - - if (functionCalls.length === 0) { - // No more function calls, return final response - return response.text || ''; - } - - // Execute function calls - for (const functionCall of functionCalls) { - try { - const result = await executeFunction(functionCall); - - // Add function response to conversation - currentMessages.push({ - role: 'user', - parts: [{ - functionResponse: { - name: functionCall.name, - id: functionCall.id, - response: { result } - } - }] - }); - } catch (error) { - // Add error response - currentMessages.push({ - role: 'user', - parts: [{ - functionResponse: { - name: functionCall.name, - id: functionCall.id, - response: { error: error.message } - } - }] - }); - } - 
} - - iterations++; - } - - throw new Error('Max function calling iterations exceeded'); - } - - private extractFunctionCalls(content: Content): FunctionCall[] { - const calls: FunctionCall[] = []; - - for (const part of content.parts || []) { - if (part.functionCall) { - calls.push(part.functionCall); - } - } - - return calls; - } - - // Streaming version with function calls - async *processStreamWithTools(messages: Content[]): AsyncGenerator<{ - type: 'content' | 'function_call' | 'function_result'; - content?: string; - functionCall?: FunctionCall; - functionResult?: any; - }> { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: messages, - tools: createGeminiTools(this.tools), - toolConfig: { - functionCallingConfig: { mode: 'AUTO' } - } - }); - - let pendingFunctionCalls: FunctionCall[] = []; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content) continue; - - for (const part of candidate.content.parts || []) { - if (part.text && !part.thought) { - yield { type: 'content', content: part.text }; - } - - if (part.functionCall) { - pendingFunctionCalls.push(part.functionCall); - yield { type: 'function_call', functionCall: part.functionCall }; - } - } - } - - // Execute any pending function calls - for (const functionCall of pendingFunctionCalls) { - try { - const result = await executeFunction(functionCall); - yield { type: 'function_result', functionResult: result }; - } catch (error) { - yield { - type: 'function_result', - functionResult: { error: error.message } - }; - } - } - } -} -``` - -## System Instructions - -Gemini handles system instructions differently from other providers: - -```typescript -// System instruction is a separate parameter, not part of messages -async function generateWithSystemInstruction( - systemPrompt: string, - userMessage: string -): Promise { - const response = await client.models.generateContent({ - model: 
'gemini-2.0-flash-exp', - systemInstruction: { - parts: [{ text: systemPrompt }] - }, - contents: [{ - role: 'user', - parts: [{ text: userMessage }] - }] - }); - - return response.text || ''; -} - -// For conversation with system instruction -class GeminiConversationWithSystem { - private systemInstruction: Content; - private messages: Content[] = []; - - constructor(systemPrompt: string) { - this.systemInstruction = { - parts: [{ text: systemPrompt }] - }; - } - - async sendMessage(text: string): Promise { - this.messages.push({ - role: 'user', - parts: [{ text }] - }); - - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - systemInstruction: this.systemInstruction, - contents: this.messages - }); - - const candidate = response.candidates?.[0]; - if (candidate?.content) { - this.messages.push(candidate.content); - } - - return response.text || ''; - } - - updateSystemInstruction(newPrompt: string): void { - this.systemInstruction = { - parts: [{ text: newPrompt }] - }; - } -} -``` - -## Parts System for Content - -Understanding Gemini's parts-based content system: - -```typescript -// Text content -const textPart: Part = { - text: 'Hello, world!' 
-}; - -// Thinking content (for reasoning models) -const thinkingPart: Part = { - text: 'Let me think about this problem...', - thought: true -}; - -// Function call -const functionCallPart: Part = { - functionCall: { - name: 'get_weather', - args: { location: 'San Francisco' } - } -}; - -// Function response -const functionResponsePart: Part = { - functionResponse: { - name: 'get_weather', - response: { temperature: 72, conditions: 'sunny' } - } -}; - -// Image data (inline) -const imagePart: Part = { - inlineData: { - mimeType: 'image/jpeg', - data: 'base64-encoded-image-data' - } -}; - -// File reference -const filePart: Part = { - fileData: { - mimeType: 'image/jpeg', - fileUri: 'gs://bucket/image.jpg' - } -}; - -// Creating multi-part content -const multiPartContent: Content = { - role: 'user', - parts: [ - { text: 'What is in this image?' }, - { - inlineData: { - mimeType: 'image/jpeg', - data: await imageToBase64('path/to/image.jpg') - } - } - ] -}; - -// Utility functions for parts -function createTextPart(text: string): Part { - return { text }; -} - -function createThinkingPart(text: string): Part { - return { text, thought: true }; -} - -function createImagePart(imageData: string, mimeType: string): Part { - return { - inlineData: { - mimeType, - data: imageData - } - }; -} - -async function imageToBase64(filePath: string): Promise { - const fs = await import('fs/promises'); - const buffer = await fs.readFile(filePath); - return buffer.toString('base64'); -} -``` - -## Thinking Tokens - -Gemini thinking models (like `gemini-2.0-flash-thinking-exp-1219`) provide reasoning traces: - -```typescript -interface ThinkingExtractor { - thinking: string; - response: string; - thinkingTokens: number; - responseTokens: number; -} - -function extractThinking(response: GenerateContentResponse): ThinkingExtractor { - let thinking = ''; - let responseText = ''; - - const candidate = response.candidates?.[0]; - if (candidate?.content?.parts) { - for (const part of 
candidate.content.parts) { - if (part.text) { - if (part.thought) { - thinking += part.text; - } else { - responseText += part.text; - } - } - } - } - - const usage = response.usageMetadata; - - return { - thinking, - response: responseText, - thinkingTokens: usage?.thoughtsTokenCount || 0, - responseTokens: usage?.candidatesTokenCount || 0 - }; -} - -// Streaming thinking extraction -async function streamWithThinkingExtraction(prompt: string) { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-thinking-exp-1219', - contents: prompt - }); - - let thinkingContent = ''; - let responseContent = ''; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content?.parts) continue; - - for (const part of candidate.content.parts) { - if (part.text) { - if (part.thought) { - thinkingContent += part.text; - console.log('[THINKING DELTA]', part.text); - } else { - responseContent += part.text; - console.log('[RESPONSE DELTA]', part.text); - } - } - } - } - - return { - thinking: thinkingContent, - response: responseContent - }; -} - -// Enable thinking for models that support it -async function generateWithThinking(prompt: string, model = 'gemini-2.0-flash-thinking-exp-1219') { - const response = await client.models.generateContent({ - model, - contents: prompt - }); - - return extractThinking(response); -} -``` - -## Peculiarities and Gotchas - -### Key Differences from Other APIs - -1. **System Instructions**: Separate parameter, not part of message history -2. **Parts-based Content**: Content is split into parts, each with specific types -3. **Thinking Detection**: Must check `part.thought` flag to identify reasoning content -4. **Function Calls**: Embedded in parts, not separate message types -5. **Role Names**: Uses 'model' instead of 'assistant' for AI responses -6. 
**Streaming**: Returns full `GenerateContentResponse` objects, not deltas - -### Common Pitfalls - -```typescript -// ❌ Wrong: Treating text as complete response -const response = await client.models.generateContent({...}); -console.log(response.candidates[0].content.parts[0].text); // May miss other parts - -// ✅ Correct: Use convenience getter or process all parts -console.log(response.text); // Concatenates all text parts automatically - -// ❌ Wrong: Mixing system instruction with messages -const messages = [ - { role: 'system', parts: [{ text: 'You are helpful' }] }, // Not supported - { role: 'user', parts: [{ text: 'Hello' }] } -]; - -// ✅ Correct: Separate system instruction -const response = await client.models.generateContent({ - systemInstruction: { parts: [{ text: 'You are helpful' }] }, - contents: [{ role: 'user', parts: [{ text: 'Hello' }] }] -}); - -// ❌ Wrong: Assuming single part responses -for await (const chunk of stream) { - console.log(chunk.text); // May miss function calls or thinking -} - -// ✅ Correct: Process all parts -for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (candidate?.content?.parts) { - for (const part of candidate.content.parts) { - if (part.text && !part.thought) { - console.log('[RESPONSE]', part.text); - } else if (part.text && part.thought) { - console.log('[THINKING]', part.text); - } else if (part.functionCall) { - console.log('[FUNCTION CALL]', part.functionCall); - } - } - } -} -``` - -### Performance Tips - -1. **Use streaming** for better user experience with long responses -2. **Cache large prompts** to reduce token costs -3. **Batch token counting** when possible -4. **Set appropriate `abortSignal` timeouts** for long-running requests -5. 
**Handle function calls efficiently** to avoid timeout issues - -### Model-Specific Behaviors - -```typescript -// Different models have different capabilities -const modelCapabilities = { - 'gemini-2.0-flash-exp': { - thinking: false, - functionCalling: true, - vision: true, - maxTokens: 1000000 - }, - 'gemini-2.0-flash-thinking-exp-1219': { - thinking: true, - functionCalling: true, - vision: true, - maxTokens: 32768 - }, - 'gemini-1.5-pro': { - thinking: false, - functionCalling: true, - vision: true, - maxTokens: 2000000 - } -}; - -// Check model capabilities before using features -function supportsThinking(model: string): boolean { - return model.includes('thinking'); -} - -function getMaxTokens(model: string): number { - return modelCapabilities[model]?.maxTokens || 32768; -} -``` - -This comprehensive guide covers all the essential aspects of implementing Gemini API features. The key is understanding Gemini's parts-based content system and properly handling the different types of content (text, thinking, function calls) that can appear in responses. \ No newline at end of file diff --git a/packages/ai/docs/images.md b/packages/ai/docs/images.md deleted file mode 100644 index 82350015..00000000 --- a/packages/ai/docs/images.md +++ /dev/null @@ -1,322 +0,0 @@ -# Image Input Support for LLM Providers - -This document describes how to submit images to different LLM provider APIs and proposes an abstraction layer for unified image handling. - -## Provider-Specific Image Support - -### 1. Anthropic (Claude) - -**Supported Models**: Claude 3 and Claude 4 families (Sonnet, Haiku, Opus) - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **Base64 Encoding**: -```json -{ - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "" - } - }, - { - "type": "text", - "text": "What's in this image?" - } - ] -} -``` - -2. 
**URL Support**: -```json -{ - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/image.jpg" - } - } - ] -} -``` - -**Limitations**: -- Maximum 20 images per request -- Each image max 3.75 MB -- Maximum dimensions: 8,000px × 8,000px -- Images are ephemeral (not stored beyond request duration) - -### 2. Google GenAI (Gemini) - -**Supported Models**: Gemini Pro Vision, Gemini 1.5, Gemini 2.0 - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **Inline Base64 Data** (for files < 20MB): -```json -{ - "contents": [{ - "parts": [ - { - "inline_data": { - "mime_type": "image/jpeg", - "data": "BASE64_ENCODED_IMAGE_DATA" - } - }, - { - "text": "Describe this image" - } - ] - }] -} -``` - -2. **File API** (for larger files or reuse): -- Upload file first using File API -- Reference by file URI in subsequent requests - -**Limitations**: -- Inline data: Total request size (text + images) < 20MB -- Base64 encoding increases size in transit -- Returns HTTP 413 if request too large - -### 3. OpenAI Chat Completions (GPT-4o, GPT-4o-mini) - -**Supported Models**: GPT-4o, GPT-4o-mini, GPT-4-turbo with vision - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **URL Reference**: -```json -{ - "role": "user", - "content": [ - { - "type": "text", - "text": "What's in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://example.com/image.jpg" - } - } - ] -} -``` - -2. **Base64 Data URL**: -```json -{ - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64," - } - } - ] -} -``` - -**Note**: Despite the field name `image_url`, base64 data URLs are supported. - -### 4. 
OpenAI Responses API (o1, o3, o4-mini) - -**Vision Support by Model**: -- ✅ **o1**: Full vision support -- ✅ **o3**: Vision support + image generation -- ✅ **o4-mini**: Vision support + image generation -- ❌ **o3-mini**: No vision capabilities -- ✅ **o3-pro**: Vision analysis (no generation) - -**Methods**: Same as Chat Completions API -- URL references -- Base64 data URLs - -**Note**: Vision capabilities integrated into reasoning chain-of-thought for more contextually rich responses. - -## Proposed Unified Abstraction - -### Image Content Type - -```typescript -interface ImageContent { - type: "image"; - data: string; // base64 encoded image data - mimeType: string; // e.g., "image/jpeg", "image/png" -} -``` - -### Unified Message Structure - -```typescript -interface UserMessage { - role: "user"; - content: string | (TextContent | ImageContent)[]; -} - -interface TextContent { - type: "text"; - text: string; -} -``` - -### Provider Adapter Implementation - -Each provider adapter would: - -1. **Check Model Capabilities**: -```typescript -if (model.input.includes("image")) { - // Process image content -} else { - // Throw error or ignore images -} -``` - -2. 
**Convert to Provider Format**: - -```typescript -// Anthropic converter -function toAnthropicContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - type: "image", - source: { - type: "base64", - media_type: item.mimeType, - data: item.data - } - }; - } - return { type: "text", text: item.text }; - }); -} - -// OpenAI converter -function toOpenAIContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - type: "image_url", - image_url: { - url: `data:${item.mimeType};base64,${item.data}` - } - }; - } - return { type: "text", text: item.text }; - }); -} - -// Google converter -function toGoogleContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - inline_data: { - mime_type: item.mimeType, - data: item.data - } - }; - } - return { text: item.text }; - }); -} -``` - -### Size and Format Validation - -```typescript -interface ImageConstraints { - maxSizeMB: number; - maxWidth: number; - maxHeight: number; - maxCount: number; - supportedFormats: string[]; -} - -const PROVIDER_CONSTRAINTS: Record = { - anthropic: { - maxSizeMB: 3.75, - maxWidth: 8000, - maxHeight: 8000, - maxCount: 20, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - }, - google: { - maxSizeMB: 20, // for inline data - maxWidth: Infinity, - maxHeight: Infinity, - maxCount: Infinity, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - }, - openai: { - maxSizeMB: 20, - maxWidth: Infinity, - maxHeight: Infinity, - maxCount: Infinity, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - } -}; - -async function validateImage( - image: ImageContent, - provider: string -): Promise { - const constraints = PROVIDER_CONSTRAINTS[provider]; - - // Check MIME type - if (!constraints.supportedFormats.includes(image.mimeType)) { - throw 
new Error(`Unsupported image format: ${image.mimeType}`); - } - - // Check size - const imageBuffer = Buffer.from(image.data, 'base64'); - const sizeMB = imageBuffer.length / (1024 * 1024); - if (sizeMB > constraints.maxSizeMB) { - throw new Error(`Image exceeds ${constraints.maxSizeMB}MB limit`); - } - - // Could add dimension checks using image processing library -} -``` - -## Implementation Considerations - -1. **Preprocessing**: - - User is responsible for converting images to base64 before passing to API - - Utility functions could be provided for common conversions (file to base64, URL to base64) - - Image optimization (resize/compress) should happen before encoding - -2. **Error Handling**: - - Validate MIME types and sizes before sending - - Check model capabilities (via `model.input.includes("image")`) - - Provide clear error messages for unsupported features - -3. **Performance**: - - Base64 encoding increases payload size by ~33% - - Consider image compression before encoding - - For Google GenAI, be aware of 20MB total request limit - -4. **Token Counting**: - - Images consume tokens (varies by provider and image size) - - Include image token estimates in usage calculations - - Anthropic: ~1 token per ~3-4 bytes of base64 data - - OpenAI: Detailed images consume more tokens than low-detail - -5. 
**Fallback Strategies**: - - If model doesn't support images, throw error or ignore images - - Consider offering text-only fallback for non-vision models \ No newline at end of file diff --git a/packages/ai/docs/models.md b/packages/ai/docs/models.md deleted file mode 100644 index a418c390..00000000 --- a/packages/ai/docs/models.md +++ /dev/null @@ -1,56 +0,0 @@ -# OpenAI Models - -## All Models - -- [ ] [GPT-5](https://platform.openai.com/docs/models/gpt-5) -- [ ] [GPT-5 mini](https://platform.openai.com/docs/models/gpt-5-mini) -- [ ] [GPT-5 nano](https://platform.openai.com/docs/models/gpt-5-nano) -- [ ] [o3-deep-research](https://platform.openai.com/docs/models/o3-deep-research) -- [ ] [o4-mini-deep-research](https://platform.openai.com/docs/models/o4-mini-deep-research) -- [ ] [o3-pro](https://platform.openai.com/docs/models/o3-pro) -- [ ] [GPT-4o Audio](https://platform.openai.com/docs/models/gpt-4o-audio-preview) -- [ ] [GPT-4o Realtime](https://platform.openai.com/docs/models/gpt-4o-realtime-preview) -- [ ] [o3](https://platform.openai.com/docs/models/o3) -- [ ] [o4-mini](https://platform.openai.com/docs/models/o4-mini) -- [ ] [GPT-4.1](https://platform.openai.com/docs/models/gpt-4.1) -- [ ] [GPT-4.1 mini](https://platform.openai.com/docs/models/gpt-4.1-mini) -- [ ] [GPT-4.1 nano](https://platform.openai.com/docs/models/gpt-4.1-nano) -- [ ] [o1-pro](https://platform.openai.com/docs/models/o1-pro) -- [ ] [computer-use-preview](https://platform.openai.com/docs/models/computer-use-preview) -- [ ] [GPT-4o mini Search Preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview) -- [ ] [GPT-4o Search Preview](https://platform.openai.com/docs/models/gpt-4o-search-preview) -- [ ] [GPT-4.5 Preview (Deprecated)](https://platform.openai.com/docs/models/gpt-4.5-preview) -- [ ] [o3-mini](https://platform.openai.com/docs/models/o3-mini) -- [ ] [GPT-4o mini Audio](https://platform.openai.com/docs/models/gpt-4o-mini-audio-preview) -- [ ] [GPT-4o mini 
Realtime](https://platform.openai.com/docs/models/gpt-4o-mini-realtime-preview) -- [ ] [o1](https://platform.openai.com/docs/models/o1) -- [ ] [omni-moderation](https://platform.openai.com/docs/models/omni-moderation-latest) -- [ ] [o1-mini](https://platform.openai.com/docs/models/o1-mini) -- [ ] [o1 Preview](https://platform.openai.com/docs/models/o1-preview) -- [ ] [GPT-4o](https://platform.openai.com/docs/models/gpt-4o) -- [ ] [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini) -- [ ] [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo) -- [ ] [babbage-002](https://platform.openai.com/docs/models/babbage-002) -- [ ] [ChatGPT-4o](https://platform.openai.com/docs/models/chatgpt-4o-latest) -- [ ] [codex-mini-latest](https://platform.openai.com/docs/models/codex-mini-latest) -- [ ] [DALL·E 2](https://platform.openai.com/docs/models/dall-e-2) -- [ ] [DALL·E 3](https://platform.openai.com/docs/models/dall-e-3) -- [ ] [davinci-002](https://platform.openai.com/docs/models/davinci-002) -- [ ] [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3.5-turbo) -- [ ] [GPT-4](https://platform.openai.com/docs/models/gpt-4) -- [ ] [GPT-4 Turbo Preview](https://platform.openai.com/docs/models/gpt-4-turbo-preview) -- [ ] [GPT-4o mini Transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) -- [ ] [GPT-4o mini TTS](https://platform.openai.com/docs/models/gpt-4o-mini-tts) -- [ ] [GPT-4o Transcribe](https://platform.openai.com/docs/models/gpt-4o-transcribe) -- [ ] [GPT-5 Chat](https://platform.openai.com/docs/models/gpt-5-chat-latest) -- [ ] [GPT Image 1](https://platform.openai.com/docs/models/gpt-image-1) -- [ ] [gpt-oss-120b](https://platform.openai.com/docs/models/gpt-oss-120b) -- [ ] [gpt-oss-20b](https://platform.openai.com/docs/models/gpt-oss-20b) -- [ ] [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large) -- [ ] 
[text-embedding-3-small](https://platform.openai.com/docs/models/text-embedding-3-small) -- [ ] [text-embedding-ada-002](https://platform.openai.com/docs/models/text-embedding-ada-002) -- [ ] [text-moderation](https://platform.openai.com/docs/models/text-moderation-latest) -- [ ] [text-moderation-stable](https://platform.openai.com/docs/models/text-moderation-stable) -- [ ] [TTS-1](https://platform.openai.com/docs/models/tts-1) -- [ ] [TTS-1 HD](https://platform.openai.com/docs/models/tts-1-hd) -- [ ] [Whisper](https://platform.openai.com/docs/models/whisper-1) \ No newline at end of file diff --git a/packages/ai/docs/openai-api.md b/packages/ai/docs/openai-api.md deleted file mode 100644 index cefe9ac3..00000000 --- a/packages/ai/docs/openai-api.md +++ /dev/null @@ -1,2320 +0,0 @@ -# OpenAI SDK Implementation Guide - -This document provides a comprehensive guide to implementing the required features using the OpenAI SDK v5.12.2. All examples are based on actual usage patterns from the pi-mono codebase and include real TypeScript types from the SDK. - -## Table of Contents - -1. [Basic Setup](#basic-setup) -2. [Streaming Responses](#streaming-responses) -3. [Aborting Requests](#aborting-requests) -4. [Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Message History & Serialization](#message-history--serialization) -7. [Token Counting](#token-counting) -8. [Caching](#caching) -9. [Chat Completions vs Responses API](#chat-completions-vs-responses-api) -10. [Tool/Function Calling](#toolfunction-calling) -11. [System Prompts](#system-prompts) -12. [Provider-Specific Features](#provider-specific-features) -13. 
[Complete Implementation Examples](#complete-implementation-examples) - -## Basic Setup - -```typescript -import OpenAI from "openai"; - -// Basic client setup -const client = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY, - baseURL: "https://api.openai.com/v1", // Optional, default shown -}); - -// For other providers (Groq, Anthropic OpenAI-compatible, etc.) -const groqClient = new OpenAI({ - apiKey: process.env.GROQ_API_KEY, - baseURL: "https://api.groq.com/openai/v1", -}); -``` - -### Client Configuration Options - -```typescript -interface ClientOptions { - apiKey?: string; - baseURL?: string; - timeout?: number; // Request timeout in milliseconds - maxRetries?: number; // Number of retry attempts - defaultHeaders?: Record; - defaultQuery?: Record; -} -``` - -## Streaming Responses - -### Chat Completions Streaming - -```typescript -import type { - ChatCompletionChunk, - ChatCompletionCreateParamsStreaming -} from "openai/resources/chat/completions"; -import { Stream } from "openai/core/streaming"; - -async function streamChatCompletion() { - const params: ChatCompletionCreateParamsStreaming = { - model: "gpt-4o", - messages: [ - { role: "user", content: "Tell me a story" } - ], - stream: true, - max_completion_tokens: 1000, - }; - - const stream: Stream = await client.chat.completions.create(params); - - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta; - - if (delta?.content) { - process.stdout.write(delta.content); - } - - if (delta?.tool_calls) { - console.log("Tool call delta:", delta.tool_calls); - } - - if (chunk.choices[0]?.finish_reason) { - console.log("\nFinish reason:", chunk.choices[0].finish_reason); - } - } -} -``` - -### Responses API Streaming - -```typescript -import type { - ResponseCreateParamsStreaming, - ResponseStreamEvent -} from "openai/resources/responses"; - -async function streamResponsesAPI() { - const params: ResponseCreateParamsStreaming = { - model: "o1-mini", - input: [ - { - role: "user", - 
content: [{ type: "input_text", text: "Solve this math problem: 2x + 5 = 11" }] - } - ], - stream: true, - max_output_tokens: 2000, - reasoning: { - effort: "low", - summary: "detailed" - } - }; - - const stream: Stream = await client.responses.create(params); - - for await (const event of stream) { - switch (event.type) { - case "response.reasoning.text.delta": - // Reasoning/thinking tokens (o1/o3) - process.stdout.write(`[thinking] ${event.delta}`); - break; - - case "response.text.delta": - // Output content - process.stdout.write(event.delta); - break; - - case "response.function_call.arguments.delta": - // Tool call arguments being built - console.log("Tool call delta:", event.delta); - break; - - case "response.completed": - console.log("\nResponse completed"); - break; - } - } -} -``` - -### Streaming Patterns - -```typescript -// Pattern 1: Simple content streaming -async function simpleStream(messages: any[]) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }); - - let fullContent = ""; - for await (const chunk of stream) { - const content = chunk.choices[0]?.delta?.content || ""; - fullContent += content; - process.stdout.write(content); - } - - return fullContent; -} - -// Pattern 2: Event-driven streaming with handlers -interface StreamHandlers { - onContent?: (delta: string) => void; - onToolCall?: (toolCall: any) => void; - onFinish?: (reason: string) => void; -} - -async function eventDrivenStream(messages: any[], handlers: StreamHandlers) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }); - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - if (choice.delta?.content) { - handlers.onContent?.(choice.delta.content); - } - - if (choice.delta?.tool_calls) { - handlers.onToolCall?.(choice.delta.tool_calls); - } - - if (choice.finish_reason) { - handlers.onFinish?.(choice.finish_reason); 
- } - } -} -``` - -## Aborting Requests - -### Using AbortController - -```typescript -class AbortableClient { - private client: OpenAI; - private abortController: AbortController | null = null; - - constructor(config: { apiKey: string; baseURL?: string }) { - this.client = new OpenAI(config); - } - - async askWithAbort(message: string): Promise { - // Create new AbortController for this request - this.abortController = new AbortController(); - - try { - const response = await this.client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: message }], - max_completion_tokens: 1000, - }, { - signal: this.abortController.signal // Pass abort signal - }); - - return response.choices[0]?.message?.content || ""; - } catch (error) { - if (this.abortController.signal.aborted) { - throw new Error("Request was interrupted"); - } - throw error; - } finally { - this.abortController = null; - } - } - - // Call this to abort the current request - interrupt(): void { - this.abortController?.abort(); - } -} - -// Usage example -const abortableClient = new AbortableClient({ - apiKey: process.env.OPENAI_API_KEY! 
-}); - -// Start request -const responsePromise = abortableClient.askWithAbort("Write a long essay"); - -// Abort after 5 seconds -setTimeout(() => { - abortableClient.interrupt(); -}, 5000); - -try { - const response = await responsePromise; - console.log(response); -} catch (error) { - console.log("Request was aborted:", error.message); -} -``` - -### Aborting Streaming Requests - -```typescript -async function abortableStream(messages: any[]) { - const abortController = new AbortController(); - - // Abort after 10 seconds - const timeoutId = setTimeout(() => { - abortController.abort(); - }, 10000); - - try { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }, { - signal: abortController.signal - }); - - for await (const chunk of stream) { - // Check if aborted before processing each chunk - if (abortController.signal.aborted) { - break; - } - - const content = chunk.choices[0]?.delta?.content; - if (content) { - process.stdout.write(content); - } - } - } catch (error) { - if (abortController.signal.aborted) { - console.log("\nStream was aborted"); - } else { - throw error; - } - } finally { - clearTimeout(timeoutId); - } -} -``` - -## Error Handling - -### Error Types from OpenAI SDK - -```typescript -import { - OpenAIError, - APIError, - APIConnectionError, - APIConnectionTimeoutError, - APIUserAbortError, - AuthenticationError, - BadRequestError, - RateLimitError, - InternalServerError, - NotFoundError, - PermissionDeniedError, - UnprocessableEntityError -} from "openai"; - -// Comprehensive error handler -async function handleAPICall(apiCall: () => Promise): Promise { - try { - return await apiCall(); - } catch (error) { - if (error instanceof APIUserAbortError) { - console.log("Request was aborted by user"); - throw new Error("Request interrupted"); - } - - if (error instanceof AuthenticationError) { - console.error("Authentication failed:", error.message); - throw new Error("Invalid API key"); - } - - 
if (error instanceof RateLimitError) { - console.error("Rate limit exceeded:", error.message); - // Could implement exponential backoff here - throw new Error("Rate limited - try again later"); - } - - if (error instanceof APIConnectionError) { - console.error("Connection error:", error.message); - throw new Error("Network connection failed"); - } - - if (error instanceof APIConnectionTimeoutError) { - console.error("Request timeout:", error.message); - throw new Error("Request timed out"); - } - - if (error instanceof BadRequestError) { - console.error("Bad request:", error.message); - console.error("Error details:", error.error); - throw new Error(`Invalid request: ${error.message}`); - } - - if (error instanceof UnprocessableEntityError) { - console.error("Unprocessable entity:", error.message); - throw new Error(`Validation error: ${error.message}`); - } - - if (error instanceof APIError) { - console.error(`API Error ${error.status}:`, error.message); - console.error("Error code:", error.code); - console.error("Error type:", error.type); - throw new Error(`API error: ${error.message}`); - } - - if (error instanceof OpenAIError) { - console.error("OpenAI SDK error:", error.message); - throw new Error(`SDK error: ${error.message}`); - } - - // Unknown error - console.error("Unexpected error:", error); - throw error; - } -} - -// Usage with retry logic -async function apiCallWithRetry( - apiCall: () => Promise, - maxRetries: number = 3 -): Promise { - let lastError: Error; - - for (let attempt = 0; attempt < maxRetries; attempt++) { - try { - return await handleAPICall(apiCall); - } catch (error) { - lastError = error as Error; - - // Don't retry on certain errors - if (error instanceof AuthenticationError || - error instanceof BadRequestError || - error instanceof APIUserAbortError) { - throw error; - } - - // Exponential backoff for retryable errors - if (attempt < maxRetries - 1) { - const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s - await new 
Promise(resolve => setTimeout(resolve, delay)); - } - } - } - - throw lastError!; -} -``` - -### Error Context Extraction - -```typescript -function extractErrorDetails(error: unknown): { - message: string; - code?: string; - type?: string; - status?: number; - retryable: boolean; -} { - if (error instanceof APIError) { - return { - message: error.message, - code: error.code || undefined, - type: error.type, - status: error.status, - retryable: error instanceof RateLimitError || - error instanceof APIConnectionError || - error instanceof InternalServerError - }; - } - - if (error instanceof APIUserAbortError) { - return { - message: "Request was aborted", - retryable: false - }; - } - - if (error instanceof OpenAIError) { - return { - message: error.message, - retryable: false - }; - } - - return { - message: error instanceof Error ? error.message : "Unknown error", - retryable: false - }; -} -``` - -## Stop Reasons - -### Chat Completions Stop Reasons - -```typescript -type ChatCompletionFinishReason = - | "stop" // Natural stopping point or stop sequence - | "length" // Maximum token limit reached - | "content_filter" // Content filtered - | "tool_calls" // Model wants to call tools - | "function_call"; // Legacy function calling - -async function handleStopReasons() { - const response = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Hello" }], - max_completion_tokens: 10, // Low limit to trigger "length" stop - stop: ["END"], // Custom stop sequence - }); - - const choice = response.choices[0]; - const finishReason = choice.finish_reason; - - switch (finishReason) { - case "stop": - console.log("Completed naturally or hit stop sequence"); - break; - - case "length": - console.log("Hit token limit - response may be incomplete"); - // Could request more tokens or continue conversation - break; - - case "content_filter": - console.log("Content was filtered"); - break; - - case "tool_calls": - console.log("Model 
wants to call tools"); - // Handle tool calls (see Tool Calling section) - break; - - default: - console.log("Unknown finish reason:", finishReason); - } - - return { - content: choice.message.content, - finishReason, - complete: finishReason === "stop" - }; -} -``` - -### Responses API Stop Reasons - -```typescript -// Responses API uses different event types to indicate completion -async function handleResponsesStopReasons() { - const response = await client.responses.create({ - model: "o1-mini", - input: [{ role: "user", content: [{ type: "input_text", text: "Hello" }] }], - max_output_tokens: 100, - }); - - for (const item of response.output || []) { - switch (item.type) { - case "message": - // Check for refusal in content - for (const content of item.content || []) { - if (content.type === "refusal") { - console.log("Response was refused:", content.refusal); - } else if (content.type === "output_text") { - console.log("Response completed normally"); - } - } - break; - - case "function_call": - console.log("Tool call requested"); - break; - } - } -} -``` - -### Streaming Stop Reason Detection - -```typescript -async function streamWithStopReasonHandling() { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Count to 10" }], - stream: true, - max_completion_tokens: 50, - }); - - let content = ""; - let finishReason: string | null = null; - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - if (choice.delta?.content) { - content += choice.delta.content; - process.stdout.write(choice.delta.content); - } - - if (choice.finish_reason) { - finishReason = choice.finish_reason; - break; - } - } - - console.log(`\nStreaming finished. 
Reason: ${finishReason}`); - - if (finishReason === "length") { - console.log("Response was cut off due to token limit"); - // Could continue the conversation to get the rest - } - - return { content, finishReason }; -} -``` - -## Message History & Serialization - -### Message Types and Formats - -```typescript -// Chat Completions message format -interface ChatMessage { - role: "system" | "user" | "assistant" | "tool" | "developer"; - content: string | null; - name?: string; - tool_calls?: Array<{ - id: string; - type: "function"; - function: { - name: string; - arguments: string; - }; - }>; - tool_call_id?: string; // For tool response messages -} - -// Responses API message format -interface ResponseMessage { - role: "user" | "developer"; - content: Array<{ - type: "input_text" | "input_image" | "input_audio"; - text?: string; - image?: { url: string }; - audio?: { data: string }; - }>; -} - -// Unified conversation history -interface ConversationHistory { - api: "completions" | "responses"; - model: string; - systemPrompt?: string; - messages: any[]; // API-specific format - totalTokens: number; - metadata: { - created: number; - lastUpdated: number; - provider: string; - }; -} -``` - -### Serialization Implementation - -```typescript -class ConversationManager { - private messages: any[] = []; - private api: "completions" | "responses"; - private systemPrompt?: string; - private totalTokens = 0; - - constructor(api: "completions" | "responses", systemPrompt?: string) { - this.api = api; - this.systemPrompt = systemPrompt; - - if (systemPrompt) { - if (api === "completions") { - this.messages.push({ role: "system", content: systemPrompt }); - } else { - this.messages.push({ role: "developer", content: systemPrompt }); - } - } - } - - addUserMessage(content: string) { - if (this.api === "completions") { - this.messages.push({ role: "user", content }); - } else { - this.messages.push({ - role: "user", - content: [{ type: "input_text", text: content }] - }); - } - 
} - - addAssistantMessage(content: string) { - if (this.api === "completions") { - this.messages.push({ role: "assistant", content }); - } else { - this.messages.push({ - type: "message", - content: [{ type: "output_text", text: content }] - }); - } - } - - addToolCall(id: string, name: string, args: string) { - if (this.api === "completions") { - // Add assistant message with tool calls - this.messages.push({ - role: "assistant", - content: null, - tool_calls: [{ - id, - type: "function" as const, - function: { name, arguments: args } - }] - }); - } else { - // Add function call to responses format - this.messages.push({ - type: "function_call", - call_id: id, - name, - arguments: args - }); - } - } - - addToolResult(id: string, result: string) { - if (this.api === "completions") { - this.messages.push({ - role: "tool", - tool_call_id: id, - content: result - }); - } else { - this.messages.push({ - type: "function_call_output", - call_id: id, - output: result - }); - } - } - - // Serialize to JSON - serialize(): string { - const data: ConversationHistory = { - api: this.api, - model: "unknown", // Set externally - systemPrompt: this.systemPrompt, - messages: this.messages, - totalTokens: this.totalTokens, - metadata: { - created: Date.now(), - lastUpdated: Date.now(), - provider: "openai" - } - }; - return JSON.stringify(data, null, 2); - } - - // Deserialize from JSON - static deserialize(json: string): ConversationManager { - const data: ConversationHistory = JSON.parse(json); - const manager = new ConversationManager(data.api, data.systemPrompt); - manager.messages = data.messages; - manager.totalTokens = data.totalTokens; - return manager; - } - - getMessages() { - return this.messages; - } - - updateTokenUsage(tokens: number) { - this.totalTokens += tokens; - } -} - -// Usage example -const conversation = new ConversationManager("completions", "You are a helpful assistant"); -conversation.addUserMessage("Hello"); -conversation.addAssistantMessage("Hi 
there!"); -conversation.updateTokenUsage(25); - -// Save to file -const serialized = conversation.serialize(); -await fs.writeFile("conversation.json", serialized); - -// Load from file -const loaded = await fs.readFile("conversation.json", "utf-8"); -const restored = ConversationManager.deserialize(loaded); -``` - -### Event-Based History Reconstruction - -```typescript -// From pi-agent codebase - reconstruct messages from events -type AgentEvent = - | { type: "user_message"; text: string } - | { type: "assistant_message"; text: string } - | { type: "tool_call"; toolCallId: string; name: string; args: string } - | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } - | { type: "reasoning"; text: string } - | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number }; - -function reconstructMessagesFromEvents( - events: AgentEvent[], - api: "completions" | "responses", - systemPrompt?: string -): any[] { - const messages: any[] = []; - - // Add system prompt - if (systemPrompt) { - if (api === "completions") { - messages.push({ role: "system", content: systemPrompt }); - } else { - messages.push({ role: "developer", content: systemPrompt }); - } - } - - if (api === "responses") { - // Responses API format reconstruction - for (const event of events) { - switch (event.type) { - case "user_message": - messages.push({ - role: "user", - content: [{ type: "input_text", text: event.text }] - }); - break; - - case "reasoning": - messages.push({ - type: "reasoning", - content: [{ type: "reasoning_text", text: event.text }] - }); - break; - - case "tool_call": - messages.push({ - type: "function_call", - call_id: event.toolCallId, - name: event.name, - arguments: event.args - }); - break; - - case "tool_result": - messages.push({ - type: "function_call_output", - call_id: event.toolCallId, - output: event.result - }); - break; - - case "assistant_message": - messages.push({ - type: "message", - content: [{ type: 
"output_text", text: event.text }] - }); - break; - } - } - } else { - // Chat Completions format reconstruction - let pendingToolCalls: any[] = []; - - for (const event of events) { - switch (event.type) { - case "user_message": - messages.push({ role: "user", content: event.text }); - break; - - case "tool_call": - pendingToolCalls.push({ - id: event.toolCallId, - type: "function", - function: { - name: event.name, - arguments: event.args - } - }); - break; - - case "tool_result": - // Add assistant message with tool calls when we see first result - if (pendingToolCalls.length > 0) { - messages.push({ - role: "assistant", - content: null, - tool_calls: pendingToolCalls - }); - pendingToolCalls = []; - } - - messages.push({ - role: "tool", - tool_call_id: event.toolCallId, - content: event.result - }); - break; - - case "assistant_message": - messages.push({ role: "assistant", content: event.text }); - break; - } - } - } - - return messages; -} -``` - -## Token Counting - -### Usage Types from OpenAI SDK - -```typescript -// Chat Completions usage -interface CompletionUsage { - completion_tokens: number; - prompt_tokens: number; - total_tokens: number; - completion_tokens_details?: { - reasoning_tokens?: number; // o1/o3 reasoning tokens - cached_tokens?: number; - }; - prompt_tokens_details?: { - cached_tokens?: number; - }; -} - -// Responses API usage -interface ResponseUsage { - input_tokens: number; - output_tokens: number; - total_tokens: number; - input_tokens_details: { - cached_tokens?: number; - }; - output_tokens_details: { - reasoning_tokens?: number; // o1/o3 reasoning tokens - }; -} -``` - -### Token Counting Implementation - -```typescript -interface TokenUsage { - inputTokens: number; - outputTokens: number; - totalTokens: number; - reasoningTokens: number; - cacheReadTokens: number; - cacheWriteTokens: number; -} - -class TokenCounter { - private totalUsage: TokenUsage = { - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - reasoningTokens: 
0, - cacheReadTokens: 0, - cacheWriteTokens: 0 - }; - - // Extract tokens from Chat Completions response - extractChatCompletionUsage(usage?: CompletionUsage): TokenUsage | null { - if (!usage) return null; - - const extracted: TokenUsage = { - inputTokens: usage.prompt_tokens || 0, - outputTokens: usage.completion_tokens || 0, - totalTokens: usage.total_tokens || 0, - reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0, - cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0, - cacheWriteTokens: 0 // Not available in this format - }; - - this.addUsage(extracted); - return extracted; - } - - // Extract tokens from Responses API response - extractResponseUsage(usage?: ResponseUsage): TokenUsage | null { - if (!usage) return null; - - const extracted: TokenUsage = { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - totalTokens: usage.total_tokens || 0, - reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0, - cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0, - cacheWriteTokens: 0 // Not available in current API - }; - - this.addUsage(extracted); - return extracted; - } - - private addUsage(usage: TokenUsage) { - this.totalUsage.inputTokens += usage.inputTokens; - this.totalUsage.outputTokens += usage.outputTokens; - this.totalUsage.totalTokens += usage.totalTokens; - this.totalUsage.reasoningTokens += usage.reasoningTokens; - this.totalUsage.cacheReadTokens += usage.cacheReadTokens; - this.totalUsage.cacheWriteTokens += usage.cacheWriteTokens; - } - - getTotalUsage(): TokenUsage { - return { ...this.totalUsage }; - } - - reset() { - this.totalUsage = { - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - reasoningTokens: 0, - cacheReadTokens: 0, - cacheWriteTokens: 0 - }; - } - - // Format for display - formatUsage(usage?: TokenUsage): string { - const u = usage || this.totalUsage; - let parts = [`↑${u.inputTokens}`, `↓${u.outputTokens}`]; - - if (u.reasoningTokens > 0) 
{ - parts.push(`⚡${u.reasoningTokens}`); - } - - if (u.cacheReadTokens > 0) { - parts.push(`📖${u.cacheReadTokens}`); - } - - if (u.cacheWriteTokens > 0) { - parts.push(`📝${u.cacheWriteTokens}`); - } - - return parts.join(" "); - } -} - -// Usage with streaming -async function countTokensInStream() { - const tokenCounter = new TokenCounter(); - - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Tell me about AI" }], - stream: true, - stream_options: { include_usage: true } // Important for token counts - }); - - for await (const chunk of stream) { - // Token usage comes in final chunk when stream_options.include_usage = true - if (chunk.usage) { - const usage = tokenCounter.extractChatCompletionUsage(chunk.usage); - console.log("Token usage:", tokenCounter.formatUsage(usage)); - } - } - - console.log("Total usage:", tokenCounter.formatUsage()); -} -``` - -### Token Estimation (for planning) - -```typescript -// Rough token estimation for planning purposes -function estimateTokens(text: string): number { - // Very rough approximation: ~4 characters per token for English - return Math.ceil(text.length / 4); -} - -function estimateMessageTokens(messages: any[]): number { - let total = 0; - - for (const message of messages) { - if (typeof message.content === "string") { - total += estimateTokens(message.content); - } else if (Array.isArray(message.content)) { - for (const content of message.content) { - if (content.text) { - total += estimateTokens(content.text); - } - } - } - - // Add overhead for message formatting - total += 10; - } - - return total; -} - -// Check if request will fit in context window -function checkContextLimit(messages: any[], maxTokens: number = 128000): boolean { - const estimated = estimateMessageTokens(messages); - const safetyMargin = 1000; // Reserve tokens for response - - return estimated + safetyMargin < maxTokens; -} -``` - -## Caching - -### Cache Headers and Configuration - 
-```typescript -// OpenAI supports prompt caching via special message formatting -// Cache is automatically used when messages are repeated - -async function demonstrateCaching() { - const longSystemPrompt = ` - You are an expert software engineer with deep knowledge of TypeScript, React, Node.js... - [Very long system prompt - 1000+ tokens] - `; - - // First request - will cache the system prompt - const response1 = await client.chat.completions.create({ - model: "gpt-4o", - messages: [ - { role: "system", content: longSystemPrompt }, - { role: "user", content: "Explain TypeScript generics" } - ] - }); - - console.log("First request usage:", response1.usage); - - // Second request with same system prompt - will use cache - const response2 = await client.chat.completions.create({ - model: "gpt-4o", - messages: [ - { role: "system", content: longSystemPrompt }, // Cached - { role: "user", content: "Explain React hooks" } - ] - }); - - console.log("Second request usage:", response2.usage); - console.log("Cache read tokens:", response2.usage?.prompt_tokens_details?.cached_tokens); -} -``` - -### Manual Cache Control - -```typescript -// For providers that support explicit cache control -interface CacheConfig { - enabled: boolean; - ttl?: number; // Time to live in seconds -} - -class CachedClient { - private client: OpenAI; - private cache = new Map(); - - constructor(apiKey: string, baseURL?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - } - - private getCacheKey(messages: any[], model: string): string { - return JSON.stringify({ messages, model }); - } - - private isCacheValid(entry: { timestamp: number; ttl: number }): boolean { - return Date.now() - entry.timestamp < entry.ttl * 1000; - } - - async completionWithCache( - messages: any[], - model: string, - cacheConfig: CacheConfig = { enabled: true, ttl: 3600 } - ) { - if (cacheConfig.enabled) { - const cacheKey = this.getCacheKey(messages, model); - const cached = this.cache.get(cacheKey); - - if 
(cached && this.isCacheValid(cached)) { - console.log("Cache hit"); - return cached.response; - } - } - - const response = await this.client.chat.completions.create({ - model, - messages - }); - - if (cacheConfig.enabled) { - const cacheKey = this.getCacheKey(messages, model); - this.cache.set(cacheKey, { - response, - timestamp: Date.now(), - ttl: cacheConfig.ttl || 3600 - }); - } - - return response; - } - - clearCache() { - this.cache.clear(); - } -} -``` - -## Chat Completions vs Responses API - -### When to Use Each API - -```typescript -// Chat Completions API - Traditional conversational interface -// Use for: Most general chat/completion tasks -interface ChatCompletionsUseCase { - // ✅ Good for: - // - Regular conversations - // - Function/tool calling - // - Most models (gpt-4o, claude, gemini via compatibility) - // - Streaming text generation - // - File uploads and vision - - // ❌ Limitations: - // - No access to reasoning/thinking tokens for o1/o3 - // - Less structured for complex workflows -} - -// Responses API - Structured response interface -// Use for: Complex reasoning tasks, tool workflows -interface ResponsesAPIUseCase { - // ✅ Good for: - // - o1/o3 models with reasoning access - // - Complex tool calling workflows - // - Structured output requirements - // - Background processing - // - Access to reasoning tokens - - // ❌ Limitations: - // - Newer API with less ecosystem support - // - More complex message format - // - Not all models supported -} -``` - -### API Decision Logic - -```typescript -function selectAPI( - model: string, - requiresReasoning: boolean, - hasComplexTools: boolean -): "completions" | "responses" { - // Use Responses API for o1/o3 when reasoning is needed - if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { - return "responses"; - } - - // Use Responses API for complex tool workflows - if (hasComplexTools && model.includes("gpt-4")) { - return "responses"; - } - - // Default to Chat Completions 
for broader compatibility - return "completions"; -} - -// Usage example -const model = "o1-mini"; -const needsReasoning = true; -const api = selectAPI(model, needsReasoning, false); - -if (api === "responses") { - console.log("Using Responses API for reasoning access"); -} else { - console.log("Using Chat Completions API for compatibility"); -} -``` - -### Dual API Client - -```typescript -class DualAPIClient { - private client: OpenAI; - - constructor(apiKey: string, baseURL?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - } - - async complete(params: { - model: string; - messages: any[]; - tools?: any[]; - maxTokens?: number; - temperature?: number; - stream?: boolean; - reasoning?: boolean; - }) { - const api = this.selectAPI(params.model, params.reasoning || false); - - if (api === "responses") { - return this.callResponsesAPI(params); - } else { - return this.callChatCompletionsAPI(params); - } - } - - private selectAPI(model: string, requiresReasoning: boolean): "completions" | "responses" { - if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { - return "responses"; - } - return "completions"; - } - - private async callChatCompletionsAPI(params: any) { - const requestParams = { - model: params.model, - messages: params.messages, - max_completion_tokens: params.maxTokens, - temperature: params.temperature, - tools: params.tools, - stream: params.stream - }; - - if (params.stream) { - return this.client.chat.completions.create(requestParams); - } else { - return this.client.chat.completions.create(requestParams); - } - } - - private async callResponsesAPI(params: any) { - // Convert messages to Responses API format - const input = params.messages.map((msg: any) => { - if (msg.role === "user") { - return { - role: "user", - content: [{ type: "input_text", text: msg.content }] - }; - } else if (msg.role === "system") { - return { - role: "developer", - content: msg.content - }; - } - return msg; - }); - - const requestParams 
= { - model: params.model, - input, - max_output_tokens: params.maxTokens, - tools: params.tools, - stream: params.stream, - reasoning: params.reasoning ? { effort: "low" } : undefined - }; - - return this.client.responses.create(requestParams); - } -} -``` - -## Tool/Function Calling - -### Tool Definition Format - -```typescript -// OpenAI tool definition format (JSON Schema) -interface ToolDefinition { - type: "function"; - function: { - name: string; - description: string; - parameters: { - type: "object"; - properties: Record<string, any>; - required: string[]; - }; - }; -} - -// Example tool definitions -const tools: ToolDefinition[] = [ - { - type: "function", - function: { - name: "read_file", - description: "Read the contents of a file", - parameters: { - type: "object", - properties: { - path: { - type: "string", - description: "The file path to read" - } - }, - required: ["path"] - } - } - }, - { - type: "function", - function: { - name: "execute_command", - description: "Execute a shell command", - parameters: { - type: "object", - properties: { - command: { - type: "string", - description: "The command to execute" - }, - timeout: { - type: "number", - description: "Timeout in seconds", - default: 30 - } - }, - required: ["command"] - } - } - } -]; -``` - -### Tool Execution Engine - -```typescript -type ToolFunction = (args: any) => Promise<string>; - -class ToolExecutor { - private tools = new Map<string, ToolFunction>(); - - register(name: string, fn: ToolFunction) { - this.tools.set(name, fn); - } - - async execute(name: string, argsJson: string): Promise<string> { - const tool = this.tools.get(name); - if (!tool) { - throw new Error(`Unknown tool: ${name}`); - } - - try { - const args = JSON.parse(argsJson); - return await tool(args); - } catch (error) { - throw new Error(`Tool execution failed: ${error.message}`); - } - } - - getAvailableTools(): string[] { - return Array.from(this.tools.keys()); - } -} - -// Register tool implementations -const toolExecutor = new ToolExecutor(); - 
-toolExecutor.register("read_file", async (args: { path: string }) => { - const fs = await import("fs/promises"); - try { - const content = await fs.readFile(args.path, "utf-8"); - return content; - } catch (error) { - return `Error reading file: ${error.message}`; - } -}); - -toolExecutor.register("execute_command", async (args: { command: string; timeout?: number }) => { - const { exec } = await import("child_process"); - const { promisify } = await import("util"); - const execAsync = promisify(exec); - - try { - const { stdout, stderr } = await execAsync(args.command, { - timeout: (args.timeout || 30) * 1000 - }); - return stdout + (stderr ? `\nSTDERR: ${stderr}` : ""); - } catch (error) { - return `Command failed: ${error.message}`; - } -}); -``` - -### Complete Tool Calling Flow - -```typescript -async function completeChatWithTools(userMessage: string) { - const conversation = new ConversationManager("completions", "You are a helpful assistant with file system access."); - const tokenCounter = new TokenCounter(); - - conversation.addUserMessage(userMessage); - - while (true) { - const response = await client.chat.completions.create({ - model: "gpt-4o", - messages: conversation.getMessages(), - tools, - tool_choice: "auto", - max_completion_tokens: 1000 - }); - - // Track token usage - if (response.usage) { - tokenCounter.extractChatCompletionUsage(response.usage); - } - - const message = response.choices[0].message; - - if (message.tool_calls && message.tool_calls.length > 0) { - // Add assistant message with tool calls to conversation - conversation.getMessages().push({ - role: "assistant", - content: message.content, - tool_calls: message.tool_calls - }); - - // Execute each tool call - for (const toolCall of message.tool_calls) { - console.log(`🔧 Calling ${toolCall.function.name}...`); - - try { - const result = await toolExecutor.execute( - toolCall.function.name, - toolCall.function.arguments - ); - - console.log(`✅ Tool result: ${result.substring(0, 
100)}...`); - conversation.addToolResult(toolCall.id, result); - - } catch (error) { - console.log(`❌ Tool error: ${error.message}`); - conversation.addToolResult(toolCall.id, `Error: ${error.message}`); - } - } - - // Continue conversation with tool results - continue; - } else { - // Final response - const content = message.content || ""; - conversation.addAssistantMessage(content); - - console.log("🤖 Assistant:", content); - console.log("📊 Token usage:", tokenCounter.formatUsage()); - - return content; - } - } -} - -// Usage -await completeChatWithTools("Read the package.json file and tell me about this project"); -``` - -### Streaming Tool Calls - -```typescript -async function streamingToolCalls(userMessage: string) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: userMessage }], - tools, - tool_choice: "auto", - stream: true - }); - - let currentToolCalls: Map<string, { name: string; args: string }> = new Map(); - let assistantMessage = ""; - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - const delta = choice.delta; - - // Regular content - if (delta.content) { - assistantMessage += delta.content; - process.stdout.write(delta.content); - } - - // Tool call deltas - if (delta.tool_calls) { - for (const toolCallDelta of delta.tool_calls) { - const id = toolCallDelta.id; - if (!id) continue; - - if (!currentToolCalls.has(id)) { - currentToolCalls.set(id, { name: "", args: "" }); - } - - const toolCall = currentToolCalls.get(id)!; - - if (toolCallDelta.function?.name) { - toolCall.name += toolCallDelta.function.name; - } - - if (toolCallDelta.function?.arguments) { - toolCall.args += toolCallDelta.function.arguments; - } - } - } - - // When finished, execute accumulated tool calls - if (choice.finish_reason === "tool_calls") { - console.log("\n🔧 Executing tools..."); - - for (const [id, toolCall] of currentToolCalls) { - try { - const result = await 
toolExecutor.execute(toolCall.name, toolCall.args); - console.log(`✅ ${toolCall.name}: ${result.substring(0, 100)}...`); - } catch (error) { - console.log(`❌ ${toolCall.name}: ${error.message}`); - } - } - - break; - } - } -} -``` - -### Responses API Tool Calling - -```typescript -async function responsesAPIToolCalling() { - const response = await client.responses.create({ - model: "gpt-4o", - input: [ - { - role: "user", - content: [{ type: "input_text", text: "List files in current directory" }] - } - ], - tools: [ - { - type: "function", - function: { - name: "list_directory", - description: "List files in a directory", - parameters: { - type: "object", - properties: { - path: { type: "string", description: "Directory path" } - }, - required: ["path"] - } - } - } - ] - }); - - for (const item of response.output || []) { - switch (item.type) { - case "function_call": - console.log(`🔧 Tool call: ${item.name}`); - console.log(`📝 Arguments: ${item.arguments}`); - - try { - const result = await toolExecutor.execute(item.name, item.arguments); - console.log(`✅ Result: ${result}`); - - // In a real implementation, you'd add this result back to the conversation - // and continue the response - } catch (error) { - console.log(`❌ Error: ${error.message}`); - } - break; - - case "message": - for (const content of item.content || []) { - if (content.type === "output_text") { - console.log("🤖 Response:", content.text); - } - } - break; - } - } -} -``` - -## System Prompts - -### System Prompt Handling by Model Type - -```typescript -interface SystemPromptConfig { - content: string; - role: "system" | "developer"; // Different models use different roles -} - -function formatSystemPrompt(prompt: string, model: string, api: "completions" | "responses"): any { - // Chat Completions API - if (api === "completions") { - // Most models use "system" role - if (model.includes("claude") || model.includes("gemini")) { - // Some providers via OpenAI compatibility might expect "system" 
- return { role: "system", content: prompt }; - } - - // OpenAI native models - return { role: "system", content: prompt }; - } - - // Responses API uses "developer" role for system messages - return { role: "developer", content: prompt }; -} - -// System prompt best practices -const systemPrompts = { - // General assistant - assistant: "You are a helpful, accurate, and reliable AI assistant. Provide clear, concise, and helpful responses.", - - // Code assistant - coder: `You are an expert software engineer with deep knowledge of multiple programming languages, frameworks, and best practices. - -Key principles: -- Write clean, maintainable, and well-documented code -- Follow language-specific conventions and best practices -- Explain your reasoning and trade-offs -- Suggest improvements and alternatives when appropriate -- Always test your code mentally before providing it - -When helping with code: -1. Understand the requirements fully -2. Choose appropriate tools and patterns -3. Provide working, tested solutions -4. Explain key concepts and decisions`, - - // Research assistant - researcher: `You are a thorough research assistant. When answering questions: - -1. Provide accurate, well-sourced information -2. Acknowledge limitations in your knowledge -3. Structure responses clearly with headings and bullet points -4. Cite sources when possible -5. Distinguish between facts, analysis, and opinions -6. Ask clarifying questions when the request is ambiguous`, - - // Tool-enabled assistant - toolEnabled: `You are an AI assistant with access to various tools for file operations, web searches, and code execution. 
- -Guidelines for tool use: -- Use tools when they would be helpful to answer the user's question -- Always explain what you're doing before calling a tool -- Interpret and summarize tool results for the user -- If a tool fails, try alternative approaches -- Be transparent about what information comes from tools vs your training - -Available capabilities: -- Read and write files -- Execute shell commands -- Search the web -- Analyze code and data` -}; -``` - -### Dynamic System Prompt Building - -```typescript -class SystemPromptBuilder { - private sections: string[] = []; - - addRole(role: string): this { - this.sections.push(`You are ${role}.`); - return this; - } - - addCapabilities(capabilities: string[]): this { - if (capabilities.length > 0) { - this.sections.push(`You have access to: ${capabilities.join(", ")}.`); - } - return this; - } - - addGuidelines(guidelines: string[]): this { - if (guidelines.length > 0) { - this.sections.push("Guidelines:\n" + guidelines.map(g => `- ${g}`).join("\n")); - } - return this; - } - - addContext(context: string): this { - if (context.trim()) { - this.sections.push(`Context: ${context}`); - } - return this; - } - - build(): string { - return this.sections.join("\n\n"); - } - - reset(): this { - this.sections = []; - return this; - } -} - -// Usage examples -const codeAssistantPrompt = new SystemPromptBuilder() - .addRole("an expert TypeScript developer") - .addCapabilities(["file system access", "code execution", "documentation lookup"]) - .addGuidelines([ - "Write clean, type-safe code", - "Explain complex concepts clearly", - "Suggest best practices", - "Test code before providing it" - ]) - .build(); - -const customerServicePrompt = new SystemPromptBuilder() - .addRole("a helpful customer service representative") - .addGuidelines([ - "Be polite and professional", - "Listen carefully to customer concerns", - "Provide accurate information", - "Escalate complex issues when needed" - ]) - .addContext("You work for TechCorp, 
a software company that makes productivity tools.") - .build(); -``` - -### Model-Specific System Prompt Optimization - -```typescript -function optimizeSystemPromptForModel(basePrompt: string, model: string): string { - // OpenAI models (especially o1/o3) work well with detailed, structured prompts - if (model.includes("gpt") || model.includes("o1") || model.includes("o3")) { - return `${basePrompt} - -Think step by step when solving complex problems. Show your reasoning process clearly.`; - } - - // Claude models prefer more conversational, principle-based prompts - if (model.includes("claude")) { - return `${basePrompt} - -I value helpful, harmless, and honest responses. Please be thoughtful and thorough in your analysis.`; - } - - // Gemini models work well with structured instructions - if (model.includes("gemini")) { - return `${basePrompt} - -Please structure your responses clearly and provide specific, actionable advice.`; - } - - // Default: return as-is - return basePrompt; -} - -// Provider-specific prompt injection handling -function detectAndMitigatePromptInjection(userInput: string): { safe: boolean; cleaned?: string } { - const injectionPatterns = [ - /ignore.*previous.*instruction/i, - /forget.*system.*prompt/i, - /act.*as.*different/i, - /pretend.*you.*are/i, - /new.*role.*now/i - ]; - - for (const pattern of injectionPatterns) { - if (pattern.test(userInput)) { - return { - safe: false, - cleaned: userInput.replace(pattern, "[FILTERED]") - }; - } - } - - return { safe: true }; -} -``` - -## Provider-Specific Features - -### Reasoning Support Detection - -```typescript -// From pi-agent codebase - detect and handle reasoning support per provider -type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other"; - -function detectProvider(baseURL?: string): Provider { - if (!baseURL) return "openai"; - if (baseURL.includes("api.openai.com")) return "openai"; - if (baseURL.includes("generativelanguage.googleapis.com")) return 
"gemini"; - if (baseURL.includes("api.groq.com")) return "groq"; - if (baseURL.includes("api.anthropic.com")) return "anthropic"; - if (baseURL.includes("openrouter.ai")) return "openrouter"; - return "other"; -} - -// Provider-specific reasoning parameter handling -function adjustRequestForReasoning( - requestOptions: any, - api: "completions" | "responses", - provider: Provider, - supportsReasoning: boolean -): any { - if (!supportsReasoning) return requestOptions; - - switch (provider) { - case "openai": - // OpenAI standard format - if (api === "responses") { - requestOptions.reasoning = { - effort: "low", - summary: "detailed" - }; - } else { - requestOptions.reasoning_effort = "low"; - } - break; - - case "gemini": - // Gemini uses extra_body for thinking configuration - if (api === "completions") { - requestOptions.extra_body = { - google: { - thinking_config: { - thinking_budget: 1024, - include_thoughts: true - } - } - }; - // Remove reasoning_effort when using thinking_config - delete requestOptions.reasoning_effort; - } - break; - - case "groq": - // Groq uses reasoning_format for Chat Completions - if (api === "completions") { - requestOptions.reasoning_format = "parsed"; - requestOptions.reasoning_effort = "low"; - } else { - // Groq Responses API doesn't support reasoning.summary - requestOptions.reasoning = { effort: "low" }; - } - break; - - case "openrouter": - // OpenRouter unified reasoning format - if (api === "completions") { - requestOptions.reasoning = { effort: "low" }; - delete requestOptions.reasoning_effort; - } - break; - - default: - // Standard OpenAI format for others - if (api === "responses") { - requestOptions.reasoning = { effort: "low" }; - } else { - requestOptions.reasoning_effort = "low"; - } - } - - return requestOptions; -} -``` - -### Provider-Specific Response Parsing - -```typescript -// Extract reasoning content from provider-specific response formats -function parseReasoningFromMessage(message: any, provider: Provider): 
{ - cleanContent: string; - reasoningTexts: string[]; -} { - const reasoningTexts: string[] = []; - let cleanContent = message.content || ""; - - switch (provider) { - case "gemini": - // Gemini returns thinking in <thought> tags - if (cleanContent.includes("<thought>")) { - const thoughtMatches = cleanContent.matchAll(/<thought>([\s\S]*?)<\/thought>/g); - for (const match of thoughtMatches) { - reasoningTexts.push(match[1].trim()); - } - // Remove thought tags from response - cleanContent = cleanContent.replace(/<thought>[\s\S]*?<\/thought>/g, "").trim(); - } - break; - - case "groq": - // Groq returns reasoning in separate field - if (message.reasoning) { - reasoningTexts.push(message.reasoning); - } - break; - - case "openrouter": - // OpenRouter uses message.reasoning field - if (message.reasoning) { - reasoningTexts.push(message.reasoning); - } - break; - - default: - // OpenAI and others handle reasoning via events - break; - } - - return { cleanContent, reasoningTexts }; -} -``` - -### Provider-Specific Error Handling - -```typescript -function handleProviderSpecificErrors(error: any, provider: Provider): Error { - switch (provider) { - case "groq": - if (error.message?.includes("reasoning_format")) { - return new Error("Reasoning not supported by this Groq model"); - } - break; - - case "gemini": - if (error.message?.includes("thinking_config")) { - return new Error("Thinking mode not supported by this Gemini model"); - } - break; - - case "anthropic": - if (error.message?.includes("reasoning")) { - return new Error("Reasoning not available via Anthropic's OpenAI compatibility layer"); - } - break; - - case "openrouter": - // OpenRouter passes through underlying provider errors - if (error.message?.includes("not supported")) { - return new Error("Feature not supported by the selected model on OpenRouter"); - } - break; - } - - return error; -} -``` - -## Complete Implementation Examples - -### Basic Chat Client - -```typescript -import OpenAI from "openai"; -import type { 
ChatCompletionMessageParam } from "openai/resources/chat/completions"; - -class BasicChatClient { - private client: OpenAI; - private messages: ChatCompletionMessageParam[] = []; - - constructor(apiKey: string, baseURL?: string, systemPrompt?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - - if (systemPrompt) { - this.messages.push({ role: "system", content: systemPrompt }); - } - } - - async chat(userMessage: string): Promise<string> { - this.messages.push({ role: "user", content: userMessage }); - - try { - const response = await this.client.chat.completions.create({ - model: "gpt-4o", - messages: this.messages, - max_completion_tokens: 1000, - temperature: 0.7 - }); - - const assistantMessage = response.choices[0]?.message?.content || ""; - this.messages.push({ role: "assistant", content: assistantMessage }); - - return assistantMessage; - } catch (error) { - console.error("Chat error:", error); - throw error; - } - } - - getHistory(): ChatCompletionMessageParam[] { - return [...this.messages]; - } - - clearHistory(): void { - this.messages = this.messages.filter(m => m.role === "system"); - } -} -``` - -### Advanced Streaming Client with All Features - -```typescript -import OpenAI from "openai"; -import type { - ChatCompletionCreateParamsStreaming, - ChatCompletionChunk -} from "openai/resources/chat/completions"; - -interface StreamingClientConfig { - apiKey: string; - baseURL?: string; - model: string; - systemPrompt?: string; - tools?: any[]; - maxTokens?: number; - temperature?: number; -} - -interface StreamEvent { - type: "content" | "tool_call" | "reasoning" | "usage" | "error" | "complete"; - data: any; -} - -class AdvancedStreamingClient { - private client: OpenAI; - private config: StreamingClientConfig; - private messages: any[] = []; - private abortController: AbortController | null = null; - private tokenCounter = new TokenCounter(); - - constructor(config: StreamingClientConfig) { - this.config = config; - this.client = new OpenAI({ - 
apiKey: config.apiKey, - baseURL: config.baseURL - }); - - if (config.systemPrompt) { - this.messages.push({ role: "system", content: config.systemPrompt }); - } - } - - async *streamChat(userMessage: string): AsyncGenerator { - this.messages.push({ role: "user", content: userMessage }); - this.abortController = new AbortController(); - - try { - const params: ChatCompletionCreateParamsStreaming = { - model: this.config.model, - messages: this.messages, - stream: true, - max_completion_tokens: this.config.maxTokens || 1000, - temperature: this.config.temperature || 0.7, - tools: this.config.tools, - tool_choice: this.config.tools ? "auto" : undefined, - stream_options: { include_usage: true } - }; - - const stream = await this.client.chat.completions.create(params, { - signal: this.abortController.signal - }); - - let assistantContent = ""; - let currentToolCalls = new Map(); - - for await (const chunk of stream) { - if (this.abortController.signal.aborted) break; - - const choice = chunk.choices[0]; - if (!choice) continue; - - // Handle content - if (choice.delta?.content) { - assistantContent += choice.delta.content; - yield { - type: "content", - data: { delta: choice.delta.content, content: assistantContent } - }; - } - - // Handle tool calls - if (choice.delta?.tool_calls) { - for (const toolCall of choice.delta.tool_calls) { - if (!toolCall.id) continue; - - if (!currentToolCalls.has(toolCall.id)) { - currentToolCalls.set(toolCall.id, { - id: toolCall.id, - name: "", - arguments: "" - }); - } - - const call = currentToolCalls.get(toolCall.id); - if (toolCall.function?.name) { - call.name += toolCall.function.name; - } - if (toolCall.function?.arguments) { - call.arguments += toolCall.function.arguments; - } - - yield { - type: "tool_call", - data: { id: toolCall.id, delta: toolCall, current: call } - }; - } - } - - // Handle usage - if (chunk.usage) { - const usage = this.tokenCounter.extractChatCompletionUsage(chunk.usage); - yield { - type: "usage", - 
data: usage - }; - } - - // Handle completion - if (choice.finish_reason) { - if (choice.finish_reason === "tool_calls") { - // Execute tool calls - const toolResults = await this.executeToolCalls(Array.from(currentToolCalls.values())); - - // Add messages and continue - this.messages.push({ - role: "assistant", - content: assistantContent || null, - tool_calls: Array.from(currentToolCalls.values()).map(call => ({ - id: call.id, - type: "function", - function: { - name: call.name, - arguments: call.arguments - } - })) - }); - - for (const result of toolResults) { - this.messages.push({ - role: "tool", - tool_call_id: result.id, - content: result.content - }); - } - - // Continue stream for final response - yield* this.streamChat(""); - return; - } else { - // Regular completion - if (assistantContent) { - this.messages.push({ role: "assistant", content: assistantContent }); - } - - yield { - type: "complete", - data: { reason: choice.finish_reason, content: assistantContent } - }; - } - } - } - } catch (error) { - yield { - type: "error", - data: { error: error.message } - }; - } finally { - this.abortController = null; - } - } - - private async executeToolCalls(toolCalls: any[]): Promise<Array<{ id: string; content: string }>> { - const results = []; - - for (const call of toolCalls) { - try { - // Tool execution would be implemented here - const result = await this.executeTool(call.name, call.arguments); - results.push({ id: call.id, content: result }); - } catch (error) { - results.push({ id: call.id, content: `Error: ${error.message}` }); - } - } - - return results; - } - - private async executeTool(name: string, argsJson: string): Promise<string> { - // Implement tool execution logic - return `Tool ${name} executed with args: ${argsJson}`; - } - - interrupt(): void { - this.abortController?.abort(); - } - - getUsage() { - return this.tokenCounter.getTotalUsage(); - } -} - -// Usage example -const client = new AdvancedStreamingClient({ - apiKey: process.env.OPENAI_API_KEY!, - model: "gpt-4o", - 
systemPrompt: "You are a helpful assistant.", - tools: [/* tool definitions */] -}); - -for await (const event of client.streamChat("Help me write a TypeScript function")) { - switch (event.type) { - case "content": - process.stdout.write(event.data.delta); - break; - case "tool_call": - console.log(`\n🔧 Tool: ${event.data.current.name}`); - break; - case "usage": - console.log(`\n📊 Tokens: ${event.data.totalTokens}`); - break; - case "complete": - console.log(`\n✅ Complete (${event.data.reason})`); - break; - case "error": - console.log(`\n❌ Error: ${event.data.error}`); - break; - } -} -``` - -This comprehensive guide covers all the essential features needed to implement a robust OpenAI SDK integration. Each section provides working code examples, actual types from the SDK, and real-world patterns from the pi-mono codebase. - -## Key Takeaways - -1. **Always use AbortController** for request cancellation -2. **Handle both Chat Completions and Responses APIs** depending on model capabilities -3. **Implement comprehensive error handling** with proper error types -4. **Track token usage** for cost management and optimization -5. **Support streaming** for better user experience -6. **Handle provider-specific features** like reasoning and caching -7. **Implement proper tool calling workflows** for agentic applications -8. **Serialize conversation state** for session persistence -9. **Use appropriate system prompts** for different model types -10. **Test reasoning support** dynamically for each provider/model combination \ No newline at end of file diff --git a/packages/ai/docs/plan.md b/packages/ai/docs/plan.md deleted file mode 100644 index d09177ac..00000000 --- a/packages/ai/docs/plan.md +++ /dev/null @@ -1,950 +0,0 @@ -# Unified AI API Design Plan - -Based on comprehensive investigation of OpenAI, Anthropic, and Gemini SDKs with actual implementation examples. 
- -## Key API Differences Summary - -### OpenAI -- **Dual APIs**: Chat Completions (broad support) vs Responses API (o1/o3 thinking content) -- **Thinking**: Only Responses API gives actual content, Chat Completions only gives counts -- **Roles**: `system`, `user`, `assistant`, `tool` (o1/o3 use `developer` instead of `system`) -- **Streaming**: Deltas in chunks with `stream_options.include_usage` for token usage - -### Anthropic -- **Single API**: Messages API with comprehensive streaming -- **Content Blocks**: Always arrays, even for simple text -- **System**: Separate parameter, not in messages array -- **Tool Use**: Content blocks, not separate message role -- **Thinking**: Explicit budget allocation, appears as content blocks -- **Caching**: Per-block cache control with TTL options - -### Gemini -- **Parts System**: All content split into typed parts -- **System**: Separate `systemInstruction` parameter -- **Roles**: Uses `model` instead of `assistant` -- **Thinking**: `part.thought: true` flag identifies reasoning -- **Streaming**: Returns complete responses, not deltas -- **Function Calls**: Embedded in parts array - -## Unified API Design - -### Core Client - -```typescript -interface AIConfig { - provider: 'openai' | 'anthropic' | 'gemini'; - apiKey: string; - model: string; - baseURL?: string; // For OpenAI-compatible endpoints -} - -interface ModelInfo { - id: string; - name: string; - provider: string; - capabilities: { - reasoning: boolean; - toolCall: boolean; - vision: boolean; - audio?: boolean; - }; - cost: { - input: number; // per million tokens - output: number; // per million tokens - cacheRead?: number; - cacheWrite?: number; - }; - limits: { - context: number; - output: number; - }; - knowledge?: string; // Knowledge cutoff date -} - -class AI { - constructor(config: AIConfig); - - // Main streaming interface - everything else builds on this - async *stream(request: Request): AsyncGenerator; - - // Convenience method for non-streaming - async 
complete(request: Request): Promise<Message>; - - // Get model information - getModelInfo(): ModelInfo; - - // Abort current request - abort(): void; -} -``` - -### Message Format - -```typescript -type Message = - | { - role: 'user'; - content: string | Content[]; - } - | { - role: 'assistant'; - content: string | Content[]; - model: string; - usage: TokenUsage; - toolCalls?: { - id: string; - name: string; - arguments: Record<string, unknown>; - }[]; - } - | { - role: 'tool'; - content: string | Content[]; - toolCallId: string; - }; - -interface Content { - type: 'text' | 'image'; - text?: string; - image?: { - data: string; // base64 - mimeType: string; - }; -} -``` - -### Request Format - -```typescript -interface Request { - messages: Message[]; - - // System prompt (separated for Anthropic/Gemini compatibility) - systemPrompt?: string; - - // Common parameters - temperature?: number; - maxTokens?: number; - stopSequences?: string[]; - - // Tools - tools?: { - name: string; - description: string; - parameters: Record<string, unknown>; // JSON Schema - }[]; - toolChoice?: 'auto' | 'none' | 'required' | { name: string }; - - // Thinking/reasoning - reasoning?: { - enabled: boolean; - effort?: 'low' | 'medium' | 'high'; // OpenAI reasoning_effort - maxTokens?: number; // Anthropic thinking budget - }; - - // Abort signal - signal?: AbortSignal; -} -``` - -### Event Stream - -```typescript -type Event = - | { type: 'start'; model: string; provider: string } - | { type: 'text'; content: string; delta: string } - | { type: 'thinking'; content: string; delta: string } - | { type: 'toolCall'; toolCall: ToolCall } - | { type: 'usage'; usage: TokenUsage } - | { type: 'done'; reason: StopReason; message: Message } // message includes model and usage - | { type: 'error'; error: Error }; - -interface TokenUsage { - input: number; - output: number; - total: number; - thinking?: number; - cacheRead?: number; - cacheWrite?: number; - cost?: { - input: number; - output: number; - cache?: number; - total: number; - }; 
-} - -type StopReason = 'stop' | 'length' | 'toolUse' | 'safety' | 'error'; -``` - -## Caching Strategy - -Caching is handled automatically by each provider adapter: - -- **OpenAI**: Automatic prompt caching (no configuration needed) -- **Gemini**: Automatic context caching (no configuration needed) -- **Anthropic**: We automatically add cache_control to the system prompt and older messages - -```typescript -class AnthropicAdapter { - private addCaching(messages: Message[]): any[] { - const anthropicMessages = []; - - // Automatically cache older messages (assuming incremental context) - for (let i = 0; i < messages.length; i++) { - const msg = messages[i]; - const isOld = i < messages.length - 2; // Cache all but last 2 messages - - // Convert to Anthropic format with automatic caching - const blocks = this.toContentBlocks(msg); - if (isOld && blocks.length > 0) { - blocks[0].cache_control = { type: 'ephemeral' }; - } - - anthropicMessages.push({ - role: msg.role === 'assistant' ? 'assistant' : 'user', - content: blocks - }); - } - - return anthropicMessages; - } -} -``` - -## Provider Adapter Implementation - -### OpenAI Adapter - -```typescript -class OpenAIAdapter { - private client: OpenAI; - private useResponsesAPI: boolean = false; - - async *stream(request: Request): AsyncGenerator { - // Determine which API to use - if (request.reasoning?.enabled && this.isReasoningModel()) { - yield* this.streamResponsesAPI(request); - } else { - yield* this.streamChatCompletions(request); - } - } - - private async *streamChatCompletions(request: Request) { - const stream = await this.client.chat.completions.create({ - model: this.model, - messages: this.toOpenAIMessages(request), - tools: this.toOpenAITools(request.tools), - reasoning_effort: request.reasoning?.effort, - stream: true, - stream_options: { include_usage: true } - }); - - let content = ''; - let toolCalls: any[] = []; - - for await (const chunk of stream) { - if (chunk.choices[0]?.delta?.content) { - const 
delta = chunk.choices[0].delta.content; - content += delta; - yield { type: 'text', content, delta }; - } - - if (chunk.choices[0]?.delta?.tool_calls) { - // Accumulate tool calls - this.mergeToolCalls(toolCalls, chunk.choices[0].delta.tool_calls); - for (const tc of toolCalls) { - yield { type: 'toolCall', toolCall: tc, partial: true }; - } - } - - if (chunk.usage) { - yield { - type: 'usage', - usage: { - input: chunk.usage.prompt_tokens, - output: chunk.usage.completion_tokens, - total: chunk.usage.total_tokens, - thinking: chunk.usage.completion_tokens_details?.reasoning_tokens - } - }; - } - } - } - - private async *streamResponsesAPI(request: Request) { - // Use Responses API for actual thinking content - const response = await this.client.responses.create({ - model: this.model, - input: this.toResponsesInput(request), - tools: this.toResponsesTools(request.tools), - stream: true - }); - - for await (const event of response) { - if (event.type === 'response.reasoning_text.delta') { - yield { - type: 'thinking', - content: event.text, - delta: event.delta - }; - } - // Handle other event types... - } - } - - private toOpenAIMessages(request: Request): any[] { - const messages: any[] = []; - - // Handle system prompt - if (request.systemPrompt) { - const role = this.isReasoningModel() ? 
'developer' : 'system'; - messages.push({ role, content: request.systemPrompt }); - } - - // Convert unified messages - for (const msg of request.messages) { - if (msg.role === 'tool') { - messages.push({ - role: 'tool', - content: msg.content, - tool_call_id: msg.toolCallId - }); - } else { - messages.push({ - role: msg.role, - content: this.contentToString(msg.content), - tool_calls: msg.toolCalls - }); - } - } - - return messages; - } -} -``` - -### Anthropic Adapter - -```typescript -class AnthropicAdapter { - private client: Anthropic; - - async *stream(request: Request): AsyncGenerator { - const stream = this.client.messages.stream({ - model: this.model, - max_tokens: request.maxTokens || 1024, - messages: this.addCaching(request.messages), - system: request.systemPrompt, - tools: this.toAnthropicTools(request.tools), - thinking: request.reasoning?.enabled ? { - type: 'enabled', - budget_tokens: request.reasoning.maxTokens || 2000 - } : undefined - }); - - let content = ''; - let thinking = ''; - - stream.on('text', (delta, snapshot) => { - content = snapshot; - // Note: Can't yield from callback, need different approach - }); - - stream.on('thinking', (delta, snapshot) => { - thinking = snapshot; - }); - - // Use raw streaming instead for proper async generator - const rawStream = await this.client.messages.create({ - ...params, - stream: true - }); - - for await (const chunk of rawStream) { - switch (chunk.type) { - case 'content_block_delta': - if (chunk.delta.type === 'text_delta') { - content += chunk.delta.text; - yield { - type: 'text', - content, - delta: chunk.delta.text - }; - } - break; - - case 'message_delta': - if (chunk.usage) { - yield { - type: 'usage', - usage: { - input: chunk.usage.input_tokens, - output: chunk.usage.output_tokens, - total: chunk.usage.input_tokens + chunk.usage.output_tokens, - cacheRead: chunk.usage.cache_read_input_tokens, - cacheWrite: chunk.usage.cache_creation_input_tokens - } - }; - } - break; - } - } - } - - 
private toAnthropicMessages(request: Request): any[] { - return request.messages.map(msg => { - if (msg.role === 'tool') { - // Tool results go as user messages with tool_result blocks - return { - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: msg.toolCallId, - content: msg.content - }] - }; - } - - // Always use content blocks - const blocks: any[] = []; - - if (typeof msg.content === 'string') { - blocks.push({ - type: 'text', - text: msg.content, - cache_control: msg.cacheControl - }); - } else { - // Convert unified content to blocks - for (const part of msg.content) { - if (part.type === 'text') { - blocks.push({ type: 'text', text: part.text }); - } else if (part.type === 'image') { - blocks.push({ - type: 'image', - source: { - type: 'base64', - media_type: part.image.mimeType, - data: part.image.data - } - }); - } - } - } - - // Add tool calls as blocks - if (msg.toolCalls) { - for (const tc of msg.toolCalls) { - blocks.push({ - type: 'tool_use', - id: tc.id, - name: tc.name, - input: tc.arguments - }); - } - } - - return { - role: msg.role === 'assistant' ? 'assistant' : 'user', - content: blocks - }; - }); - } -} -``` - -### Gemini Adapter - -```typescript -class GeminiAdapter { - private client: GoogleGenAI; - - async *stream(request: Request): AsyncGenerator { - const stream = await this.client.models.generateContentStream({ - model: this.model, - systemInstruction: request.systemPrompt ? 
{ - parts: [{ text: request.systemPrompt }] - } : undefined, - contents: this.toGeminiContents(request), - tools: this.toGeminiTools(request.tools), - abortSignal: request.signal - }); - - let content = ''; - let thinking = ''; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content?.parts) continue; - - for (const part of candidate.content.parts) { - if (part.text && !part.thought) { - content += part.text; - yield { - type: 'text', - content, - delta: part.text - }; - } else if (part.text && part.thought) { - thinking += part.text; - yield { - type: 'thinking', - content: thinking, - delta: part.text - }; - } else if (part.functionCall) { - yield { - type: 'toolCall', - toolCall: { - id: part.functionCall.id || crypto.randomUUID(), - name: part.functionCall.name, - arguments: part.functionCall.args - } - }; - } - } - - if (chunk.usageMetadata) { - yield { - type: 'usage', - usage: { - input: chunk.usageMetadata.promptTokenCount || 0, - output: chunk.usageMetadata.candidatesTokenCount || 0, - total: chunk.usageMetadata.totalTokenCount || 0, - thinking: chunk.usageMetadata.thoughtsTokenCount, - cacheRead: chunk.usageMetadata.cachedContentTokenCount - } - }; - } - } - } - - private toGeminiContents(request: Request): any[] { - return request.messages.map(msg => { - const parts: any[] = []; - - if (typeof msg.content === 'string') { - parts.push({ text: msg.content }); - } else { - for (const part of msg.content) { - if (part.type === 'text') { - parts.push({ text: part.text }); - } else if (part.type === 'image') { - parts.push({ - inlineData: { - mimeType: part.image.mimeType, - data: part.image.data - } - }); - } - } - } - - // Add function calls as parts - if (msg.toolCalls) { - for (const tc of msg.toolCalls) { - parts.push({ - functionCall: { - name: tc.name, - args: tc.arguments - } - }); - } - } - - // Add tool results as function responses - if (msg.role === 'tool') { - parts.push({ - functionResponse: { - 
name: msg.toolCallId, - response: { result: msg.content } - } - }); - } - - return { - role: msg.role === 'assistant' ? 'model' : msg.role === 'tool' ? 'user' : msg.role, - parts - }; - }); - } -} -``` - -## Usage Examples - -### Basic Streaming - -```typescript -const ai = new AI({ - provider: 'openai', - apiKey: process.env.OPENAI_API_KEY, - model: 'gpt-4' -}); - -const stream = ai.stream({ - messages: [ - { role: 'user', content: 'Write a haiku about coding' } - ], - systemPrompt: 'You are a poetic programmer' -}); - -for await (const event of stream) { - switch (event.type) { - case 'text': - process.stdout.write(event.delta); - break; - case 'usage': - console.log(`\nTokens: ${event.usage.total}`); - break; - case 'done': - console.log(`\nFinished: ${event.reason}`); - break; - } -} -``` - -### Cross-Provider Tool Calling - -```typescript -async function callWithTools(provider: 'openai' | 'anthropic' | 'gemini') { - const ai = new AI({ - provider, - apiKey: process.env[`${provider.toUpperCase()}_API_KEY`], - model: getDefaultModel(provider) - }); - - const messages: Message[] = [{ - role: 'user', - content: 'What is the weather in Paris and calculate 15 * 23?' 
- }]; - - const stream = ai.stream({ - messages, - tools: [ - { - name: 'weather', - description: 'Get weather for a location', - parameters: { - type: 'object', - properties: { - location: { type: 'string' } - }, - required: ['location'] - } - }, - { - name: 'calculator', - description: 'Calculate math expressions', - parameters: { - type: 'object', - properties: { - expression: { type: 'string' } - }, - required: ['expression'] - } - } - ] - }); - - const toolCalls: any[] = []; - - for await (const event of stream) { - if (event.type === 'toolCall') { - toolCalls.push(event.toolCall); - - // Execute tool - const result = await executeToolCall(event.toolCall); - - // Add tool result to conversation - messages.push({ - role: 'assistant', - toolCalls: [event.toolCall] - }); - - messages.push({ - role: 'tool', - content: JSON.stringify(result), - toolCallId: event.toolCall.id - }); - } - } - - // Continue conversation with tool results - if (toolCalls.length > 0) { - const finalStream = ai.stream({ messages }); - - for await (const event of finalStream) { - if (event.type === 'text') { - process.stdout.write(event.delta); - } - } - } -} -``` - -### Thinking/Reasoning - -```typescript -async function withThinking() { - // OpenAI o1 - const openai = new AI({ - provider: 'openai', - model: 'o1-preview' - }); - - // Anthropic Claude - const anthropic = new AI({ - provider: 'anthropic', - model: 'claude-3-opus-20240229' - }); - - // Gemini thinking model - const gemini = new AI({ - provider: 'gemini', - model: 'gemini-2.0-flash-thinking-exp-1219' - }); - - for (const ai of [openai, anthropic, gemini]) { - const stream = ai.stream({ - messages: [{ - role: 'user', - content: 'Solve this step by step: If a tree falls in a forest...' 
- }], - reasoning: { - enabled: true, - effort: 'high', // OpenAI reasoning_effort - maxTokens: 2000 // Anthropic budget - } - }); - - for await (const event of stream) { - if (event.type === 'thinking') { - console.log('[THINKING]', event.delta); - } else if (event.type === 'text') { - console.log('[RESPONSE]', event.delta); - } else if (event.type === 'done') { - // Final message includes model and usage with cost - console.log('Model:', event.message.model); - console.log('Tokens:', event.message.usage?.total); - console.log('Cost: $', event.message.usage?.cost?.total); - } - } - } -} -``` - -## Implementation Notes - -### Critical Decisions - -1. **Streaming First**: All providers support streaming, non-streaming is just collected events -2. **Unified Events**: Same event types across all providers for consistent handling -3. **Separate System Prompt**: Required for Anthropic/Gemini compatibility -4. **Tool Role**: Unified way to handle tool responses across providers -5. **Content Arrays**: Support both string and structured content -6. 
**Thinking Extraction**: Normalize reasoning across different provider formats - -### Provider-Specific Handling - -**OpenAI**: -- Choose between Chat Completions and Responses API based on model and thinking needs -- Map `developer` role for o1/o3 models -- Handle streaming tool call deltas - -**Anthropic**: -- Convert to content blocks (always arrays) -- Tool results as user messages with tool_result blocks -- Handle MessageStream events or raw streaming - -**Gemini**: -- Convert to parts system -- Extract thinking from `part.thought` flag -- Map `assistant` to `model` role -- Handle function calls/responses in parts - -### Error Handling - -```typescript -class AIError extends Error { - constructor( - message: string, - public code: string, - public provider: string, - public retryable: boolean, - public statusCode?: number - ) { - super(message); - } -} - -// In adapters -try { - // API call -} catch (error) { - if (error instanceof RateLimitError) { - throw new AIError( - 'Rate limit exceeded', - 'rate_limit', - this.provider, - true, - 429 - ); - } - // Map other errors... 
-} -``` - -## Model Information & Cost Tracking - -### Models Database - -We cache the models.dev API data at build time for fast, offline access: - -```typescript -// scripts/update-models.ts - Run during build or manually -async function updateModels() { - const response = await fetch('https://models.dev/api.json'); - const data = await response.json(); - - // Transform to our format - const models: ModelsDatabase = transformModelsData(data); - - // Generate TypeScript file - const content = `// Auto-generated from models.dev API -// Last updated: ${new Date().toISOString()} -// Run 'npm run update-models' to refresh - -export const MODELS_DATABASE: ModelsDatabase = ${JSON.stringify(models, null, 2)}; -`; - - await fs.writeFile('src/models-data.ts', content); -} - -// src/models.ts - Runtime model lookup -import { MODELS_DATABASE } from './models-data.js'; - -// Simple lookup with fallback -export function getModelInfo(provider: string, model: string): ModelInfo { - const info = MODELS_DATABASE.providers[provider]?.models[model]; - - if (!info) { - // Fallback for unknown models - return { - id: model, - name: model, - provider, - capabilities: { - reasoning: false, - toolCall: true, - vision: false - }, - cost: { input: 0, output: 0 }, - limits: { context: 128000, output: 4096 } - }; - } - - return info; -} - -// Optional: Runtime override for testing new models -const runtimeOverrides = new Map(); - -export function registerModel(provider: string, model: string, info: ModelInfo) { - runtimeOverrides.set(`${provider}:${model}`, info); -} -``` - -### Cost Calculation - -```typescript -class CostTracker { - private usage: TokenUsage = { - input: 0, - output: 0, - total: 0, - cacheRead: 0, - cacheWrite: 0 - }; - - private modelInfo: ModelInfo; - - constructor(modelInfo: ModelInfo) { - this.modelInfo = modelInfo; - } - - addUsage(tokens: Partial): TokenUsage { - this.usage.input += tokens.input || 0; - this.usage.output += tokens.output || 0; - this.usage.thinking 
+= tokens.thinking || 0; - this.usage.cacheRead += tokens.cacheRead || 0; - this.usage.cacheWrite += tokens.cacheWrite || 0; - this.usage.total = this.usage.input + this.usage.output + (this.usage.thinking || 0); - - // Calculate costs (per million tokens) - const cost = this.modelInfo.cost; - this.usage.cost = { - input: (this.usage.input / 1_000_000) * cost.input, - output: (this.usage.output / 1_000_000) * cost.output, - cache: - ((this.usage.cacheRead || 0) / 1_000_000) * (cost.cacheRead || 0) + - ((this.usage.cacheWrite || 0) / 1_000_000) * (cost.cacheWrite || 0), - total: 0 - }; - - this.usage.cost.total = - this.usage.cost.input + - this.usage.cost.output + - this.usage.cost.cache; - - return { ...this.usage }; - } - - getTotalCost(): number { - return this.usage.cost?.total || 0; - } - - getUsageSummary(): string { - return `Tokens: ${this.usage.total} (${this.usage.input}→${this.usage.output}) | Cost: $${this.getTotalCost().toFixed(4)}`; - } -} -``` - -### Integration in Adapters - -```typescript -class OpenAIAdapter { - private costTracker: CostTracker; - - constructor(config: AIConfig) { - const modelInfo = getModelInfo('openai', config.model); - this.costTracker = new CostTracker(modelInfo); - } - - async *stream(request: Request): AsyncGenerator { - // ... streaming logic ... - - if (chunk.usage) { - const usage = this.costTracker.addUsage({ - input: chunk.usage.prompt_tokens, - output: chunk.usage.completion_tokens, - thinking: chunk.usage.completion_tokens_details?.reasoning_tokens, - cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens - }); - - yield { type: 'usage', usage }; - } - } -} -``` - -## Next Steps - -1. Create models.ts with models.dev integration -2. Implement base `AI` class with adapter pattern -3. Create three provider adapters with full streaming support -4. Add comprehensive error mapping -5. Implement token counting and cost tracking -6. Add test suite for each provider -7. 
Create migration guide from native SDKs \ No newline at end of file