diff --git a/packages/ai/docs/anthropic-api.md b/packages/ai/docs/anthropic-api.md deleted file mode 100644 index 7ede2cb2..00000000 --- a/packages/ai/docs/anthropic-api.md +++ /dev/null @@ -1,1706 +0,0 @@ -# Anthropic SDK Implementation Guide - -This document provides a comprehensive guide for implementing the required features using the Anthropic SDK. All examples use TypeScript and include actual code that works with the SDK. - -## Table of Contents - -1. [Basic Client Setup](#basic-client-setup) -2. [Streaming Responses](#streaming-responses) -3. [Request Abortion](#request-abortion) -4. [Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Context and Message History](#context-and-message-history) -7. [Token Counting](#token-counting) -8. [Prompt Caching](#prompt-caching) -9. [Tool Use (Function Calling)](#tool-use-function-calling) -10. [System Prompts](#system-prompts) -11. [Content Block System](#content-block-system) -12. [MessageStream Helper Class](#messagestream-helper-class) -13. [Thinking Tokens and Extended Reasoning](#thinking-tokens-and-extended-reasoning) -14. 
[Complete Implementation Example](#complete-implementation-example) - -## Basic Client Setup - -```typescript -import Anthropic from '@anthropic-ai/sdk'; - -// Create client with configuration -const anthropic = new Anthropic({ - apiKey: process.env.ANTHROPIC_API_KEY, // Required - baseURL: 'https://api.anthropic.com', // Optional, this is the default - timeout: 60000, // Optional, in milliseconds - maxRetries: 3, // Optional, default is 2 -}); -``` - -### Environment Variables - -The SDK automatically reads from these environment variables: -- `ANTHROPIC_API_KEY` - Your API key -- `ANTHROPIC_BASE_URL` - Custom base URL (optional) - -## Streaming Responses - -### Basic Streaming with MessageStream - -```typescript -import { MessageStream } from '@anthropic-ai/sdk/lib/MessageStream'; - -async function basicStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello, Claude!' }], - }); - - // Listen to different event types - stream.on('text', (text, snapshot) => { - process.stdout.write(text); // text is the delta, snapshot is accumulated - }); - - stream.on('message', (message) => { - console.log('\nFinal message:', message); - }); - - stream.on('error', (error) => { - console.error('Error:', error); - }); - - // Wait for completion - const finalMessage = await stream.finalMessage(); - return finalMessage; -} -``` - -### Raw Streaming with create() - -```typescript -import { RawMessageStreamEvent } from '@anthropic-ai/sdk'; - -async function rawStreaming() { - const stream = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - stream: true, - }); - - let content = ''; - let usage: any = null; - - for await (const chunk of stream) { - switch (chunk.type) { - case 'message_start': - console.log('Message started:', chunk.message); - break; - - case 'content_block_delta': - if (chunk.delta.type === 'text_delta') { - content += chunk.delta.text; - process.stdout.write(chunk.delta.text); - } - break; - - case 'message_delta': - if (chunk.usage) { - usage = chunk.usage; - } - console.log('\nStop reason:', chunk.delta.stop_reason); - break; - - case 'message_stop': - console.log('\nStream ended'); - break; - } - } - - return { content, usage }; -} -``` - -### Handling Thinking Tokens in Streams - -```typescript -async function streamWithThinking() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 4000, - thinking: { - type: 'enabled', - budget_tokens: 2000, - }, - messages: [{ role: 'user', content: 'Solve this complex math problem: ...' }], - }); - - stream.on('thinking', (thinking, snapshot) => { - console.log('[Thinking]', thinking); // Delta thinking content - }); - - stream.on('text', (text, snapshot) => { - process.stdout.write(text); // Regular response text - }); - - const message = await stream.finalMessage(); - - // Access thinking content from final message - for (const block of message.content) { - if (block.type === 'thinking') { - console.log('Final thinking:', block.thinking); - } - } -} -``` - -## Request Abortion - -### AbortController Integration - -```typescript -async function abortableRequest() { - const controller = new AbortController(); - - // Abort after 5 seconds - const timeoutId = setTimeout(() => controller.abort(), 5000); - - try { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Long task...' 
}], - }, { - // Pass abort signal in request options - signal: controller.signal, - }); - - stream.on('error', (error) => { - if (error.name === 'AbortError') { - console.log('Request was aborted'); - } else { - console.error('Other error:', error); - } - }); - - const result = await stream.finalMessage(); - clearTimeout(timeoutId); - return result; - - } catch (error) { - clearTimeout(timeoutId); - - if (error.name === 'AbortError') { - console.log('Request aborted by user'); - } else { - throw error; - } - } -} - -// Manual abort from MessageStream -async function manualAbort() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Start a story...' }], - }); - - // Abort after receiving some content - stream.on('text', (text, snapshot) => { - if (snapshot.length > 100) { - stream.abort(); // Built-in abort method - } - }); - - try { - await stream.finalMessage(); - } catch (error) { - if (stream.aborted) { - console.log('Stream was manually aborted'); - } - } -} -``` - -## Error Handling - -### Comprehensive Error Types - -```typescript -import { - AnthropicError, - APIError, - APIConnectionError, - APIConnectionTimeoutError, - APIUserAbortError, - NotFoundError, - ConflictError, - RateLimitError, - BadRequestError, - AuthenticationError, - InternalServerError, - PermissionDeniedError, - UnprocessableEntityError, -} from '@anthropic-ai/sdk'; - -async function handleErrors() { - try { - const message = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - return message; - - } catch (error) { - // Handle specific error types - if (error instanceof RateLimitError) { - console.error('Rate limit exceeded:', { - status: error.status, - headers: error.headers, - retryAfter: error.headers.get('retry-after'), - }); - - // Wait and retry logic - const retryAfter = parseInt(error.headers.get('retry-after') || '60'); - await new Promise(resolve => setTimeout(resolve, retryAfter * 1000)); - - } else if (error instanceof AuthenticationError) { - console.error('Authentication failed:', error.status); - throw new Error('Invalid API key'); - - } else if (error instanceof BadRequestError) { - console.error('Bad request:', { - status: error.status, - error: error.error, - message: error.message, - }); - - } else if (error instanceof APIConnectionTimeoutError) { - console.error('Request timed out'); - // Retry with longer timeout - - } else if (error instanceof APIConnectionError) { - console.error('Network error:', error.message); - // Retry with backoff - - } else if (error instanceof APIUserAbortError) { - console.log('Request was aborted by user'); - - } else if (error instanceof InternalServerError) { - console.error('Server error:', error.status); - // Retry with exponential backoff - - } else if (error instanceof APIError) { - console.error('API error:', { - status: error.status, - error: error.error, - requestId: error.requestID, - }); - - } else { - console.error('Unexpected error:', error); - throw error; - } - } -} - -// Error handling in streams -function handleStreamErrors() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - stream.on('error', (error) => { - if (error instanceof RateLimitError) { - console.log('Rate limited during stream'); - } else if (error instanceof APIConnectionError) { - console.log('Connection lost during stream'); - } else { - console.error('Stream error:', error); - } - }); - - return stream; -} -``` - -## Stop Reasons - -### Understanding Stop Reasons - -```typescript -import { StopReason } from '@anthropic-ai/sdk'; - -async function handleStopReasons() { - const message = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 100, // Intentionally low to trigger max_tokens - messages: [{ role: 'user', content: 'Write a long story...' }], - stop_sequences: ['THE END'], // Custom stop sequence - }); - - // Extract and handle stop reason - const stopReason: StopReason = message.stop_reason; - - switch (stopReason) { - case 'end_turn': - console.log('Model completed naturally'); - break; - - case 'max_tokens': - console.log('Hit token limit, response may be incomplete'); - // Consider continuing with a follow-up request - break; - - case 'stop_sequence': - console.log('Hit custom stop sequence:', message.stop_sequence); - break; - - case 'tool_use': - console.log('Model wants to use tools'); - // Handle tool calls (see Tool Use section) - break; - - case 'pause_turn': - console.log('Long turn paused, can continue'); - // Continue with the partial response as context - break; - - case 'refusal': - console.log('Model refused to respond due to safety'); - break; - - default: - console.log('Unknown stop reason:', stopReason); - } - - return { message, stopReason }; -} - -// In streaming mode -function handleStopReasonsInStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - stream.on('message', (message) => { - const stopReason = message.stop_reason; - console.log('Final stop reason:', stopReason); - - if (stopReason === 'max_tokens') { - console.log('Response was truncated'); - } - }); - - return stream; -} -``` - -## Context and Message History - -### Message Format and Serialization - -```typescript -import { MessageParam, Message } from '@anthropic-ai/sdk'; - -interface ConversationState { - messages: MessageParam[]; - totalTokens: number; - model: string; - systemPrompt?: string; -} - -class ConversationManager { - private state: ConversationState; - - constructor(model: string, systemPrompt?: string) { - this.state = { - messages: [], - totalTokens: 0, - model, - systemPrompt, - }; - } - - // Add user message - addUserMessage(content: string | any[]) { - this.state.messages.push({ - role: 'user', - content, - }); - } - - // Add assistant message from API response - addAssistantMessage(message: Message) { - this.state.messages.push({ - role: 'assistant', - content: message.content, - }); - - // Update token count - this.state.totalTokens += message.usage.input_tokens + message.usage.output_tokens; - } - - // Add tool results - addToolResult(toolUseId: string, result: string, isError = false) { - // Find the last message and ensure it has tool use - const lastMessage = this.state.messages[this.state.messages.length - 1]; - if (lastMessage?.role === 'assistant') { - // Add tool result as new user message - this.state.messages.push({ - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: toolUseId, - content: result, - is_error: isError, - }], - }); - } - } - - // Get messages for API call - getMessages(): MessageParam[] { - return [...this.state.messages]; - } - - // Serialize for persistence - serialize(): string { - return JSON.stringify(this.state); - } - - // Deserialize from storage - static deserialize(json: string): ConversationManager { - const state = JSON.parse(json); - const manager = new 
ConversationManager(state.model, state.systemPrompt); - manager.state = state; - return manager; - } - - // Create request parameters - createRequestParams(newMessage?: string): any { - if (newMessage) { - this.addUserMessage(newMessage); - } - - const params: any = { - model: this.state.model, - max_tokens: 4000, - messages: this.getMessages(), - }; - - if (this.state.systemPrompt) { - params.system = this.state.systemPrompt; - } - - return params; - } - - // Get conversation stats - getStats() { - return { - messageCount: this.state.messages.length, - totalTokens: this.state.totalTokens, - userMessages: this.state.messages.filter(m => m.role === 'user').length, - assistantMessages: this.state.messages.filter(m => m.role === 'assistant').length, - }; - } -} - -// Usage example -async function conversationExample() { - const conversation = new ConversationManager( - 'claude-sonnet-4-20250514', - 'You are a helpful coding assistant.' - ); - - // First exchange - const params1 = conversation.createRequestParams('Hello, can you help me with Python?'); - const response1 = await anthropic.messages.create(params1); - conversation.addAssistantMessage(response1); - - // Second exchange - const params2 = conversation.createRequestParams('Show me a simple function.'); - const response2 = await anthropic.messages.create(params2); - conversation.addAssistantMessage(response2); - - // Save conversation - const saved = conversation.serialize(); - localStorage.setItem('conversation', saved); - - // Later: restore conversation - const restored = ConversationManager.deserialize(saved); - console.log('Conversation stats:', restored.getStats()); -} -``` - -## Token Counting - -### Using the Count Tokens API - -```typescript -import { MessageCountTokensParams, MessageTokensCount } from '@anthropic-ai/sdk'; - -async function countTokens() { - const messages = [ - { role: 'user', content: 'Hello, how are you?' }, - { role: 'assistant', content: 'I am doing well, thank you for asking!' 
}, - { role: 'user', content: 'Can you help me write some code?' }, - ] as const; - - // Count tokens for messages - const tokenCount: MessageTokensCount = await anthropic.messages.countTokens({ - model: 'claude-sonnet-4-20250514', - messages, - system: 'You are a helpful coding assistant.', - }); - - console.log('Input tokens:', tokenCount.input_tokens); - return tokenCount.input_tokens; -} - -// Count tokens with tools -async function countTokensWithTools() { - const tools = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { type: 'string' }, - }, - required: ['expression'], - }, - }, - ]; - - const tokenCount = await anthropic.messages.countTokens({ - model: 'claude-sonnet-4-20250514', - messages: [{ role: 'user', content: 'Calculate 2+2' }], - tools, - }); - - return tokenCount.input_tokens; -} - -// Extract usage from responses -function extractUsageFromResponse(message: Message) { - const usage = message.usage; - - return { - inputTokens: usage.input_tokens, - outputTokens: usage.output_tokens, - cacheReadTokens: usage.cache_read_input_tokens || 0, - cacheWriteTokens: usage.cache_creation_input_tokens || 0, - totalTokens: usage.input_tokens + usage.output_tokens, - serviceTier: usage.service_tier, - cacheCreation: usage.cache_creation, - }; -} - -// Token usage in streaming -function trackTokensInStream() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Hello!' 
}], - }); - - let finalUsage: any = null; - - stream.on('message', (message) => { - finalUsage = extractUsageFromResponse(message); - console.log('Final usage:', finalUsage); - }); - - return stream; -} -``` - -## Prompt Caching - -### Basic Caching Implementation - -```typescript -import { CacheControlEphemeral } from '@anthropic-ai/sdk'; - -async function usePromptCaching() { - // Cache control for system prompt - const systemPrompt = [ - { - type: 'text', - text: 'You are an expert software engineer with deep knowledge of...', - cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, - }, - ]; - - // Cache control for large document - const messages = [ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Here is a large codebase to analyze:', - }, - { - type: 'document', - source: { - type: 'text', - data: '// Large codebase content...', - media_type: 'text/plain', - }, - cache_control: { type: 'ephemeral', ttl: '1h' } as CacheControlEphemeral, - }, - { - type: 'text', - text: 'Please analyze this code for bugs.', - }, - ], - }, - ] as const; - - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages, - }); - - // Check cache usage - const usage = response.usage; - console.log('Cache read tokens:', usage.cache_read_input_tokens); - console.log('Cache write tokens:', usage.cache_creation_input_tokens); - - return response; -} - -// Caching with different TTL options -async function cachingWithTTL() { - const shortCache = { - type: 'ephemeral', - ttl: '5m', // 5 minutes - } as CacheControlEphemeral; - - const longCache = { - type: 'ephemeral', - ttl: '1h', // 1 hour (default) - } as CacheControlEphemeral; - - const messages = [ - { - role: 'user', - content: [ - { - type: 'text', - text: 'Short-lived context', - cache_control: shortCache, - }, - { - type: 'text', - text: 'Long-lived context that should be cached longer', - cache_control: longCache, - 
}, - { - type: 'text', - text: 'What can you tell me about this?', - }, - ], - }, - ] as const; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages, - }); -} -``` - -## Tool Use (Function Calling) - -### Complete Tool Implementation - -```typescript -import { Tool, ToolUseBlock, ToolChoice } from '@anthropic-ai/sdk'; - -// Define tools -const tools: Tool[] = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { - type: 'string', - description: 'Mathematical expression to evaluate', - }, - }, - required: ['expression'], - }, - }, - { - name: 'weather', - description: 'Get weather information for a location', - input_schema: { - type: 'object', - properties: { - location: { - type: 'string', - description: 'City name or coordinates', - }, - units: { - type: 'string', - enum: ['celsius', 'fahrenheit'], - description: 'Temperature units', - }, - }, - required: ['location'], - }, - }, -]; - -// Tool implementations -const toolImplementations = { - calculator: (args: { expression: string }) => { - try { - // Simple eval - in production, use a safe math parser - const result = eval(args.expression); - return `Result: ${result}`; - } catch (error) { - return `Error: Invalid expression - ${error.message}`; - } - }, - - weather: async (args: { location: string; units?: string }) => { - // Mock weather API call - return `Weather in ${args.location}: 22°C, sunny with light clouds`; - }, -}; - -async function toolUseExample() { - const conversation = new ConversationManager('claude-sonnet-4-20250514'); - - // Send initial message with tools - conversation.addUserMessage('What is 15 * 23 and what is the weather in Paris?'); - - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: conversation.getMessages(), - tools, - tool_choice: { type: 'auto' } as 
ToolChoice, - }); - - conversation.addAssistantMessage(response); - - // Handle tool calls - const toolCalls: ToolUseBlock[] = response.content.filter( - (block): block is ToolUseBlock => block.type === 'tool_use' - ); - - // Execute each tool call - for (const toolCall of toolCalls) { - const toolName = toolCall.name; - const toolArgs = toolCall.input; - const toolId = toolCall.id; - - console.log(`Executing tool: ${toolName} with args:`, toolArgs); - - try { - let result: string; - - if (toolName in toolImplementations) { - result = await toolImplementations[toolName](toolArgs as any); - } else { - result = `Error: Unknown tool "${toolName}"`; - } - - // Add tool result to conversation - conversation.addToolResult(toolId, result); - - } catch (error) { - // Add error result - conversation.addToolResult(toolId, `Error: ${error.message}`, true); - } - } - - // Get final response after tool execution - if (toolCalls.length > 0) { - const finalResponse = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: conversation.getMessages(), - tools, - }); - - conversation.addAssistantMessage(finalResponse); - return finalResponse; - } - - return response; -} - -// Streaming with tools -async function streamingWithTools() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'Calculate 42 * 17' }], - tools, - }); - - const toolCalls: ToolUseBlock[] = []; - - stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - toolCalls.push(block); - } - }); - - stream.on('message', async (message) => { - if (message.stop_reason === 'tool_use') { - console.log('Tool calls detected:', toolCalls); - // Handle tools... 
- } - }); - - return stream; -} - -// Force specific tool usage -async function forceToolUsage() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: 'I need to do some math' }], - tools, - tool_choice: { - type: 'tool', - name: 'calculator', - } as ToolChoice, - }); -} -``` - -## System Prompts - -### System Prompt Variations - -```typescript -// Simple string system prompt -async function basicSystemPrompt() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: 'You are a helpful coding assistant specialized in Python.', - messages: [{ role: 'user', content: 'Help me write a function' }], - }); -} - -// Complex system prompt with caching -async function complexSystemPrompt() { - const systemPrompt = [ - { - type: 'text', - text: `You are an expert software engineer with the following expertise: - -1. Python development and best practices -2. Web frameworks like Django and FastAPI -3. Database design and optimization -4. Testing strategies and TDD -5. 
Code review and refactoring - -Guidelines for your responses: -- Always write clean, readable code -- Include proper error handling -- Add type hints when using Python -- Explain your reasoning -- Suggest improvements when applicable - -When reviewing code: -- Focus on functionality, performance, and maintainability -- Point out potential bugs or edge cases -- Suggest more pythonic approaches when relevant`, - cache_control: { type: 'ephemeral', ttl: '1h' }, - }, - ] as const; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages: [{ role: 'user', content: 'Review this Python function for me' }], - }); -} - -// Dynamic system prompt based on context -function buildSystemPrompt(userRole: string, expertise: string[]): string { - const basePrompt = `You are an AI assistant helping a ${userRole}.`; - - const expertisePrompt = expertise.length > 0 - ? `\n\nYour areas of expertise include: ${expertise.join(', ')}.` - : ''; - - const guidelines = ` - -Guidelines: -- Be helpful and accurate -- Explain complex concepts clearly -- Provide practical examples -- Ask for clarification when needed`; - - return basePrompt + expertisePrompt + guidelines; -} - -async function dynamicSystemPrompt() { - const systemPrompt = buildSystemPrompt('software developer', [ - 'JavaScript', 'TypeScript', 'React', 'Node.js' - ]); - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: systemPrompt, - messages: [{ role: 'user', content: 'Help me optimize this React component' }], - }); -} -``` - -## Content Block System - -### Understanding Content Blocks - -The Anthropic API uses a content block system where message content is always an array, even for simple text. 
- -```typescript -import { - ContentBlockParam, - TextBlockParam, - ImageBlockParam, - DocumentBlockParam, - ToolUseBlockParam, - ToolResultBlockParam -} from '@anthropic-ai/sdk'; - -// Text content (most common) -const textContent: TextBlockParam = { - type: 'text', - text: 'Hello, Claude!', -}; - -// Image content -const imageContent: ImageBlockParam = { - type: 'image', - source: { - type: 'base64', - media_type: 'image/jpeg', - data: '/9j/4AAQSkZJRg...', // base64 encoded image - }, -}; - -// Document content with caching -const documentContent: DocumentBlockParam = { - type: 'document', - source: { - type: 'text', - data: 'Large document content...', - media_type: 'text/plain', - }, - cache_control: { type: 'ephemeral', ttl: '1h' }, - title: 'Important Document', - context: 'This document contains key information for the project', -}; - -// Tool use block (from assistant) -const toolUseContent: ToolUseBlockParam = { - type: 'tool_use', - id: 'tool_123', - name: 'calculator', - input: { expression: '2 + 2' }, -}; - -// Tool result block (from user) -const toolResultContent: ToolResultBlockParam = { - type: 'tool_result', - tool_use_id: 'tool_123', - content: 'Result: 4', -}; - -// Mixed content message -async function mixedContentExample() { - const mixedMessage: ContentBlockParam[] = [ - { - type: 'text', - text: 'Here is an image and a document to analyze:', - }, - { - type: 'image', - source: { - type: 'base64', - media_type: 'image/png', - data: 'iVBORw0KGgoAAAANSUhEUgA...', // base64 image - }, - }, - { - type: 'document', - source: { - type: 'text', - data: 'Document content here...', - media_type: 'text/plain', - }, - title: 'Analysis Document', - }, - { - type: 'text', - text: 'What insights can you provide from these?', - }, - ]; - - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - messages: [{ role: 'user', content: mixedMessage }], - }); -} - -// Helper functions for content manipulation -function 
createTextBlock(text: string, cached = false): TextBlockParam { - const block: TextBlockParam = { - type: 'text', - text, - }; - - if (cached) { - block.cache_control = { type: 'ephemeral', ttl: '1h' }; - } - - return block; -} - -function createImageBlock(base64Data: string, mimeType: string): ImageBlockParam { - return { - type: 'image', - source: { - type: 'base64', - media_type: mimeType as any, - data: base64Data, - }, - }; -} - -// Extract text from response content blocks -function extractTextFromResponse(content: any[]): string { - return content - .filter(block => block.type === 'text') - .map(block => block.text) - .join('\n'); -} - -// Extract thinking content -function extractThinkingFromResponse(content: any[]): string | null { - const thinkingBlock = content.find(block => block.type === 'thinking'); - return thinkingBlock?.thinking || null; -} -``` - -## MessageStream Helper Class - -### Advanced MessageStream Usage - -```typescript -import { MessageStream, MessageStreamEvents } from '@anthropic-ai/sdk/lib/MessageStream'; - -class AdvancedMessageHandler { - private stream: MessageStream; - private content = ''; - private thinking = ''; - private toolCalls: any[] = []; - private citations: any[] = []; - - constructor(stream: MessageStream) { - this.stream = stream; - this.setupEventHandlers(); - } - - private setupEventHandlers() { - // Connection established - this.stream.on('connect', () => { - console.log('Stream connected'); - }); - - // Text content (delta and snapshot) - this.stream.on('text', (delta: string, snapshot: string) => { - process.stdout.write(delta); - this.content = snapshot; - }); - - // Thinking content (Claude's internal reasoning) - this.stream.on('thinking', (delta: string, snapshot: string) => { - console.log('[Thinking]', delta); - this.thinking = snapshot; - }); - - // Citations (when referencing documents) - this.stream.on('citation', (citation, citations) => { - console.log('Citation:', citation); - this.citations = 
citations; - }); - - // Content blocks (including tool calls) - this.stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - console.log('Tool call:', block); - this.toolCalls.push(block); - } - }); - - // Raw stream events - this.stream.on('streamEvent', (event, snapshot) => { - // Handle any stream event - console.log('Stream event:', event.type); - }); - - // Final message - this.stream.on('finalMessage', (message) => { - console.log('\nFinal message received'); - this.handleFinalMessage(message); - }); - - // Error handling - this.stream.on('error', (error) => { - console.error('Stream error:', error); - }); - - // Stream end - this.stream.on('end', () => { - console.log('\nStream ended'); - }); - - // User abort - this.stream.on('abort', (error) => { - console.log('Stream aborted by user'); - }); - } - - private handleFinalMessage(message: any) { - console.log('Stop reason:', message.stop_reason); - console.log('Token usage:', message.usage); - - // Process thinking content if available - for (const block of message.content) { - if (block.type === 'thinking') { - console.log('Final thinking content:', block.thinking); - } - } - } - - async waitForCompletion() { - try { - const finalMessage = await this.stream.finalMessage(); - return { - message: finalMessage, - content: this.content, - thinking: this.thinking, - toolCalls: this.toolCalls, - citations: this.citations, - }; - } catch (error) { - if (this.stream.aborted) { - console.log('Stream was aborted'); - } else { - throw error; - } - } - } - - abort() { - this.stream.abort(); - } - - // Get request ID for debugging - getRequestId() { - return this.stream.request_id; - } - - // Access the underlying Response object - async getResponse() { - const { response } = await this.stream.withResponse(); - return response; - } -} - -// Usage example -async function advancedStreamExample() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 2000, - 
thinking: { - type: 'enabled', - budget_tokens: 1000, - }, - messages: [{ - role: 'user', - content: 'Analyze this complex problem and show your reasoning...' - }], - }); - - const handler = new AdvancedMessageHandler(stream); - - // Optional: abort after 30 seconds - const timeoutId = setTimeout(() => { - handler.abort(); - }, 30000); - - try { - const result = await handler.waitForCompletion(); - clearTimeout(timeoutId); - - console.log('Final result:', { - contentLength: result.content.length, - thinkingLength: result.thinking.length, - toolCallCount: result.toolCalls.length, - citationCount: result.citations.length, - }); - - return result; - } catch (error) { - clearTimeout(timeoutId); - throw error; - } -} -``` - -## Thinking Tokens and Extended Reasoning - -### Enabling Extended Thinking - -```typescript -async function extendedThinkingExample() { - const response = await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 4000, - thinking: { - type: 'enabled', - budget_tokens: 2000, // Minimum 1024, must be < max_tokens - }, - messages: [{ - role: 'user', - content: `Solve this complex problem step by step: - -A company has 3 factories. Factory A produces 100 units/day, -Factory B produces 150 units/day, and Factory C produces 200 units/day. 
-If the company needs to fulfill an order of 10,000 units in the most -cost-efficient way, and the costs per unit are $5, $4, and $6 respectively, -what's the optimal production strategy?` - }], - }); - - // Extract thinking content - for (const block of response.content) { - if (block.type === 'thinking') { - console.log('Claude\'s thinking process:'); - console.log(block.thinking); - console.log('Signature:', block.signature); - } else if (block.type === 'text') { - console.log('\nFinal answer:'); - console.log(block.text); - } - } - - return response; -} - -// Disable thinking -async function disableThinking() { - return await anthropic.messages.create({ - model: 'claude-sonnet-4-20250514', - max_tokens: 1024, - thinking: { - type: 'disabled', - }, - messages: [{ role: 'user', content: 'Quick answer please' }], - }); -} - -// Streaming with thinking -async function streamThinking() { - const stream = anthropic.messages.stream({ - model: 'claude-sonnet-4-20250514', - max_tokens: 3000, - thinking: { - type: 'enabled', - budget_tokens: 1500, - }, - messages: [{ - role: 'user', - content: 'Think through this carefully: How would you design a distributed cache?' 
- }], - }); - - let thinkingContent = ''; - let responseContent = ''; - - stream.on('thinking', (delta, snapshot) => { - // Stream thinking content as it comes - process.stdout.write(`[THINKING] ${delta}`); - thinkingContent = snapshot; - }); - - stream.on('text', (delta, snapshot) => { - // Stream final response - process.stdout.write(delta); - responseContent = snapshot; - }); - - const finalMessage = await stream.finalMessage(); - - return { - thinking: thinkingContent, - response: responseContent, - usage: finalMessage.usage, - }; -} -``` - -## Complete Implementation Example - -Here's a comprehensive example that combines all the features: - -```typescript -import Anthropic, { - MessageParam, - Message, - Tool, - ToolUseBlock, - AnthropicError -} from '@anthropic-ai/sdk'; - -class AnthropicClient { - private client: Anthropic; - private conversation: MessageParam[] = []; - private totalTokens = 0; - - constructor(apiKey: string) { - this.client = new Anthropic({ apiKey }); - } - - async sendMessage( - content: string, - options: { - stream?: boolean; - tools?: Tool[]; - thinking?: boolean; - systemPrompt?: string; - maxTokens?: number; - temperature?: number; - cached?: boolean; - } = {} - ) { - const { - stream = false, - tools = [], - thinking = false, - systemPrompt, - maxTokens = 1024, - temperature = 1.0, - cached = false, - } = options; - - // Add user message - this.conversation.push({ - role: 'user', - content: cached - ? 
[{ type: 'text', text: content, cache_control: { type: 'ephemeral', ttl: '1h' } }] - : content, - }); - - const params: any = { - model: 'claude-sonnet-4-20250514', - max_tokens: maxTokens, - temperature, - messages: [...this.conversation], - }; - - if (systemPrompt) { - params.system = systemPrompt; - } - - if (tools.length > 0) { - params.tools = tools; - params.tool_choice = { type: 'auto' }; - } - - if (thinking) { - params.thinking = { - type: 'enabled', - budget_tokens: Math.min(maxTokens / 2, 2000), - }; - } - - try { - if (stream) { - return await this.handleStreamingResponse(params, tools); - } else { - return await this.handleSingleResponse(params, tools); - } - } catch (error) { - return this.handleError(error); - } - } - - private async handleSingleResponse(params: any, tools: Tool[]) { - const response = await this.client.messages.create(params); - - // Track tokens - this.totalTokens += response.usage.input_tokens + response.usage.output_tokens; - - // Add assistant response - this.conversation.push({ - role: 'assistant', - content: response.content, - }); - - // Handle tool calls - const toolCalls = response.content.filter( - (block): block is ToolUseBlock => block.type === 'tool_use' - ); - - if (toolCalls.length > 0 && tools.length > 0) { - await this.handleToolCalls(toolCalls, params, tools); - } - - return { - content: this.extractText(response.content), - thinking: this.extractThinking(response.content), - toolCalls, - usage: response.usage, - stopReason: response.stop_reason, - }; - } - - private async handleStreamingResponse(params: any, tools: Tool[]) { - const stream = this.client.messages.stream(params); - - let content = ''; - let thinking = ''; - const toolCalls: ToolUseBlock[] = []; - let finalMessage: Message; - - return new Promise((resolve, reject) => { - stream.on('text', (delta, snapshot) => { - process.stdout.write(delta); - content = snapshot; - }); - - stream.on('thinking', (delta, snapshot) => { - console.log(`[THINKING] 
${delta}`); - thinking = snapshot; - }); - - stream.on('contentBlock', (block) => { - if (block.type === 'tool_use') { - toolCalls.push(block); - } - }); - - stream.on('finalMessage', async (message) => { - finalMessage = message; - this.totalTokens += message.usage.input_tokens + message.usage.output_tokens; - - this.conversation.push({ - role: 'assistant', - content: message.content, - }); - - if (toolCalls.length > 0 && tools.length > 0) { - try { - await this.handleToolCalls(toolCalls, params, tools); - } catch (error) { - reject(error); - return; - } - } - - resolve({ - content, - thinking, - toolCalls, - usage: message.usage, - stopReason: message.stop_reason, - }); - }); - - stream.on('error', reject); - }); - } - - private async handleToolCalls(toolCalls: ToolUseBlock[], params: any, tools: Tool[]) { - // Execute tool calls - for (const toolCall of toolCalls) { - const result = await this.executeToolCall(toolCall); - - this.conversation.push({ - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: toolCall.id, - content: result.content, - is_error: result.isError, - }], - }); - } - - // Get response after tool execution - const followUpResponse = await this.client.messages.create({ - ...params, - messages: [...this.conversation], - }); - - this.conversation.push({ - role: 'assistant', - content: followUpResponse.content, - }); - - this.totalTokens += followUpResponse.usage.input_tokens + followUpResponse.usage.output_tokens; - } - - private async executeToolCall(toolCall: ToolUseBlock): Promise<{ content: string; isError: boolean }> { - // Mock tool implementations - const tools = { - calculator: (args: any) => { - try { - const result = eval(args.expression); - return { content: `Result: ${result}`, isError: false }; - } catch (error) { - return { content: `Error: ${error.message}`, isError: true }; - } - }, - weather: (args: any) => { - return { content: `Weather in ${args.location}: 22°C, sunny`, isError: false }; - }, - }; - - const 
toolName = toolCall.name; - if (toolName in tools) { - return tools[toolName](toolCall.input); - } else { - return { content: `Unknown tool: ${toolName}`, isError: true }; - } - } - - private extractText(content: any[]): string { - return content - .filter(block => block.type === 'text') - .map(block => block.text) - .join('\n'); - } - - private extractThinking(content: any[]): string { - const thinkingBlock = content.find(block => block.type === 'thinking'); - return thinkingBlock?.thinking || ''; - } - - private handleError(error: any) { - if (error instanceof AnthropicError) { - console.error('Anthropic API error:', error.message); - - if (error.status === 429) { - console.log('Rate limited - should retry with backoff'); - } else if (error.status === 401) { - console.log('Authentication failed - check API key'); - } - } else { - console.error('Unexpected error:', error); - } - - throw error; - } - - // Utility methods - getConversationHistory(): MessageParam[] { - return [...this.conversation]; - } - - getTotalTokens(): number { - return this.totalTokens; - } - - clearConversation(): void { - this.conversation = []; - this.totalTokens = 0; - } - - async countTokens(messages: MessageParam[], systemPrompt?: string): Promise { - const params: any = { - model: 'claude-sonnet-4-20250514', - messages, - }; - - if (systemPrompt) { - params.system = systemPrompt; - } - - const result = await this.client.messages.countTokens(params); - return result.input_tokens; - } -} - -// Usage example -async function completeExample() { - const client = new AnthropicClient(process.env.ANTHROPIC_API_KEY!); - - const tools: Tool[] = [ - { - name: 'calculator', - description: 'Perform mathematical calculations', - input_schema: { - type: 'object', - properties: { - expression: { type: 'string' }, - }, - required: ['expression'], - }, - }, - ]; - - // Simple message - let result = await client.sendMessage('Hello, Claude!'); - console.log('Response:', result.content); - - // Message with 
thinking - result = await client.sendMessage( - 'Solve this complex math problem: What is the optimal way to arrange 10 people around a circular table?', - { thinking: true, maxTokens: 2000 } - ); - console.log('Thinking:', result.thinking); - console.log('Response:', result.content); - - // Streaming with tools - result = await client.sendMessage( - 'Calculate 15 * 23 and explain the steps', - { stream: true, tools, thinking: true } - ); - - console.log('Total tokens used:', client.getTotalTokens()); -} -``` - -## Key Implementation Notes - -1. **Content is Always an Array**: Even simple text messages use the content block system -2. **Error Handling**: The SDK provides specific error types for different HTTP status codes -3. **Streaming Events**: Use MessageStream for easier event handling, or raw streaming for more control -4. **Token Counting**: Use the dedicated countTokens API for accurate estimates -5. **Caching**: Add cache_control to content blocks, not to the message level -6. **Tool Calls**: Always check stop_reason for 'tool_use' and handle the tool execution flow -7. **Thinking**: Requires explicit configuration and sufficient token budget -8. **Abort**: Use AbortController for request cancellation, or MessageStream.abort() for streams - -This guide covers all the essential patterns for working with the Anthropic SDK effectively. \ No newline at end of file diff --git a/packages/ai/docs/gemini-api.md b/packages/ai/docs/gemini-api.md deleted file mode 100644 index 6b8ff549..00000000 --- a/packages/ai/docs/gemini-api.md +++ /dev/null @@ -1,1233 +0,0 @@ -# Google Gemini SDK Implementation Guide - -This document provides comprehensive implementation guidance for the Google Gemini SDK (`@google/genai`) showing exactly how to implement all required features for our unified AI API. - -## Table of Contents - -1. [Setup and Basic Usage](#setup-and-basic-usage) -2. [Streaming Responses](#streaming-responses) -3. [Aborting Requests](#aborting-requests) -4. 
[Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Message History and Serialization](#message-history-and-serialization) -7. [Token Counting](#token-counting) -8. [Context Caching](#context-caching) -9. [Function Calling (Tools)](#function-calling-tools) -10. [System Instructions](#system-instructions) -11. [Parts System for Content](#parts-system-for-content) -12. [Thinking Tokens](#thinking-tokens) -13. [Peculiarities and Gotchas](#peculiarities-and-gotchas) - -## Setup and Basic Usage - -### Installation and Initialization - -```typescript -import { GoogleGenAI, type GenerateContentResponse } from '@google/genai'; - -// Initialize client -const client = new GoogleGenAI({ - apiKey: process.env.GEMINI_API_KEY, - // Optional: Use Vertex AI instead - // vertexai: true, - // project: 'your-project-id', - // location: 'us-central1', -}); - -// Basic non-streaming request -const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: 'Hello, how are you?' 
-}); - -console.log(response.text); -``` - -### Key Types and Interfaces - -```typescript -// Core types from the SDK -interface GoogleGenAIOptions { - apiKey?: string; - vertexai?: boolean; - project?: string; - location?: string; - apiVersion?: string; -} - -interface Content { - parts?: Part[]; - role?: string; // 'user' | 'model' -} - -interface Part { - text?: string; - thought?: boolean; // For thinking content - functionCall?: FunctionCall; - functionResponse?: FunctionResponse; - inlineData?: Blob; - fileData?: FileData; -} - -interface GenerateContentResponse { - candidates?: Candidate[]; - usageMetadata?: GenerateContentResponseUsageMetadata; - promptFeedback?: GenerateContentResponsePromptFeedback; - text: string | undefined; // Convenience getter -} -``` - -## Streaming Responses - -Gemini supports streaming via `generateContentStream` which returns an `AsyncGenerator`: - -```typescript -async function streamContent() { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: 'Write a short story about a robot.' 
- }); - - let fullText = ''; - for await (const chunk of stream) { - // Each chunk is a GenerateContentResponse - const chunkText = chunk.text; - if (chunkText) { - fullText += chunkText; - process.stdout.write(chunkText); // Stream to output - } - - // Check for function calls in streaming - if (chunk.candidates?.[0]?.content?.parts) { - for (const part of chunk.candidates[0].content.parts) { - if (part.functionCall) { - console.log('Function call:', part.functionCall); - } - if (part.thought) { - console.log('Thinking:', part.text); - } - } - } - } - - return fullText; -} -``` - -### Streaming with Thinking Tokens - -```typescript -async function streamWithThinking() { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-thinking-exp-1219', - contents: 'Solve this math problem: 2x + 5 = 13' - }); - - let thinking = ''; - let response = ''; - - for await (const chunk of stream) { - if (chunk.candidates?.[0]?.content?.parts) { - for (const part of chunk.candidates[0].content.parts) { - if (part.thought && part.text) { - thinking += part.text; - console.log('[THINKING]', part.text); - } else if (part.text && !part.thought) { - response += part.text; - console.log('[RESPONSE]', part.text); - } - } - } - } - - return { thinking, response }; -} -``` - -## Aborting Requests - -Gemini supports request cancellation via `AbortSignal`: - -```typescript -class GeminiClient { - private currentController: AbortController | null = null; - - async generateWithCancellation(prompt: string): Promise { - // Create new abort controller - this.currentController = new AbortController(); - - try { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: prompt, - abortSignal: this.currentController.signal - }); - - return response.text || ''; - } catch (error) { - if (error.name === 'AbortError') { - console.log('Request was cancelled'); - throw new Error('Request cancelled by user'); - } - throw error; - } 
finally { - this.currentController = null; - } - } - - async generateStreamWithCancellation(prompt: string): Promise> { - this.currentController = new AbortController(); - - try { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: prompt, - abortSignal: this.currentController.signal - }); - - return this.processStream(stream); - } catch (error) { - if (error.name === 'AbortError') { - throw new Error('Request cancelled by user'); - } - throw error; - } - } - - private async* processStream(stream: AsyncGenerator): AsyncGenerator { - try { - for await (const chunk of stream) { - if (chunk.text) { - yield chunk.text; - } - } - } catch (error) { - if (error.name === 'AbortError') { - return; // Exit generator cleanly - } - throw error; - } finally { - this.currentController = null; - } - } - - // Cancel current request - cancel(): void { - if (this.currentController) { - this.currentController.abort(); - } - } -} -``` - -## Error Handling - -### Error Types and Handling - -```typescript -import { ApiError } from '@google/genai'; - -interface GeminiErrorInfo { - type: 'rate_limit' | 'auth' | 'invalid_request' | 'network' | 'server' | 'unknown'; - message: string; - statusCode?: number; - retryable: boolean; -} - -function handleGeminiError(error: unknown): GeminiErrorInfo { - if (error instanceof ApiError) { - const statusCode = error.status; - - switch (statusCode) { - case 401: - case 403: - return { - type: 'auth', - message: 'Authentication failed - check API key', - statusCode, - retryable: false - }; - - case 429: - return { - type: 'rate_limit', - message: 'Rate limit exceeded', - statusCode, - retryable: true - }; - - case 400: - return { - type: 'invalid_request', - message: error.message || 'Invalid request parameters', - statusCode, - retryable: false - }; - - case 500: - case 502: - case 503: - case 504: - return { - type: 'server', - message: 'Server error - try again later', - statusCode, - retryable: 
true - }; - - default: - return { - type: 'unknown', - message: error.message || 'Unknown API error', - statusCode, - retryable: false - }; - } - } - - if (error instanceof Error) { - if (error.name === 'AbortError') { - return { - type: 'network', - message: 'Request was cancelled', - retryable: false - }; - } - - return { - type: 'network', - message: error.message, - retryable: true - }; - } - - return { - type: 'unknown', - message: 'Unknown error occurred', - retryable: false - }; -} - -// Usage with retry logic -async function generateWithRetry(prompt: string, maxRetries = 3): Promise { - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: prompt - }); - - return response.text || ''; - } catch (error) { - const errorInfo = handleGeminiError(error); - - if (!errorInfo.retryable || attempt === maxRetries) { - throw new Error(`${errorInfo.type}: ${errorInfo.message}`); - } - - // Exponential backoff for retryable errors - const delay = Math.pow(2, attempt - 1) * 1000; - await new Promise(resolve => setTimeout(resolve, delay)); - } - } - - throw new Error('Max retries exceeded'); -} -``` - -## Stop Reasons - -Gemini provides finish reasons in the response candidates: - -```typescript -enum FinishReason { - FINISH_REASON_UNSPECIFIED = 'FINISH_REASON_UNSPECIFIED', - STOP = 'STOP', // Natural stop - MAX_TOKENS = 'MAX_TOKENS', // Hit token limit - SAFETY = 'SAFETY', // Safety filter triggered - RECITATION = 'RECITATION', // Recitation filter - LANGUAGE = 'LANGUAGE', // Language not supported - OTHER = 'OTHER' -} - -function extractStopReason(response: GenerateContentResponse): string | null { - const candidate = response.candidates?.[0]; - if (!candidate) return null; - - return candidate.finishReason || null; -} - -// Handle different stop reasons -function handleStopReason(response: GenerateContentResponse): void { - const reason = 
extractStopReason(response); - - switch (reason) { - case 'STOP': - console.log('Response completed naturally'); - break; - - case 'MAX_TOKENS': - console.log('Response truncated due to token limit'); - break; - - case 'SAFETY': - console.log('Response blocked by safety filters'); - // Check promptFeedback for details - if (response.promptFeedback?.blockReason) { - console.log('Block reason:', response.promptFeedback.blockReason); - } - break; - - case 'RECITATION': - console.log('Response blocked due to recitation concerns'); - break; - - default: - if (reason) { - console.log('Unexpected finish reason:', reason); - } - } -} -``` - -## Message History and Serialization - -### Managing Conversation History - -```typescript -interface SerializableMessage { - role: 'user' | 'model'; - content: string; - functionCalls?: FunctionCall[]; - functionResponses?: FunctionResponse[]; - thinking?: string; -} - -interface SerializableSession { - messages: SerializableMessage[]; - totalUsage: { - promptTokens: number; - candidatesTokens: number; - totalTokens: number; - thoughtsTokens?: number; - }; -} - -class GeminiConversation { - private messages: Content[] = []; - private totalUsage = { - promptTokens: 0, - candidatesTokens: 0, - totalTokens: 0, - thoughtsTokens: 0 - }; - - addUserMessage(text: string): void { - this.messages.push({ - role: 'user', - parts: [{ text }] - }); - } - - addAssistantMessage(response: GenerateContentResponse): void { - const candidate = response.candidates?.[0]; - if (!candidate?.content) return; - - this.messages.push(candidate.content); - - // Update usage - if (response.usageMetadata) { - this.totalUsage.promptTokens += response.usageMetadata.promptTokenCount || 0; - this.totalUsage.candidatesTokens += response.usageMetadata.candidatesTokenCount || 0; - this.totalUsage.totalTokens += response.usageMetadata.totalTokenCount || 0; - this.totalUsage.thoughtsTokens += response.usageMetadata.thoughtsTokenCount || 0; - } - } - - async 
sendMessage(text: string): Promise { - this.addUserMessage(text); - - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: this.messages - }); - - this.addAssistantMessage(response); - return response.text || ''; - } - - // Serialize for persistence - serialize(): SerializableSession { - const messages: SerializableMessage[] = []; - - for (const content of this.messages) { - const message: SerializableMessage = { - role: (content.role as 'user' | 'model') || 'user', - content: '', - functionCalls: [], - functionResponses: [], - thinking: '' - }; - - for (const part of content.parts || []) { - if (part.text) { - if (part.thought) { - message.thinking += part.text; - } else { - message.content += part.text; - } - } - if (part.functionCall) { - message.functionCalls!.push(part.functionCall); - } - if (part.functionResponse) { - message.functionResponses!.push(part.functionResponse); - } - } - - messages.push(message); - } - - return { - messages, - totalUsage: { ...this.totalUsage } - }; - } - - // Deserialize from storage - static fromSerialized(session: SerializableSession): GeminiConversation { - const conversation = new GeminiConversation(); - conversation.totalUsage = { ...session.totalUsage }; - - for (const msg of session.messages) { - const parts: Part[] = []; - - if (msg.content) { - parts.push({ text: msg.content }); - } - - if (msg.thinking) { - parts.push({ text: msg.thinking, thought: true }); - } - - for (const funcCall of msg.functionCalls || []) { - parts.push({ functionCall: funcCall }); - } - - for (const funcResp of msg.functionResponses || []) { - parts.push({ functionResponse: funcResp }); - } - - conversation.messages.push({ - role: msg.role, - parts - }); - } - - return conversation; - } -} -``` - -## Token Counting - -### Understanding Gemini Token Usage - -```typescript -interface TokenUsage { - promptTokens: number; - candidatesTokens: number; // Output tokens - totalTokens: number; - 
thoughtsTokens?: number; // Thinking tokens (reasoning models) - cachedContentTokens?: number; // Cache read tokens -} - -function extractTokenUsage(response: GenerateContentResponse): TokenUsage { - const usage = response.usageMetadata; - - return { - promptTokens: usage?.promptTokenCount || 0, - candidatesTokens: usage?.candidatesTokenCount || 0, - totalTokens: usage?.totalTokenCount || 0, - thoughtsTokens: usage?.thoughtsTokenCount || 0, - cachedContentTokens: usage?.cachedContentTokenCount || 0 - }; -} - -// Count tokens before sending (estimation) -async function countTokens(content: string | Content[]): Promise { - const response = await client.models.computeTokens({ - model: 'gemini-2.0-flash-exp', - contents: typeof content === 'string' - ? [{ parts: [{ text: content }] }] - : content - }); - - return response.totalTokens || 0; -} - -// Token usage accumulation -class TokenTracker { - private usage = { - totalPromptTokens: 0, - totalCandidatesTokens: 0, - totalThoughtsTokens: 0, - totalCachedTokens: 0, - totalRequests: 0 - }; - - addUsage(response: GenerateContentResponse): void { - const tokenUsage = extractTokenUsage(response); - - this.usage.totalPromptTokens += tokenUsage.promptTokens; - this.usage.totalCandidatesTokens += tokenUsage.candidatesTokens; - this.usage.totalThoughtsTokens += tokenUsage.thoughtsTokens || 0; - this.usage.totalCachedTokens += tokenUsage.cachedContentTokens || 0; - this.usage.totalRequests++; - } - - getStats() { - return { - ...this.usage, - totalTokens: this.usage.totalPromptTokens + this.usage.totalCandidatesTokens, - averageTokensPerRequest: this.usage.totalRequests > 0 - ? 
(this.usage.totalPromptTokens + this.usage.totalCandidatesTokens) / this.usage.totalRequests - : 0 - }; - } -} -``` - -## Context Caching - -Gemini supports context caching to reduce costs for repeated large prompts: - -```typescript -import { type CachedContent } from '@google/genai'; - -class GeminiCache { - async createCache( - systemInstruction: string, - contents: Content[], - ttlHours = 1 - ): Promise { - const cache = await client.caches.create({ - model: 'gemini-2.0-flash-exp', - systemInstruction: { parts: [{ text: systemInstruction }] }, - contents, - ttl: `${ttlHours * 3600}s` // Convert hours to seconds - }); - - return cache; - } - - async generateWithCache( - cachedContent: CachedContent, - userMessage: string - ): Promise { - return await client.models.generateContent({ - model: cachedContent.model || 'gemini-2.0-flash-exp', - cachedContent: cachedContent.name, - contents: [{ - role: 'user', - parts: [{ text: userMessage }] - }] - }); - } - - async listCaches(): Promise { - const caches = []; - for await (const cache of client.caches.list()) { - caches.push(cache); - } - return caches; - } - - async deleteCache(cacheName: string): Promise { - await client.caches.delete({ name: cacheName }); - } - - // Example: Cache a large document for repeated analysis - async createDocumentCache(document: string): Promise { - const systemInstruction = ` - You are a document analysis assistant. The user will provide a large document, - and you should be ready to answer questions about it, summarize it, or extract - information from it. - `; - - const contents = [{ - role: 'user' as const, - parts: [{ text: `Please analyze this document:\n\n${document}` }] - }]; - - return this.createCache(systemInstruction, contents, 24); // Cache for 24 hours - } -} - -// Usage example -async function demonstrateCache() { - const cache = new GeminiCache(); - - // Create cache with large document - const document = "... 
very large document content ..."; - const cachedContent = await cache.createDocumentCache(document); - - // Now ask questions using the cache (saves tokens!) - const response1 = await cache.generateWithCache( - cachedContent, - "What are the key points in this document?" - ); - - const response2 = await cache.generateWithCache( - cachedContent, - "Can you summarize the conclusions?" - ); - - // Clean up when done - await cache.deleteCache(cachedContent.name!); -} -``` - -## Function Calling (Tools) - -### Basic Function Calling Setup - -```typescript -interface ToolDefinition { - name: string; - description: string; - parameters: { - type: 'object'; - properties: Record; - required: string[]; - }; -} - -// Define tools -const tools: ToolDefinition[] = [{ - name: 'get_weather', - description: 'Get current weather for a location', - parameters: { - type: 'object', - properties: { - location: { - type: 'string', - description: 'City name or location' - }, - units: { - type: 'string', - enum: ['celsius', 'fahrenheit'], - description: 'Temperature units' - } - }, - required: ['location'] - } -}]; - -// Convert to Gemini format -function createGeminiTools(tools: ToolDefinition[]) { - return [{ - functionDeclarations: tools.map(tool => ({ - name: tool.name, - description: tool.description, - parametersJsonSchema: tool.parameters - })) - }]; -} - -// Function call handler -async function executeFunction(functionCall: FunctionCall): Promise { - const { name, args } = functionCall; - const params = typeof args === 'string' ? 
JSON.parse(args) : args; - - switch (name) { - case 'get_weather': - return await getWeatherData(params.location, params.units); - default: - throw new Error(`Unknown function: ${name}`); - } -} - -// Mock weather function -async function getWeatherData(location: string, units = 'celsius') { - return { - location, - temperature: 22, - conditions: 'sunny', - units - }; -} -``` - -### Complete Function Calling Flow - -```typescript -class GeminiFunctionCalling { - private tools: ToolDefinition[]; - - constructor(tools: ToolDefinition[]) { - this.tools = tools; - } - - async processWithTools(messages: Content[]): Promise { - let currentMessages = [...messages]; - let iterations = 0; - const maxIterations = 5; - - while (iterations < maxIterations) { - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - contents: currentMessages, - tools: createGeminiTools(this.tools), - toolConfig: { - functionCallingConfig: { - mode: 'AUTO' // Let model decide when to call functions - } - } - }); - - const candidate = response.candidates?.[0]; - if (!candidate?.content) break; - - // Add assistant response to conversation - currentMessages.push(candidate.content); - - // Check for function calls - const functionCalls = this.extractFunctionCalls(candidate.content); - - if (functionCalls.length === 0) { - // No more function calls, return final response - return response.text || ''; - } - - // Execute function calls - for (const functionCall of functionCalls) { - try { - const result = await executeFunction(functionCall); - - // Add function response to conversation - currentMessages.push({ - role: 'user', - parts: [{ - functionResponse: { - name: functionCall.name, - id: functionCall.id, - response: { result } - } - }] - }); - } catch (error) { - // Add error response - currentMessages.push({ - role: 'user', - parts: [{ - functionResponse: { - name: functionCall.name, - id: functionCall.id, - response: { error: error.message } - } - }] - }); - } - 
} - - iterations++; - } - - throw new Error('Max function calling iterations exceeded'); - } - - private extractFunctionCalls(content: Content): FunctionCall[] { - const calls: FunctionCall[] = []; - - for (const part of content.parts || []) { - if (part.functionCall) { - calls.push(part.functionCall); - } - } - - return calls; - } - - // Streaming version with function calls - async *processStreamWithTools(messages: Content[]): AsyncGenerator<{ - type: 'content' | 'function_call' | 'function_result'; - content?: string; - functionCall?: FunctionCall; - functionResult?: any; - }> { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-exp', - contents: messages, - tools: createGeminiTools(this.tools), - toolConfig: { - functionCallingConfig: { mode: 'AUTO' } - } - }); - - let pendingFunctionCalls: FunctionCall[] = []; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content) continue; - - for (const part of candidate.content.parts || []) { - if (part.text && !part.thought) { - yield { type: 'content', content: part.text }; - } - - if (part.functionCall) { - pendingFunctionCalls.push(part.functionCall); - yield { type: 'function_call', functionCall: part.functionCall }; - } - } - } - - // Execute any pending function calls - for (const functionCall of pendingFunctionCalls) { - try { - const result = await executeFunction(functionCall); - yield { type: 'function_result', functionResult: result }; - } catch (error) { - yield { - type: 'function_result', - functionResult: { error: error.message } - }; - } - } - } -} -``` - -## System Instructions - -Gemini handles system instructions differently from other providers: - -```typescript -// System instruction is a separate parameter, not part of messages -async function generateWithSystemInstruction( - systemPrompt: string, - userMessage: string -): Promise { - const response = await client.models.generateContent({ - model: 
'gemini-2.0-flash-exp', - systemInstruction: { - parts: [{ text: systemPrompt }] - }, - contents: [{ - role: 'user', - parts: [{ text: userMessage }] - }] - }); - - return response.text || ''; -} - -// For conversation with system instruction -class GeminiConversationWithSystem { - private systemInstruction: Content; - private messages: Content[] = []; - - constructor(systemPrompt: string) { - this.systemInstruction = { - parts: [{ text: systemPrompt }] - }; - } - - async sendMessage(text: string): Promise { - this.messages.push({ - role: 'user', - parts: [{ text }] - }); - - const response = await client.models.generateContent({ - model: 'gemini-2.0-flash-exp', - systemInstruction: this.systemInstruction, - contents: this.messages - }); - - const candidate = response.candidates?.[0]; - if (candidate?.content) { - this.messages.push(candidate.content); - } - - return response.text || ''; - } - - updateSystemInstruction(newPrompt: string): void { - this.systemInstruction = { - parts: [{ text: newPrompt }] - }; - } -} -``` - -## Parts System for Content - -Understanding Gemini's parts-based content system: - -```typescript -// Text content -const textPart: Part = { - text: 'Hello, world!' 
-}; - -// Thinking content (for reasoning models) -const thinkingPart: Part = { - text: 'Let me think about this problem...', - thought: true -}; - -// Function call -const functionCallPart: Part = { - functionCall: { - name: 'get_weather', - args: { location: 'San Francisco' } - } -}; - -// Function response -const functionResponsePart: Part = { - functionResponse: { - name: 'get_weather', - response: { temperature: 72, conditions: 'sunny' } - } -}; - -// Image data (inline) -const imagePart: Part = { - inlineData: { - mimeType: 'image/jpeg', - data: 'base64-encoded-image-data' - } -}; - -// File reference -const filePart: Part = { - fileData: { - mimeType: 'image/jpeg', - fileUri: 'gs://bucket/image.jpg' - } -}; - -// Creating multi-part content -const multiPartContent: Content = { - role: 'user', - parts: [ - { text: 'What is in this image?' }, - { - inlineData: { - mimeType: 'image/jpeg', - data: await imageToBase64('path/to/image.jpg') - } - } - ] -}; - -// Utility functions for parts -function createTextPart(text: string): Part { - return { text }; -} - -function createThinkingPart(text: string): Part { - return { text, thought: true }; -} - -function createImagePart(imageData: string, mimeType: string): Part { - return { - inlineData: { - mimeType, - data: imageData - } - }; -} - -async function imageToBase64(filePath: string): Promise { - const fs = await import('fs/promises'); - const buffer = await fs.readFile(filePath); - return buffer.toString('base64'); -} -``` - -## Thinking Tokens - -Gemini thinking models (like `gemini-2.0-flash-thinking-exp-1219`) provide reasoning traces: - -```typescript -interface ThinkingExtractor { - thinking: string; - response: string; - thinkingTokens: number; - responseTokens: number; -} - -function extractThinking(response: GenerateContentResponse): ThinkingExtractor { - let thinking = ''; - let responseText = ''; - - const candidate = response.candidates?.[0]; - if (candidate?.content?.parts) { - for (const part of 
candidate.content.parts) { - if (part.text) { - if (part.thought) { - thinking += part.text; - } else { - responseText += part.text; - } - } - } - } - - const usage = response.usageMetadata; - - return { - thinking, - response: responseText, - thinkingTokens: usage?.thoughtsTokenCount || 0, - responseTokens: usage?.candidatesTokenCount || 0 - }; -} - -// Streaming thinking extraction -async function streamWithThinkingExtraction(prompt: string) { - const stream = await client.models.generateContentStream({ - model: 'gemini-2.0-flash-thinking-exp-1219', - contents: prompt - }); - - let thinkingContent = ''; - let responseContent = ''; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content?.parts) continue; - - for (const part of candidate.content.parts) { - if (part.text) { - if (part.thought) { - thinkingContent += part.text; - console.log('[THINKING DELTA]', part.text); - } else { - responseContent += part.text; - console.log('[RESPONSE DELTA]', part.text); - } - } - } - } - - return { - thinking: thinkingContent, - response: responseContent - }; -} - -// Enable thinking for models that support it -async function generateWithThinking(prompt: string, model = 'gemini-2.0-flash-thinking-exp-1219') { - const response = await client.models.generateContent({ - model, - contents: prompt - }); - - return extractThinking(response); -} -``` - -## Peculiarities and Gotchas - -### Key Differences from Other APIs - -1. **System Instructions**: Separate parameter, not part of message history -2. **Parts-based Content**: Content is split into parts, each with specific types -3. **Thinking Detection**: Must check `part.thought` flag to identify reasoning content -4. **Function Calls**: Embedded in parts, not separate message types -5. **Role Names**: Uses 'model' instead of 'assistant' for AI responses -6. 
**Streaming**: Returns full `GenerateContentResponse` objects, not deltas - -### Common Pitfalls - -```typescript -// ❌ Wrong: Treating text as complete response -const response = await client.models.generateContent({...}); -console.log(response.candidates[0].content.parts[0].text); // May miss other parts - -// ✅ Correct: Use convenience getter or process all parts -console.log(response.text); // Concatenates all text parts automatically - -// ❌ Wrong: Mixing system instruction with messages -const messages = [ - { role: 'system', parts: [{ text: 'You are helpful' }] }, // Not supported - { role: 'user', parts: [{ text: 'Hello' }] } -]; - -// ✅ Correct: Separate system instruction -const response = await client.models.generateContent({ - systemInstruction: { parts: [{ text: 'You are helpful' }] }, - contents: [{ role: 'user', parts: [{ text: 'Hello' }] }] -}); - -// ❌ Wrong: Assuming single part responses -for await (const chunk of stream) { - console.log(chunk.text); // May miss function calls or thinking -} - -// ✅ Correct: Process all parts -for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (candidate?.content?.parts) { - for (const part of candidate.content.parts) { - if (part.text && !part.thought) { - console.log('[RESPONSE]', part.text); - } else if (part.text && part.thought) { - console.log('[THINKING]', part.text); - } else if (part.functionCall) { - console.log('[FUNCTION CALL]', part.functionCall); - } - } - } -} -``` - -### Performance Tips - -1. **Use streaming** for better user experience with long responses -2. **Cache large prompts** to reduce token costs -3. **Batch token counting** when possible -4. **Set appropriate `abortSignal` timeouts** for long-running requests -5. 
**Handle function calls efficiently** to avoid timeout issues - -### Model-Specific Behaviors - -```typescript -// Different models have different capabilities -const modelCapabilities = { - 'gemini-2.0-flash-exp': { - thinking: false, - functionCalling: true, - vision: true, - maxTokens: 1000000 - }, - 'gemini-2.0-flash-thinking-exp-1219': { - thinking: true, - functionCalling: true, - vision: true, - maxTokens: 32768 - }, - 'gemini-1.5-pro': { - thinking: false, - functionCalling: true, - vision: true, - maxTokens: 2000000 - } -}; - -// Check model capabilities before using features -function supportsThinking(model: string): boolean { - return model.includes('thinking'); -} - -function getMaxTokens(model: string): number { - return modelCapabilities[model]?.maxTokens || 32768; -} -``` - -This comprehensive guide covers all the essential aspects of implementing Gemini API features. The key is understanding Gemini's parts-based content system and properly handling the different types of content (text, thinking, function calls) that can appear in responses. \ No newline at end of file diff --git a/packages/ai/docs/images.md b/packages/ai/docs/images.md deleted file mode 100644 index 82350015..00000000 --- a/packages/ai/docs/images.md +++ /dev/null @@ -1,322 +0,0 @@ -# Image Input Support for LLM Providers - -This document describes how to submit images to different LLM provider APIs and proposes an abstraction layer for unified image handling. - -## Provider-Specific Image Support - -### 1. Anthropic (Claude) - -**Supported Models**: Claude 3 and Claude 4 families (Sonnet, Haiku, Opus) - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **Base64 Encoding**: -```json -{ - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "base64", - "media_type": "image/jpeg", - "data": "" - } - }, - { - "type": "text", - "text": "What's in this image?" - } - ] -} -``` - -2. 
**URL Support**: -```json -{ - "role": "user", - "content": [ - { - "type": "image", - "source": { - "type": "url", - "url": "https://example.com/image.jpg" - } - } - ] -} -``` - -**Limitations**: -- Maximum 20 images per request -- Each image max 3.75 MB -- Maximum dimensions: 8,000px × 8,000px -- Images are ephemeral (not stored beyond request duration) - -### 2. Google GenAI (Gemini) - -**Supported Models**: Gemini Pro Vision, Gemini 1.5, Gemini 2.0 - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **Inline Base64 Data** (for files < 20MB): -```json -{ - "contents": [{ - "parts": [ - { - "inline_data": { - "mime_type": "image/jpeg", - "data": "BASE64_ENCODED_IMAGE_DATA" - } - }, - { - "text": "Describe this image" - } - ] - }] -} -``` - -2. **File API** (for larger files or reuse): -- Upload file first using File API -- Reference by file URI in subsequent requests - -**Limitations**: -- Inline data: Total request size (text + images) < 20MB -- Base64 encoding increases size in transit -- Returns HTTP 413 if request too large - -### 3. OpenAI Chat Completions (GPT-4o, GPT-4o-mini) - -**Supported Models**: GPT-4o, GPT-4o-mini, GPT-4-turbo with vision - -**Image Formats**: JPEG, PNG, GIF, WebP - -**Methods**: -1. **URL Reference**: -```json -{ - "role": "user", - "content": [ - { - "type": "text", - "text": "What's in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://example.com/image.jpg" - } - } - ] -} -``` - -2. **Base64 Data URL**: -```json -{ - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": "data:image/jpeg;base64," - } - } - ] -} -``` - -**Note**: Despite the field name `image_url`, base64 data URLs are supported. - -### 4. 
OpenAI Responses API (o1, o3, o4-mini) - -**Vision Support by Model**: -- ✅ **o1**: Full vision support -- ✅ **o3**: Vision support + image generation -- ✅ **o4-mini**: Vision support + image generation -- ❌ **o3-mini**: No vision capabilities -- ✅ **o3-pro**: Vision analysis (no generation) - -**Methods**: Same as Chat Completions API -- URL references -- Base64 data URLs - -**Note**: Vision capabilities integrated into reasoning chain-of-thought for more contextually rich responses. - -## Proposed Unified Abstraction - -### Image Content Type - -```typescript -interface ImageContent { - type: "image"; - data: string; // base64 encoded image data - mimeType: string; // e.g., "image/jpeg", "image/png" -} -``` - -### Unified Message Structure - -```typescript -interface UserMessage { - role: "user"; - content: string | (TextContent | ImageContent)[]; -} - -interface TextContent { - type: "text"; - text: string; -} -``` - -### Provider Adapter Implementation - -Each provider adapter would: - -1. **Check Model Capabilities**: -```typescript -if (model.input.includes("image")) { - // Process image content -} else { - // Throw error or ignore images -} -``` - -2. 
**Convert to Provider Format**: - -```typescript -// Anthropic converter -function toAnthropicContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - type: "image", - source: { - type: "base64", - media_type: item.mimeType, - data: item.data - } - }; - } - return { type: "text", text: item.text }; - }); -} - -// OpenAI converter -function toOpenAIContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - type: "image_url", - image_url: { - url: `data:${item.mimeType};base64,${item.data}` - } - }; - } - return { type: "text", text: item.text }; - }); -} - -// Google converter -function toGoogleContent(content: (TextContent | ImageContent)[]) { - return content.map(item => { - if (item.type === "image") { - return { - inline_data: { - mime_type: item.mimeType, - data: item.data - } - }; - } - return { text: item.text }; - }); -} -``` - -### Size and Format Validation - -```typescript -interface ImageConstraints { - maxSizeMB: number; - maxWidth: number; - maxHeight: number; - maxCount: number; - supportedFormats: string[]; -} - -const PROVIDER_CONSTRAINTS: Record = { - anthropic: { - maxSizeMB: 3.75, - maxWidth: 8000, - maxHeight: 8000, - maxCount: 20, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - }, - google: { - maxSizeMB: 20, // for inline data - maxWidth: Infinity, - maxHeight: Infinity, - maxCount: Infinity, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - }, - openai: { - maxSizeMB: 20, - maxWidth: Infinity, - maxHeight: Infinity, - maxCount: Infinity, - supportedFormats: ["image/jpeg", "image/png", "image/gif", "image/webp"] - } -}; - -async function validateImage( - image: ImageContent, - provider: string -): Promise { - const constraints = PROVIDER_CONSTRAINTS[provider]; - - // Check MIME type - if (!constraints.supportedFormats.includes(image.mimeType)) { - throw 
new Error(`Unsupported image format: ${image.mimeType}`); - } - - // Check size - const imageBuffer = Buffer.from(image.data, 'base64'); - const sizeMB = imageBuffer.length / (1024 * 1024); - if (sizeMB > constraints.maxSizeMB) { - throw new Error(`Image exceeds ${constraints.maxSizeMB}MB limit`); - } - - // Could add dimension checks using image processing library -} -``` - -## Implementation Considerations - -1. **Preprocessing**: - - User is responsible for converting images to base64 before passing to API - - Utility functions could be provided for common conversions (file to base64, URL to base64) - - Image optimization (resize/compress) should happen before encoding - -2. **Error Handling**: - - Validate MIME types and sizes before sending - - Check model capabilities (via `model.input.includes("image")`) - - Provide clear error messages for unsupported features - -3. **Performance**: - - Base64 encoding increases payload size by ~33% - - Consider image compression before encoding - - For Google GenAI, be aware of 20MB total request limit - -4. **Token Counting**: - - Images consume tokens (varies by provider and image size) - - Include image token estimates in usage calculations - - Anthropic: ~1 token per ~3-4 bytes of base64 data - - OpenAI: Detailed images consume more tokens than low-detail - -5. 
**Fallback Strategies**: - - If model doesn't support images, throw error or ignore images - - Consider offering text-only fallback for non-vision models \ No newline at end of file diff --git a/packages/ai/docs/models.md b/packages/ai/docs/models.md deleted file mode 100644 index a418c390..00000000 --- a/packages/ai/docs/models.md +++ /dev/null @@ -1,56 +0,0 @@ -# OpenAI Models - -## All Models - -- [ ] [GPT-5](https://platform.openai.com/docs/models/gpt-5) -- [ ] [GPT-5 mini](https://platform.openai.com/docs/models/gpt-5-mini) -- [ ] [GPT-5 nano](https://platform.openai.com/docs/models/gpt-5-nano) -- [ ] [o3-deep-research](https://platform.openai.com/docs/models/o3-deep-research) -- [ ] [o4-mini-deep-research](https://platform.openai.com/docs/models/o4-mini-deep-research) -- [ ] [o3-pro](https://platform.openai.com/docs/models/o3-pro) -- [ ] [GPT-4o Audio](https://platform.openai.com/docs/models/gpt-4o-audio-preview) -- [ ] [GPT-4o Realtime](https://platform.openai.com/docs/models/gpt-4o-realtime-preview) -- [ ] [o3](https://platform.openai.com/docs/models/o3) -- [ ] [o4-mini](https://platform.openai.com/docs/models/o4-mini) -- [ ] [GPT-4.1](https://platform.openai.com/docs/models/gpt-4.1) -- [ ] [GPT-4.1 mini](https://platform.openai.com/docs/models/gpt-4.1-mini) -- [ ] [GPT-4.1 nano](https://platform.openai.com/docs/models/gpt-4.1-nano) -- [ ] [o1-pro](https://platform.openai.com/docs/models/o1-pro) -- [ ] [computer-use-preview](https://platform.openai.com/docs/models/computer-use-preview) -- [ ] [GPT-4o mini Search Preview](https://platform.openai.com/docs/models/gpt-4o-mini-search-preview) -- [ ] [GPT-4o Search Preview](https://platform.openai.com/docs/models/gpt-4o-search-preview) -- [ ] [GPT-4.5 Preview (Deprecated)](https://platform.openai.com/docs/models/gpt-4.5-preview) -- [ ] [o3-mini](https://platform.openai.com/docs/models/o3-mini) -- [ ] [GPT-4o mini Audio](https://platform.openai.com/docs/models/gpt-4o-mini-audio-preview) -- [ ] [GPT-4o mini 
Realtime](https://platform.openai.com/docs/models/gpt-4o-mini-realtime-preview) -- [ ] [o1](https://platform.openai.com/docs/models/o1) -- [ ] [omni-moderation](https://platform.openai.com/docs/models/omni-moderation-latest) -- [ ] [o1-mini](https://platform.openai.com/docs/models/o1-mini) -- [ ] [o1 Preview](https://platform.openai.com/docs/models/o1-preview) -- [ ] [GPT-4o](https://platform.openai.com/docs/models/gpt-4o) -- [ ] [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini) -- [ ] [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo) -- [ ] [babbage-002](https://platform.openai.com/docs/models/babbage-002) -- [ ] [ChatGPT-4o](https://platform.openai.com/docs/models/chatgpt-4o-latest) -- [ ] [codex-mini-latest](https://platform.openai.com/docs/models/codex-mini-latest) -- [ ] [DALL·E 2](https://platform.openai.com/docs/models/dall-e-2) -- [ ] [DALL·E 3](https://platform.openai.com/docs/models/dall-e-3) -- [ ] [davinci-002](https://platform.openai.com/docs/models/davinci-002) -- [ ] [GPT-3.5 Turbo](https://platform.openai.com/docs/models/gpt-3.5-turbo) -- [ ] [GPT-4](https://platform.openai.com/docs/models/gpt-4) -- [ ] [GPT-4 Turbo Preview](https://platform.openai.com/docs/models/gpt-4-turbo-preview) -- [ ] [GPT-4o mini Transcribe](https://platform.openai.com/docs/models/gpt-4o-mini-transcribe) -- [ ] [GPT-4o mini TTS](https://platform.openai.com/docs/models/gpt-4o-mini-tts) -- [ ] [GPT-4o Transcribe](https://platform.openai.com/docs/models/gpt-4o-transcribe) -- [ ] [GPT-5 Chat](https://platform.openai.com/docs/models/gpt-5-chat-latest) -- [ ] [GPT Image 1](https://platform.openai.com/docs/models/gpt-image-1) -- [ ] [gpt-oss-120b](https://platform.openai.com/docs/models/gpt-oss-120b) -- [ ] [gpt-oss-20b](https://platform.openai.com/docs/models/gpt-oss-20b) -- [ ] [text-embedding-3-large](https://platform.openai.com/docs/models/text-embedding-3-large) -- [ ] 
[text-embedding-3-small](https://platform.openai.com/docs/models/text-embedding-3-small) -- [ ] [text-embedding-ada-002](https://platform.openai.com/docs/models/text-embedding-ada-002) -- [ ] [text-moderation](https://platform.openai.com/docs/models/text-moderation-latest) -- [ ] [text-moderation-stable](https://platform.openai.com/docs/models/text-moderation-stable) -- [ ] [TTS-1](https://platform.openai.com/docs/models/tts-1) -- [ ] [TTS-1 HD](https://platform.openai.com/docs/models/tts-1-hd) -- [ ] [Whisper](https://platform.openai.com/docs/models/whisper-1) \ No newline at end of file diff --git a/packages/ai/docs/openai-api.md b/packages/ai/docs/openai-api.md deleted file mode 100644 index cefe9ac3..00000000 --- a/packages/ai/docs/openai-api.md +++ /dev/null @@ -1,2320 +0,0 @@ -# OpenAI SDK Implementation Guide - -This document provides a comprehensive guide to implementing the required features using the OpenAI SDK v5.12.2. All examples are based on actual usage patterns from the pi-mono codebase and include real TypeScript types from the SDK. - -## Table of Contents - -1. [Basic Setup](#basic-setup) -2. [Streaming Responses](#streaming-responses) -3. [Aborting Requests](#aborting-requests) -4. [Error Handling](#error-handling) -5. [Stop Reasons](#stop-reasons) -6. [Message History & Serialization](#message-history--serialization) -7. [Token Counting](#token-counting) -8. [Caching](#caching) -9. [Chat Completions vs Responses API](#chat-completions-vs-responses-api) -10. [Tool/Function Calling](#toolfunction-calling) -11. [System Prompts](#system-prompts) -12. [Provider-Specific Features](#provider-specific-features) -13. 
[Complete Implementation Examples](#complete-implementation-examples) - -## Basic Setup - -```typescript -import OpenAI from "openai"; - -// Basic client setup -const client = new OpenAI({ - apiKey: process.env.OPENAI_API_KEY, - baseURL: "https://api.openai.com/v1", // Optional, default shown -}); - -// For other providers (Groq, Anthropic OpenAI-compatible, etc.) -const groqClient = new OpenAI({ - apiKey: process.env.GROQ_API_KEY, - baseURL: "https://api.groq.com/openai/v1", -}); -``` - -### Client Configuration Options - -```typescript -interface ClientOptions { - apiKey?: string; - baseURL?: string; - timeout?: number; // Request timeout in milliseconds - maxRetries?: number; // Number of retry attempts - defaultHeaders?: Record; - defaultQuery?: Record; -} -``` - -## Streaming Responses - -### Chat Completions Streaming - -```typescript -import type { - ChatCompletionChunk, - ChatCompletionCreateParamsStreaming -} from "openai/resources/chat/completions"; -import { Stream } from "openai/core/streaming"; - -async function streamChatCompletion() { - const params: ChatCompletionCreateParamsStreaming = { - model: "gpt-4o", - messages: [ - { role: "user", content: "Tell me a story" } - ], - stream: true, - max_completion_tokens: 1000, - }; - - const stream: Stream = await client.chat.completions.create(params); - - for await (const chunk of stream) { - const delta = chunk.choices[0]?.delta; - - if (delta?.content) { - process.stdout.write(delta.content); - } - - if (delta?.tool_calls) { - console.log("Tool call delta:", delta.tool_calls); - } - - if (chunk.choices[0]?.finish_reason) { - console.log("\nFinish reason:", chunk.choices[0].finish_reason); - } - } -} -``` - -### Responses API Streaming - -```typescript -import type { - ResponseCreateParamsStreaming, - ResponseStreamEvent -} from "openai/resources/responses"; - -async function streamResponsesAPI() { - const params: ResponseCreateParamsStreaming = { - model: "o1-mini", - input: [ - { - role: "user", - 
content: [{ type: "input_text", text: "Solve this math problem: 2x + 5 = 11" }] - } - ], - stream: true, - max_output_tokens: 2000, - reasoning: { - effort: "low", - summary: "detailed" - } - }; - - const stream: Stream = await client.responses.create(params); - - for await (const event of stream) { - switch (event.type) { - case "response.reasoning.text.delta": - // Reasoning/thinking tokens (o1/o3) - process.stdout.write(`[thinking] ${event.delta}`); - break; - - case "response.text.delta": - // Output content - process.stdout.write(event.delta); - break; - - case "response.function_call.arguments.delta": - // Tool call arguments being built - console.log("Tool call delta:", event.delta); - break; - - case "response.completed": - console.log("\nResponse completed"); - break; - } - } -} -``` - -### Streaming Patterns - -```typescript -// Pattern 1: Simple content streaming -async function simpleStream(messages: any[]) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }); - - let fullContent = ""; - for await (const chunk of stream) { - const content = chunk.choices[0]?.delta?.content || ""; - fullContent += content; - process.stdout.write(content); - } - - return fullContent; -} - -// Pattern 2: Event-driven streaming with handlers -interface StreamHandlers { - onContent?: (delta: string) => void; - onToolCall?: (toolCall: any) => void; - onFinish?: (reason: string) => void; -} - -async function eventDrivenStream(messages: any[], handlers: StreamHandlers) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }); - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - if (choice.delta?.content) { - handlers.onContent?.(choice.delta.content); - } - - if (choice.delta?.tool_calls) { - handlers.onToolCall?.(choice.delta.tool_calls); - } - - if (choice.finish_reason) { - handlers.onFinish?.(choice.finish_reason); 
- } - } -} -``` - -## Aborting Requests - -### Using AbortController - -```typescript -class AbortableClient { - private client: OpenAI; - private abortController: AbortController | null = null; - - constructor(config: { apiKey: string; baseURL?: string }) { - this.client = new OpenAI(config); - } - - async askWithAbort(message: string): Promise { - // Create new AbortController for this request - this.abortController = new AbortController(); - - try { - const response = await this.client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: message }], - max_completion_tokens: 1000, - }, { - signal: this.abortController.signal // Pass abort signal - }); - - return response.choices[0]?.message?.content || ""; - } catch (error) { - if (this.abortController.signal.aborted) { - throw new Error("Request was interrupted"); - } - throw error; - } finally { - this.abortController = null; - } - } - - // Call this to abort the current request - interrupt(): void { - this.abortController?.abort(); - } -} - -// Usage example -const abortableClient = new AbortableClient({ - apiKey: process.env.OPENAI_API_KEY! 
-}); - -// Start request -const responsePromise = abortableClient.askWithAbort("Write a long essay"); - -// Abort after 5 seconds -setTimeout(() => { - abortableClient.interrupt(); -}, 5000); - -try { - const response = await responsePromise; - console.log(response); -} catch (error) { - console.log("Request was aborted:", error.message); -} -``` - -### Aborting Streaming Requests - -```typescript -async function abortableStream(messages: any[]) { - const abortController = new AbortController(); - - // Abort after 10 seconds - const timeoutId = setTimeout(() => { - abortController.abort(); - }, 10000); - - try { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages, - stream: true, - }, { - signal: abortController.signal - }); - - for await (const chunk of stream) { - // Check if aborted before processing each chunk - if (abortController.signal.aborted) { - break; - } - - const content = chunk.choices[0]?.delta?.content; - if (content) { - process.stdout.write(content); - } - } - } catch (error) { - if (abortController.signal.aborted) { - console.log("\nStream was aborted"); - } else { - throw error; - } - } finally { - clearTimeout(timeoutId); - } -} -``` - -## Error Handling - -### Error Types from OpenAI SDK - -```typescript -import { - OpenAIError, - APIError, - APIConnectionError, - APIConnectionTimeoutError, - APIUserAbortError, - AuthenticationError, - BadRequestError, - RateLimitError, - InternalServerError, - NotFoundError, - PermissionDeniedError, - UnprocessableEntityError -} from "openai"; - -// Comprehensive error handler -async function handleAPICall(apiCall: () => Promise): Promise { - try { - return await apiCall(); - } catch (error) { - if (error instanceof APIUserAbortError) { - console.log("Request was aborted by user"); - throw new Error("Request interrupted"); - } - - if (error instanceof AuthenticationError) { - console.error("Authentication failed:", error.message); - throw new Error("Invalid API key"); - } - - 
if (error instanceof RateLimitError) { - console.error("Rate limit exceeded:", error.message); - // Could implement exponential backoff here - throw new Error("Rate limited - try again later"); - } - - if (error instanceof APIConnectionError) { - console.error("Connection error:", error.message); - throw new Error("Network connection failed"); - } - - if (error instanceof APIConnectionTimeoutError) { - console.error("Request timeout:", error.message); - throw new Error("Request timed out"); - } - - if (error instanceof BadRequestError) { - console.error("Bad request:", error.message); - console.error("Error details:", error.error); - throw new Error(`Invalid request: ${error.message}`); - } - - if (error instanceof UnprocessableEntityError) { - console.error("Unprocessable entity:", error.message); - throw new Error(`Validation error: ${error.message}`); - } - - if (error instanceof APIError) { - console.error(`API Error ${error.status}:`, error.message); - console.error("Error code:", error.code); - console.error("Error type:", error.type); - throw new Error(`API error: ${error.message}`); - } - - if (error instanceof OpenAIError) { - console.error("OpenAI SDK error:", error.message); - throw new Error(`SDK error: ${error.message}`); - } - - // Unknown error - console.error("Unexpected error:", error); - throw error; - } -} - -// Usage with retry logic -async function apiCallWithRetry( - apiCall: () => Promise, - maxRetries: number = 3 -): Promise { - let lastError: Error; - - for (let attempt = 0; attempt < maxRetries; attempt++) { - try { - return await handleAPICall(apiCall); - } catch (error) { - lastError = error as Error; - - // Don't retry on certain errors - if (error instanceof AuthenticationError || - error instanceof BadRequestError || - error instanceof APIUserAbortError) { - throw error; - } - - // Exponential backoff for retryable errors - if (attempt < maxRetries - 1) { - const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s - await new 
Promise(resolve => setTimeout(resolve, delay)); - } - } - } - - throw lastError!; -} -``` - -### Error Context Extraction - -```typescript -function extractErrorDetails(error: unknown): { - message: string; - code?: string; - type?: string; - status?: number; - retryable: boolean; -} { - if (error instanceof APIError) { - return { - message: error.message, - code: error.code || undefined, - type: error.type, - status: error.status, - retryable: error instanceof RateLimitError || - error instanceof APIConnectionError || - error instanceof InternalServerError - }; - } - - if (error instanceof APIUserAbortError) { - return { - message: "Request was aborted", - retryable: false - }; - } - - if (error instanceof OpenAIError) { - return { - message: error.message, - retryable: false - }; - } - - return { - message: error instanceof Error ? error.message : "Unknown error", - retryable: false - }; -} -``` - -## Stop Reasons - -### Chat Completions Stop Reasons - -```typescript -type ChatCompletionFinishReason = - | "stop" // Natural stopping point or stop sequence - | "length" // Maximum token limit reached - | "content_filter" // Content filtered - | "tool_calls" // Model wants to call tools - | "function_call"; // Legacy function calling - -async function handleStopReasons() { - const response = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Hello" }], - max_completion_tokens: 10, // Low limit to trigger "length" stop - stop: ["END"], // Custom stop sequence - }); - - const choice = response.choices[0]; - const finishReason = choice.finish_reason; - - switch (finishReason) { - case "stop": - console.log("Completed naturally or hit stop sequence"); - break; - - case "length": - console.log("Hit token limit - response may be incomplete"); - // Could request more tokens or continue conversation - break; - - case "content_filter": - console.log("Content was filtered"); - break; - - case "tool_calls": - console.log("Model 
wants to call tools"); - // Handle tool calls (see Tool Calling section) - break; - - default: - console.log("Unknown finish reason:", finishReason); - } - - return { - content: choice.message.content, - finishReason, - complete: finishReason === "stop" - }; -} -``` - -### Responses API Stop Reasons - -```typescript -// Responses API uses different event types to indicate completion -async function handleResponsesStopReasons() { - const response = await client.responses.create({ - model: "o1-mini", - input: [{ role: "user", content: [{ type: "input_text", text: "Hello" }] }], - max_output_tokens: 100, - }); - - for (const item of response.output || []) { - switch (item.type) { - case "message": - // Check for refusal in content - for (const content of item.content || []) { - if (content.type === "refusal") { - console.log("Response was refused:", content.refusal); - } else if (content.type === "output_text") { - console.log("Response completed normally"); - } - } - break; - - case "function_call": - console.log("Tool call requested"); - break; - } - } -} -``` - -### Streaming Stop Reason Detection - -```typescript -async function streamWithStopReasonHandling() { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Count to 10" }], - stream: true, - max_completion_tokens: 50, - }); - - let content = ""; - let finishReason: string | null = null; - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - if (choice.delta?.content) { - content += choice.delta.content; - process.stdout.write(choice.delta.content); - } - - if (choice.finish_reason) { - finishReason = choice.finish_reason; - break; - } - } - - console.log(`\nStreaming finished. 
Reason: ${finishReason}`); - - if (finishReason === "length") { - console.log("Response was cut off due to token limit"); - // Could continue the conversation to get the rest - } - - return { content, finishReason }; -} -``` - -## Message History & Serialization - -### Message Types and Formats - -```typescript -// Chat Completions message format -interface ChatMessage { - role: "system" | "user" | "assistant" | "tool" | "developer"; - content: string | null; - name?: string; - tool_calls?: Array<{ - id: string; - type: "function"; - function: { - name: string; - arguments: string; - }; - }>; - tool_call_id?: string; // For tool response messages -} - -// Responses API message format -interface ResponseMessage { - role: "user" | "developer"; - content: Array<{ - type: "input_text" | "input_image" | "input_audio"; - text?: string; - image?: { url: string }; - audio?: { data: string }; - }>; -} - -// Unified conversation history -interface ConversationHistory { - api: "completions" | "responses"; - model: string; - systemPrompt?: string; - messages: any[]; // API-specific format - totalTokens: number; - metadata: { - created: number; - lastUpdated: number; - provider: string; - }; -} -``` - -### Serialization Implementation - -```typescript -class ConversationManager { - private messages: any[] = []; - private api: "completions" | "responses"; - private systemPrompt?: string; - private totalTokens = 0; - - constructor(api: "completions" | "responses", systemPrompt?: string) { - this.api = api; - this.systemPrompt = systemPrompt; - - if (systemPrompt) { - if (api === "completions") { - this.messages.push({ role: "system", content: systemPrompt }); - } else { - this.messages.push({ role: "developer", content: systemPrompt }); - } - } - } - - addUserMessage(content: string) { - if (this.api === "completions") { - this.messages.push({ role: "user", content }); - } else { - this.messages.push({ - role: "user", - content: [{ type: "input_text", text: content }] - }); - } - 
} - - addAssistantMessage(content: string) { - if (this.api === "completions") { - this.messages.push({ role: "assistant", content }); - } else { - this.messages.push({ - type: "message", - content: [{ type: "output_text", text: content }] - }); - } - } - - addToolCall(id: string, name: string, args: string) { - if (this.api === "completions") { - // Add assistant message with tool calls - this.messages.push({ - role: "assistant", - content: null, - tool_calls: [{ - id, - type: "function" as const, - function: { name, arguments: args } - }] - }); - } else { - // Add function call to responses format - this.messages.push({ - type: "function_call", - call_id: id, - name, - arguments: args - }); - } - } - - addToolResult(id: string, result: string) { - if (this.api === "completions") { - this.messages.push({ - role: "tool", - tool_call_id: id, - content: result - }); - } else { - this.messages.push({ - type: "function_call_output", - call_id: id, - output: result - }); - } - } - - // Serialize to JSON - serialize(): string { - const data: ConversationHistory = { - api: this.api, - model: "unknown", // Set externally - systemPrompt: this.systemPrompt, - messages: this.messages, - totalTokens: this.totalTokens, - metadata: { - created: Date.now(), - lastUpdated: Date.now(), - provider: "openai" - } - }; - return JSON.stringify(data, null, 2); - } - - // Deserialize from JSON - static deserialize(json: string): ConversationManager { - const data: ConversationHistory = JSON.parse(json); - const manager = new ConversationManager(data.api, data.systemPrompt); - manager.messages = data.messages; - manager.totalTokens = data.totalTokens; - return manager; - } - - getMessages() { - return this.messages; - } - - updateTokenUsage(tokens: number) { - this.totalTokens += tokens; - } -} - -// Usage example -const conversation = new ConversationManager("completions", "You are a helpful assistant"); -conversation.addUserMessage("Hello"); -conversation.addAssistantMessage("Hi 
there!"); -conversation.updateTokenUsage(25); - -// Save to file -const serialized = conversation.serialize(); -await fs.writeFile("conversation.json", serialized); - -// Load from file -const loaded = await fs.readFile("conversation.json", "utf-8"); -const restored = ConversationManager.deserialize(loaded); -``` - -### Event-Based History Reconstruction - -```typescript -// From pi-agent codebase - reconstruct messages from events -type AgentEvent = - | { type: "user_message"; text: string } - | { type: "assistant_message"; text: string } - | { type: "tool_call"; toolCallId: string; name: string; args: string } - | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } - | { type: "reasoning"; text: string } - | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number }; - -function reconstructMessagesFromEvents( - events: AgentEvent[], - api: "completions" | "responses", - systemPrompt?: string -): any[] { - const messages: any[] = []; - - // Add system prompt - if (systemPrompt) { - if (api === "completions") { - messages.push({ role: "system", content: systemPrompt }); - } else { - messages.push({ role: "developer", content: systemPrompt }); - } - } - - if (api === "responses") { - // Responses API format reconstruction - for (const event of events) { - switch (event.type) { - case "user_message": - messages.push({ - role: "user", - content: [{ type: "input_text", text: event.text }] - }); - break; - - case "reasoning": - messages.push({ - type: "reasoning", - content: [{ type: "reasoning_text", text: event.text }] - }); - break; - - case "tool_call": - messages.push({ - type: "function_call", - call_id: event.toolCallId, - name: event.name, - arguments: event.args - }); - break; - - case "tool_result": - messages.push({ - type: "function_call_output", - call_id: event.toolCallId, - output: event.result - }); - break; - - case "assistant_message": - messages.push({ - type: "message", - content: [{ type: 
"output_text", text: event.text }] - }); - break; - } - } - } else { - // Chat Completions format reconstruction - let pendingToolCalls: any[] = []; - - for (const event of events) { - switch (event.type) { - case "user_message": - messages.push({ role: "user", content: event.text }); - break; - - case "tool_call": - pendingToolCalls.push({ - id: event.toolCallId, - type: "function", - function: { - name: event.name, - arguments: event.args - } - }); - break; - - case "tool_result": - // Add assistant message with tool calls when we see first result - if (pendingToolCalls.length > 0) { - messages.push({ - role: "assistant", - content: null, - tool_calls: pendingToolCalls - }); - pendingToolCalls = []; - } - - messages.push({ - role: "tool", - tool_call_id: event.toolCallId, - content: event.result - }); - break; - - case "assistant_message": - messages.push({ role: "assistant", content: event.text }); - break; - } - } - } - - return messages; -} -``` - -## Token Counting - -### Usage Types from OpenAI SDK - -```typescript -// Chat Completions usage -interface CompletionUsage { - completion_tokens: number; - prompt_tokens: number; - total_tokens: number; - completion_tokens_details?: { - reasoning_tokens?: number; // o1/o3 reasoning tokens - cached_tokens?: number; - }; - prompt_tokens_details?: { - cached_tokens?: number; - }; -} - -// Responses API usage -interface ResponseUsage { - input_tokens: number; - output_tokens: number; - total_tokens: number; - input_tokens_details: { - cached_tokens?: number; - }; - output_tokens_details: { - reasoning_tokens?: number; // o1/o3 reasoning tokens - }; -} -``` - -### Token Counting Implementation - -```typescript -interface TokenUsage { - inputTokens: number; - outputTokens: number; - totalTokens: number; - reasoningTokens: number; - cacheReadTokens: number; - cacheWriteTokens: number; -} - -class TokenCounter { - private totalUsage: TokenUsage = { - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - reasoningTokens: 
0, - cacheReadTokens: 0, - cacheWriteTokens: 0 - }; - - // Extract tokens from Chat Completions response - extractChatCompletionUsage(usage?: CompletionUsage): TokenUsage | null { - if (!usage) return null; - - const extracted: TokenUsage = { - inputTokens: usage.prompt_tokens || 0, - outputTokens: usage.completion_tokens || 0, - totalTokens: usage.total_tokens || 0, - reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0, - cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0, - cacheWriteTokens: 0 // Not available in this format - }; - - this.addUsage(extracted); - return extracted; - } - - // Extract tokens from Responses API response - extractResponseUsage(usage?: ResponseUsage): TokenUsage | null { - if (!usage) return null; - - const extracted: TokenUsage = { - inputTokens: usage.input_tokens || 0, - outputTokens: usage.output_tokens || 0, - totalTokens: usage.total_tokens || 0, - reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0, - cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0, - cacheWriteTokens: 0 // Not available in current API - }; - - this.addUsage(extracted); - return extracted; - } - - private addUsage(usage: TokenUsage) { - this.totalUsage.inputTokens += usage.inputTokens; - this.totalUsage.outputTokens += usage.outputTokens; - this.totalUsage.totalTokens += usage.totalTokens; - this.totalUsage.reasoningTokens += usage.reasoningTokens; - this.totalUsage.cacheReadTokens += usage.cacheReadTokens; - this.totalUsage.cacheWriteTokens += usage.cacheWriteTokens; - } - - getTotalUsage(): TokenUsage { - return { ...this.totalUsage }; - } - - reset() { - this.totalUsage = { - inputTokens: 0, - outputTokens: 0, - totalTokens: 0, - reasoningTokens: 0, - cacheReadTokens: 0, - cacheWriteTokens: 0 - }; - } - - // Format for display - formatUsage(usage?: TokenUsage): string { - const u = usage || this.totalUsage; - let parts = [`↑${u.inputTokens}`, `↓${u.outputTokens}`]; - - if (u.reasoningTokens > 0) 
{ - parts.push(`⚡${u.reasoningTokens}`); - } - - if (u.cacheReadTokens > 0) { - parts.push(`📖${u.cacheReadTokens}`); - } - - if (u.cacheWriteTokens > 0) { - parts.push(`📝${u.cacheWriteTokens}`); - } - - return parts.join(" "); - } -} - -// Usage with streaming -async function countTokensInStream() { - const tokenCounter = new TokenCounter(); - - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: "Tell me about AI" }], - stream: true, - stream_options: { include_usage: true } // Important for token counts - }); - - for await (const chunk of stream) { - // Token usage comes in final chunk when stream_options.include_usage = true - if (chunk.usage) { - const usage = tokenCounter.extractChatCompletionUsage(chunk.usage); - console.log("Token usage:", tokenCounter.formatUsage(usage)); - } - } - - console.log("Total usage:", tokenCounter.formatUsage()); -} -``` - -### Token Estimation (for planning) - -```typescript -// Rough token estimation for planning purposes -function estimateTokens(text: string): number { - // Very rough approximation: ~4 characters per token for English - return Math.ceil(text.length / 4); -} - -function estimateMessageTokens(messages: any[]): number { - let total = 0; - - for (const message of messages) { - if (typeof message.content === "string") { - total += estimateTokens(message.content); - } else if (Array.isArray(message.content)) { - for (const content of message.content) { - if (content.text) { - total += estimateTokens(content.text); - } - } - } - - // Add overhead for message formatting - total += 10; - } - - return total; -} - -// Check if request will fit in context window -function checkContextLimit(messages: any[], maxTokens: number = 128000): boolean { - const estimated = estimateMessageTokens(messages); - const safetyMargin = 1000; // Reserve tokens for response - - return estimated + safetyMargin < maxTokens; -} -``` - -## Caching - -### Cache Headers and Configuration - 
-```typescript -// OpenAI supports prompt caching via special message formatting -// Cache is automatically used when messages are repeated - -async function demonstrateCaching() { - const longSystemPrompt = ` - You are an expert software engineer with deep knowledge of TypeScript, React, Node.js... - [Very long system prompt - 1000+ tokens] - `; - - // First request - will cache the system prompt - const response1 = await client.chat.completions.create({ - model: "gpt-4o", - messages: [ - { role: "system", content: longSystemPrompt }, - { role: "user", content: "Explain TypeScript generics" } - ] - }); - - console.log("First request usage:", response1.usage); - - // Second request with same system prompt - will use cache - const response2 = await client.chat.completions.create({ - model: "gpt-4o", - messages: [ - { role: "system", content: longSystemPrompt }, // Cached - { role: "user", content: "Explain React hooks" } - ] - }); - - console.log("Second request usage:", response2.usage); - console.log("Cache read tokens:", response2.usage?.prompt_tokens_details?.cached_tokens); -} -``` - -### Manual Cache Control - -```typescript -// For providers that support explicit cache control -interface CacheConfig { - enabled: boolean; - ttl?: number; // Time to live in seconds -} - -class CachedClient { - private client: OpenAI; - private cache = new Map(); - - constructor(apiKey: string, baseURL?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - } - - private getCacheKey(messages: any[], model: string): string { - return JSON.stringify({ messages, model }); - } - - private isCacheValid(entry: { timestamp: number; ttl: number }): boolean { - return Date.now() - entry.timestamp < entry.ttl * 1000; - } - - async completionWithCache( - messages: any[], - model: string, - cacheConfig: CacheConfig = { enabled: true, ttl: 3600 } - ) { - if (cacheConfig.enabled) { - const cacheKey = this.getCacheKey(messages, model); - const cached = this.cache.get(cacheKey); - - if 
(cached && this.isCacheValid(cached)) { - console.log("Cache hit"); - return cached.response; - } - } - - const response = await this.client.chat.completions.create({ - model, - messages - }); - - if (cacheConfig.enabled) { - const cacheKey = this.getCacheKey(messages, model); - this.cache.set(cacheKey, { - response, - timestamp: Date.now(), - ttl: cacheConfig.ttl || 3600 - }); - } - - return response; - } - - clearCache() { - this.cache.clear(); - } -} -``` - -## Chat Completions vs Responses API - -### When to Use Each API - -```typescript -// Chat Completions API - Traditional conversational interface -// Use for: Most general chat/completion tasks -interface ChatCompletionsUseCase { - // ✅ Good for: - // - Regular conversations - // - Function/tool calling - // - Most models (gpt-4o, claude, gemini via compatibility) - // - Streaming text generation - // - File uploads and vision - - // ❌ Limitations: - // - No access to reasoning/thinking tokens for o1/o3 - // - Less structured for complex workflows -} - -// Responses API - Structured response interface -// Use for: Complex reasoning tasks, tool workflows -interface ResponsesAPIUseCase { - // ✅ Good for: - // - o1/o3 models with reasoning access - // - Complex tool calling workflows - // - Structured output requirements - // - Background processing - // - Access to reasoning tokens - - // ❌ Limitations: - // - Newer API with less ecosystem support - // - More complex message format - // - Not all models supported -} -``` - -### API Decision Logic - -```typescript -function selectAPI( - model: string, - requiresReasoning: boolean, - hasComplexTools: boolean -): "completions" | "responses" { - // Use Responses API for o1/o3 when reasoning is needed - if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { - return "responses"; - } - - // Use Responses API for complex tool workflows - if (hasComplexTools && model.includes("gpt-4")) { - return "responses"; - } - - // Default to Chat Completions 
for broader compatibility - return "completions"; -} - -// Usage example -const model = "o1-mini"; -const needsReasoning = true; -const api = selectAPI(model, needsReasoning, false); - -if (api === "responses") { - console.log("Using Responses API for reasoning access"); -} else { - console.log("Using Chat Completions API for compatibility"); -} -``` - -### Dual API Client - -```typescript -class DualAPIClient { - private client: OpenAI; - - constructor(apiKey: string, baseURL?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - } - - async complete(params: { - model: string; - messages: any[]; - tools?: any[]; - maxTokens?: number; - temperature?: number; - stream?: boolean; - reasoning?: boolean; - }) { - const api = this.selectAPI(params.model, params.reasoning || false); - - if (api === "responses") { - return this.callResponsesAPI(params); - } else { - return this.callChatCompletionsAPI(params); - } - } - - private selectAPI(model: string, requiresReasoning: boolean): "completions" | "responses" { - if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { - return "responses"; - } - return "completions"; - } - - private async callChatCompletionsAPI(params: any) { - const requestParams = { - model: params.model, - messages: params.messages, - max_completion_tokens: params.maxTokens, - temperature: params.temperature, - tools: params.tools, - stream: params.stream - }; - - if (params.stream) { - return this.client.chat.completions.create(requestParams); - } else { - return this.client.chat.completions.create(requestParams); - } - } - - private async callResponsesAPI(params: any) { - // Convert messages to Responses API format - const input = params.messages.map((msg: any) => { - if (msg.role === "user") { - return { - role: "user", - content: [{ type: "input_text", text: msg.content }] - }; - } else if (msg.role === "system") { - return { - role: "developer", - content: msg.content - }; - } - return msg; - }); - - const requestParams 
= { - model: params.model, - input, - max_output_tokens: params.maxTokens, - tools: params.tools, - stream: params.stream, - reasoning: params.reasoning ? { effort: "low" } : undefined - }; - - return this.client.responses.create(requestParams); - } -} -``` - -## Tool/Function Calling - -### Tool Definition Format - -```typescript -// OpenAI tool definition format (JSON Schema) -interface ToolDefinition { - type: "function"; - function: { - name: string; - description: string; - parameters: { - type: "object"; - properties: Record<string, any>; - required: string[]; - }; - }; -} - -// Example tool definitions -const tools: ToolDefinition[] = [ - { - type: "function", - function: { - name: "read_file", - description: "Read the contents of a file", - parameters: { - type: "object", - properties: { - path: { - type: "string", - description: "The file path to read" - } - }, - required: ["path"] - } - } - }, - { - type: "function", - function: { - name: "execute_command", - description: "Execute a shell command", - parameters: { - type: "object", - properties: { - command: { - type: "string", - description: "The command to execute" - }, - timeout: { - type: "number", - description: "Timeout in seconds", - default: 30 - } - }, - required: ["command"] - } - } - } -]; -``` - -### Tool Execution Engine - -```typescript -type ToolFunction = (args: any) => Promise<string>; - -class ToolExecutor { - private tools = new Map<string, ToolFunction>(); - - register(name: string, fn: ToolFunction) { - this.tools.set(name, fn); - } - - async execute(name: string, argsJson: string): Promise<string> { - const tool = this.tools.get(name); - if (!tool) { - throw new Error(`Unknown tool: ${name}`); - } - - try { - const args = JSON.parse(argsJson); - return await tool(args); - } catch (error) { - throw new Error(`Tool execution failed: ${error.message}`); - } - } - - getAvailableTools(): string[] { - return Array.from(this.tools.keys()); - } -} - -// Register tool implementations -const toolExecutor = new ToolExecutor(); - 
-toolExecutor.register("read_file", async (args: { path: string }) => { - const fs = await import("fs/promises"); - try { - const content = await fs.readFile(args.path, "utf-8"); - return content; - } catch (error) { - return `Error reading file: ${error.message}`; - } -}); - -toolExecutor.register("execute_command", async (args: { command: string; timeout?: number }) => { - const { exec } = await import("child_process"); - const { promisify } = await import("util"); - const execAsync = promisify(exec); - - try { - const { stdout, stderr } = await execAsync(args.command, { - timeout: (args.timeout || 30) * 1000 - }); - return stdout + (stderr ? `\nSTDERR: ${stderr}` : ""); - } catch (error) { - return `Command failed: ${error.message}`; - } -}); -``` - -### Complete Tool Calling Flow - -```typescript -async function completeChatWithTools(userMessage: string) { - const conversation = new ConversationManager("completions", "You are a helpful assistant with file system access."); - const tokenCounter = new TokenCounter(); - - conversation.addUserMessage(userMessage); - - while (true) { - const response = await client.chat.completions.create({ - model: "gpt-4o", - messages: conversation.getMessages(), - tools, - tool_choice: "auto", - max_completion_tokens: 1000 - }); - - // Track token usage - if (response.usage) { - tokenCounter.extractChatCompletionUsage(response.usage); - } - - const message = response.choices[0].message; - - if (message.tool_calls && message.tool_calls.length > 0) { - // Add assistant message with tool calls to conversation - conversation.getMessages().push({ - role: "assistant", - content: message.content, - tool_calls: message.tool_calls - }); - - // Execute each tool call - for (const toolCall of message.tool_calls) { - console.log(`🔧 Calling ${toolCall.function.name}...`); - - try { - const result = await toolExecutor.execute( - toolCall.function.name, - toolCall.function.arguments - ); - - console.log(`✅ Tool result: ${result.substring(0, 
100)}...`); - conversation.addToolResult(toolCall.id, result); - - } catch (error) { - console.log(`❌ Tool error: ${error.message}`); - conversation.addToolResult(toolCall.id, `Error: ${error.message}`); - } - } - - // Continue conversation with tool results - continue; - } else { - // Final response - const content = message.content || ""; - conversation.addAssistantMessage(content); - - console.log("🤖 Assistant:", content); - console.log("📊 Token usage:", tokenCounter.formatUsage()); - - return content; - } - } -} - -// Usage -await completeChatWithTools("Read the package.json file and tell me about this project"); -``` - -### Streaming Tool Calls - -```typescript -async function streamingToolCalls(userMessage: string) { - const stream = await client.chat.completions.create({ - model: "gpt-4o", - messages: [{ role: "user", content: userMessage }], - tools, - tool_choice: "auto", - stream: true - }); - - let currentToolCalls: Map<string, { name: string; args: string }> = new Map(); - let assistantMessage = ""; - - for await (const chunk of stream) { - const choice = chunk.choices[0]; - if (!choice) continue; - - const delta = choice.delta; - - // Regular content - if (delta.content) { - assistantMessage += delta.content; - process.stdout.write(delta.content); - } - - // Tool call deltas - if (delta.tool_calls) { - for (const toolCallDelta of delta.tool_calls) { - const id = toolCallDelta.id; - if (!id) continue; - - if (!currentToolCalls.has(id)) { - currentToolCalls.set(id, { name: "", args: "" }); - } - - const toolCall = currentToolCalls.get(id)!; - - if (toolCallDelta.function?.name) { - toolCall.name += toolCallDelta.function.name; - } - - if (toolCallDelta.function?.arguments) { - toolCall.args += toolCallDelta.function.arguments; - } - } - } - - // When finished, execute accumulated tool calls - if (choice.finish_reason === "tool_calls") { - console.log("\n🔧 Executing tools..."); - - for (const [id, toolCall] of currentToolCalls) { - try { - const result = await 
toolExecutor.execute(toolCall.name, toolCall.args); - console.log(`✅ ${toolCall.name}: ${result.substring(0, 100)}...`); - } catch (error) { - console.log(`❌ ${toolCall.name}: ${error.message}`); - } - } - - break; - } - } -} -``` - -### Responses API Tool Calling - -```typescript -async function responsesAPIToolCalling() { - const response = await client.responses.create({ - model: "gpt-4o", - input: [ - { - role: "user", - content: [{ type: "input_text", text: "List files in current directory" }] - } - ], - tools: [ - { - type: "function", - function: { - name: "list_directory", - description: "List files in a directory", - parameters: { - type: "object", - properties: { - path: { type: "string", description: "Directory path" } - }, - required: ["path"] - } - } - } - ] - }); - - for (const item of response.output || []) { - switch (item.type) { - case "function_call": - console.log(`🔧 Tool call: ${item.name}`); - console.log(`📝 Arguments: ${item.arguments}`); - - try { - const result = await toolExecutor.execute(item.name, item.arguments); - console.log(`✅ Result: ${result}`); - - // In a real implementation, you'd add this result back to the conversation - // and continue the response - } catch (error) { - console.log(`❌ Error: ${error.message}`); - } - break; - - case "message": - for (const content of item.content || []) { - if (content.type === "output_text") { - console.log("🤖 Response:", content.text); - } - } - break; - } - } -} -``` - -## System Prompts - -### System Prompt Handling by Model Type - -```typescript -interface SystemPromptConfig { - content: string; - role: "system" | "developer"; // Different models use different roles -} - -function formatSystemPrompt(prompt: string, model: string, api: "completions" | "responses"): any { - // Chat Completions API - if (api === "completions") { - // Most models use "system" role - if (model.includes("claude") || model.includes("gemini")) { - // Some providers via OpenAI compatibility might expect "system" 
- return { role: "system", content: prompt }; - } - - // OpenAI native models - return { role: "system", content: prompt }; - } - - // Responses API uses "developer" role for system messages - return { role: "developer", content: prompt }; -} - -// System prompt best practices -const systemPrompts = { - // General assistant - assistant: "You are a helpful, accurate, and reliable AI assistant. Provide clear, concise, and helpful responses.", - - // Code assistant - coder: `You are an expert software engineer with deep knowledge of multiple programming languages, frameworks, and best practices. - -Key principles: -- Write clean, maintainable, and well-documented code -- Follow language-specific conventions and best practices -- Explain your reasoning and trade-offs -- Suggest improvements and alternatives when appropriate -- Always test your code mentally before providing it - -When helping with code: -1. Understand the requirements fully -2. Choose appropriate tools and patterns -3. Provide working, tested solutions -4. Explain key concepts and decisions`, - - // Research assistant - researcher: `You are a thorough research assistant. When answering questions: - -1. Provide accurate, well-sourced information -2. Acknowledge limitations in your knowledge -3. Structure responses clearly with headings and bullet points -4. Cite sources when possible -5. Distinguish between facts, analysis, and opinions -6. Ask clarifying questions when the request is ambiguous`, - - // Tool-enabled assistant - toolEnabled: `You are an AI assistant with access to various tools for file operations, web searches, and code execution. 
- -Guidelines for tool use: -- Use tools when they would be helpful to answer the user's question -- Always explain what you're doing before calling a tool -- Interpret and summarize tool results for the user -- If a tool fails, try alternative approaches -- Be transparent about what information comes from tools vs your training - -Available capabilities: -- Read and write files -- Execute shell commands -- Search the web -- Analyze code and data` -}; -``` - -### Dynamic System Prompt Building - -```typescript -class SystemPromptBuilder { - private sections: string[] = []; - - addRole(role: string): this { - this.sections.push(`You are ${role}.`); - return this; - } - - addCapabilities(capabilities: string[]): this { - if (capabilities.length > 0) { - this.sections.push(`You have access to: ${capabilities.join(", ")}.`); - } - return this; - } - - addGuidelines(guidelines: string[]): this { - if (guidelines.length > 0) { - this.sections.push("Guidelines:\n" + guidelines.map(g => `- ${g}`).join("\n")); - } - return this; - } - - addContext(context: string): this { - if (context.trim()) { - this.sections.push(`Context: ${context}`); - } - return this; - } - - build(): string { - return this.sections.join("\n\n"); - } - - reset(): this { - this.sections = []; - return this; - } -} - -// Usage examples -const codeAssistantPrompt = new SystemPromptBuilder() - .addRole("an expert TypeScript developer") - .addCapabilities(["file system access", "code execution", "documentation lookup"]) - .addGuidelines([ - "Write clean, type-safe code", - "Explain complex concepts clearly", - "Suggest best practices", - "Test code before providing it" - ]) - .build(); - -const customerServicePrompt = new SystemPromptBuilder() - .addRole("a helpful customer service representative") - .addGuidelines([ - "Be polite and professional", - "Listen carefully to customer concerns", - "Provide accurate information", - "Escalate complex issues when needed" - ]) - .addContext("You work for TechCorp, 
a software company that makes productivity tools.") - .build(); -``` - -### Model-Specific System Prompt Optimization - -```typescript -function optimizeSystemPromptForModel(basePrompt: string, model: string): string { - // OpenAI models (especially o1/o3) work well with detailed, structured prompts - if (model.includes("gpt") || model.includes("o1") || model.includes("o3")) { - return `${basePrompt} - -Think step by step when solving complex problems. Show your reasoning process clearly.`; - } - - // Claude models prefer more conversational, principle-based prompts - if (model.includes("claude")) { - return `${basePrompt} - -I value helpful, harmless, and honest responses. Please be thoughtful and thorough in your analysis.`; - } - - // Gemini models work well with structured instructions - if (model.includes("gemini")) { - return `${basePrompt} - -Please structure your responses clearly and provide specific, actionable advice.`; - } - - // Default: return as-is - return basePrompt; -} - -// Provider-specific prompt injection handling -function detectAndMitigatePromptInjection(userInput: string): { safe: boolean; cleaned?: string } { - const injectionPatterns = [ - /ignore.*previous.*instruction/i, - /forget.*system.*prompt/i, - /act.*as.*different/i, - /pretend.*you.*are/i, - /new.*role.*now/i - ]; - - for (const pattern of injectionPatterns) { - if (pattern.test(userInput)) { - return { - safe: false, - cleaned: userInput.replace(pattern, "[FILTERED]") - }; - } - } - - return { safe: true }; -} -``` - -## Provider-Specific Features - -### Reasoning Support Detection - -```typescript -// From pi-agent codebase - detect and handle reasoning support per provider -type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other"; - -function detectProvider(baseURL?: string): Provider { - if (!baseURL) return "openai"; - if (baseURL.includes("api.openai.com")) return "openai"; - if (baseURL.includes("generativelanguage.googleapis.com")) return 
"gemini"; - if (baseURL.includes("api.groq.com")) return "groq"; - if (baseURL.includes("api.anthropic.com")) return "anthropic"; - if (baseURL.includes("openrouter.ai")) return "openrouter"; - return "other"; -} - -// Provider-specific reasoning parameter handling -function adjustRequestForReasoning( - requestOptions: any, - api: "completions" | "responses", - provider: Provider, - supportsReasoning: boolean -): any { - if (!supportsReasoning) return requestOptions; - - switch (provider) { - case "openai": - // OpenAI standard format - if (api === "responses") { - requestOptions.reasoning = { - effort: "low", - summary: "detailed" - }; - } else { - requestOptions.reasoning_effort = "low"; - } - break; - - case "gemini": - // Gemini uses extra_body for thinking configuration - if (api === "completions") { - requestOptions.extra_body = { - google: { - thinking_config: { - thinking_budget: 1024, - include_thoughts: true - } - } - }; - // Remove reasoning_effort when using thinking_config - delete requestOptions.reasoning_effort; - } - break; - - case "groq": - // Groq uses reasoning_format for Chat Completions - if (api === "completions") { - requestOptions.reasoning_format = "parsed"; - requestOptions.reasoning_effort = "low"; - } else { - // Groq Responses API doesn't support reasoning.summary - requestOptions.reasoning = { effort: "low" }; - } - break; - - case "openrouter": - // OpenRouter unified reasoning format - if (api === "completions") { - requestOptions.reasoning = { effort: "low" }; - delete requestOptions.reasoning_effort; - } - break; - - default: - // Standard OpenAI format for others - if (api === "responses") { - requestOptions.reasoning = { effort: "low" }; - } else { - requestOptions.reasoning_effort = "low"; - } - } - - return requestOptions; -} -``` - -### Provider-Specific Response Parsing - -```typescript -// Extract reasoning content from provider-specific response formats -function parseReasoningFromMessage(message: any, provider: Provider): 
{ - cleanContent: string; - reasoningTexts: string[]; -} { - const reasoningTexts: string[] = []; - let cleanContent = message.content || ""; - - switch (provider) { - case "gemini": - // Gemini returns thinking in <thought> tags - if (cleanContent.includes("<thought>")) { - const thoughtMatches = cleanContent.matchAll(/<thought>([\s\S]*?)<\/thought>/g); - for (const match of thoughtMatches) { - reasoningTexts.push(match[1].trim()); - } - // Remove thought tags from response - cleanContent = cleanContent.replace(/<thought>[\s\S]*?<\/thought>/g, "").trim(); - } - break; - - case "groq": - // Groq returns reasoning in separate field - if (message.reasoning) { - reasoningTexts.push(message.reasoning); - } - break; - - case "openrouter": - // OpenRouter uses message.reasoning field - if (message.reasoning) { - reasoningTexts.push(message.reasoning); - } - break; - - default: - // OpenAI and others handle reasoning via events - break; - } - - return { cleanContent, reasoningTexts }; -} -``` - -### Provider-Specific Error Handling - -```typescript -function handleProviderSpecificErrors(error: any, provider: Provider): Error { - switch (provider) { - case "groq": - if (error.message?.includes("reasoning_format")) { - return new Error("Reasoning not supported by this Groq model"); - } - break; - - case "gemini": - if (error.message?.includes("thinking_config")) { - return new Error("Thinking mode not supported by this Gemini model"); - } - break; - - case "anthropic": - if (error.message?.includes("reasoning")) { - return new Error("Reasoning not available via Anthropic's OpenAI compatibility layer"); - } - break; - - case "openrouter": - // OpenRouter passes through underlying provider errors - if (error.message?.includes("not supported")) { - return new Error("Feature not supported by the selected model on OpenRouter"); - } - break; - } - - return error; -} -``` - -## Complete Implementation Examples - -### Basic Chat Client - -```typescript -import OpenAI from "openai"; -import type { 
ChatCompletionMessageParam } from "openai/resources/chat/completions"; - -class BasicChatClient { - private client: OpenAI; - private messages: ChatCompletionMessageParam[] = []; - - constructor(apiKey: string, baseURL?: string, systemPrompt?: string) { - this.client = new OpenAI({ apiKey, baseURL }); - - if (systemPrompt) { - this.messages.push({ role: "system", content: systemPrompt }); - } - } - - async chat(userMessage: string): Promise<string> { - this.messages.push({ role: "user", content: userMessage }); - - try { - const response = await this.client.chat.completions.create({ - model: "gpt-4o", - messages: this.messages, - max_completion_tokens: 1000, - temperature: 0.7 - }); - - const assistantMessage = response.choices[0]?.message?.content || ""; - this.messages.push({ role: "assistant", content: assistantMessage }); - - return assistantMessage; - } catch (error) { - console.error("Chat error:", error); - throw error; - } - } - - getHistory(): ChatCompletionMessageParam[] { - return [...this.messages]; - } - - clearHistory(): void { - this.messages = this.messages.filter(m => m.role === "system"); - } -} -``` - -### Advanced Streaming Client with All Features - -```typescript -import OpenAI from "openai"; -import type { - ChatCompletionCreateParamsStreaming, - ChatCompletionChunk -} from "openai/resources/chat/completions"; - -interface StreamingClientConfig { - apiKey: string; - baseURL?: string; - model: string; - systemPrompt?: string; - tools?: any[]; - maxTokens?: number; - temperature?: number; -} - -interface StreamEvent { - type: "content" | "tool_call" | "reasoning" | "usage" | "error" | "complete"; - data: any; -} - -class AdvancedStreamingClient { - private client: OpenAI; - private config: StreamingClientConfig; - private messages: any[] = []; - private abortController: AbortController | null = null; - private tokenCounter = new TokenCounter(); - - constructor(config: StreamingClientConfig) { - this.config = config; - this.client = new OpenAI({ - 
apiKey: config.apiKey, - baseURL: config.baseURL - }); - - if (config.systemPrompt) { - this.messages.push({ role: "system", content: config.systemPrompt }); - } - } - - async *streamChat(userMessage: string): AsyncGenerator { - this.messages.push({ role: "user", content: userMessage }); - this.abortController = new AbortController(); - - try { - const params: ChatCompletionCreateParamsStreaming = { - model: this.config.model, - messages: this.messages, - stream: true, - max_completion_tokens: this.config.maxTokens || 1000, - temperature: this.config.temperature || 0.7, - tools: this.config.tools, - tool_choice: this.config.tools ? "auto" : undefined, - stream_options: { include_usage: true } - }; - - const stream = await this.client.chat.completions.create(params, { - signal: this.abortController.signal - }); - - let assistantContent = ""; - let currentToolCalls = new Map(); - - for await (const chunk of stream) { - if (this.abortController.signal.aborted) break; - - const choice = chunk.choices[0]; - if (!choice) continue; - - // Handle content - if (choice.delta?.content) { - assistantContent += choice.delta.content; - yield { - type: "content", - data: { delta: choice.delta.content, content: assistantContent } - }; - } - - // Handle tool calls - if (choice.delta?.tool_calls) { - for (const toolCall of choice.delta.tool_calls) { - if (!toolCall.id) continue; - - if (!currentToolCalls.has(toolCall.id)) { - currentToolCalls.set(toolCall.id, { - id: toolCall.id, - name: "", - arguments: "" - }); - } - - const call = currentToolCalls.get(toolCall.id); - if (toolCall.function?.name) { - call.name += toolCall.function.name; - } - if (toolCall.function?.arguments) { - call.arguments += toolCall.function.arguments; - } - - yield { - type: "tool_call", - data: { id: toolCall.id, delta: toolCall, current: call } - }; - } - } - - // Handle usage - if (chunk.usage) { - const usage = this.tokenCounter.extractChatCompletionUsage(chunk.usage); - yield { - type: "usage", - 
data: usage - }; - } - - // Handle completion - if (choice.finish_reason) { - if (choice.finish_reason === "tool_calls") { - // Execute tool calls - const toolResults = await this.executeToolCalls(Array.from(currentToolCalls.values())); - - // Add messages and continue - this.messages.push({ - role: "assistant", - content: assistantContent || null, - tool_calls: Array.from(currentToolCalls.values()).map(call => ({ - id: call.id, - type: "function", - function: { - name: call.name, - arguments: call.arguments - } - })) - }); - - for (const result of toolResults) { - this.messages.push({ - role: "tool", - tool_call_id: result.id, - content: result.content - }); - } - - // Continue stream for final response - yield* this.streamChat(""); - return; - } else { - // Regular completion - if (assistantContent) { - this.messages.push({ role: "assistant", content: assistantContent }); - } - - yield { - type: "complete", - data: { reason: choice.finish_reason, content: assistantContent } - }; - } - } - } - } catch (error) { - yield { - type: "error", - data: { error: error.message } - }; - } finally { - this.abortController = null; - } - } - - private async executeToolCalls(toolCalls: any[]): Promise<Array<{ id: string; content: string }>> { - const results = []; - - for (const call of toolCalls) { - try { - // Tool execution would be implemented here - const result = await this.executeTool(call.name, call.arguments); - results.push({ id: call.id, content: result }); - } catch (error) { - results.push({ id: call.id, content: `Error: ${error.message}` }); - } - } - - return results; - } - - private async executeTool(name: string, argsJson: string): Promise<string> { - // Implement tool execution logic - return `Tool ${name} executed with args: ${argsJson}`; - } - - interrupt(): void { - this.abortController?.abort(); - } - - getUsage() { - return this.tokenCounter.getTotalUsage(); - } -} - -// Usage example -const client = new AdvancedStreamingClient({ - apiKey: process.env.OPENAI_API_KEY!, - model: "gpt-4o", - 
systemPrompt: "You are a helpful assistant.", - tools: [/* tool definitions */] -}); - -for await (const event of client.streamChat("Help me write a TypeScript function")) { - switch (event.type) { - case "content": - process.stdout.write(event.data.delta); - break; - case "tool_call": - console.log(`\n🔧 Tool: ${event.data.current.name}`); - break; - case "usage": - console.log(`\n📊 Tokens: ${event.data.totalTokens}`); - break; - case "complete": - console.log(`\n✅ Complete (${event.data.reason})`); - break; - case "error": - console.log(`\n❌ Error: ${event.data.error}`); - break; - } -} -``` - -This comprehensive guide covers all the essential features needed to implement a robust OpenAI SDK integration. Each section provides working code examples, actual types from the SDK, and real-world patterns from the pi-mono codebase. - -## Key Takeaways - -1. **Always use AbortController** for request cancellation -2. **Handle both Chat Completions and Responses APIs** depending on model capabilities -3. **Implement comprehensive error handling** with proper error types -4. **Track token usage** for cost management and optimization -5. **Support streaming** for better user experience -6. **Handle provider-specific features** like reasoning and caching -7. **Implement proper tool calling workflows** for agentic applications -8. **Serialize conversation state** for session persistence -9. **Use appropriate system prompts** for different model types -10. **Test reasoning support** dynamically for each provider/model combination \ No newline at end of file diff --git a/packages/ai/docs/plan.md b/packages/ai/docs/plan.md deleted file mode 100644 index d09177ac..00000000 --- a/packages/ai/docs/plan.md +++ /dev/null @@ -1,950 +0,0 @@ -# Unified AI API Design Plan - -Based on comprehensive investigation of OpenAI, Anthropic, and Gemini SDKs with actual implementation examples. 
- -## Key API Differences Summary - -### OpenAI -- **Dual APIs**: Chat Completions (broad support) vs Responses API (o1/o3 thinking content) -- **Thinking**: Only Responses API gives actual content, Chat Completions only gives counts -- **Roles**: `system`, `user`, `assistant`, `tool` (o1/o3 use `developer` instead of `system`) -- **Streaming**: Deltas in chunks with `stream_options.include_usage` for token usage - -### Anthropic -- **Single API**: Messages API with comprehensive streaming -- **Content Blocks**: Always arrays, even for simple text -- **System**: Separate parameter, not in messages array -- **Tool Use**: Content blocks, not separate message role -- **Thinking**: Explicit budget allocation, appears as content blocks -- **Caching**: Per-block cache control with TTL options - -### Gemini -- **Parts System**: All content split into typed parts -- **System**: Separate `systemInstruction` parameter -- **Roles**: Uses `model` instead of `assistant` -- **Thinking**: `part.thought: true` flag identifies reasoning -- **Streaming**: Returns complete responses, not deltas -- **Function Calls**: Embedded in parts array - -## Unified API Design - -### Core Client - -```typescript -interface AIConfig { - provider: 'openai' | 'anthropic' | 'gemini'; - apiKey: string; - model: string; - baseURL?: string; // For OpenAI-compatible endpoints -} - -interface ModelInfo { - id: string; - name: string; - provider: string; - capabilities: { - reasoning: boolean; - toolCall: boolean; - vision: boolean; - audio?: boolean; - }; - cost: { - input: number; // per million tokens - output: number; // per million tokens - cacheRead?: number; - cacheWrite?: number; - }; - limits: { - context: number; - output: number; - }; - knowledge?: string; // Knowledge cutoff date -} - -class AI { - constructor(config: AIConfig); - - // Main streaming interface - everything else builds on this - async *stream(request: Request): AsyncGenerator; - - // Convenience method for non-streaming - async 
complete(request: Request): Promise<Message>; - - // Get model information - getModelInfo(): ModelInfo; - - // Abort current request - abort(): void; -} -``` - -### Message Format - -```typescript -type Message = - | { - role: 'user'; - content: string | Content[]; - } - | { - role: 'assistant'; - content: string | Content[]; - model: string; - usage: TokenUsage; - toolCalls?: { - id: string; - name: string; - arguments: Record<string, unknown>; - }[]; - } - | { - role: 'tool'; - content: string | Content[]; - toolCallId: string; - }; - -interface Content { - type: 'text' | 'image'; - text?: string; - image?: { - data: string; // base64 - mimeType: string; - }; -} -``` - -### Request Format - -```typescript -interface Request { - messages: Message[]; - - // System prompt (separated for Anthropic/Gemini compatibility) - systemPrompt?: string; - - // Common parameters - temperature?: number; - maxTokens?: number; - stopSequences?: string[]; - - // Tools - tools?: { - name: string; - description: string; - parameters: Record<string, unknown>; // JSON Schema - }[]; - toolChoice?: 'auto' | 'none' | 'required' | { name: string }; - - // Thinking/reasoning - reasoning?: { - enabled: boolean; - effort?: 'low' | 'medium' | 'high'; // OpenAI reasoning_effort - maxTokens?: number; // Anthropic thinking budget - }; - - // Abort signal - signal?: AbortSignal; -} -``` - -### Event Stream - -```typescript -type Event = - | { type: 'start'; model: string; provider: string } - | { type: 'text'; content: string; delta: string } - | { type: 'thinking'; content: string; delta: string } - | { type: 'toolCall'; toolCall: ToolCall } - | { type: 'usage'; usage: TokenUsage } - | { type: 'done'; reason: StopReason; message: Message } // message includes model and usage - | { type: 'error'; error: Error }; - -interface TokenUsage { - input: number; - output: number; - total: number; - thinking?: number; - cacheRead?: number; - cacheWrite?: number; - cost?: { - input: number; - output: number; - cache?: number; - total: number; - }; 
-} - -type StopReason = 'stop' | 'length' | 'toolUse' | 'safety' | 'error'; -``` - -## Caching Strategy - -Caching is handled automatically by each provider adapter: - -- **OpenAI**: Automatic prompt caching (no configuration needed) -- **Gemini**: Automatic context caching (no configuration needed) -- **Anthropic**: We automatically add cache_control to the system prompt and older messages - -```typescript -class AnthropicAdapter { - private addCaching(messages: Message[]): any[] { - const anthropicMessages = []; - - // Automatically cache older messages (assuming incremental context) - for (let i = 0; i < messages.length; i++) { - const msg = messages[i]; - const isOld = i < messages.length - 2; // Cache all but last 2 messages - - // Convert to Anthropic format with automatic caching - const blocks = this.toContentBlocks(msg); - if (isOld && blocks.length > 0) { - blocks[0].cache_control = { type: 'ephemeral' }; - } - - anthropicMessages.push({ - role: msg.role === 'assistant' ? 'assistant' : 'user', - content: blocks - }); - } - - return anthropicMessages; - } -} -``` - -## Provider Adapter Implementation - -### OpenAI Adapter - -```typescript -class OpenAIAdapter { - private client: OpenAI; - private useResponsesAPI: boolean = false; - - async *stream(request: Request): AsyncGenerator { - // Determine which API to use - if (request.reasoning?.enabled && this.isReasoningModel()) { - yield* this.streamResponsesAPI(request); - } else { - yield* this.streamChatCompletions(request); - } - } - - private async *streamChatCompletions(request: Request) { - const stream = await this.client.chat.completions.create({ - model: this.model, - messages: this.toOpenAIMessages(request), - tools: this.toOpenAITools(request.tools), - reasoning_effort: request.reasoning?.effort, - stream: true, - stream_options: { include_usage: true } - }); - - let content = ''; - let toolCalls: any[] = []; - - for await (const chunk of stream) { - if (chunk.choices[0]?.delta?.content) { - const 
delta = chunk.choices[0].delta.content; - content += delta; - yield { type: 'text', content, delta }; - } - - if (chunk.choices[0]?.delta?.tool_calls) { - // Accumulate tool calls - this.mergeToolCalls(toolCalls, chunk.choices[0].delta.tool_calls); - for (const tc of toolCalls) { - yield { type: 'toolCall', toolCall: tc, partial: true }; - } - } - - if (chunk.usage) { - yield { - type: 'usage', - usage: { - input: chunk.usage.prompt_tokens, - output: chunk.usage.completion_tokens, - total: chunk.usage.total_tokens, - thinking: chunk.usage.completion_tokens_details?.reasoning_tokens - } - }; - } - } - } - - private async *streamResponsesAPI(request: Request) { - // Use Responses API for actual thinking content - const response = await this.client.responses.create({ - model: this.model, - input: this.toResponsesInput(request), - tools: this.toResponsesTools(request.tools), - stream: true - }); - - for await (const event of response) { - if (event.type === 'response.reasoning_text.delta') { - yield { - type: 'thinking', - content: event.text, - delta: event.delta - }; - } - // Handle other event types... - } - } - - private toOpenAIMessages(request: Request): any[] { - const messages: any[] = []; - - // Handle system prompt - if (request.systemPrompt) { - const role = this.isReasoningModel() ? 
'developer' : 'system'; - messages.push({ role, content: request.systemPrompt }); - } - - // Convert unified messages - for (const msg of request.messages) { - if (msg.role === 'tool') { - messages.push({ - role: 'tool', - content: msg.content, - tool_call_id: msg.toolCallId - }); - } else { - messages.push({ - role: msg.role, - content: this.contentToString(msg.content), - tool_calls: msg.toolCalls - }); - } - } - - return messages; - } -} -``` - -### Anthropic Adapter - -```typescript -class AnthropicAdapter { - private client: Anthropic; - - async *stream(request: Request): AsyncGenerator { - const stream = this.client.messages.stream({ - model: this.model, - max_tokens: request.maxTokens || 1024, - messages: this.addCaching(request.messages), - system: request.systemPrompt, - tools: this.toAnthropicTools(request.tools), - thinking: request.reasoning?.enabled ? { - type: 'enabled', - budget_tokens: request.reasoning.maxTokens || 2000 - } : undefined - }); - - let content = ''; - let thinking = ''; - - stream.on('text', (delta, snapshot) => { - content = snapshot; - // Note: Can't yield from callback, need different approach - }); - - stream.on('thinking', (delta, snapshot) => { - thinking = snapshot; - }); - - // Use raw streaming instead for proper async generator - const rawStream = await this.client.messages.create({ - ...params, - stream: true - }); - - for await (const chunk of rawStream) { - switch (chunk.type) { - case 'content_block_delta': - if (chunk.delta.type === 'text_delta') { - content += chunk.delta.text; - yield { - type: 'text', - content, - delta: chunk.delta.text - }; - } - break; - - case 'message_delta': - if (chunk.usage) { - yield { - type: 'usage', - usage: { - input: chunk.usage.input_tokens, - output: chunk.usage.output_tokens, - total: chunk.usage.input_tokens + chunk.usage.output_tokens, - cacheRead: chunk.usage.cache_read_input_tokens, - cacheWrite: chunk.usage.cache_creation_input_tokens - } - }; - } - break; - } - } - } - - 
private toAnthropicMessages(request: Request): any[] { - return request.messages.map(msg => { - if (msg.role === 'tool') { - // Tool results go as user messages with tool_result blocks - return { - role: 'user', - content: [{ - type: 'tool_result', - tool_use_id: msg.toolCallId, - content: msg.content - }] - }; - } - - // Always use content blocks - const blocks: any[] = []; - - if (typeof msg.content === 'string') { - blocks.push({ - type: 'text', - text: msg.content, - cache_control: msg.cacheControl - }); - } else { - // Convert unified content to blocks - for (const part of msg.content) { - if (part.type === 'text') { - blocks.push({ type: 'text', text: part.text }); - } else if (part.type === 'image') { - blocks.push({ - type: 'image', - source: { - type: 'base64', - media_type: part.image.mimeType, - data: part.image.data - } - }); - } - } - } - - // Add tool calls as blocks - if (msg.toolCalls) { - for (const tc of msg.toolCalls) { - blocks.push({ - type: 'tool_use', - id: tc.id, - name: tc.name, - input: tc.arguments - }); - } - } - - return { - role: msg.role === 'assistant' ? 'assistant' : 'user', - content: blocks - }; - }); - } -} -``` - -### Gemini Adapter - -```typescript -class GeminiAdapter { - private client: GoogleGenAI; - - async *stream(request: Request): AsyncGenerator { - const stream = await this.client.models.generateContentStream({ - model: this.model, - systemInstruction: request.systemPrompt ? 
{ - parts: [{ text: request.systemPrompt }] - } : undefined, - contents: this.toGeminiContents(request), - tools: this.toGeminiTools(request.tools), - abortSignal: request.signal - }); - - let content = ''; - let thinking = ''; - - for await (const chunk of stream) { - const candidate = chunk.candidates?.[0]; - if (!candidate?.content?.parts) continue; - - for (const part of candidate.content.parts) { - if (part.text && !part.thought) { - content += part.text; - yield { - type: 'text', - content, - delta: part.text - }; - } else if (part.text && part.thought) { - thinking += part.text; - yield { - type: 'thinking', - content: thinking, - delta: part.text - }; - } else if (part.functionCall) { - yield { - type: 'toolCall', - toolCall: { - id: part.functionCall.id || crypto.randomUUID(), - name: part.functionCall.name, - arguments: part.functionCall.args - } - }; - } - } - - if (chunk.usageMetadata) { - yield { - type: 'usage', - usage: { - input: chunk.usageMetadata.promptTokenCount || 0, - output: chunk.usageMetadata.candidatesTokenCount || 0, - total: chunk.usageMetadata.totalTokenCount || 0, - thinking: chunk.usageMetadata.thoughtsTokenCount, - cacheRead: chunk.usageMetadata.cachedContentTokenCount - } - }; - } - } - } - - private toGeminiContents(request: Request): any[] { - return request.messages.map(msg => { - const parts: any[] = []; - - if (typeof msg.content === 'string') { - parts.push({ text: msg.content }); - } else { - for (const part of msg.content) { - if (part.type === 'text') { - parts.push({ text: part.text }); - } else if (part.type === 'image') { - parts.push({ - inlineData: { - mimeType: part.image.mimeType, - data: part.image.data - } - }); - } - } - } - - // Add function calls as parts - if (msg.toolCalls) { - for (const tc of msg.toolCalls) { - parts.push({ - functionCall: { - name: tc.name, - args: tc.arguments - } - }); - } - } - - // Add tool results as function responses - if (msg.role === 'tool') { - parts.push({ - functionResponse: { - 
name: msg.toolCallId, - response: { result: msg.content } - } - }); - } - - return { - role: msg.role === 'assistant' ? 'model' : msg.role === 'tool' ? 'user' : msg.role, - parts - }; - }); - } -} -``` - -## Usage Examples - -### Basic Streaming - -```typescript -const ai = new AI({ - provider: 'openai', - apiKey: process.env.OPENAI_API_KEY, - model: 'gpt-4' -}); - -const stream = ai.stream({ - messages: [ - { role: 'user', content: 'Write a haiku about coding' } - ], - systemPrompt: 'You are a poetic programmer' -}); - -for await (const event of stream) { - switch (event.type) { - case 'text': - process.stdout.write(event.delta); - break; - case 'usage': - console.log(`\nTokens: ${event.usage.total}`); - break; - case 'done': - console.log(`\nFinished: ${event.reason}`); - break; - } -} -``` - -### Cross-Provider Tool Calling - -```typescript -async function callWithTools(provider: 'openai' | 'anthropic' | 'gemini') { - const ai = new AI({ - provider, - apiKey: process.env[`${provider.toUpperCase()}_API_KEY`], - model: getDefaultModel(provider) - }); - - const messages: Message[] = [{ - role: 'user', - content: 'What is the weather in Paris and calculate 15 * 23?' 
- }]; - - const stream = ai.stream({ - messages, - tools: [ - { - name: 'weather', - description: 'Get weather for a location', - parameters: { - type: 'object', - properties: { - location: { type: 'string' } - }, - required: ['location'] - } - }, - { - name: 'calculator', - description: 'Calculate math expressions', - parameters: { - type: 'object', - properties: { - expression: { type: 'string' } - }, - required: ['expression'] - } - } - ] - }); - - const toolCalls: any[] = []; - - for await (const event of stream) { - if (event.type === 'toolCall') { - toolCalls.push(event.toolCall); - - // Execute tool - const result = await executeToolCall(event.toolCall); - - // Add tool result to conversation - messages.push({ - role: 'assistant', - toolCalls: [event.toolCall] - }); - - messages.push({ - role: 'tool', - content: JSON.stringify(result), - toolCallId: event.toolCall.id - }); - } - } - - // Continue conversation with tool results - if (toolCalls.length > 0) { - const finalStream = ai.stream({ messages }); - - for await (const event of finalStream) { - if (event.type === 'text') { - process.stdout.write(event.delta); - } - } - } -} -``` - -### Thinking/Reasoning - -```typescript -async function withThinking() { - // OpenAI o1 - const openai = new AI({ - provider: 'openai', - model: 'o1-preview' - }); - - // Anthropic Claude - const anthropic = new AI({ - provider: 'anthropic', - model: 'claude-3-opus-20240229' - }); - - // Gemini thinking model - const gemini = new AI({ - provider: 'gemini', - model: 'gemini-2.0-flash-thinking-exp-1219' - }); - - for (const ai of [openai, anthropic, gemini]) { - const stream = ai.stream({ - messages: [{ - role: 'user', - content: 'Solve this step by step: If a tree falls in a forest...' 
- }], - reasoning: { - enabled: true, - effort: 'high', // OpenAI reasoning_effort - maxTokens: 2000 // Anthropic budget - } - }); - - for await (const event of stream) { - if (event.type === 'thinking') { - console.log('[THINKING]', event.delta); - } else if (event.type === 'text') { - console.log('[RESPONSE]', event.delta); - } else if (event.type === 'done') { - // Final message includes model and usage with cost - console.log('Model:', event.message.model); - console.log('Tokens:', event.message.usage?.total); - console.log('Cost: $', event.message.usage?.cost?.total); - } - } - } -} -``` - -## Implementation Notes - -### Critical Decisions - -1. **Streaming First**: All providers support streaming, non-streaming is just collected events -2. **Unified Events**: Same event types across all providers for consistent handling -3. **Separate System Prompt**: Required for Anthropic/Gemini compatibility -4. **Tool Role**: Unified way to handle tool responses across providers -5. **Content Arrays**: Support both string and structured content -6. 
**Thinking Extraction**: Normalize reasoning across different provider formats - -### Provider-Specific Handling - -**OpenAI**: -- Choose between Chat Completions and Responses API based on model and thinking needs -- Map `developer` role for o1/o3 models -- Handle streaming tool call deltas - -**Anthropic**: -- Convert to content blocks (always arrays) -- Tool results as user messages with tool_result blocks -- Handle MessageStream events or raw streaming - -**Gemini**: -- Convert to parts system -- Extract thinking from `part.thought` flag -- Map `assistant` to `model` role -- Handle function calls/responses in parts - -### Error Handling - -```typescript -class AIError extends Error { - constructor( - message: string, - public code: string, - public provider: string, - public retryable: boolean, - public statusCode?: number - ) { - super(message); - } -} - -// In adapters -try { - // API call -} catch (error) { - if (error instanceof RateLimitError) { - throw new AIError( - 'Rate limit exceeded', - 'rate_limit', - this.provider, - true, - 429 - ); - } - // Map other errors... 
-} -``` - -## Model Information & Cost Tracking - -### Models Database - -We cache the models.dev API data at build time for fast, offline access: - -```typescript -// scripts/update-models.ts - Run during build or manually -async function updateModels() { - const response = await fetch('https://models.dev/api.json'); - const data = await response.json(); - - // Transform to our format - const models: ModelsDatabase = transformModelsData(data); - - // Generate TypeScript file - const content = `// Auto-generated from models.dev API -// Last updated: ${new Date().toISOString()} -// Run 'npm run update-models' to refresh - -export const MODELS_DATABASE: ModelsDatabase = ${JSON.stringify(models, null, 2)}; -`; - - await fs.writeFile('src/models-data.ts', content); -} - -// src/models.ts - Runtime model lookup -import { MODELS_DATABASE } from './models-data.js'; - -// Simple lookup with fallback -export function getModelInfo(provider: string, model: string): ModelInfo { - const info = MODELS_DATABASE.providers[provider]?.models[model]; - - if (!info) { - // Fallback for unknown models - return { - id: model, - name: model, - provider, - capabilities: { - reasoning: false, - toolCall: true, - vision: false - }, - cost: { input: 0, output: 0 }, - limits: { context: 128000, output: 4096 } - }; - } - - return info; -} - -// Optional: Runtime override for testing new models -const runtimeOverrides = new Map(); - -export function registerModel(provider: string, model: string, info: ModelInfo) { - runtimeOverrides.set(`${provider}:${model}`, info); -} -``` - -### Cost Calculation - -```typescript -class CostTracker { - private usage: TokenUsage = { - input: 0, - output: 0, - total: 0, - cacheRead: 0, - cacheWrite: 0 - }; - - private modelInfo: ModelInfo; - - constructor(modelInfo: ModelInfo) { - this.modelInfo = modelInfo; - } - - addUsage(tokens: Partial): TokenUsage { - this.usage.input += tokens.input || 0; - this.usage.output += tokens.output || 0; - this.usage.thinking 
+= tokens.thinking || 0; - this.usage.cacheRead += tokens.cacheRead || 0; - this.usage.cacheWrite += tokens.cacheWrite || 0; - this.usage.total = this.usage.input + this.usage.output + (this.usage.thinking || 0); - - // Calculate costs (per million tokens) - const cost = this.modelInfo.cost; - this.usage.cost = { - input: (this.usage.input / 1_000_000) * cost.input, - output: (this.usage.output / 1_000_000) * cost.output, - cache: - ((this.usage.cacheRead || 0) / 1_000_000) * (cost.cacheRead || 0) + - ((this.usage.cacheWrite || 0) / 1_000_000) * (cost.cacheWrite || 0), - total: 0 - }; - - this.usage.cost.total = - this.usage.cost.input + - this.usage.cost.output + - this.usage.cost.cache; - - return { ...this.usage }; - } - - getTotalCost(): number { - return this.usage.cost?.total || 0; - } - - getUsageSummary(): string { - return `Tokens: ${this.usage.total} (${this.usage.input}→${this.usage.output}) | Cost: $${this.getTotalCost().toFixed(4)}`; - } -} -``` - -### Integration in Adapters - -```typescript -class OpenAIAdapter { - private costTracker: CostTracker; - - constructor(config: AIConfig) { - const modelInfo = getModelInfo('openai', config.model); - this.costTracker = new CostTracker(modelInfo); - } - - async *stream(request: Request): AsyncGenerator { - // ... streaming logic ... - - if (chunk.usage) { - const usage = this.costTracker.addUsage({ - input: chunk.usage.prompt_tokens, - output: chunk.usage.completion_tokens, - thinking: chunk.usage.completion_tokens_details?.reasoning_tokens, - cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens - }); - - yield { type: 'usage', usage }; - } - } -} -``` - -## Next Steps - -1. Create models.ts with models.dev integration -2. Implement base `AI` class with adapter pattern -3. Create three provider adapters with full streaming support -4. Add comprehensive error mapping -5. Implement token counting and cost tracking -6. Add test suite for each provider -7. 
Create migration guide from native SDKs \ No newline at end of file