# OpenAI SDK Implementation Guide This document provides a comprehensive guide to implementing the required features using the OpenAI SDK v5.12.2. All examples are based on actual usage patterns from the pi-mono codebase and include real TypeScript types from the SDK. ## Table of Contents 1. [Basic Setup](#basic-setup) 2. [Streaming Responses](#streaming-responses) 3. [Aborting Requests](#aborting-requests) 4. [Error Handling](#error-handling) 5. [Stop Reasons](#stop-reasons) 6. [Message History & Serialization](#message-history--serialization) 7. [Token Counting](#token-counting) 8. [Caching](#caching) 9. [Chat Completions vs Responses API](#chat-completions-vs-responses-api) 10. [Tool/Function Calling](#toolfunction-calling) 11. [System Prompts](#system-prompts) 12. [Provider-Specific Features](#provider-specific-features) 13. [Complete Implementation Examples](#complete-implementation-examples) ## Basic Setup ```typescript import OpenAI from "openai"; // Basic client setup const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, baseURL: "https://api.openai.com/v1", // Optional, default shown }); // For other providers (Groq, Anthropic OpenAI-compatible, etc.) const groqClient = new OpenAI({ apiKey: process.env.GROQ_API_KEY, baseURL: "https://api.groq.com/openai/v1", }); ``` ### Client Configuration Options ```typescript interface ClientOptions { apiKey?: string; baseURL?: string; timeout?: number; // Request timeout in milliseconds maxRetries?: number; // Number of retry attempts defaultHeaders?: Record; defaultQuery?: Record; } ``` ## Streaming Responses ### Chat Completions Streaming ```typescript import type { ChatCompletionChunk, ChatCompletionCreateParamsStreaming } from "openai/resources/chat/completions"; import { Stream } from "openai/core/streaming"; async function streamChatCompletion() { const params: ChatCompletionCreateParamsStreaming = { model: "gpt-4o", messages: [ { role: "user", content: "Tell me a story" } ], stream: true, max_completion_tokens: 1000, }; const stream: Stream = await client.chat.completions.create(params); for await (const chunk of stream) { const delta = chunk.choices[0]?.delta; if (delta?.content) { process.stdout.write(delta.content); } if (delta?.tool_calls) { console.log("Tool call delta:", delta.tool_calls); } if (chunk.choices[0]?.finish_reason) { console.log("\nFinish reason:", chunk.choices[0].finish_reason); } } } ``` ### Responses API Streaming ```typescript import type { ResponseCreateParamsStreaming, ResponseStreamEvent } from "openai/resources/responses"; async function streamResponsesAPI() { const params: ResponseCreateParamsStreaming = { model: "o1-mini", input: [ { role: "user", content: [{ type: "input_text", text: "Solve this math problem: 2x + 5 = 11" }] } ], stream: true, max_output_tokens: 2000, reasoning: { effort: "low", summary: "detailed" } }; const stream: Stream = await client.responses.create(params); for await (const event of stream) { switch (event.type) { case "response.reasoning.text.delta": // Reasoning/thinking tokens (o1/o3) process.stdout.write(`[thinking] ${event.delta}`); break; case "response.text.delta": // Output content process.stdout.write(event.delta); break; case "response.function_call.arguments.delta": // Tool call arguments being built console.log("Tool call delta:", event.delta); break; case "response.completed": console.log("\nResponse completed"); break; } } } ``` ### Streaming Patterns ```typescript // Pattern 1: Simple content streaming async function simpleStream(messages: any[]) { const stream = await client.chat.completions.create({ model: "gpt-4o", messages, stream: true, }); let fullContent = ""; for await (const chunk of stream) { const content = chunk.choices[0]?.delta?.content || ""; fullContent += content; process.stdout.write(content); } return fullContent; } // Pattern 2: Event-driven streaming with handlers interface StreamHandlers { onContent?: (delta: string) => void; onToolCall?: (toolCall: any) => void; onFinish?: (reason: string) => void; } async function eventDrivenStream(messages: any[], handlers: StreamHandlers) { const stream = await client.chat.completions.create({ model: "gpt-4o", messages, stream: true, }); for await (const chunk of stream) { const choice = chunk.choices[0]; if (!choice) continue; if (choice.delta?.content) { handlers.onContent?.(choice.delta.content); } if (choice.delta?.tool_calls) { handlers.onToolCall?.(choice.delta.tool_calls); } if (choice.finish_reason) { handlers.onFinish?.(choice.finish_reason); } } } ``` ## Aborting Requests ### Using AbortController ```typescript class AbortableClient { private client: OpenAI; private abortController: AbortController | null = null; constructor(config: { apiKey: string; baseURL?: string }) { this.client = new OpenAI(config); } async askWithAbort(message: string): Promise { // Create new AbortController for this request this.abortController = new AbortController(); try { const response = await this.client.chat.completions.create({ model: "gpt-4o", messages: [{ role: "user", content: message }], max_completion_tokens: 1000, }, { signal: this.abortController.signal // Pass abort signal }); return response.choices[0]?.message?.content || ""; } catch (error) { if (this.abortController.signal.aborted) { throw new Error("Request was interrupted"); } throw error; } finally { this.abortController = null; } } // Call this to abort the current request interrupt(): void { this.abortController?.abort(); } } // Usage example const abortableClient = new AbortableClient({ apiKey: process.env.OPENAI_API_KEY! }); // Start request const responsePromise = abortableClient.askWithAbort("Write a long essay"); // Abort after 5 seconds setTimeout(() => { abortableClient.interrupt(); }, 5000); try { const response = await responsePromise; console.log(response); } catch (error) { console.log("Request was aborted:", error.message); } ``` ### Aborting Streaming Requests ```typescript async function abortableStream(messages: any[]) { const abortController = new AbortController(); // Abort after 10 seconds const timeoutId = setTimeout(() => { abortController.abort(); }, 10000); try { const stream = await client.chat.completions.create({ model: "gpt-4o", messages, stream: true, }, { signal: abortController.signal }); for await (const chunk of stream) { // Check if aborted before processing each chunk if (abortController.signal.aborted) { break; } const content = chunk.choices[0]?.delta?.content; if (content) { process.stdout.write(content); } } } catch (error) { if (abortController.signal.aborted) { console.log("\nStream was aborted"); } else { throw error; } } finally { clearTimeout(timeoutId); } } ``` ## Error Handling ### Error Types from OpenAI SDK ```typescript import { OpenAIError, APIError, APIConnectionError, APIConnectionTimeoutError, APIUserAbortError, AuthenticationError, BadRequestError, RateLimitError, InternalServerError, NotFoundError, PermissionDeniedError, UnprocessableEntityError } from "openai"; // Comprehensive error handler async function handleAPICall(apiCall: () => Promise): Promise { try { return await apiCall(); } catch (error) { if (error instanceof APIUserAbortError) { console.log("Request was aborted by user"); throw new Error("Request interrupted"); } if (error instanceof AuthenticationError) { console.error("Authentication failed:", error.message); throw new Error("Invalid API key"); } if (error instanceof RateLimitError) { console.error("Rate limit exceeded:", error.message); // Could implement exponential backoff here throw new Error("Rate limited - try again later"); } if (error instanceof APIConnectionError) { console.error("Connection error:", error.message); throw new Error("Network connection failed"); } if (error instanceof APIConnectionTimeoutError) { console.error("Request timeout:", error.message); throw new Error("Request timed out"); } if (error instanceof BadRequestError) { console.error("Bad request:", error.message); console.error("Error details:", error.error); throw new Error(`Invalid request: ${error.message}`); } if (error instanceof UnprocessableEntityError) { console.error("Unprocessable entity:", error.message); throw new Error(`Validation error: ${error.message}`); } if (error instanceof APIError) { console.error(`API Error ${error.status}:`, error.message); console.error("Error code:", error.code); console.error("Error type:", error.type); throw new Error(`API error: ${error.message}`); } if (error instanceof OpenAIError) { console.error("OpenAI SDK error:", error.message); throw new Error(`SDK error: ${error.message}`); } // Unknown error console.error("Unexpected error:", error); throw error; } } // Usage with retry logic async function apiCallWithRetry( apiCall: () => Promise, maxRetries: number = 3 ): Promise { let lastError: Error; for (let attempt = 0; attempt < maxRetries; attempt++) { try { return await handleAPICall(apiCall); } catch (error) { lastError = error as Error; // Don't retry on certain errors if (error instanceof AuthenticationError || error instanceof BadRequestError || error instanceof APIUserAbortError) { throw error; } // Exponential backoff for retryable errors if (attempt < maxRetries - 1) { const delay = Math.pow(2, attempt) * 1000; // 1s, 2s, 4s await new Promise(resolve => setTimeout(resolve, delay)); } } } throw lastError!; } ``` ### Error Context Extraction ```typescript function extractErrorDetails(error: unknown): { message: string; code?: string; type?: string; status?: number; retryable: boolean; } { if (error instanceof APIError) { return { message: error.message, code: error.code || undefined, type: error.type, status: error.status, retryable: error instanceof RateLimitError || error instanceof APIConnectionError || error instanceof InternalServerError }; } if (error instanceof APIUserAbortError) { return { message: "Request was aborted", retryable: false }; } if (error instanceof OpenAIError) { return { message: error.message, retryable: false }; } return { message: error instanceof Error ? error.message : "Unknown error", retryable: false }; } ``` ## Stop Reasons ### Chat Completions Stop Reasons ```typescript type ChatCompletionFinishReason = | "stop" // Natural stopping point or stop sequence | "length" // Maximum token limit reached | "content_filter" // Content filtered | "tool_calls" // Model wants to call tools | "function_call"; // Legacy function calling async function handleStopReasons() { const response = await client.chat.completions.create({ model: "gpt-4o", messages: [{ role: "user", content: "Hello" }], max_completion_tokens: 10, // Low limit to trigger "length" stop stop: ["END"], // Custom stop sequence }); const choice = response.choices[0]; const finishReason = choice.finish_reason; switch (finishReason) { case "stop": console.log("Completed naturally or hit stop sequence"); break; case "length": console.log("Hit token limit - response may be incomplete"); // Could request more tokens or continue conversation break; case "content_filter": console.log("Content was filtered"); break; case "tool_calls": console.log("Model wants to call tools"); // Handle tool calls (see Tool Calling section) break; default: console.log("Unknown finish reason:", finishReason); } return { content: choice.message.content, finishReason, complete: finishReason === "stop" }; } ``` ### Responses API Stop Reasons ```typescript // Responses API uses different event types to indicate completion async function handleResponsesStopReasons() { const response = await client.responses.create({ model: "o1-mini", input: [{ role: "user", content: [{ type: "input_text", text: "Hello" }] }], max_output_tokens: 100, }); for (const item of response.output || []) { switch (item.type) { case "message": // Check for refusal in content for (const content of item.content || []) { if (content.type === "refusal") { console.log("Response was refused:", content.refusal); } else if (content.type === "output_text") { console.log("Response completed normally"); } } break; case "function_call": console.log("Tool call requested"); break; } } } ``` ### Streaming Stop Reason Detection ```typescript async function streamWithStopReasonHandling() { const stream = await client.chat.completions.create({ model: "gpt-4o", messages: [{ role: "user", content: "Count to 10" }], stream: true, max_completion_tokens: 50, }); let content = ""; let finishReason: string | null = null; for await (const chunk of stream) { const choice = chunk.choices[0]; if (!choice) continue; if (choice.delta?.content) { content += choice.delta.content; process.stdout.write(choice.delta.content); } if (choice.finish_reason) { finishReason = choice.finish_reason; break; } } console.log(`\nStreaming finished. Reason: ${finishReason}`); if (finishReason === "length") { console.log("Response was cut off due to token limit"); // Could continue the conversation to get the rest } return { content, finishReason }; } ``` ## Message History & Serialization ### Message Types and Formats ```typescript // Chat Completions message format interface ChatMessage { role: "system" | "user" | "assistant" | "tool" | "developer"; content: string | null; name?: string; tool_calls?: Array<{ id: string; type: "function"; function: { name: string; arguments: string; }; }>; tool_call_id?: string; // For tool response messages } // Responses API message format interface ResponseMessage { role: "user" | "developer"; content: Array<{ type: "input_text" | "input_image" | "input_audio"; text?: string; image?: { url: string }; audio?: { data: string }; }>; } // Unified conversation history interface ConversationHistory { api: "completions" | "responses"; model: string; systemPrompt?: string; messages: any[]; // API-specific format totalTokens: number; metadata: { created: number; lastUpdated: number; provider: string; }; } ``` ### Serialization Implementation ```typescript class ConversationManager { private messages: any[] = []; private api: "completions" | "responses"; private systemPrompt?: string; private totalTokens = 0; constructor(api: "completions" | "responses", systemPrompt?: string) { this.api = api; this.systemPrompt = systemPrompt; if (systemPrompt) { if (api === "completions") { this.messages.push({ role: "system", content: systemPrompt }); } else { this.messages.push({ role: "developer", content: systemPrompt }); } } } addUserMessage(content: string) { if (this.api === "completions") { this.messages.push({ role: "user", content }); } else { this.messages.push({ role: "user", content: [{ type: "input_text", text: content }] }); } } addAssistantMessage(content: string) { if (this.api === "completions") { this.messages.push({ role: "assistant", content }); } else { this.messages.push({ type: "message", content: [{ type: "output_text", text: content }] }); } } addToolCall(id: string, name: string, args: string) { if (this.api === "completions") { // Add assistant message with tool calls this.messages.push({ role: "assistant", content: null, tool_calls: [{ id, type: "function" as const, function: { name, arguments: args } }] }); } else { // Add function call to responses format this.messages.push({ type: "function_call", call_id: id, name, arguments: args }); } } addToolResult(id: string, result: string) { if (this.api === "completions") { this.messages.push({ role: "tool", tool_call_id: id, content: result }); } else { this.messages.push({ type: "function_call_output", call_id: id, output: result }); } } // Serialize to JSON serialize(): string { const data: ConversationHistory = { api: this.api, model: "unknown", // Set externally systemPrompt: this.systemPrompt, messages: this.messages, totalTokens: this.totalTokens, metadata: { created: Date.now(), lastUpdated: Date.now(), provider: "openai" } }; return JSON.stringify(data, null, 2); } // Deserialize from JSON static deserialize(json: string): ConversationManager { const data: ConversationHistory = JSON.parse(json); const manager = new ConversationManager(data.api, data.systemPrompt); manager.messages = data.messages; manager.totalTokens = data.totalTokens; return manager; } getMessages() { return this.messages; } updateTokenUsage(tokens: number) { this.totalTokens += tokens; } } // Usage example const conversation = new ConversationManager("completions", "You are a helpful assistant"); conversation.addUserMessage("Hello"); conversation.addAssistantMessage("Hi there!"); conversation.updateTokenUsage(25); // Save to file const serialized = conversation.serialize(); await fs.writeFile("conversation.json", serialized); // Load from file const loaded = await fs.readFile("conversation.json", "utf-8"); const restored = ConversationManager.deserialize(loaded); ``` ### Event-Based History Reconstruction ```typescript // From pi-agent codebase - reconstruct messages from events type AgentEvent = | { type: "user_message"; text: string } | { type: "assistant_message"; text: string } | { type: "tool_call"; toolCallId: string; name: string; args: string } | { type: "tool_result"; toolCallId: string; result: string; isError: boolean } | { type: "reasoning"; text: string } | { type: "token_usage"; inputTokens: number; outputTokens: number; totalTokens: number }; function reconstructMessagesFromEvents( events: AgentEvent[], api: "completions" | "responses", systemPrompt?: string ): any[] { const messages: any[] = []; // Add system prompt if (systemPrompt) { if (api === "completions") { messages.push({ role: "system", content: systemPrompt }); } else { messages.push({ role: "developer", content: systemPrompt }); } } if (api === "responses") { // Responses API format reconstruction for (const event of events) { switch (event.type) { case "user_message": messages.push({ role: "user", content: [{ type: "input_text", text: event.text }] }); break; case "reasoning": messages.push({ type: "reasoning", content: [{ type: "reasoning_text", text: event.text }] }); break; case "tool_call": messages.push({ type: "function_call", call_id: event.toolCallId, name: event.name, arguments: event.args }); break; case "tool_result": messages.push({ type: "function_call_output", call_id: event.toolCallId, output: event.result }); break; case "assistant_message": messages.push({ type: "message", content: [{ type: "output_text", text: event.text }] }); break; } } } else { // Chat Completions format reconstruction let pendingToolCalls: any[] = []; for (const event of events) { switch (event.type) { case "user_message": messages.push({ role: "user", content: event.text }); break; case "tool_call": pendingToolCalls.push({ id: event.toolCallId, type: "function", function: { name: event.name, arguments: event.args } }); break; case "tool_result": // Add assistant message with tool calls when we see first result if (pendingToolCalls.length > 0) { messages.push({ role: "assistant", content: null, tool_calls: pendingToolCalls }); pendingToolCalls = []; } messages.push({ role: "tool", tool_call_id: event.toolCallId, content: event.result }); break; case "assistant_message": messages.push({ role: "assistant", content: event.text }); break; } } } return messages; } ``` ## Token Counting ### Usage Types from OpenAI SDK ```typescript // Chat Completions usage interface CompletionUsage { completion_tokens: number; prompt_tokens: number; total_tokens: number; completion_tokens_details?: { reasoning_tokens?: number; // o1/o3 reasoning tokens cached_tokens?: number; }; prompt_tokens_details?: { cached_tokens?: number; }; } // Responses API usage interface ResponseUsage { input_tokens: number; output_tokens: number; total_tokens: number; input_tokens_details: { cached_tokens?: number; }; output_tokens_details: { reasoning_tokens?: number; // o1/o3 reasoning tokens }; } ``` ### Token Counting Implementation ```typescript interface TokenUsage { inputTokens: number; outputTokens: number; totalTokens: number; reasoningTokens: number; cacheReadTokens: number; cacheWriteTokens: number; } class TokenCounter { private totalUsage: TokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, reasoningTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }; // Extract tokens from Chat Completions response extractChatCompletionUsage(usage?: CompletionUsage): TokenUsage | null { if (!usage) return null; const extracted: TokenUsage = { inputTokens: usage.prompt_tokens || 0, outputTokens: usage.completion_tokens || 0, totalTokens: usage.total_tokens || 0, reasoningTokens: usage.completion_tokens_details?.reasoning_tokens || 0, cacheReadTokens: usage.prompt_tokens_details?.cached_tokens || 0, cacheWriteTokens: 0 // Not available in this format }; this.addUsage(extracted); return extracted; } // Extract tokens from Responses API response extractResponseUsage(usage?: ResponseUsage): TokenUsage | null { if (!usage) return null; const extracted: TokenUsage = { inputTokens: usage.input_tokens || 0, outputTokens: usage.output_tokens || 0, totalTokens: usage.total_tokens || 0, reasoningTokens: usage.output_tokens_details?.reasoning_tokens || 0, cacheReadTokens: usage.input_tokens_details?.cached_tokens || 0, cacheWriteTokens: 0 // Not available in current API }; this.addUsage(extracted); return extracted; } private addUsage(usage: TokenUsage) { this.totalUsage.inputTokens += usage.inputTokens; this.totalUsage.outputTokens += usage.outputTokens; this.totalUsage.totalTokens += usage.totalTokens; this.totalUsage.reasoningTokens += usage.reasoningTokens; this.totalUsage.cacheReadTokens += usage.cacheReadTokens; this.totalUsage.cacheWriteTokens += usage.cacheWriteTokens; } getTotalUsage(): TokenUsage { return { ...this.totalUsage }; } reset() { this.totalUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, reasoningTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 0 }; } // Format for display formatUsage(usage?: TokenUsage): string { const u = usage || this.totalUsage; let parts = [`↑${u.inputTokens}`, `↓${u.outputTokens}`]; if (u.reasoningTokens > 0) { parts.push(`⚔${u.reasoningTokens}`); } if (u.cacheReadTokens > 0) { parts.push(`šŸ“–${u.cacheReadTokens}`); } if (u.cacheWriteTokens > 0) { parts.push(`šŸ“${u.cacheWriteTokens}`); } return parts.join(" "); } } // Usage with streaming async function countTokensInStream() { const tokenCounter = new TokenCounter(); const stream = await client.chat.completions.create({ model: "gpt-4o", messages: [{ role: "user", content: "Tell me about AI" }], stream: true, stream_options: { include_usage: true } // Important for token counts }); for await (const chunk of stream) { // Token usage comes in final chunk when stream_options.include_usage = true if (chunk.usage) { const usage = tokenCounter.extractChatCompletionUsage(chunk.usage); console.log("Token usage:", tokenCounter.formatUsage(usage)); } } console.log("Total usage:", tokenCounter.formatUsage()); } ``` ### Token Estimation (for planning) ```typescript // Rough token estimation for planning purposes function estimateTokens(text: string): number { // Very rough approximation: ~4 characters per token for English return Math.ceil(text.length / 4); } function estimateMessageTokens(messages: any[]): number { let total = 0; for (const message of messages) { if (typeof message.content === "string") { total += estimateTokens(message.content); } else if (Array.isArray(message.content)) { for (const content of message.content) { if (content.text) { total += estimateTokens(content.text); } } } // Add overhead for message formatting total += 10; } return total; } // Check if request will fit in context window function checkContextLimit(messages: any[], maxTokens: number = 128000): boolean { const estimated = estimateMessageTokens(messages); const safetyMargin = 1000; // Reserve tokens for response return estimated + safetyMargin < maxTokens; } ``` ## Caching ### Cache Headers and Configuration ```typescript // OpenAI supports prompt caching via special message formatting // Cache is automatically used when messages are repeated async function demonstrateCaching() { const longSystemPrompt = ` You are an expert software engineer with deep knowledge of TypeScript, React, Node.js... [Very long system prompt - 1000+ tokens] `; // First request - will cache the system prompt const response1 = await client.chat.completions.create({ model: "gpt-4o", messages: [ { role: "system", content: longSystemPrompt }, { role: "user", content: "Explain TypeScript generics" } ] }); console.log("First request usage:", response1.usage); // Second request with same system prompt - will use cache const response2 = await client.chat.completions.create({ model: "gpt-4o", messages: [ { role: "system", content: longSystemPrompt }, // Cached { role: "user", content: "Explain React hooks" } ] }); console.log("Second request usage:", response2.usage); console.log("Cache read tokens:", response2.usage?.prompt_tokens_details?.cached_tokens); } ``` ### Manual Cache Control ```typescript // For providers that support explicit cache control interface CacheConfig { enabled: boolean; ttl?: number; // Time to live in seconds } class CachedClient { private client: OpenAI; private cache = new Map(); constructor(apiKey: string, baseURL?: string) { this.client = new OpenAI({ apiKey, baseURL }); } private getCacheKey(messages: any[], model: string): string { return JSON.stringify({ messages, model }); } private isCacheValid(entry: { timestamp: number; ttl: number }): boolean { return Date.now() - entry.timestamp < entry.ttl * 1000; } async completionWithCache( messages: any[], model: string, cacheConfig: CacheConfig = { enabled: true, ttl: 3600 } ) { if (cacheConfig.enabled) { const cacheKey = this.getCacheKey(messages, model); const cached = this.cache.get(cacheKey); if (cached && this.isCacheValid(cached)) { console.log("Cache hit"); return cached.response; } } const response = await this.client.chat.completions.create({ model, messages }); if (cacheConfig.enabled) { const cacheKey = this.getCacheKey(messages, model); this.cache.set(cacheKey, { response, timestamp: Date.now(), ttl: cacheConfig.ttl || 3600 }); } return response; } clearCache() { this.cache.clear(); } } ``` ## Chat Completions vs Responses API ### When to Use Each API ```typescript // Chat Completions API - Traditional conversational interface // Use for: Most general chat/completion tasks interface ChatCompletionsUseCase { // āœ… Good for: // - Regular conversations // - Function/tool calling // - Most models (gpt-4o, claude, gemini via compatibility) // - Streaming text generation // - File uploads and vision // āŒ Limitations: // - No access to reasoning/thinking tokens for o1/o3 // - Less structured for complex workflows } // Responses API - Structured response interface // Use for: Complex reasoning tasks, tool workflows interface ResponsesAPIUseCase { // āœ… Good for: // - o1/o3 models with reasoning access // - Complex tool calling workflows // - Structured output requirements // - Background processing // - Access to reasoning tokens // āŒ Limitations: // - Newer API with less ecosystem support // - More complex message format // - Not all models supported } ``` ### API Decision Logic ```typescript function selectAPI( model: string, requiresReasoning: boolean, hasComplexTools: boolean ): "completions" | "responses" { // Use Responses API for o1/o3 when reasoning is needed if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { return "responses"; } // Use Responses API for complex tool workflows if (hasComplexTools && model.includes("gpt-4")) { return "responses"; } // Default to Chat Completions for broader compatibility return "completions"; } // Usage example const model = "o1-mini"; const needsReasoning = true; const api = selectAPI(model, needsReasoning, false); if (api === "responses") { console.log("Using Responses API for reasoning access"); } else { console.log("Using Chat Completions API for compatibility"); } ``` ### Dual API Client ```typescript class DualAPIClient { private client: OpenAI; constructor(apiKey: string, baseURL?: string) { this.client = new OpenAI({ apiKey, baseURL }); } async complete(params: { model: string; messages: any[]; tools?: any[]; maxTokens?: number; temperature?: number; stream?: boolean; reasoning?: boolean; }) { const api = this.selectAPI(params.model, params.reasoning || false); if (api === "responses") { return this.callResponsesAPI(params); } else { return this.callChatCompletionsAPI(params); } } private selectAPI(model: string, requiresReasoning: boolean): "completions" | "responses" { if ((model.includes("o1") || model.includes("o3")) && requiresReasoning) { return "responses"; } return "completions"; } private async callChatCompletionsAPI(params: any) { const requestParams = { model: params.model, messages: params.messages, max_completion_tokens: params.maxTokens, temperature: params.temperature, tools: params.tools, stream: params.stream }; if (params.stream) { return this.client.chat.completions.create(requestParams); } else { return this.client.chat.completions.create(requestParams); } } private async callResponsesAPI(params: any) { // Convert messages to Responses API format const input = params.messages.map((msg: any) => { if (msg.role === "user") { return { role: "user", content: [{ type: "input_text", text: msg.content }] }; } else if (msg.role === "system") { return { role: "developer", content: msg.content }; } return msg; }); const requestParams = { model: params.model, input, max_output_tokens: params.maxTokens, tools: params.tools, stream: params.stream, reasoning: params.reasoning ? { effort: "low" } : undefined }; return this.client.responses.create(requestParams); } } ``` ## Tool/Function Calling ### Tool Definition Format ```typescript // OpenAI tool definition format (JSON Schema) interface ToolDefinition { type: "function"; function: { name: string; description: string; parameters: { type: "object"; properties: Record; required: string[]; }; }; } // Example tool definitions const tools: ToolDefinition[] = [ { type: "function", function: { name: "read_file", description: "Read the contents of a file", parameters: { type: "object", properties: { path: { type: "string", description: "The file path to read" } }, required: ["path"] } } }, { type: "function", function: { name: "execute_command", description: "Execute a shell command", parameters: { type: "object", properties: { command: { type: "string", description: "The command to execute" }, timeout: { type: "number", description: "Timeout in seconds", default: 30 } }, required: ["command"] } } } ]; ``` ### Tool Execution Engine ```typescript type ToolFunction = (args: any) => Promise; class ToolExecutor { private tools = new Map(); register(name: string, fn: ToolFunction) { this.tools.set(name, fn); } async execute(name: string, argsJson: string): Promise { const tool = this.tools.get(name); if (!tool) { throw new Error(`Unknown tool: ${name}`); } try { const args = JSON.parse(argsJson); return await tool(args); } catch (error) { throw new Error(`Tool execution failed: ${error.message}`); } } getAvailableTools(): string[] { return Array.from(this.tools.keys()); } } // Register tool implementations const toolExecutor = new ToolExecutor(); toolExecutor.register("read_file", async (args: { path: string }) => { const fs = await import("fs/promises"); try { const content = await fs.readFile(args.path, "utf-8"); return content; } catch (error) { return `Error reading file: ${error.message}`; } }); toolExecutor.register("execute_command", async (args: { command: string; timeout?: number }) => { const { exec } = await import("child_process"); const { promisify } = await import("util"); const execAsync = promisify(exec); try { const { stdout, stderr } = await execAsync(args.command, { timeout: (args.timeout || 30) * 1000 }); return stdout + (stderr ? `\nSTDERR: ${stderr}` : ""); } catch (error) { return `Command failed: ${error.message}`; } }); ``` ### Complete Tool Calling Flow ```typescript async function completeChatWithTools(userMessage: string) { const conversation = new ConversationManager("completions", "You are a helpful assistant with file system access."); const tokenCounter = new TokenCounter(); conversation.addUserMessage(userMessage); while (true) { const response = await client.chat.completions.create({ model: "gpt-4o", messages: conversation.getMessages(), tools, tool_choice: "auto", max_completion_tokens: 1000 }); // Track token usage if (response.usage) { tokenCounter.extractChatCompletionUsage(response.usage); } const message = response.choices[0].message; if (message.tool_calls && message.tool_calls.length > 0) { // Add assistant message with tool calls to conversation conversation.getMessages().push({ role: "assistant", content: message.content, tool_calls: message.tool_calls }); // Execute each tool call for (const toolCall of message.tool_calls) { console.log(`šŸ”§ Calling ${toolCall.function.name}...`); try { const result = await toolExecutor.execute( toolCall.function.name, toolCall.function.arguments ); console.log(`āœ… Tool result: ${result.substring(0, 100)}...`); conversation.addToolResult(toolCall.id, result); } catch (error) { console.log(`āŒ Tool error: ${error.message}`); conversation.addToolResult(toolCall.id, `Error: ${error.message}`); } } // Continue conversation with tool results continue; } else { // Final response const content = message.content || ""; conversation.addAssistantMessage(content); console.log("šŸ¤– Assistant:", content); console.log("šŸ“Š Token usage:", tokenCounter.formatUsage()); return content; } } } // Usage await completeChatWithTools("Read the package.json file and tell me about this project"); ``` ### Streaming Tool Calls ```typescript async function streamingToolCalls(userMessage: string) { const stream = await client.chat.completions.create({ model: "gpt-4o", messages: [{ role: "user", content: userMessage }], tools, tool_choice: "auto", stream: true }); let currentToolCalls: Map = new Map(); let assistantMessage = ""; for await (const chunk of stream) { const choice = chunk.choices[0]; if (!choice) continue; const delta = choice.delta; // Regular content if (delta.content) { assistantMessage += delta.content; process.stdout.write(delta.content); } // Tool call deltas if (delta.tool_calls) { for (const toolCallDelta of delta.tool_calls) { const id = toolCallDelta.id; if (!id) continue; if (!currentToolCalls.has(id)) { currentToolCalls.set(id, { name: "", args: "" }); } const toolCall = currentToolCalls.get(id)!; if (toolCallDelta.function?.name) { toolCall.name += toolCallDelta.function.name; } if (toolCallDelta.function?.arguments) { toolCall.args += toolCallDelta.function.arguments; } } } // When finished, execute accumulated tool calls if (choice.finish_reason === "tool_calls") { console.log("\nšŸ”§ Executing tools..."); for (const [id, toolCall] of currentToolCalls) { try { const result = await toolExecutor.execute(toolCall.name, toolCall.args); console.log(`āœ… ${toolCall.name}: ${result.substring(0, 100)}...`); } catch (error) { console.log(`āŒ ${toolCall.name}: ${error.message}`); } } break; } } } ``` ### Responses API Tool Calling ```typescript async function responsesAPIToolCalling() { const response = await client.responses.create({ model: "gpt-4o", input: [ { role: "user", content: [{ type: "input_text", text: "List files in current directory" }] } ], tools: [ { type: "function", function: { name: "list_directory", description: "List files in a directory", parameters: { type: "object", properties: { path: { type: "string", description: "Directory path" } }, required: ["path"] } } } ] }); for (const item of response.output || []) { switch (item.type) { case "function_call": console.log(`šŸ”§ Tool call: ${item.name}`); console.log(`šŸ“ Arguments: ${item.arguments}`); try { const result = await toolExecutor.execute(item.name, item.arguments); console.log(`āœ… Result: ${result}`); // In a real implementation, you'd add this result back to the conversation // and continue the response } catch (error) { console.log(`āŒ Error: ${error.message}`); } break; case "message": for (const content of item.content || []) { if (content.type === "output_text") { console.log("šŸ¤– Response:", content.text); } } break; } } } ``` ## System Prompts ### System Prompt Handling by Model Type ```typescript interface SystemPromptConfig { content: string; role: "system" | "developer"; // Different models use different roles } function formatSystemPrompt(prompt: string, model: string, api: "completions" | "responses"): any { // Chat Completions API if (api === "completions") { // Most models use "system" role if (model.includes("claude") || model.includes("gemini")) { // Some providers via OpenAI compatibility might expect "system" return { role: "system", content: prompt }; } // OpenAI native models return { role: "system", content: prompt }; } // Responses API uses "developer" role for system messages return { role: "developer", content: prompt }; } // System prompt best practices const systemPrompts = { // General assistant assistant: "You are a helpful, accurate, and reliable AI assistant. Provide clear, concise, and helpful responses.", // Code assistant coder: `You are an expert software engineer with deep knowledge of multiple programming languages, frameworks, and best practices. Key principles: - Write clean, maintainable, and well-documented code - Follow language-specific conventions and best practices - Explain your reasoning and trade-offs - Suggest improvements and alternatives when appropriate - Always test your code mentally before providing it When helping with code: 1. Understand the requirements fully 2. Choose appropriate tools and patterns 3. Provide working, tested solutions 4. Explain key concepts and decisions`, // Research assistant researcher: `You are a thorough research assistant. When answering questions: 1. Provide accurate, well-sourced information 2. Acknowledge limitations in your knowledge 3. Structure responses clearly with headings and bullet points 4. Cite sources when possible 5. Distinguish between facts, analysis, and opinions 6. Ask clarifying questions when the request is ambiguous`, // Tool-enabled assistant toolEnabled: `You are an AI assistant with access to various tools for file operations, web searches, and code execution. Guidelines for tool use: - Use tools when they would be helpful to answer the user's question - Always explain what you're doing before calling a tool - Interpret and summarize tool results for the user - If a tool fails, try alternative approaches - Be transparent about what information comes from tools vs your training Available capabilities: - Read and write files - Execute shell commands - Search the web - Analyze code and data` }; ``` ### Dynamic System Prompt Building ```typescript class SystemPromptBuilder { private sections: string[] = []; addRole(role: string): this { this.sections.push(`You are ${role}.`); return this; } addCapabilities(capabilities: string[]): this { if (capabilities.length > 0) { this.sections.push(`You have access to: ${capabilities.join(", ")}.`); } return this; } addGuidelines(guidelines: string[]): this { if (guidelines.length > 0) { this.sections.push("Guidelines:\n" + guidelines.map(g => `- ${g}`).join("\n")); } return this; } addContext(context: string): this { if (context.trim()) { this.sections.push(`Context: ${context}`); } return this; } build(): string { return this.sections.join("\n\n"); } reset(): this { this.sections = []; return this; } } // Usage examples const codeAssistantPrompt = new SystemPromptBuilder() .addRole("an expert TypeScript developer") .addCapabilities(["file system access", "code execution", "documentation lookup"]) .addGuidelines([ "Write clean, type-safe code", "Explain complex concepts clearly", "Suggest best practices", "Test code before providing it" ]) .build(); const customerServicePrompt = new SystemPromptBuilder() .addRole("a helpful customer service representative") .addGuidelines([ "Be polite and professional", "Listen carefully to customer concerns", "Provide accurate information", "Escalate complex issues when needed" ]) .addContext("You work for TechCorp, a software company that makes productivity tools.") .build(); ``` ### Model-Specific System Prompt Optimization ```typescript function optimizeSystemPromptForModel(basePrompt: string, model: string): string { // OpenAI models (especially o1/o3) work well with detailed, structured prompts if (model.includes("gpt") || model.includes("o1") || model.includes("o3")) { return `${basePrompt} Think step by step when solving complex problems. Show your reasoning process clearly.`; } // Claude models prefer more conversational, principle-based prompts if (model.includes("claude")) { return `${basePrompt} I value helpful, harmless, and honest responses. Please be thoughtful and thorough in your analysis.`; } // Gemini models work well with structured instructions if (model.includes("gemini")) { return `${basePrompt} Please structure your responses clearly and provide specific, actionable advice.`; } // Default: return as-is return basePrompt; } // Provider-specific prompt injection handling function detectAndMitigatePromptInjection(userInput: string): { safe: boolean; cleaned?: string } { const injectionPatterns = [ /ignore.*previous.*instruction/i, /forget.*system.*prompt/i, /act.*as.*different/i, /pretend.*you.*are/i, /new.*role.*now/i ]; for (const pattern of injectionPatterns) { if (pattern.test(userInput)) { return { safe: false, cleaned: userInput.replace(pattern, "[FILTERED]") }; } } return { safe: true }; } ``` ## Provider-Specific Features ### Reasoning Support Detection ```typescript // From pi-agent codebase - detect and handle reasoning support per provider type Provider = "openai" | "gemini" | "groq" | "anthropic" | "openrouter" | "other"; function detectProvider(baseURL?: string): Provider { if (!baseURL) return "openai"; if (baseURL.includes("api.openai.com")) return "openai"; if (baseURL.includes("generativelanguage.googleapis.com")) return "gemini"; if (baseURL.includes("api.groq.com")) return "groq"; if (baseURL.includes("api.anthropic.com")) return "anthropic"; if (baseURL.includes("openrouter.ai")) return "openrouter"; return "other"; } // Provider-specific reasoning parameter handling function adjustRequestForReasoning( requestOptions: any, api: "completions" | "responses", provider: Provider, supportsReasoning: boolean ): any { if (!supportsReasoning) return requestOptions; switch (provider) { case "openai": // OpenAI standard format if (api === "responses") { requestOptions.reasoning = { effort: "low", summary: "detailed" }; } else { requestOptions.reasoning_effort = "low"; } break; case "gemini": // Gemini uses extra_body for thinking configuration if (api === "completions") { requestOptions.extra_body = { google: { thinking_config: { thinking_budget: 1024, include_thoughts: true } } }; // Remove reasoning_effort when using thinking_config delete requestOptions.reasoning_effort; } break; case "groq": // Groq uses reasoning_format for Chat Completions if (api === "completions") { requestOptions.reasoning_format = "parsed"; requestOptions.reasoning_effort = "low"; } else { // Groq Responses API doesn't support reasoning.summary requestOptions.reasoning = { effort: "low" }; } break; case "openrouter": // OpenRouter unified reasoning format if (api === "completions") { requestOptions.reasoning = { effort: "low" }; delete requestOptions.reasoning_effort; } break; default: // Standard OpenAI format for others if (api === "responses") { requestOptions.reasoning = { effort: "low" }; } else { requestOptions.reasoning_effort = "low"; } } return requestOptions; } ``` ### Provider-Specific Response Parsing ```typescript // Extract reasoning content from provider-specific response formats function parseReasoningFromMessage(message: any, provider: Provider): { cleanContent: string; reasoningTexts: string[]; } { const reasoningTexts: string[] = []; let cleanContent = message.content || ""; switch (provider) { case "gemini": // Gemini returns thinking in tags if (cleanContent.includes("")) { const thoughtMatches = cleanContent.matchAll(/([\s\S]*?)<\/thought>/g); for (const match of thoughtMatches) { reasoningTexts.push(match[1].trim()); } // Remove thought tags from response cleanContent = cleanContent.replace(/[\s\S]*?<\/thought>/g, "").trim(); } break; case "groq": // Groq returns reasoning in separate field if (message.reasoning) { reasoningTexts.push(message.reasoning); } break; case "openrouter": // OpenRouter uses message.reasoning field if (message.reasoning) { reasoningTexts.push(message.reasoning); } break; default: // OpenAI and others handle reasoning via events break; } return { cleanContent, reasoningTexts }; } ``` ### Provider-Specific Error Handling ```typescript function handleProviderSpecificErrors(error: any, provider: Provider): Error { switch (provider) { case "groq": if (error.message?.includes("reasoning_format")) { return new Error("Reasoning not supported by this Groq model"); } break; case "gemini": if (error.message?.includes("thinking_config")) { return new Error("Thinking mode not supported by this Gemini model"); } break; case "anthropic": if (error.message?.includes("reasoning")) { return new Error("Reasoning not available via Anthropic's OpenAI compatibility layer"); } break; case "openrouter": // OpenRouter passes through underlying provider errors if (error.message?.includes("not supported")) { return new Error("Feature not supported by the selected model on OpenRouter"); } break; } return error; } ``` ## Complete Implementation Examples ### Basic Chat Client ```typescript import OpenAI from "openai"; import type { ChatCompletionMessageParam } from "openai/resources/chat/completions"; class BasicChatClient { private client: OpenAI; private messages: ChatCompletionMessageParam[] = []; constructor(apiKey: string, baseURL?: string, systemPrompt?: string) { this.client = new OpenAI({ apiKey, baseURL }); if (systemPrompt) { this.messages.push({ role: "system", content: systemPrompt }); } } async chat(userMessage: string): Promise { this.messages.push({ role: "user", content: userMessage }); try { const response = await this.client.chat.completions.create({ model: "gpt-4o", messages: this.messages, max_completion_tokens: 1000, temperature: 0.7 }); const assistantMessage = response.choices[0]?.message?.content || ""; this.messages.push({ role: "assistant", content: assistantMessage }); return assistantMessage; } catch (error) { console.error("Chat error:", error); throw error; } } getHistory(): ChatCompletionMessageParam[] { return [...this.messages]; } clearHistory(): void { this.messages = this.messages.filter(m => m.role === "system"); } } ``` ### Advanced Streaming Client with All Features ```typescript import OpenAI from "openai"; import type { ChatCompletionCreateParamsStreaming, ChatCompletionChunk } from "openai/resources/chat/completions"; interface StreamingClientConfig { apiKey: string; baseURL?: string; model: string; systemPrompt?: string; tools?: any[]; maxTokens?: number; temperature?: number; } interface StreamEvent { type: "content" | "tool_call" | "reasoning" | "usage" | "error" | "complete"; data: any; } class AdvancedStreamingClient { private client: OpenAI; private config: StreamingClientConfig; private messages: any[] = []; private abortController: AbortController | null = null; private tokenCounter = new TokenCounter(); constructor(config: StreamingClientConfig) { this.config = config; this.client = new OpenAI({ apiKey: config.apiKey, baseURL: config.baseURL }); if (config.systemPrompt) { this.messages.push({ role: "system", content: config.systemPrompt }); } } async *streamChat(userMessage: string): AsyncGenerator { this.messages.push({ role: "user", content: userMessage }); this.abortController = new AbortController(); try { const params: ChatCompletionCreateParamsStreaming = { model: this.config.model, messages: this.messages, stream: true, max_completion_tokens: this.config.maxTokens || 1000, temperature: this.config.temperature || 0.7, tools: this.config.tools, tool_choice: this.config.tools ? "auto" : undefined, stream_options: { include_usage: true } }; const stream = await this.client.chat.completions.create(params, { signal: this.abortController.signal }); let assistantContent = ""; let currentToolCalls = new Map(); for await (const chunk of stream) { if (this.abortController.signal.aborted) break; const choice = chunk.choices[0]; if (!choice) continue; // Handle content if (choice.delta?.content) { assistantContent += choice.delta.content; yield { type: "content", data: { delta: choice.delta.content, content: assistantContent } }; } // Handle tool calls if (choice.delta?.tool_calls) { for (const toolCall of choice.delta.tool_calls) { if (!toolCall.id) continue; if (!currentToolCalls.has(toolCall.id)) { currentToolCalls.set(toolCall.id, { id: toolCall.id, name: "", arguments: "" }); } const call = currentToolCalls.get(toolCall.id); if (toolCall.function?.name) { call.name += toolCall.function.name; } if (toolCall.function?.arguments) { call.arguments += toolCall.function.arguments; } yield { type: "tool_call", data: { id: toolCall.id, delta: toolCall, current: call } }; } } // Handle usage if (chunk.usage) { const usage = this.tokenCounter.extractChatCompletionUsage(chunk.usage); yield { type: "usage", data: usage }; } // Handle completion if (choice.finish_reason) { if (choice.finish_reason === "tool_calls") { // Execute tool calls const toolResults = await this.executeToolCalls(Array.from(currentToolCalls.values())); // Add messages and continue this.messages.push({ role: "assistant", content: assistantContent || null, tool_calls: Array.from(currentToolCalls.values()).map(call => ({ id: call.id, type: "function", function: { name: call.name, arguments: call.arguments } })) }); for (const result of toolResults) { this.messages.push({ role: "tool", tool_call_id: result.id, content: result.content }); } // Continue stream for final response yield* this.streamChat(""); return; } else { // Regular completion if (assistantContent) { this.messages.push({ role: "assistant", content: assistantContent }); } yield { type: "complete", data: { reason: choice.finish_reason, content: assistantContent } }; } } } } catch (error) { yield { type: "error", data: { error: error.message } }; } finally { this.abortController = null; } } private async executeToolCalls(toolCalls: any[]): Promise> { const results = []; for (const call of toolCalls) { try { // Tool execution would be implemented here const result = await this.executeTool(call.name, call.arguments); results.push({ id: call.id, content: result }); } catch (error) { results.push({ id: call.id, content: `Error: ${error.message}` }); } } return results; } private async executeTool(name: string, argsJson: string): Promise { // Implement tool execution logic return `Tool ${name} executed with args: ${argsJson}`; } interrupt(): void { this.abortController?.abort(); } getUsage() { return this.tokenCounter.getTotalUsage(); } } // Usage example const client = new AdvancedStreamingClient({ apiKey: process.env.OPENAI_API_KEY!, model: "gpt-4o", systemPrompt: "You are a helpful assistant.", tools: [/* tool definitions */] }); for await (const event of client.streamChat("Help me write a TypeScript function")) { switch (event.type) { case "content": process.stdout.write(event.data.delta); break; case "tool_call": console.log(`\nšŸ”§ Tool: ${event.data.current.name}`); break; case "usage": console.log(`\nšŸ“Š Tokens: ${event.data.totalTokens}`); break; case "complete": console.log(`\nāœ… Complete (${event.data.reason})`); break; case "error": console.log(`\nāŒ Error: ${event.data.error}`); break; } } ``` This comprehensive guide covers all the essential features needed to implement a robust OpenAI SDK integration. Each section provides working code examples, actual types from the SDK, and real-world patterns from the pi-mono codebase. ## Key Takeaways 1. **Always use AbortController** for request cancellation 2. **Handle both Chat Completions and Responses APIs** depending on model capabilities 3. **Implement comprehensive error handling** with proper error types 4. **Track token usage** for cost management and optimization 5. **Support streaming** for better user experience 6. **Handle provider-specific features** like reasoning and caching 7. **Implement proper tool calling workflows** for agentic applications 8. **Serialize conversation state** for session persistence 9. **Use appropriate system prompts** for different model types 10. **Test reasoning support** dynamically for each provider/model combination