import type OpenAI from "openai"; import type { Tool as OpenAITool, ResponseCreateParamsStreaming, ResponseFunctionToolCall, ResponseInput, ResponseInputContent, ResponseInputImage, ResponseInputText, ResponseOutputMessage, ResponseReasoningItem, ResponseStreamEvent, } from "openai/resources/responses/responses.js"; import { calculateCost } from "../models.js"; import type { Api, AssistantMessage, Context, ImageContent, Model, StopReason, TextContent, TextSignatureV1, ThinkingContent, Tool, ToolCall, Usage, } from "../types.js"; import type { AssistantMessageEventStream } from "../utils/event-stream.js"; import { shortHash } from "../utils/hash.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { transformMessages } from "./transform-messages.js"; // ============================================================================= // Utilities // ============================================================================= function encodeTextSignatureV1(id: string, phase?: TextSignatureV1["phase"]): string { const payload: TextSignatureV1 = { v: 1, id }; if (phase) payload.phase = phase; return JSON.stringify(payload); } function parseTextSignature( signature: string | undefined, ): { id: string; phase?: TextSignatureV1["phase"] } | undefined { if (!signature) return undefined; if (signature.startsWith("{")) { try { const parsed = JSON.parse(signature) as Partial; if (parsed.v === 1 && typeof parsed.id === "string") { if (parsed.phase === "commentary" || parsed.phase === "final_answer") { return { id: parsed.id, phase: parsed.phase }; } return { id: parsed.id }; } } catch { // Fall through to legacy plain-string handling. } } return { id: signature }; } export interface OpenAIResponsesStreamOptions { serviceTier?: ResponseCreateParamsStreaming["service_tier"]; applyServiceTierPricing?: ( usage: Usage, serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined, ) => void; } export interface ConvertResponsesMessagesOptions { includeSystemPrompt?: boolean; } export interface ConvertResponsesToolsOptions { strict?: boolean | null; } // ============================================================================= // Message conversion // ============================================================================= export function convertResponsesMessages( model: Model, context: Context, allowedToolCallProviders: ReadonlySet, options?: ConvertResponsesMessagesOptions, ): ResponseInput { const messages: ResponseInput = []; const normalizeToolCallId = (id: string): string => { if (!allowedToolCallProviders.has(model.provider)) return id; if (!id.includes("|")) return id; const [callId, itemId] = id.split("|"); const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_"); let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_"); // OpenAI Responses API requires item id to start with "fc" if (!sanitizedItemId.startsWith("fc")) { sanitizedItemId = `fc_${sanitizedItemId}`; } // Truncate to 64 chars and strip trailing underscores (OpenAI Codex rejects them) let normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId; let normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId; normalizedCallId = normalizedCallId.replace(/_+$/, ""); normalizedItemId = normalizedItemId.replace(/_+$/, ""); return `${normalizedCallId}|${normalizedItemId}`; }; const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); const includeSystemPrompt = options?.includeSystemPrompt ?? true; if (includeSystemPrompt && context.systemPrompt) { const role = model.reasoning ? "developer" : "system"; messages.push({ role, content: sanitizeSurrogates(context.systemPrompt), }); } let msgIndex = 0; for (const msg of transformedMessages) { if (msg.role === "user") { if (typeof msg.content === "string") { messages.push({ role: "user", content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }], }); } else { const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => { if (item.type === "text") { return { type: "input_text", text: sanitizeSurrogates(item.text), } satisfies ResponseInputText; } return { type: "input_image", detail: "auto", image_url: `data:${item.mimeType};base64,${item.data}`, } satisfies ResponseInputImage; }); const filteredContent = !model.input.includes("image") ? content.filter((c) => c.type !== "input_image") : content; if (filteredContent.length === 0) continue; messages.push({ role: "user", content: filteredContent, }); } } else if (msg.role === "assistant") { const output: ResponseInput = []; const assistantMsg = msg as AssistantMessage; const isDifferentModel = assistantMsg.model !== model.id && assistantMsg.provider === model.provider && assistantMsg.api === model.api; for (const block of msg.content) { if (block.type === "thinking") { if (block.thinking.trim().length === 0) continue; if (block.thinkingSignature) { const reasoningItem = JSON.parse(block.thinkingSignature) as ResponseReasoningItem; output.push(reasoningItem); } } else if (block.type === "text") { const textBlock = block as TextContent; const parsedSignature = parseTextSignature(textBlock.textSignature); // OpenAI requires id to be max 64 characters let msgId = parsedSignature?.id; if (!msgId) { msgId = `msg_${msgIndex}`; } else if (msgId.length > 64) { msgId = `msg_${shortHash(msgId)}`; } output.push({ type: "message", role: "assistant", content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }], status: "completed", id: msgId, phase: parsedSignature?.phase, } satisfies ResponseOutputMessage); } else if (block.type === "toolCall") { const toolCall = block as ToolCall; const [callId, itemIdRaw] = toolCall.id.split("|"); let itemId: string | undefined = itemIdRaw; // For different-model messages, set id to undefined to avoid pairing validation. // OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items. // By omitting the id, we avoid triggering that validation (like cross-provider does). if (isDifferentModel && itemId?.startsWith("fc_")) { itemId = undefined; } output.push({ type: "function_call", id: itemId, call_id: callId, name: toolCall.name, arguments: JSON.stringify(toolCall.arguments), }); } } if (output.length === 0) continue; messages.push(...output); } else if (msg.role === "toolResult") { // Extract text and image content const textResult = msg.content .filter((c): c is TextContent => c.type === "text") .map((c) => c.text) .join("\n"); const hasImages = msg.content.some((c): c is ImageContent => c.type === "image"); // Always send function_call_output with text (or placeholder if only images) const hasText = textResult.length > 0; const [callId] = msg.toolCallId.split("|"); messages.push({ type: "function_call_output", call_id: callId, output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"), }); // If there are images and model supports them, send a follow-up user message with images if (hasImages && model.input.includes("image")) { const contentParts: ResponseInputContent[] = []; // Add text prefix contentParts.push({ type: "input_text", text: "Attached image(s) from tool result:", } satisfies ResponseInputText); // Add images for (const block of msg.content) { if (block.type === "image") { contentParts.push({ type: "input_image", detail: "auto", image_url: `data:${block.mimeType};base64,${block.data}`, } satisfies ResponseInputImage); } } messages.push({ role: "user", content: contentParts, }); } } msgIndex++; } return messages; } // ============================================================================= // Tool conversion // ============================================================================= export function convertResponsesTools(tools: Tool[], options?: ConvertResponsesToolsOptions): OpenAITool[] { const strict = options?.strict === undefined ? false : options.strict; return tools.map((tool) => ({ type: "function", name: tool.name, description: tool.description, parameters: tool.parameters as any, // TypeBox already generates JSON Schema strict, })); } // ============================================================================= // Stream processing // ============================================================================= export async function processResponsesStream( openaiStream: AsyncIterable, output: AssistantMessage, stream: AssistantMessageEventStream, model: Model, options?: OpenAIResponsesStreamOptions, ): Promise { let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null; let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null; const blocks = output.content; const blockIndex = () => blocks.length - 1; for await (const event of openaiStream) { if (event.type === "response.output_item.added") { const item = event.item; if (item.type === "reasoning") { currentItem = item; currentBlock = { type: "thinking", thinking: "" }; output.content.push(currentBlock); stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output }); } else if (item.type === "message") { currentItem = item; currentBlock = { type: "text", text: "" }; output.content.push(currentBlock); stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); } else if (item.type === "function_call") { currentItem = item; currentBlock = { type: "toolCall", id: `${item.call_id}|${item.id}`, name: item.name, arguments: {}, partialJson: item.arguments || "", }; output.content.push(currentBlock); stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output }); } } else if (event.type === "response.reasoning_summary_part.added") { if (currentItem && currentItem.type === "reasoning") { currentItem.summary = currentItem.summary || []; currentItem.summary.push(event.part); } } else if (event.type === "response.reasoning_summary_text.delta") { if (currentItem?.type === "reasoning" && currentBlock?.type === "thinking") { currentItem.summary = currentItem.summary || []; const lastPart = currentItem.summary[currentItem.summary.length - 1]; if (lastPart) { currentBlock.thinking += event.delta; lastPart.text += event.delta; stream.push({ type: "thinking_delta", contentIndex: blockIndex(), delta: event.delta, partial: output, }); } } } else if (event.type === "response.reasoning_summary_part.done") { if (currentItem?.type === "reasoning" && currentBlock?.type === "thinking") { currentItem.summary = currentItem.summary || []; const lastPart = currentItem.summary[currentItem.summary.length - 1]; if (lastPart) { currentBlock.thinking += "\n\n"; lastPart.text += "\n\n"; stream.push({ type: "thinking_delta", contentIndex: blockIndex(), delta: "\n\n", partial: output, }); } } } else if (event.type === "response.content_part.added") { if (currentItem?.type === "message") { currentItem.content = currentItem.content || []; // Filter out ReasoningText, only accept output_text and refusal if (event.part.type === "output_text" || event.part.type === "refusal") { currentItem.content.push(event.part); } } } else if (event.type === "response.output_text.delta") { if (currentItem?.type === "message" && currentBlock?.type === "text") { if (!currentItem.content || currentItem.content.length === 0) { continue; } const lastPart = currentItem.content[currentItem.content.length - 1]; if (lastPart?.type === "output_text") { currentBlock.text += event.delta; lastPart.text += event.delta; stream.push({ type: "text_delta", contentIndex: blockIndex(), delta: event.delta, partial: output, }); } } } else if (event.type === "response.refusal.delta") { if (currentItem?.type === "message" && currentBlock?.type === "text") { if (!currentItem.content || currentItem.content.length === 0) { continue; } const lastPart = currentItem.content[currentItem.content.length - 1]; if (lastPart?.type === "refusal") { currentBlock.text += event.delta; lastPart.refusal += event.delta; stream.push({ type: "text_delta", contentIndex: blockIndex(), delta: event.delta, partial: output, }); } } } else if (event.type === "response.function_call_arguments.delta") { if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") { currentBlock.partialJson += event.delta; currentBlock.arguments = parseStreamingJson(currentBlock.partialJson); stream.push({ type: "toolcall_delta", contentIndex: blockIndex(), delta: event.delta, partial: output, }); } } else if (event.type === "response.function_call_arguments.done") { if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") { currentBlock.partialJson = event.arguments; currentBlock.arguments = parseStreamingJson(currentBlock.partialJson); } } else if (event.type === "response.output_item.done") { const item = event.item; if (item.type === "reasoning" && currentBlock?.type === "thinking") { currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || ""; currentBlock.thinkingSignature = JSON.stringify(item); stream.push({ type: "thinking_end", contentIndex: blockIndex(), content: currentBlock.thinking, partial: output, }); currentBlock = null; } else if (item.type === "message" && currentBlock?.type === "text") { currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""); currentBlock.textSignature = encodeTextSignatureV1(item.id, item.phase ?? undefined); stream.push({ type: "text_end", contentIndex: blockIndex(), content: currentBlock.text, partial: output, }); currentBlock = null; } else if (item.type === "function_call") { const args = currentBlock?.type === "toolCall" && currentBlock.partialJson ? parseStreamingJson(currentBlock.partialJson) : parseStreamingJson(item.arguments || "{}"); const toolCall: ToolCall = { type: "toolCall", id: `${item.call_id}|${item.id}`, name: item.name, arguments: args, }; currentBlock = null; stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output }); } } else if (event.type === "response.completed") { const response = event.response; if (response?.usage) { const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0; output.usage = { // OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input input: (response.usage.input_tokens || 0) - cachedTokens, output: response.usage.output_tokens || 0, cacheRead: cachedTokens, cacheWrite: 0, totalTokens: response.usage.total_tokens || 0, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; } calculateCost(model, output.usage); if (options?.applyServiceTierPricing) { const serviceTier = response?.service_tier ?? options.serviceTier; options.applyServiceTierPricing(output.usage, serviceTier); } // Map status to stop reason output.stopReason = mapStopReason(response?.status); if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") { output.stopReason = "toolUse"; } } else if (event.type === "error") { throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error"); } else if (event.type === "response.failed") { throw new Error("Unknown error"); } } } function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason { if (!status) return "stop"; switch (status) { case "completed": return "stop"; case "incomplete": return "length"; case "failed": case "cancelled": return "error"; // These two are wonky ... case "in_progress": case "queued": return "stop"; default: { const _exhaustive: never = status; throw new Error(`Unhandled stop reason: ${_exhaustive}`); } } }