refactor(ai): share openai responses logic

This commit is contained in:
Markus Ylisiurunen 2026-01-22 01:27:33 +02:00 committed by Mario Zechner
parent 085c378d34
commit 284ff81035
3 changed files with 445 additions and 888 deletions

View file

@ -0,0 +1,426 @@
import type OpenAI from "openai";
import type {
Tool as OpenAITool,
ResponseCreateParamsStreaming,
ResponseFunctionToolCall,
ResponseInput,
ResponseInputContent,
ResponseInputImage,
ResponseInputText,
ResponseOutputMessage,
ResponseReasoningItem,
ResponseStreamEvent,
} from "openai/resources/responses/responses.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
ImageContent,
Model,
StopReason,
TextContent,
ThinkingContent,
Tool,
ToolCall,
Usage,
} from "../types.js";
import type { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
/** Fast deterministic hash to shorten long strings */
function shortHash(str: string): string {
let h1 = 0xdeadbeef;
let h2 = 0x41c6ce57;
for (let i = 0; i < str.length; i++) {
const ch = str.charCodeAt(i);
h1 = Math.imul(h1 ^ ch, 2654435761);
h2 = Math.imul(h2 ^ ch, 1597334677);
}
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
}
export interface OpenAIResponsesStreamOptions {
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
applyServiceTierPricing?: (
usage: Usage,
serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined,
) => void;
}
export function convertResponsesMessages<TApi extends Api>(
model: Model<TApi>,
context: Context,
allowedToolCallProviders: ReadonlySet<string>,
): ResponseInput {
const messages: ResponseInput = [];
const normalizeToolCallId = (id: string): string => {
if (!allowedToolCallProviders.has(model.provider)) return id;
if (!id.includes("|")) return id;
const [callId, itemId] = id.split("|");
const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
// OpenAI Responses API requires item id to start with "fc"
if (!sanitizedItemId.startsWith("fc")) {
sanitizedItemId = `fc_${sanitizedItemId}`;
}
const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId;
const normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId;
return `${normalizedCallId}|${normalizedItemId}`;
};
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
if (context.systemPrompt) {
const role = model.reasoning ? "developer" : "system";
messages.push({
role,
content: sanitizeSurrogates(context.systemPrompt),
});
}
let msgIndex = 0;
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
messages.push({
role: "user",
content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
});
} else {
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: sanitizeSurrogates(item.text),
} satisfies ResponseInputText;
}
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
if (filteredContent.length === 0) continue;
messages.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const output: ResponseInput = [];
for (const block of msg.content) {
if (block.type === "thinking") {
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature) as ResponseReasoningItem;
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
// OpenAI requires id to be max 64 characters
let msgId = textBlock.textSignature;
if (!msgId) {
msgId = `msg_${msgIndex}`;
} else if (msgId.length > 64) {
msgId = `msg_${shortHash(msgId)}`;
}
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
status: "completed",
id: msgId,
} satisfies ResponseOutputMessage);
} else if (block.type === "toolCall") {
const toolCall = block as ToolCall;
const [callId, itemId] = toolCall.id.split("|");
output.push({
type: "function_call",
id: itemId,
call_id: callId,
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
if (output.length === 0) continue;
messages.push(...output);
} else if (msg.role === "toolResult") {
// Extract text and image content
const textResult = msg.content
.filter((c): c is TextContent => c.type === "text")
.map((c) => c.text)
.join("\n");
const hasImages = msg.content.some((c): c is ImageContent => c.type === "image");
// Always send function_call_output with text (or placeholder if only images)
const hasText = textResult.length > 0;
const [callId] = msg.toolCallId.split("|");
messages.push({
type: "function_call_output",
call_id: callId,
output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
});
// If there are images and model supports them, send a follow-up user message with images
if (hasImages && model.input.includes("image")) {
const contentParts: ResponseInputContent[] = [];
// Add text prefix
contentParts.push({
type: "input_text",
text: "Attached image(s) from tool result:",
} satisfies ResponseInputText);
// Add images
for (const block of msg.content) {
if (block.type === "image") {
contentParts.push({
type: "input_image",
detail: "auto",
image_url: `data:${block.mimeType};base64,${block.data}`,
} satisfies ResponseInputImage);
}
}
messages.push({
role: "user",
content: contentParts,
});
}
}
msgIndex++;
}
return messages;
}
export function convertResponsesTools(tools: Tool[]): OpenAITool[] {
return tools.map((tool) => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
strict: false,
}));
}
export async function processResponsesStream<TApi extends Api>(
openaiStream: AsyncIterable<ResponseStreamEvent>,
output: AssistantMessage,
stream: AssistantMessageEventStream,
model: Model<TApi>,
options?: OpenAIResponsesStreamOptions,
): Promise<void> {
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
for await (const event of openaiStream) {
if (event.type === "response.output_item.added") {
const item = event.item;
if (item.type === "reasoning") {
currentItem = item;
currentBlock = { type: "thinking", thinking: "" };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "message") {
currentItem = item;
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "function_call") {
currentItem = item;
currentBlock = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: {},
partialJson: item.arguments || "",
};
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
}
} else if (event.type === "response.reasoning_summary_part.added") {
if (currentItem && currentItem.type === "reasoning") {
currentItem.summary = currentItem.summary || [];
currentItem.summary.push(event.part);
}
} else if (event.type === "response.reasoning_summary_text.delta") {
if (currentItem?.type === "reasoning" && currentBlock?.type === "thinking") {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += event.delta;
lastPart.text += event.delta;
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.reasoning_summary_part.done") {
if (currentItem?.type === "reasoning" && currentBlock?.type === "thinking") {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += "\n\n";
lastPart.text += "\n\n";
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: "\n\n",
partial: output,
});
}
}
} else if (event.type === "response.content_part.added") {
if (currentItem?.type === "message") {
currentItem.content = currentItem.content || [];
// Filter out ReasoningText, only accept output_text and refusal
if (event.part.type === "output_text" || event.part.type === "refusal") {
currentItem.content.push(event.part);
}
}
} else if (event.type === "response.output_text.delta") {
if (currentItem?.type === "message" && currentBlock?.type === "text") {
if (!currentItem.content || currentItem.content.length === 0) {
continue;
}
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart?.type === "output_text") {
currentBlock.text += event.delta;
lastPart.text += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.refusal.delta") {
if (currentItem?.type === "message" && currentBlock?.type === "text") {
if (!currentItem.content || currentItem.content.length === 0) {
continue;
}
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart?.type === "refusal") {
currentBlock.text += event.delta;
lastPart.refusal += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.function_call_arguments.delta") {
if (currentItem?.type === "function_call" && currentBlock?.type === "toolCall") {
currentBlock.partialJson += event.delta;
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
stream.push({
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
} else if (event.type === "response.output_item.done") {
const item = event.item;
if (item.type === "reasoning" && currentBlock?.type === "thinking") {
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
currentBlock.thinkingSignature = JSON.stringify(item);
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
currentBlock = null;
} else if (item.type === "message" && currentBlock?.type === "text") {
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
currentBlock.textSignature = item.id;
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
currentBlock = null;
} else if (item.type === "function_call") {
const toolCall: ToolCall = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: JSON.parse(item.arguments),
};
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
}
} else if (event.type === "response.completed") {
const response = event.response;
if (response?.usage) {
const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
output.usage = {
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
input: (response.usage.input_tokens || 0) - cachedTokens,
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}
calculateCost(model, output.usage);
if (options?.applyServiceTierPricing) {
const serviceTier = response?.service_tier ?? options.serviceTier;
options.applyServiceTierPricing(output.usage, serviceTier);
}
// Map status to stop reason
output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
output.stopReason = "toolUse";
}
} else if (event.type === "error") {
throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error");
} else if (event.type === "response.failed") {
throw new Error("Unknown error");
}
}
}
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
if (!status) return "stop";
switch (status) {
case "completed":
return "stop";
case "incomplete":
return "length";
case "failed":
case "cancelled":
return "error";
// These two are wonky ...
case "in_progress":
case "queued":
return "stop";
default: {
const _exhaustive: never = status;
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
}
}