add Azure OpenAI Responses provider with deployment-aware model mapping

Markus Ylisiurunen 2026-01-21 20:13:00 +02:00 committed by Mario Zechner
parent 951fb953ed
commit 856012296b
23 changed files with 1465 additions and 21 deletions
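For orientation, a minimal usage sketch of the new provider (not part of this diff; it assumes the package index re-exports MODELS and stream() as shown below, that AssistantMessageEventStream is async-iterable, and a hypothetical deployment name):

import { MODELS, stream } from "./index.js";

// Model metadata comes from the table added below. Auth and routing resolve from
// AZURE_OPENAI_API_KEY plus AZURE_OPENAI_ENDPOINT (or AZURE_OPENAI_RESOURCE_NAME);
// the deployment name falls back to AZURE_OPENAI_DEPLOYMENT_NAME and then to model.id.
const model = MODELS["azure-openai-responses"]["gpt-5-mini"];

const events = stream(
  model,
  { messages: [{ role: "user", content: "Say hello." }] },
  {
    azureDeploymentName: "my-gpt-5-mini", // hypothetical deployment name
    reasoningEffort: "low",
  },
);

for await (const event of events) {
  if (event.type === "text_delta") process.stdout.write(event.delta);
  if (event.type === "error") console.error(event.error.errorMessage);
}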

View file

@@ -1,9 +1,9 @@
export * from "./models.js";
export * from "./providers/anthropic.js";
export * from "./providers/azure-openai-responses.js";
export * from "./providers/google.js";
export * from "./providers/google-gemini-cli.js";
export * from "./providers/google-vertex.js";
export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./stream.js";

View file

@@ -1300,6 +1300,586 @@ export const MODELS = {
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
},
"azure-openai-responses": {
"codex-mini-latest": {
id: "codex-mini-latest",
name: "Codex Mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text"],
cost: {
input: 1.5,
output: 6,
cacheRead: 0.375,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"gpt-4": {
id: "gpt-4",
name: "GPT-4",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 8192,
} satisfies Model<"azure-openai-responses">,
"gpt-4-turbo": {
id: "gpt-4-turbo",
name: "GPT-4 Turbo",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 10,
output: 30,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1": {
id: "gpt-4.1",
name: "GPT-4.1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1-mini": {
id: "gpt-4.1-mini",
name: "GPT-4.1 mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.4,
output: 1.6,
cacheRead: 0.1,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1-nano": {
id: "gpt-4.1-nano",
name: "GPT-4.1 nano",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.03,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4o": {
id: "gpt-4o",
name: "GPT-4o",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-05-13": {
id: "gpt-4o-2024-05-13",
name: "GPT-4o (2024-05-13)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-08-06": {
id: "gpt-4o-2024-08-06",
name: "GPT-4o (2024-08-06)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-11-20": {
id: "gpt-4o-2024-11-20",
name: "GPT-4o (2024-11-20)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-mini": {
id: "gpt-4o-mini",
name: "GPT-4o mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0.08,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5": {
id: "gpt-5",
name: "GPT-5",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-chat-latest": {
id: "gpt-5-chat-latest",
name: "GPT-5 Chat Latest",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5-codex": {
id: "gpt-5-codex",
name: "GPT-5-Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-mini": {
id: "gpt-5-mini",
name: "GPT-5 Mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-nano": {
id: "gpt-5-nano",
name: "GPT-5 Nano",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.05,
output: 0.4,
cacheRead: 0.005,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-pro": {
id: "gpt-5-pro",
name: "GPT-5 Pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 120,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 272000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1": {
id: "gpt-5.1",
name: "GPT-5.1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.13,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-chat-latest": {
id: "gpt-5.1-chat-latest",
name: "GPT-5.1 Chat",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex": {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex-max": {
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex-mini": {
id: "gpt-5.1-codex-mini",
name: "GPT-5.1 Codex mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2": {
id: "gpt-5.2",
name: "GPT-5.2",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-chat-latest": {
id: "gpt-5.2-chat-latest",
name: "GPT-5.2 Chat",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-codex": {
id: "gpt-5.2-codex",
name: "GPT-5.2 Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-pro": {
id: "gpt-5.2-pro",
name: "GPT-5.2 Pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 21,
output: 168,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"o1": {
id: "o1",
name: "o1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 60,
cacheRead: 7.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o1-pro": {
id: "o1-pro",
name: "o1-pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 150,
output: 600,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3": {
id: "o3",
name: "o3",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-deep-research": {
id: "o3-deep-research",
name: "o3-deep-research",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 10,
output: 40,
cacheRead: 2.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-mini": {
id: "o3-mini",
name: "o3-mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text"],
cost: {
input: 1.1,
output: 4.4,
cacheRead: 0.55,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-pro": {
id: "o3-pro",
name: "o3-pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 20,
output: 80,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o4-mini": {
id: "o4-mini",
name: "o4-mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.1,
output: 4.4,
cacheRead: 0.28,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o4-mini-deep-research": {
id: "o4-mini-deep-research",
name: "o4-mini-deep-research",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
},
"cerebras": {
"gpt-oss-120b": {
id: "gpt-oss-120b",

View file

@@ -0,0 +1,660 @@
import type OpenAI from "openai";
import { AzureOpenAI } from "openai";
import type {
Tool as OpenAITool,
ResponseCreateParamsStreaming,
ResponseFunctionToolCall,
ResponseInput,
ResponseInputContent,
ResponseInputImage,
ResponseInputText,
ResponseOutputMessage,
ResponseReasoningItem,
} from "openai/resources/responses/responses.js";
import { calculateCost } from "../models.js";
import { getEnvApiKey } from "../stream.js";
import type {
Api,
AssistantMessage,
Context,
Model,
StopReason,
StreamFunction,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
Usage,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
/** Fast deterministic hash to shorten long strings */
function shortHash(str: string): string {
let h1 = 0xdeadbeef;
let h2 = 0x41c6ce57;
for (let i = 0; i < str.length; i++) {
const ch = str.charCodeAt(i);
h1 = Math.imul(h1 ^ ch, 2654435761);
h2 = Math.imul(h2 ^ ch, 1597334677);
}
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
}
const DEFAULT_AZURE_API_VERSION = "2025-04-01-preview";
// Azure OpenAI Responses-specific options
export interface AzureOpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null;
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
azureApiVersion?: string;
azureEndpoint?: string;
azureResourceName?: string;
azureBaseUrl?: string;
azureDeploymentName?: string;
}
/**
* Generate function for Azure OpenAI Responses API
*/
export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = (
model: Model<"azure-openai-responses">,
context: Context,
options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
// Start async processing
(async () => {
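// Azure routes requests by deployment name rather than model id, so fall back to the model id when no deployment is configured.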
const deploymentName = options?.azureDeploymentName || process.env.AZURE_OPENAI_DEPLOYMENT_NAME || model.id;
const output: AssistantMessage = {
role: "assistant",
content: [],
api: "azure-openai-responses" as Api,
provider: model.provider,
model: deploymentName,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
try {
// Create Azure OpenAI client
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, apiKey, options);
const params = buildParams(model, context, options, deploymentName);
options?.onPayload?.(params);
const openaiStream = await client.responses.create(
params,
options?.signal ? { signal: options.signal } : undefined,
);
stream.push({ type: "start", partial: output });
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
for await (const event of openaiStream) {
// Handle output item start
if (event.type === "response.output_item.added") {
const item = event.item;
if (item.type === "reasoning") {
currentItem = item;
currentBlock = { type: "thinking", thinking: "" };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "message") {
currentItem = item;
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "function_call") {
currentItem = item;
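// Compose "call_id|item_id" into one id so both halves survive the round-trip back through convertMessages below.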
currentBlock = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: {},
partialJson: item.arguments || "",
};
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
}
}
// Handle reasoning summary deltas
else if (event.type === "response.reasoning_summary_part.added") {
if (currentItem && currentItem.type === "reasoning") {
currentItem.summary = currentItem.summary || [];
currentItem.summary.push(event.part);
}
} else if (event.type === "response.reasoning_summary_text.delta") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += event.delta;
lastPart.text += event.delta;
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
}
// Separate summary parts with a blank line (workaround: the API emits no delimiter between them)
else if (event.type === "response.reasoning_summary_part.done") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += "\n\n";
lastPart.text += "\n\n";
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: "\n\n",
partial: output,
});
}
}
}
// Handle text output deltas
else if (event.type === "response.content_part.added") {
if (currentItem && currentItem.type === "message") {
currentItem.content = currentItem.content || [];
// Filter out ReasoningText, only accept output_text and refusal
if (event.part.type === "output_text" || event.part.type === "refusal") {
currentItem.content.push(event.part);
}
}
} else if (event.type === "response.output_text.delta") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "output_text") {
currentBlock.text += event.delta;
lastPart.text += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.refusal.delta") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "refusal") {
currentBlock.text += event.delta;
lastPart.refusal += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
}
// Handle function call argument deltas
else if (event.type === "response.function_call_arguments.delta") {
if (
currentItem &&
currentItem.type === "function_call" &&
currentBlock &&
currentBlock.type === "toolCall"
) {
currentBlock.partialJson += event.delta;
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
stream.push({
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
// Handle output item completion
else if (event.type === "response.output_item.done") {
const item = event.item;
if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
currentBlock.thinkingSignature = JSON.stringify(item);
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
currentBlock = null;
} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
currentBlock.textSignature = item.id;
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
currentBlock = null;
} else if (item.type === "function_call") {
const toolCall: ToolCall = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: JSON.parse(item.arguments),
};
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
}
}
// Handle completion
else if (event.type === "response.completed") {
const response = event.response;
if (response?.usage) {
const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
output.usage = {
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
input: (response.usage.input_tokens || 0) - cachedTokens,
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}
calculateCost(model, output.usage);
applyServiceTierPricing(output.usage, response?.service_tier ?? options?.serviceTier);
// Map status to stop reason
output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
output.stopReason = "toolUse";
}
}
// Handle errors
else if (event.type === "error") {
throw new Error(`Error Code ${event.code}: ${event.message || "Unknown error"}`);
} else if (event.type === "response.failed") {
throw new Error("Unknown error");
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
throw new Error("An unkown error ocurred");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
for (const block of output.content) delete (block as any).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
stream.push({ type: "error", reason: output.stopReason, error: output });
stream.end();
}
})();
return stream;
};
function normalizeAzureEndpoint(endpoint: string): string {
return endpoint.replace(/\/+$/, "");
}
function getAzureEndpoint(options?: AzureOpenAIResponsesOptions): string | undefined {
const endpoint =
options?.azureEndpoint ||
(options?.azureResourceName ? `https://${options.azureResourceName}.openai.azure.com` : undefined) ||
process.env.AZURE_OPENAI_ENDPOINT ||
(process.env.AZURE_OPENAI_RESOURCE_NAME
? `https://${process.env.AZURE_OPENAI_RESOURCE_NAME}.openai.azure.com`
: undefined);
return endpoint ? normalizeAzureEndpoint(endpoint) : undefined;
}
function resolveAzureConfig(
model: Model<"azure-openai-responses">,
options?: AzureOpenAIResponsesOptions,
): { baseUrl?: string; endpoint?: string; apiVersion: string } {
const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;
const baseUrl = options?.azureBaseUrl?.trim() || undefined;
const endpoint = getAzureEndpoint(options);
let resolvedBaseUrl = baseUrl;
const resolvedEndpoint = endpoint;
if (!resolvedBaseUrl && !resolvedEndpoint && model.baseUrl) {
resolvedBaseUrl = model.baseUrl;
}
if (!resolvedBaseUrl && !resolvedEndpoint) {
throw new Error(
"Azure OpenAI endpoint is required. Set AZURE_OPENAI_ENDPOINT or AZURE_OPENAI_RESOURCE_NAME, or pass azureEndpoint, azureResourceName, azureBaseUrl, or model.baseUrl.",
);
}
return {
baseUrl: resolvedBaseUrl,
endpoint: resolvedEndpoint,
apiVersion,
};
}
function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
if (!apiKey) {
if (!process.env.AZURE_OPENAI_API_KEY) {
throw new Error(
"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.AZURE_OPENAI_API_KEY;
}
const headers = { ...model.headers };
if (options?.headers) {
Object.assign(headers, options.headers);
}
const { baseUrl, endpoint, apiVersion } = resolveAzureConfig(model, options);
return new AzureOpenAI({
apiKey,
apiVersion,
dangerouslyAllowBrowser: true,
defaultHeaders: headers,
...(baseUrl ? { baseURL: baseUrl } : { endpoint }),
});
}
function buildParams(
model: Model<"azure-openai-responses">,
context: Context,
options: AzureOpenAIResponsesOptions | undefined,
deploymentName: string,
) {
const messages = convertMessages(model, context);
const params: ResponseCreateParamsStreaming = {
model: deploymentName,
input: messages,
stream: true,
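// Reusing the session id as the prompt cache key keeps multi-turn requests on the same server-side prompt cache.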
prompt_cache_key: options?.sessionId,
};
if (options?.maxTokens) {
params.max_output_tokens = options.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options.temperature;
}
if (options?.serviceTier !== undefined) {
params.service_tier = options.serviceTier;
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
if (model.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary !== undefined ? options.reasoningSummary : "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
if (model.id.startsWith("gpt-5")) {
// GPT-5 models expose no way to disable reasoning outright; this developer-message workaround minimizes it, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
messages.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
}
return params;
}
function convertMessages(model: Model<"azure-openai-responses">, context: Context): ResponseInput {
const messages: ResponseInput = [];
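// Tool-call ids arrive as the "call_id|item_id" pairs built by the stream handler; sanitize both halves to the character set and 64-char limit the Responses API accepts.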
const normalizeToolCallId = (id: string): string => {
const allowedProviders = new Set(["openai", "openai-codex", "opencode", "azure-openai-responses"]);
if (!allowedProviders.has(model.provider)) return id;
if (!id.includes("|")) return id;
const [callId, itemId] = id.split("|");
const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
// OpenAI Responses API requires item id to start with "fc"
if (!sanitizedItemId.startsWith("fc")) {
sanitizedItemId = `fc_${sanitizedItemId}`;
}
const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId;
const normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId;
return `${normalizedCallId}|${normalizedItemId}`;
};
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
if (context.systemPrompt) {
const role = model.reasoning ? "developer" : "system";
messages.push({
role,
content: sanitizeSurrogates(context.systemPrompt),
});
}
let msgIndex = 0;
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
messages.push({
role: "user",
content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
});
} else {
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: sanitizeSurrogates(item.text),
} satisfies ResponseInputText;
} else {
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
if (filteredContent.length === 0) continue;
messages.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const output: ResponseInput = [];
for (const block of msg.content) {
if (block.type === "thinking") {
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
// OpenAI requires id to be max 64 characters
let msgId = textBlock.textSignature;
if (!msgId) {
msgId = `msg_${msgIndex}`;
} else if (msgId.length > 64) {
msgId = `msg_${shortHash(msgId)}`;
}
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
status: "completed",
id: msgId,
} satisfies ResponseOutputMessage);
} else if (block.type === "toolCall") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1],
call_id: toolCall.id.split("|")[0],
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
if (output.length === 0) continue;
messages.push(...output);
} else if (msg.role === "toolResult") {
// Extract text and image content
const textResult = msg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.join("\n");
const hasImages = msg.content.some((c) => c.type === "image");
// Always send function_call_output with text (or placeholder if only images)
const hasText = textResult.length > 0;
messages.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0],
output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
});
// If there are images and model supports them, send a follow-up user message with images
if (hasImages && model.input.includes("image")) {
const contentParts: ResponseInputContent[] = [];
// Add text prefix
contentParts.push({
type: "input_text",
text: "Attached image(s) from tool result:",
} satisfies ResponseInputText);
// Add images
for (const block of msg.content) {
if (block.type === "image") {
contentParts.push({
type: "input_image",
detail: "auto",
image_url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
} satisfies ResponseInputImage);
}
}
messages.push({
role: "user",
content: contentParts,
});
}
}
msgIndex++;
}
return messages;
}
function convertTools(tools: Tool[]): OpenAITool[] {
return tools.map((tool) => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
strict: false,
}));
}
function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined): number {
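// Multipliers encode this provider's pricing assumption: flex is billed at half the standard rate, priority at double.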
switch (serviceTier) {
case "flex":
return 0.5;
case "priority":
return 2;
default:
return 1;
}
}
function applyServiceTierPricing(usage: Usage, serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined) {
const multiplier = getServiceTierCostMultiplier(serviceTier);
if (multiplier === 1) return;
usage.cost.input *= multiplier;
usage.cost.output *= multiplier;
usage.cost.cacheRead *= multiplier;
usage.cost.cacheWrite *= multiplier;
usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
}
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
if (!status) return "stop";
switch (status) {
case "completed":
return "stop";
case "incomplete":
return "length";
case "failed":
case "cancelled":
return "error";
// A completed stream should not still report these states; treat them as a normal stop.
case "in_progress":
case "queued":
return "stop";
default: {
const _exhaustive: never = status;
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
}
}

View file

@@ -19,6 +19,7 @@ if (typeof process !== "undefined" && (process.versions?.node || process.version
import { supportsXhigh } from "./models.js";
import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock.js";
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses.js";
import { type GoogleOptions, streamGoogle } from "./providers/google.js";
import {
type GoogleGeminiCliOptions,
@@ -118,6 +119,7 @@ export function getEnvApiKey(provider: any): string | undefined {
const envMap: Record<string, string> = {
openai: "OPENAI_API_KEY",
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
google: "GEMINI_API_KEY",
groq: "GROQ_API_KEY",
cerebras: "CEREBRAS_API_KEY",
@@ -165,6 +167,9 @@ export function stream<TApi extends Api>(
case "openai-responses":
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
case "azure-openai-responses":
return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
case "openai-codex-responses":
return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
@@ -350,6 +355,12 @@ function mapOptionsForApi<TApi extends Api>(
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
} satisfies OpenAIResponsesOptions;
case "azure-openai-responses":
return {
...base,
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
} satisfies AzureOpenAIResponsesOptions;
case "openai-codex-responses":
return {
...base,

View file

@@ -1,5 +1,6 @@
import type { BedrockOptions } from "./providers/amazon-bedrock.js";
import type { AnthropicOptions } from "./providers/anthropic.js";
import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses.js";
import type { GoogleOptions } from "./providers/google.js";
import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js";
import type { GoogleVertexOptions } from "./providers/google-vertex.js";
@@ -13,6 +14,7 @@ export type { AssistantMessageEventStream } from "./utils/event-stream.js";
export type Api =
| "openai-completions"
| "openai-responses"
| "azure-openai-responses"
| "openai-codex-responses"
| "anthropic-messages"
| "bedrock-converse-stream"
@@ -25,6 +27,7 @@ export interface ApiOptionsMap {
"bedrock-converse-stream": BedrockOptions;
"openai-completions": OpenAICompletionsOptions;
"openai-responses": OpenAIResponsesOptions;
"azure-openai-responses": AzureOpenAIResponsesOptions;
"openai-codex-responses": OpenAICodexResponsesOptions;
"google-generative-ai": GoogleOptions;
"google-gemini-cli": GoogleGeminiCliOptions;
@@ -50,6 +53,7 @@ export type KnownProvider =
| "google-antigravity"
| "google-vertex"
| "openai"
| "azure-openai-responses"
| "openai-codex"
| "github-copilot"
| "xai"