add Azure OpenAI Responses provider with deployment-aware model mapping

Markus Ylisiurunen 2026-01-21 20:13:00 +02:00 committed by Mario Zechner
parent 951fb953ed
commit 856012296b
23 changed files with 1465 additions and 21 deletions
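For orientation, a minimal usage sketch of the new provider (not part of this diff; it assumes the package index re-exports MODELS and stream() as shown below, that AssistantMessageEventStream is async-iterable, and a hypothetical deployment name):

import { MODELS, stream } from "./index.js";

// Model metadata comes from the table added below. Auth and routing resolve from
// AZURE_OPENAI_API_KEY plus AZURE_OPENAI_ENDPOINT (or AZURE_OPENAI_RESOURCE_NAME);
// the deployment name falls back to AZURE_OPENAI_DEPLOYMENT_NAME and then to model.id.
const model = MODELS["azure-openai-responses"]["gpt-5-mini"];

const events = stream(
  model,
  { messages: [{ role: "user", content: "Say hello." }] },
  {
    azureDeploymentName: "my-gpt-5-mini", // hypothetical deployment name
    reasoningEffort: "low",
  },
);

for await (const event of events) {
  if (event.type === "text_delta") process.stdout.write(event.delta);
  if (event.type === "error") console.error(event.error.errorMessage);
}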

View file

@@ -1,9 +1,9 @@
export * from "./models.js";
export * from "./providers/anthropic.js";
export * from "./providers/azure-openai-responses.js";
export * from "./providers/google.js";
export * from "./providers/google-gemini-cli.js";
export * from "./providers/google-vertex.js";
export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./stream.js";

View file

@@ -1300,6 +1300,586 @@ export const MODELS = {
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
},
"azure-openai-responses": {
"codex-mini-latest": {
id: "codex-mini-latest",
name: "Codex Mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text"],
cost: {
input: 1.5,
output: 6,
cacheRead: 0.375,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"gpt-4": {
id: "gpt-4",
name: "GPT-4",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 8192,
} satisfies Model<"azure-openai-responses">,
"gpt-4-turbo": {
id: "gpt-4-turbo",
name: "GPT-4 Turbo",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 10,
output: 30,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1": {
id: "gpt-4.1",
name: "GPT-4.1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1-mini": {
id: "gpt-4.1-mini",
name: "GPT-4.1 mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.4,
output: 1.6,
cacheRead: 0.1,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4.1-nano": {
id: "gpt-4.1-nano",
name: "GPT-4.1 nano",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.1,
output: 0.4,
cacheRead: 0.03,
cacheWrite: 0,
},
contextWindow: 1047576,
maxTokens: 32768,
} satisfies Model<"azure-openai-responses">,
"gpt-4o": {
id: "gpt-4o",
name: "GPT-4o",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-05-13": {
id: "gpt-4o-2024-05-13",
name: "GPT-4o (2024-05-13)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-08-06": {
id: "gpt-4o-2024-08-06",
name: "GPT-4o (2024-08-06)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-2024-11-20": {
id: "gpt-4o-2024-11-20",
name: "GPT-4o (2024-11-20)",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 2.5,
output: 10,
cacheRead: 1.25,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-4o-mini": {
id: "gpt-4o-mini",
name: "GPT-4o mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0.08,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5": {
id: "gpt-5",
name: "GPT-5",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-chat-latest": {
id: "gpt-5-chat-latest",
name: "GPT-5 Chat Latest",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5-codex": {
id: "gpt-5-codex",
name: "GPT-5-Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-mini": {
id: "gpt-5-mini",
name: "GPT-5 Mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-nano": {
id: "gpt-5-nano",
name: "GPT-5 Nano",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.05,
output: 0.4,
cacheRead: 0.005,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5-pro": {
id: "gpt-5-pro",
name: "GPT-5 Pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 120,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 272000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1": {
id: "gpt-5.1",
name: "GPT-5.1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.13,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-chat-latest": {
id: "gpt-5.1-chat-latest",
name: "GPT-5.1 Chat",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex": {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex-max": {
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.1-codex-mini": {
id: "gpt-5.1-codex-mini",
name: "GPT-5.1 Codex mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2": {
id: "gpt-5.2",
name: "GPT-5.2",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-chat-latest": {
id: "gpt-5.2-chat-latest",
name: "GPT-5.2 Chat",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-codex": {
id: "gpt-5.2-codex",
name: "GPT-5.2 Codex",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.75,
output: 14,
cacheRead: 0.175,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"gpt-5.2-pro": {
id: "gpt-5.2-pro",
name: "GPT-5.2 Pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 21,
output: 168,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"azure-openai-responses">,
"o1": {
id: "o1",
name: "o1",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 15,
output: 60,
cacheRead: 7.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o1-pro": {
id: "o1-pro",
name: "o1-pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 150,
output: 600,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3": {
id: "o3",
name: "o3",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-deep-research": {
id: "o3-deep-research",
name: "o3-deep-research",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 10,
output: 40,
cacheRead: 2.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-mini": {
id: "o3-mini",
name: "o3-mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text"],
cost: {
input: 1.1,
output: 4.4,
cacheRead: 0.55,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o3-pro": {
id: "o3-pro",
name: "o3-pro",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 20,
output: 80,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o4-mini": {
id: "o4-mini",
name: "o4-mini",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.1,
output: 4.4,
cacheRead: 0.28,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
"o4-mini-deep-research": {
id: "o4-mini-deep-research",
name: "o4-mini-deep-research",
api: "azure-openai-responses",
provider: "azure-openai-responses",
baseUrl: "",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 8,
cacheRead: 0.5,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"azure-openai-responses">,
},
"cerebras": {
"gpt-oss-120b": {
id: "gpt-oss-120b",

View file

@@ -0,0 +1,660 @@
import type OpenAI from "openai";
import { AzureOpenAI } from "openai";
import type {
Tool as OpenAITool,
ResponseCreateParamsStreaming,
ResponseFunctionToolCall,
ResponseInput,
ResponseInputContent,
ResponseInputImage,
ResponseInputText,
ResponseOutputMessage,
ResponseReasoningItem,
} from "openai/resources/responses/responses.js";
import { calculateCost } from "../models.js";
import { getEnvApiKey } from "../stream.js";
import type {
Api,
AssistantMessage,
Context,
Model,
StopReason,
StreamFunction,
StreamOptions,
TextContent,
ThinkingContent,
Tool,
ToolCall,
Usage,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { transformMessages } from "./transform-messages.js";
/** Fast deterministic hash to shorten long strings */
function shortHash(str: string): string {
let h1 = 0xdeadbeef;
let h2 = 0x41c6ce57;
for (let i = 0; i < str.length; i++) {
const ch = str.charCodeAt(i);
h1 = Math.imul(h1 ^ ch, 2654435761);
h2 = Math.imul(h2 ^ ch, 1597334677);
}
h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909);
h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909);
return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36);
}
const DEFAULT_AZURE_API_VERSION = "2025-04-01-preview";
// Azure OpenAI Responses-specific options
export interface AzureOpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null;
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
azureApiVersion?: string;
azureEndpoint?: string;
azureResourceName?: string;
azureBaseUrl?: string;
azureDeploymentName?: string;
}
/**
* Generate function for Azure OpenAI Responses API
*/
export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = (
model: Model<"azure-openai-responses">,
context: Context,
options?: AzureOpenAIResponsesOptions,
): AssistantMessageEventStream => {
const stream = new AssistantMessageEventStream();
// Start async processing
(async () => {
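// Azure routes requests by deployment name rather than model id, so fall back to the model id when no deployment is configured.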
const deploymentName = options?.azureDeploymentName || process.env.AZURE_OPENAI_DEPLOYMENT_NAME || model.id;
const output: AssistantMessage = {
role: "assistant",
content: [],
api: "azure-openai-responses" as Api,
provider: model.provider,
model: deploymentName,
usage: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
timestamp: Date.now(),
};
try {
// Create Azure OpenAI client
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, apiKey, options);
const params = buildParams(model, context, options, deploymentName);
options?.onPayload?.(params);
const openaiStream = await client.responses.create(
params,
options?.signal ? { signal: options.signal } : undefined,
);
stream.push({ type: "start", partial: output });
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
for await (const event of openaiStream) {
// Handle output item start
if (event.type === "response.output_item.added") {
const item = event.item;
if (item.type === "reasoning") {
currentItem = item;
currentBlock = { type: "thinking", thinking: "" };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "message") {
currentItem = item;
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
} else if (item.type === "function_call") {
currentItem = item;
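// Compose "call_id|item_id" into one id so both halves survive the round-trip back through convertMessages below.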
currentBlock = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: {},
partialJson: item.arguments || "",
};
output.content.push(currentBlock);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
}
}
// Handle reasoning summary deltas
else if (event.type === "response.reasoning_summary_part.added") {
if (currentItem && currentItem.type === "reasoning") {
currentItem.summary = currentItem.summary || [];
currentItem.summary.push(event.part);
}
} else if (event.type === "response.reasoning_summary_text.delta") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += event.delta;
lastPart.text += event.delta;
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
}
// Separate summary parts with a blank line (workaround: the API emits no delimiter between them)
else if (event.type === "response.reasoning_summary_part.done") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += "\n\n";
lastPart.text += "\n\n";
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: "\n\n",
partial: output,
});
}
}
}
// Handle text output deltas
else if (event.type === "response.content_part.added") {
if (currentItem && currentItem.type === "message") {
currentItem.content = currentItem.content || [];
// Filter out ReasoningText, only accept output_text and refusal
if (event.part.type === "output_text" || event.part.type === "refusal") {
currentItem.content.push(event.part);
}
}
} else if (event.type === "response.output_text.delta") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "output_text") {
currentBlock.text += event.delta;
lastPart.text += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.refusal.delta") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "refusal") {
currentBlock.text += event.delta;
lastPart.refusal += event.delta;
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
}
// Handle function call argument deltas
else if (event.type === "response.function_call_arguments.delta") {
if (
currentItem &&
currentItem.type === "function_call" &&
currentBlock &&
currentBlock.type === "toolCall"
) {
currentBlock.partialJson += event.delta;
currentBlock.arguments = parseStreamingJson(currentBlock.partialJson);
stream.push({
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: event.delta,
partial: output,
});
}
}
// Handle output item completion
else if (event.type === "response.output_item.done") {
const item = event.item;
if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
currentBlock.thinkingSignature = JSON.stringify(item);
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
currentBlock = null;
} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
currentBlock.textSignature = item.id;
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
currentBlock = null;
} else if (item.type === "function_call") {
const toolCall: ToolCall = {
type: "toolCall",
id: `${item.call_id}|${item.id}`,
name: item.name,
arguments: JSON.parse(item.arguments),
};
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
}
}
// Handle completion
else if (event.type === "response.completed") {
const response = event.response;
if (response?.usage) {
const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
output.usage = {
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
input: (response.usage.input_tokens || 0) - cachedTokens,
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}
calculateCost(model, output.usage);
applyServiceTierPricing(output.usage, response?.service_tier ?? options?.serviceTier);
// Map status to stop reason
output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
output.stopReason = "toolUse";
}
}
// Handle errors
else if (event.type === "error") {
throw new Error(`Error Code ${event.code}: ${event.message || "Unknown error"}`);
} else if (event.type === "response.failed") {
throw new Error("Unknown error");
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
throw new Error("An unkown error ocurred");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
for (const block of output.content) delete (block as any).index;
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
stream.push({ type: "error", reason: output.stopReason, error: output });
stream.end();
}
})();
return stream;
};
function normalizeAzureEndpoint(endpoint: string): string {
return endpoint.replace(/\/+$/, "");
}
function getAzureEndpoint(options?: AzureOpenAIResponsesOptions): string | undefined {
const endpoint =
options?.azureEndpoint ||
(options?.azureResourceName ? `https://${options.azureResourceName}.openai.azure.com` : undefined) ||
process.env.AZURE_OPENAI_ENDPOINT ||
(process.env.AZURE_OPENAI_RESOURCE_NAME
? `https://${process.env.AZURE_OPENAI_RESOURCE_NAME}.openai.azure.com`
: undefined);
return endpoint ? normalizeAzureEndpoint(endpoint) : undefined;
}
function resolveAzureConfig(
model: Model<"azure-openai-responses">,
options?: AzureOpenAIResponsesOptions,
): { baseUrl?: string; endpoint?: string; apiVersion: string } {
const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION;
const baseUrl = options?.azureBaseUrl?.trim() || undefined;
const endpoint = getAzureEndpoint(options);
let resolvedBaseUrl = baseUrl;
const resolvedEndpoint = endpoint;
if (!resolvedBaseUrl && !resolvedEndpoint && model.baseUrl) {
resolvedBaseUrl = model.baseUrl;
}
if (!resolvedBaseUrl && !resolvedEndpoint) {
throw new Error(
"Azure OpenAI endpoint is required. Set AZURE_OPENAI_ENDPOINT or AZURE_OPENAI_RESOURCE_NAME, or pass azureEndpoint, azureResourceName, azureBaseUrl, or model.baseUrl.",
);
}
return {
baseUrl: resolvedBaseUrl,
endpoint: resolvedEndpoint,
apiVersion,
};
}
function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) {
if (!apiKey) {
if (!process.env.AZURE_OPENAI_API_KEY) {
throw new Error(
"Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.AZURE_OPENAI_API_KEY;
}
const headers = { ...model.headers };
if (options?.headers) {
Object.assign(headers, options.headers);
}
const { baseUrl, endpoint, apiVersion } = resolveAzureConfig(model, options);
return new AzureOpenAI({
apiKey,
apiVersion,
dangerouslyAllowBrowser: true,
defaultHeaders: headers,
...(baseUrl ? { baseURL: baseUrl } : { endpoint }),
});
}
function buildParams(
model: Model<"azure-openai-responses">,
context: Context,
options: AzureOpenAIResponsesOptions | undefined,
deploymentName: string,
) {
const messages = convertMessages(model, context);
const params: ResponseCreateParamsStreaming = {
model: deploymentName,
input: messages,
stream: true,
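// Reusing the session id as the prompt cache key keeps multi-turn requests on the same server-side prompt cache.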
prompt_cache_key: options?.sessionId,
};
if (options?.maxTokens) {
params.max_output_tokens = options.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options.temperature;
}
if (options?.serviceTier !== undefined) {
params.service_tier = options.serviceTier;
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
if (model.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary !== undefined ? options.reasoningSummary : "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
if (model.id.startsWith("gpt-5")) {
// GPT-5 models expose no way to disable reasoning outright; this developer-message workaround minimizes it, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
messages.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
}
return params;
}
function convertMessages(model: Model<"azure-openai-responses">, context: Context): ResponseInput {
const messages: ResponseInput = [];
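// Tool-call ids arrive as the "call_id|item_id" pairs built by the stream handler; sanitize both halves to the character set and 64-char limit the Responses API accepts.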
const normalizeToolCallId = (id: string): string => {
const allowedProviders = new Set(["openai", "openai-codex", "opencode", "azure-openai-responses"]);
if (!allowedProviders.has(model.provider)) return id;
if (!id.includes("|")) return id;
const [callId, itemId] = id.split("|");
const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_");
let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_");
// OpenAI Responses API requires item id to start with "fc"
if (!sanitizedItemId.startsWith("fc")) {
sanitizedItemId = `fc_${sanitizedItemId}`;
}
const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId;
const normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId;
return `${normalizedCallId}|${normalizedItemId}`;
};
const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId);
if (context.systemPrompt) {
const role = model.reasoning ? "developer" : "system";
messages.push({
role,
content: sanitizeSurrogates(context.systemPrompt),
});
}
let msgIndex = 0;
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
messages.push({
role: "user",
content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }],
});
} else {
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: sanitizeSurrogates(item.text),
} satisfies ResponseInputText;
} else {
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
if (filteredContent.length === 0) continue;
messages.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const output: ResponseInput = [];
for (const block of msg.content) {
if (block.type === "thinking") {
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
// OpenAI requires id to be max 64 characters
let msgId = textBlock.textSignature;
if (!msgId) {
msgId = `msg_${msgIndex}`;
} else if (msgId.length > 64) {
msgId = `msg_${shortHash(msgId)}`;
}
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }],
status: "completed",
id: msgId,
} satisfies ResponseOutputMessage);
} else if (block.type === "toolCall") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1],
call_id: toolCall.id.split("|")[0],
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
if (output.length === 0) continue;
messages.push(...output);
} else if (msg.role === "toolResult") {
// Extract text and image content
const textResult = msg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.join("\n");
const hasImages = msg.content.some((c) => c.type === "image");
// Always send function_call_output with text (or placeholder if only images)
const hasText = textResult.length > 0;
messages.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0],
output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"),
});
// If there are images and model supports them, send a follow-up user message with images
if (hasImages && model.input.includes("image")) {
const contentParts: ResponseInputContent[] = [];
// Add text prefix
contentParts.push({
type: "input_text",
text: "Attached image(s) from tool result:",
} satisfies ResponseInputText);
// Add images
for (const block of msg.content) {
if (block.type === "image") {
contentParts.push({
type: "input_image",
detail: "auto",
image_url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
} satisfies ResponseInputImage);
}
}
messages.push({
role: "user",
content: contentParts,
});
}
}
msgIndex++;
}
return messages;
}
function convertTools(tools: Tool[]): OpenAITool[] {
return tools.map((tool) => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.parameters as any, // TypeBox already generates JSON Schema
strict: false,
}));
}
function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined): number {
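// Multipliers encode this provider's pricing assumption: flex is billed at half the standard rate, priority at double.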
switch (serviceTier) {
case "flex":
return 0.5;
case "priority":
return 2;
default:
return 1;
}
}
function applyServiceTierPricing(usage: Usage, serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined) {
const multiplier = getServiceTierCostMultiplier(serviceTier);
if (multiplier === 1) return;
usage.cost.input *= multiplier;
usage.cost.output *= multiplier;
usage.cost.cacheRead *= multiplier;
usage.cost.cacheWrite *= multiplier;
usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
}
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
if (!status) return "stop";
switch (status) {
case "completed":
return "stop";
case "incomplete":
return "length";
case "failed":
case "cancelled":
return "error";
// A completed stream should not still report these states; treat them as a normal stop.
case "in_progress":
case "queued":
return "stop";
default: {
const _exhaustive: never = status;
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}
}
}

View file

@@ -19,6 +19,7 @@ if (typeof process !== "undefined" && (process.versions?.node || process.version
import { supportsXhigh } from "./models.js";
import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock.js";
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses.js";
import { type GoogleOptions, streamGoogle } from "./providers/google.js";
import {
type GoogleGeminiCliOptions,
@@ -118,6 +119,7 @@ export function getEnvApiKey(provider: any): string | undefined {
const envMap: Record<string, string> = {
openai: "OPENAI_API_KEY",
"azure-openai-responses": "AZURE_OPENAI_API_KEY",
google: "GEMINI_API_KEY",
groq: "GROQ_API_KEY",
cerebras: "CEREBRAS_API_KEY",
@@ -165,6 +167,9 @@ export function stream<TApi extends Api>(
case "openai-responses":
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
case "azure-openai-responses":
return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any);
case "openai-codex-responses":
return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any);
@@ -350,6 +355,12 @@ function mapOptionsForApi<TApi extends Api>(
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
} satisfies OpenAIResponsesOptions;
case "azure-openai-responses":
return {
...base,
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
} satisfies AzureOpenAIResponsesOptions;
case "openai-codex-responses":
return {
...base,

View file

@@ -1,5 +1,6 @@
import type { BedrockOptions } from "./providers/amazon-bedrock.js";
import type { AnthropicOptions } from "./providers/anthropic.js";
import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses.js";
import type { GoogleOptions } from "./providers/google.js";
import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js";
import type { GoogleVertexOptions } from "./providers/google-vertex.js";
@@ -13,6 +14,7 @@ export type { AssistantMessageEventStream } from "./utils/event-stream.js";
export type Api =
| "openai-completions"
| "openai-responses"
| "azure-openai-responses"
| "openai-codex-responses"
| "anthropic-messages"
| "bedrock-converse-stream"
@@ -25,6 +27,7 @@ export interface ApiOptionsMap {
"bedrock-converse-stream": BedrockOptions;
"openai-completions": OpenAICompletionsOptions;
"openai-responses": OpenAIResponsesOptions;
"azure-openai-responses": AzureOpenAIResponsesOptions;
"openai-codex-responses": OpenAICodexResponsesOptions;
"google-generative-ai": GoogleOptions;
"google-gemini-cli": GoogleGeminiCliOptions;
@@ -50,6 +53,7 @@ export type KnownProvider =
| "google-antigravity"
| "google-vertex"
| "openai"
| "azure-openai-responses"
| "openai-codex"
| "github-copilot"
| "xai"