From 856012296b8bcd1bd103ac7179ebb21dab430800 Mon Sep 17 00:00:00 2001 From: Markus Ylisiurunen Date: Wed, 21 Jan 2026 20:13:00 +0200 Subject: [PATCH] add Azure OpenAI Responses provider with deployment-aware model mapping --- packages/ai/CHANGELOG.md | 1 + packages/ai/README.md | 4 + packages/ai/scripts/generate-models.ts | 12 +- packages/ai/src/index.ts | 2 +- packages/ai/src/models.generated.ts | 580 +++++++++++++++ .../src/providers/azure-openai-responses.ts | 660 ++++++++++++++++++ packages/ai/src/stream.ts | 11 + packages/ai/src/types.ts | 4 + packages/ai/test/abort.test.ts | 15 + packages/ai/test/azure-utils.ts | 9 + packages/ai/test/context-overflow.test.ts | 13 + .../ai/test/cross-provider-handoff.test.ts | 1 + packages/ai/test/empty.test.ts | 23 + packages/ai/test/image-tool-result.test.ts | 15 + packages/ai/test/stream.test.ts | 23 + packages/ai/test/tokens.test.ts | 12 + .../ai/test/tool-call-without-result.test.ts | 11 + packages/ai/test/total-tokens.test.ts | 22 + packages/ai/test/unicode-surrogate.test.ts | 19 + packages/coding-agent/README.md | 3 + packages/coding-agent/src/cli/args.ts | 43 +- .../coding-agent/src/core/model-registry.ts | 2 + .../coding-agent/src/core/model-resolver.ts | 1 + 23 files changed, 1465 insertions(+), 21 deletions(-) create mode 100644 packages/ai/src/providers/azure-openai-responses.ts create mode 100644 packages/ai/test/azure-utils.ts diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index bbc3d3da..4035338b 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -13,6 +13,7 @@ - Added `headers` option to `StreamOptions` for custom HTTP headers in API requests. Supported by all providers except Amazon Bedrock (which uses AWS SDK auth). Headers are merged with provider defaults and `model.headers`, with `options.headers` taking precedence. 
- Added `originator` option to `loginOpenAICodex()` for custom OAuth client identification - Browser compatibility for pi-ai: replaced top-level Node.js imports with dynamic imports for browser environments ([#873](https://github.com/badlogic/pi-mono/issues/873)) +- Added `azure-openai-responses` provider support for Azure OpenAI Responses API. ### Fixed diff --git a/packages/ai/README.md b/packages/ai/README.md index c033da38..dd8d4a39 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -47,6 +47,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an ## Supported Providers - **OpenAI** +- **Azure OpenAI (Responses)** - **OpenAI Codex** (ChatGPT Plus/Pro subscription, requires OAuth, see below) - **Anthropic** - **Google** @@ -874,6 +875,7 @@ In Node.js environments, you can set environment variables to avoid passing API | Provider | Environment Variable(s) | |----------|------------------------| | OpenAI | `OPENAI_API_KEY` | +| Azure OpenAI | `AZURE_OPENAI_API_KEY` + `AZURE_OPENAI_ENDPOINT` or `AZURE_OPENAI_RESOURCE_NAME` (optional `AZURE_OPENAI_API_VERSION`, `AZURE_OPENAI_DEPLOYMENT_NAME`) | | Anthropic | `ANTHROPIC_API_KEY` or `ANTHROPIC_OAUTH_TOKEN` | | Google | `GEMINI_API_KEY` | | Vertex AI | `GOOGLE_CLOUD_PROJECT` (or `GCLOUD_PROJECT`) + `GOOGLE_CLOUD_LOCATION` + ADC | @@ -1046,6 +1048,8 @@ const response = await complete(model, { **OpenAI Codex**: Requires a ChatGPT Plus or Pro subscription. Provides access to GPT-5.x Codex models with extended context windows and reasoning capabilities. The library automatically handles session-based prompt caching when `sessionId` is provided in stream options. +**Azure OpenAI (Responses)**: Uses the Responses API only. Set `AZURE_OPENAI_API_KEY` and either `AZURE_OPENAI_ENDPOINT` or `AZURE_OPENAI_RESOURCE_NAME`. Deployment names are treated as model IDs by default, override with `azureDeploymentName` or `AZURE_OPENAI_DEPLOYMENT_NAME`. 
Legacy deployment-based URLs are intentionally unsupported. + **GitHub Copilot**: If you get "The requested model is not supported" error, enable the model manually in VS Code: open Copilot Chat, click the model selector, select the model (warning icon), and click "Enable". **Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically. diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index 41e34de4..f6e7af09 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -1100,6 +1100,16 @@ async function generateModels() { ]; allModels.push(...vertexModels); + const azureOpenAiModels: Model[] = allModels + .filter((model) => model.provider === "openai" && model.api === "openai-responses") + .map((model) => ({ + ...model, + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + })); + allModels.push(...azureOpenAiModels); + // Group by provider and deduplicate by model ID const providers: Record>> = {}; for (const model of allModels) { @@ -1136,7 +1146,7 @@ export const MODELS = { output += `\t\t\tname: "${model.name}",\n`; output += `\t\t\tapi: "${model.api}",\n`; output += `\t\t\tprovider: "${model.provider}",\n`; - if (model.baseUrl) { + if (model.baseUrl !== undefined) { output += `\t\t\tbaseUrl: "${model.baseUrl}",\n`; } if (model.headers) { diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts index a8cf6bcf..a496335f 100644 --- a/packages/ai/src/index.ts +++ b/packages/ai/src/index.ts @@ -1,9 +1,9 @@ export * from "./models.js"; export * from "./providers/anthropic.js"; +export * from "./providers/azure-openai-responses.js"; export * from "./providers/google.js"; export * from "./providers/google-gemini-cli.js"; export * from "./providers/google-vertex.js"; - export * from 
"./providers/openai-completions.js"; export * from "./providers/openai-responses.js"; export * from "./stream.js"; diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index f41f8c41..803e67b9 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -1300,6 +1300,586 @@ export const MODELS = { maxTokens: 64000, } satisfies Model<"anthropic-messages">, }, + "azure-openai-responses": { + "codex-mini-latest": { + id: "codex-mini-latest", + name: "Codex Mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text"], + cost: { + input: 1.5, + output: 6, + cacheRead: 0.375, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "gpt-4": { + id: "gpt-4", + name: "GPT-4", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text"], + cost: { + input: 30, + output: 60, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 8192, + } satisfies Model<"azure-openai-responses">, + "gpt-4-turbo": { + id: "gpt-4-turbo", + name: "GPT-4 Turbo", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 10, + output: 30, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"azure-openai-responses">, + "gpt-4.1": { + id: "gpt-4.1", + name: "GPT-4.1", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"azure-openai-responses">, + "gpt-4.1-mini": { + id: "gpt-4.1-mini", + name: "GPT-4.1 mini", + api: "azure-openai-responses", + provider: 
"azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.4, + output: 1.6, + cacheRead: 0.1, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"azure-openai-responses">, + "gpt-4.1-nano": { + id: "gpt-4.1-nano", + name: "GPT-4.1 nano", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0.03, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"azure-openai-responses">, + "gpt-4o": { + id: "gpt-4o", + name: "GPT-4o", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-4o-2024-05-13": { + id: "gpt-4o-2024-05-13", + name: "GPT-4o (2024-05-13)", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 5, + output: 15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"azure-openai-responses">, + "gpt-4o-2024-08-06": { + id: "gpt-4o-2024-08-06", + name: "GPT-4o (2024-08-06)", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-4o-2024-11-20": { + id: "gpt-4o-2024-11-20", + name: "GPT-4o (2024-11-20)", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + 
output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-4o-mini": { + id: "gpt-4o-mini", + name: "GPT-4o mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-5": { + id: "gpt-5", + name: "GPT-5", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5-chat-latest": { + id: "gpt-5-chat-latest", + name: "GPT-5 Chat Latest", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-5-codex": { + id: "gpt-5-codex", + name: "GPT-5-Codex", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5-mini": { + id: "gpt-5-mini", + name: "GPT-5 Mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + 
"gpt-5-nano": { + id: "gpt-5-nano", + name: "GPT-5 Nano", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.05, + output: 0.4, + cacheRead: 0.005, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5-pro": { + id: "gpt-5-pro", + name: "GPT-5 Pro", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 120, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 272000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.1": { + id: "gpt-5.1", + name: "GPT-5.1", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.13, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.1-chat-latest": { + id: "gpt-5.1-chat-latest", + name: "GPT-5.1 Chat", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-5.1-codex": { + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.1-codex-max": { + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "azure-openai-responses", + provider: "azure-openai-responses", + 
baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.1-codex-mini": { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1 Codex mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.2": { + id: "gpt-5.2", + name: "GPT-5.2", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.2-chat-latest": { + id: "gpt-5.2-chat-latest", + name: "GPT-5.2 Chat", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"azure-openai-responses">, + "gpt-5.2-codex": { + id: "gpt-5.2-codex", + name: "GPT-5.2 Codex", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "gpt-5.2-pro": { + id: "gpt-5.2-pro", + name: "GPT-5.2 Pro", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 21, + output: 168, + cacheRead: 0, + 
cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"azure-openai-responses">, + "o1": { + id: "o1", + name: "o1", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 60, + cacheRead: 7.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o1-pro": { + id: "o1-pro", + name: "o1-pro", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 150, + output: 600, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o3": { + id: "o3", + name: "o3", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o3-deep-research": { + id: "o3-deep-research", + name: "o3-deep-research", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 10, + output: 40, + cacheRead: 2.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o3-mini": { + id: "o3-mini", + name: "o3-mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.55, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o3-pro": { + id: "o3-pro", + name: "o3-pro", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: 
"", + reasoning: true, + input: ["text", "image"], + cost: { + input: 20, + output: 80, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o4-mini": { + id: "o4-mini", + name: "o4-mini", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.28, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + "o4-mini-deep-research": { + id: "o4-mini-deep-research", + name: "o4-mini-deep-research", + api: "azure-openai-responses", + provider: "azure-openai-responses", + baseUrl: "", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"azure-openai-responses">, + }, "cerebras": { "gpt-oss-120b": { id: "gpt-oss-120b", diff --git a/packages/ai/src/providers/azure-openai-responses.ts b/packages/ai/src/providers/azure-openai-responses.ts new file mode 100644 index 00000000..c6d89a4e --- /dev/null +++ b/packages/ai/src/providers/azure-openai-responses.ts @@ -0,0 +1,660 @@ +import type OpenAI from "openai"; +import { AzureOpenAI } from "openai"; +import type { + Tool as OpenAITool, + ResponseCreateParamsStreaming, + ResponseFunctionToolCall, + ResponseInput, + ResponseInputContent, + ResponseInputImage, + ResponseInputText, + ResponseOutputMessage, + ResponseReasoningItem, +} from "openai/resources/responses/responses.js"; +import { calculateCost } from "../models.js"; +import { getEnvApiKey } from "../stream.js"; +import type { + Api, + AssistantMessage, + Context, + Model, + StopReason, + StreamFunction, + StreamOptions, + TextContent, + ThinkingContent, + Tool, + ToolCall, + Usage, +} from "../types.js"; +import { AssistantMessageEventStream } from 
"../utils/event-stream.js"; +import { parseStreamingJson } from "../utils/json-parse.js"; +import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; +import { transformMessages } from "./transform-messages.js"; + +/** Fast deterministic hash to shorten long strings */ +function shortHash(str: string): string { + let h1 = 0xdeadbeef; + let h2 = 0x41c6ce57; + for (let i = 0; i < str.length; i++) { + const ch = str.charCodeAt(i); + h1 = Math.imul(h1 ^ ch, 2654435761); + h2 = Math.imul(h2 ^ ch, 1597334677); + } + h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507) ^ Math.imul(h2 ^ (h2 >>> 13), 3266489909); + h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507) ^ Math.imul(h1 ^ (h1 >>> 13), 3266489909); + return (h2 >>> 0).toString(36) + (h1 >>> 0).toString(36); +} + +const DEFAULT_AZURE_API_VERSION = "2025-04-01-preview"; + +// Azure OpenAI Responses-specific options +export interface AzureOpenAIResponsesOptions extends StreamOptions { + reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh"; + reasoningSummary?: "auto" | "detailed" | "concise" | null; + serviceTier?: ResponseCreateParamsStreaming["service_tier"]; + azureApiVersion?: string; + azureEndpoint?: string; + azureResourceName?: string; + azureBaseUrl?: string; + azureDeploymentName?: string; +} + +/** + * Generate function for Azure OpenAI Responses API + */ +export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"> = ( + model: Model<"azure-openai-responses">, + context: Context, + options?: AzureOpenAIResponsesOptions, +): AssistantMessageEventStream => { + const stream = new AssistantMessageEventStream(); + + // Start async processing + (async () => { + const deploymentName = options?.azureDeploymentName || process.env.AZURE_OPENAI_DEPLOYMENT_NAME || model.id; + + const output: AssistantMessage = { + role: "assistant", + content: [], + api: "azure-openai-responses" as Api, + provider: model.provider, + model: deploymentName, + usage: { + input: 0, + output: 0, + cacheRead: 0, 
+ cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: Date.now(), + }; + + try { + // Create Azure OpenAI client + const apiKey = options?.apiKey || getEnvApiKey(model.provider) || ""; + const client = createClient(model, apiKey, options); + const params = buildParams(model, context, options, deploymentName); + options?.onPayload?.(params); + const openaiStream = await client.responses.create( + params, + options?.signal ? { signal: options.signal } : undefined, + ); + stream.push({ type: "start", partial: output }); + + let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null; + let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null; + const blocks = output.content; + const blockIndex = () => blocks.length - 1; + + for await (const event of openaiStream) { + // Handle output item start + if (event.type === "response.output_item.added") { + const item = event.item; + if (item.type === "reasoning") { + currentItem = item; + currentBlock = { type: "thinking", thinking: "" }; + output.content.push(currentBlock); + stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output }); + } else if (item.type === "message") { + currentItem = item; + currentBlock = { type: "text", text: "" }; + output.content.push(currentBlock); + stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); + } else if (item.type === "function_call") { + currentItem = item; + currentBlock = { + type: "toolCall", + id: `${item.call_id}|${item.id}`, + name: item.name, + arguments: {}, + partialJson: item.arguments || "", + }; + output.content.push(currentBlock); + stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output }); + } + } + // Handle reasoning summary deltas + else if (event.type === "response.reasoning_summary_part.added") { + if 
(currentItem && currentItem.type === "reasoning") { + currentItem.summary = currentItem.summary || []; + currentItem.summary.push(event.part); + } + } else if (event.type === "response.reasoning_summary_text.delta") { + if ( + currentItem && + currentItem.type === "reasoning" && + currentBlock && + currentBlock.type === "thinking" + ) { + currentItem.summary = currentItem.summary || []; + const lastPart = currentItem.summary[currentItem.summary.length - 1]; + if (lastPart) { + currentBlock.thinking += event.delta; + lastPart.text += event.delta; + stream.push({ + type: "thinking_delta", + contentIndex: blockIndex(), + delta: event.delta, + partial: output, + }); + } + } + } + // Add a new line between summary parts (hack...) + else if (event.type === "response.reasoning_summary_part.done") { + if ( + currentItem && + currentItem.type === "reasoning" && + currentBlock && + currentBlock.type === "thinking" + ) { + currentItem.summary = currentItem.summary || []; + const lastPart = currentItem.summary[currentItem.summary.length - 1]; + if (lastPart) { + currentBlock.thinking += "\n\n"; + lastPart.text += "\n\n"; + stream.push({ + type: "thinking_delta", + contentIndex: blockIndex(), + delta: "\n\n", + partial: output, + }); + } + } + } + // Handle text output deltas + else if (event.type === "response.content_part.added") { + if (currentItem && currentItem.type === "message") { + currentItem.content = currentItem.content || []; + // Filter out ReasoningText, only accept output_text and refusal + if (event.part.type === "output_text" || event.part.type === "refusal") { + currentItem.content.push(event.part); + } + } + } else if (event.type === "response.output_text.delta") { + if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") { + const lastPart = currentItem.content[currentItem.content.length - 1]; + if (lastPart && lastPart.type === "output_text") { + currentBlock.text += event.delta; + lastPart.text += event.delta; + 
stream.push({ + type: "text_delta", + contentIndex: blockIndex(), + delta: event.delta, + partial: output, + }); + } + } + } else if (event.type === "response.refusal.delta") { + if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") { + const lastPart = currentItem.content[currentItem.content.length - 1]; + if (lastPart && lastPart.type === "refusal") { + currentBlock.text += event.delta; + lastPart.refusal += event.delta; + stream.push({ + type: "text_delta", + contentIndex: blockIndex(), + delta: event.delta, + partial: output, + }); + } + } + } + // Handle function call argument deltas + else if (event.type === "response.function_call_arguments.delta") { + if ( + currentItem && + currentItem.type === "function_call" && + currentBlock && + currentBlock.type === "toolCall" + ) { + currentBlock.partialJson += event.delta; + currentBlock.arguments = parseStreamingJson(currentBlock.partialJson); + stream.push({ + type: "toolcall_delta", + contentIndex: blockIndex(), + delta: event.delta, + partial: output, + }); + } + } + // Handle output item completion + else if (event.type === "response.output_item.done") { + const item = event.item; + + if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") { + currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || ""; + currentBlock.thinkingSignature = JSON.stringify(item); + stream.push({ + type: "thinking_end", + contentIndex: blockIndex(), + content: currentBlock.thinking, + partial: output, + }); + currentBlock = null; + } else if (item.type === "message" && currentBlock && currentBlock.type === "text") { + currentBlock.text = item.content.map((c) => (c.type === "output_text" ? 
c.text : c.refusal)).join(""); + currentBlock.textSignature = item.id; + stream.push({ + type: "text_end", + contentIndex: blockIndex(), + content: currentBlock.text, + partial: output, + }); + currentBlock = null; + } else if (item.type === "function_call") { + const toolCall: ToolCall = { + type: "toolCall", + id: `${item.call_id}|${item.id}`, + name: item.name, + arguments: JSON.parse(item.arguments), + }; + + stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output }); + } + } + // Handle completion + else if (event.type === "response.completed") { + const response = event.response; + if (response?.usage) { + const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0; + output.usage = { + // OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input + input: (response.usage.input_tokens || 0) - cachedTokens, + output: response.usage.output_tokens || 0, + cacheRead: cachedTokens, + cacheWrite: 0, + totalTokens: response.usage.total_tokens || 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }; + } + calculateCost(model, output.usage); + applyServiceTierPricing(output.usage, response?.service_tier ?? 
options?.serviceTier); + // Map status to stop reason + output.stopReason = mapStopReason(response?.status); + if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") { + output.stopReason = "toolUse"; + } + } + // Handle errors + else if (event.type === "error") { + throw new Error(`Error Code ${event.code}: ${event.message}` || "Unknown error"); + } else if (event.type === "response.failed") { + throw new Error("Unknown error"); + } + } + + if (options?.signal?.aborted) { + throw new Error("Request was aborted"); + } + + if (output.stopReason === "aborted" || output.stopReason === "error") { + throw new Error("An unknown error occurred"); + } + + stream.push({ type: "done", reason: output.stopReason, message: output }); + stream.end(); + } catch (error) { + for (const block of output.content) delete (block as any).index; + output.stopReason = options?.signal?.aborted ? "aborted" : "error"; + output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); + stream.push({ type: "error", reason: output.stopReason, error: output }); + stream.end(); + } + })(); + + return stream; +}; + +function normalizeAzureEndpoint(endpoint: string): string { + return endpoint.replace(/\/+$/, ""); +} + +function getAzureEndpoint(options?: AzureOpenAIResponsesOptions): string | undefined { + const endpoint = + options?.azureEndpoint || + (options?.azureResourceName ? `https://${options.azureResourceName}.openai.azure.com` : undefined) || + process.env.AZURE_OPENAI_ENDPOINT || + (process.env.AZURE_OPENAI_RESOURCE_NAME + ? `https://${process.env.AZURE_OPENAI_RESOURCE_NAME}.openai.azure.com` + : undefined); + + return endpoint ? 
normalizeAzureEndpoint(endpoint) : undefined; +} + +function resolveAzureConfig( + model: Model<"azure-openai-responses">, + options?: AzureOpenAIResponsesOptions, +): { baseUrl?: string; endpoint?: string; apiVersion: string } { + const apiVersion = options?.azureApiVersion || process.env.AZURE_OPENAI_API_VERSION || DEFAULT_AZURE_API_VERSION; + + const baseUrl = options?.azureBaseUrl?.trim() || undefined; + const endpoint = getAzureEndpoint(options); + + let resolvedBaseUrl = baseUrl; + const resolvedEndpoint = endpoint; + + if (!resolvedBaseUrl && !resolvedEndpoint && model.baseUrl) { + resolvedBaseUrl = model.baseUrl; + } + + if (!resolvedBaseUrl && !resolvedEndpoint) { + throw new Error( + "Azure OpenAI endpoint is required. Set AZURE_OPENAI_ENDPOINT or AZURE_OPENAI_RESOURCE_NAME, or pass azureEndpoint, azureResourceName, azureBaseUrl, or model.baseUrl.", + ); + } + + return { + baseUrl: resolvedBaseUrl, + endpoint: resolvedEndpoint, + apiVersion, + }; +} + +function createClient(model: Model<"azure-openai-responses">, apiKey: string, options?: AzureOpenAIResponsesOptions) { + if (!apiKey) { + if (!process.env.AZURE_OPENAI_API_KEY) { + throw new Error( + "Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY environment variable or pass it as an argument.", + ); + } + apiKey = process.env.AZURE_OPENAI_API_KEY; + } + + const headers = { ...model.headers }; + + if (options?.headers) { + Object.assign(headers, options.headers); + } + + const { baseUrl, endpoint, apiVersion } = resolveAzureConfig(model, options); + + return new AzureOpenAI({ + apiKey, + apiVersion, + dangerouslyAllowBrowser: true, + defaultHeaders: headers, + ...(baseUrl ? 
{ baseURL: baseUrl } : { endpoint }), + }); +} + +function buildParams( + model: Model<"azure-openai-responses">, + context: Context, + options: AzureOpenAIResponsesOptions | undefined, + deploymentName: string, +) { + const messages = convertMessages(model, context); + + const params: ResponseCreateParamsStreaming = { + model: deploymentName, + input: messages, + stream: true, + prompt_cache_key: options?.sessionId, + }; + + if (options?.maxTokens) { + params.max_output_tokens = options?.maxTokens; + } + + if (options?.temperature !== undefined) { + params.temperature = options?.temperature; + } + + if (options?.serviceTier !== undefined) { + params.service_tier = options.serviceTier; + } + + if (context.tools) { + params.tools = convertTools(context.tools); + } + + if (model.reasoning) { + if (options?.reasoningEffort || options?.reasoningSummary) { + params.reasoning = { + effort: options?.reasoningEffort || "medium", + summary: options?.reasoningSummary || "auto", + }; + params.include = ["reasoning.encrypted_content"]; + } else { + if (model.name.startsWith("gpt-5")) { + // Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7 + messages.push({ + role: "developer", + content: [ + { + type: "input_text", + text: "# Juice: 0 !important", + }, + ], + }); + } + } + } + + return params; +} + +function convertMessages(model: Model<"azure-openai-responses">, context: Context): ResponseInput { + const messages: ResponseInput = []; + + const normalizeToolCallId = (id: string): string => { + const allowedProviders = new Set(["openai", "openai-codex", "opencode", "azure-openai-responses"]); + if (!allowedProviders.has(model.provider)) return id; + if (!id.includes("|")) return id; + const [callId, itemId] = id.split("|"); + const sanitizedCallId = callId.replace(/[^a-zA-Z0-9_-]/g, "_"); + let sanitizedItemId = itemId.replace(/[^a-zA-Z0-9_-]/g, "_"); + // OpenAI Responses API requires item id to start with "fc" + if 
(!sanitizedItemId.startsWith("fc")) { + sanitizedItemId = `fc_${sanitizedItemId}`; + } + const normalizedCallId = sanitizedCallId.length > 64 ? sanitizedCallId.slice(0, 64) : sanitizedCallId; + const normalizedItemId = sanitizedItemId.length > 64 ? sanitizedItemId.slice(0, 64) : sanitizedItemId; + return `${normalizedCallId}|${normalizedItemId}`; + }; + + const transformedMessages = transformMessages(context.messages, model, normalizeToolCallId); + + if (context.systemPrompt) { + const role = model.reasoning ? "developer" : "system"; + messages.push({ + role, + content: sanitizeSurrogates(context.systemPrompt), + }); + } + + let msgIndex = 0; + for (const msg of transformedMessages) { + if (msg.role === "user") { + if (typeof msg.content === "string") { + messages.push({ + role: "user", + content: [{ type: "input_text", text: sanitizeSurrogates(msg.content) }], + }); + } else { + const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => { + if (item.type === "text") { + return { + type: "input_text", + text: sanitizeSurrogates(item.text), + } satisfies ResponseInputText; + } else { + return { + type: "input_image", + detail: "auto", + image_url: `data:${item.mimeType};base64,${item.data}`, + } satisfies ResponseInputImage; + } + }); + const filteredContent = !model.input.includes("image") + ? 
content.filter((c) => c.type !== "input_image") + : content; + if (filteredContent.length === 0) continue; + messages.push({ + role: "user", + content: filteredContent, + }); + } + } else if (msg.role === "assistant") { + const output: ResponseInput = []; + + for (const block of msg.content) { + if (block.type === "thinking") { + if (block.thinkingSignature) { + const reasoningItem = JSON.parse(block.thinkingSignature); + output.push(reasoningItem); + } + } else if (block.type === "text") { + const textBlock = block as TextContent; + // OpenAI requires id to be max 64 characters + let msgId = textBlock.textSignature; + if (!msgId) { + msgId = `msg_${msgIndex}`; + } else if (msgId.length > 64) { + msgId = `msg_${shortHash(msgId)}`; + } + output.push({ + type: "message", + role: "assistant", + content: [{ type: "output_text", text: sanitizeSurrogates(textBlock.text), annotations: [] }], + status: "completed", + id: msgId, + } satisfies ResponseOutputMessage); + } else if (block.type === "toolCall") { + const toolCall = block as ToolCall; + output.push({ + type: "function_call", + id: toolCall.id.split("|")[1], + call_id: toolCall.id.split("|")[0], + name: toolCall.name, + arguments: JSON.stringify(toolCall.arguments), + }); + } + } + if (output.length === 0) continue; + messages.push(...output); + } else if (msg.role === "toolResult") { + // Extract text and image content + const textResult = msg.content + .filter((c) => c.type === "text") + .map((c) => (c as any).text) + .join("\n"); + const hasImages = msg.content.some((c) => c.type === "image"); + + // Always send function_call_output with text (or placeholder if only images) + const hasText = textResult.length > 0; + messages.push({ + type: "function_call_output", + call_id: msg.toolCallId.split("|")[0], + output: sanitizeSurrogates(hasText ? 
textResult : "(see attached image)"), + }); + + // If there are images and model supports them, send a follow-up user message with images + if (hasImages && model.input.includes("image")) { + const contentParts: ResponseInputContent[] = []; + + // Add text prefix + contentParts.push({ + type: "input_text", + text: "Attached image(s) from tool result:", + } satisfies ResponseInputText); + + // Add images + for (const block of msg.content) { + if (block.type === "image") { + contentParts.push({ + type: "input_image", + detail: "auto", + image_url: `data:${(block as any).mimeType};base64,${(block as any).data}`, + } satisfies ResponseInputImage); + } + } + + messages.push({ + role: "user", + content: contentParts, + }); + } + } + msgIndex++; + } + + return messages; +} + +function convertTools(tools: Tool[]): OpenAITool[] { + return tools.map((tool) => ({ + type: "function", + name: tool.name, + description: tool.description, + parameters: tool.parameters as any, // TypeBox already generates JSON Schema + strict: false, + })); +} + +function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined): number { + switch (serviceTier) { + case "flex": + return 0.5; + case "priority": + return 2; + default: + return 1; + } +} + +function applyServiceTierPricing(usage: Usage, serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined) { + const multiplier = getServiceTierCostMultiplier(serviceTier); + if (multiplier === 1) return; + + usage.cost.input *= multiplier; + usage.cost.output *= multiplier; + usage.cost.cacheRead *= multiplier; + usage.cost.cacheWrite *= multiplier; + usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite; +} + +function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason { + if (!status) return "stop"; + switch (status) { + case "completed": + return "stop"; + case "incomplete": + return "length"; + case "failed": + 
case "cancelled": + return "error"; + // These two are wonky ... + case "in_progress": + case "queued": + return "stop"; + default: { + const _exhaustive: never = status; + throw new Error(`Unhandled stop reason: ${_exhaustive}`); + } + } +} diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts index ab71c964..aecb334d 100644 --- a/packages/ai/src/stream.ts +++ b/packages/ai/src/stream.ts @@ -19,6 +19,7 @@ if (typeof process !== "undefined" && (process.versions?.node || process.version import { supportsXhigh } from "./models.js"; import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock.js"; import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js"; +import { type AzureOpenAIResponsesOptions, streamAzureOpenAIResponses } from "./providers/azure-openai-responses.js"; import { type GoogleOptions, streamGoogle } from "./providers/google.js"; import { type GoogleGeminiCliOptions, @@ -118,6 +119,7 @@ export function getEnvApiKey(provider: any): string | undefined { const envMap: Record = { openai: "OPENAI_API_KEY", + "azure-openai-responses": "AZURE_OPENAI_API_KEY", google: "GEMINI_API_KEY", groq: "GROQ_API_KEY", cerebras: "CEREBRAS_API_KEY", @@ -165,6 +167,9 @@ export function stream( case "openai-responses": return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any); + case "azure-openai-responses": + return streamAzureOpenAIResponses(model as Model<"azure-openai-responses">, context, providerOptions as any); + case "openai-codex-responses": return streamOpenAICodexResponses(model as Model<"openai-codex-responses">, context, providerOptions as any); @@ -350,6 +355,12 @@ function mapOptionsForApi( reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning), } satisfies OpenAIResponsesOptions; + case "azure-openai-responses": + return { + ...base, + reasoningEffort: supportsXhigh(model) ? 
options?.reasoning : clampReasoning(options?.reasoning), + } satisfies AzureOpenAIResponsesOptions; + case "openai-codex-responses": return { ...base, diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 4463ec5f..024416fb 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -1,5 +1,6 @@ import type { BedrockOptions } from "./providers/amazon-bedrock.js"; import type { AnthropicOptions } from "./providers/anthropic.js"; +import type { AzureOpenAIResponsesOptions } from "./providers/azure-openai-responses.js"; import type { GoogleOptions } from "./providers/google.js"; import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js"; import type { GoogleVertexOptions } from "./providers/google-vertex.js"; @@ -13,6 +14,7 @@ export type { AssistantMessageEventStream } from "./utils/event-stream.js"; export type Api = | "openai-completions" | "openai-responses" + | "azure-openai-responses" | "openai-codex-responses" | "anthropic-messages" | "bedrock-converse-stream" @@ -25,6 +27,7 @@ export interface ApiOptionsMap { "bedrock-converse-stream": BedrockOptions; "openai-completions": OpenAICompletionsOptions; "openai-responses": OpenAIResponsesOptions; + "azure-openai-responses": AzureOpenAIResponsesOptions; "openai-codex-responses": OpenAICodexResponsesOptions; "google-generative-ai": GoogleOptions; "google-gemini-cli": GoogleGeminiCliOptions; @@ -50,6 +53,7 @@ export type KnownProvider = | "google-antigravity" | "google-vertex" | "openai" + | "azure-openai-responses" | "openai-codex" | "github-copilot" | "xai" diff --git a/packages/ai/test/abort.test.ts b/packages/ai/test/abort.test.ts index 74486e9c..a0bf06b9 100644 --- a/packages/ai/test/abort.test.ts +++ b/packages/ai/test/abort.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete, stream } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; 
+import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -139,6 +140,20 @@ describe("AI Providers Abort Tests", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider Abort", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? { azureDeploymentName } : {}; + + it("should abort mid-stream", { retry: 3 }, async () => { + await testAbortSignal(llm, azureOptions); + }); + + it("should handle immediate abort", { retry: 3 }, async () => { + await testImmediateAbort(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => { const llm = getModel("anthropic", "claude-opus-4-1-20250805"); diff --git a/packages/ai/test/azure-utils.ts b/packages/ai/test/azure-utils.ts new file mode 100644 index 00000000..c000802c --- /dev/null +++ b/packages/ai/test/azure-utils.ts @@ -0,0 +1,9 @@ +/** + * Utility functions for Azure OpenAI tests + */ + +export function hasAzureOpenAICredentials(): boolean { + const hasKey = !!process.env.AZURE_OPENAI_API_KEY; + const hasEndpoint = !!(process.env.AZURE_OPENAI_ENDPOINT || process.env.AZURE_OPENAI_RESOURCE_NAME); + return hasKey && hasEndpoint; +} diff --git a/packages/ai/test/context-overflow.test.ts b/packages/ai/test/context-overflow.test.ts index fae10364..fde846f7 100644 --- a/packages/ai/test/context-overflow.test.ts +++ b/packages/ai/test/context-overflow.test.ts @@ -18,6 +18,7 @@ import { getModel } from "../src/models.js"; import { complete } from "../src/stream.js"; import type { AssistantMessage, Context, Model, Usage } from "../src/types.js"; import { isContextOverflow } from "../src/utils/overflow.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { 
hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -189,6 +190,18 @@ describe("Context overflow error handling", () => { }, 120000); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses", () => { + it("gpt-4o-mini - should detect overflow via isContextOverflow", async () => { + const model = getModel("azure-openai-responses", "gpt-4o-mini"); + const result = await testContextOverflow(model, process.env.AZURE_OPENAI_API_KEY!); + logResult(result); + + expect(result.stopReason).toBe("error"); + expect(result.errorMessage).toMatch(/context|maximum/i); + expect(isContextOverflow(result.response, model.contextWindow)).toBe(true); + }, 120000); + }); + // ============================================================================= // Google // Expected pattern: "input token count (X) exceeds the maximum" diff --git a/packages/ai/test/cross-provider-handoff.test.ts b/packages/ai/test/cross-provider-handoff.test.ts index e8325627..acb4d89c 100644 --- a/packages/ai/test/cross-provider-handoff.test.ts +++ b/packages/ai/test/cross-provider-handoff.test.ts @@ -62,6 +62,7 @@ const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [ apiOverride: "openai-completions", }, { provider: "openai", model: "gpt-5-mini", label: "openai-responses-gpt-5-mini" }, + { provider: "azure-openai-responses", model: "gpt-4o-mini", label: "azure-openai-responses-gpt-4o-mini" }, // OpenAI Codex { provider: "openai-codex", model: "gpt-5.2-codex", label: "openai-codex-gpt-5.2-codex" }, // Google Antigravity diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts index 12415f6c..fb7d0e98 100644 --- a/packages/ai/test/empty.test.ts +++ b/packages/ai/test/empty.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete } from "../src/stream.js"; import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from 
"../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -202,6 +203,28 @@ describe("AI Providers Empty Message Tests", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider Empty Messages", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? { azureDeploymentName } : {}; + + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { + await testEmptyMessage(llm, azureOptions); + }); + + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { + await testEmptyStringMessage(llm, azureOptions); + }); + + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { + await testWhitespaceOnlyMessage(llm, azureOptions); + }); + + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { + await testEmptyAssistantMessage(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Empty Messages", () => { const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); diff --git a/packages/ai/test/image-tool-result.test.ts b/packages/ai/test/image-tool-result.test.ts index 144d6cb5..7fb8b93c 100644 --- a/packages/ai/test/image-tool-result.test.ts +++ b/packages/ai/test/image-tool-result.test.ts @@ -5,6 +5,7 @@ import { describe, expect, it } from "vitest"; import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js"; import { complete, getModel } from "../src/index.js"; import type { OptionsForApi } from "../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from 
"./oauth.js"; @@ -243,6 +244,20 @@ describe("Tool Results with Images", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider (gpt-4o-mini)", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? { azureDeploymentName } : {}; + + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { + await handleToolWithImageResult(llm, azureOptions); + }); + + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { + await handleToolWithTextAndImageResult(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-haiku-4-5)", () => { const model = getModel("anthropic", "claude-haiku-4-5"); diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 2a140292..2ad803c2 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -8,6 +8,7 @@ import { getModel } from "../src/models.js"; import { complete, stream } from "../src/stream.js"; import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js"; import { StringEnum } from "../src/utils/typebox-helpers.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -506,6 +507,28 @@ describe("Generate E2E Tests", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider (gpt-4o-mini)", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? 
{ azureDeploymentName } : {}; + + it("should complete basic text generation", { retry: 3 }, async () => { + await basicTextGeneration(llm, azureOptions); + }); + + it("should handle tool calling", { retry: 3 }, async () => { + await handleToolCall(llm, azureOptions); + }); + + it("should handle streaming", { retry: 3 }, async () => { + await handleStreaming(llm, azureOptions); + }); + + it("should handle image input", { retry: 3 }, async () => { + await handleImage(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => { const llm = getModel("xai", "grok-code-fast-1"); diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts index dc897bb5..dd5584a9 100644 --- a/packages/ai/test/tokens.test.ts +++ b/packages/ai/test/tokens.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { stream } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -52,6 +53,7 @@ async function testTokensOnAbort(llm: Model, options: Op if ( llm.api === "openai-completions" || llm.api === "openai-responses" || + llm.api === "azure-openai-responses" || llm.api === "openai-codex-responses" || llm.provider === "google-gemini-cli" || llm.provider === "zai" || @@ -107,6 +109,16 @@ describe("Token Statistics on Abort", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? 
{ azureDeploymentName } : {}; + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => { const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts index 8cc61e90..500a7588 100644 --- a/packages/ai/test/tool-call-without-result.test.ts +++ b/packages/ai/test/tool-call-without-result.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -125,6 +126,16 @@ describe("Tool Call Without Result Tests", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider", () => { + const model = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? 
{ azureDeploymentName } : {}; + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => { const model = getModel("anthropic", "claude-3-5-haiku-20241022"); diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts index b0a40ebd..6559b450 100644 --- a/packages/ai/test/total-tokens.test.ts +++ b/packages/ai/test/total-tokens.test.ts @@ -16,6 +16,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -189,6 +190,27 @@ describe("totalTokens field", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses", () => { + it( + "gpt-4o-mini - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? 
{ azureDeploymentName } : {}; + + console.log(`\nAzure OpenAI Responses / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, azureOptions); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + }); + // ========================================================================= // Google // ========================================================================= diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts index 4087d306..0ae02fbe 100644 --- a/packages/ai/test/unicode-surrogate.test.ts +++ b/packages/ai/test/unicode-surrogate.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js"; +import { hasAzureOpenAICredentials } from "./azure-utils.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; import { resolveApiKey } from "./oauth.js"; @@ -329,6 +330,24 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { }); }); + describe.skipIf(!hasAzureOpenAICredentials())("Azure OpenAI Responses Provider Unicode Handling", () => { + const llm = getModel("azure-openai-responses", "gpt-4o-mini"); + const azureDeploymentName = process.env.AZURE_OPENAI_DEPLOYMENT_NAME; + const azureOptions = azureDeploymentName ? 
{ azureDeploymentName } : {}; + + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { + await testEmojiInToolResults(llm, azureOptions); + }); + + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { + await testRealWorldLinkedInData(llm, azureOptions); + }); + + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { + await testUnpairedHighSurrogate(llm, azureOptions); + }); + }); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Unicode Handling", () => { const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index f2c62a2d..9bc2b27b 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -197,6 +197,7 @@ Add API keys to `~/.pi/agent/auth.json`: |----------|--------------|---------------------| | Anthropic | `anthropic` | `ANTHROPIC_API_KEY` | | OpenAI | `openai` | `OPENAI_API_KEY` | +| Azure OpenAI | `azure-openai-responses` | `AZURE_OPENAI_API_KEY` | | Google | `google` | `GEMINI_API_KEY` | | Mistral | `mistral` | `MISTRAL_API_KEY` | | Groq | `groq` | `GROQ_API_KEY` | @@ -209,6 +210,8 @@ Add API keys to `~/.pi/agent/auth.json`: | MiniMax | `minimax` | `MINIMAX_API_KEY` | | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` | +Azure OpenAI also requires `AZURE_OPENAI_ENDPOINT` or `AZURE_OPENAI_RESOURCE_NAME`. Optional: `AZURE_OPENAI_API_VERSION` (defaults to `2025-04-01-preview`) and `AZURE_OPENAI_DEPLOYMENT_NAME` to override the deployment name. + Auth file keys take priority over environment variables. 
**OAuth Providers:** diff --git a/packages/coding-agent/src/cli/args.ts b/packages/coding-agent/src/cli/args.ts index 57c28de5..17c3cd8c 100644 --- a/packages/coding-agent/src/cli/args.ts +++ b/packages/coding-agent/src/cli/args.ts @@ -262,25 +262,30 @@ ${chalk.bold("Examples:")} ${APP_NAME} --export session.jsonl output.html ${chalk.bold("Environment Variables:")} - ANTHROPIC_API_KEY - Anthropic Claude API key - ANTHROPIC_OAUTH_TOKEN - Anthropic OAuth token (alternative to API key) - OPENAI_API_KEY - OpenAI GPT API key - GEMINI_API_KEY - Google Gemini API key - GROQ_API_KEY - Groq API key - CEREBRAS_API_KEY - Cerebras API key - XAI_API_KEY - xAI Grok API key - OPENROUTER_API_KEY - OpenRouter API key - AI_GATEWAY_API_KEY - Vercel AI Gateway API key - ZAI_API_KEY - ZAI API key - MISTRAL_API_KEY - Mistral API key - MINIMAX_API_KEY - MiniMax API key - AWS_PROFILE - AWS profile for Amazon Bedrock - AWS_ACCESS_KEY_ID - AWS access key for Amazon Bedrock - AWS_SECRET_ACCESS_KEY - AWS secret key for Amazon Bedrock - AWS_BEARER_TOKEN_BEDROCK - Bedrock API key (bearer token) - AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1) - ${ENV_AGENT_DIR.padEnd(23)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent) - PI_SHARE_VIEWER_URL - Base URL for /share command (default: https://buildwithpi.ai/session/) + ANTHROPIC_API_KEY - Anthropic Claude API key + ANTHROPIC_OAUTH_TOKEN - Anthropic OAuth token (alternative to API key) + OPENAI_API_KEY - OpenAI GPT API key + AZURE_OPENAI_API_KEY - Azure OpenAI API key + AZURE_OPENAI_ENDPOINT - Azure OpenAI endpoint (https://{resource}.openai.azure.com) + AZURE_OPENAI_RESOURCE_NAME - Azure OpenAI resource name (alternative to endpoint) + AZURE_OPENAI_API_VERSION - Azure OpenAI API version (default: 2025-04-01-preview) + AZURE_OPENAI_DEPLOYMENT_NAME - Azure OpenAI deployment name override + GEMINI_API_KEY - Google Gemini API key + GROQ_API_KEY - Groq API key + CEREBRAS_API_KEY - Cerebras API key + XAI_API_KEY - xAI Grok 
API key + OPENROUTER_API_KEY - OpenRouter API key + AI_GATEWAY_API_KEY - Vercel AI Gateway API key + ZAI_API_KEY - ZAI API key + MISTRAL_API_KEY - Mistral API key + MINIMAX_API_KEY - MiniMax API key + AWS_PROFILE - AWS profile for Amazon Bedrock + AWS_ACCESS_KEY_ID - AWS access key for Amazon Bedrock + AWS_SECRET_ACCESS_KEY - AWS secret key for Amazon Bedrock + AWS_BEARER_TOKEN_BEDROCK - Bedrock API key (bearer token) + AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1) + ${ENV_AGENT_DIR.padEnd(27)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent) + PI_SHARE_VIEWER_URL - Base URL for /share command (default: https://buildwithpi.ai/session/) ${chalk.bold("Available Tools (default: read, bash, edit, write):")} read - Read file contents diff --git a/packages/coding-agent/src/core/model-registry.ts b/packages/coding-agent/src/core/model-registry.ts index 87728418..a276cce8 100644 --- a/packages/coding-agent/src/core/model-registry.ts +++ b/packages/coding-agent/src/core/model-registry.ts @@ -43,6 +43,7 @@ const ModelDefinitionSchema = Type.Object({ Type.Union([ Type.Literal("openai-completions"), Type.Literal("openai-responses"), + Type.Literal("azure-openai-responses"), Type.Literal("openai-codex-responses"), Type.Literal("anthropic-messages"), Type.Literal("google-generative-ai"), @@ -70,6 +71,7 @@ const ProviderConfigSchema = Type.Object({ Type.Union([ Type.Literal("openai-completions"), Type.Literal("openai-responses"), + Type.Literal("azure-openai-responses"), Type.Literal("openai-codex-responses"), Type.Literal("anthropic-messages"), Type.Literal("google-generative-ai"), diff --git a/packages/coding-agent/src/core/model-resolver.ts b/packages/coding-agent/src/core/model-resolver.ts index c28a8b0e..aa9a7c0f 100644 --- a/packages/coding-agent/src/core/model-resolver.ts +++ b/packages/coding-agent/src/core/model-resolver.ts @@ -14,6 +14,7 @@ export const defaultModelPerProvider: Record = { "amazon-bedrock": 
"global.anthropic.claude-sonnet-4-5-20250929-v1:0", anthropic: "claude-sonnet-4-5", openai: "gpt-5.1-codex", + "azure-openai-responses": "gpt-5.2", "openai-codex": "gpt-5.2-codex", google: "gemini-2.5-pro", "google-gemini-cli": "gemini-2.5-pro",