add Azure OpenAI Responses provider with deployment-aware model mapping

This commit is contained in:
Markus Ylisiurunen 2026-01-21 20:13:00 +02:00 committed by Mario Zechner
parent 951fb953ed
commit 856012296b
23 changed files with 1465 additions and 21 deletions

View file

@ -1300,6 +1300,586 @@ export const MODELS = {
maxTokens: 64000,
} satisfies Model<"anthropic-messages">,
},
// Azure OpenAI (Responses API) model catalog, keyed by model id.
// Every entry sets both `api` and `provider` to "azure-openai-responses" and
// leaves `baseUrl` empty.
// NOTE(review): the empty `baseUrl` presumably means the endpoint is resolved
// from the user's Azure resource/deployment at request time — confirm against
// the provider implementation.
// NOTE(review): `cost` values appear to be USD per 1M tokens mirroring the
// OpenAI list prices — confirm. `cacheRead` is kept unrounded so that it stays
// an exact fraction of `input` within each model family.
"azure-openai-responses": {
  "codex-mini-latest": {
    id: "codex-mini-latest",
    name: "Codex Mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 1.5,
      output: 6,
      cacheRead: 0.375,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4": {
    id: "gpt-4",
    name: "GPT-4",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text"],
    cost: {
      input: 30,
      output: 60,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 8192,
    maxTokens: 8192,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4-turbo": {
    id: "gpt-4-turbo",
    name: "GPT-4 Turbo",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 10,
      output: 30,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 4096,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4.1": {
    id: "gpt-4.1",
    name: "GPT-4.1",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 2,
      output: 8,
      cacheRead: 0.5,
      cacheWrite: 0,
    },
    contextWindow: 1047576,
    maxTokens: 32768,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4.1-mini": {
    id: "gpt-4.1-mini",
    name: "GPT-4.1 mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 0.4,
      output: 1.6,
      cacheRead: 0.1,
      cacheWrite: 0,
    },
    contextWindow: 1047576,
    maxTokens: 32768,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4.1-nano": {
    id: "gpt-4.1-nano",
    name: "GPT-4.1 nano",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 0.1,
      output: 0.4,
      // Unrounded cached-input rate: 25% of input, the same ratio as gpt-4.1 and gpt-4.1-mini.
      cacheRead: 0.025,
      cacheWrite: 0,
    },
    contextWindow: 1047576,
    maxTokens: 32768,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4o": {
    id: "gpt-4o",
    name: "GPT-4o",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 2.5,
      output: 10,
      cacheRead: 1.25,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4o-2024-05-13": {
    id: "gpt-4o-2024-05-13",
    name: "GPT-4o (2024-05-13)",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 5,
      output: 15,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 4096,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4o-2024-08-06": {
    id: "gpt-4o-2024-08-06",
    name: "GPT-4o (2024-08-06)",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 2.5,
      output: 10,
      cacheRead: 1.25,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4o-2024-11-20": {
    id: "gpt-4o-2024-11-20",
    name: "GPT-4o (2024-11-20)",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 2.5,
      output: 10,
      cacheRead: 1.25,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-4o-mini": {
    id: "gpt-4o-mini",
    name: "GPT-4o mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 0.15,
      output: 0.6,
      // Unrounded cached-input rate: 50% of input, the same ratio as gpt-4o.
      cacheRead: 0.075,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5": {
    id: "gpt-5",
    name: "GPT-5",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5-chat-latest": {
    id: "gpt-5-chat-latest",
    name: "GPT-5 Chat Latest",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: false,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5-codex": {
    id: "gpt-5-codex",
    name: "GPT-5-Codex",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5-mini": {
    id: "gpt-5-mini",
    name: "GPT-5 Mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 0.25,
      output: 2,
      cacheRead: 0.025,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5-nano": {
    id: "gpt-5-nano",
    name: "GPT-5 Nano",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 0.05,
      output: 0.4,
      cacheRead: 0.005,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5-pro": {
    id: "gpt-5-pro",
    name: "GPT-5 Pro",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 15,
      output: 120,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 272000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.1": {
    id: "gpt-5.1",
    name: "GPT-5.1",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      // Unrounded cached-input rate: 10% of input, matching gpt-5.1-codex and gpt-5.1-chat-latest.
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.1-chat-latest": {
    id: "gpt-5.1-chat-latest",
    name: "GPT-5.1 Chat",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.1-codex": {
    id: "gpt-5.1-codex",
    name: "GPT-5.1 Codex",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.1-codex-max": {
    id: "gpt-5.1-codex-max",
    name: "GPT-5.1 Codex Max",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.25,
      output: 10,
      cacheRead: 0.125,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.1-codex-mini": {
    id: "gpt-5.1-codex-mini",
    name: "GPT-5.1 Codex mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 0.25,
      output: 2,
      cacheRead: 0.025,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.2": {
    id: "gpt-5.2",
    name: "GPT-5.2",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.75,
      output: 14,
      cacheRead: 0.175,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.2-chat-latest": {
    id: "gpt-5.2-chat-latest",
    name: "GPT-5.2 Chat",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.75,
      output: 14,
      cacheRead: 0.175,
      cacheWrite: 0,
    },
    contextWindow: 128000,
    maxTokens: 16384,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.2-codex": {
    id: "gpt-5.2-codex",
    name: "GPT-5.2 Codex",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.75,
      output: 14,
      cacheRead: 0.175,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "gpt-5.2-pro": {
    id: "gpt-5.2-pro",
    name: "GPT-5.2 Pro",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 21,
      output: 168,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 400000,
    maxTokens: 128000,
  } satisfies Model<"azure-openai-responses">,
  "o1": {
    id: "o1",
    name: "o1",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 15,
      output: 60,
      cacheRead: 7.5,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o1-pro": {
    id: "o1-pro",
    name: "o1-pro",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 150,
      output: 600,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o3": {
    id: "o3",
    name: "o3",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 2,
      output: 8,
      cacheRead: 0.5,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o3-deep-research": {
    id: "o3-deep-research",
    name: "o3-deep-research",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 10,
      output: 40,
      cacheRead: 2.5,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o3-mini": {
    id: "o3-mini",
    name: "o3-mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text"],
    cost: {
      input: 1.1,
      output: 4.4,
      cacheRead: 0.55,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o3-pro": {
    id: "o3-pro",
    name: "o3-pro",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 20,
      output: 80,
      cacheRead: 0,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o4-mini": {
    id: "o4-mini",
    name: "o4-mini",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 1.1,
      output: 4.4,
      // Unrounded cached-input rate: 25% of input, the same ratio as o3.
      cacheRead: 0.275,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
  "o4-mini-deep-research": {
    id: "o4-mini-deep-research",
    name: "o4-mini-deep-research",
    api: "azure-openai-responses",
    provider: "azure-openai-responses",
    baseUrl: "",
    reasoning: true,
    input: ["text", "image"],
    cost: {
      input: 2,
      output: 8,
      cacheRead: 0.5,
      cacheWrite: 0,
    },
    contextWindow: 200000,
    maxTokens: 100000,
  } satisfies Model<"azure-openai-responses">,
},
"cerebras": {
"gpt-oss-120b": {
id: "gpt-oss-120b",