From 93ea8298ab070f284694a8f76a5cdf78db149981 Mon Sep 17 00:00:00 2001 From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com> Date: Sun, 28 Dec 2025 13:29:36 -0800 Subject: [PATCH 1/4] fix: update zai model API and baseUrl in generate-models script --- packages/ai/scripts/generate-models.ts | 45 ++++++++++++++------------ 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index 8d3c86fd..e2d460e9 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -260,28 +260,31 @@ async function loadModelsDevData(): Promise[]> { // Process xAi models if (data.zai?.models) { - for (const [modelId, model] of Object.entries(data.zai.models)) { - const m = model as ModelsDevModel; - if (m.tool_call !== true) continue; + for (const [modelId, model] of Object.entries(data.zai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; - models.push({ - id: modelId, - name: m.name || modelId, - api: "anthropic-messages", - provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", - reasoning: m.reasoning === true, - input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], - cost: { - input: m.cost?.input || 0, - output: m.cost?.output || 0, - cacheRead: m.cost?.cache_read || 0, - cacheWrite: m.cost?.cache_write || 0, - }, - contextWindow: m.limit?.context || 4096, - maxTokens: m.limit?.output || 4096, - }); - } + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? 
["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } } // Process Mistral models From e9c6d95e6b8e223898b3680113554ba5fda572e8 Mon Sep 17 00:00:00 2001 From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com> Date: Sun, 28 Dec 2025 13:37:34 -0800 Subject: [PATCH 2/4] Migrate zai provider from Anthropic to OpenAI-compatible API - Change all zai models from anthropic-messages to openai-completions API - Update baseUrl from https://api.z.ai/api/anthropic to https://api.z.ai/api/coding/paas/v4 - Add compat setting to disable developer role for zai - Update zai provider tests to use OpenAI-style options (reasoningEffort instead of thinkingEnabled/thinkingBudgetTokens) - Enable previously disabled thinking and image input tests for zai models --- packages/ai/src/models.generated.ts | 49 ++++++++++++++++------------- packages/ai/test/stream.test.ts | 21 ++++++------- 2 files changed, 37 insertions(+), 33 deletions(-) diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 579eab84..6cde070c 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -6978,9 +6978,10 @@ export const MODELS = { "glm-4.5": { id: "glm-4.5", name: "GLM-4.5", - api: "anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text"], cost: { @@ -6991,13 +6992,14 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.5-air": { id: "glm-4.5-air", name: "GLM-4.5-Air", - api: 
"anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text"], cost: { @@ -7008,13 +7010,14 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.5-flash": { id: "glm-4.5-flash", name: "GLM-4.5-Flash", - api: "anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text"], cost: { @@ -7025,13 +7028,14 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.5v": { id: "glm-4.5v", name: "GLM-4.5V", - api: "anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text", "image"], cost: { @@ -7042,13 +7046,14 @@ export const MODELS = { }, contextWindow: 64000, maxTokens: 16384, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.6": { id: "glm-4.6", name: "GLM-4.6", - api: "anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text"], cost: { @@ -7059,13 +7064,14 @@ export const MODELS = { }, contextWindow: 204800, maxTokens: 131072, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.6v": { id: "glm-4.6v", name: "GLM-4.6V", - api: "anthropic-messages", + api: "openai-completions", 
provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text", "image"], cost: { @@ -7076,13 +7082,14 @@ export const MODELS = { }, contextWindow: 128000, maxTokens: 32768, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, "glm-4.7": { id: "glm-4.7", name: "GLM-4.7", - api: "anthropic-messages", + api: "openai-completions", provider: "zai", - baseUrl: "https://api.z.ai/api/anthropic", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false}, reasoning: true, input: ["text"], cost: { @@ -7093,6 +7100,6 @@ export const MODELS = { }, contextWindow: 204800, maxTokens: 131072, - } satisfies Model<"anthropic-messages">, + } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 3da9b055..d5e22ac4 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -556,7 +556,7 @@ describe("Generate E2E Tests", () => { }); }); - describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => { + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => { const llm = getModel("zai", "glm-4.5-air"); it("should complete basic text generation", { retry: 3 }, async () => { @@ -571,17 +571,16 @@ describe("Generate E2E Tests", () => { await handleStreaming(llm); }); - it("should handle thinking", { retry: 3 }, async () => { - // Prompt doesn't trigger thinking - // await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + it("should handle thinking mode", { retry: 3 }, async () => { + await handleThinking(llm, { reasoningEffort: "medium" }); }); it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { - await multiTurn(llm, { thinkingEnabled: true, 
thinkingBudgetTokens: 2048 }); + await multiTurn(llm, { reasoningEffort: "medium" }); }); }); - describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => { + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via OpenAI Completions)", () => { const llm = getModel("zai", "glm-4.5v"); it("should complete basic text generation", { retry: 3 }, async () => { @@ -596,18 +595,16 @@ describe("Generate E2E Tests", () => { await handleStreaming(llm); }); - it("should handle thinking", { retry: 3 }, async () => { - // Prompt doesn't trigger thinking - // await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + it("should handle thinking mode", { retry: 3 }, async () => { + await handleThinking(llm, { reasoningEffort: "medium" }); }); it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { - await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + await multiTurn(llm, { reasoningEffort: "medium" }); }); it("should handle image input", { retry: 3 }, async () => { - // Can't see image for some reason? 
- // await handleImage(llm); + await handleImage(llm); }); }); From 31cbbd211c0e903360419f64a9becc3973a576b8 Mon Sep 17 00:00:00 2001 From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com> Date: Sun, 28 Dec 2025 16:31:32 -0800 Subject: [PATCH 3/4] fix: use anthropic API for text-only zAI models (image-capable models stay on the OpenAI-compatible API) and filter empty thinking blocks in messages --- packages/ai/scripts/generate-models.ts | 15 ++++---- packages/ai/src/models.generated.ts | 35 ++++++++----------- .../ai/src/providers/openai-completions.ts | 12 ++++--- packages/ai/test/stream.test.ts | 8 ++--- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index e2d460e9..dd51a992 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -263,24 +263,27 @@ async function loadModelsDevData(): Promise[]> { for (const [modelId, model] of Object.entries(data.zai.models)) { const m = model as ModelsDevModel; if (m.tool_call !== true) continue; + const supportsImage = m.modalities?.input?.includes("image") models.push({ id: modelId, name: m.name || modelId, - api: "openai-completions", + api: supportsImage ? "openai-completions" : "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", + baseUrl: supportsImage ? "https://api.z.ai/api/coding/paas/v4" : "https://api.z.ai/api/anthropic", reasoning: m.reasoning === true, - input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + input: supportsImage ? ["text", "image"] : ["text"], cost: { input: m.cost?.input || 0, output: m.cost?.output || 0, cacheRead: m.cost?.cache_read || 0, cacheWrite: m.cost?.cache_write || 0, }, - compat: { - supportsDeveloperRole: false, - }, + ...(supportsImage ? 
{ + compat: { + supportsDeveloperRole: false, + }, + } : {}), contextWindow: m.limit?.context || 4096, maxTokens: m.limit?.output || 4096, }); diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 6cde070c..4fd67bf1 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -6978,10 +6978,9 @@ export const MODELS = { "glm-4.5": { id: "glm-4.5", name: "GLM-4.5", - api: "openai-completions", + api: "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + baseUrl: "https://api.z.ai/api/anthropic", reasoning: true, input: ["text"], cost: { @@ -6992,14 +6991,13 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"openai-completions">, + } satisfies Model<"anthropic-messages">, "glm-4.5-air": { id: "glm-4.5-air", name: "GLM-4.5-Air", - api: "openai-completions", + api: "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + baseUrl: "https://api.z.ai/api/anthropic", reasoning: true, input: ["text"], cost: { @@ -7010,14 +7008,13 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"openai-completions">, + } satisfies Model<"anthropic-messages">, "glm-4.5-flash": { id: "glm-4.5-flash", name: "GLM-4.5-Flash", - api: "openai-completions", + api: "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + baseUrl: "https://api.z.ai/api/anthropic", reasoning: true, input: ["text"], cost: { @@ -7028,7 +7025,7 @@ export const MODELS = { }, contextWindow: 131072, maxTokens: 98304, - } satisfies Model<"openai-completions">, + } satisfies Model<"anthropic-messages">, "glm-4.5v": { id: "glm-4.5v", name: "GLM-4.5V", @@ -7050,10 +7047,9 @@ export const MODELS = { "glm-4.6": { id: "glm-4.6", name: "GLM-4.6", - 
api: "openai-completions", + api: "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + baseUrl: "https://api.z.ai/api/anthropic", reasoning: true, input: ["text"], cost: { @@ -7064,7 +7060,7 @@ export const MODELS = { }, contextWindow: 204800, maxTokens: 131072, - } satisfies Model<"openai-completions">, + } satisfies Model<"anthropic-messages">, "glm-4.6v": { id: "glm-4.6v", name: "GLM-4.6V", @@ -7086,10 +7082,9 @@ export const MODELS = { "glm-4.7": { id: "glm-4.7", name: "GLM-4.7", - api: "openai-completions", + api: "anthropic-messages", provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + baseUrl: "https://api.z.ai/api/anthropic", reasoning: true, input: ["text"], cost: { @@ -7100,6 +7095,6 @@ export const MODELS = { }, contextWindow: 204800, maxTokens: 131072, - } satisfies Model<"openai-completions">, + } satisfies Model<"anthropic-messages">, }, } as const; diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index b8bc98eb..e2d74cb3 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -474,10 +474,14 @@ function convertMessages( // Handle thinking blocks const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[]; - if (thinkingBlocks.length > 0) { + // Filter out empty thinking blocks to avoid API validation errors + const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0); + if (nonEmptyThinkingBlocks.length > 0) { if (compat.requiresThinkingAsText) { // Convert thinking blocks to text with delimiters - const thinkingText = thinkingBlocks.map((b) => `\n${b.thinking}\n`).join("\n"); + const thinkingText = nonEmptyThinkingBlocks + .map((b) => `\n${b.thinking}\n`) + .join("\n"); const textContent = assistantMsg.content as 
Array<{ type: "text"; text: string }> | null; if (textContent) { textContent.unshift({ type: "text", text: thinkingText }); @@ -486,9 +490,9 @@ function convertMessages( } } else { // Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss) - const signature = thinkingBlocks[0].thinkingSignature; + const signature = nonEmptyThinkingBlocks[0].thinkingSignature; if (signature && signature.length > 0) { - (assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("\n"); + (assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n"); } } } diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index d5e22ac4..9a05b48d 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -556,7 +556,7 @@ describe("Generate E2E Tests", () => { }); }); - describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => { + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => { const llm = getModel("zai", "glm-4.5-air"); it("should complete basic text generation", { retry: 3 }, async () => { @@ -571,12 +571,12 @@ describe("Generate E2E Tests", () => { await handleStreaming(llm); }); - it("should handle thinking mode", { retry: 3 }, async () => { - await handleThinking(llm, { reasoningEffort: "medium" }); + it.skip("should handle thinking mode", { retry: 3 }, async () => { + await handleThinking(llm, { thinkingEnabled: true }); }); it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { - await multiTurn(llm, { reasoningEffort: "medium" }); + await multiTurn(llm, { thinkingEnabled: true }); }); }); From 88d693cc005e733ea9a5e87299805a5768bb5a14 Mon Sep 17 00:00:00 2001 From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com> Date: Sun, 28 Dec 2025 16:33:25 -0800 Subject: [PATCH 4/4] test: add thinkingBudgetTokens parameter to 
thinking mode tests --- packages/ai/test/stream.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 9a05b48d..6389f49b 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -572,11 +572,11 @@ describe("Generate E2E Tests", () => { }); it.skip("should handle thinking mode", { retry: 3 }, async () => { - await handleThinking(llm, { thinkingEnabled: true }); + await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { - await multiTurn(llm, { thinkingEnabled: true }); + await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); });