From 93ea8298ab070f284694a8f76a5cdf78db149981 Mon Sep 17 00:00:00 2001
From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com>
Date: Sun, 28 Dec 2025 13:29:36 -0800
Subject: [PATCH 1/4] fix: update zai model API and baseUrl in generate-models
 script

---
 packages/ai/scripts/generate-models.ts | 45 ++++++++++++++------------
 1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts
index 8d3c86fd..e2d460e9 100644
--- a/packages/ai/scripts/generate-models.ts
+++ b/packages/ai/scripts/generate-models.ts
@@ -260,28 +260,31 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
 
 		// Process xAi models
 		if (data.zai?.models) {
-			for (const [modelId, model] of Object.entries(data.zai.models)) {
-				const m = model as ModelsDevModel;
-				if (m.tool_call !== true) continue;
+      for (const [modelId, model] of Object.entries(data.zai.models)) {
+        const m = model as ModelsDevModel;
+        if (m.tool_call !== true) continue;
 
-				models.push({
-					id: modelId,
-					name: m.name || modelId,
-					api: "anthropic-messages",
-					provider: "zai",
-					baseUrl: "https://api.z.ai/api/anthropic",
-					reasoning: m.reasoning === true,
-					input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
-					cost: {
-						input: m.cost?.input || 0,
-						output: m.cost?.output || 0,
-						cacheRead: m.cost?.cache_read || 0,
-						cacheWrite: m.cost?.cache_write || 0,
-					},
-					contextWindow: m.limit?.context || 4096,
-					maxTokens: m.limit?.output || 4096,
-				});
-			}
+        models.push({
+          id: modelId,
+          name: m.name || modelId,
+          api: "openai-completions",
+          provider: "zai",
+          baseUrl: "https://api.z.ai/api/coding/paas/v4",
+          reasoning: m.reasoning === true,
+          input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+          cost: {
+            input: m.cost?.input || 0,
+            output: m.cost?.output || 0,
+            cacheRead: m.cost?.cache_read || 0,
+            cacheWrite: m.cost?.cache_write || 0,
+          },
+          compat: {
+            supportsDeveloperRole: false,
+          },
+          contextWindow: m.limit?.context || 4096,
+          maxTokens: m.limit?.output || 4096,
+        });
+      }
 		}
 
 		// Process Mistral models

From e9c6d95e6b8e223898b3680113554ba5fda572e8 Mon Sep 17 00:00:00 2001
From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com>
Date: Sun, 28 Dec 2025 13:37:34 -0800
Subject: [PATCH 2/4] Migrate zai provider from Anthropic to OpenAI-compatible
 API

- Change all zai models from anthropic-messages to openai-completions API
- Update baseUrl from https://api.z.ai/api/anthropic to https://api.z.ai/api/coding/paas/v4
- Add compat setting to disable developer role for zai
- Update zai provider tests to use OpenAI-style options (reasoningEffort instead of thinkingEnabled/thinkingBudgetTokens)
- Enable previously disabled thinking and image input tests for zai models
---
 packages/ai/src/models.generated.ts | 49 ++++++++++++++++-------------
 packages/ai/test/stream.test.ts     | 21 ++++++-------
 2 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts
index 579eab84..6cde070c 100644
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@@ -6978,9 +6978,10 @@ export const MODELS = {
 		"glm-4.5": {
 			id: "glm-4.5",
 			name: "GLM-4.5",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -6991,13 +6992,14 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.5-air": {
 			id: "glm-4.5-air",
 			name: "GLM-4.5-Air",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7008,13 +7010,14 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.5-flash": {
 			id: "glm-4.5-flash",
 			name: "GLM-4.5-Flash",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7025,13 +7028,14 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.5v": {
 			id: "glm-4.5v",
 			name: "GLM-4.5V",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
@@ -7042,13 +7046,14 @@ export const MODELS = {
 			},
 			contextWindow: 64000,
 			maxTokens: 16384,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.6": {
 			id: "glm-4.6",
 			name: "GLM-4.6",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7059,13 +7064,14 @@ export const MODELS = {
 			},
 			contextWindow: 204800,
 			maxTokens: 131072,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.6v": {
 			id: "glm-4.6v",
 			name: "GLM-4.6V",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text", "image"],
 			cost: {
@@ -7076,13 +7082,14 @@ export const MODELS = {
 			},
 			contextWindow: 128000,
 			maxTokens: 32768,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 		"glm-4.7": {
 			id: "glm-4.7",
 			name: "GLM-4.7",
-			api: "anthropic-messages",
+			api: "openai-completions",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/anthropic",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false},
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7093,6 +7100,6 @@ export const MODELS = {
 			},
 			contextWindow: 204800,
 			maxTokens: 131072,
-		} satisfies Model<"anthropic-messages">,
+		} satisfies Model<"openai-completions">,
 	},
 } as const;
diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts
index 3da9b055..d5e22ac4 100644
--- a/packages/ai/test/stream.test.ts
+++ b/packages/ai/test/stream.test.ts
@@ -556,7 +556,7 @@ describe("Generate E2E Tests", () => {
 		});
 	});
 
-	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => {
+	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => {
 		const llm = getModel("zai", "glm-4.5-air");
 
 		it("should complete basic text generation", { retry: 3 }, async () => {
@@ -571,17 +571,16 @@ describe("Generate E2E Tests", () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking", { retry: 3 }, async () => {
-			// Prompt doesn't trigger thinking
-			// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
+		it("should handle thinking mode", { retry: 3 }, async () => {
+			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
 		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-			await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
+			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 	});
 
-	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => {
+	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via OpenAI Completions)", () => {
 		const llm = getModel("zai", "glm-4.5v");
 
 		it("should complete basic text generation", { retry: 3 }, async () => {
@@ -596,18 +595,16 @@ describe("Generate E2E Tests", () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking", { retry: 3 }, async () => {
-			// Prompt doesn't trigger thinking
-			// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
+		it("should handle thinking mode", { retry: 3 }, async () => {
+			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
 		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-			await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
+			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 
 		it("should handle image input", { retry: 3 }, async () => {
-			// Can't see image for some reason?
-			// await handleImage(llm);
+			await handleImage(llm);
 		});
 	});
 

From 31cbbd211c0e903360419f64a9becc3973a576b8 Mon Sep 17 00:00:00 2001
From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com>
Date: Sun, 28 Dec 2025 16:31:32 -0800
Subject: [PATCH 3/4] fix: use anthropic API for text-only zAI models and
 filter empty thinking blocks in OpenAI-completions messages

---
 packages/ai/scripts/generate-models.ts        | 15 ++++----
 packages/ai/src/models.generated.ts           | 35 ++++++++-----------
 .../ai/src/providers/openai-completions.ts    | 12 ++++---
 packages/ai/test/stream.test.ts               |  8 ++---
 4 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts
index e2d460e9..dd51a992 100644
--- a/packages/ai/scripts/generate-models.ts
+++ b/packages/ai/scripts/generate-models.ts
@@ -263,24 +263,27 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
       for (const [modelId, model] of Object.entries(data.zai.models)) {
         const m = model as ModelsDevModel;
         if (m.tool_call !== true) continue;
+        const supportsImage = m.modalities?.input?.includes("image")
 
         models.push({
           id: modelId,
           name: m.name || modelId,
-          api: "openai-completions",
+          api: supportsImage ? "openai-completions" : "anthropic-messages",
           provider: "zai",
-          baseUrl: "https://api.z.ai/api/coding/paas/v4",
+          baseUrl: supportsImage ? "https://api.z.ai/api/coding/paas/v4" : "https://api.z.ai/api/anthropic",
           reasoning: m.reasoning === true,
-          input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
+          input: supportsImage ? ["text", "image"] : ["text"],
           cost: {
             input: m.cost?.input || 0,
             output: m.cost?.output || 0,
             cacheRead: m.cost?.cache_read || 0,
             cacheWrite: m.cost?.cache_write || 0,
           },
-          compat: {
-            supportsDeveloperRole: false,
-          },
+          ...(supportsImage ? {
+            compat: {
+              supportsDeveloperRole: false,
+            },
+          } : {}),
           contextWindow: m.limit?.context || 4096,
           maxTokens: m.limit?.output || 4096,
         });
diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts
index 6cde070c..4fd67bf1 100644
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@@ -6978,10 +6978,9 @@ export const MODELS = {
 		"glm-4.5": {
 			id: "glm-4.5",
 			name: "GLM-4.5",
-			api: "openai-completions",
+			api: "anthropic-messages",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false},
+			baseUrl: "https://api.z.ai/api/anthropic",
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -6992,14 +6991,13 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"openai-completions">,
+		} satisfies Model<"anthropic-messages">,
 		"glm-4.5-air": {
 			id: "glm-4.5-air",
 			name: "GLM-4.5-Air",
-			api: "openai-completions",
+			api: "anthropic-messages",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false},
+			baseUrl: "https://api.z.ai/api/anthropic",
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7010,14 +7008,13 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"openai-completions">,
+		} satisfies Model<"anthropic-messages">,
 		"glm-4.5-flash": {
 			id: "glm-4.5-flash",
 			name: "GLM-4.5-Flash",
-			api: "openai-completions",
+			api: "anthropic-messages",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false},
+			baseUrl: "https://api.z.ai/api/anthropic",
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7028,7 +7025,7 @@ export const MODELS = {
 			},
 			contextWindow: 131072,
 			maxTokens: 98304,
-		} satisfies Model<"openai-completions">,
+		} satisfies Model<"anthropic-messages">,
 		"glm-4.5v": {
 			id: "glm-4.5v",
 			name: "GLM-4.5V",
@@ -7050,10 +7047,9 @@ export const MODELS = {
 		"glm-4.6": {
 			id: "glm-4.6",
 			name: "GLM-4.6",
-			api: "openai-completions",
+			api: "anthropic-messages",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false},
+			baseUrl: "https://api.z.ai/api/anthropic",
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7064,7 +7060,7 @@ export const MODELS = {
 			},
 			contextWindow: 204800,
 			maxTokens: 131072,
-		} satisfies Model<"openai-completions">,
+		} satisfies Model<"anthropic-messages">,
 		"glm-4.6v": {
 			id: "glm-4.6v",
 			name: "GLM-4.6V",
@@ -7086,10 +7082,9 @@ export const MODELS = {
 		"glm-4.7": {
 			id: "glm-4.7",
 			name: "GLM-4.7",
-			api: "openai-completions",
+			api: "anthropic-messages",
 			provider: "zai",
-			baseUrl: "https://api.z.ai/api/coding/paas/v4",
-			compat: {"supportsDeveloperRole":false},
+			baseUrl: "https://api.z.ai/api/anthropic",
 			reasoning: true,
 			input: ["text"],
 			cost: {
@@ -7100,6 +7095,6 @@ export const MODELS = {
 			},
 			contextWindow: 204800,
 			maxTokens: 131072,
-		} satisfies Model<"openai-completions">,
+		} satisfies Model<"anthropic-messages">,
 	},
 } as const;
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index b8bc98eb..e2d74cb3 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -474,10 +474,14 @@ function convertMessages(
 
 			// Handle thinking blocks
 			const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
-			if (thinkingBlocks.length > 0) {
+			// Filter out empty thinking blocks to avoid API validation errors
+			const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
+			if (nonEmptyThinkingBlocks.length > 0) {
 				if (compat.requiresThinkingAsText) {
 					// Convert thinking blocks to text with <thinking> delimiters
-					const thinkingText = thinkingBlocks.map((b) => `<thinking>\n${b.thinking}\n</thinking>`).join("\n");
+					const thinkingText = nonEmptyThinkingBlocks
+						.map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
+						.join("\n");
 					const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
 					if (textContent) {
 						textContent.unshift({ type: "text", text: thinkingText });
@@ -486,9 +490,9 @@ function convertMessages(
 					}
 				} else {
 					// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
-					const signature = thinkingBlocks[0].thinkingSignature;
+					const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
 					if (signature && signature.length > 0) {
-						(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("\n");
+						(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
 					}
 				}
 			}
diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts
index d5e22ac4..9a05b48d 100644
--- a/packages/ai/test/stream.test.ts
+++ b/packages/ai/test/stream.test.ts
@@ -556,7 +556,7 @@ describe("Generate E2E Tests", () => {
 		});
 	});
 
-	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => {
+	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => {
 		const llm = getModel("zai", "glm-4.5-air");
 
 		it("should complete basic text generation", { retry: 3 }, async () => {
@@ -571,12 +571,12 @@ describe("Generate E2E Tests", () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking mode", { retry: 3 }, async () => {
-			await handleThinking(llm, { reasoningEffort: "medium" });
+		it.skip("should handle thinking mode", { retry: 3 }, async () => {
+			await handleThinking(llm, { thinkingEnabled: true });
 		});
 
 		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-			await multiTurn(llm, { reasoningEffort: "medium" });
+			await multiTurn(llm, { thinkingEnabled: true });
 		});
 	});
 

From 88d693cc005e733ea9a5e87299805a5768bb5a14 Mon Sep 17 00:00:00 2001
From: Anton Kuzmenko <1917237+default-anton@users.noreply.github.com>
Date: Sun, 28 Dec 2025 16:33:25 -0800
Subject: [PATCH 4/4] test: add thinkingBudgetTokens parameter to thinking mode
 tests

---
 packages/ai/test/stream.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts
index 9a05b48d..6389f49b 100644
--- a/packages/ai/test/stream.test.ts
+++ b/packages/ai/test/stream.test.ts
@@ -572,11 +572,11 @@ describe("Generate E2E Tests", () => {
 		});
 
 		it.skip("should handle thinking mode", { retry: 3 }, async () => {
-			await handleThinking(llm, { thinkingEnabled: true });
+			await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 
 		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-			await multiTurn(llm, { thinkingEnabled: true });
+			await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 	});