diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index 1ddfa16b..25050fe7 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -259,6 +259,32 @@ async function loadModelsDevData(): Promise[]> { } } + // Process xAi models + if (data.zai?.models) { + for (const [modelId, model] of Object.entries(data.zai.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + console.log(`Loaded ${models.length} tool-capable models from models.dev`); return models; } catch (error) { @@ -277,7 +303,7 @@ async function generateModels() { // Combine models (models.dev has priority) const allModels = [...modelsDevModels, ...openRouterModels]; - // Add missing gpt models + // Add missing gpt models (can't use tools) if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) { allModels.push({ id: "gpt-5-chat-latest", diff --git a/packages/ai/src/generate.ts b/packages/ai/src/generate.ts index 461a1c0d..fd67f04b 100644 --- a/packages/ai/src/generate.ts +++ b/packages/ai/src/generate.ts @@ -106,6 +106,7 @@ export function getApiKey(provider: any): string | undefined { cerebras: "CEREBRAS_API_KEY", xai: "XAI_API_KEY", openrouter: "OPENROUTER_API_KEY", + zai: "ZAI_API_KEY", }; const envVar = envMap[provider]; diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 5242d3da..05a0cbbf 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -946,6 +946,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 16384, } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-instruct-0905": { + id: "moonshotai/kimi-k2-instruct-0905", + name: "Kimi K2 Instruct 0905", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "moonshotai/kimi-k2-instruct": { id: "moonshotai/kimi-k2-instruct", name: "Kimi K2 Instruct", @@ -1325,7 +1342,145 @@ export const MODELS = { maxTokens: 8192, } satisfies Model<"openai-completions">, }, + zai: { + "glm-4.5-air": { + id: "glm-4.5-air", + name: "GLM-4.5-Air", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.2, + output: 1.1, + cacheRead: 0.03, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + "glm-4.5v": { + id: "glm-4.5v", + name: "GLM 4.5V", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 1.8, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 64000, + maxTokens: 16384, + } satisfies Model<"anthropic-messages">, + "glm-4.5-flash": { + id: "glm-4.5-flash", + name: "GLM-4.5-Flash", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + "glm-4.5": { + id: "glm-4.5", + name: "GLM-4.5", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.2, + cacheRead: 0.11, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + }, openrouter: { + "openrouter/sonoma-dusk-alpha": { + id: "openrouter/sonoma-dusk-alpha", + name: "Sonoma Dusk Alpha", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openrouter/sonoma-sky-alpha": { + id: "openrouter/sonoma-sky-alpha", + name: "Sonoma Sky Alpha", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen3-max": { + id: "qwen/qwen3-max", + name: "Qwen: Qwen3 Max", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-0905": { + id: "moonshotai/kimi-k2-0905", + name: "MoonshotAI: Kimi K2 0905", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.2962, + output: 1.1852999999999998, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "deepcogito/cogito-v2-preview-llama-109b-moe": { id: "deepcogito/cogito-v2-preview-llama-109b-moe", name: "Cogito V2 Preview Llama 109B", @@ -1343,6 +1498,23 @@ export const MODELS = { contextWindow: 32767, maxTokens: 4096, } satisfies Model<"openai-completions">, + "stepfun-ai/step3": { + id: "stepfun-ai/step3", + name: "StepFun: Step3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5700000000000001, + output: 1.42, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 65536, + maxTokens: 65536, + } satisfies Model<"openai-completions">, "qwen/qwen3-30b-a3b-thinking-2507": { id: "qwen/qwen3-30b-a3b-thinking-2507", name: "Qwen: Qwen3 30B A3B Thinking 2507", @@ -1685,7 +1857,7 @@ export const MODELS = { } satisfies Model<"openai-completions">, "moonshotai/kimi-k2:free": { id: "moonshotai/kimi-k2:free", - name: "MoonshotAI: Kimi K2 (free)", + name: "MoonshotAI: Kimi K2 0711 (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -1702,7 +1874,7 @@ export const MODELS = { } satisfies Model<"openai-completions">, "moonshotai/kimi-k2": { id: "moonshotai/kimi-k2", - name: "MoonshotAI: Kimi K2", + name: "MoonshotAI: Kimi K2 0711", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -2236,12 +2408,12 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.075, - output: 0.15, + input: 0.15, + output: 0.39999999999999997, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, "mistralai/mistral-saba": { @@ -2737,23 +2909,6 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-8b-instruct": { - id: "meta-llama/llama-3.1-8b-instruct", - name: "Meta: Llama 3.1 8B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.015, - output: 0.02, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -2771,6 +2926,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-8b-instruct": { + id: "meta-llama/llama-3.1-8b-instruct", + name: "Meta: Llama 3.1 8B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.015, + output: 0.02, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index e81dc677..dbbb2bfc 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -23,7 +23,7 @@ const _exhaustive: _CheckExhaustive = true; // Helper type to get options for a specific API export type OptionsForApi = ApiOptionsMap[TApi]; -export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter"; +export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter" | "zai"; export type Provider = KnownProvider | string; export type ReasoningEffort = "minimal" | "low" | "medium" | "high"; diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts index e806fe8c..8549fed3 100644 --- a/packages/ai/test/empty.test.ts +++ b/packages/ai/test/empty.test.ts @@ -262,4 +262,24 @@ describe("AI Providers Empty Message Tests", () => { await testEmptyAssistantMessage(llm); }); }); + + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => { + const llm = getModel("zai", "glm-4.5-air"); + + it("should handle empty content array", async () => { + await testEmptyMessage(llm); + }); + + it("should handle empty string content", async () => { + await testEmptyStringMessage(llm); + }); + + it("should handle whitespace-only content", async () => { + await testWhitespaceOnlyMessage(llm); + }); + + it("should handle empty assistant message in conversation", async () => { + await testEmptyAssistantMessage(llm); + }); + }); }); diff --git a/packages/ai/test/generate.test.ts b/packages/ai/test/generate.test.ts index 4c342d8d..bb852bfe 100644 --- a/packages/ai/test/generate.test.ts +++ b/packages/ai/test/generate.test.ts @@ -118,7 +118,7 @@ async function handleThinking(model: Model, options?: Op messages: [ { role: "user", - content: `Think about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.`, + content: `Think long and hard about ${(Math.random() * 255) | 0} + 27. Think step by step. Then output the result.`, }, ], }; @@ -169,7 +169,7 @@ async function handleImage(model: Model, options?: Optio content: [ { type: "text", - text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...).", + text: "What do you see in this image? Please describe the shape (circle, rectangle, square, triangle, ...) and color (red, blue, green, ...). You MUST reply in English.", }, imageContent, ], @@ -512,6 +512,60 @@ describe("Generate E2E Tests", () => { }); }); + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => { + const llm = getModel("zai", "glm-4.5-air"); + + it("should complete basic text generation", async () => { + await basicTextGeneration(llm); + }); + + it("should handle tool calling", async () => { + await handleToolCall(llm); + }); + + it("should handle streaming", async () => { + await handleStreaming(llm); + }); + + it("should handle thinking", async () => { + // Prompt doesn't trigger thinking + // await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + }); + + it("should handle multi-turn with thinking and tools", async () => { + await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + }); + }); + + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => { + const llm = getModel("zai", "glm-4.5v"); + + it("should complete basic text generation", async () => { + await basicTextGeneration(llm); + }); + + it("should handle tool calling", async () => { + await handleToolCall(llm); + }); + + it("should handle streaming", async () => { + await handleStreaming(llm); + }); + + it("should handle thinking", async () => { + await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + }); + + it("should handle multi-turn with thinking and tools", async () => { + await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + }); + + it("should handle image input", async () => { + // Can't see image for some reason? + // await handleImage(llm); + }); + }); + // Check if ollama is installed let ollamaInstalled = false; try {