diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 4159ccc7..e55872fa 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -4,6 +4,7 @@ ### Added +- Added Hugging Face provider support via OpenAI-compatible Inference Router ([#994](https://github.com/badlogic/pi-mono/issues/994)) - Added `PI_CACHE_RETENTION` environment variable to control cache TTL for Anthropic (5m vs 1h) and OpenAI (in-memory vs 24h). Set to `long` for extended retention. Only applies to direct API calls (api.anthropic.com, api.openai.com). ([#967](https://github.com/badlogic/pi-mono/issues/967)) ### Fixed diff --git a/packages/ai/scripts/generate-models.ts b/packages/ai/scripts/generate-models.ts index caa43423..bc97b3f4 100644 --- a/packages/ai/scripts/generate-models.ts +++ b/packages/ai/scripts/generate-models.ts @@ -474,6 +474,35 @@ async function loadModelsDevData(): Promise[]> { } } + // Process Hugging Face models + if (data.huggingface?.models) { + for (const [modelId, model] of Object.entries(data.huggingface.models)) { + const m = model as ModelsDevModel; + if (m.tool_call !== true) continue; + + models.push({ + id: modelId, + name: m.name || modelId, + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + reasoning: m.reasoning === true, + input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"], + cost: { + input: m.cost?.input || 0, + output: m.cost?.output || 0, + cacheRead: m.cost?.cache_read || 0, + cacheWrite: m.cost?.cache_write || 0, + }, + compat: { + supportsDeveloperRole: false, + }, + contextWindow: m.limit?.context || 4096, + maxTokens: m.limit?.output || 4096, + }); + } + } + // Process OpenCode Zen models // API mapping based on provider.npm field: // - @ai-sdk/openai → openai-responses diff --git a/packages/ai/src/env-api-keys.ts b/packages/ai/src/env-api-keys.ts index 69bdbf9d..e82e9f0c 100644 --- a/packages/ai/src/env-api-keys.ts +++ b/packages/ai/src/env-api-keys.ts @@ -105,6 +105,7 @@ export function getEnvApiKey(provider: any): string | undefined { mistral: "MISTRAL_API_KEY", minimax: "MINIMAX_API_KEY", "minimax-cn": "MINIMAX_CN_API_KEY", + huggingface: "HF_TOKEN", opencode: "OPENCODE_API_KEY", }; diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 777edd2d..57f23166 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -3282,6 +3282,260 @@ export const MODELS = { maxTokens: 16384, } satisfies Model<"openai-completions">, }, + "huggingface": { + "MiniMaxAI/MiniMax-M2.1": { + id: "MiniMaxAI/MiniMax-M2.1", + name: "MiniMax-M2.1", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "Qwen/Qwen3-235B-A22B-Thinking-2507": { + id: "Qwen/Qwen3-235B-A22B-Thinking-2507", + name: "Qwen3-235B-A22B-Thinking-2507", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "Qwen/Qwen3-Coder-480B-A35B-Instruct": { + id: "Qwen/Qwen3-Coder-480B-A35B-Instruct", + name: "Qwen3-Coder-480B-A35B-Instruct", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 66536, + } satisfies Model<"openai-completions">, + "Qwen/Qwen3-Next-80B-A3B-Instruct": { + id: "Qwen/Qwen3-Next-80B-A3B-Instruct", + name: "Qwen3-Next-80B-A3B-Instruct", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 66536, + } satisfies Model<"openai-completions">, + "Qwen/Qwen3-Next-80B-A3B-Thinking": { + id: "Qwen/Qwen3-Next-80B-A3B-Thinking", + name: "Qwen3-Next-80B-A3B-Thinking", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "XiaomiMiMo/MiMo-V2-Flash": { + id: "XiaomiMiMo/MiMo-V2-Flash", + name: "MiMo-V2-Flash", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.1, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "deepseek-ai/DeepSeek-R1-0528": { + id: "deepseek-ai/DeepSeek-R1-0528", + name: "DeepSeek-R1-0528", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 3, + output: 5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 163840, + } satisfies Model<"openai-completions">, + "deepseek-ai/DeepSeek-V3.2": { + id: "deepseek-ai/DeepSeek-V3.2", + name: "DeepSeek-V3.2", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.28, + output: 0.4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "moonshotai/Kimi-K2-Instruct": { + id: "moonshotai/Kimi-K2-Instruct", + name: "Kimi-K2-Instruct", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "moonshotai/Kimi-K2-Instruct-0905": { + id: "moonshotai/Kimi-K2-Instruct-0905", + name: "Kimi-K2-Instruct-0905", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "moonshotai/Kimi-K2-Thinking": { + id: "moonshotai/Kimi-K2-Thinking", + name: "Kimi-K2-Thinking", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "moonshotai/Kimi-K2.5": { + id: "moonshotai/Kimi-K2.5", + name: "Kimi-K2.5", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 3, + cacheRead: 0.1, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "zai-org/GLM-4.7": { + id: "zai-org/GLM-4.7", + name: "GLM-4.7", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.2, + cacheRead: 0.11, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "zai-org/GLM-4.7-Flash": { + id: "zai-org/GLM-4.7-Flash", + name: "GLM-4.7-Flash", + api: "openai-completions", + provider: "huggingface", + baseUrl: "https://router.huggingface.co/v1", + compat: {"supportsDeveloperRole":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + }, "minimax": { "MiniMax-M2": { id: "MiniMax-M2", @@ -4823,6 +5077,40 @@ export const MODELS = { contextWindow: 262144, maxTokens: 262144, } satisfies Model<"openai-completions">, + "kimi-k2.5": { + id: "kimi-k2.5", + name: "Kimi K2.5", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 3, + cacheRead: 0.1, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "minimax-m2.1": { + id: "minimax-m2.1", + name: "MiniMax M2.1", + api: "openai-completions", + provider: "opencode", + baseUrl: "https://opencode.ai/zen/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0.1, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"openai-completions">, "qwen3-coder": { id: "qwen3-coder", name: "Qwen3 Coder", @@ -5182,6 +5470,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"openai-completions">, + "arcee-ai/trinity-large-preview:free": { + id: "arcee-ai/trinity-large-preview:free", + name: "Arcee AI: Trinity Large Preview (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "arcee-ai/trinity-mini": { id: "arcee-ai/trinity-mini", name: "Arcee AI: Trinity Mini", @@ -5550,7 +5855,7 @@ export const MODELS = { cost: { input: 0.21, output: 0.32, - cacheRead: 0, + cacheRead: 0.21, cacheWrite: 0, }, contextWindow: 163840, @@ -5673,7 +5978,7 @@ export const MODELS = { cacheWrite: 0.08333333333333334, }, contextWindow: 1048576, - maxTokens: 65535, + maxTokens: 65536, } satisfies Model<"openai-completions">, "google/gemini-2.5-pro": { id: "google/gemini-2.5-pro", @@ -6066,23 +6371,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "mistralai/devstral-2512:free": { - id: "mistralai/devstral-2512:free", - name: "Mistral: Devstral 2 2512 (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/devstral-medium": { id: "mistralai/devstral-medium", name: "Mistral: Devstral Medium", @@ -6593,6 +6881,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65535, } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2.5": { + id: "moonshotai/kimi-k2.5", + name: "MoonshotAI: Kimi K2.5", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5700000000000001, + output: 2.8499999999999996, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, "nex-agi/deepseek-v3.1-nex-n1": { id: "nex-agi/deepseek-v3.1-nex-n1", name: "Nex AGI: DeepSeek V3.1 Nex N1", @@ -6687,13 +6992,13 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.06, - output: 0.24, + input: 0.049999999999999996, + output: 0.19999999999999998, cacheRead: 0, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 262144, + maxTokens: 4096, } satisfies Model<"openai-completions">, "nvidia/nemotron-3-nano-30b-a3b:free": { id: "nvidia/nemotron-3-nano-30b-a3b:free", @@ -7947,7 +8252,7 @@ export const MODELS = { cost: { input: 0.049999999999999996, output: 0.25, - cacheRead: 0, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, contextWindow: 32000, @@ -8395,6 +8700,23 @@ export const MODELS = { contextWindow: 163840, maxTokens: 65536, } satisfies Model<"openai-completions">, + "upstage/solar-pro-3:free": { + id: "upstage/solar-pro-3:free", + name: "Upstage: Solar Pro 3 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "x-ai/grok-3": { id: "x-ai/grok-3", name: "xAI: Grok 3", @@ -8890,6 +9212,23 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"anthropic-messages">, + "alibaba/qwen3-max-thinking": { + id: "alibaba/qwen3-max-thinking", + name: "Qwen 3 Max Thinking", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text"], + cost: { + input: 1.2, + output: 6, + cacheRead: 0.24, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"anthropic-messages">, "anthropic/claude-3-haiku": { id: "anthropic/claude-3-haiku", name: "Claude 3 Haiku", @@ -9077,6 +9416,23 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, + "arcee-ai/trinity-large-preview": { + id: "arcee-ai/trinity-large-preview", + name: "Trinity Large Preview", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131000, + maxTokens: 131000, + } satisfies Model<"anthropic-messages">, "bytedance/seed-1.6": { id: "bytedance/seed-1.6", name: "Seed 1.6", @@ -9774,6 +10130,23 @@ export const MODELS = { contextWindow: 256000, maxTokens: 16384, } satisfies Model<"anthropic-messages">, + "moonshotai/kimi-k2.5": { + id: "moonshotai/kimi-k2.5", + name: "Kimi K2.5", + api: "anthropic-messages", + provider: "vercel-ai-gateway", + baseUrl: "https://ai-gateway.vercel.sh", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.2, + output: 1.2, + cacheRead: 0.6, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"anthropic-messages">, "nvidia/nemotron-nano-12b-v2-vl": { id: "nvidia/nemotron-nano-12b-v2-vl", name: "Nvidia Nemotron Nano 12B V2 VL", @@ -11298,5 +11671,23 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, + "glm-4.7-flash": { + id: "glm-4.7-flash", + name: "GLM-4.7-Flash", + api: "openai-completions", + provider: "zai", + baseUrl: "https://api.z.ai/api/coding/paas/v4", + compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"}, + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 131072, + } satisfies Model<"openai-completions">, }, } as const; diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 5308f142..0da018aa 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -35,6 +35,7 @@ export type KnownProvider = | "mistral" | "minimax" | "minimax-cn" + | "huggingface" | "opencode"; export type Provider = KnownProvider | string; diff --git a/packages/ai/test/context-overflow.test.ts b/packages/ai/test/context-overflow.test.ts index fde846f7..98f2087a 100644 --- a/packages/ai/test/context-overflow.test.ts +++ b/packages/ai/test/context-overflow.test.ts @@ -366,6 +366,22 @@ describe("Context overflow error handling", () => { }, 120000); }); + // ============================================================================= + // Hugging Face + // Uses OpenAI-compatible Inference Router + // ============================================================================= + + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => { + it("Kimi-K2.5 - should detect overflow via isContextOverflow", async () => { + const model = getModel("huggingface", "moonshotai/Kimi-K2.5"); + const result = await testContextOverflow(model, process.env.HF_TOKEN!); + logResult(result); + + expect(result.stopReason).toBe("error"); + expect(isContextOverflow(result.response, model.contextWindow)).toBe(true); + }, 120000); + }); + // ============================================================================= // z.ai // Special case: Sometimes accepts overflow silently, sometimes rate limits diff --git a/packages/ai/test/cross-provider-handoff.test.ts b/packages/ai/test/cross-provider-handoff.test.ts index 9d9bb52d..a9058478 100644 --- a/packages/ai/test/cross-provider-handoff.test.ts +++ b/packages/ai/test/cross-provider-handoff.test.ts @@ -86,6 +86,8 @@ const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [ { provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" }, // Groq { provider: "groq", model: "openai/gpt-oss-120b", label: "groq-gpt-oss-120b" }, + // Hugging Face + { provider: "huggingface", model: "moonshotai/Kimi-K2.5", label: "huggingface-kimi-k2.5" }, // Mistral { provider: "mistral", model: "devstral-medium-latest", label: "mistral-devstral-medium" }, // MiniMax diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts index 8576ff16..e18d8d03 100644 --- a/packages/ai/test/empty.test.ts +++ b/packages/ai/test/empty.test.ts @@ -308,6 +308,26 @@ describe("AI Providers Empty Message Tests", () => { }); }); + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider Empty Messages", () => { + const llm = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { + await testEmptyMessage(llm); + }); + + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { + await testEmptyStringMessage(llm); + }); + + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { + await testWhitespaceOnlyMessage(llm); + }); + + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { + await testEmptyAssistantMessage(llm); + }); + }); + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => { const llm = getModel("zai", "glm-4.5-air"); diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 2434db06..cef2f9bc 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -604,6 +604,30 @@ describe("Generate E2E Tests", () => { }); }); + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider (Kimi-K2.5 via OpenAI Completions)", () => { + const llm = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + it("should complete basic text generation", { retry: 3 }, async () => { + await basicTextGeneration(llm); + }); + + it("should handle tool calling", { retry: 3 }, async () => { + await handleToolCall(llm); + }); + + it("should handle streaming", { retry: 3 }, async () => { + await handleStreaming(llm); + }); + + it("should handle thinking mode", { retry: 3 }, async () => { + await handleThinking(llm, { reasoningEffort: "medium" }); + }); + + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { + await multiTurn(llm, { reasoningEffort: "medium" }); + }); + }); + describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => { const llm = getModel("openrouter", "z-ai/glm-4.5v"); diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts index ce9996ff..c4709c47 100644 --- a/packages/ai/test/tokens.test.ts +++ b/packages/ai/test/tokens.test.ts @@ -154,6 +154,14 @@ describe("Token Statistics on Abort", () => { }); }); + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => { + const llm = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); + }); + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => { const llm = getModel("zai", "glm-4.5-flash"); diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts index 92d86d80..8caf22d0 100644 --- a/packages/ai/test/tool-call-without-result.test.ts +++ b/packages/ai/test/tool-call-without-result.test.ts @@ -168,6 +168,14 @@ describe("Tool Call Without Result Tests", () => { }); }); + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => { + const model = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => { const model = getModel("zai", "glm-4.5-flash"); diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts index 34cb9702..b5ce961a 100644 --- a/packages/ai/test/total-tokens.test.ts +++ b/packages/ai/test/total-tokens.test.ts @@ -306,6 +306,25 @@ describe("totalTokens field", () => { ); }); + // ========================================================================= + // Hugging Face + // ========================================================================= + + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => { + it("Kimi-K2.5 - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => { + const llm = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + console.log(`\nHugging Face / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.HF_TOKEN }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }); + }); + // ========================================================================= // z.ai // ========================================================================= diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts index afb18eea..38e5426e 100644 --- a/packages/ai/test/unicode-surrogate.test.ts +++ b/packages/ai/test/unicode-surrogate.test.ts @@ -611,6 +611,22 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { }); }); + describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider Unicode Handling", () => { + const llm = getModel("huggingface", "moonshotai/Kimi-K2.5"); + + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { + await testEmojiInToolResults(llm); + }); + + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { + await testRealWorldLinkedInData(llm); + }); + + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { + await testUnpairedHighSurrogate(llm); + }); + }); + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => { const llm = getModel("zai", "glm-4.5-air"); diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index 06fd497c..5d0c0388 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -93,6 +93,7 @@ For each built-in provider, pi maintains a list of tool-capable models, updated - Vercel AI Gateway - ZAI - OpenCode Zen +- Hugging Face - MiniMax See [docs/providers.md](docs/providers.md) for detailed setup instructions. diff --git a/packages/coding-agent/docs/providers.md b/packages/coding-agent/docs/providers.md index 1757a438..842a2ab6 100644 --- a/packages/coding-agent/docs/providers.md +++ b/packages/coding-agent/docs/providers.md @@ -62,6 +62,7 @@ pi | Vercel AI Gateway | `AI_GATEWAY_API_KEY` | | ZAI | `ZAI_API_KEY` | | OpenCode Zen | `OPENCODE_API_KEY` | +| Hugging Face | `HF_TOKEN` | | MiniMax | `MINIMAX_API_KEY` | | MiniMax (China) | `MINIMAX_CN_API_KEY` | diff --git a/packages/coding-agent/src/core/model-resolver.ts b/packages/coding-agent/src/core/model-resolver.ts index b2051fb9..8a7082b7 100644 --- a/packages/coding-agent/src/core/model-resolver.ts +++ b/packages/coding-agent/src/core/model-resolver.ts @@ -31,6 +31,7 @@ export const defaultModelPerProvider: Record = { mistral: "devstral-medium-latest", minimax: "MiniMax-M2.1", "minimax-cn": "MiniMax-M2.1", + huggingface: "moonshotai/Kimi-K2.5", opencode: "claude-opus-4-5", };