feat(ai): add Hugging Face provider support

- Add huggingface to KnownProvider type
- Add HF_TOKEN env var mapping
- Process huggingface models from models.dev (14 models)
- Use openai-completions API with compat settings
- Add tests for all provider test suites
- Update documentation

fixes #994
2026-04-18 15:03:02 +00:00 · 2026-01-29 02:40:14 +01:00 · 2026-01-29 02:40:14 +01:00 · c808de605a
commit c808de605a
parent f3cfb7e1ae
16 changed files with 562 additions and 23 deletions
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -3282,6 +3282,260 @@ export const MODELS = {
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
 	},
+	"huggingface": {
+		"MiniMaxAI/MiniMax-M2.1": {
+			id: "MiniMaxAI/MiniMax-M2.1",
+			name: "MiniMax-M2.1",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 1.2,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 204800,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
+		"Qwen/Qwen3-235B-A22B-Thinking-2507": {
+			id: "Qwen/Qwen3-235B-A22B-Thinking-2507",
+			name: "Qwen3-235B-A22B-Thinking-2507",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 3,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
+		"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
+			id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+			name: "Qwen3-Coder-480B-A35B-Instruct",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 2,
+				output: 2,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 66536,
+		} satisfies Model<"openai-completions">,
+		"Qwen/Qwen3-Next-80B-A3B-Instruct": {
+			id: "Qwen/Qwen3-Next-80B-A3B-Instruct",
+			name: "Qwen3-Next-80B-A3B-Instruct",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.25,
+				output: 1,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 66536,
+		} satisfies Model<"openai-completions">,
+		"Qwen/Qwen3-Next-80B-A3B-Thinking": {
+			id: "Qwen/Qwen3-Next-80B-A3B-Thinking",
+			name: "Qwen3-Next-80B-A3B-Thinking",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 2,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
+		"XiaomiMiMo/MiMo-V2-Flash": {
+			id: "XiaomiMiMo/MiMo-V2-Flash",
+			name: "MiMo-V2-Flash",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.1,
+				output: 0.3,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
+		"deepseek-ai/DeepSeek-R1-0528": {
+			id: "deepseek-ai/DeepSeek-R1-0528",
+			name: "DeepSeek-R1-0528",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 3,
+				output: 5,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 163840,
+			maxTokens: 163840,
+		} satisfies Model<"openai-completions">,
+		"deepseek-ai/DeepSeek-V3.2": {
+			id: "deepseek-ai/DeepSeek-V3.2",
+			name: "DeepSeek-V3.2",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.28,
+				output: 0.4,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 163840,
+			maxTokens: 65536,
+		} satisfies Model<"openai-completions">,
+		"moonshotai/Kimi-K2-Instruct": {
+			id: "moonshotai/Kimi-K2-Instruct",
+			name: "Kimi-K2-Instruct",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 1,
+				output: 3,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131072,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
+		"moonshotai/Kimi-K2-Instruct-0905": {
+			id: "moonshotai/Kimi-K2-Instruct-0905",
+			name: "Kimi-K2-Instruct-0905",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 1,
+				output: 3,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
+		"moonshotai/Kimi-K2-Thinking": {
+			id: "moonshotai/Kimi-K2-Thinking",
+			name: "Kimi-K2-Thinking",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.6,
+				output: 2.5,
+				cacheRead: 0.15,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
+		"moonshotai/Kimi-K2.5": {
+			id: "moonshotai/Kimi-K2.5",
+			name: "Kimi-K2.5",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0.6,
+				output: 3,
+				cacheRead: 0.1,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
+		"zai-org/GLM-4.7": {
+			id: "zai-org/GLM-4.7",
+			name: "GLM-4.7",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.6,
+				output: 2.2,
+				cacheRead: 0.11,
+				cacheWrite: 0,
+			},
+			contextWindow: 204800,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
+		"zai-org/GLM-4.7-Flash": {
+			id: "zai-org/GLM-4.7-Flash",
+			name: "GLM-4.7-Flash",
+			api: "openai-completions",
+			provider: "huggingface",
+			baseUrl: "https://router.huggingface.co/v1",
+			compat: {"supportsDeveloperRole":false},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 200000,
+			maxTokens: 128000,
+		} satisfies Model<"openai-completions">,
+	},
 	"minimax": {
 		"MiniMax-M2": {
 			id: "MiniMax-M2",
@ -4823,6 +5077,40 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 262144,
 		} satisfies Model<"openai-completions">,
+		"kimi-k2.5": {
+			id: "kimi-k2.5",
+			name: "Kimi K2.5",
+			api: "openai-completions",
+			provider: "opencode",
+			baseUrl: "https://opencode.ai/zen/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0.6,
+				output: 3,
+				cacheRead: 0.1,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
+		"minimax-m2.1": {
+			id: "minimax-m2.1",
+			name: "MiniMax M2.1",
+			api: "openai-completions",
+			provider: "opencode",
+			baseUrl: "https://opencode.ai/zen/v1",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 1.2,
+				cacheRead: 0.1,
+				cacheWrite: 0,
+			},
+			contextWindow: 204800,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
 		"qwen3-coder": {
 			id: "qwen3-coder",
 			name: "Qwen3 Coder",
@ -5182,6 +5470,23 @@ export const MODELS = {
 			contextWindow: 1000000,
 			maxTokens: 64000,
 		} satisfies Model<"openai-completions">,
+		"arcee-ai/trinity-large-preview:free": {
+			id: "arcee-ai/trinity-large-preview:free",
+			name: "Arcee AI: Trinity Large Preview (free)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131000,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"arcee-ai/trinity-mini": {
 			id: "arcee-ai/trinity-mini",
 			name: "Arcee AI: Trinity Mini",
@ -5550,7 +5855,7 @@ export const MODELS = {
 			cost: {
 				input: 0.21,
 				output: 0.32,
-				cacheRead: 0,
+				cacheRead: 0.21,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@ -5673,7 +5978,7 @@ export const MODELS = {
 				cacheWrite: 0.08333333333333334,
 			},
 			contextWindow: 1048576,
-			maxTokens: 65535,
+			maxTokens: 65536,
 		} satisfies Model<"openai-completions">,
 		"google/gemini-2.5-pro": {
 			id: "google/gemini-2.5-pro",
@ -6066,23 +6371,6 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 65536,
 		} satisfies Model<"openai-completions">,
-		"mistralai/devstral-2512:free": {
-			id: "mistralai/devstral-2512:free",
-			name: "Mistral: Devstral 2 2512 (free)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 262144,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"mistralai/devstral-medium": {
 			id: "mistralai/devstral-medium",
 			name: "Mistral: Devstral Medium",
@ -6593,6 +6881,23 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 65535,
 		} satisfies Model<"openai-completions">,
+		"moonshotai/kimi-k2.5": {
+			id: "moonshotai/kimi-k2.5",
+			name: "MoonshotAI: Kimi K2.5",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0.5700000000000001,
+				output: 2.8499999999999996,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 262144,
+		} satisfies Model<"openai-completions">,
 		"nex-agi/deepseek-v3.1-nex-n1": {
 			id: "nex-agi/deepseek-v3.1-nex-n1",
 			name: "Nex AGI: DeepSeek V3.1 Nex N1",
@ -6687,13 +6992,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.06,
-				output: 0.24,
+				input: 0.049999999999999996,
+				output: 0.19999999999999998,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 262144,
-			maxTokens: 262144,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"nvidia/nemotron-3-nano-30b-a3b:free": {
 			id: "nvidia/nemotron-3-nano-30b-a3b:free",
@ -7947,7 +8252,7 @@ export const MODELS = {
 			cost: {
 				input: 0.049999999999999996,
 				output: 0.25,
-				cacheRead: 0,
+				cacheRead: 0.049999999999999996,
 				cacheWrite: 0,
 			},
 			contextWindow: 32000,
@ -8395,6 +8700,23 @@ export const MODELS = {
 			contextWindow: 163840,
 			maxTokens: 65536,
 		} satisfies Model<"openai-completions">,
+		"upstage/solar-pro-3:free": {
+			id: "upstage/solar-pro-3:free",
+			name: "Upstage: Solar Pro 3 (free)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"x-ai/grok-3": {
 			id: "x-ai/grok-3",
 			name: "xAI: Grok 3",
@ -8890,6 +9212,23 @@ export const MODELS = {
 			contextWindow: 262144,
 			maxTokens: 32768,
 		} satisfies Model<"anthropic-messages">,
+		"alibaba/qwen3-max-thinking": {
+			id: "alibaba/qwen3-max-thinking",
+			name: "Qwen 3 Max Thinking",
+			api: "anthropic-messages",
+			provider: "vercel-ai-gateway",
+			baseUrl: "https://ai-gateway.vercel.sh",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 1.2,
+				output: 6,
+				cacheRead: 0.24,
+				cacheWrite: 0,
+			},
+			contextWindow: 256000,
+			maxTokens: 256000,
+		} satisfies Model<"anthropic-messages">,
 		"anthropic/claude-3-haiku": {
 			id: "anthropic/claude-3-haiku",
 			name: "Claude 3 Haiku",
@ -9077,6 +9416,23 @@ export const MODELS = {
 			contextWindow: 1000000,
 			maxTokens: 64000,
 		} satisfies Model<"anthropic-messages">,
+		"arcee-ai/trinity-large-preview": {
+			id: "arcee-ai/trinity-large-preview",
+			name: "Trinity Large Preview",
+			api: "anthropic-messages",
+			provider: "vercel-ai-gateway",
+			baseUrl: "https://ai-gateway.vercel.sh",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.25,
+				output: 1,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131000,
+			maxTokens: 131000,
+		} satisfies Model<"anthropic-messages">,
 		"bytedance/seed-1.6": {
 			id: "bytedance/seed-1.6",
 			name: "Seed 1.6",
@ -9774,6 +10130,23 @@ export const MODELS = {
 			contextWindow: 256000,
 			maxTokens: 16384,
 		} satisfies Model<"anthropic-messages">,
+		"moonshotai/kimi-k2.5": {
+			id: "moonshotai/kimi-k2.5",
+			name: "Kimi K2.5",
+			api: "anthropic-messages",
+			provider: "vercel-ai-gateway",
+			baseUrl: "https://ai-gateway.vercel.sh",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.2,
+				output: 1.2,
+				cacheRead: 0.6,
+				cacheWrite: 0,
+			},
+			contextWindow: 256000,
+			maxTokens: 256000,
+		} satisfies Model<"anthropic-messages">,
 		"nvidia/nemotron-nano-12b-v2-vl": {
 			id: "nvidia/nemotron-nano-12b-v2-vl",
 			name: "Nvidia Nemotron Nano 12B V2 VL",
@ -11298,5 +11671,23 @@ export const MODELS = {
 			contextWindow: 204800,
 			maxTokens: 131072,
 		} satisfies Model<"openai-completions">,
+		"glm-4.7-flash": {
+			id: "glm-4.7-flash",
+			name: "GLM-4.7-Flash",
+			api: "openai-completions",
+			provider: "zai",
+			baseUrl: "https://api.z.ai/api/coding/paas/v4",
+			compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 200000,
+			maxTokens: 131072,
+		} satisfies Model<"openai-completions">,
 	},
 } as const;