mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 09:01:14 +00:00
feat(ai): add Hugging Face provider support
- Add huggingface to KnownProvider type
- Add HF_TOKEN env var mapping
- Process huggingface models from models.dev (14 models)
- Use openai-completions API with compat settings
- Add tests for all provider test suites
- Update documentation

fixes #994
This commit is contained in:
parent
f3cfb7e1ae
commit
c808de605a
16 changed files with 562 additions and 23 deletions
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
### Added
|
||||
|
||||
- Added Hugging Face provider support via OpenAI-compatible Inference Router ([#994](https://github.com/badlogic/pi-mono/issues/994))
|
||||
- Added `PI_CACHE_RETENTION` environment variable to control cache TTL for Anthropic (5m vs 1h) and OpenAI (in-memory vs 24h). Set to `long` for extended retention. Only applies to direct API calls (api.anthropic.com, api.openai.com). ([#967](https://github.com/badlogic/pi-mono/issues/967))
|
||||
|
||||
### Fixed
|
||||
|
|
|
|||
|
|
@ -474,6 +474,35 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
|
|||
}
|
||||
}
|
||||
|
||||
// Process Hugging Face models
|
||||
if (data.huggingface?.models) {
|
||||
for (const [modelId, model] of Object.entries(data.huggingface.models)) {
|
||||
const m = model as ModelsDevModel;
|
||||
if (m.tool_call !== true) continue;
|
||||
|
||||
models.push({
|
||||
id: modelId,
|
||||
name: m.name || modelId,
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
reasoning: m.reasoning === true,
|
||||
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
|
||||
cost: {
|
||||
input: m.cost?.input || 0,
|
||||
output: m.cost?.output || 0,
|
||||
cacheRead: m.cost?.cache_read || 0,
|
||||
cacheWrite: m.cost?.cache_write || 0,
|
||||
},
|
||||
compat: {
|
||||
supportsDeveloperRole: false,
|
||||
},
|
||||
contextWindow: m.limit?.context || 4096,
|
||||
maxTokens: m.limit?.output || 4096,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Process OpenCode Zen models
|
||||
// API mapping based on provider.npm field:
|
||||
// - @ai-sdk/openai → openai-responses
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|||
mistral: "MISTRAL_API_KEY",
|
||||
minimax: "MINIMAX_API_KEY",
|
||||
"minimax-cn": "MINIMAX_CN_API_KEY",
|
||||
huggingface: "HF_TOKEN",
|
||||
opencode: "OPENCODE_API_KEY",
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -3282,6 +3282,260 @@ export const MODELS = {
|
|||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
"huggingface": {
|
||||
"MiniMaxAI/MiniMax-M2.1": {
|
||||
id: "MiniMaxAI/MiniMax-M2.1",
|
||||
name: "MiniMax-M2.1",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 1.2,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"Qwen/Qwen3-235B-A22B-Thinking-2507": {
|
||||
id: "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
name: "Qwen3-235B-A22B-Thinking-2507",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 3,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
|
||||
id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
|
||||
name: "Qwen3-Coder-480B-A35B-Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 2,
|
||||
output: 2,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 66536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"Qwen/Qwen3-Next-80B-A3B-Instruct": {
|
||||
id: "Qwen/Qwen3-Next-80B-A3B-Instruct",
|
||||
name: "Qwen3-Next-80B-A3B-Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.25,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 66536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
|
||||
id: "Qwen/Qwen3-Next-80B-A3B-Thinking",
|
||||
name: "Qwen3-Next-80B-A3B-Thinking",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 2,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": {
|
||||
id: "XiaomiMiMo/MiMo-V2-Flash",
|
||||
name: "MiMo-V2-Flash",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.1,
|
||||
output: 0.3,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"deepseek-ai/DeepSeek-R1-0528": {
|
||||
id: "deepseek-ai/DeepSeek-R1-0528",
|
||||
name: "DeepSeek-R1-0528",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 3,
|
||||
output: 5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 163840,
|
||||
maxTokens: 163840,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"deepseek-ai/DeepSeek-V3.2": {
|
||||
id: "deepseek-ai/DeepSeek-V3.2",
|
||||
name: "DeepSeek-V3.2",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.28,
|
||||
output: 0.4,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 163840,
|
||||
maxTokens: 65536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"moonshotai/Kimi-K2-Instruct": {
|
||||
id: "moonshotai/Kimi-K2-Instruct",
|
||||
name: "Kimi-K2-Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 1,
|
||||
output: 3,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"moonshotai/Kimi-K2-Instruct-0905": {
|
||||
id: "moonshotai/Kimi-K2-Instruct-0905",
|
||||
name: "Kimi-K2-Instruct-0905",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 1,
|
||||
output: 3,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"moonshotai/Kimi-K2-Thinking": {
|
||||
id: "moonshotai/Kimi-K2-Thinking",
|
||||
name: "Kimi-K2-Thinking",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 2.5,
|
||||
cacheRead: 0.15,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"moonshotai/Kimi-K2.5": {
|
||||
id: "moonshotai/Kimi-K2.5",
|
||||
name: "Kimi-K2.5",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 3,
|
||||
cacheRead: 0.1,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"zai-org/GLM-4.7": {
|
||||
id: "zai-org/GLM-4.7",
|
||||
name: "GLM-4.7",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 2.2,
|
||||
cacheRead: 0.11,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"zai-org/GLM-4.7-Flash": {
|
||||
id: "zai-org/GLM-4.7-Flash",
|
||||
name: "GLM-4.7-Flash",
|
||||
api: "openai-completions",
|
||||
provider: "huggingface",
|
||||
baseUrl: "https://router.huggingface.co/v1",
|
||||
compat: {"supportsDeveloperRole":false},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 128000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
"minimax": {
|
||||
"MiniMax-M2": {
|
||||
id: "MiniMax-M2",
|
||||
|
|
@ -4823,6 +5077,40 @@ export const MODELS = {
|
|||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"kimi-k2.5": {
|
||||
id: "kimi-k2.5",
|
||||
name: "Kimi K2.5",
|
||||
api: "openai-completions",
|
||||
provider: "opencode",
|
||||
baseUrl: "https://opencode.ai/zen/v1",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.6,
|
||||
output: 3,
|
||||
cacheRead: 0.1,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"minimax-m2.1": {
|
||||
id: "minimax-m2.1",
|
||||
name: "MiniMax M2.1",
|
||||
api: "openai-completions",
|
||||
provider: "opencode",
|
||||
baseUrl: "https://opencode.ai/zen/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 1.2,
|
||||
cacheRead: 0.1,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"qwen3-coder": {
|
||||
id: "qwen3-coder",
|
||||
name: "Qwen3 Coder",
|
||||
|
|
@ -5182,6 +5470,23 @@ export const MODELS = {
|
|||
contextWindow: 1000000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"arcee-ai/trinity-large-preview:free": {
|
||||
id: "arcee-ai/trinity-large-preview:free",
|
||||
name: "Arcee AI: Trinity Large Preview (free)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"arcee-ai/trinity-mini": {
|
||||
id: "arcee-ai/trinity-mini",
|
||||
name: "Arcee AI: Trinity Mini",
|
||||
|
|
@ -5550,7 +5855,7 @@ export const MODELS = {
|
|||
cost: {
|
||||
input: 0.21,
|
||||
output: 0.32,
|
||||
cacheRead: 0,
|
||||
cacheRead: 0.21,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 163840,
|
||||
|
|
@ -5673,7 +5978,7 @@ export const MODELS = {
|
|||
cacheWrite: 0.08333333333333334,
|
||||
},
|
||||
contextWindow: 1048576,
|
||||
maxTokens: 65535,
|
||||
maxTokens: 65536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"google/gemini-2.5-pro": {
|
||||
id: "google/gemini-2.5-pro",
|
||||
|
|
@ -6066,23 +6371,6 @@ export const MODELS = {
|
|||
contextWindow: 262144,
|
||||
maxTokens: 65536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/devstral-2512:free": {
|
||||
id: "mistralai/devstral-2512:free",
|
||||
name: "Mistral: Devstral 2 2512 (free)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/devstral-medium": {
|
||||
id: "mistralai/devstral-medium",
|
||||
name: "Mistral: Devstral Medium",
|
||||
|
|
@ -6593,6 +6881,23 @@ export const MODELS = {
|
|||
contextWindow: 262144,
|
||||
maxTokens: 65535,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"moonshotai/kimi-k2.5": {
|
||||
id: "moonshotai/kimi-k2.5",
|
||||
name: "MoonshotAI: Kimi K2.5",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.5700000000000001,
|
||||
output: 2.8499999999999996,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"nex-agi/deepseek-v3.1-nex-n1": {
|
||||
id: "nex-agi/deepseek-v3.1-nex-n1",
|
||||
name: "Nex AGI: DeepSeek V3.1 Nex N1",
|
||||
|
|
@ -6687,13 +6992,13 @@ export const MODELS = {
|
|||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.06,
|
||||
output: 0.24,
|
||||
input: 0.049999999999999996,
|
||||
output: 0.19999999999999998,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 262144,
|
||||
maxTokens: 262144,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"nvidia/nemotron-3-nano-30b-a3b:free": {
|
||||
id: "nvidia/nemotron-3-nano-30b-a3b:free",
|
||||
|
|
@ -7947,7 +8252,7 @@ export const MODELS = {
|
|||
cost: {
|
||||
input: 0.049999999999999996,
|
||||
output: 0.25,
|
||||
cacheRead: 0,
|
||||
cacheRead: 0.049999999999999996,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 32000,
|
||||
|
|
@ -8395,6 +8700,23 @@ export const MODELS = {
|
|||
contextWindow: 163840,
|
||||
maxTokens: 65536,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"upstage/solar-pro-3:free": {
|
||||
id: "upstage/solar-pro-3:free",
|
||||
name: "Upstage: Solar Pro 3 (free)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"x-ai/grok-3": {
|
||||
id: "x-ai/grok-3",
|
||||
name: "xAI: Grok 3",
|
||||
|
|
@ -8890,6 +9212,23 @@ export const MODELS = {
|
|||
contextWindow: 262144,
|
||||
maxTokens: 32768,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"alibaba/qwen3-max-thinking": {
|
||||
id: "alibaba/qwen3-max-thinking",
|
||||
name: "Qwen 3 Max Thinking",
|
||||
api: "anthropic-messages",
|
||||
provider: "vercel-ai-gateway",
|
||||
baseUrl: "https://ai-gateway.vercel.sh",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 1.2,
|
||||
output: 6,
|
||||
cacheRead: 0.24,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 256000,
|
||||
maxTokens: 256000,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"anthropic/claude-3-haiku": {
|
||||
id: "anthropic/claude-3-haiku",
|
||||
name: "Claude 3 Haiku",
|
||||
|
|
@ -9077,6 +9416,23 @@ export const MODELS = {
|
|||
contextWindow: 1000000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"arcee-ai/trinity-large-preview": {
|
||||
id: "arcee-ai/trinity-large-preview",
|
||||
name: "Trinity Large Preview",
|
||||
api: "anthropic-messages",
|
||||
provider: "vercel-ai-gateway",
|
||||
baseUrl: "https://ai-gateway.vercel.sh",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.25,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131000,
|
||||
maxTokens: 131000,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"bytedance/seed-1.6": {
|
||||
id: "bytedance/seed-1.6",
|
||||
name: "Seed 1.6",
|
||||
|
|
@ -9774,6 +10130,23 @@ export const MODELS = {
|
|||
contextWindow: 256000,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"moonshotai/kimi-k2.5": {
|
||||
id: "moonshotai/kimi-k2.5",
|
||||
name: "Kimi K2.5",
|
||||
api: "anthropic-messages",
|
||||
provider: "vercel-ai-gateway",
|
||||
baseUrl: "https://ai-gateway.vercel.sh",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 1.2,
|
||||
output: 1.2,
|
||||
cacheRead: 0.6,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 256000,
|
||||
maxTokens: 256000,
|
||||
} satisfies Model<"anthropic-messages">,
|
||||
"nvidia/nemotron-nano-12b-v2-vl": {
|
||||
id: "nvidia/nemotron-nano-12b-v2-vl",
|
||||
name: "Nvidia Nemotron Nano 12B V2 VL",
|
||||
|
|
@ -11298,5 +11671,23 @@ export const MODELS = {
|
|||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"glm-4.7-flash": {
|
||||
id: "glm-4.7-flash",
|
||||
name: "GLM-4.7-Flash",
|
||||
api: "openai-completions",
|
||||
provider: "zai",
|
||||
baseUrl: "https://api.z.ai/api/coding/paas/v4",
|
||||
compat: {"supportsDeveloperRole":false,"thinkingFormat":"zai"},
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 200000,
|
||||
maxTokens: 131072,
|
||||
} satisfies Model<"openai-completions">,
|
||||
},
|
||||
} as const;
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ export type KnownProvider =
|
|||
| "mistral"
|
||||
| "minimax"
|
||||
| "minimax-cn"
|
||||
| "huggingface"
|
||||
| "opencode";
|
||||
export type Provider = KnownProvider | string;
|
||||
|
||||
|
|
|
|||
|
|
@ -366,6 +366,22 @@ describe("Context overflow error handling", () => {
|
|||
}, 120000);
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
// Hugging Face
|
||||
// Uses OpenAI-compatible Inference Router
|
||||
// =============================================================================
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => {
|
||||
it("Kimi-K2.5 - should detect overflow via isContextOverflow", async () => {
|
||||
const model = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
const result = await testContextOverflow(model, process.env.HF_TOKEN!);
|
||||
logResult(result);
|
||||
|
||||
expect(result.stopReason).toBe("error");
|
||||
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
|
||||
}, 120000);
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
// z.ai
|
||||
// Special case: Sometimes accepts overflow silently, sometimes rate limits
|
||||
|
|
|
|||
|
|
@ -86,6 +86,8 @@ const PROVIDER_MODEL_PAIRS: ProviderModelPair[] = [
|
|||
{ provider: "cerebras", model: "zai-glm-4.7", label: "cerebras-zai-glm-4.7" },
|
||||
// Groq
|
||||
{ provider: "groq", model: "openai/gpt-oss-120b", label: "groq-gpt-oss-120b" },
|
||||
// Hugging Face
|
||||
{ provider: "huggingface", model: "moonshotai/Kimi-K2.5", label: "huggingface-kimi-k2.5" },
|
||||
// Mistral
|
||||
{ provider: "mistral", model: "devstral-medium-latest", label: "mistral-devstral-medium" },
|
||||
// MiniMax
|
||||
|
|
|
|||
|
|
@ -308,6 +308,26 @@ describe("AI Providers Empty Message Tests", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider Empty Messages", () => {
|
||||
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testEmptyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testEmptyStringMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testWhitespaceOnlyMessage(llm);
|
||||
});
|
||||
|
||||
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testEmptyAssistantMessage(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => {
|
||||
const llm = getModel("zai", "glm-4.5-air");
|
||||
|
||||
|
|
|
|||
|
|
@ -604,6 +604,30 @@ describe("Generate E2E Tests", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider (Kimi-K2.5 via OpenAI Completions)", () => {
|
||||
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
it("should complete basic text generation", { retry: 3 }, async () => {
|
||||
await basicTextGeneration(llm);
|
||||
});
|
||||
|
||||
it("should handle tool calling", { retry: 3 }, async () => {
|
||||
await handleToolCall(llm);
|
||||
});
|
||||
|
||||
it("should handle streaming", { retry: 3 }, async () => {
|
||||
await handleStreaming(llm);
|
||||
});
|
||||
|
||||
it("should handle thinking mode", { retry: 3 }, async () => {
|
||||
await handleThinking(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
|
||||
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
|
||||
await multiTurn(llm, { reasoningEffort: "medium" });
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
|
||||
const llm = getModel("openrouter", "z-ai/glm-4.5v");
|
||||
|
||||
|
|
|
|||
|
|
@ -154,6 +154,14 @@ describe("Token Statistics on Abort", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => {
|
||||
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testTokensOnAbort(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
|
||||
const llm = getModel("zai", "glm-4.5-flash");
|
||||
|
||||
|
|
|
|||
|
|
@ -168,6 +168,14 @@ describe("Tool Call Without Result Tests", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider", () => {
|
||||
const model = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testToolCallWithoutResult(model);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
|
||||
const model = getModel("zai", "glm-4.5-flash");
|
||||
|
||||
|
|
|
|||
|
|
@ -306,6 +306,25 @@ describe("totalTokens field", () => {
|
|||
);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// Hugging Face
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face", () => {
|
||||
it("Kimi-K2.5 - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
|
||||
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
console.log(`\nHugging Face / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.HF_TOKEN });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
});
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// z.ai
|
||||
// =========================================================================
|
||||
|
|
|
|||
|
|
@ -611,6 +611,22 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.HF_TOKEN)("Hugging Face Provider Unicode Handling", () => {
|
||||
const llm = getModel("huggingface", "moonshotai/Kimi-K2.5");
|
||||
|
||||
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testEmojiInToolResults(llm);
|
||||
});
|
||||
|
||||
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testRealWorldLinkedInData(llm);
|
||||
});
|
||||
|
||||
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
|
||||
await testUnpairedHighSurrogate(llm);
|
||||
});
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => {
|
||||
const llm = getModel("zai", "glm-4.5-air");
|
||||
|
||||
|
|
|
|||
|
|
@ -93,6 +93,7 @@ For each built-in provider, pi maintains a list of tool-capable models, updated
|
|||
- Vercel AI Gateway
|
||||
- ZAI
|
||||
- OpenCode Zen
|
||||
- Hugging Face
|
||||
- MiniMax
|
||||
|
||||
See [docs/providers.md](docs/providers.md) for detailed setup instructions.
|
||||
|
|
|
|||
|
|
@ -62,6 +62,7 @@ pi
|
|||
| Vercel AI Gateway | `AI_GATEWAY_API_KEY` |
|
||||
| ZAI | `ZAI_API_KEY` |
|
||||
| OpenCode Zen | `OPENCODE_API_KEY` |
|
||||
| Hugging Face | `HF_TOKEN` |
|
||||
| MiniMax | `MINIMAX_API_KEY` |
|
||||
| MiniMax (China) | `MINIMAX_CN_API_KEY` |
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ export const defaultModelPerProvider: Record<KnownProvider, string> = {
|
|||
mistral: "devstral-medium-latest",
|
||||
minimax: "MiniMax-M2.1",
|
||||
"minimax-cn": "MiniMax-M2.1",
|
||||
huggingface: "moonshotai/Kimi-K2.5",
|
||||
opencode: "claude-opus-4-5",
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue