feat(ai): add Kimi For Coding provider support

- Add kimi-coding provider using the Anthropic Messages API
- API endpoint: https://api.kimi.com/coding/v1 (the model entries set baseUrl to https://api.kimi.com/coding)
- Environment variable: KIMI_API_KEY
- Models: kimi-k2-thinking (text), k2p5 (text + image)
- Add context overflow detection pattern for Kimi errors
- Add Kimi For Coding tests to all standard test suites
2026-04-17 00:04:50 +00:00 · 2026-01-29 04:12:28 +01:00 · 2026-01-29 04:12:28 +01:00 · 87ab5c5c3b
commit 87ab5c5c3b
parent d1e33599f6
22 changed files with 262 additions and 1 deletions
--- a/packages/ai/src/env-api-keys.ts
+++ b/packages/ai/src/env-api-keys.ts
@ -107,6 +107,7 @@ export function getEnvApiKey(provider: any): string | undefined {
 		"minimax-cn": "MINIMAX_CN_API_KEY",
 		huggingface: "HF_TOKEN",
 		opencode: "OPENCODE_API_KEY",
+		"kimi-coding": "KIMI_API_KEY",
 	};

 	const envVar = envMap[provider];
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -3536,6 +3536,42 @@ export const MODELS = {
 			maxTokens: 128000,
 		} satisfies Model<"openai-completions">,
 	},
+	"kimi-coding": {
+		"k2p5": {
+			id: "k2p5",
+			name: "Kimi K2.5",
+			api: "anthropic-messages",
+			provider: "kimi-coding",
+			baseUrl: "https://api.kimi.com/coding",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 32768,
+		} satisfies Model<"anthropic-messages">,
+		"kimi-k2-thinking": {
+			id: "kimi-k2-thinking",
+			name: "Kimi K2 Thinking",
+			api: "anthropic-messages",
+			provider: "kimi-coding",
+			baseUrl: "https://api.kimi.com/coding",
+			reasoning: true,
+			input: ["text"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 262144,
+			maxTokens: 32768,
+		} satisfies Model<"anthropic-messages">,
+	},
 	"minimax": {
 		"MiniMax-M2": {
 			id: "MiniMax-M2",
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@ -36,7 +36,8 @@ export type KnownProvider =
 	| "minimax"
 	| "minimax-cn"
 	| "huggingface"
-	| "opencode";
+	| "opencode"
+	| "kimi-coding";
 export type Provider = KnownProvider | string;

 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
--- a/packages/ai/src/utils/overflow.ts
+++ b/packages/ai/src/utils/overflow.ts
@ -18,6 +18,7 @@ import type { AssistantMessage } from "../types.js";
 * - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
 * - GitHub Copilot: "prompt token count of X exceeds the limit of Y"
 * - MiniMax: "invalid params, context window exceeds limit"
+ * - Kimi For Coding: "Your request exceeded model token limit: X (requested: Y)"
 * - Cerebras: Returns "400/413 status code (no body)" - handled separately below
 * - Mistral: Returns "400/413 status code (no body)" - handled separately below
 * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
@ -35,6 +36,7 @@ const OVERFLOW_PATTERNS = [
 	/exceeds the available context size/i, // llama.cpp server
 	/greater than the context length/i, // LM Studio
 	/context window exceeds limit/i, // MiniMax
+	/exceeded model token limit/i, // Kimi For Coding
 	/context[_ ]length[_ ]exceeded/i, // Generic fallback
 	/too many tokens/i, // Generic fallback
 	/token limit exceeded/i, // Generic fallback
@ -62,6 +64,7 @@ const OVERFLOW_PATTERNS = [
 * - OpenRouter (all backends): "maximum context length is X tokens"
 * - llama.cpp: "exceeds the available context size"
 * - LM Studio: "greater than the context length"
+ * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
 *
 * **Unreliable detection:**
 * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),