diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 6cde070c..2df269d3 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -5,19 +5,87 @@ import type { Model } from "./types.js"; export const MODELS = { "anthropic": { - "claude-3-5-haiku-20241022": { - id: "claude-3-5-haiku-20241022", - name: "Claude Haiku 3.5", + "claude-opus-4-0": { + id: "claude-opus-4-0", + name: "Claude Opus 4 (latest)", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-messages">, + "claude-3-5-sonnet-20241022": { + id: "claude-3-5-sonnet-20241022", + name: "Claude Sonnet 3.5 v2", api: "anthropic-messages", provider: "anthropic", baseUrl: "https://api.anthropic.com", reasoning: false, input: ["text", "image"], cost: { - input: 0.8, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, + "claude-opus-4-1": { + id: "claude-opus-4-1", + name: "Claude Opus 4.1 (latest)", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-messages">, + "claude-haiku-4-5": { + id: "claude-haiku-4-5", + name: "Claude Haiku 4.5 (latest)", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1, + output: 5, + cacheRead: 0.1, + cacheWrite: 1.25, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + "claude-3-5-sonnet-20240620": { + id: "claude-3-5-sonnet-20240620", + name: "Claude Sonnet 3.5", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, }, contextWindow: 200000, maxTokens: 8192, @@ -39,91 +107,23 @@ export const MODELS = { contextWindow: 200000, maxTokens: 8192, } satisfies Model<"anthropic-messages">, - "claude-3-5-sonnet-20240620": { - id: "claude-3-5-sonnet-20240620", - name: "Claude Sonnet 3.5", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, - "claude-3-5-sonnet-20241022": { - id: "claude-3-5-sonnet-20241022", - name: "Claude Sonnet 3.5 v2", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"anthropic-messages">, - "claude-3-7-sonnet-20250219": { - id: "claude-3-7-sonnet-20250219", - name: "Claude Sonnet 3.7", + "claude-opus-4-5": { + id: "claude-opus-4-5", + name: "Claude Opus 4.5 (latest)", api: "anthropic-messages", 
provider: "anthropic", baseUrl: "https://api.anthropic.com", reasoning: true, input: ["text", "image"], cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, + input: 5, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, }, contextWindow: 200000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, - "claude-3-7-sonnet-latest": { - id: "claude-3-7-sonnet-latest", - name: "Claude Sonnet 3.7 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, - "claude-3-haiku-20240307": { - id: "claude-3-haiku-20240307", - name: "Claude Haiku 3", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.25, - output: 1.25, - cacheRead: 0.03, - cacheWrite: 0.3, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"anthropic-messages">, "claude-3-opus-20240229": { id: "claude-3-opus-20240229", name: "Claude Opus 3", @@ -141,142 +141,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 4096, } satisfies Model<"anthropic-messages">, - "claude-3-sonnet-20240229": { - id: "claude-3-sonnet-20240229", - name: "Claude Sonnet 3", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 0.3, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"anthropic-messages">, - "claude-haiku-4-5": { - id: "claude-haiku-4-5", - name: "Claude Haiku 4.5 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1, - output: 5, - cacheRead: 0.1, - cacheWrite: 1.25, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, - "claude-haiku-4-5-20251001": { - id: "claude-haiku-4-5-20251001", - name: "Claude Haiku 4.5", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1, - output: 5, - cacheRead: 0.1, - cacheWrite: 1.25, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-0": { - id: "claude-opus-4-0", - name: "Claude Opus 4 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-1": { - id: "claude-opus-4-1", - name: "Claude Opus 4.1 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-1-20250805": { - id: "claude-opus-4-1-20250805", - name: "Claude Opus 4.1", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", 
"image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-20250514": { - id: "claude-opus-4-20250514", - name: "Claude Opus 4", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"anthropic-messages">, - "claude-opus-4-5": { - id: "claude-opus-4-5", - name: "Claude Opus 4.5 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, "claude-opus-4-5-20251101": { id: "claude-opus-4-5-20251101", name: "Claude Opus 4.5", @@ -294,40 +158,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, - "claude-sonnet-4-0": { - id: "claude-sonnet-4-0", - name: "Claude Sonnet 4 (latest)", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, - "claude-sonnet-4-20250514": { - id: "claude-sonnet-4-20250514", - name: "Claude Sonnet 4", - api: "anthropic-messages", - provider: "anthropic", - baseUrl: "https://api.anthropic.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"anthropic-messages">, "claude-sonnet-4-5": { id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5 (latest)", @@ -362,415 +192,467 @@ export const MODELS = { contextWindow: 200000, maxTokens: 64000, } satisfies Model<"anthropic-messages">, - }, - "cerebras": { - "gpt-oss-120b": { - id: "gpt-oss-120b", - name: "GPT OSS 120B", - api: "openai-completions", - provider: "cerebras", - baseUrl: "https://api.cerebras.ai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.25, - output: 0.69, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen-3-235b-a22b-instruct-2507": { - id: "qwen-3-235b-a22b-instruct-2507", - name: "Qwen 3 235B Instruct", - api: "openai-completions", - provider: "cerebras", - baseUrl: "https://api.cerebras.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.6, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "zai-glm-4.6": { - id: "zai-glm-4.6", - name: "Z.AI GLM-4.6", - api: "openai-completions", - provider: "cerebras", - baseUrl: "https://api.cerebras.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 40960, - } satisfies Model<"openai-completions">, - }, - "github-copilot": { - "claude-haiku-4.5": { - id: "claude-haiku-4.5", - name: "Claude Haiku 4.5", - api: "openai-completions", - provider: "github-copilot", - baseUrl: 
"https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16000, - } satisfies Model<"openai-completions">, - "claude-opus-4.5": { - id: "claude-opus-4.5", - name: "Claude Opus 4.5", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16000, - } satisfies Model<"openai-completions">, - "claude-sonnet-4": { - id: "claude-sonnet-4", + "claude-sonnet-4-20250514": { + id: "claude-sonnet-4-20250514", name: "Claude Sonnet 4", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", reasoning: true, input: ["text", "image"], cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16000, - } satisfies Model<"openai-completions">, - "claude-sonnet-4.5": { - id: "claude-sonnet-4.5", - name: "Claude Sonnet 4.5", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16000, - } satisfies Model<"openai-completions">, - "gemini-2.5-pro": { - id: "gemini-2.5-pro", - name: "Gemini 2.5 Pro", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - 
headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "gemini-3-pro-preview": { - id: "gemini-3-pro-preview", - name: "Gemini 3 Pro Preview", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "gpt-4.1": { - id: "gpt-4.1", - name: "GPT-4.1", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "gpt-4o": { - id: "gpt-4o", - name: "GPT-4o", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 64000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "gpt-5": { - id: "gpt-5", - name: "GPT-5", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5-codex": { - id: "gpt-5-codex", - name: "GPT-5-Codex", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5-mini": { - 
id: "gpt-5-mini", - name: "GPT-5-mini", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-responses">, - "gpt-5.1": { - id: "gpt-5.1", - name: "GPT-5.1", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.1-codex": { - id: "gpt-5.1-codex", - name: "GPT-5.1-Codex", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.1-codex-max": { - id: "gpt-5.1-codex-max", - name: "GPT-5.1-Codex-max", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.1-codex-mini": { - id: "gpt-5.1-codex-mini", - name: "GPT-5.1-Codex-mini", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "gpt-5.2": { - id: "gpt-5.2", - name: "GPT-5.2", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-responses">, - "grok-code-fast-1": { - id: "grok-code-fast-1", - name: "Grok Code Fast 1", - api: "openai-completions", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: 
{"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "oswe-vscode-prime": { - id: "oswe-vscode-prime", - name: "Raptor Mini (Preview)", - api: "openai-responses", - provider: "github-copilot", - baseUrl: "https://api.individual.githubcopilot.com", - headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, }, contextWindow: 200000, maxTokens: 64000, - } satisfies Model<"openai-responses">, + } satisfies Model<"anthropic-messages">, + "claude-opus-4-20250514": { + id: "claude-opus-4-20250514", + name: "Claude Opus 4", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-messages">, + "claude-3-5-haiku-20241022": { + id: "claude-3-5-haiku-20241022", + name: "Claude Haiku 3.5", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.8, + output: 4, + cacheRead: 0.08, + cacheWrite: 1, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"anthropic-messages">, + "claude-3-haiku-20240307": { + id: "claude-3-haiku-20240307", + name: "Claude Haiku 3", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.25, + output: 1.25, + cacheRead: 0.03, + cacheWrite: 0.3, + }, + contextWindow: 200000, + maxTokens: 4096, + } satisfies Model<"anthropic-messages">, + "claude-3-7-sonnet-20250219": { + id: "claude-3-7-sonnet-20250219", + name: "Claude Sonnet 3.7", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + "claude-3-7-sonnet-latest": { + id: "claude-3-7-sonnet-latest", + name: "Claude Sonnet 3.7 (latest)", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + "claude-sonnet-4-0": { + id: "claude-sonnet-4-0", + name: "Claude Sonnet 4 (latest)", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, + 
"claude-opus-4-1-20250805": { + id: "claude-opus-4-1-20250805", + name: "Claude Opus 4.1", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"anthropic-messages">, + "claude-3-sonnet-20240229": { + id: "claude-3-sonnet-20240229", + name: "Claude Sonnet 3", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 0.3, + }, + contextWindow: 200000, + maxTokens: 4096, + } satisfies Model<"anthropic-messages">, + "claude-haiku-4-5-20251001": { + id: "claude-haiku-4-5-20251001", + name: "Claude Haiku 4.5", + api: "anthropic-messages", + provider: "anthropic", + baseUrl: "https://api.anthropic.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1, + output: 5, + cacheRead: 0.1, + cacheWrite: 1.25, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"anthropic-messages">, }, "google": { + "gemini-3-flash-preview": { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash Preview", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 3, + cacheRead: 0.05, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash-preview-05-20": { + id: "gemini-2.5-flash-preview-05-20", + name: "Gemini 2.5 Flash Preview 05-20", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.0375, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-flash-lite-latest": { + id: "gemini-flash-lite-latest", + name: "Gemini Flash-Lite Latest", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-3-pro-preview": { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 12, + cacheRead: 0.2, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 64000, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash": { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 2.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-flash-latest": { + id: "gemini-flash-latest", + name: "Gemini Flash Latest", + api: "google-generative-ai", + provider: "google", + baseUrl: 
"https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 2.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-pro-preview-05-06": { + id: "gemini-2.5-pro-preview-05-06", + name: "Gemini 2.5 Pro Preview 05-06", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.31, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.0-flash-lite": { + id: "gemini-2.0-flash-lite", + name: "Gemini 2.0 Flash Lite", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.075, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"google-generative-ai">, + "gemini-live-2.5-flash-preview-native-audio": { + id: "gemini-live-2.5-flash-preview-native-audio", + name: "Gemini Live 2.5 Flash Preview Native Audio", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text"], + cost: { + input: 0.5, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.0-flash": { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash-lite": { + id: "gemini-2.5-flash-lite", + name: "Gemini 2.5 Flash Lite", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-pro-preview-06-05": { + id: "gemini-2.5-pro-preview-06-05", + name: "Gemini 2.5 Pro Preview 06-05", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.31, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-live-2.5-flash": { + id: "gemini-live-2.5-flash", + name: "Gemini Live 2.5 Flash", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 8000, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash-lite-preview-06-17": { + id: "gemini-2.5-flash-lite-preview-06-17", + name: "Gemini 2.5 Flash Lite Preview 06-17", + api: "google-generative-ai", + provider: "google", + baseUrl: 
"https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.4, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash-preview-09-2025": { + id: "gemini-2.5-flash-preview-09-2025", + name: "Gemini 2.5 Flash Preview 09-25", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 2.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-flash-preview-04-17": { + id: "gemini-2.5-flash-preview-04-17", + name: "Gemini 2.5 Flash Preview 04-17", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.0375, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, + "gemini-2.5-pro": { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro", + api: "google-generative-ai", + provider: "google", + baseUrl: "https://generativelanguage.googleapis.com/v1beta", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.31, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"google-generative-ai">, "gemini-1.5-flash": { id: "gemini-1.5-flash", name: "Gemini 1.5 Flash", @@ -805,108 +687,6 @@ export const MODELS = { contextWindow: 1000000, maxTokens: 8192, } satisfies Model<"google-generative-ai">, - "gemini-1.5-pro": { - id: "gemini-1.5-pro", - name: "Gemini 1.5 Pro", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: false, - input: ["text", "image"], - cost: { - input: 1.25, - output: 5, - cacheRead: 0.3125, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 8192, - } satisfies Model<"google-generative-ai">, - "gemini-2.0-flash": { - id: "gemini-2.0-flash", - name: "Gemini 2.0 Flash", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.1, - output: 0.4, - cacheRead: 0.025, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"google-generative-ai">, - "gemini-2.0-flash-lite": { - id: "gemini-2.0-flash-lite", - name: "Gemini 2.0 Flash Lite", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.075, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash": { - id: "gemini-2.5-flash", - name: "Gemini 2.5 Flash", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash-lite": { - id: "gemini-2.5-flash-lite", - name: "Gemini 2.5 Flash Lite", - api: 
"google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.1, - output: 0.4, - cacheRead: 0.025, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash-lite-preview-06-17": { - id: "gemini-2.5-flash-lite-preview-06-17", - name: "Gemini 2.5 Flash Lite Preview 06-17", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.1, - output: 0.4, - cacheRead: 0.025, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, "gemini-2.5-flash-lite-preview-09-2025": { id: "gemini-2.5-flash-lite-preview-09-2025", name: "Gemini 2.5 Flash Lite Preview 09-25", @@ -924,1120 +704,41 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65536, } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash-preview-04-17": { - id: "gemini-2.5-flash-preview-04-17", - name: "Gemini 2.5 Flash Preview 04-17", + "gemini-1.5-pro": { + id: "gemini-1.5-pro", + name: "Gemini 1.5 Pro", api: "google-generative-ai", provider: "google", baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0.0375, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash-preview-05-20": { - id: "gemini-2.5-flash-preview-05-20", - name: "Gemini 2.5 Flash Preview 05-20", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0.0375, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-flash-preview-09-2025": { - id: "gemini-2.5-flash-preview-09-2025", - name: "Gemini 2.5 Flash Preview 09-25", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-pro": { - id: "gemini-2.5-pro", - name: "Gemini 2.5 Pro", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, + reasoning: false, input: ["text", "image"], cost: { input: 1.25, - output: 10, - cacheRead: 0.31, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-pro-preview-05-06": { - id: "gemini-2.5-pro-preview-05-06", - name: "Gemini 2.5 Pro Preview 05-06", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.31, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-2.5-pro-preview-06-05": { - id: "gemini-2.5-pro-preview-06-05", - name: "Gemini 2.5 Pro Preview 06-05", - api: "google-generative-ai", - provider: "google", - baseUrl: 
"https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.31, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.5, - output: 3, - cacheRead: 0.05, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-3-pro-preview": { - id: "gemini-3-pro-preview", - name: "Gemini 3 Pro Preview", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.2, + output: 5, + cacheRead: 0.3125, cacheWrite: 0, }, contextWindow: 1000000, - maxTokens: 64000, + maxTokens: 8192, } satisfies Model<"google-generative-ai">, - "gemini-flash-latest": { - id: "gemini-flash-latest", - name: "Gemini Flash Latest", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-flash-lite-latest": { - id: "gemini-flash-lite-latest", - name: "Gemini Flash-Lite Latest", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, + }, + "openai": { + "gpt-4.1-nano": { + id: "gpt-4.1-nano", + name: "GPT-4.1 nano", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, input: ["text", "image"], cost: { input: 0.1, output: 0.4, - cacheRead: 0.025, + cacheRead: 0.03, cacheWrite: 0, }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - "gemini-live-2.5-flash": { - id: "gemini-live-2.5-flash", - name: "Gemini Live 2.5 Flash", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.5, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 8000, - } satisfies Model<"google-generative-ai">, - "gemini-live-2.5-flash-preview-native-audio": { - id: "gemini-live-2.5-flash-preview-native-audio", - name: "Gemini Live 2.5 Flash Preview Native Audio", - api: "google-generative-ai", - provider: "google", - baseUrl: "https://generativelanguage.googleapis.com/v1beta", - reasoning: true, - input: ["text"], - cost: { - input: 0.5, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 65536, - } satisfies Model<"google-generative-ai">, - }, - "google-antigravity": { - "claude-opus-4-5-thinking": { - id: "claude-opus-4-5-thinking", - name: "Claude Opus 4.5 Thinking (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 
64000, - } satisfies Model<"google-gemini-cli">, - "claude-sonnet-4-5": { - id: "claude-sonnet-4-5", - name: "Claude Sonnet 4.5 (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"google-gemini-cli">, - "claude-sonnet-4-5-thinking": { - id: "claude-sonnet-4-5-thinking", - name: "Claude Sonnet 4.5 Thinking (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"google-gemini-cli">, - "gemini-3-flash": { - id: "gemini-3-flash", - name: "Gemini 3 Flash (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gemini-3-pro-high": { - id: "gemini-3-pro-high", - name: "Gemini 3 Pro High (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gemini-3-pro-low": { - id: "gemini-3-pro-low", - name: "Gemini 3 Pro Low (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gpt-oss-120b-medium": { - id: "gpt-oss-120b-medium", - name: "GPT-OSS 120B Medium (Antigravity)", - api: "google-gemini-cli", - provider: "google-antigravity", - baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, + contextWindow: 1047576, maxTokens: 32768, - } satisfies Model<"google-gemini-cli">, - }, - "google-gemini-cli": { - "gemini-2.0-flash": { - id: "gemini-2.0-flash", - name: "Gemini 2.0 Flash (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"google-gemini-cli">, - "gemini-2.5-flash": { - id: "gemini-2.5-flash", - name: "Gemini 2.5 Flash (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gemini-2.5-pro": { - id: 
"gemini-2.5-pro", - name: "Gemini 2.5 Pro (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gemini-3-flash-preview": { - id: "gemini-3-flash-preview", - name: "Gemini 3 Flash Preview (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - "gemini-3-pro-preview": { - id: "gemini-3-pro-preview", - name: "Gemini 3 Pro Preview (Cloud Code Assist)", - api: "google-gemini-cli", - provider: "google-gemini-cli", - baseUrl: "https://cloudcode-pa.googleapis.com", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"google-gemini-cli">, - }, - "groq": { - "deepseek-r1-distill-llama-70b": { - id: "deepseek-r1-distill-llama-70b", - name: "DeepSeek R1 Distill Llama 70B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.75, - output: 0.99, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "gemma2-9b-it": { - id: "gemma2-9b-it", - name: "Gemma 2 9B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.2, - output: 0.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "llama-3.1-8b-instant": { - id: "llama-3.1-8b-instant", - name: "Llama 3.1 8B Instant", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.05, - output: 0.08, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "llama-3.3-70b-versatile": { - id: "llama-3.3-70b-versatile", - name: "Llama 3.3 70B Versatile", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.59, - output: 0.79, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "llama3-70b-8192": { - id: "llama3-70b-8192", - name: "Llama 3 70B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.59, - output: 0.79, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "llama3-8b-8192": { - id: "llama3-8b-8192", - name: "Llama 3 8B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.05, - output: 0.08, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies 
Model<"openai-completions">, - "meta-llama/llama-4-maverick-17b-128e-instruct": { - id: "meta-llama/llama-4-maverick-17b-128e-instruct", - name: "Llama 4 Maverick 17B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "meta-llama/llama-4-scout-17b-16e-instruct": { - id: "meta-llama/llama-4-scout-17b-16e-instruct", - name: "Llama 4 Scout 17B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.11, - output: 0.34, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "mistral-saba-24b": { - id: "mistral-saba-24b", - name: "Mistral Saba 24B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.79, - output: 0.79, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-instruct": { - id: "moonshotai/kimi-k2-instruct", - name: "Kimi K2 Instruct", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-instruct-0905": { - id: "moonshotai/kimi-k2-instruct-0905", - name: "Kimi K2 Instruct 0905", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b": { - id: "openai/gpt-oss-120b", - name: "GPT OSS 120B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.15, - output: 0.75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-20b": { - id: "openai/gpt-oss-20b", - name: "GPT OSS 20B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.1, - output: 0.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen-qwq-32b": { - id: "qwen-qwq-32b", - name: "Qwen QwQ 32B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.29, - output: 0.39, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "qwen/qwen3-32b": { - id: "qwen/qwen3-32b", - name: "Qwen3 32B", - api: "openai-completions", - provider: "groq", - baseUrl: "https://api.groq.com/openai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.29, - output: 0.59, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies 
Model<"openai-completions">, - }, - "mistral": { - "codestral-latest": { - id: "codestral-latest", - name: "Codestral", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 0.9, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "devstral-2512": { - id: "devstral-2512", - name: "Devstral 2", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "devstral-medium-2507": { - id: "devstral-medium-2507", - name: "Devstral Medium", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.4, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "devstral-medium-latest": { - id: "devstral-medium-latest", - name: "Devstral 2", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.4, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "devstral-small-2505": { - id: "devstral-small-2505", - name: "Devstral Small 2505", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.1, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "devstral-small-2507": { - id: "devstral-small-2507", - name: "Devstral Small", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.1, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "labs-devstral-small-2512": { - id: "labs-devstral-small-2512", - name: "Devstral Small 2", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 256000, - } satisfies Model<"openai-completions">, - "magistral-medium-latest": { - id: "magistral-medium-latest", - name: "Magistral Medium", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 2, - output: 5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "magistral-small": { - id: "magistral-small", - name: "Magistral Small", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-3b-latest": { - id: "ministral-3b-latest", - name: "Ministral 3B", - api: "openai-completions", 
- provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.04, - output: 0.04, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "ministral-8b-latest": { - id: "ministral-8b-latest", - name: "Ministral 8B", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.1, - output: 0.1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "mistral-large-2411": { - id: "mistral-large-2411", - name: "Mistral Large 2.1", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistral-large-2512": { - id: "mistral-large-2512", - name: "Mistral Large 3", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "mistral-large-latest": { - id: "mistral-large-latest", - name: "Mistral Large", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "mistral-medium-2505": { - id: "mistral-medium-2505", - name: "Mistral Medium 3", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.4, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "mistral-medium-2508": { - id: "mistral-medium-2508", - name: "Mistral Medium 3.1", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.4, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "mistral-medium-latest": { - id: "mistral-medium-latest", - name: "Mistral Medium", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.4, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistral-nemo": { - id: "mistral-nemo", - name: "Mistral Nemo", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.15, - output: 0.15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "mistral-small-2506": { - id: "mistral-small-2506", - name: "Mistral Small 3.2", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - 
cost: { - input: 0.1, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistral-small-latest": { - id: "mistral-small-latest", - name: "Mistral Small", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.1, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "open-mistral-7b": { - id: "open-mistral-7b", - name: "Mistral 7B", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 0.25, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8000, - maxTokens: 8000, - } satisfies Model<"openai-completions">, - "open-mixtral-8x22b": { - id: "open-mixtral-8x22b", - name: "Mixtral 8x22B", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 64000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "open-mixtral-8x7b": { - id: "open-mixtral-8x7b", - name: "Mixtral 8x7B", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.7, - output: 0.7, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "pixtral-12b": { - id: "pixtral-12b", - name: "Pixtral 12B", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "pixtral-large-latest": { - id: "pixtral-large-latest", - name: "Pixtral Large", - api: "openai-completions", - provider: "mistral", - baseUrl: "https://api.mistral.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - }, - "openai": { - "codex-mini-latest": { - id: "codex-mini-latest", - name: "Codex Mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, } satisfies Model<"openai-responses">, "gpt-4": { id: "gpt-4", @@ -2056,90 +757,22 @@ export const MODELS = { contextWindow: 8192, maxTokens: 8192, } satisfies Model<"openai-responses">, - "gpt-4-turbo": { - id: "gpt-4-turbo", - name: "GPT-4 Turbo", + "o1-pro": { + id: "o1-pro", + name: "o1-pro", api: "openai-responses", provider: "openai", baseUrl: "https://api.openai.com/v1", - reasoning: false, + reasoning: true, input: ["text", "image"], cost: { - input: 10, - output: 30, + input: 150, + output: 600, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-responses">, - "gpt-4.1": { - id: "gpt-4.1", - name: "GPT-4.1", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: false, - input: 
["text", "image"], - cost: { - input: 2, - output: 8, - cacheRead: 0.5, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-responses">, - "gpt-4.1-mini": { - id: "gpt-4.1-mini", - name: "GPT-4.1 mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.4, - output: 1.6, - cacheRead: 0.1, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-responses">, - "gpt-4.1-nano": { - id: "gpt-4.1-nano", - name: "GPT-4.1 nano", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.1, - output: 0.4, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-responses">, - "gpt-4o": { - id: "gpt-4o", - name: "GPT-4o", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2.5, - output: 10, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, + contextWindow: 200000, + maxTokens: 100000, } satisfies Model<"openai-responses">, "gpt-4o-2024-05-13": { id: "gpt-4o-2024-05-13", @@ -2158,6 +791,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-responses">, + "gpt-5.1-codex": { + id: "gpt-5.1-codex", + name: "GPT-5.1 Codex", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, "gpt-4o-2024-08-06": { id: "gpt-4o-2024-08-06", name: "GPT-4o (2024-08-06)", @@ -2175,6 +825,295 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-responses">, + "gpt-4.1-mini": { + id: "gpt-4.1-mini", + name: "GPT-4.1 mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.4, + output: 1.6, + cacheRead: 0.1, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"openai-responses">, + "o3-deep-research": { + id: "o3-deep-research", + name: "o3-deep-research", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 10, + output: 40, + cacheRead: 2.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "gpt-5.2-pro": { + id: "gpt-5.2-pro", + name: "GPT-5.2 Pro", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 21, + output: 168, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-4-turbo": { + id: "gpt-4-turbo", + name: "GPT-4 Turbo", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 10, + output: 30, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-responses">, + 
"gpt-5.1-codex-mini": { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1 Codex mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0.025, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "o3-mini": { + id: "o3-mini", + name: "o3-mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.55, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "gpt-5.2-chat-latest": { + id: "gpt-5.2-chat-latest", + name: "GPT-5.2 Chat", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-responses">, + "gpt-5.1": { + id: "gpt-5.1", + name: "GPT-5.1", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.13, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "codex-mini-latest": { + id: "codex-mini-latest", + name: "Codex Mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text"], + cost: { + input: 1.5, + output: 6, + cacheRead: 0.375, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "gpt-5-nano": { + id: "gpt-5-nano", + name: "GPT-5 Nano", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.05, + output: 0.4, + cacheRead: 0.01, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5-codex": { + id: "gpt-5-codex", + name: "GPT-5-Codex", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-4o": { + id: "gpt-4o", + name: "GPT-4o", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-responses">, + "gpt-4.1": { + id: "gpt-4.1", + name: "GPT-4.1", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"openai-responses">, + "o4-mini": { + id: "o4-mini", + name: "o4-mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.28, + cacheWrite: 0, + }, + contextWindow: 
200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "o1": { + id: "o1", + name: "o1", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 60, + cacheRead: 7.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "gpt-5-mini": { + id: "gpt-5-mini", + name: "GPT-5 Mini", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0.03, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "o3-pro": { + id: "o3-pro", + name: "o3-pro", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 20, + output: 80, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, "gpt-4o-2024-11-20": { id: "gpt-4o-2024-11-20", name: "GPT-4o (2024-11-20)", @@ -2192,6 +1131,57 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-responses">, + "gpt-5.1-codex-max": { + id: "gpt-5.1-codex-max", + name: "GPT-5.1 Codex Max", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "o3": { + id: "o3", + name: "o3", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "o4-mini-deep-research": { + id: "o4-mini-deep-research", + name: "o4-mini-deep-research", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, "gpt-4o-mini": { id: "gpt-4o-mini", name: "GPT-4o mini", @@ -2226,74 +1216,6 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, - "gpt-5-chat-latest": { - id: "gpt-5-chat-latest", - name: "GPT-5 Chat Latest", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-responses">, - "gpt-5-codex": { - id: "gpt-5-codex", - name: "GPT-5-Codex", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5-mini": { - id: "gpt-5-mini", - name: "GPT-5 Mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", 
"image"], - cost: { - input: 0.25, - output: 2, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5-nano": { - id: "gpt-5-nano", - name: "GPT-5 Nano", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.05, - output: 0.4, - cacheRead: 0.01, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, "gpt-5-pro": { id: "gpt-5-pro", name: "GPT-5 Pro", @@ -2311,18 +1233,18 @@ export const MODELS = { contextWindow: 400000, maxTokens: 272000, } satisfies Model<"openai-responses">, - "gpt-5.1": { - id: "gpt-5.1", - name: "GPT-5.1", + "gpt-5.2": { + id: "gpt-5.2", + name: "GPT-5.2", api: "openai-responses", provider: "openai", baseUrl: "https://api.openai.com/v1", reasoning: true, input: ["text", "image"], cost: { - input: 1.25, - output: 10, - cacheRead: 0.13, + input: 1.75, + output: 14, + cacheRead: 0.175, cacheWrite: 0, }, contextWindow: 400000, @@ -2345,13 +1267,13 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-responses">, - "gpt-5.1-codex": { - id: "gpt-5.1-codex", - name: "GPT-5.1 Codex", + "gpt-5-chat-latest": { + id: "gpt-5-chat-latest", + name: "GPT-5 Chat Latest", api: "openai-responses", provider: "openai", baseUrl: "https://api.openai.com/v1", - reasoning: true, + reasoning: false, input: ["text", "image"], cost: { input: 1.25, @@ -2359,707 +1281,1615 @@ export const MODELS = { cacheRead: 0.125, cacheWrite: 0, }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.1-codex-max": { - id: "gpt-5.1-codex-max", - name: "GPT-5.1 Codex Max", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.1-codex-mini": { - id: "gpt-5.1-codex-mini", - name: "GPT-5.1 Codex mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.25, - output: 2, - cacheRead: 0.025, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.2": { - id: "gpt-5.2", - name: "GPT-5.2", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.75, - output: 14, - cacheRead: 0.175, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-responses">, - "gpt-5.2-chat-latest": { - id: "gpt-5.2-chat-latest", - name: "GPT-5.2 Chat", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.75, - output: 14, - cacheRead: 0.175, - cacheWrite: 0, - }, contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-responses">, - "gpt-5.2-pro": { - id: "gpt-5.2-pro", - name: "GPT-5.2 Pro", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 21, - output: 168, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies 
Model<"openai-responses">, - "o1": { - id: "o1", - name: "o1", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 60, - cacheRead: 7.5, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o1-pro": { - id: "o1-pro", - name: "o1-pro", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 150, - output: 600, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o3": { - id: "o3", - name: "o3", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 8, - cacheRead: 0.5, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o3-deep-research": { - id: "o3-deep-research", - name: "o3-deep-research", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 10, - output: 40, - cacheRead: 2.5, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o3-mini": { - id: "o3-mini", - name: "o3-mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.55, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o3-pro": { - id: "o3-pro", - name: "o3-pro", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 20, - output: 80, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o4-mini": { - id: "o4-mini", - name: "o4-mini", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.28, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, - "o4-mini-deep-research": { - id: "o4-mini-deep-research", - name: "o4-mini-deep-research", - api: "openai-responses", - provider: "openai", - baseUrl: "https://api.openai.com/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 8, - cacheRead: 0.5, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-responses">, }, - "openrouter": { - "ai21/jamba-large-1.7": { - id: "ai21/jamba-large-1.7", - name: "AI21: Jamba Large 1.7", + "groq": { + "llama-3.1-8b-instant": { + id: "llama-3.1-8b-instant", + name: "Llama 3.1 8B Instant", api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", reasoning: false, input: ["text"], cost: { - input: 2, - output: 8, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "ai21/jamba-mini-1.7": { - id: "ai21/jamba-mini-1.7", - name: "AI21: Jamba Mini 1.7", - api: "openai-completions", - 
provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "alibaba/tongyi-deepresearch-30b-a3b": { - id: "alibaba/tongyi-deepresearch-30b-a3b", - name: "Tongyi DeepResearch 30B A3B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.09, - output: 0.39999999999999997, + input: 0.05, + output: 0.08, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "alibaba/tongyi-deepresearch-30b-a3b:free": { - id: "alibaba/tongyi-deepresearch-30b-a3b:free", - name: "Tongyi DeepResearch 30B A3B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "allenai/olmo-3-7b-instruct": { - id: "allenai/olmo-3-7b-instruct", - name: "AllenAI: Olmo 3 7B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.19999999999999998, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 65536, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "amazon/nova-2-lite-v1": { - id: "amazon/nova-2-lite-v1", - name: "Amazon: Nova 2 Lite", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 65535, - } satisfies Model<"openai-completions">, - "amazon/nova-lite-v1": { - id: "amazon/nova-lite-v1", - name: "Amazon: Nova Lite 1.0", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.06, - output: 0.24, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 300000, - maxTokens: 5120, - } satisfies Model<"openai-completions">, - "amazon/nova-micro-v1": { - id: "amazon/nova-micro-v1", - name: "Amazon: Nova Micro 1.0", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.035, - output: 0.14, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 5120, - } satisfies Model<"openai-completions">, - "amazon/nova-premier-v1": { - id: "amazon/nova-premier-v1", - name: "Amazon: Nova Premier 1.0", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2.5, - output: 12.5, - cacheRead: 0.625, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "amazon/nova-pro-v1": { - id: "amazon/nova-pro-v1", - name: "Amazon: Nova Pro 1.0", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.7999999999999999, - output: 
3.1999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 300000, - maxTokens: 5120, - } satisfies Model<"openai-completions">, - "anthropic/claude-3-haiku": { - id: "anthropic/claude-3-haiku", - name: "Anthropic: Claude 3 Haiku", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.25, - output: 1.25, - cacheRead: 0.03, - cacheWrite: 0.3, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "anthropic/claude-3-opus": { - id: "anthropic/claude-3-opus", - name: "Anthropic: Claude 3 Opus", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "anthropic/claude-3.5-haiku": { - id: "anthropic/claude-3.5-haiku", - name: "Anthropic: Claude 3.5 Haiku", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.7999999999999999, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, - }, - contextWindow: 200000, maxTokens: 8192, } satisfies Model<"openai-completions">, - "anthropic/claude-3.5-haiku-20241022": { - id: "anthropic/claude-3.5-haiku-20241022", - name: "Anthropic: Claude 3.5 Haiku (2024-10-22)", + "mistral-saba-24b": { + id: "mistral-saba-24b", + name: "Mistral Saba 24B", api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", reasoning: false, - input: ["text", "image"], + input: ["text"], cost: { - input: 0.7999999999999999, - output: 4, - cacheRead: 0.08, - cacheWrite: 1, - }, - contextWindow: 200000, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "anthropic/claude-3.5-sonnet": { - id: "anthropic/claude-3.5-sonnet", - name: "Anthropic: Claude 3.5 Sonnet", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 6, - output: 30, + input: 0.79, + output: 0.79, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 200000, + contextWindow: 32768, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "llama3-8b-8192": { + id: "llama3-8b-8192", + name: "Llama 3 8B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.05, + output: 0.08, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, maxTokens: 8192, } satisfies Model<"openai-completions">, - "anthropic/claude-3.7-sonnet": { - id: "anthropic/claude-3.7-sonnet", - name: "Anthropic: Claude 3.7 Sonnet", + "qwen-qwq-32b": { + id: "qwen-qwq-32b", + name: "Qwen QwQ 32B", api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "anthropic/claude-3.7-sonnet:thinking": { - id: "anthropic/claude-3.7-sonnet:thinking", - name: "Anthropic: Claude 3.7 Sonnet (thinking)", - api: "openai-completions", - provider: "openrouter", - 
baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "anthropic/claude-haiku-4.5": { - id: "anthropic/claude-haiku-4.5", - name: "Anthropic: Claude Haiku 4.5", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1, - output: 5, - cacheRead: 0.09999999999999999, - cacheWrite: 1.25, - }, - contextWindow: 200000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "anthropic/claude-opus-4": { - id: "anthropic/claude-opus-4", - name: "Anthropic: Claude Opus 4", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "anthropic/claude-opus-4.1": { - id: "anthropic/claude-opus-4.1", - name: "Anthropic: Claude Opus 4.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 75, - cacheRead: 1.5, - cacheWrite: 18.75, - }, - contextWindow: 200000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "anthropic/claude-opus-4.5": { - id: "anthropic/claude-opus-4.5", - name: "Anthropic: Claude Opus 4.5", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 5, - output: 25, - cacheRead: 0.5, - cacheWrite: 6.25, - }, - contextWindow: 200000, - maxTokens: 32000, - } satisfies Model<"openai-completions">, - "anthropic/claude-sonnet-4": { - id: "anthropic/claude-sonnet-4", - name: "Anthropic: Claude Sonnet 4", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 1000000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "anthropic/claude-sonnet-4.5": { - id: "anthropic/claude-sonnet-4.5", - name: "Anthropic: Claude Sonnet 4.5", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.3, - cacheWrite: 3.75, - }, - contextWindow: 1000000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "arcee-ai/trinity-mini": { - id: "arcee-ai/trinity-mini", - name: "Arcee AI: Trinity Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", reasoning: true, input: ["text"], cost: { - input: 0.045, - output: 0.15, + input: 0.29, + output: 0.39, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, + maxTokens: 16384, } satisfies Model<"openai-completions">, - "arcee-ai/trinity-mini:free": { - id: "arcee-ai/trinity-mini:free", - name: "Arcee AI: Trinity Mini (free)", + "llama3-70b-8192": { + id: "llama3-70b-8192", + name: "Llama 3 70B", api: "openai-completions", - provider: "openrouter", - baseUrl: 
"https://openrouter.ai/api/v1", - reasoning: true, + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, input: ["text"], cost: { - input: 0, - output: 0, + input: 0.59, + output: 0.79, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 4096, + contextWindow: 8192, + maxTokens: 8192, } satisfies Model<"openai-completions">, - "arcee-ai/virtuoso-large": { - id: "arcee-ai/virtuoso-large", - name: "Arcee AI: Virtuoso Large", + "deepseek-r1-distill-llama-70b": { + id: "deepseek-r1-distill-llama-70b", + name: "DeepSeek R1 Distill Llama 70B", api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, input: ["text"], cost: { input: 0.75, + output: 0.99, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "gemma2-9b-it": { + id: "gemma2-9b-it", + name: "Gemma 2 9B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.2, + output: 0.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "llama-3.3-70b-versatile": { + id: "llama-3.3-70b-versatile", + name: "Llama 3.3 70B Versatile", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.59, + output: 0.79, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-instruct-0905": { + id: "moonshotai/kimi-k2-instruct-0905", + name: "Kimi K2 Instruct 0905", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-instruct": { + id: "moonshotai/kimi-k2-instruct", + name: "Kimi K2 Instruct", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-20b": { + id: "openai/gpt-oss-20b", + name: "GPT OSS 20B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.1, + output: 0.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-120b": { + id: "openai/gpt-oss-120b", + name: "GPT OSS 120B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.15, + output: 0.75, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "qwen/qwen3-32b": { + id: "qwen/qwen3-32b", + name: "Qwen3 32B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.29, + output: 0.59, + cacheRead: 0, + 
cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "meta-llama/llama-4-scout-17b-16e-instruct": { + id: "meta-llama/llama-4-scout-17b-16e-instruct", + name: "Llama 4 Scout 17B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.11, + output: 0.34, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "meta-llama/llama-4-maverick-17b-128e-instruct": { + id: "meta-llama/llama-4-maverick-17b-128e-instruct", + name: "Llama 4 Maverick 17B", + api: "openai-completions", + provider: "groq", + baseUrl: "https://api.groq.com/openai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.2, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + }, + "cerebras": { + "qwen-3-235b-a22b-instruct-2507": { + id: "qwen-3-235b-a22b-instruct-2507", + name: "Qwen 3 235B Instruct", + api: "openai-completions", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, output: 1.2, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 64000, + contextWindow: 131000, + maxTokens: 32000, } satisfies Model<"openai-completions">, - "baidu/ernie-4.5-21b-a3b": { - id: "baidu/ernie-4.5-21b-a3b", - name: "Baidu: ERNIE 4.5 21B A3B", + "zai-glm-4.6": { + id: "zai-glm-4.6", + name: "Z.AI GLM-4.6", api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", reasoning: false, input: ["text"], cost: { - input: 0.056, - output: 0.224, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 120000, + contextWindow: 131072, + maxTokens: 40960, + } satisfies Model<"openai-completions">, + "gpt-oss-120b": { + id: "gpt-oss-120b", + name: "GPT OSS 120B", + api: "openai-completions", + provider: "cerebras", + baseUrl: "https://api.cerebras.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.25, + output: 0.69, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + }, + "xai": { + "grok-4-fast-non-reasoning": { + id: "grok-4-fast-non-reasoning", + name: "Grok 4 Fast (Non-Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.2, + output: 0.5, + cacheRead: 0.05, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-3-fast": { + id: "grok-3-fast", + name: "Grok 3 Fast", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 5, + output: 25, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-4": { + id: "grok-4", + name: "Grok 4", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "grok-2-vision": { + id: "grok-2-vision", + name: "Grok 2 
Vision", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "grok-code-fast-1": { + id: "grok-code-fast-1", + name: "Grok Code Fast 1", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.2, + output: 1.5, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 10000, + } satisfies Model<"openai-completions">, + "grok-2": { + id: "grok-2", + name: "Grok 2", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-3-mini-fast-latest": { + id: "grok-3-mini-fast-latest", + name: "Grok 3 Mini Fast Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 4, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-2-vision-1212": { + id: "grok-2-vision-1212", + name: "Grok 2 Vision (1212)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "grok-3": { + id: "grok-3", + name: "Grok 3", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-4-fast": { + id: "grok-4-fast", + name: "Grok 4 Fast", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 0.5, + cacheRead: 0.05, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-2-latest": { + id: "grok-2-latest", + name: "Grok 2 Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-4-1-fast": { + id: "grok-4-1-fast", + name: "Grok 4.1 Fast", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.2, + output: 0.5, + cacheRead: 0.05, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-2-1212": { + id: "grok-2-1212", + name: "Grok 2 (1212)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-3-fast-latest": { + id: 
"grok-3-fast-latest", + name: "Grok 3 Fast Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 5, + output: 25, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-3-latest": { + id: "grok-3-latest", + name: "Grok 3 Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-2-vision-latest": { + id: "grok-2-vision-latest", + name: "Grok 2 Vision Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 10, + cacheRead: 2, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "grok-vision-beta": { + id: "grok-vision-beta", + name: "Grok Vision Beta", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 5, + output: 15, + cacheRead: 5, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "grok-3-mini": { + id: "grok-3-mini", + name: "Grok 3 Mini", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 0.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-beta": { + id: "grok-beta", + name: "Grok Beta", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 5, + output: 15, + cacheRead: 5, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "grok-3-mini-latest": { + id: "grok-3-mini-latest", + name: "Grok 3 Mini Latest", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 0.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "grok-4-1-fast-non-reasoning": { + id: "grok-4-1-fast-non-reasoning", + name: "Grok 4.1 Fast (Non-Reasoning)", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.2, + output: 0.5, + cacheRead: 0.05, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + "grok-3-mini-fast": { + id: "grok-3-mini-fast", + name: "Grok 3 Mini Fast", + api: "openai-completions", + provider: "xai", + baseUrl: "https://api.x.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 4, + cacheRead: 0.15, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + }, + "zai": { + "glm-4.7": { + id: "glm-4.7", + name: "GLM-4.7", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.2, + cacheRead: 0.11, + cacheWrite: 0, + }, + 
contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "glm-4.5-flash": { + id: "glm-4.5-flash", + name: "GLM-4.5-Flash", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + "glm-4.5": { + id: "glm-4.5", + name: "GLM-4.5", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.2, + cacheRead: 0.11, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + "glm-4.5-air": { + id: "glm-4.5-air", + name: "GLM-4.5-Air", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.2, + output: 1.1, + cacheRead: 0.03, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"anthropic-messages">, + "glm-4.5v": { + id: "glm-4.5v", + name: "GLM-4.5V", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.6, + output: 1.8, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 64000, + maxTokens: 16384, + } satisfies Model<"anthropic-messages">, + "glm-4.6": { + id: "glm-4.6", + name: "GLM-4.6", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text"], + cost: { + input: 0.6, + output: 2.2, + cacheRead: 0.11, + cacheWrite: 0, + }, + contextWindow: 204800, + maxTokens: 131072, + } satisfies Model<"anthropic-messages">, + "glm-4.6v": { + id: "glm-4.6v", + name: "GLM-4.6V", + api: "anthropic-messages", + provider: "zai", + baseUrl: "https://api.z.ai/api/anthropic", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 0.9, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 32768, + } satisfies Model<"anthropic-messages">, + }, + "mistral": { + "devstral-medium-2507": { + id: "devstral-medium-2507", + name: "Devstral Medium", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "mistral-large-2512": { + id: "mistral-large-2512", + name: "Mistral Large 3", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "open-mixtral-8x22b": { + id: "open-mixtral-8x22b", + name: "Mixtral 8x22B", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 64000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "ministral-8b-latest": { + id: "ministral-8b-latest", + name: "Ministral 8B", + api: "openai-completions", + provider: "mistral", + baseUrl: 
"https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.1, + output: 0.1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "pixtral-large-latest": { + id: "pixtral-large-latest", + name: "Pixtral Large", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "mistral-small-2506": { + id: "mistral-small-2506", + name: "Mistral Small 3.2", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "devstral-2512": { + id: "devstral-2512", + name: "Devstral 2", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "ministral-3b-latest": { + id: "ministral-3b-latest", + name: "Ministral 3B", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.04, + output: 0.04, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "pixtral-12b": { + id: "pixtral-12b", + name: "Pixtral 12B", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "mistral-medium-2505": { + id: "mistral-medium-2505", + name: "Mistral Medium 3", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "labs-devstral-small-2512": { + id: "labs-devstral-small-2512", + name: "Devstral Small 2", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 256000, + } satisfies Model<"openai-completions">, + "devstral-medium-latest": { + id: "devstral-medium-latest", + name: "Devstral 2", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "devstral-small-2505": { + id: "devstral-small-2505", + name: "Devstral Small 2505", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.1, + output: 0.3, + cacheRead: 0, + 
cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "mistral-medium-2508": { + id: "mistral-medium-2508", + name: "Mistral Medium 3.1", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "mistral-small-latest": { + id: "mistral-small-latest", + name: "Mistral Small", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.1, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "magistral-small": { + id: "magistral-small", + name: "Magistral Small", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "devstral-small-2507": { + id: "devstral-small-2507", + name: "Devstral Small", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.1, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "codestral-latest": { + id: "codestral-latest", + name: "Codestral", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 0.9, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "open-mixtral-8x7b": { + id: "open-mixtral-8x7b", + name: "Mixtral 8x7B", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.7, + output: 0.7, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32000, + maxTokens: 32000, + } satisfies Model<"openai-completions">, + "mistral-nemo": { + id: "mistral-nemo", + name: "Mistral Nemo", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.15, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "open-mistral-7b": { + id: "open-mistral-7b", + name: "Mistral 7B", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 0.25, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8000, maxTokens: 8000, } satisfies Model<"openai-completions">, - "baidu/ernie-4.5-vl-28b-a3b": { - id: "baidu/ernie-4.5-vl-28b-a3b", - name: "Baidu: ERNIE 4.5 VL 28B A3B", + "mistral-large-latest": { + id: "mistral-large-latest", + name: "Mistral Large", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, 
+ } satisfies Model<"openai-completions">, + "mistral-medium-latest": { + id: "mistral-medium-latest", + name: "Mistral Medium", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.4, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistral-large-2411": { + id: "mistral-large-2411", + name: "Mistral Large 2.1", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "magistral-medium-latest": { + id: "magistral-medium-latest", + name: "Magistral Medium", + api: "openai-completions", + provider: "mistral", + baseUrl: "https://api.mistral.ai/v1", + reasoning: true, + input: ["text"], + cost: { + input: 2, + output: 5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + }, + "github-copilot": { + "gemini-3-flash-preview": { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "grok-code-fast-1": { + id: "grok-code-fast-1", + name: "Grok Code Fast 1", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "gpt-5.1-codex": { + id: "gpt-5.1-codex", + name: "GPT-5.1-Codex", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "claude-haiku-4.5": { + id: "claude-haiku-4.5", + name: "Claude Haiku 4.5", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: 
{"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16000, + } satisfies Model<"openai-completions">, + "gemini-3-pro-preview": { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "oswe-vscode-prime": { + id: "oswe-vscode-prime", + name: "Raptor Mini (Preview)", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"openai-responses">, + "gpt-5.1-codex-mini": { + id: "gpt-5.1-codex-mini", + name: "GPT-5.1-Codex-mini", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 100000, + } satisfies Model<"openai-responses">, + "gpt-5.1": { + id: "gpt-5.1", + name: "GPT-5.1", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-5-codex": { + id: "gpt-5-codex", + name: "GPT-5-Codex", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "gpt-4o": { + id: "gpt-4o", + name: "GPT-4o", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: 
{"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 64000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "gpt-4.1": { + id: "gpt-4.1", + name: "GPT-4.1", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "gpt-5-mini": { + id: "gpt-5-mini", + name: "GPT-5-mini", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-responses">, + "gemini-2.5-pro": { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "gpt-5.1-codex-max": { + id: "gpt-5.1-codex-max", + name: "GPT-5.1-Codex-max", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "claude-sonnet-4": { + id: "claude-sonnet-4", + name: "Claude Sonnet 4", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16000, + } satisfies 
Model<"openai-completions">, + "gpt-5": { + id: "gpt-5", + name: "GPT-5", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-responses">, + "claude-opus-4.5": { + id: "claude-opus-4.5", + name: "Claude Opus 4.5", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16000, + } satisfies Model<"openai-completions">, + "gpt-5.2": { + id: "gpt-5.2", + name: "GPT-5.2", + api: "openai-responses", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-responses">, + "claude-sonnet-4.5": { + id: "claude-sonnet-4.5", + name: "Claude Sonnet 4.5", + api: "openai-completions", + provider: "github-copilot", + baseUrl: "https://api.individual.githubcopilot.com", + headers: {"User-Agent":"GitHubCopilotChat/0.35.0","Editor-Version":"vscode/1.107.0","Editor-Plugin-Version":"copilot-chat/0.35.0","Copilot-Integration-Id":"vscode-chat"}, + compat: {"supportsStore":false,"supportsDeveloperRole":false,"supportsReasoningEffort":false}, + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16000, + } satisfies Model<"openai-completions">, + }, + "openrouter": { + "bytedance-seed/seed-1.6-flash": { + id: "bytedance-seed/seed-1.6-flash", + name: "ByteDance Seed: Seed 1.6 Flash", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 0.112, - output: 0.448, + input: 0.075, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 30000, - maxTokens: 8000, + contextWindow: 262144, + maxTokens: 16384, } satisfies Model<"openai-completions">, "bytedance-seed/seed-1.6": { id: "bytedance-seed/seed-1.6", @@ -3078,162 +2908,9 @@ export const MODELS = { contextWindow: 262144, maxTokens: 32768, } satisfies Model<"openai-completions">, - "bytedance-seed/seed-1.6-flash": { - id: "bytedance-seed/seed-1.6-flash", - name: "ByteDance Seed: Seed 1.6 Flash", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.075, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 16384, - } satisfies 
Model<"openai-completions">, - "cohere/command-r-08-2024": { - id: "cohere/command-r-08-2024", - name: "Cohere: Command R (08-2024)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4000, - } satisfies Model<"openai-completions">, - "cohere/command-r-plus-08-2024": { - id: "cohere/command-r-plus-08-2024", - name: "Cohere: Command R+ (08-2024)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2.5, - output: 10, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4000, - } satisfies Model<"openai-completions">, - "deepcogito/cogito-v2-preview-llama-109b-moe": { - id: "deepcogito/cogito-v2-preview-llama-109b-moe", - name: "Cogito V2 Preview Llama 109B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.18, - output: 0.59, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32767, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepcogito/cogito-v2-preview-llama-405b": { - id: "deepcogito/cogito-v2-preview-llama-405b", - name: "Deep Cogito: Cogito V2 Preview Llama 405B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 3.5, - output: 3.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepcogito/cogito-v2-preview-llama-70b": { - id: "deepcogito/cogito-v2-preview-llama-70b", - name: "Deep Cogito: Cogito V2 Preview Llama 70B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.88, - output: 0.88, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-chat": { - id: "deepseek/deepseek-chat", - name: "DeepSeek: DeepSeek V3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-chat-v3-0324": { - id: "deepseek/deepseek-chat-v3-0324", - name: "DeepSeek: DeepSeek V3 0324", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 0.88, - cacheRead: 0.106, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-chat-v3.1": { - id: "deepseek/deepseek-chat-v3.1", - name: "DeepSeek: DeepSeek V3.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.15, - output: 0.75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 7168, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-r1": { - id: "deepseek/deepseek-r1", - name: "DeepSeek: R1", + "minimax/minimax-m2.1": { 
+ id: "minimax/minimax-m2.1", + name: "MiniMax: MiniMax M2.1", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -3242,15 +2919,15 @@ export const MODELS = { cost: { input: 0.3, output: 1.2, - cacheRead: 0, - cacheWrite: 0, + cacheRead: 0.03, + cacheWrite: 0.375, }, - contextWindow: 163840, - maxTokens: 4096, + contextWindow: 204800, + maxTokens: 131072, } satisfies Model<"openai-completions">, - "deepseek/deepseek-r1-0528": { - id: "deepseek/deepseek-r1-0528", - name: "DeepSeek: R1 0528", + "z-ai/glm-4.7": { + id: "z-ai/glm-4.7", + name: "Z.AI: GLM 4.7", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -3258,266 +2935,11 @@ export const MODELS = { input: ["text"], cost: { input: 0.39999999999999997, - output: 1.75, + output: 1.5, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-r1-distill-llama-70b": { - id: "deepseek/deepseek-r1-distill-llama-70b", - name: "DeepSeek: R1 Distill Llama 70B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.03, - output: 0.11, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.1-terminus": { - id: "deepseek/deepseek-v3.1-terminus", - name: "DeepSeek: DeepSeek V3.1 Terminus", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.7899999999999999, - cacheRead: 0.16799999999999998, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.1-terminus:exacto": { - id: "deepseek/deepseek-v3.1-terminus:exacto", - name: "DeepSeek: DeepSeek V3.1 Terminus (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.7899999999999999, - cacheRead: 0.16799999999999998, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.2": { - id: "deepseek/deepseek-v3.2", - name: "DeepSeek: DeepSeek V3.2", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.224, - output: 0.32, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "deepseek/deepseek-v3.2-exp": { - id: "deepseek/deepseek-v3.2-exp", - name: "DeepSeek: DeepSeek V3.2 Exp", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.21, - output: 0.32, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemini-2.0-flash-001": { - id: "google/gemini-2.0-flash-001", - name: "Google: Gemini 2.0 Flash", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0.024999999999999998, - cacheWrite: 0.18330000000000002, - }, - 
contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "google/gemini-2.0-flash-exp:free": { - id: "google/gemini-2.0-flash-exp:free", - name: "Google: Gemini 2.0 Flash Experimental (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "google/gemini-2.0-flash-lite-001": { - id: "google/gemini-2.0-flash-lite-001", - name: "Google: Gemini 2.0 Flash Lite", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.075, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-flash": { - id: "google/gemini-2.5-flash", - name: "Google: Gemini 2.5 Flash", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.03, - cacheWrite: 0.3833, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-flash-lite": { - id: "google/gemini-2.5-flash-lite", - name: "Google: Gemini 2.5 Flash Lite", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0.01, - cacheWrite: 0.18330000000000002, - }, - contextWindow: 1048576, - maxTokens: 65535, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-flash-lite-preview-09-2025": { - id: "google/gemini-2.5-flash-lite-preview-09-2025", - name: "Google: Gemini 2.5 Flash Lite Preview 09-2025", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-flash-preview-09-2025": { - id: "google/gemini-2.5-flash-preview-09-2025", - name: "Google: Gemini 2.5 Flash Preview 09-2025", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 2.5, - cacheRead: 0.075, - cacheWrite: 0.3833, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-pro": { - id: "google/gemini-2.5-pro", - name: "Google: Gemini 2.5 Pro", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 1.625, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-pro-preview": { - id: "google/gemini-2.5-pro-preview", - name: "Google: Gemini 2.5 Pro Preview 06-05", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - 
output: 10, - cacheRead: 0.31, - cacheWrite: 1.625, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemini-2.5-pro-preview-05-06": { - id: "google/gemini-2.5-pro-preview-05-06", - name: "Google: Gemini 2.5 Pro Preview 05-06", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.31, - cacheWrite: 1.625, - }, - contextWindow: 1048576, + contextWindow: 202752, maxTokens: 65535, } satisfies Model<"openai-completions">, "google/gemini-3-flash-preview": { @@ -3537,213 +2959,9 @@ export const MODELS = { contextWindow: 1048576, maxTokens: 65535, } satisfies Model<"openai-completions">, - "google/gemini-3-pro-preview": { - id: "google/gemini-3-pro-preview", - name: "Google: Gemini 3 Pro Preview", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2, - output: 12, - cacheRead: 0.19999999999999998, - cacheWrite: 2.375, - }, - contextWindow: 1048576, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it": { - id: "google/gemma-3-27b-it", - name: "Google: Gemma 3 27B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.04, - output: 0.15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 96000, - maxTokens: 96000, - } satisfies Model<"openai-completions">, - "google/gemma-3-27b-it:free": { - id: "google/gemma-3-27b-it:free", - name: "Google: Gemma 3 27B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "inception/mercury": { - id: "inception/mercury", - name: "Inception: Mercury", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "inception/mercury-coder": { - id: "inception/mercury-coder", - name: "Inception: Mercury Coder", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "kwaipilot/kat-coder-pro:free": { - id: "kwaipilot/kat-coder-pro:free", - name: "Kwaipilot: KAT-Coder-Pro V1 (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3-70b-instruct": { - id: "meta-llama/llama-3-70b-instruct", - name: "Meta: Llama 3 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 
0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3-8b-instruct": { - id: "meta-llama/llama-3-8b-instruct", - name: "Meta: Llama 3 8B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.03, - output: 0.06, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 3.5, - output: 3.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 10000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-70b-instruct": { - id: "meta-llama/llama-3.1-70b-instruct", - name: "Meta: Llama 3.1 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-8b-instruct": { - id: "meta-llama/llama-3.1-8b-instruct", - name: "Meta: Llama 3.1 8B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.02, - output: 0.03, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.2-3b-instruct": { - id: "meta-llama/llama-3.2-3b-instruct", - name: "Meta: Llama 3.2 3B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.02, - output: 0.02, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.3-70b-instruct": { - id: "meta-llama/llama-3.3-70b-instruct", - name: "Meta: Llama 3.3 70B Instruct", + "mistralai/mistral-small-creative": { + id: "mistralai/mistral-small-creative", + name: "Mistral: Mistral Small Creative", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -3751,183 +2969,132 @@ export const MODELS = { input: ["text"], cost: { input: 0.09999999999999999, - output: 0.32, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-3.3-70b-instruct:free": { - id: "meta-llama/llama-3.3-70b-instruct:free", - name: "Meta: Llama 3.3 70B Instruct (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "meta-llama/llama-4-maverick": { - id: "meta-llama/llama-4-maverick", - name: "Meta: Llama 4 Maverick", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: 
["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1048576, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "meta-llama/llama-4-scout": { - id: "meta-llama/llama-4-scout", - name: "Meta: Llama 4 Scout", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.08, output: 0.3, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 327680, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "microsoft/phi-3-medium-128k-instruct": { - id: "microsoft/phi-3-medium-128k-instruct", - name: "Microsoft: Phi-3 Medium 128K Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, + contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, - "microsoft/phi-3-mini-128k-instruct": { - id: "microsoft/phi-3-mini-128k-instruct", - name: "Microsoft: Phi-3 Mini 128K Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "microsoft/phi-3.5-mini-128k-instruct": { - id: "microsoft/phi-3.5-mini-128k-instruct", - name: "Microsoft: Phi-3.5 Mini 128K Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "minimax/minimax-m1": { - id: "minimax/minimax-m1", - name: "MiniMax: MiniMax M1", + "xiaomi/mimo-v2-flash:free": { + id: "xiaomi/mimo-v2-flash:free", + name: "Xiaomi: MiMo-V2-Flash (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.39999999999999997, - output: 2.2, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 1000000, - maxTokens: 40000, + contextWindow: 262144, + maxTokens: 65536, } satisfies Model<"openai-completions">, - "minimax/minimax-m2": { - id: "minimax/minimax-m2", - name: "MiniMax: MiniMax M2", + "nvidia/nemotron-3-nano-30b-a3b:free": { + id: "nvidia/nemotron-3-nano-30b-a3b:free", + name: "NVIDIA: Nemotron 3 Nano 30B A3B (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.19999999999999998, - output: 1, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 196608, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "minimax/minimax-m2.1": { - id: "minimax/minimax-m2.1", - name: "MiniMax: MiniMax M2.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0.03, - cacheWrite: 0.375, - }, - contextWindow: 204800, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "mistralai/codestral-2508": { - id: "mistralai/codestral-2508", - name: 
"Mistral: Codestral 2508", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 0.8999999999999999, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, contextWindow: 256000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "nvidia/nemotron-3-nano-30b-a3b": { + id: "nvidia/nemotron-3-nano-30b-a3b", + name: "NVIDIA: Nemotron 3 Nano 30B A3B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.06, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "openai/gpt-5.2-chat": { + id: "openai/gpt-5.2-chat", + name: "OpenAI: GPT-5.2 Chat", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-5.2-pro": { + id: "openai/gpt-5.2-pro", + name: "OpenAI: GPT-5.2 Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 21, + output: 168, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-5.2": { + id: "openai/gpt-5.2", + name: "OpenAI: GPT-5.2", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.75, + output: 14, + cacheRead: 0.175, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "mistralai/devstral-2512:free": { + id: "mistralai/devstral-2512:free", + name: "Mistral: Devstral 2 2512 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/devstral-2512": { id: "mistralai/devstral-2512", name: "Mistral: Devstral 2 2512", @@ -3945,9 +3112,43 @@ export const MODELS = { contextWindow: 262144, maxTokens: 65536, } satisfies Model<"openai-completions">, - "mistralai/devstral-2512:free": { - id: "mistralai/devstral-2512:free", - name: "Mistral: Devstral 2 2512 (free)", + "relace/relace-search": { + id: "relace/relace-search", + name: "Relace: Relace Search", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "z-ai/glm-4.6v": { + id: "z-ai/glm-4.6v", + name: "Z.AI: GLM 4.6V", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 0.8999999999999999, + cacheRead: 0.049999999999999996, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 24000, + } satisfies Model<"openai-completions">, + 
"nex-agi/deepseek-v3.1-nex-n1:free": { + id: "nex-agi/deepseek-v3.1-nex-n1:free", + name: "Nex AGI: DeepSeek V3.1 Nex N1 (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -3959,42 +3160,42 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/devstral-medium": { - id: "mistralai/devstral-medium", - name: "Mistral: Devstral Medium", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, contextWindow: 131072, - maxTokens: 4096, + maxTokens: 163840, } satisfies Model<"openai-completions">, - "mistralai/devstral-small": { - id: "mistralai/devstral-small", - name: "Mistral: Devstral Small 1.1", + "openai/gpt-5.1-codex-max": { + id: "openai/gpt-5.1-codex-max", + name: "OpenAI: GPT-5.1-Codex-Max", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], + reasoning: true, + input: ["text", "image"], cost: { - input: 0.07, - output: 0.28, + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "amazon/nova-2-lite-v1": { + id: "amazon/nova-2-lite-v1", + name: "Amazon: Nova 2 Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 2.5, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 4096, + contextWindow: 1000000, + maxTokens: 65535, } satisfies Model<"openai-completions">, "mistralai/ministral-14b-2512": { id: "mistralai/ministral-14b-2512", @@ -4013,57 +3214,6 @@ export const MODELS = { contextWindow: 262144, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/ministral-3b": { - id: "mistralai/ministral-3b", - name: "Mistral: Ministral 3B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.04, - output: 0.04, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/ministral-3b-2512": { - id: "mistralai/ministral-3b-2512", - name: "Mistral: Ministral 3 3B 2512", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/ministral-8b": { - id: "mistralai/ministral-8b", - name: "Mistral: Ministral 8B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/ministral-8b-2512": { id: "mistralai/ministral-8b-2512", name: "Mistral: Ministral 3 8B 2512", @@ -4081,85 +3231,17 @@ export const MODELS = { contextWindow: 262144, maxTokens: 4096, } satisfies 
Model<"openai-completions">, - "mistralai/mistral-7b-instruct": { - id: "mistralai/mistral-7b-instruct", - name: "Mistral: Mistral 7B Instruct", + "mistralai/ministral-3b-2512": { + id: "mistralai/ministral-3b-2512", + name: "Mistral: Ministral 3 3B 2512", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: false, - input: ["text"], + input: ["text", "image"], cost: { - input: 0.028, - output: 0.054, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistralai/mistral-7b-instruct:free": { - id: "mistralai/mistral-7b-instruct:free", - name: "Mistral: Mistral 7B Instruct (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistralai/mistral-large": { - id: "mistralai/mistral-large", - name: "Mistral Large", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-large-2407": { - id: "mistralai/mistral-large-2407", - name: "Mistral Large 2407", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-large-2411": { - id: "mistralai/mistral-large-2411", - name: "Mistral Large 2411", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, + input: 0.09999999999999999, + output: 0.09999999999999999, cacheRead: 0, cacheWrite: 0, }, @@ -4183,366 +3265,94 @@ export const MODELS = { contextWindow: 262144, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-medium-3": { - id: "mistralai/mistral-medium-3", - name: "Mistral: Mistral Medium 3", + "arcee-ai/trinity-mini:free": { + id: "arcee-ai/trinity-mini:free", + name: "Arcee AI: Trinity Mini (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.39999999999999997, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-medium-3.1": { - id: "mistralai/mistral-medium-3.1", - name: "Mistral: Mistral Medium 3.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.39999999999999997, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-nemo": { - id: "mistralai/mistral-nemo", - name: "Mistral: Mistral Nemo", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, input: ["text"], cost: { - input: 0.02, - output: 
0.04, + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 16384, + maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-saba": { - id: "mistralai/mistral-saba", - name: "Mistral: Saba", + "arcee-ai/trinity-mini": { + id: "arcee-ai/trinity-mini", + name: "Arcee AI: Trinity Mini", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, + reasoning: true, + input: ["text"], + cost: { + input: 0.045, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-v3.2": { + id: "deepseek/deepseek-v3.2", + name: "DeepSeek: DeepSeek V3.2", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.224, + output: 0.32, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "prime-intellect/intellect-3": { + id: "prime-intellect/intellect-3", + name: "Prime Intellect: INTELLECT-3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, input: ["text"], cost: { input: 0.19999999999999998, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-small-24b-instruct-2501": { - id: "mistralai/mistral-small-24b-instruct-2501", - name: "Mistral: Mistral Small 3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.03, - output: 0.11, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "mistralai/mistral-small-3.1-24b-instruct": { - id: "mistralai/mistral-small-3.1-24b-instruct", - name: "Mistral: Mistral Small 3.1 24B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.03, - output: 0.11, + output: 1.1, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, - "mistralai/mistral-small-3.1-24b-instruct:free": { - id: "mistralai/mistral-small-3.1-24b-instruct:free", - name: "Mistral: Mistral Small 3.1 24B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-small-3.2-24b-instruct": { - id: "mistralai/mistral-small-3.2-24b-instruct", - name: "Mistral: Mistral Small 3.2 24B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.06, - output: 0.18, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "mistralai/mistral-small-creative": { - id: "mistralai/mistral-small-creative", - name: "Mistral: Mistral Small Creative", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: 
["text"], - cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mistral-tiny": { - id: "mistralai/mistral-tiny", - name: "Mistral Tiny", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 0.25, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mixtral-8x22b-instruct": { - id: "mistralai/mixtral-8x22b-instruct", - name: "Mistral: Mixtral 8x22B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 65536, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/mixtral-8x7b-instruct": { - id: "mistralai/mixtral-8x7b-instruct", - name: "Mistral: Mixtral 8x7B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.54, - output: 0.54, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "mistralai/pixtral-12b": { - id: "mistralai/pixtral-12b", - name: "Mistral: Pixtral 12B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/pixtral-large-2411": { - id: "mistralai/pixtral-large-2411", - name: "Mistral: Pixtral Large 2411", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "mistralai/voxtral-small-24b-2507": { - id: "mistralai/voxtral-small-24b-2507", - name: "Mistral: Voxtral Small 24B 2507", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2": { - id: "moonshotai/kimi-k2", - name: "MoonshotAI: Kimi K2 0711", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.456, - output: 1.8399999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-0905": { - id: "moonshotai/kimi-k2-0905", - name: "MoonshotAI: Kimi K2 0905", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39, - output: 1.9, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - 
"moonshotai/kimi-k2-0905:exacto": { - id: "moonshotai/kimi-k2-0905:exacto", - name: "MoonshotAI: Kimi K2 0905 (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.6, - output: 2.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "moonshotai/kimi-k2-thinking": { - id: "moonshotai/kimi-k2-thinking", - name: "MoonshotAI: Kimi K2 Thinking", + "tngtech/tng-r1t-chimera:free": { + id: "tngtech/tng-r1t-chimera:free", + name: "TNG: R1T Chimera (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], - cost: { - input: 0.39999999999999997, - output: 1.75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 65535, - } satisfies Model<"openai-completions">, - "nex-agi/deepseek-v3.1-nex-n1:free": { - id: "nex-agi/deepseek-v3.1-nex-n1:free", - name: "Nex AGI: DeepSeek V3.1 Nex N1 (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 163840, maxTokens: 163840, } satisfies Model<"openai-completions">, - "nousresearch/deephermes-3-mistral-24b-preview": { - id: "nousresearch/deephermes-3-mistral-24b-preview", - name: "Nous: DeepHermes 3 Mistral 24B Preview", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.02, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "nousresearch/hermes-4-405b": { - id: "nousresearch/hermes-4-405b", - name: "Nous: Hermes 4 405B", + "tngtech/tng-r1t-chimera": { + id: "tngtech/tng-r1t-chimera", + name: "TNG: R1T Chimera", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", @@ -4554,603 +3364,76 @@ export const MODELS = { cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 131072, + contextWindow: 163840, + maxTokens: 65536, } satisfies Model<"openai-completions">, - "nousresearch/hermes-4-70b": { - id: "nousresearch/hermes-4-70b", - name: "Nous: Hermes 4 70B", + "anthropic/claude-opus-4.5": { + id: "anthropic/claude-opus-4.5", + name: "Anthropic: Claude Opus 4.5", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text"], - cost: { - input: 0.11, - output: 0.38, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "nvidia/llama-3.1-nemotron-70b-instruct": { - id: "nvidia/llama-3.1-nemotron-70b-instruct", - name: "NVIDIA: Llama 3.1 Nemotron 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.2, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "nvidia/llama-3.3-nemotron-super-49b-v1.5": { - id: "nvidia/llama-3.3-nemotron-super-49b-v1.5", - name: "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", - api: "openai-completions", - provider: "openrouter", - baseUrl: 
"https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "nvidia/nemotron-3-nano-30b-a3b": { - id: "nvidia/nemotron-3-nano-30b-a3b", - name: "NVIDIA: Nemotron 3 Nano 30B A3B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.06, - output: 0.24, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "nvidia/nemotron-3-nano-30b-a3b:free": { - id: "nvidia/nemotron-3-nano-30b-a3b:free", - name: "NVIDIA: Nemotron 3 Nano 30B A3B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "nvidia/nemotron-nano-12b-v2-vl:free": { - id: "nvidia/nemotron-nano-12b-v2-vl:free", - name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "nvidia/nemotron-nano-9b-v2": { - id: "nvidia/nemotron-nano-9b-v2", - name: "NVIDIA: Nemotron Nano 9B V2", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.04, - output: 0.16, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "nvidia/nemotron-nano-9b-v2:free": { - id: "nvidia/nemotron-nano-9b-v2:free", - name: "NVIDIA: Nemotron Nano 9B V2 (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/codex-mini": { - id: "openai/codex-mini", - name: "OpenAI: Codex Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.5, - output: 6, - cacheRead: 0.375, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openai/gpt-3.5-turbo": { - id: "openai/gpt-3.5-turbo", - name: "OpenAI: GPT-3.5 Turbo", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.5, - output: 1.5, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 16385, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-3.5-turbo-0613": { - id: "openai/gpt-3.5-turbo-0613", - name: "OpenAI: GPT-3.5 Turbo (older v0613)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 4095, - maxTokens: 4096, 
- } satisfies Model<"openai-completions">, - "openai/gpt-3.5-turbo-16k": { - id: "openai/gpt-3.5-turbo-16k", - name: "OpenAI: GPT-3.5 Turbo 16k", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 3, - output: 4, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 16385, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4": { - id: "openai/gpt-4", - name: "OpenAI: GPT-4", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 30, - output: 60, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8191, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4-0314": { - id: "openai/gpt-4-0314", - name: "OpenAI: GPT-4 (older v0314)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 30, - output: 60, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8191, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4-1106-preview": { - id: "openai/gpt-4-1106-preview", - name: "OpenAI: GPT-4 Turbo (older v1106)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 10, - output: 30, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4-turbo": { - id: "openai/gpt-4-turbo", - name: "OpenAI: GPT-4 Turbo", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 10, - output: 30, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4-turbo-preview": { - id: "openai/gpt-4-turbo-preview", - name: "OpenAI: GPT-4 Turbo Preview", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 10, - output: 30, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-4.1": { - id: "openai/gpt-4.1", - name: "OpenAI: GPT-4.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 8, - cacheRead: 0.5, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "openai/gpt-4.1-mini": { - id: "openai/gpt-4.1-mini", - name: "OpenAI: GPT-4.1 Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.39999999999999997, - output: 1.5999999999999999, - cacheRead: 0.09999999999999999, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "openai/gpt-4.1-nano": { - id: "openai/gpt-4.1-nano", - name: "OpenAI: GPT-4.1 Nano", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.09999999999999999, - output: 0.39999999999999997, - 
cacheRead: 0.024999999999999998, - cacheWrite: 0, - }, - contextWindow: 1047576, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "openai/gpt-4o": { - id: "openai/gpt-4o", - name: "OpenAI: GPT-4o", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2.5, - output: 10, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-4o-2024-05-13": { - id: "openai/gpt-4o-2024-05-13", - name: "OpenAI: GPT-4o (2024-05-13)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, input: ["text", "image"], cost: { input: 5, - output: 15, - cacheRead: 0, - cacheWrite: 0, + output: 25, + cacheRead: 0.5, + cacheWrite: 6.25, }, - contextWindow: 128000, - maxTokens: 4096, + contextWindow: 200000, + maxTokens: 32000, } satisfies Model<"openai-completions">, - "openai/gpt-4o-2024-08-06": { - id: "openai/gpt-4o-2024-08-06", - name: "OpenAI: GPT-4o (2024-08-06)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2.5, - output: 10, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-4o-2024-11-20": { - id: "openai/gpt-4o-2024-11-20", - name: "OpenAI: GPT-4o (2024-11-20)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2.5, - output: 10, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-4o-audio-preview": { - id: "openai/gpt-4o-audio-preview", - name: "OpenAI: GPT-4o Audio", + "allenai/olmo-3-7b-instruct": { + id: "allenai/olmo-3-7b-instruct", + name: "AllenAI: Olmo 3 7B Instruct", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: false, input: ["text"], cost: { - input: 2.5, - output: 10, + input: 0.09999999999999999, + output: 0.19999999999999998, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 16384, + contextWindow: 65536, + maxTokens: 65536, } satisfies Model<"openai-completions">, - "openai/gpt-4o-mini": { - id: "openai/gpt-4o-mini", - name: "OpenAI: GPT-4o-mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-4o-mini-2024-07-18": { - id: "openai/gpt-4o-mini-2024-07-18", - name: "OpenAI: GPT-4o-mini (2024-07-18)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "openai/gpt-4o:extended": { - id: "openai/gpt-4o:extended", - name: "OpenAI: GPT-4o (extended)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - 
input: 6, - output: 18, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 64000, - } satisfies Model<"openai-completions">, - "openai/gpt-5": { - id: "openai/gpt-5", - name: "OpenAI: GPT-5", + "x-ai/grok-4.1-fast": { + id: "x-ai/grok-4.1-fast", + name: "xAI: Grok 4.1 Fast", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, + input: 0.19999999999999998, + output: 0.5, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, - contextWindow: 400000, - maxTokens: 128000, + contextWindow: 2000000, + maxTokens: 30000, } satisfies Model<"openai-completions">, - "openai/gpt-5-codex": { - id: "openai/gpt-5-codex", - name: "OpenAI: GPT-5 Codex", + "google/gemini-3-pro-preview": { + id: "google/gemini-3-pro-preview", + name: "Google: Gemini 3 Pro Preview", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 0, + input: 2, + output: 12, + cacheRead: 0.19999999999999998, + cacheWrite: 2.375, }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-5-image": { - id: "openai/gpt-5-image", - name: "OpenAI: GPT-5 Image", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 10, - output: 10, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-5-image-mini": { - id: "openai/gpt-5-image-mini", - name: "OpenAI: GPT-5 Image Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 2.5, - output: 2, - cacheRead: 0.25, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-5-mini": { - id: "openai/gpt-5-mini", - name: "OpenAI: GPT-5 Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.25, - output: 2, - cacheRead: 0.024999999999999998, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-5-nano": { - id: "openai/gpt-5-nano", - name: "OpenAI: GPT-5 Nano", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.049999999999999996, - output: 0.39999999999999997, - cacheRead: 0.005, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-5-pro": { - id: "openai/gpt-5-pro", - name: "OpenAI: GPT-5 Pro", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 15, - output: 120, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, + contextWindow: 1048576, + maxTokens: 65536, } satisfies Model<"openai-completions">, "openai/gpt-5.1": { id: "openai/gpt-5.1", @@ -5203,23 +3486,6 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies 
Model<"openai-completions">, - "openai/gpt-5.1-codex-max": { - id: "openai/gpt-5.1-codex-max", - name: "OpenAI: GPT-5.1-Codex-Max", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.25, - output: 10, - cacheRead: 0.125, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, "openai/gpt-5.1-codex-mini": { id: "openai/gpt-5.1-codex-mini", name: "OpenAI: GPT-5.1-Codex-Mini", @@ -5237,142 +3503,74 @@ export const MODELS = { contextWindow: 400000, maxTokens: 100000, } satisfies Model<"openai-completions">, - "openai/gpt-5.2": { - id: "openai/gpt-5.2", - name: "OpenAI: GPT-5.2", + "kwaipilot/kat-coder-pro:free": { + id: "kwaipilot/kat-coder-pro:free", + name: "Kwaipilot: KAT-Coder-Pro V1 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-thinking": { + id: "moonshotai/kimi-k2-thinking", + name: "MoonshotAI: Kimi K2 Thinking", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text", "image"], + input: ["text"], cost: { - input: 1.75, - output: 14, - cacheRead: 0.175, + input: 0.39999999999999997, + output: 1.75, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 400000, - maxTokens: 128000, + contextWindow: 262144, + maxTokens: 65535, } satisfies Model<"openai-completions">, - "openai/gpt-5.2-chat": { - id: "openai/gpt-5.2-chat", - name: "OpenAI: GPT-5.2 Chat", + "amazon/nova-premier-v1": { + id: "amazon/nova-premier-v1", + name: "Amazon: Nova Premier 1.0", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: false, input: ["text", "image"], cost: { - input: 1.75, - output: 14, - cacheRead: 0.175, + input: 2.5, + output: 12.5, + cacheRead: 0.625, cacheWrite: 0, }, - contextWindow: 128000, - maxTokens: 16384, + contextWindow: 1000000, + maxTokens: 32000, } satisfies Model<"openai-completions">, - "openai/gpt-5.2-pro": { - id: "openai/gpt-5.2-pro", - name: "OpenAI: GPT-5.2 Pro", + "mistralai/voxtral-small-24b-2507": { + id: "mistralai/voxtral-small-24b-2507", + name: "Mistral: Voxtral Small 24B 2507", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 21, - output: 168, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 400000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b": { - id: "openai/gpt-oss-120b", - name: "OpenAI: gpt-oss-120b", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, + reasoning: false, input: ["text"], cost: { - input: 0.039, - output: 0.19, + input: 0.09999999999999999, + output: 0.3, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 32000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b:exacto": { - id: "openai/gpt-oss-120b:exacto", - name: "OpenAI: gpt-oss-120b (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.039, - output: 0.19, 
- cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-120b:free": { - id: "openai/gpt-oss-120b:free", - name: "OpenAI: gpt-oss-120b (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-20b": { - id: "openai/gpt-oss-20b", - name: "OpenAI: gpt-oss-20b", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.03, - output: 0.14, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "openai/gpt-oss-20b:free": { - id: "openai/gpt-oss-20b:free", - name: "OpenAI: gpt-oss-20b (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 128000, - } satisfies Model<"openai-completions">, "openai/gpt-oss-safeguard-20b": { id: "openai/gpt-oss-safeguard-20b", name: "OpenAI: gpt-oss-safeguard-20b", @@ -5390,39 +3588,141 @@ export const MODELS = { contextWindow: 131072, maxTokens: 65536, } satisfies Model<"openai-completions">, - "openai/o1": { - id: "openai/o1", - name: "OpenAI: o1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 15, - output: 60, - cacheRead: 7.5, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openai/o3": { - id: "openai/o3", - name: "OpenAI: o3", + "nvidia/nemotron-nano-12b-v2-vl:free": { + id: "nvidia/nemotron-nano-12b-v2-vl:free", + name: "NVIDIA: Nemotron Nano 12B 2 VL (free)", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 2, - output: 8, - cacheRead: 0.5, + input: 0, + output: 0, + cacheRead: 0, cacheWrite: 0, }, + contextWindow: 128000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "minimax/minimax-m2": { + id: "minimax/minimax-m2", + name: "MiniMax: MiniMax M2", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 1, + cacheRead: 0.03, + cacheWrite: 0, + }, + contextWindow: 196608, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "deepcogito/cogito-v2-preview-llama-405b": { + id: "deepcogito/cogito-v2-preview-llama-405b", + name: "Deep Cogito: Cogito V2 Preview Llama 405B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 3.5, + output: 3.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-5-image-mini": { + id: "openai/gpt-5-image-mini", + name: "OpenAI: GPT-5 Image Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + 
cost: { + input: 2.5, + output: 2, + cacheRead: 0.25, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "anthropic/claude-haiku-4.5": { + id: "anthropic/claude-haiku-4.5", + name: "Anthropic: Claude Haiku 4.5", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1, + output: 5, + cacheRead: 0.09999999999999999, + cacheWrite: 1.25, + }, contextWindow: 200000, - maxTokens: 100000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "qwen/qwen3-vl-8b-thinking": { + id: "qwen/qwen3-vl-8b-thinking", + name: "Qwen: Qwen3 VL 8B Thinking", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.18, + output: 2.0999999999999996, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "qwen/qwen3-vl-8b-instruct": { + id: "qwen/qwen3-vl-8b-instruct", + name: "Qwen: Qwen3 VL 8B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.064, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "openai/gpt-5-image": { + id: "openai/gpt-5-image", + name: "OpenAI: GPT-5 Image", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 10, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, } satisfies Model<"openai-completions">, "openai/o3-deep-research": { id: "openai/o3-deep-research", @@ -5441,74 +3741,6 @@ export const MODELS = { contextWindow: 200000, maxTokens: 100000, } satisfies Model<"openai-completions">, - "openai/o3-mini": { - id: "openai/o3-mini", - name: "OpenAI: o3 Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.55, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openai/o3-mini-high": { - id: "openai/o3-mini-high", - name: "OpenAI: o3 Mini High", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.55, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openai/o3-pro": { - id: "openai/o3-pro", - name: "OpenAI: o3 Pro", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 20, - output: 80, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openai/o4-mini": { - id: "openai/o4-mini", - name: "OpenAI: o4 Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.275, - cacheWrite: 0, - }, - contextWindow: 200000, - 
maxTokens: 100000, - } satisfies Model<"openai-completions">, "openai/o4-mini-deep-research": { id: "openai/o4-mini-deep-research", name: "OpenAI: o4 Mini Deep Research", @@ -5526,548 +3758,21 @@ export const MODELS = { contextWindow: 200000, maxTokens: 100000, } satisfies Model<"openai-completions">, - "openai/o4-mini-high": { - id: "openai/o4-mini-high", - name: "OpenAI: o4 Mini High", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 1.1, - output: 4.4, - cacheRead: 0.275, - cacheWrite: 0, - }, - contextWindow: 200000, - maxTokens: 100000, - } satisfies Model<"openai-completions">, - "openrouter/auto": { - id: "openrouter/auto", - name: "OpenRouter: Auto Router", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "prime-intellect/intellect-3": { - id: "prime-intellect/intellect-3", - name: "Prime Intellect: INTELLECT-3", + "nvidia/llama-3.3-nemotron-super-49b-v1.5": { + id: "nvidia/llama-3.3-nemotron-super-49b-v1.5", + name: "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.19999999999999998, - output: 1.1, + input: 0.09999999999999999, + output: 0.39999999999999997, cacheRead: 0, cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "qwen/qwen-2.5-72b-instruct": { - id: "qwen/qwen-2.5-72b-instruct", - name: "Qwen2.5 72B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.12, - output: 0.39, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "qwen/qwen-max": { - id: "qwen/qwen-max", - name: "Qwen: Qwen-Max ", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.5999999999999999, - output: 6.3999999999999995, - cacheRead: 0.64, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "qwen/qwen-plus": { - id: "qwen/qwen-plus", - name: "Qwen: Qwen-Plus", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 1.2, - cacheRead: 0.16, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "qwen/qwen-plus-2025-07-28": { - id: "qwen/qwen-plus-2025-07-28", - name: "Qwen: Qwen Plus 0728", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen-plus-2025-07-28:thinking": { - id: "qwen/qwen-plus-2025-07-28:thinking", - name: "Qwen: Qwen Plus 0728 (thinking)", - api: "openai-completions", - provider: "openrouter", - baseUrl: 
"https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 4, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen-turbo": { - id: "qwen/qwen-turbo", - name: "Qwen: Qwen-Turbo", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.049999999999999996, - output: 0.19999999999999998, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 1000000, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "qwen/qwen-vl-max": { - id: "qwen/qwen-vl-max", - name: "Qwen: Qwen VL Max", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.7999999999999999, - output: 3.1999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "qwen/qwen3-14b": { - id: "qwen/qwen3-14b", - name: "Qwen: Qwen3 14B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.049999999999999996, - output: 0.22, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 40960, - } satisfies Model<"openai-completions">, - "qwen/qwen3-235b-a22b": { - id: "qwen/qwen3-235b-a22b", - name: "Qwen: Qwen3 235B A22B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.18, - output: 0.54, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 40960, - } satisfies Model<"openai-completions">, - "qwen/qwen3-235b-a22b-2507": { - id: "qwen/qwen3-235b-a22b-2507", - name: "Qwen: Qwen3 235B A22B Instruct 2507", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.071, - output: 0.463, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "qwen/qwen3-235b-a22b-thinking-2507": { - id: "qwen/qwen3-235b-a22b-thinking-2507", - name: "Qwen: Qwen3 235B A22B Thinking 2507", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.11, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "qwen/qwen3-30b-a3b": { - id: "qwen/qwen3-30b-a3b", - name: "Qwen: Qwen3 30B A3B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.06, - output: 0.22, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 40960, - } satisfies Model<"openai-completions">, - "qwen/qwen3-30b-a3b-instruct-2507": { - id: "qwen/qwen3-30b-a3b-instruct-2507", - name: "Qwen: Qwen3 30B A3B Instruct 2507", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.08, - output: 0.33, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - 
"qwen/qwen3-30b-a3b-thinking-2507": { - id: "qwen/qwen3-30b-a3b-thinking-2507", - name: "Qwen: Qwen3 30B A3B Thinking 2507", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.051, - output: 0.33999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "qwen/qwen3-32b": { - id: "qwen/qwen3-32b", - name: "Qwen: Qwen3 32B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.08, - output: 0.24, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 40960, - } satisfies Model<"openai-completions">, - "qwen/qwen3-4b:free": { - id: "qwen/qwen3-4b:free", - name: "Qwen: Qwen3 4B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 40960, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "qwen/qwen3-8b": { - id: "qwen/qwen3-8b", - name: "Qwen: Qwen3 8B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.028, - output: 0.1104, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 20000, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder": { - id: "qwen/qwen3-coder", - name: "Qwen: Qwen3 Coder 480B A35B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.22, - output: 0.95, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder-30b-a3b-instruct": { - id: "qwen/qwen3-coder-30b-a3b-instruct", - name: "Qwen: Qwen3 Coder 30B A3B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.07, - output: 0.27, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 160000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder-flash": { - id: "qwen/qwen3-coder-flash", - name: "Qwen: Qwen3 Coder Flash", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 1.5, - cacheRead: 0.08, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder-plus": { - id: "qwen/qwen3-coder-plus", - name: "Qwen: Qwen3 Coder Plus", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 5, - cacheRead: 0.09999999999999999, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder:exacto": { - id: "qwen/qwen3-coder:exacto", - name: "Qwen: Qwen3 Coder 480B A35B (exacto)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.22, - output: 1.7999999999999998, - cacheRead: 0, - cacheWrite: 0, - }, 
- contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "qwen/qwen3-coder:free": { - id: "qwen/qwen3-coder:free", - name: "Qwen: Qwen3 Coder 480B A35B (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262000, - maxTokens: 262000, - } satisfies Model<"openai-completions">, - "qwen/qwen3-max": { - id: "qwen/qwen3-max", - name: "Qwen: Qwen3 Max", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.2, - output: 6, - cacheRead: 0.24, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen3-next-80b-a3b-instruct": { - id: "qwen/qwen3-next-80b-a3b-instruct", - name: "Qwen: Qwen3 Next 80B A3B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09, - output: 1.1, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "qwen/qwen3-next-80b-a3b-thinking": { - id: "qwen/qwen3-next-80b-a3b-thinking", - name: "Qwen: Qwen3 Next 80B A3B Thinking", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.12, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen3-vl-235b-a22b-instruct": { - id: "qwen/qwen3-vl-235b-a22b-instruct", - name: "Qwen: Qwen3 VL 235B A22B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.19999999999999998, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "qwen/qwen3-vl-235b-a22b-thinking": { - id: "qwen/qwen3-vl-235b-a22b-thinking", - name: "Qwen: Qwen3 VL 235B A22B Thinking", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, - maxTokens: 262144, - } satisfies Model<"openai-completions">, - "qwen/qwen3-vl-30b-a3b-instruct": { - id: "qwen/qwen3-vl-30b-a3b-instruct", - name: "Qwen: Qwen3 VL 30B A3B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 262144, maxTokens: 4096, } satisfies Model<"openai-completions">, "qwen/qwen3-vl-30b-a3b-thinking": { @@ -6087,447 +3792,39 @@ export const MODELS = { contextWindow: 131072, maxTokens: 32768, } satisfies Model<"openai-completions">, - "qwen/qwen3-vl-8b-instruct": { - id: "qwen/qwen3-vl-8b-instruct", - name: "Qwen: Qwen3 VL 8B Instruct", + "qwen/qwen3-vl-30b-a3b-instruct": { + id: "qwen/qwen3-vl-30b-a3b-instruct", + name: "Qwen: Qwen3 VL 30B A3B Instruct", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: false, 
input: ["text", "image"], - cost: { - input: 0.064, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwen3-vl-8b-thinking": { - id: "qwen/qwen3-vl-8b-thinking", - name: "Qwen: Qwen3 VL 8B Thinking", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.18, - output: 2.0999999999999996, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 32768, - } satisfies Model<"openai-completions">, - "qwen/qwq-32b": { - id: "qwen/qwq-32b", - name: "Qwen: QwQ 32B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], cost: { input: 0.15, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "relace/relace-search": { - id: "relace/relace-search", - name: "Relace: Relace Search", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 3, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 128000, - } satisfies Model<"openai-completions">, - "sao10k/l3-euryale-70b": { - id: "sao10k/l3-euryale-70b", - name: "Sao10k: Llama 3 Euryale 70B v2.1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1.48, - output: 1.48, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "sao10k/l3.1-euryale-70b": { - id: "sao10k/l3.1-euryale-70b", - name: "Sao10K: Llama 3.1 Euryale 70B v2.2", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.65, - output: 0.75, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "stepfun-ai/step3": { - id: "stepfun-ai/step3", - name: "StepFun: Step3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.5700000000000001, - output: 1.42, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 65536, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "thedrummer/rocinante-12b": { - id: "thedrummer/rocinante-12b", - name: "TheDrummer: Rocinante 12B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.16999999999999998, - output: 0.43, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "thedrummer/unslopnemo-12b": { - id: "thedrummer/unslopnemo-12b", - name: "TheDrummer: UnslopNemo 12B", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "tngtech/deepseek-r1t2-chimera": { - id: 
"tngtech/deepseek-r1t2-chimera", - name: "TNG: DeepSeek R1T2 Chimera", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "tngtech/tng-r1t-chimera": { - id: "tngtech/tng-r1t-chimera", - name: "TNG: R1T Chimera", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 1.2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "tngtech/tng-r1t-chimera:free": { - id: "tngtech/tng-r1t-chimera:free", - name: "TNG: R1T Chimera (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 163840, - maxTokens: 163840, - } satisfies Model<"openai-completions">, - "x-ai/grok-3": { - id: "x-ai/grok-3", - name: "xAI: Grok 3", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0.75, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "x-ai/grok-3-beta": { - id: "x-ai/grok-3-beta", - name: "xAI: Grok 3 Beta", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0.75, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "x-ai/grok-3-mini": { - id: "x-ai/grok-3-mini", - name: "xAI: Grok 3 Mini", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 0.5, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "x-ai/grok-3-mini-beta": { - id: "x-ai/grok-3-mini-beta", - name: "xAI: Grok 3 Mini Beta", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.3, - output: 0.5, - cacheRead: 0.075, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "x-ai/grok-4": { - id: "x-ai/grok-4", - name: "xAI: Grok 4", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 3, - output: 15, - cacheRead: 0.75, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "x-ai/grok-4-fast": { - id: "x-ai/grok-4-fast", - name: "xAI: Grok 4 Fast", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.19999999999999998, - output: 0.5, - cacheRead: 0.049999999999999996, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "x-ai/grok-4.1-fast": { - id: "x-ai/grok-4.1-fast", - name: 
"xAI: Grok 4.1 Fast", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.19999999999999998, - output: 0.5, - cacheRead: 0.049999999999999996, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "x-ai/grok-code-fast-1": { - id: "x-ai/grok-code-fast-1", - name: "xAI: Grok Code Fast 1", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 1.5, - cacheRead: 0.02, - cacheWrite: 0, - }, - contextWindow: 256000, - maxTokens: 10000, - } satisfies Model<"openai-completions">, - "xiaomi/mimo-v2-flash:free": { - id: "xiaomi/mimo-v2-flash:free", - name: "Xiaomi: MiMo-V2-Flash (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, contextWindow: 262144, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "z-ai/glm-4-32b": { - id: "z-ai/glm-4-32b", - name: "Z.AI: GLM 4 32B ", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.09999999999999999, - output: 0.09999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "z-ai/glm-4.5": { - id: "z-ai/glm-4.5", - name: "Z.AI: GLM 4.5", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.35, - output: 1.55, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 65536, - } satisfies Model<"openai-completions">, - "z-ai/glm-4.5-air": { - id: "z-ai/glm-4.5-air", - name: "Z.AI: GLM 4.5 Air", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0.10400000000000001, - output: 0.6799999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 98304, - } satisfies Model<"openai-completions">, - "z-ai/glm-4.5-air:free": { - id: "z-ai/glm-4.5-air:free", - name: "Z.AI: GLM 4.5 Air (free)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: true, - input: ["text"], - cost: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 131072, - } satisfies Model<"openai-completions">, - "z-ai/glm-4.5v": { - id: "z-ai/glm-4.5v", - name: "Z.AI: GLM 4.5V", + "openai/gpt-5-pro": { + id: "openai/gpt-5-pro", + name: "OpenAI: GPT-5 Pro", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 0.48, - output: 1.44, - cacheRead: 0.088, + input: 15, + output: 120, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 65536, - maxTokens: 16384, + contextWindow: 400000, + maxTokens: 128000, } satisfies Model<"openai-completions">, "z-ai/glm-4.6": { id: "z-ai/glm-4.6", @@ -6563,461 +3860,216 @@ export const MODELS = { contextWindow: 204800, maxTokens: 131072, } satisfies Model<"openai-completions">, - "z-ai/glm-4.6v": { - id: "z-ai/glm-4.6v", - name: "Z.AI: GLM 4.6V", + 
"anthropic/claude-sonnet-4.5": { + id: "anthropic/claude-sonnet-4.5", + name: "Anthropic: Claude Sonnet 4.5", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text", "image"], cost: { - input: 0.3, - output: 0.8999999999999999, - cacheRead: 0.049999999999999996, - cacheWrite: 0, + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, }, - contextWindow: 131072, - maxTokens: 24000, + contextWindow: 1000000, + maxTokens: 64000, } satisfies Model<"openai-completions">, - "z-ai/glm-4.7": { - id: "z-ai/glm-4.7", - name: "Z.AI: GLM 4.7", + "deepseek/deepseek-v3.2-exp": { + id: "deepseek/deepseek-v3.2-exp", + name: "DeepSeek: DeepSeek V3.2 Exp", api: "openai-completions", provider: "openrouter", baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.39999999999999997, - output: 1.5, + input: 0.21, + output: 0.32, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 202752, - maxTokens: 65535, + contextWindow: 163840, + maxTokens: 65536, } satisfies Model<"openai-completions">, - }, - "xai": { - "grok-2": { - id: "grok-2", - name: "Grok 2", + "google/gemini-2.5-flash-preview-09-2025": { + id: "google/gemini-2.5-flash-preview-09-2025", + name: "Google: Gemini 2.5 Flash Preview 09-2025", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-2-1212": { - id: "grok-2-1212", - name: "Grok 2 (1212)", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-2-latest": { - id: "grok-2-latest", - name: "Grok 2 Latest", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-2-vision": { - id: "grok-2-vision", - name: "Grok 2 Vision", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "grok-2-vision-1212": { - id: "grok-2-vision-1212", - name: "Grok 2 Vision (1212)", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "grok-2-vision-latest": { - id: "grok-2-vision-latest", - name: "Grok 2 Vision Latest", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 2, - output: 10, - cacheRead: 2, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 4096, - } satisfies Model<"openai-completions">, - "grok-3": { - id: "grok-3", - name: "Grok 3", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - 
input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0.75, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-3-fast": { - id: "grok-3-fast", - name: "Grok 3 Fast", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 5, - output: 25, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-3-fast-latest": { - id: "grok-3-fast-latest", - name: "Grok 3 Fast Latest", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 5, - output: 25, - cacheRead: 1.25, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-3-latest": { - id: "grok-3-latest", - name: "Grok 3 Latest", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text"], - cost: { - input: 3, - output: 15, - cacheRead: 0.75, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-3-mini": { - id: "grok-3-mini", - name: "Grok 3 Mini", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.3, - output: 0.5, + output: 2.5, cacheRead: 0.075, - cacheWrite: 0, + cacheWrite: 0.3833, }, - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 1048576, + maxTokens: 65536, } satisfies Model<"openai-completions">, - "grok-3-mini-fast": { - id: "grok-3-mini-fast", - name: "Grok 3 Mini Fast", + "google/gemini-2.5-flash-lite-preview-09-2025": { + id: "google/gemini-2.5-flash-lite-preview-09-2025", + name: "Google: Gemini 2.5 Flash Lite Preview 09-2025", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { - input: 0.6, - output: 4, - cacheRead: 0.15, + input: 0.09999999999999999, + output: 0.39999999999999997, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 1048576, + maxTokens: 65536, } satisfies Model<"openai-completions">, - "grok-3-mini-fast-latest": { - id: "grok-3-mini-fast-latest", - name: "Grok 3 Mini Fast Latest", + "qwen/qwen3-vl-235b-a22b-thinking": { + id: "qwen/qwen3-vl-235b-a22b-thinking", + name: "Qwen: Qwen3 VL 235B A22B Thinking", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text"], - cost: { - input: 0.6, - output: 4, - cacheRead: 0.15, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 8192, - } satisfies Model<"openai-completions">, - "grok-3-mini-latest": { - id: "grok-3-mini-latest", - name: "Grok 3 Mini Latest", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { input: 0.3, - output: 0.5, - cacheRead: 0.075, + output: 1.2, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 262144, + maxTokens: 262144, } satisfies Model<"openai-completions">, - 
"grok-4": { - id: "grok-4", - name: "Grok 4", + "qwen/qwen3-vl-235b-a22b-instruct": { + id: "qwen/qwen3-vl-235b-a22b-instruct", + name: "Qwen: Qwen3 VL 235B A22B Instruct", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: true, + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.19999999999999998, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen3-max": { + id: "qwen/qwen3-max", + name: "Qwen: Qwen3 Max", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, input: ["text"], cost: { - input: 3, - output: 15, - cacheRead: 0.75, + input: 1.2, + output: 6, + cacheRead: 0.24, cacheWrite: 0, }, contextWindow: 256000, - maxTokens: 64000, + maxTokens: 32768, } satisfies Model<"openai-completions">, - "grok-4-1-fast": { - id: "grok-4-1-fast", - name: "Grok 4.1 Fast", + "qwen/qwen3-coder-plus": { + id: "qwen/qwen3-coder-plus", + name: "Qwen: Qwen3 Coder Plus", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.5, - cacheRead: 0.05, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "grok-4-1-fast-non-reasoning": { - id: "grok-4-1-fast-non-reasoning", - name: "Grok 4.1 Fast (Non-Reasoning)", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.5, - cacheRead: 0.05, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "grok-4-fast": { - id: "grok-4-fast", - name: "Grok 4 Fast", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.5, - cacheRead: 0.05, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "grok-4-fast-non-reasoning": { - id: "grok-4-fast-non-reasoning", - name: "Grok 4 Fast (Non-Reasoning)", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 0.2, - output: 0.5, - cacheRead: 0.05, - cacheWrite: 0, - }, - contextWindow: 2000000, - maxTokens: 30000, - } satisfies Model<"openai-completions">, - "grok-beta": { - id: "grok-beta", - name: "Grok Beta", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: false, input: ["text"], cost: { - input: 5, - output: 15, - cacheRead: 5, + input: 1, + output: 5, + cacheRead: 0.09999999999999999, cacheWrite: 0, }, - contextWindow: 131072, + contextWindow: 128000, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "openai/gpt-5-codex": { + id: "openai/gpt-5-codex", + name: "OpenAI: GPT-5 Codex", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + 
"deepseek/deepseek-v3.1-terminus:exacto": { + id: "deepseek/deepseek-v3.1-terminus:exacto", + name: "DeepSeek: DeepSeek V3.1 Terminus (exacto)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.21, + output: 0.7899999999999999, + cacheRead: 0.16799999999999998, + cacheWrite: 0, + }, + contextWindow: 163840, maxTokens: 4096, } satisfies Model<"openai-completions">, - "grok-code-fast-1": { - id: "grok-code-fast-1", - name: "Grok Code Fast 1", + "deepseek/deepseek-v3.1-terminus": { + id: "deepseek/deepseek-v3.1-terminus", + name: "DeepSeek: DeepSeek V3.1 Terminus", api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.2, - output: 1.5, - cacheRead: 0.02, + input: 0.21, + output: 0.7899999999999999, + cacheRead: 0.16799999999999998, cacheWrite: 0, }, - contextWindow: 256000, - maxTokens: 10000, - } satisfies Model<"openai-completions">, - "grok-vision-beta": { - id: "grok-vision-beta", - name: "Grok Vision Beta", - api: "openai-completions", - provider: "xai", - baseUrl: "https://api.x.ai/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 5, - output: 15, - cacheRead: 5, - cacheWrite: 0, - }, - contextWindow: 8192, + contextWindow: 163840, maxTokens: 4096, } satisfies Model<"openai-completions">, - }, - "zai": { - "glm-4.5": { - id: "glm-4.5", - name: "GLM-4.5", + "x-ai/grok-4-fast": { + id: "x-ai/grok-4-fast", + name: "xAI: Grok 4 Fast", api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, - input: ["text"], + input: ["text", "image"], cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.11, + input: 0.19999999999999998, + output: 0.5, + cacheRead: 0.049999999999999996, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 98304, + contextWindow: 2000000, + maxTokens: 30000, } satisfies Model<"openai-completions">, - "glm-4.5-air": { - id: "glm-4.5-air", - name: "GLM-4.5-Air", + "alibaba/tongyi-deepresearch-30b-a3b:free": { + id: "alibaba/tongyi-deepresearch-30b-a3b:free", + name: "Tongyi DeepResearch 30B A3B (free)", api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, - reasoning: true, - input: ["text"], - cost: { - input: 0.2, - output: 1.1, - cacheRead: 0.03, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 98304, - } satisfies Model<"openai-completions">, - "glm-4.5-flash": { - id: "glm-4.5-flash", - name: "GLM-4.5-Flash", - api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { @@ -7027,79 +4079,3020 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 98304, + maxTokens: 131072, } satisfies Model<"openai-completions">, - "glm-4.5v": { - id: "glm-4.5v", - name: "GLM-4.5V", + "alibaba/tongyi-deepresearch-30b-a3b": { + id: "alibaba/tongyi-deepresearch-30b-a3b", + name: "Tongyi DeepResearch 30B A3B", api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: 
{"supportsDeveloperRole":false}, - reasoning: true, - input: ["text", "image"], - cost: { - input: 0.6, - output: 1.8, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 64000, - maxTokens: 16384, - } satisfies Model<"openai-completions">, - "glm-4.6": { - id: "glm-4.6", - name: "GLM-4.6", - api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.11, + input: 0.09, + output: 0.39999999999999997, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, + contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, - "glm-4.6v": { - id: "glm-4.6v", - name: "GLM-4.6V", + "qwen/qwen3-coder-flash": { + id: "qwen/qwen3-coder-flash", + name: "Qwen: Qwen3 Coder Flash", api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, - reasoning: true, - input: ["text", "image"], + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], cost: { input: 0.3, - output: 0.9, + output: 1.5, + cacheRead: 0.08, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "qwen/qwen3-next-80b-a3b-thinking": { + id: "qwen/qwen3-next-80b-a3b-thinking", + name: "Qwen: Qwen3 Next 80B A3B Thinking", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.12, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "qwen/qwen3-next-80b-a3b-instruct": { + id: "qwen/qwen3-next-80b-a3b-instruct", + name: "Qwen: Qwen3 Next 80B A3B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09, + output: 1.1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen-plus-2025-07-28": { + id: "qwen/qwen-plus-2025-07-28", + name: "Qwen: Qwen Plus 0728", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "qwen/qwen-plus-2025-07-28:thinking": { + id: "qwen/qwen-plus-2025-07-28:thinking", + name: "Qwen: Qwen Plus 0728 (thinking)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "nvidia/nemotron-nano-9b-v2:free": { + id: "nvidia/nemotron-nano-9b-v2:free", + name: "NVIDIA: Nemotron Nano 9B V2 (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, cacheRead: 0, cacheWrite: 0, }, contextWindow: 128000, - maxTokens: 32768, + maxTokens: 4096, } satisfies 
Model<"openai-completions">, - "glm-4.7": { - id: "glm-4.7", - name: "GLM-4.7", + "nvidia/nemotron-nano-9b-v2": { + id: "nvidia/nemotron-nano-9b-v2", + name: "NVIDIA: Nemotron Nano 9B V2", api: "openai-completions", - provider: "zai", - baseUrl: "https://api.z.ai/api/coding/paas/v4", - compat: {"supportsDeveloperRole":false}, + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", reasoning: true, input: ["text"], cost: { - input: 0.6, - output: 2.2, - cacheRead: 0.11, + input: 0.04, + output: 0.16, + cacheRead: 0, cacheWrite: 0, }, - contextWindow: 204800, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-0905": { + id: "moonshotai/kimi-k2-0905", + name: "MoonshotAI: Kimi K2 0905", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39, + output: 1.9, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2-0905:exacto": { + id: "moonshotai/kimi-k2-0905:exacto", + name: "MoonshotAI: Kimi K2 0905 (exacto)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.6, + output: 2.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "deepcogito/cogito-v2-preview-llama-70b": { + id: "deepcogito/cogito-v2-preview-llama-70b", + name: "Deep Cogito: Cogito V2 Preview Llama 70B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.88, + output: 0.88, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "deepcogito/cogito-v2-preview-llama-109b-moe": { + id: "deepcogito/cogito-v2-preview-llama-109b-moe", + name: "Cogito V2 Preview Llama 109B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.18, + output: 0.59, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32767, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "stepfun-ai/step3": { + id: "stepfun-ai/step3", + name: "StepFun: Step3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.5700000000000001, + output: 1.42, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 65536, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "qwen/qwen3-30b-a3b-thinking-2507": { + id: "qwen/qwen3-30b-a3b-thinking-2507", + name: "Qwen: Qwen3 30B A3B Thinking 2507", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.051, + output: 0.33999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "x-ai/grok-code-fast-1": { + id: "x-ai/grok-code-fast-1", + name: "xAI: Grok Code Fast 1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 1.5, + cacheRead: 0.02, + cacheWrite: 
0, + }, + contextWindow: 256000, + maxTokens: 10000, + } satisfies Model<"openai-completions">, + "nousresearch/hermes-4-70b": { + id: "nousresearch/hermes-4-70b", + name: "Nous: Hermes 4 70B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.11, + output: 0.38, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, maxTokens: 131072, } satisfies Model<"openai-completions">, + "nousresearch/hermes-4-405b": { + id: "nousresearch/hermes-4-405b", + name: "Nous: Hermes 4 405B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-chat-v3.1": { + id: "deepseek/deepseek-chat-v3.1", + name: "DeepSeek: DeepSeek V3.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.15, + output: 0.75, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 7168, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-audio-preview": { + id: "openai/gpt-4o-audio-preview", + name: "OpenAI: GPT-4o Audio", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2.5, + output: 10, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/mistral-medium-3.1": { + id: "mistralai/mistral-medium-3.1", + name: "Mistral: Mistral Medium 3.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "baidu/ernie-4.5-21b-a3b": { + id: "baidu/ernie-4.5-21b-a3b", + name: "Baidu: ERNIE 4.5 21B A3B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.056, + output: 0.224, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 120000, + maxTokens: 8000, + } satisfies Model<"openai-completions">, + "baidu/ernie-4.5-vl-28b-a3b": { + id: "baidu/ernie-4.5-vl-28b-a3b", + name: "Baidu: ERNIE 4.5 VL 28B A3B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.112, + output: 0.448, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 30000, + maxTokens: 8000, + } satisfies Model<"openai-completions">, + "z-ai/glm-4.5v": { + id: "z-ai/glm-4.5v", + name: "Z.AI: GLM 4.5V", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.48, + output: 1.44, + cacheRead: 0.088, + cacheWrite: 0, + }, + contextWindow: 65536, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "ai21/jamba-mini-1.7": { + id: "ai21/jamba-mini-1.7", + name: "AI21: Jamba Mini 1.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + 
reasoning: false, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "ai21/jamba-large-1.7": { + id: "ai21/jamba-large-1.7", + name: "AI21: Jamba Large 1.7", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 8, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-5": { + id: "openai/gpt-5", + name: "OpenAI: GPT-5", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-5-mini": { + id: "openai/gpt-5-mini", + name: "OpenAI: GPT-5 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.25, + output: 2, + cacheRead: 0.024999999999999998, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-5-nano": { + id: "openai/gpt-5-nano", + name: "OpenAI: GPT-5 Nano", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.049999999999999996, + output: 0.39999999999999997, + cacheRead: 0.005, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-120b:free": { + id: "openai/gpt-oss-120b:free", + name: "OpenAI: gpt-oss-120b (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-120b": { + id: "openai/gpt-oss-120b", + name: "OpenAI: gpt-oss-120b", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.039, + output: 0.19, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-120b:exacto": { + id: "openai/gpt-oss-120b:exacto", + name: "OpenAI: gpt-oss-120b (exacto)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.039, + output: 0.19, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-20b:free": { + id: "openai/gpt-oss-20b:free", + name: "OpenAI: gpt-oss-20b (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 128000, + } satisfies Model<"openai-completions">, + "openai/gpt-oss-20b": { + id: "openai/gpt-oss-20b", + name: "OpenAI: gpt-oss-20b", + api: 
"openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.03, + output: 0.14, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "anthropic/claude-opus-4.1": { + id: "anthropic/claude-opus-4.1", + name: "Anthropic: Claude Opus 4.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/codestral-2508": { + id: "mistralai/codestral-2508", + name: "Mistral: Codestral 2508", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 0.8999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen3-coder-30b-a3b-instruct": { + id: "qwen/qwen3-coder-30b-a3b-instruct", + name: "Qwen: Qwen3 Coder 30B A3B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.07, + output: 0.27, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 160000, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "qwen/qwen3-30b-a3b-instruct-2507": { + id: "qwen/qwen3-30b-a3b-instruct-2507", + name: "Qwen: Qwen3 30B A3B Instruct 2507", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.08, + output: 0.33, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "z-ai/glm-4.5": { + id: "z-ai/glm-4.5", + name: "Z.AI: GLM 4.5", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.35, + output: 1.55, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "z-ai/glm-4.5-air:free": { + id: "z-ai/glm-4.5-air:free", + name: "Z.AI: GLM 4.5 Air (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "z-ai/glm-4.5-air": { + id: "z-ai/glm-4.5-air", + name: "Z.AI: GLM 4.5 Air", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.10400000000000001, + output: 0.6799999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 98304, + } satisfies Model<"openai-completions">, + "qwen/qwen3-235b-a22b-thinking-2507": { + id: "qwen/qwen3-235b-a22b-thinking-2507", + name: "Qwen: Qwen3 235B A22B Thinking 2507", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.11, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + 
maxTokens: 262144, + } satisfies Model<"openai-completions">, + "z-ai/glm-4-32b": { + id: "z-ai/glm-4-32b", + name: "Z.AI: GLM 4 32B ", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen3-coder:free": { + id: "qwen/qwen3-coder:free", + name: "Qwen: Qwen3 Coder 480B A35B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262000, + maxTokens: 262000, + } satisfies Model<"openai-completions">, + "qwen/qwen3-coder": { + id: "qwen/qwen3-coder", + name: "Qwen: Qwen3 Coder 480B A35B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.22, + output: 0.95, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 262144, + } satisfies Model<"openai-completions">, + "qwen/qwen3-coder:exacto": { + id: "qwen/qwen3-coder:exacto", + name: "Qwen: Qwen3 Coder 480B A35B (exacto)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.22, + output: 1.7999999999999998, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "google/gemini-2.5-flash-lite": { + id: "google/gemini-2.5-flash-lite", + name: "Google: Gemini 2.5 Flash Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.39999999999999997, + cacheRead: 0.01, + cacheWrite: 0.18330000000000002, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"openai-completions">, + "qwen/qwen3-235b-a22b-2507": { + id: "qwen/qwen3-235b-a22b-2507", + name: "Qwen: Qwen3 235B A22B Instruct 2507", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.071, + output: 0.463, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 262144, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "moonshotai/kimi-k2": { + id: "moonshotai/kimi-k2", + name: "MoonshotAI: Kimi K2 0711", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.456, + output: 1.8399999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "mistralai/devstral-medium": { + id: "mistralai/devstral-medium", + name: "Mistral: Devstral Medium", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/devstral-small": { + id: "mistralai/devstral-small", + name: "Mistral: Devstral Small 1.1", + api: 
"openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.07, + output: 0.28, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "x-ai/grok-4": { + id: "x-ai/grok-4", + name: "xAI: Grok 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 256000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "tngtech/deepseek-r1t2-chimera": { + id: "tngtech/deepseek-r1t2-chimera", + name: "TNG: DeepSeek R1T2 Chimera", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 163840, + } satisfies Model<"openai-completions">, + "inception/mercury": { + id: "inception/mercury", + name: "Inception: Mercury", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/mistral-small-3.2-24b-instruct": { + id: "mistralai/mistral-small-3.2-24b-instruct", + name: "Mistral: Mistral Small 3.2 24B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.06, + output: 0.18, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "minimax/minimax-m1": { + id: "minimax/minimax-m1", + name: "MiniMax: MiniMax M1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 2.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 40000, + } satisfies Model<"openai-completions">, + "google/gemini-2.5-flash": { + id: "google/gemini-2.5-flash", + name: "Google: Gemini 2.5 Flash", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0.3, + output: 2.5, + cacheRead: 0.03, + cacheWrite: 0.3833, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"openai-completions">, + "google/gemini-2.5-pro": { + id: "google/gemini-2.5-pro", + name: "Google: Gemini 2.5 Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.125, + cacheWrite: 1.625, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "openai/o3-pro": { + id: "openai/o3-pro", + name: "OpenAI: o3 Pro", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 20, + output: 80, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + 
"x-ai/grok-3-mini": { + id: "x-ai/grok-3-mini", + name: "xAI: Grok 3 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 0.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "x-ai/grok-3": { + id: "x-ai/grok-3", + name: "xAI: Grok 3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "google/gemini-2.5-pro-preview": { + id: "google/gemini-2.5-pro-preview", + name: "Google: Gemini 2.5 Pro Preview 06-05", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.31, + cacheWrite: 1.625, + }, + contextWindow: 1048576, + maxTokens: 65536, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-r1-0528": { + id: "deepseek/deepseek-r1-0528", + name: "DeepSeek: R1 0528", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 1.75, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 163840, + } satisfies Model<"openai-completions">, + "anthropic/claude-opus-4": { + id: "anthropic/claude-opus-4", + name: "Anthropic: Claude Opus 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 32000, + } satisfies Model<"openai-completions">, + "anthropic/claude-sonnet-4": { + id: "anthropic/claude-sonnet-4", + name: "Anthropic: Claude Sonnet 4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 1000000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "openai/codex-mini": { + id: "openai/codex-mini", + name: "OpenAI: Codex Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.5, + output: 6, + cacheRead: 0.375, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "nousresearch/deephermes-3-mistral-24b-preview": { + id: "nousresearch/deephermes-3-mistral-24b-preview", + name: "Nous: DeepHermes 3 Mistral 24B Preview", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.02, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "mistralai/mistral-medium-3": { + id: "mistralai/mistral-medium-3", + name: "Mistral: Mistral Medium 3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", 
"image"], + cost: { + input: 0.39999999999999997, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "google/gemini-2.5-pro-preview-05-06": { + id: "google/gemini-2.5-pro-preview-05-06", + name: "Google: Gemini 2.5 Pro Preview 05-06", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.25, + output: 10, + cacheRead: 0.31, + cacheWrite: 1.625, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"openai-completions">, + "arcee-ai/virtuoso-large": { + id: "arcee-ai/virtuoso-large", + name: "Arcee AI: Virtuoso Large", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.75, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "inception/mercury-coder": { + id: "inception/mercury-coder", + name: "Inception: Mercury Coder", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "qwen/qwen3-4b:free": { + id: "qwen/qwen3-4b:free", + name: "Qwen: Qwen3 4B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 40960, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "qwen/qwen3-30b-a3b": { + id: "qwen/qwen3-30b-a3b", + name: "Qwen: Qwen3 30B A3B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.06, + output: 0.22, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 40960, + maxTokens: 40960, + } satisfies Model<"openai-completions">, + "qwen/qwen3-8b": { + id: "qwen/qwen3-8b", + name: "Qwen: Qwen3 8B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.028, + output: 0.1104, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 20000, + } satisfies Model<"openai-completions">, + "qwen/qwen3-14b": { + id: "qwen/qwen3-14b", + name: "Qwen: Qwen3 14B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.049999999999999996, + output: 0.22, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 40960, + maxTokens: 40960, + } satisfies Model<"openai-completions">, + "qwen/qwen3-32b": { + id: "qwen/qwen3-32b", + name: "Qwen: Qwen3 32B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.08, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 40960, + maxTokens: 40960, + } satisfies Model<"openai-completions">, + "qwen/qwen3-235b-a22b": { + id: "qwen/qwen3-235b-a22b", + name: "Qwen: Qwen3 235B A22B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + 
reasoning: true, + input: ["text"], + cost: { + input: 0.18, + output: 0.54, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 40960, + maxTokens: 40960, + } satisfies Model<"openai-completions">, + "openai/o4-mini-high": { + id: "openai/o4-mini-high", + name: "OpenAI: o4 Mini High", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.275, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "openai/o3": { + id: "openai/o3", + name: "OpenAI: o3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "openai/o4-mini": { + id: "openai/o4-mini", + name: "OpenAI: o4 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.275, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "openai/gpt-4.1": { + id: "openai/gpt-4.1", + name: "OpenAI: GPT-4.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 8, + cacheRead: 0.5, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "openai/gpt-4.1-mini": { + id: "openai/gpt-4.1-mini", + name: "OpenAI: GPT-4.1 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.39999999999999997, + output: 1.5999999999999999, + cacheRead: 0.09999999999999999, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "openai/gpt-4.1-nano": { + id: "openai/gpt-4.1-nano", + name: "OpenAI: GPT-4.1 Nano", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.39999999999999997, + cacheRead: 0.024999999999999998, + cacheWrite: 0, + }, + contextWindow: 1047576, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "x-ai/grok-3-mini-beta": { + id: "x-ai/grok-3-mini-beta", + name: "xAI: Grok 3 Mini Beta", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 0.5, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "x-ai/grok-3-beta": { + id: "x-ai/grok-3-beta", + name: "xAI: Grok 3 Beta", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3, + output: 15, + cacheRead: 0.75, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "meta-llama/llama-4-maverick": { + id: "meta-llama/llama-4-maverick", + name: "Meta: Llama 4 Maverick", + api: "openai-completions", 
+ provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "meta-llama/llama-4-scout": { + id: "meta-llama/llama-4-scout", + name: "Meta: Llama 4 Scout", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.08, + output: 0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 327680, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-chat-v3-0324": { + id: "deepseek/deepseek-chat-v3-0324", + name: "DeepSeek: DeepSeek V3 0324", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 0.88, + cacheRead: 0.106, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-small-3.1-24b-instruct:free": { + id: "mistralai/mistral-small-3.1-24b-instruct:free", + name: "Mistral: Mistral Small 3.1 24B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-small-3.1-24b-instruct": { + id: "mistralai/mistral-small-3.1-24b-instruct", + name: "Mistral: Mistral Small 3.1 24B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.03, + output: 0.11, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "google/gemma-3-27b-it:free": { + id: "google/gemma-3-27b-it:free", + name: "Google: Gemma 3 27B (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "google/gemma-3-27b-it": { + id: "google/gemma-3-27b-it", + name: "Google: Gemma 3 27B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.04, + output: 0.15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 96000, + maxTokens: 96000, + } satisfies Model<"openai-completions">, + "qwen/qwq-32b": { + id: "qwen/qwq-32b", + name: "Qwen: QwQ 32B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.15, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "google/gemini-2.0-flash-lite-001": { + id: "google/gemini-2.0-flash-lite-001", + name: "Google: Gemini 2.0 Flash Lite", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.075, + output: 
0.3, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "anthropic/claude-3.7-sonnet:thinking": { + id: "anthropic/claude-3.7-sonnet:thinking", + name: "Anthropic: Claude 3.7 Sonnet (thinking)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "anthropic/claude-3.7-sonnet": { + id: "anthropic/claude-3.7-sonnet", + name: "Anthropic: Claude 3.7 Sonnet", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 3, + output: 15, + cacheRead: 0.3, + cacheWrite: 3.75, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "mistralai/mistral-saba": { + id: "mistralai/mistral-saba", + name: "Mistral: Saba", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/o3-mini-high": { + id: "openai/o3-mini-high", + name: "OpenAI: o3 Mini High", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.55, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "google/gemini-2.0-flash-001": { + id: "google/gemini-2.0-flash-001", + name: "Google: Gemini 2.0 Flash", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.39999999999999997, + cacheRead: 0.024999999999999998, + cacheWrite: 0.18330000000000002, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "qwen/qwen-vl-max": { + id: "qwen/qwen-vl-max", + name: "Qwen: Qwen VL Max", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.7999999999999999, + output: 3.1999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "qwen/qwen-turbo": { + id: "qwen/qwen-turbo", + name: "Qwen: Qwen-Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.049999999999999996, + output: 0.19999999999999998, + cacheRead: 0.02, + cacheWrite: 0, + }, + contextWindow: 1000000, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "qwen/qwen-plus": { + id: "qwen/qwen-plus", + name: "Qwen: Qwen-Plus", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 1.2, + cacheRead: 0.16, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "qwen/qwen-max": { + id: 
"qwen/qwen-max", + name: "Qwen: Qwen-Max ", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.5999999999999999, + output: 6.3999999999999995, + cacheRead: 0.64, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "openai/o3-mini": { + id: "openai/o3-mini", + name: "OpenAI: o3 Mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.1, + output: 4.4, + cacheRead: 0.55, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "mistralai/mistral-small-24b-instruct-2501": { + id: "mistralai/mistral-small-24b-instruct-2501", + name: "Mistral: Mistral Small 3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.03, + output: 0.11, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 32768, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-r1-distill-llama-70b": { + id: "deepseek/deepseek-r1-distill-llama-70b", + name: "DeepSeek: R1 Distill Llama 70B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.03, + output: 0.11, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 131072, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-r1": { + id: "deepseek/deepseek-r1", + name: "DeepSeek: R1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "deepseek/deepseek-chat": { + id: "deepseek/deepseek-chat", + name: "DeepSeek: DeepSeek V3", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 163840, + maxTokens: 163840, + } satisfies Model<"openai-completions">, + "openai/o1": { + id: "openai/o1", + name: "OpenAI: o1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 15, + output: 60, + cacheRead: 7.5, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 100000, + } satisfies Model<"openai-completions">, + "google/gemini-2.0-flash-exp:free": { + id: "google/gemini-2.0-flash-exp:free", + name: "Google: Gemini 2.0 Flash Experimental (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.3-70b-instruct:free": { + id: "meta-llama/llama-3.3-70b-instruct:free", + name: "Meta: Llama 3.3 70B Instruct (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + 
cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.3-70b-instruct": { + id: "meta-llama/llama-3.3-70b-instruct", + name: "Meta: Llama 3.3 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.32, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "amazon/nova-lite-v1": { + id: "amazon/nova-lite-v1", + name: "Amazon: Nova Lite 1.0", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.06, + output: 0.24, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 300000, + maxTokens: 5120, + } satisfies Model<"openai-completions">, + "amazon/nova-micro-v1": { + id: "amazon/nova-micro-v1", + name: "Amazon: Nova Micro 1.0", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.035, + output: 0.14, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 5120, + } satisfies Model<"openai-completions">, + "amazon/nova-pro-v1": { + id: "amazon/nova-pro-v1", + name: "Amazon: Nova Pro 1.0", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.7999999999999999, + output: 3.1999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 300000, + maxTokens: 5120, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-2024-11-20": { + id: "openai/gpt-4o-2024-11-20", + name: "OpenAI: GPT-4o (2024-11-20)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/mistral-large-2411": { + id: "mistralai/mistral-large-2411", + name: "Mistral Large 2411", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-large-2407": { + id: "mistralai/mistral-large-2407", + name: "Mistral Large 2407", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/pixtral-large-2411": { + id: "mistralai/pixtral-large-2411", + name: "Mistral: Pixtral Large 2411", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "thedrummer/unslopnemo-12b": { + id: "thedrummer/unslopnemo-12b", + name: "TheDrummer: UnslopNemo 12B", + api: 
"openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "anthropic/claude-3.5-haiku-20241022": { + id: "anthropic/claude-3.5-haiku-20241022", + name: "Anthropic: Claude 3.5 Haiku (2024-10-22)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.7999999999999999, + output: 4, + cacheRead: 0.08, + cacheWrite: 1, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "anthropic/claude-3.5-haiku": { + id: "anthropic/claude-3.5-haiku", + name: "Anthropic: Claude 3.5 Haiku", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.7999999999999999, + output: 4, + cacheRead: 0.08, + cacheWrite: 1, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "anthropic/claude-3.5-sonnet": { + id: "anthropic/claude-3.5-sonnet", + name: "Anthropic: Claude 3.5 Sonnet", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 6, + output: 30, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "mistralai/ministral-8b": { + id: "mistralai/ministral-8b", + name: "Mistral: Ministral 8B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/ministral-3b": { + id: "mistralai/ministral-3b", + name: "Mistral: Ministral 3B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.04, + output: 0.04, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "nvidia/llama-3.1-nemotron-70b-instruct": { + id: "nvidia/llama-3.1-nemotron-70b-instruct", + name: "NVIDIA: Llama 3.1 Nemotron 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.2, + output: 1.2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "thedrummer/rocinante-12b": { + id: "thedrummer/rocinante-12b", + name: "TheDrummer: Rocinante 12B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.16999999999999998, + output: 0.43, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.2-3b-instruct": { + id: "meta-llama/llama-3.2-3b-instruct", + name: "Meta: Llama 3.2 3B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: 
"https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.02, + output: 0.02, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "qwen/qwen-2.5-72b-instruct": { + id: "qwen/qwen-2.5-72b-instruct", + name: "Qwen2.5 72B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.12, + output: 0.39, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/pixtral-12b": { + id: "mistralai/pixtral-12b", + name: "Mistral: Pixtral 12B", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "cohere/command-r-08-2024": { + id: "cohere/command-r-08-2024", + name: "Cohere: Command R (08-2024)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4000, + } satisfies Model<"openai-completions">, + "cohere/command-r-plus-08-2024": { + id: "cohere/command-r-plus-08-2024", + name: "Cohere: Command R+ (08-2024)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2.5, + output: 10, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4000, + } satisfies Model<"openai-completions">, + "sao10k/l3.1-euryale-70b": { + id: "sao10k/l3.1-euryale-70b", + name: "Sao10K: Llama 3.1 Euryale 70B v2.2", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.65, + output: 0.75, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "microsoft/phi-3.5-mini-128k-instruct": { + id: "microsoft/phi-3.5-mini-128k-instruct", + name: "Microsoft: Phi-3.5 Mini 128K Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-2024-08-06": { + id: "openai/gpt-4o-2024-08-06", + name: "OpenAI: GPT-4o (2024-08-06)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-8b-instruct": { + id: "meta-llama/llama-3.1-8b-instruct", + name: "Meta: Llama 3.1 8B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.02, + output: 0.03, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 
131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-405b-instruct": { + id: "meta-llama/llama-3.1-405b-instruct", + name: "Meta: Llama 3.1 405B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3.5, + output: 3.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 10000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-70b-instruct": { + id: "meta-llama/llama-3.1-70b-instruct", + name: "Meta: Llama 3.1 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-nemo": { + id: "mistralai/mistral-nemo", + name: "Mistral: Mistral Nemo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.02, + output: 0.04, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-mini-2024-07-18": { + id: "openai/gpt-4o-mini-2024-07-18", + name: "OpenAI: GPT-4o-mini (2024-07-18)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-mini": { + id: "openai/gpt-4o-mini", + name: "OpenAI: GPT-4o-mini", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0.075, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "sao10k/l3-euryale-70b": { + id: "sao10k/l3-euryale-70b", + name: "Sao10k: Llama 3 Euryale 70B v2.1", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1.48, + output: 1.48, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 8192, + } satisfies Model<"openai-completions">, + "mistralai/mistral-7b-instruct:free": { + id: "mistralai/mistral-7b-instruct:free", + name: "Mistral: Mistral 7B Instruct (free)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/mistral-7b-instruct": { + id: "mistralai/mistral-7b-instruct", + name: "Mistral: Mistral 7B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.028, + output: 0.054, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "microsoft/phi-3-mini-128k-instruct": { + id: "microsoft/phi-3-mini-128k-instruct", + name: "Microsoft: 
Phi-3 Mini 128K Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.09999999999999999, + output: 0.09999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "microsoft/phi-3-medium-128k-instruct": { + id: "microsoft/phi-3-medium-128k-instruct", + name: "Microsoft: Phi-3 Medium 128K Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4o-2024-05-13": { + id: "openai/gpt-4o-2024-05-13", + name: "OpenAI: GPT-4o (2024-05-13)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 5, + output: 15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4o": { + id: "openai/gpt-4o", + name: "OpenAI: GPT-4o", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 2.5, + output: 10, + cacheRead: 1.25, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-4o:extended": { + id: "openai/gpt-4o:extended", + name: "OpenAI: GPT-4o (extended)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 6, + output: 18, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 64000, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3-70b-instruct": { + id: "meta-llama/llama-3-70b-instruct", + name: "Meta: Llama 3 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "meta-llama/llama-3-8b-instruct": { + id: "meta-llama/llama-3-8b-instruct", + name: "Meta: Llama 3 8B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.03, + output: 0.06, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "mistralai/mixtral-8x22b-instruct": { + id: "mistralai/mixtral-8x22b-instruct", + name: "Mistral: Mixtral 8x22B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 65536, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4-turbo": { + id: "openai/gpt-4-turbo", + name: "OpenAI: GPT-4 Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 10, + output: 30, + cacheRead: 0, 
+ cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "anthropic/claude-3-haiku": { + id: "anthropic/claude-3-haiku", + name: "Anthropic: Claude 3 Haiku", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0.25, + output: 1.25, + cacheRead: 0.03, + cacheWrite: 0.3, + }, + contextWindow: 200000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "anthropic/claude-3-opus": { + id: "anthropic/claude-3-opus", + name: "Anthropic: Claude 3 Opus", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 15, + output: 75, + cacheRead: 1.5, + cacheWrite: 18.75, + }, + contextWindow: 200000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-large": { + id: "mistralai/mistral-large", + name: "Mistral Large", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2, + output: 6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-3.5-turbo-0613": { + id: "openai/gpt-3.5-turbo-0613", + name: "OpenAI: GPT-3.5 Turbo (older v0613)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 4095, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4-turbo-preview": { + id: "openai/gpt-4-turbo-preview", + name: "OpenAI: GPT-4 Turbo Preview", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 10, + output: 30, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mistral-tiny": { + id: "mistralai/mistral-tiny", + name: "Mistral Tiny", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 0.25, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "mistralai/mixtral-8x7b-instruct": { + id: "mistralai/mixtral-8x7b-instruct", + name: "Mistral: Mixtral 8x7B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.54, + output: 0.54, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, + "openai/gpt-4-1106-preview": { + id: "openai/gpt-4-1106-preview", + name: "OpenAI: GPT-4 Turbo (older v1106)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 10, + output: 30, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-3.5-turbo-16k": { + id: "openai/gpt-3.5-turbo-16k", + name: "OpenAI: GPT-3.5 Turbo 16k", + api: "openai-completions", + provider: "openrouter", + baseUrl: 
"https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 3, + output: 4, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 16385, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4-0314": { + id: "openai/gpt-4-0314", + name: "OpenAI: GPT-4 (older v0314)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 30, + output: 60, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8191, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-4": { + id: "openai/gpt-4", + name: "OpenAI: GPT-4", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 30, + output: 60, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8191, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openai/gpt-3.5-turbo": { + id: "openai/gpt-3.5-turbo", + name: "OpenAI: GPT-3.5 Turbo", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.5, + output: 1.5, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 16385, + maxTokens: 4096, + } satisfies Model<"openai-completions">, + "openrouter/auto": { + id: "openrouter/auto", + name: "OpenRouter: Auto Router", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 2000000, + maxTokens: 30000, + } satisfies Model<"openai-completions">, + }, + "google-gemini-cli": { + "gemini-2.5-pro": { + id: "gemini-2.5-pro", + name: "Gemini 2.5 Pro (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-2.5-flash": { + id: "gemini-2.5-flash", + name: "Gemini 2.5 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-2.0-flash": { + id: "gemini-2.0-flash", + name: "Gemini 2.0 Flash (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 8192, + } satisfies Model<"google-gemini-cli">, + "gemini-3-pro-preview": { + id: "gemini-3-pro-preview", + name: "Gemini 3 Pro Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-3-flash-preview": { + id: "gemini-3-flash-preview", + name: "Gemini 3 Flash 
Preview (Cloud Code Assist)", + api: "google-gemini-cli", + provider: "google-gemini-cli", + baseUrl: "https://cloudcode-pa.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + }, + "google-antigravity": { + "gemini-3-pro-high": { + id: "gemini-3-pro-high", + name: "Gemini 3 Pro High (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-3-pro-low": { + id: "gemini-3-pro-low", + name: "Gemini 3 Pro Low (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "gemini-3-flash": { + id: "gemini-3-flash", + name: "Gemini 3 Flash (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 1048576, + maxTokens: 65535, + } satisfies Model<"google-gemini-cli">, + "claude-sonnet-4-5": { + id: "claude-sonnet-4-5", + name: "Claude Sonnet 4.5 (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: false, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"google-gemini-cli">, + "claude-sonnet-4-5-thinking": { + id: "claude-sonnet-4-5-thinking", + name: "Claude Sonnet 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"google-gemini-cli">, + "claude-opus-4-5-thinking": { + id: "claude-opus-4-5-thinking", + name: "Claude Opus 4.5 Thinking (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: true, + input: ["text", "image"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 200000, + maxTokens: 64000, + } satisfies Model<"google-gemini-cli">, + "gpt-oss-120b-medium": { + id: "gpt-oss-120b-medium", + name: "GPT-OSS 120B Medium (Antigravity)", + api: "google-gemini-cli", + provider: "google-antigravity", + baseUrl: "https://daily-cloudcode-pa.sandbox.googleapis.com", + reasoning: false, + input: ["text"], + cost: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 32768, + } satisfies Model<"google-gemini-cli">, }, } as const; diff --git a/packages/coding-agent/docs/session-tree-plan.md b/packages/coding-agent/docs/session-tree-plan.md new file mode 
100644 index 00000000..5f2371df --- /dev/null +++ b/packages/coding-agent/docs/session-tree-plan.md @@ -0,0 +1,86 @@ +# Session Tree Implementation Plan + +Reference: [session-tree.md](./session-tree.md) + +## Phase 1: SessionManager Core + +- [x] Update entry types with `id`, `parentId` fields (using TreeNode intersection) +- [x] Add `version` field to `SessionHeader` +- [x] Change `CompactionEntry.firstKeptEntryIndex` → `firstKeptEntryId` +- [x] Add `BranchSummaryEntry` type +- [x] Add `byId: Map` index +- [x] Add `leafId: string` tracking +- [x] Implement `getPath(fromId?)` tree traversal +- [x] Implement `getEntry(id)` lookup +- [x] Implement `getLeafId()` helper +- [x] Update `_buildIndex()` to populate `byId` map +- [x] Update `saveMessage()` to include id/parentId (returns id) +- [x] Update `saveCompaction()` signature and fields (returns id) +- [x] Update `saveThinkingLevelChange()` to include id/parentId (returns id) +- [x] Update `saveModelChange()` to include id/parentId (returns id) +- [x] Update `buildSessionContext()` to use `getPath()` traversal + +### Type Hierarchy + +```typescript +// Tree fields (added by SessionManager) +interface TreeNode { id, parentId, timestamp } + +// Content types (for input) +interface MessageContent { type: "message"; message: AppMessage } +interface CompactionContent { type: "compaction"; summary; firstKeptEntryId; tokensBefore } +// etc... + +// Full entry types (TreeNode & Content) +type SessionMessageEntry = TreeNode & MessageContent; +type CompactionEntry = TreeNode & CompactionContent; +// etc... +``` + +## Phase 2: Migration + +- [x] Add `CURRENT_SESSION_VERSION = 2` constant +- [x] Implement `_migrateToV2()` for v1→v2 +- [x] Update `setSessionFile()` to detect version and migrate +- [x] Implement `_rewriteFile()` for post-migration persistence +- [x] Handle `firstKeptEntryIndex` → `firstKeptEntryId` conversion in migration + +## Phase 3: Branching + +- [x] Implement `branchInPlace(id)` - switch leaf pointer +- [x] Implement `branchWithSummary(id, summary)` - create summary entry +- [x] Update `branchToNewFile()` to use IDs (no remapping) +- [ ] Update `AgentSession.branch()` to use new API + +## Phase 4: Compaction Integration + +- [x] Update `compaction.ts` to work with IDs +- [x] Update `prepareCompaction()` to return `firstKeptEntryId` +- [x] Update `compact()` to return `CompactionResult` with `firstKeptEntryId` +- [x] Update `AgentSession` compaction methods +- [x] Add `firstKeptEntryId` to `before_compact` hook event + +## Phase 5: Testing + +- [ ] Add test fixtures from existing sessions +- [ ] Test migration of v1 sessions +- [ ] Test context building with tree structure +- [ ] Test branching operations +- [ ] Test compaction with IDs +- [x] Update existing tests for new types + +## Phase 6: UI Integration + +- [ ] Update `/branch` command for new API +- [ ] Add `/branch-here` command for in-place branching +- [ ] Add `/branches` command to list branches (future) +- [ ] Update session display to show tree info (future) + +## Notes + +- All save methods return the new entry's ID +- Migration rewrites file on first load if version < CURRENT_VERSION +- Existing sessions become linear chains after migration (parentId = previous entry) +- Tree features available immediately after migration +- SessionHeader does NOT have id/parentId (it's metadata, not part of tree) +- Content types allow clean input/output separation diff --git a/packages/coding-agent/examples/hooks/custom-compaction.ts 
b/packages/coding-agent/examples/hooks/custom-compaction.ts index 3967b1ce..1781e7e3 100644 --- a/packages/coding-agent/examples/hooks/custom-compaction.ts +++ b/packages/coding-agent/examples/hooks/custom-compaction.ts @@ -94,14 +94,12 @@ Format the summary as structured markdown with clear sections.`, return; } - // Return a compaction entry that discards ALL messages - // firstKeptEntryIndex points past all current entries + // Return compaction content - SessionManager adds id/parentId + // Use firstKeptEntryId from event to keep recent messages return { - compactionEntry: { - type: "compaction" as const, - timestamp: new Date().toISOString(), + compaction: { summary, - firstKeptEntryIndex: entries.length, + firstKeptEntryId: event.firstKeptEntryId, tokensBefore, }, }; diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index 16f33e27..fe4804a1 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -754,7 +754,12 @@ export class AgentSession { const preparation = prepareCompaction(entries, settings); if (!preparation) { - throw new Error("Already compacted"); + // Check why we can't compact + const lastEntry = entries[entries.length - 1]; + if (lastEntry?.type === "compaction") { + throw new Error("Already compacted"); + } + throw new Error("Nothing to compact (session too small or needs migration)"); } // Find previous compaction summary if any @@ -766,7 +771,7 @@ export class AgentSession { } } - let compactionEntry: CompactionEntry | undefined; + let hookCompaction: { summary: string; firstKeptEntryId: string; tokensBefore: number } | undefined; let fromHook = false; if (this._hookRunner?.hasHandlers("session")) { @@ -777,6 +782,7 @@ export class AgentSession { previousSessionFile: null, reason: "before_compact", cutPoint: preparation.cutPoint, + firstKeptEntryId: preparation.firstKeptEntryId, previousSummary, messagesToSummarize: [...preparation.messagesToSummarize], messagesToKeep: [...preparation.messagesToKeep], @@ -791,14 +797,24 @@ export class AgentSession { throw new Error("Compaction cancelled"); } - if (result?.compactionEntry) { - compactionEntry = result.compactionEntry; + if (result?.compaction) { + hookCompaction = result.compaction; fromHook = true; } } - if (!compactionEntry) { - compactionEntry = await compact( + let summary: string; + let firstKeptEntryId: string; + let tokensBefore: number; + + if (hookCompaction) { + // Hook provided compaction content + summary = hookCompaction.summary; + firstKeptEntryId = hookCompaction.firstKeptEntryId; + tokensBefore = hookCompaction.tokensBefore; + } else { + // Generate compaction result + const result = await compact( entries, this.model, settings, @@ -806,33 +822,41 @@ export class AgentSession { this._compactionAbortController.signal, customInstructions, ); + summary = result.summary; + firstKeptEntryId = result.firstKeptEntryId; + tokensBefore = result.tokensBefore; } if (this._compactionAbortController.signal.aborted) { throw new Error("Compaction cancelled"); } - this.sessionManager.saveCompaction(compactionEntry); + this.sessionManager.saveCompaction(summary, firstKeptEntryId, tokensBefore); const newEntries = this.sessionManager.getEntries(); const sessionContext = this.sessionManager.buildSessionContext(); this.agent.replaceMessages(sessionContext.messages); - if (this._hookRunner) { + // Get the saved compaction entry for the hook + const savedCompactionEntry = newEntries.find((e) => e.type 
=== "compaction" && e.summary === summary) as + | CompactionEntry + | undefined; + + if (this._hookRunner && savedCompactionEntry) { await this._hookRunner.emit({ type: "session", entries: newEntries, sessionFile: this.sessionFile, previousSessionFile: null, reason: "compact", - compactionEntry, - tokensBefore: compactionEntry.tokensBefore, + compactionEntry: savedCompactionEntry, + tokensBefore, fromHook, }); } return { - tokensBefore: compactionEntry.tokensBefore, - summary: compactionEntry.summary, + tokensBefore, + summary, }; } finally { this._compactionAbortController = null; @@ -928,7 +952,7 @@ export class AgentSession { } } - let compactionEntry: CompactionEntry | undefined; + let hookCompaction: { summary: string; firstKeptEntryId: string; tokensBefore: number } | undefined; let fromHook = false; if (this._hookRunner?.hasHandlers("session")) { @@ -939,6 +963,7 @@ export class AgentSession { previousSessionFile: null, reason: "before_compact", cutPoint: preparation.cutPoint, + firstKeptEntryId: preparation.firstKeptEntryId, previousSummary, messagesToSummarize: [...preparation.messagesToSummarize], messagesToKeep: [...preparation.messagesToKeep], @@ -954,20 +979,33 @@ export class AgentSession { return; } - if (hookResult?.compactionEntry) { - compactionEntry = hookResult.compactionEntry; + if (hookResult?.compaction) { + hookCompaction = hookResult.compaction; fromHook = true; } } - if (!compactionEntry) { - compactionEntry = await compact( + let summary: string; + let firstKeptEntryId: string; + let tokensBefore: number; + + if (hookCompaction) { + // Hook provided compaction content + summary = hookCompaction.summary; + firstKeptEntryId = hookCompaction.firstKeptEntryId; + tokensBefore = hookCompaction.tokensBefore; + } else { + // Generate compaction result + const compactResult = await compact( entries, this.model, settings, apiKey, this._autoCompactionAbortController.signal, ); + summary = compactResult.summary; + firstKeptEntryId = compactResult.firstKeptEntryId; + tokensBefore = compactResult.tokensBefore; } if (this._autoCompactionAbortController.signal.aborted) { @@ -975,27 +1013,32 @@ export class AgentSession { return; } - this.sessionManager.saveCompaction(compactionEntry); + this.sessionManager.saveCompaction(summary, firstKeptEntryId, tokensBefore); const newEntries = this.sessionManager.getEntries(); const sessionContext = this.sessionManager.buildSessionContext(); this.agent.replaceMessages(sessionContext.messages); - if (this._hookRunner) { + // Get the saved compaction entry for the hook + const savedCompactionEntry = newEntries.find((e) => e.type === "compaction" && e.summary === summary) as + | CompactionEntry + | undefined; + + if (this._hookRunner && savedCompactionEntry) { await this._hookRunner.emit({ type: "session", entries: newEntries, sessionFile: this.sessionFile, previousSessionFile: null, reason: "compact", - compactionEntry, - tokensBefore: compactionEntry.tokensBefore, + compactionEntry: savedCompactionEntry, + tokensBefore, fromHook, }); } const result: CompactionResult = { - tokensBefore: compactionEntry.tokensBefore, - summary: compactionEntry.summary, + tokensBefore, + summary, }; this._emit({ type: "auto_compaction_end", result, aborted: false, willRetry }); diff --git a/packages/coding-agent/src/core/compaction.ts b/packages/coding-agent/src/core/compaction.ts index af4edfb8..8abfef74 100644 --- a/packages/coding-agent/src/core/compaction.ts +++ b/packages/coding-agent/src/core/compaction.ts @@ -9,7 +9,14 @@ import type { AppMessage } from 
"@mariozechner/pi-agent-core"; import type { AssistantMessage, Model, Usage } from "@mariozechner/pi-ai"; import { complete } from "@mariozechner/pi-ai"; import { messageTransformer } from "./messages.js"; -import type { CompactionEntry, SessionEntry } from "./session-manager.js"; +import type { CompactionEntry, ConversationEntry, SessionEntry } from "./session-manager.js"; + +/** Result from compact() - SessionManager adds uuid/parentUuid when saving */ +export interface CompactionResult { + summary: string; + firstKeptEntryId: string; + tokensBefore: number; +} // ============================================================================ // Types @@ -327,6 +334,8 @@ export async function generateSummary( export interface CompactionPreparation { cutPoint: CutPointResult; + /** UUID of first entry to keep */ + firstKeptEntryId: string; /** Messages that will be summarized and discarded */ messagesToSummarize: AppMessage[]; /** Messages that will be kept after the summary (recent turns) */ @@ -355,6 +364,16 @@ export function prepareCompaction(entries: SessionEntry[], settings: CompactionS const cutPoint = findCutPoint(entries, boundaryStart, boundaryEnd, settings.keepRecentTokens); + // Get UUID of first kept entry + const firstKeptEntry = entries[cutPoint.firstKeptEntryIndex]; + if (firstKeptEntry.type === "session") { + return null; // Can't compact if first kept is header + } + const firstKeptEntryId = (firstKeptEntry as ConversationEntry).id; + if (!firstKeptEntryId) { + return null; // Session needs migration + } + const historyEnd = cutPoint.isSplitTurn ? cutPoint.turnStartIndex : cutPoint.firstKeptEntryIndex; // Messages to summarize (will be discarded after summary) @@ -375,7 +394,7 @@ export function prepareCompaction(entries: SessionEntry[], settings: CompactionS } } - return { cutPoint, messagesToSummarize, messagesToKeep, tokensBefore, boundaryStart }; + return { cutPoint, firstKeptEntryId, messagesToSummarize, messagesToKeep, tokensBefore, boundaryStart }; } // ============================================================================ @@ -394,9 +413,9 @@ Be concise. Focus on information needed to understand the retained recent work.` /** * Calculate compaction and generate summary. - * Returns the CompactionEntry to append to the session file. + * Returns CompactionResult - SessionManager adds uuid/parentUuid when saving. 
* - * @param entries - All session entries + * @param entries - All session entries (must have uuid fields for v2) * @param model - Model to use for summarization * @param settings - Compaction settings * @param apiKey - API key for LLM @@ -410,7 +429,7 @@ export async function compact( apiKey: string, signal?: AbortSignal, customInstructions?: string, -): Promise { +): Promise { // Don't compact if the last entry is already a compaction if (entries.length > 0 && entries[entries.length - 1].type === "compaction") { throw new Error("Already compacted"); @@ -490,11 +509,19 @@ export async function compact( ); } + // Get UUID of first kept entry + const firstKeptEntry = entries[cutResult.firstKeptEntryIndex]; + if (firstKeptEntry.type === "session") { + throw new Error("Cannot compact: first kept entry is session header"); + } + const firstKeptEntryId = (firstKeptEntry as ConversationEntry).id; + if (!firstKeptEntryId) { + throw new Error("First kept entry has no UUID - session may need migration"); + } + return { - type: "compaction", - timestamp: new Date().toISOString(), summary, - firstKeptEntryIndex: cutResult.firstKeptEntryIndex, + firstKeptEntryId, tokensBefore, }; } diff --git a/packages/coding-agent/src/core/hooks/types.ts b/packages/coding-agent/src/core/hooks/types.ts index a60d009b..a51a93bf 100644 --- a/packages/coding-agent/src/core/hooks/types.ts +++ b/packages/coding-agent/src/core/hooks/types.ts @@ -130,6 +130,8 @@ export type SessionEvent = | (SessionEventBase & { reason: "before_compact"; cutPoint: CutPointResult; + /** ID of first entry to keep (for hooks that return CompactionEntry) */ + firstKeptEntryId: string; /** Summary from previous compaction, if any. Include this in your summary to preserve context. */ previousSummary?: string; /** Messages that will be summarized and discarded */ @@ -351,8 +353,12 @@ export interface SessionEventResult { cancel?: boolean; /** If true (for before_branch only), skip restoring conversation to branch point while still creating the branched session file */ skipConversationRestore?: boolean; - /** Custom compaction entry (for before_compact event) */ - compactionEntry?: CompactionEntry; + /** Custom compaction result (for before_compact event) - SessionManager adds id/parentId */ + compaction?: { + summary: string; + firstKeptEntryId: string; + tokensBefore: number; + }; } // ============================================================================ diff --git a/packages/coding-agent/src/core/session-manager.ts b/packages/coding-agent/src/core/session-manager.ts index dd118afd..3b9eda0b 100644 --- a/packages/coding-agent/src/core/session-manager.ts +++ b/packages/coding-agent/src/core/session-manager.ts @@ -1,9 +1,11 @@ import type { AppMessage } from "@mariozechner/pi-agent-core"; import { randomBytes } from "crypto"; -import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, statSync } from "fs"; +import { appendFileSync, existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "fs"; import { join, resolve } from "path"; import { getAgentDir as getDefaultAgentDir } from "../config.js"; +export const CURRENT_SESSION_VERSION = 2; + function uuidv4(): string { const bytes = randomBytes(16); bytes[6] = (bytes[6] & 0x0f) | 0x40; @@ -12,47 +14,89 @@ function uuidv4(): string { return `${hex.slice(0, 8)}-${hex.slice(8, 12)}-${hex.slice(12, 16)}-${hex.slice(16, 20)}-${hex.slice(20, 32)}`; } +// ============================================================================ +// Session Header (metadata, 
not part of conversation tree) +// ============================================================================ + export interface SessionHeader { type: "session"; + version?: number; // v1 sessions don't have this id: string; timestamp: string; cwd: string; branchedFrom?: string; } -export interface SessionMessageEntry { - type: "message"; +// ============================================================================ +// Tree Node (added by SessionManager to all conversation entries) +// ============================================================================ + +export interface TreeNode { + id: string; + parentId: string | null; timestamp: string; +} + +// ============================================================================ +// Content Types (what distinguishes entries - used for input) +// ============================================================================ + +export interface MessageContent { + type: "message"; message: AppMessage; } -export interface ThinkingLevelChangeEntry { +export interface ThinkingLevelContent { type: "thinking_level_change"; - timestamp: string; thinkingLevel: string; } -export interface ModelChangeEntry { +export interface ModelChangeContent { type: "model_change"; - timestamp: string; provider: string; modelId: string; } -export interface CompactionEntry { +export interface CompactionContent { type: "compaction"; - timestamp: string; summary: string; - firstKeptEntryIndex: number; + firstKeptEntryId: string; tokensBefore: number; } -export type SessionEntry = - | SessionHeader +export interface BranchSummaryContent { + type: "branch_summary"; + summary: string; +} + +/** Union of all content types (for input) */ +export type ConversationContent = + | MessageContent + | ThinkingLevelContent + | ModelChangeContent + | CompactionContent + | BranchSummaryContent; + +// ============================================================================ +// Full Entry Types (TreeNode + Content - returned from SessionManager) +// ============================================================================ + +export type SessionMessageEntry = TreeNode & MessageContent; +export type ThinkingLevelChangeEntry = TreeNode & ThinkingLevelContent; +export type ModelChangeEntry = TreeNode & ModelChangeContent; +export type CompactionEntry = TreeNode & CompactionContent; +export type BranchSummaryEntry = TreeNode & BranchSummaryContent; + +/** Conversation entry - has id/parentId for tree structure */ +export type ConversationEntry = | SessionMessageEntry | ThinkingLevelChangeEntry | ModelChangeEntry - | CompactionEntry; + | CompactionEntry + | BranchSummaryEntry; + +/** Any session entry (header or conversation) */ +export type SessionEntry = SessionHeader | ConversationEntry; export interface SessionContext { messages: AppMessage[]; @@ -87,6 +131,45 @@ export function createSummaryMessage(summary: string): AppMessage { }; } +/** + * Migrate v1 entries to v2 format by adding id/parentId fields. + * Mutates entries in place. Safe to call on already-migrated entries. 
+ */ +export function migrateSessionEntries(entries: SessionEntry[]): void { + // Check if already migrated + const firstConv = entries.find((e) => e.type !== "session"); + if (firstConv && "id" in firstConv && firstConv.id) { + return; // Already migrated + } + + let prevId: string | null = null; + for (const entry of entries) { + if (entry.type === "session") { + entry.version = CURRENT_SESSION_VERSION; + continue; + } + + // Add id/parentId to conversation entries + const convEntry = entry as ConversationEntry; + convEntry.id = uuidv4(); + convEntry.parentId = prevId; + prevId = convEntry.id; + + // Convert firstKeptEntryIndex to firstKeptEntryId for compaction + if (entry.type === "compaction") { + const comp = entry as CompactionEntry & { firstKeptEntryIndex?: number }; + if (typeof comp.firstKeptEntryIndex === "number") { + // Find the entry at that index and get its id + const targetEntry = entries[comp.firstKeptEntryIndex]; + if (targetEntry && targetEntry.type !== "session") { + comp.firstKeptEntryId = (targetEntry as ConversationEntry).id; + } + delete comp.firstKeptEntryIndex; + } + } + } +} + /** Exported for compaction.test.ts */ export function parseSessionEntries(content: string): SessionEntry[] { const entries: SessionEntry[] = []; @@ -115,59 +198,108 @@ export function getLatestCompactionEntry(entries: SessionEntry[]): CompactionEnt } /** - * Build the session context from entries. This is what gets sent to the LLM. - * - * If there's a compaction entry, returns the summary message plus messages - * from `firstKeptEntryIndex` onwards. Otherwise returns all messages. - * - * Also extracts the current thinking level and model from the entries. + * Build the session context from entries using tree traversal. + * If leafId is provided, walks from that entry to root. + * Handles compaction and branch summaries along the path. */ -export function buildSessionContext(entries: SessionEntry[]): SessionContext { +export function buildSessionContext(entries: SessionEntry[], leafId?: string): SessionContext { + // Build uuid index for conversation entries + const byId = new Map(); + for (const entry of entries) { + if (entry.type !== "session") { + byId.set(entry.id, entry); + } + } + + // Find leaf + let leaf: ConversationEntry | undefined; + if (leafId) { + leaf = byId.get(leafId); + } else { + // Find last conversation entry + for (let i = entries.length - 1; i >= 0; i--) { + if (entries[i].type !== "session") { + leaf = entries[i] as ConversationEntry; + break; + } + } + } + + if (!leaf) { + return { messages: [], thinkingLevel: "off", model: null }; + } + + // Walk from leaf to root, collecting path + const path: ConversationEntry[] = []; + let current: ConversationEntry | undefined = leaf; + while (current) { + path.unshift(current); + current = current.parentId ? 
byId.get(current.parentId) : undefined; + } + + // Extract settings and find compaction let thinkingLevel = "off"; let model: { provider: string; modelId: string } | null = null; + let compaction: CompactionEntry | null = null; - for (const entry of entries) { + for (const entry of path) { if (entry.type === "thinking_level_change") { thinkingLevel = entry.thinkingLevel; } else if (entry.type === "model_change") { model = { provider: entry.provider, modelId: entry.modelId }; } else if (entry.type === "message" && entry.message.role === "assistant") { model = { provider: entry.message.provider, modelId: entry.message.model }; + } else if (entry.type === "compaction") { + compaction = entry; } } - let latestCompactionIndex = -1; - for (let i = entries.length - 1; i >= 0; i--) { - if (entries[i].type === "compaction") { - latestCompactionIndex = i; - break; - } - } + // Build messages - handle compaction ordering correctly + // When there's a compaction, we need to: + // 1. Emit summary first + // 2. Emit kept messages (from firstKeptEntryId up to compaction) + // 3. Emit messages after compaction + const messages: AppMessage[] = []; - if (latestCompactionIndex === -1) { - const messages: AppMessage[] = []; - for (const entry of entries) { - if (entry.type === "message") { + if (compaction) { + // Emit summary first + messages.push(createSummaryMessage(compaction.summary)); + + // Find compaction index in path + const compactionIdx = path.findIndex((e) => e.type === "compaction" && e.id === compaction.id); + + // Emit kept messages (before compaction, starting from firstKeptEntryId) + let foundFirstKept = false; + for (let i = 0; i < compactionIdx; i++) { + const entry = path[i]; + if (entry.id === compaction.firstKeptEntryId) { + foundFirstKept = true; + } + if (foundFirstKept && entry.type === "message") { messages.push(entry.message); } } - return { messages, thinkingLevel, model }; - } - const compactionEvent = entries[latestCompactionIndex] as CompactionEntry; - - const keptMessages: AppMessage[] = []; - for (let i = compactionEvent.firstKeptEntryIndex; i < entries.length; i++) { - const entry = entries[i]; - if (entry.type === "message") { - keptMessages.push(entry.message); + // Emit messages after compaction + for (let i = compactionIdx + 1; i < path.length; i++) { + const entry = path[i]; + if (entry.type === "message") { + messages.push(entry.message); + } else if (entry.type === "branch_summary") { + messages.push(createSummaryMessage(entry.summary)); + } + } + } else { + // No compaction - emit all messages, handle branch summaries + for (const entry of path) { + if (entry.type === "message") { + messages.push(entry.message); + } else if (entry.type === "branch_summary") { + messages.push(createSummaryMessage(entry.summary)); + } } } - const messages: AppMessage[] = []; - messages.push(createSummaryMessage(compactionEvent.summary)); - messages.push(...keptMessages); - return { messages, thinkingLevel, model }; } @@ -229,6 +361,10 @@ export class SessionManager { private flushed: boolean = false; private inMemoryEntries: SessionEntry[] = []; + // Tree structure (v2) + private byId: Map = new Map(); + private leafId: string = ""; + private constructor(cwd: string, sessionDir: string, sessionFile: string | null, persist: boolean) { this.cwd = cwd; this.sessionDir = sessionDir; @@ -240,10 +376,7 @@ export class SessionManager { if (sessionFile) { this.setSessionFile(sessionFile); } else { - this.sessionId = uuidv4(); - const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); 
- const sessionFile = join(this.getSessionDir(), `${timestamp}_${this.sessionId}.jsonl`); - this.setSessionFile(sessionFile); + this._initNewSession(); } } @@ -252,23 +385,61 @@ export class SessionManager { this.sessionFile = resolve(sessionFile); if (existsSync(this.sessionFile)) { this.inMemoryEntries = loadEntriesFromFile(this.sessionFile); - const header = this.inMemoryEntries.find((e) => e.type === "session"); - this.sessionId = header ? (header as SessionHeader).id : uuidv4(); + const header = this.inMemoryEntries.find((e) => e.type === "session") as SessionHeader | undefined; + this.sessionId = header?.id ?? uuidv4(); + + // Migrate v1 to v2 if needed + const version = header?.version ?? 1; + if (version < CURRENT_SESSION_VERSION) { + this._migrateToV2(); + this._rewriteFile(); + } + + this._buildIndex(); this.flushed = true; } else { - this.sessionId = uuidv4(); - this.inMemoryEntries = []; - this.flushed = false; - const entry: SessionHeader = { - type: "session", - id: this.sessionId, - timestamp: new Date().toISOString(), - cwd: this.cwd, - }; - this.inMemoryEntries.push(entry); + this._initNewSession(); } } + private _initNewSession(): void { + this.sessionId = uuidv4(); + const timestamp = new Date().toISOString(); + const header: SessionHeader = { + type: "session", + version: CURRENT_SESSION_VERSION, + id: this.sessionId, + timestamp, + cwd: this.cwd, + }; + this.inMemoryEntries = [header]; + this.byId.clear(); + this.leafId = ""; + this.flushed = false; + const fileTimestamp = timestamp.replace(/[:.]/g, "-"); + this.sessionFile = join(this.getSessionDir(), `${fileTimestamp}_${this.sessionId}.jsonl`); + } + + private _migrateToV2(): void { + migrateSessionEntries(this.inMemoryEntries); + } + + private _buildIndex(): void { + this.byId.clear(); + this.leafId = ""; + for (const entry of this.inMemoryEntries) { + if (entry.type === "session") continue; + this.byId.set(entry.id, entry); + this.leafId = entry.id; + } + } + + private _rewriteFile(): void { + if (!this.persist) return; + const content = `${this.inMemoryEntries.map((e) => JSON.stringify(e)).join("\n")}\n`; + writeFileSync(this.sessionFile, content); + } + isPersisted(): boolean { return this.persist; } @@ -290,18 +461,7 @@ export class SessionManager { } reset(): void { - this.sessionId = uuidv4(); - this.flushed = false; - const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); - this.sessionFile = join(this.getSessionDir(), `${timestamp}_${this.sessionId}.jsonl`); - this.inMemoryEntries = [ - { - type: "session", - id: this.sessionId, - timestamp: new Date().toISOString(), - cwd: this.cwd, - }, - ]; + this._initNewSession(); } _persist(entry: SessionEntry): void { @@ -320,49 +480,93 @@ export class SessionManager { } } - saveMessage(message: AppMessage): void { + private _appendEntry(entry: ConversationEntry): void { + this.inMemoryEntries.push(entry); + this.byId.set(entry.id, entry); + this.leafId = entry.id; + this._persist(entry); + } + + saveMessage(message: AppMessage): string { const entry: SessionMessageEntry = { type: "message", + id: uuidv4(), + parentId: this.leafId || null, timestamp: new Date().toISOString(), message, }; - this.inMemoryEntries.push(entry); - this._persist(entry); + this._appendEntry(entry); + return entry.id; } - saveThinkingLevelChange(thinkingLevel: string): void { + saveThinkingLevelChange(thinkingLevel: string): string { const entry: ThinkingLevelChangeEntry = { type: "thinking_level_change", + id: uuidv4(), + parentId: this.leafId || null, timestamp: new 
Date().toISOString(), thinkingLevel, }; - this.inMemoryEntries.push(entry); - this._persist(entry); + this._appendEntry(entry); + return entry.id; } - saveModelChange(provider: string, modelId: string): void { + saveModelChange(provider: string, modelId: string): string { const entry: ModelChangeEntry = { type: "model_change", + id: uuidv4(), + parentId: this.leafId || null, timestamp: new Date().toISOString(), provider, modelId, }; - this.inMemoryEntries.push(entry); - this._persist(entry); + this._appendEntry(entry); + return entry.id; } - saveCompaction(entry: CompactionEntry): void { - this.inMemoryEntries.push(entry); - this._persist(entry); + saveCompaction(summary: string, firstKeptEntryId: string, tokensBefore: number): string { + const entry: CompactionEntry = { + type: "compaction", + id: uuidv4(), + parentId: this.leafId || null, + timestamp: new Date().toISOString(), + summary, + firstKeptEntryId, + tokensBefore, + }; + this._appendEntry(entry); + return entry.id; + } + + // ========================================================================= + // Tree Traversal + // ========================================================================= + + getLeafUuid(): string { + return this.leafId; + } + + getEntry(id: string): ConversationEntry | undefined { + return this.byId.get(id); + } + + /** Walk from entry to root, returning path (conversation entries only) */ + getPath(fromId?: string): ConversationEntry[] { + const path: ConversationEntry[] = []; + let current = this.byId.get(fromId ?? this.leafId); + while (current) { + path.unshift(current); + current = current.parentId ? this.byId.get(current.parentId) : undefined; + } + return path; } /** * Build the session context (what gets sent to the LLM). - * If compacted, returns summary + kept messages. Otherwise all messages. - * Includes thinking level and model. + * Uses tree traversal from current leaf. */ buildSessionContext(): SessionContext { - return buildSessionContext(this.getEntries()); + return buildSessionContext(this.getEntries(), this.leafId); } /** @@ -373,6 +577,35 @@ export class SessionManager { return [...this.inMemoryEntries]; } + // ========================================================================= + // Branching + // ========================================================================= + + /** Branch in-place by changing the leaf pointer */ + branchInPlace(branchFromId: string): void { + if (!this.byId.has(branchFromId)) { + throw new Error(`Entry ${branchFromId} not found`); + } + this.leafId = branchFromId; + } + + /** Branch with a summary of the abandoned path */ + branchWithSummary(branchFromId: string, summary: string): string { + if (!this.byId.has(branchFromId)) { + throw new Error(`Entry ${branchFromId} not found`); + } + this.leafId = branchFromId; + const entry: BranchSummaryEntry = { + type: "branch_summary", + id: uuidv4(), + parentId: branchFromId, + timestamp: new Date().toISOString(), + summary, + }; + this._appendEntry(entry); + return entry.id; + } + createBranchedSessionFromEntries(entries: SessionEntry[], branchBeforeIndex: number): string | null { const newSessionId = uuidv4(); const timestamp = new Date().toISOString().replace(/[:.]/g, "-"); @@ -385,6 +618,7 @@ export class SessionManager { if (entry.type === "session") { newEntries.push({ ...entry, + version: CURRENT_SESSION_VERSION, id: newSessionId, timestamp: new Date().toISOString(), branchedFrom: this.persist ? 
this.sessionFile : undefined, @@ -402,6 +636,7 @@ } this.inMemoryEntries = newEntries; this.sessionId = newSessionId; + this._buildIndex(); return null; } diff --git a/packages/coding-agent/src/index.ts b/packages/coding-agent/src/index.ts index 2683765e..04151f0b 100644 --- a/packages/coding-agent/src/index.ts +++ b/packages/coding-agent/src/index.ts @@ -107,11 +107,20 @@ export { readOnlyTools, } from "./core/sdk.js"; export { + type BranchSummaryContent, + type BranchSummaryEntry, buildSessionContext, + type CompactionContent, type CompactionEntry, + type ConversationContent, + type ConversationEntry, + CURRENT_SESSION_VERSION, createSummaryMessage, getLatestCompactionEntry, + type MessageContent, + type ModelChangeContent, type ModelChangeEntry, + migrateSessionEntries, parseSessionEntries, type SessionContext as LoadedSession, type SessionEntry, @@ -122,6 +131,9 @@ export { SUMMARY_PREFIX, SUMMARY_SUFFIX, type ThinkingLevelChangeEntry, + type ThinkingLevelContent, + // Tree types (v2) + type TreeNode, } from "./core/session-manager.js"; export { type CompactionSettings, diff --git a/packages/coding-agent/test/agent-session-compaction.test.ts b/packages/coding-agent/test/agent-session-compaction.test.ts index ff519eb5..2b7f5e71 100644 --- a/packages/coding-agent/test/agent-session-compaction.test.ts +++ b/packages/coding-agent/test/agent-session-compaction.test.ts @@ -156,9 +156,9 @@ describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { expect(compaction.type).toBe("compaction"); if (compaction.type === "compaction") { expect(compaction.summary.length).toBeGreaterThan(0); - // firstKeptEntryIndex can be 0 if all messages fit within keepRecentTokens + // firstKeptEntryId may reference the very first conversation entry if all messages fit within keepRecentTokens + expect(compaction.firstKeptEntryId).toBeTruthy(); // (which is the case for small conversations) - expect(compaction.firstKeptEntryIndex).toBeGreaterThanOrEqual(0); expect(compaction.tokensBefore).toBeGreaterThan(0); } }, 120000); diff --git a/packages/coding-agent/test/compaction-hooks-example.test.ts b/packages/coding-agent/test/compaction-hooks-example.test.ts index 476d40fb..9c5f5479 100644 --- a/packages/coding-agent/test/compaction-hooks-example.test.ts +++ b/packages/coding-agent/test/compaction-hooks-example.test.ts @@ -4,7 +4,6 @@ import { describe, expect, it } from "vitest"; import type { HookAPI } from "../src/core/hooks/index.js"; -import type { CompactionEntry } from "../src/core/session-manager.js"; describe("Documentation example", () => { it("custom compaction example should type-check correctly", () => { @@ -20,29 +19,30 @@ describe("Documentation example", () => { const tokensBefore = event.tokensBefore; const model = event.model; const resolveApiKey = event.resolveApiKey; + const firstKeptEntryId = event.firstKeptEntryId; // Verify types expect(Array.isArray(messages)).toBe(true); expect(Array.isArray(messagesToKeep)).toBe(true); - expect(typeof cutPoint.firstKeptEntryIndex).toBe("number"); + expect(typeof cutPoint.firstKeptEntryIndex).toBe("number"); // cutPoint still uses index expect(typeof tokensBefore).toBe("number"); expect(model).toBeDefined(); expect(typeof resolveApiKey).toBe("function"); + expect(typeof firstKeptEntryId).toBe("string"); const summary = messages .filter((m) => m.role === "user") .map((m) => `- ${typeof m.content === "string" ?
m.content.slice(0, 100) : "[complex]"}`) .join("\n"); - const compactionEntry: CompactionEntry = { - type: "compaction", - timestamp: new Date().toISOString(), - summary: `User requests:\n${summary}`, - firstKeptEntryIndex: event.cutPoint.firstKeptEntryIndex, - tokensBefore: event.tokensBefore, + // Hooks return compaction content - SessionManager adds id/parentId + return { + compaction: { + summary: `User requests:\n${summary}`, + firstKeptEntryId, + tokensBefore, + }, }; - - return { compactionEntry }; }); }; diff --git a/packages/coding-agent/test/compaction.test.ts b/packages/coding-agent/test/compaction.test.ts index 24c7e89d..787e863e 100644 --- a/packages/coding-agent/test/compaction.test.ts +++ b/packages/coding-agent/test/compaction.test.ts @@ -3,7 +3,7 @@ import type { AssistantMessage, Usage } from "@mariozechner/pi-ai"; import { getModel } from "@mariozechner/pi-ai"; import { readFileSync } from "fs"; import { join } from "path"; -import { describe, expect, it } from "vitest"; +import { beforeEach, describe, expect, it } from "vitest"; import { type CompactionSettings, calculateContextTokens, @@ -17,9 +17,12 @@ import { buildSessionContext, type CompactionEntry, createSummaryMessage, + type ModelChangeEntry, + migrateSessionEntries, parseSessionEntries, type SessionEntry, type SessionMessageEntry, + type ThinkingLevelChangeEntry, } from "../src/core/session-manager.js"; // ============================================================================ @@ -29,7 +32,9 @@ import { function loadLargeSessionEntries(): SessionEntry[] { const sessionPath = join(__dirname, "fixtures/large-session.jsonl"); const content = readFileSync(sessionPath, "utf-8"); - return parseSessionEntries(content); + const entries = parseSessionEntries(content); + migrateSessionEntries(entries); // Add id/parentId for v1 fixtures + return entries; } function createMockUsage(input: number, output: number, cacheRead = 0, cacheWrite = 0): Usage { @@ -60,18 +65,82 @@ function createAssistantMessage(text: string, usage?: Usage): AssistantMessage { }; } -function createMessageEntry(message: AppMessage): SessionMessageEntry { - return { type: "message", timestamp: new Date().toISOString(), message }; +let entryCounter = 0; +let lastId: string | null = null; + +function resetEntryCounter() { + entryCounter = 0; + lastId = null; } -function createCompactionEntry(summary: string, firstKeptEntryIndex: number): CompactionEntry { +// Reset counter before each test to get predictable IDs +beforeEach(() => { + resetEntryCounter(); +}); + +function createSessionHeader() { return { + type: "session" as const, + version: 2, + id: "test-session", + timestamp: "", + cwd: "", + }; +} + +function createMessageEntry(message: AppMessage): SessionMessageEntry { + const id = `test-id-${entryCounter++}`; + const entry: SessionMessageEntry = { + type: "message", + id, + parentId: lastId, + timestamp: new Date().toISOString(), + message, + }; + lastId = id; + return entry; +} + +function createCompactionEntry(summary: string, firstKeptEntryId: string): CompactionEntry { + const id = `test-id-${entryCounter++}`; + const entry: CompactionEntry = { type: "compaction", + id, + parentId: lastId, timestamp: new Date().toISOString(), summary, - firstKeptEntryIndex, + firstKeptEntryId, tokensBefore: 10000, }; + lastId = id; + return entry; +} + +function createModelChangeEntry(provider: string, modelId: string): ModelChangeEntry { + const id = `test-id-${entryCounter++}`; + const entry: ModelChangeEntry = { + type: "model_change", + id, + 
parentId: lastId, + timestamp: new Date().toISOString(), + provider, + modelId, + }; + lastId = id; + return entry; +} + +function createThinkingLevelEntry(thinkingLevel: string): ThinkingLevelChangeEntry { + const id = `test-id-${entryCounter++}`; + const entry: ThinkingLevelChangeEntry = { + type: "thinking_level_change", + id, + parentId: lastId, + timestamp: new Date().toISOString(), + thinkingLevel, + }; + lastId = id; + return entry; } // ============================================================================ @@ -248,78 +317,59 @@ describe("buildSessionContext", () => { }); it("should handle single compaction", () => { - // indices: 0=session, 1=u1, 2=a1, 3=u2, 4=a2, 5=compaction, 6=u3, 7=a3 - const entries: SessionEntry[] = [ - { - type: "session", - id: "1", - timestamp: "", - cwd: "", - }, - createMessageEntry(createUserMessage("1")), - createMessageEntry(createAssistantMessage("a")), - createMessageEntry(createUserMessage("2")), - createMessageEntry(createAssistantMessage("b")), - createCompactionEntry("Summary of 1,a,2,b", 3), // keep from index 3 (u2) onwards - createMessageEntry(createUserMessage("3")), - createMessageEntry(createAssistantMessage("c")), - ]; + // IDs: u1=test-id-0, a1=test-id-1, u2=test-id-2, a2=test-id-3, compaction=test-id-4, u3=test-id-5, a3=test-id-6 + const u1 = createMessageEntry(createUserMessage("1")); + const a1 = createMessageEntry(createAssistantMessage("a")); + const u2 = createMessageEntry(createUserMessage("2")); + const a2 = createMessageEntry(createAssistantMessage("b")); + const compaction = createCompactionEntry("Summary of 1,a,2,b", u2.id); // keep from u2 onwards + const u3 = createMessageEntry(createUserMessage("3")); + const a3 = createMessageEntry(createAssistantMessage("c")); + + const entries: SessionEntry[] = [createSessionHeader(), u1, a1, u2, a2, compaction, u3, a3]; const loaded = buildSessionContext(entries); - // summary + kept (u2,a2 from idx 3-4) + after (u3,a3 from idx 6-7) = 5 + // summary + kept (u2, a2) + after (u3, a3) = 5 expect(loaded.messages.length).toBe(5); expect(loaded.messages[0].role).toBe("user"); expect((loaded.messages[0] as any).content).toContain("Summary of 1,a,2,b"); }); it("should handle multiple compactions (only latest matters)", () => { - // indices: 0=session, 1=u1, 2=a1, 3=compact1, 4=u2, 5=b, 6=u3, 7=c, 8=compact2, 9=u4, 10=d - const entries: SessionEntry[] = [ - { - type: "session", - id: "1", - timestamp: "", - cwd: "", - }, - createMessageEntry(createUserMessage("1")), - createMessageEntry(createAssistantMessage("a")), - createCompactionEntry("First summary", 1), // keep from index 1 - createMessageEntry(createUserMessage("2")), - createMessageEntry(createAssistantMessage("b")), - createMessageEntry(createUserMessage("3")), - createMessageEntry(createAssistantMessage("c")), - createCompactionEntry("Second summary", 6), // keep from index 6 (u3) onwards - createMessageEntry(createUserMessage("4")), - createMessageEntry(createAssistantMessage("d")), - ]; + // First batch + const u1 = createMessageEntry(createUserMessage("1")); + const a1 = createMessageEntry(createAssistantMessage("a")); + const compact1 = createCompactionEntry("First summary", u1.id); + // Second batch + const u2 = createMessageEntry(createUserMessage("2")); + const b = createMessageEntry(createAssistantMessage("b")); + const u3 = createMessageEntry(createUserMessage("3")); + const c = createMessageEntry(createAssistantMessage("c")); + const compact2 = createCompactionEntry("Second summary", u3.id); // keep from u3 onwards + 
 		// After second compaction
+		const u4 = createMessageEntry(createUserMessage("4"));
+		const d = createMessageEntry(createAssistantMessage("d"));
+
+		const entries: SessionEntry[] = [createSessionHeader(), u1, a1, compact1, u2, b, u3, c, compact2, u4, d];
 		const loaded = buildSessionContext(entries);
-		// summary + kept from idx 6 (u3,c) + after (u4,d) = 5
+		// summary + kept from u3 (u3, c) + after (u4, d) = 5
 		expect(loaded.messages.length).toBe(5);
 		expect((loaded.messages[0] as any).content).toContain("Second summary");
 	});
 
-	it("should clamp firstKeptEntryIndex to valid range", () => {
-		// indices: 0=session, 1=u1, 2=a1, 3=compact1, 4=u2, 5=b, 6=compact2
-		const entries: SessionEntry[] = [
-			{
-				type: "session",
-				id: "1",
-				timestamp: "",
-				cwd: "",
-			},
-			createMessageEntry(createUserMessage("1")),
-			createMessageEntry(createAssistantMessage("a")),
-			createCompactionEntry("First summary", 1),
-			createMessageEntry(createUserMessage("2")),
-			createMessageEntry(createAssistantMessage("b")),
-			createCompactionEntry("Second summary", 0), // index 0 is before compaction1, should still work
-		];
+	it("should keep all messages when firstKeptEntryId is first entry", () => {
+		const u1 = createMessageEntry(createUserMessage("1"));
+		const a1 = createMessageEntry(createAssistantMessage("a"));
+		const compact1 = createCompactionEntry("First summary", u1.id); // keep from first entry
+		const u2 = createMessageEntry(createUserMessage("2"));
+		const b = createMessageEntry(createAssistantMessage("b"));
+
+		const entries: SessionEntry[] = [createSessionHeader(), u1, a1, compact1, u2, b];
 		const loaded = buildSessionContext(entries);
-		// Keeps from index 0, but compaction entries are skipped, so u1,a1,u2,b = 4 + summary = 5
-		// Actually index 0 is session header, so messages are u1,a1,u2,b
-		expect(loaded.messages.length).toBe(5); // summary + 4 messages
+		// summary + all messages (u1, a1, u2, b) = 5
+		expect(loaded.messages.length).toBe(5);
 	});
 
 	it("should track model and thinking level changes", () => {
@@ -331,9 +381,9 @@ describe("buildSessionContext", () => {
 				cwd: "",
 			},
 			createMessageEntry(createUserMessage("1")),
-			{ type: "model_change", timestamp: "", provider: "openai", modelId: "gpt-4" },
+			createModelChangeEntry("openai", "gpt-4"),
 			createMessageEntry(createAssistantMessage("a")),
-			{ type: "thinking_level_change", timestamp: "", thinkingLevel: "high" },
+			createThinkingLevelEntry("high"),
 		];
 
 		const loaded = buildSessionContext(entries);
@@ -380,27 +430,26 @@ describe("Large session fixture", () => {
 // ============================================================================
 
 describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("LLM summarization", () => {
-	it("should generate a compaction event for the large session", async () => {
+	it("should generate a compaction result for the large session", async () => {
 		const entries = loadLargeSessionEntries();
 		const model = getModel("anthropic", "claude-sonnet-4-5")!;
 
-		const compactionEvent = await compact(
+		const compactionResult = await compact(
 			entries,
 			model,
 			DEFAULT_COMPACTION_SETTINGS,
 			process.env.ANTHROPIC_OAUTH_TOKEN!,
 		);
 
-		expect(compactionEvent.type).toBe("compaction");
-		expect(compactionEvent.summary.length).toBeGreaterThan(100);
-		expect(compactionEvent.firstKeptEntryIndex).toBeGreaterThan(0);
-		expect(compactionEvent.tokensBefore).toBeGreaterThan(0);
+		expect(compactionResult.summary.length).toBeGreaterThan(100);
+		expect(compactionResult.firstKeptEntryId).toBeTruthy();
+		expect(compactionResult.tokensBefore).toBeGreaterThan(0);
 
-		console.log("Summary length:", compactionEvent.summary.length);
-		console.log("First kept entry index:", compactionEvent.firstKeptEntryIndex);
-		console.log("Tokens before:", compactionEvent.tokensBefore);
+		console.log("Summary length:", compactionResult.summary.length);
+		console.log("First kept entry ID:", compactionResult.firstKeptEntryId);
+		console.log("Tokens before:", compactionResult.tokensBefore);
 		console.log("\n--- SUMMARY ---\n");
-		console.log(compactionEvent.summary);
+		console.log(compactionResult.summary);
 	}, 60000);
 
 	it("should produce valid session after compaction", async () => {
@@ -408,21 +457,30 @@ describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("LLM summarization", () => {
 		const loaded = buildSessionContext(entries);
 		const model = getModel("anthropic", "claude-sonnet-4-5")!;
 
-		const compactionEvent = await compact(
+		const compactionResult = await compact(
 			entries,
 			model,
 			DEFAULT_COMPACTION_SETTINGS,
 			process.env.ANTHROPIC_OAUTH_TOKEN!,
 		);
 
-		// Simulate appending compaction to entries
-		const newEntries = [...entries, compactionEvent];
+		// Simulate appending compaction to entries by creating a proper entry
+		const lastEntry = entries[entries.length - 1];
+		const parentId = lastEntry.type === "session" ? null : lastEntry.id;
+		const compactionEntry: CompactionEntry = {
+			type: "compaction",
+			id: "compaction-test-id",
+			parentId,
+			timestamp: new Date().toISOString(),
+			...compactionResult,
+		};
+		const newEntries = [...entries, compactionEntry];
 		const reloaded = buildSessionContext(newEntries);
 
 		// Should have summary + kept messages
 		expect(reloaded.messages.length).toBeLessThan(loaded.messages.length);
 		expect(reloaded.messages[0].role).toBe("user");
-		expect((reloaded.messages[0] as any).content).toContain(compactionEvent.summary);
+		expect((reloaded.messages[0] as any).content).toContain(compactionResult.summary);
 
 		console.log("Original messages:", loaded.messages.length);
 		console.log("After compaction:", reloaded.messages.length);
diff --git a/packages/mom/src/context.ts b/packages/mom/src/context.ts
index 00920ae4..2e24bf5a 100644
--- a/packages/mom/src/context.ts
+++ b/packages/mom/src/context.ts
@@ -15,10 +15,14 @@ import {
 	buildSessionContext,
 	type CompactionEntry,
 	type LoadedSession,
+	type MessageContent,
+	type ModelChangeContent,
 	type ModelChangeEntry,
 	type SessionEntry,
 	type SessionMessageEntry,
 	type ThinkingLevelChangeEntry,
+	type ThinkingLevelContent,
+	type TreeNode,
 } from "@mariozechner/pi-coding-agent";
 import { randomBytes } from "crypto";
 import { appendFileSync, existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
@@ -49,6 +53,7 @@ export class MomSessionManager {
 	private channelDir: string;
 	private flushed: boolean = false;
 	private inMemoryEntries: SessionEntry[] = [];
+	private leafId: string | null = null;
 
 	constructor(channelDir: string) {
 		this.channelDir = channelDir;
@@ -64,12 +69,14 @@ export class MomSessionManager {
 		if (existsSync(this.contextFile)) {
 			this.inMemoryEntries = this.loadEntriesFromFile();
 			this.sessionId = this.extractSessionId() || uuidv4();
+			this._updateLeafId();
 			this.flushed = true;
 		} else {
 			this.sessionId = uuidv4();
 			this.inMemoryEntries = [
 				{
 					type: "session",
+					version: 2,
 					id: this.sessionId,
 					timestamp: new Date().toISOString(),
 					cwd: this.channelDir,
@@ -79,6 +86,28 @@ export class MomSessionManager {
 		// Note: syncFromLog() is called explicitly from agent.ts with excludeTimestamp
 	}
 
+	private _updateLeafId(): void {
+		for (let i = this.inMemoryEntries.length - 1; i >= 0; i--) {
+			const entry = this.inMemoryEntries[i];
+			if (entry.type !== "session") {
+				this.leafId = entry.id;
+				return;
+			}
+		}
+		this.leafId = null;
+	}
+
+	private _createTreeNode(): TreeNode {
+		const id = uuidv4();
+		const node: TreeNode = {
+			id,
+			parentId: this.leafId,
+			timestamp: new Date().toISOString(),
+		};
+		this.leafId = id;
+		return node;
+	}
+
 	private _persist(entry: SessionEntry): void {
 		const hasAssistant = this.inMemoryEntries.some((e) => e.type === "message" && e.message.role === "assistant");
 		if (!hasAssistant) return;
@@ -206,11 +235,15 @@ export class MomSessionManager {
 		newMessages.sort((a, b) => new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime());
 
 		for (const { timestamp, message } of newMessages) {
+			const id = uuidv4();
 			const entry: SessionMessageEntry = {
 				type: "message",
+				id,
+				parentId: this.leafId,
 				timestamp, // Use log date as entry timestamp for consistent deduplication
 				message,
 			};
+			this.leafId = id;
 			this.inMemoryEntries.push(entry);
 
 			appendFileSync(this.contextFile, `${JSON.stringify(entry)}\n`);
@@ -247,32 +280,22 @@
 	}
 
 	saveMessage(message: AppMessage): void {
-		const entry: SessionMessageEntry = {
-			type: "message",
-			timestamp: new Date().toISOString(),
-			message,
-		};
+		const content: MessageContent = { type: "message", message };
+		const entry: SessionMessageEntry = { ...this._createTreeNode(), ...content };
 		this.inMemoryEntries.push(entry);
 		this._persist(entry);
 	}
 
 	saveThinkingLevelChange(thinkingLevel: string): void {
-		const entry: ThinkingLevelChangeEntry = {
-			type: "thinking_level_change",
-			timestamp: new Date().toISOString(),
-			thinkingLevel,
-		};
+		const content: ThinkingLevelContent = { type: "thinking_level_change", thinkingLevel };
+		const entry: ThinkingLevelChangeEntry = { ...this._createTreeNode(), ...content };
 		this.inMemoryEntries.push(entry);
 		this._persist(entry);
 	}
 
 	saveModelChange(provider: string, modelId: string): void {
-		const entry: ModelChangeEntry = {
-			type: "model_change",
-			timestamp: new Date().toISOString(),
-			provider,
-			modelId,
-		};
+		const content: ModelChangeContent = { type: "model_change", provider, modelId };
+		const entry: ModelChangeEntry = { ...this._createTreeNode(), ...content };
 		this.inMemoryEntries.push(entry);
 		this._persist(entry);
 	}
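
For readers skimming the diff: the change threads an id/parentId pair through every session entry, so the flat JSONL log becomes a parent-linked chain anchored at the current leaf. Below is a minimal standalone TypeScript sketch of that idea; the TreeNode, MessageContent, and SessionMessageEntry shapes are stand-ins that only mirror the field names visible in the diff, not the actual exports of @mariozechner/pi-coding-agent.

import { randomUUID } from "crypto";

// Stand-in for the library's TreeNode base: every entry gets an id, a pointer to
// the previous leaf, and a timestamp.
interface TreeNode {
	id: string;
	parentId: string | null;
	timestamp: string;
}

type MessageContent = { type: "message"; message: { role: string; content: string } };
type SessionMessageEntry = TreeNode & MessageContent;

// Mirrors the role of _createTreeNode(): each call parents the new node to the
// previous leaf and advances the leaf pointer.
let leafId: string | null = null;
function createTreeNode(): TreeNode {
	const node: TreeNode = { id: randomUUID(), parentId: leafId, timestamp: new Date().toISOString() };
	leafId = node.id;
	return node;
}

const first: SessionMessageEntry = { ...createTreeNode(), type: "message", message: { role: "user", content: "hi" } };
const second: SessionMessageEntry = { ...createTreeNode(), type: "message", message: { role: "assistant", content: "hello" } };
console.log(second.parentId === first.id); // true: entries form a chain from the leaf back to the root

The point of the id-based chain is that references no longer depend on array positions: a compaction can name the id of the first kept entry, and that reference stays valid even if entries are later inserted or branched.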
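
Likewise, the tests now treat compact() as returning only the compaction data, and the caller wraps it in a CompactionEntry before appending it to the session. A rough sketch of that call pattern under the same caveats: the CompactionResult shape and the appendCompaction helper below are illustrative assumptions that mirror the fields the tests read, not part of the package's API.

import { randomUUID } from "crypto";

// Assumed shape of the data returned by compact(), based on the fields the tests use.
interface CompactionResult {
	summary: string;
	firstKeptEntryId: string;
	tokensBefore: number;
}

type SessionEntry =
	| { type: "session"; id: string }
	| { type: "message"; id: string; parentId: string | null }
	| ({ type: "compaction"; id: string; parentId: string | null; timestamp: string } & CompactionResult);

// Hypothetical helper: wrap a compaction result as a proper entry, parented to the
// last entry (or to nothing if only the session header exists), and append it.
function appendCompaction(entries: SessionEntry[], result: CompactionResult): SessionEntry[] {
	const last = entries[entries.length - 1];
	const parentId = last.type === "session" ? null : last.id;
	return [
		...entries,
		{
			type: "compaction",
			id: randomUUID(),
			parentId,
			timestamp: new Date().toISOString(),
			...result,
		},
	];
}

On reload, buildSessionContext then replaces everything before firstKeptEntryId with the summary, which is what the second LLM test asserts.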