Merge branch 'main' into fix/chutes-ai-provider-400-error

This commit is contained in:
butelo 2025-11-20 15:04:29 +01:00 committed by GitHub
commit b76f7a0f88
63 changed files with 4781 additions and 3540 deletions

View file

@ -164,6 +164,9 @@ async function streamAssistantResponse(
} else {
context.messages.push(finalMessage);
}
if (!addedPartial) {
stream.push({ type: "message_start", message: { ...finalMessage } });
}
stream.push({ type: "message_end", message: finalMessage });
return finalMessage;
}

View file

@ -364,6 +364,23 @@ export const MODELS = {
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-generative-ai">,
"gemini-3-pro-preview": {
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview",
api: "google-generative-ai",
provider: "google",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 12,
cacheRead: 0.2,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"google-generative-ai">,
"gemini-2.5-flash": {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
@ -723,6 +740,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-responses">,
"gpt-5.1-codex": {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-responses">,
"gpt-4o-2024-08-06": {
id: "gpt-4o-2024-08-06",
name: "GPT-4o (2024-08-06)",
@ -791,6 +825,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-responses">,
"gpt-5.1-codex-mini": {
id: "gpt-5.1-codex-mini",
name: "GPT-5.1 Codex mini",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-responses">,
"o3-mini": {
id: "o3-mini",
name: "o3-mini",
@ -1080,6 +1131,23 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 272000,
} satisfies Model<"openai-responses">,
"gpt-5.1-chat-latest": {
id: "gpt-5.1-chat-latest",
name: "GPT-5.1 Chat",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-responses">,
"gpt-5-chat-latest": {
id: "gpt-5-chat-latest",
name: "GPT-5 Chat Latest",
@ -1562,6 +1630,23 @@ export const MODELS = {
contextWindow: 8192,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"grok-4.1-fast-non-reasoning": {
id: "grok-4.1-fast-non-reasoning",
name: "Grok 4.1 Fast (Non-Reasoning)",
api: "openai-completions",
provider: "xai",
baseUrl: "https://api.x.ai/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.2,
output: 0.5,
cacheRead: 0.05,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"grok-3": {
id: "grok-3",
name: "Grok 3",
@ -1732,6 +1817,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"grok-4.1-fast": {
id: "grok-4.1-fast",
name: "Grok 4.1 Fast",
api: "openai-completions",
provider: "xai",
baseUrl: "https://api.x.ai/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.2,
output: 0.5,
cacheRead: 0.05,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"grok-3-mini-latest": {
id: "grok-3-mini-latest",
name: "Grok 3 Mini Latest",
@ -1855,6 +1957,40 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
openrouter: {
"x-ai/grok-4.1-fast": {
id: "x-ai/grok-4.1-fast",
name: "xAI: Grok 4.1 Fast",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"google/gemini-3-pro-preview": {
id: "google/gemini-3-pro-preview",
name: "Google: Gemini 3 Pro Preview",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 12,
cacheRead: 0.19999999999999998,
cacheWrite: 2.375,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1": {
id: "openai/gpt-5.1",
name: "OpenAI: GPT-5.1",
@ -1872,6 +2008,23 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1-chat": {
id: "openai/gpt-5.1-chat",
name: "OpenAI: GPT-5.1 Chat",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1-codex": {
id: "openai/gpt-5.1-codex",
name: "OpenAI: GPT-5.1-Codex",
@ -1932,8 +2085,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.55,
output: 2.25,
input: 0.44999999999999996,
output: 2.35,
cacheRead: 0,
cacheWrite: 0,
},
@ -2127,40 +2280,6 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"inclusionai/ring-1t": {
id: "inclusionai/ring-1t",
name: "inclusionAI: Ring 1T",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.5700000000000001,
output: 2.2800000000000002,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"inclusionai/ling-1t": {
id: "inclusionai/ling-1t",
name: "inclusionAI: Ling-1T",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.5700000000000001,
output: 2.2800000000000002,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"openai/o3-deep-research": {
id: "openai/o3-deep-research",
name: "OpenAI: o3 Deep Research",
@ -2391,13 +2510,13 @@ export const MODELS = {
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.22,
output: 0.88,
input: 0.21,
output: 1.9,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
contextWindow: 131072,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"qwen/qwen3-max": {
id: "qwen/qwen3-max",
@ -2765,13 +2884,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.09,
output: 0.3,
input: 0.051,
output: 0.33999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 131072,
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"x-ai/grok-code-fast-1": {
id: "x-ai/grok-code-fast-1",
@ -3020,13 +3139,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
input: 0.04,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-120b:exacto": {
id: "openai/gpt-oss-120b:exacto",
@ -3470,23 +3589,6 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 40000,
} satisfies Model<"openai-completions">,
"google/gemini-2.5-flash-lite-preview-06-17": {
id: "google/gemini-2.5-flash-lite-preview-06-17",
name: "Google: Gemini 2.5 Flash Lite Preview 06-17",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.09999999999999999,
output: 0.39999999999999997,
cacheRead: 0.024999999999999998,
cacheWrite: 0.18330000000000002,
},
contextWindow: 1048576,
maxTokens: 65535,
} satisfies Model<"openai-completions">,
"google/gemini-2.5-flash": {
id: "google/gemini-2.5-flash",
name: "Google: Gemini 2.5 Flash",
@ -3649,8 +3751,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 1.75,
input: 0.19999999999999998,
output: 4.5,
cacheRead: 0,
cacheWrite: 0,
},
@ -3725,23 +3827,6 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.3-8b-instruct:free": {
id: "meta-llama/llama-3.3-8b-instruct:free",
name: "Meta: Llama 3.3 8B Instruct (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"nousresearch/deephermes-3-mistral-24b-preview": {
id: "nousresearch/deephermes-3-mistral-24b-preview",
name: "Nous: DeepHermes 3 Mistral 24B Preview",
@ -4082,23 +4167,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-maverick:free": {
id: "meta-llama/llama-4-maverick:free",
name: "Meta: Llama 4 Maverick (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-maverick": {
id: "meta-llama/llama-4-maverick",
name: "Meta: Llama 4 Maverick",
@ -4116,23 +4184,6 @@ export const MODELS = {
contextWindow: 1048576,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-scout:free": {
id: "meta-llama/llama-4-scout:free",
name: "Meta: Llama 4 Scout (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-scout": {
id: "meta-llama/llama-4-scout",
name: "Meta: Llama 4 Scout",
@ -4227,13 +4278,13 @@ export const MODELS = {
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.09,
output: 0.16,
input: 0.07,
output: 0.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"qwen/qwq-32b": {
id: "qwen/qwq-32b",
@ -4779,23 +4830,6 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"mistralai/ministral-8b": {
id: "mistralai/ministral-8b",
name: "Mistral: Ministral 8B",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.09999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-3b": {
id: "mistralai/ministral-3b",
name: "Mistral: Ministral 3B",
@ -4813,6 +4847,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-8b": {
id: "mistralai/ministral-8b",
name: "Mistral: Ministral 8B",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.09999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen-2.5-7b-instruct": {
id: "qwen/qwen-2.5-7b-instruct",
name: "Qwen: Qwen2.5 7B Instruct",
@ -4839,8 +4890,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.6,
output: 0.6,
input: 1.2,
output: 1.2,
cacheRead: 0,
cacheWrite: 0,
},
@ -5017,22 +5068,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.02,
output: 0.03,
input: 0.39999999999999997,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
@ -5051,22 +5102,22 @@ export const MODELS = {
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 0.39999999999999997,
input: 0.02,
output: 0.03,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
@ -5085,23 +5136,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0.075,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
@ -5119,22 +5153,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"anthropic/claude-3.5-sonnet-20240620": {
id: "anthropic/claude-3.5-sonnet-20240620",
name: "Anthropic: Claude 3.5 Sonnet (2024-06-20)",
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
input: 0.15,
output: 0.6,
cacheRead: 0.075,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"sao10k/l3-euryale-70b": {
id: "sao10k/l3-euryale-70b",
@ -5221,23 +5255,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o": {
id: "openai/gpt-4o",
name: "OpenAI: GPT-4o",
@ -5272,6 +5289,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
@ -5391,23 +5425,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
@ -5425,6 +5442,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
@ -5493,23 +5527,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.1": {
id: "mistralai/mistral-7b-instruct-v0.1",
name: "Mistral: Mistral 7B Instruct v0.1",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.11,
output: 0.19,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 2824,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-16k": {
id: "openai/gpt-3.5-turbo-16k",
name: "OpenAI: GPT-3.5 Turbo 16k",
@ -5544,23 +5561,6 @@ export const MODELS = {
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4": {
id: "openai/gpt-4",
name: "OpenAI: GPT-4",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo": {
id: "openai/gpt-3.5-turbo",
name: "OpenAI: GPT-3.5 Turbo",
@ -5578,6 +5578,23 @@ export const MODELS = {
contextWindow: 16385,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4": {
id: "openai/gpt-4",
name: "OpenAI: GPT-4",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openrouter/auto": {
id: "openrouter/auto",
name: "OpenRouter: Auto Router",

View file

@ -460,11 +460,20 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
});
} else if (block.type === "thinking") {
if (block.thinking.trim().length === 0) continue;
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature || "",
});
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to text block to avoid API rejection
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
blocks.push({
type: "text",
text: sanitizeSurrogates(`<thinking>\n${block.thinking}\n</thinking>`),
});
} else {
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature,
});
}
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",

View file

@ -162,6 +162,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
id: toolCallId,
name: part.functionCall.name || "",
arguments: part.functionCall.args as Record<string, any>,
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
// Validate tool arguments if tool definition is available
@ -361,13 +362,17 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
const part: Part = {
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
};
if (block.thoughtSignature) {
part.thoughtSignature = block.thoughtSignature;
}
parts.push(part);
}
}

View file

@ -273,7 +273,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
stream_options: { include_usage: true },
};
// Cerebras/xAI/Mistral/Chutes don't like the "store" field
// Cerebras/xAI/Mistral don't like the "store" field
if (
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
@ -284,8 +284,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
}
if (options?.maxTokens) {
// Mistral/Chutes use max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
// Mistral/Chutes use max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
(params as any).max_tokens = options?.maxTokens;
} else {
params.max_completion_tokens = options?.maxTokens;

View file

@ -74,6 +74,7 @@ export interface ToolCall {
id: string;
name: string;
arguments: Record<string, any>;
thoughtSignature?: string; // Google-specific: opaque signature for reusing thought context
}
export interface Usage {