Merge branch 'main' into fix/chutes-ai-provider-400-error

This commit is contained in:
butelo 2025-11-20 15:04:29 +01:00 committed by GitHub
commit b76f7a0f88
63 changed files with 4781 additions and 3540 deletions

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-ai",
"version": "0.7.10",
"version": "0.7.25",
"description": "Unified LLM API with automatic model discovery and provider configuration",
"type": "module",
"main": "./dist/index.js",
@ -21,7 +21,7 @@
},
"dependencies": {
"@anthropic-ai/sdk": "^0.61.0",
"@google/genai": "^1.17.0",
"@google/genai": "^1.30.0",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",

View file

@ -295,7 +295,7 @@ async function generateModels() {
// Combine models (models.dev has priority)
const allModels = [...modelsDevModels, ...openRouterModels];
// Add missing gpt models (can't use tools)
// Add missing gpt models
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) {
allModels.push({
id: "gpt-5-chat-latest",
@ -316,6 +316,26 @@ async function generateModels() {
});
}
// Fallback entry for gpt-5.1-codex: only added when the combined model list
// does not already contain an openai/gpt-5.1-codex entry.
// The cost values are kept in line with the generated MODELS entry for the
// same id (output: 10, cacheWrite: 0) so the fallback cannot silently
// under-report output cost or invent a cache-write charge.
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) {
	allModels.push({
		id: "gpt-5.1-codex",
		name: "GPT-5.1 Codex",
		api: "openai-responses",
		baseUrl: "https://api.openai.com/v1",
		provider: "openai",
		reasoning: true,
		input: ["text", "image"],
		cost: {
			input: 1.25,
			output: 10,
			cacheRead: 0.125,
			cacheWrite: 0,
		},
		contextWindow: 400000,
		maxTokens: 128000,
	});
}
// Add missing Grok models
if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) {
allModels.push({

View file

@ -164,6 +164,9 @@ async function streamAssistantResponse(
} else {
context.messages.push(finalMessage);
}
if (!addedPartial) {
stream.push({ type: "message_start", message: { ...finalMessage } });
}
stream.push({ type: "message_end", message: finalMessage });
return finalMessage;
}

View file

@ -364,6 +364,23 @@ export const MODELS = {
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"google-generative-ai">,
"gemini-3-pro-preview": {
id: "gemini-3-pro-preview",
name: "Gemini 3 Pro Preview",
api: "google-generative-ai",
provider: "google",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 12,
cacheRead: 0.2,
cacheWrite: 0,
},
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"google-generative-ai">,
"gemini-2.5-flash": {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
@ -723,6 +740,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-responses">,
"gpt-5.1-codex": {
id: "gpt-5.1-codex",
name: "GPT-5.1 Codex",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-responses">,
"gpt-4o-2024-08-06": {
id: "gpt-4o-2024-08-06",
name: "GPT-4o (2024-08-06)",
@ -791,6 +825,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-responses">,
"gpt-5.1-codex-mini": {
id: "gpt-5.1-codex-mini",
name: "GPT-5.1 Codex mini",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.25,
output: 2,
cacheRead: 0.025,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-responses">,
"o3-mini": {
id: "o3-mini",
name: "o3-mini",
@ -1080,6 +1131,23 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 272000,
} satisfies Model<"openai-responses">,
"gpt-5.1-chat-latest": {
id: "gpt-5.1-chat-latest",
name: "GPT-5.1 Chat",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-responses">,
"gpt-5-chat-latest": {
id: "gpt-5-chat-latest",
name: "GPT-5 Chat Latest",
@ -1562,6 +1630,23 @@ export const MODELS = {
contextWindow: 8192,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"grok-4.1-fast-non-reasoning": {
id: "grok-4.1-fast-non-reasoning",
name: "Grok 4.1 Fast (Non-Reasoning)",
api: "openai-completions",
provider: "xai",
baseUrl: "https://api.x.ai/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.2,
output: 0.5,
cacheRead: 0.05,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"grok-3": {
id: "grok-3",
name: "Grok 3",
@ -1732,6 +1817,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"grok-4.1-fast": {
id: "grok-4.1-fast",
name: "Grok 4.1 Fast",
api: "openai-completions",
provider: "xai",
baseUrl: "https://api.x.ai/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.2,
output: 0.5,
cacheRead: 0.05,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"grok-3-mini-latest": {
id: "grok-3-mini-latest",
name: "Grok 3 Mini Latest",
@ -1855,6 +1957,40 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
openrouter: {
"x-ai/grok-4.1-fast": {
id: "x-ai/grok-4.1-fast",
name: "xAI: Grok 4.1 Fast",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 2000000,
maxTokens: 30000,
} satisfies Model<"openai-completions">,
"google/gemini-3-pro-preview": {
id: "google/gemini-3-pro-preview",
name: "Google: Gemini 3 Pro Preview",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 2,
output: 12,
cacheRead: 0.19999999999999998,
cacheWrite: 2.375,
},
contextWindow: 1048576,
maxTokens: 65536,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1": {
id: "openai/gpt-5.1",
name: "OpenAI: GPT-5.1",
@ -1872,6 +2008,23 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1-chat": {
id: "openai/gpt-5.1-chat",
name: "OpenAI: GPT-5.1 Chat",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-5.1-codex": {
id: "openai/gpt-5.1-codex",
name: "OpenAI: GPT-5.1-Codex",
@ -1932,8 +2085,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.55,
output: 2.25,
input: 0.44999999999999996,
output: 2.35,
cacheRead: 0,
cacheWrite: 0,
},
@ -2127,40 +2280,6 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"inclusionai/ring-1t": {
id: "inclusionai/ring-1t",
name: "inclusionAI: Ring 1T",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.5700000000000001,
output: 2.2800000000000002,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"inclusionai/ling-1t": {
id: "inclusionai/ling-1t",
name: "inclusionAI: Ling-1T",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.5700000000000001,
output: 2.2800000000000002,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"openai/o3-deep-research": {
id: "openai/o3-deep-research",
name: "OpenAI: o3 Deep Research",
@ -2391,13 +2510,13 @@ export const MODELS = {
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.22,
output: 0.88,
input: 0.21,
output: 1.9,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
contextWindow: 131072,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"qwen/qwen3-max": {
id: "qwen/qwen3-max",
@ -2765,13 +2884,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.09,
output: 0.3,
input: 0.051,
output: 0.33999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 131072,
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"x-ai/grok-code-fast-1": {
id: "x-ai/grok-code-fast-1",
@ -3020,13 +3139,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0,
output: 0,
input: 0.04,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-120b:exacto": {
id: "openai/gpt-oss-120b:exacto",
@ -3470,23 +3589,6 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 40000,
} satisfies Model<"openai-completions">,
"google/gemini-2.5-flash-lite-preview-06-17": {
id: "google/gemini-2.5-flash-lite-preview-06-17",
name: "Google: Gemini 2.5 Flash Lite Preview 06-17",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 0.09999999999999999,
output: 0.39999999999999997,
cacheRead: 0.024999999999999998,
cacheWrite: 0.18330000000000002,
},
contextWindow: 1048576,
maxTokens: 65535,
} satisfies Model<"openai-completions">,
"google/gemini-2.5-flash": {
id: "google/gemini-2.5-flash",
name: "Google: Gemini 2.5 Flash",
@ -3649,8 +3751,8 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 1.75,
input: 0.19999999999999998,
output: 4.5,
cacheRead: 0,
cacheWrite: 0,
},
@ -3725,23 +3827,6 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 100000,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.3-8b-instruct:free": {
id: "meta-llama/llama-3.3-8b-instruct:free",
name: "Meta: Llama 3.3 8B Instruct (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"nousresearch/deephermes-3-mistral-24b-preview": {
id: "nousresearch/deephermes-3-mistral-24b-preview",
name: "Nous: DeepHermes 3 Mistral 24B Preview",
@ -4082,23 +4167,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-maverick:free": {
id: "meta-llama/llama-4-maverick:free",
name: "Meta: Llama 4 Maverick (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-maverick": {
id: "meta-llama/llama-4-maverick",
name: "Meta: Llama 4 Maverick",
@ -4116,23 +4184,6 @@ export const MODELS = {
contextWindow: 1048576,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-scout:free": {
id: "meta-llama/llama-4-scout:free",
name: "Meta: Llama 4 Scout (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4028,
} satisfies Model<"openai-completions">,
"meta-llama/llama-4-scout": {
id: "meta-llama/llama-4-scout",
name: "Meta: Llama 4 Scout",
@ -4227,13 +4278,13 @@ export const MODELS = {
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.09,
output: 0.16,
input: 0.07,
output: 0.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"qwen/qwq-32b": {
id: "qwen/qwq-32b",
@ -4779,23 +4830,6 @@ export const MODELS = {
contextWindow: 200000,
maxTokens: 8192,
} satisfies Model<"openai-completions">,
"mistralai/ministral-8b": {
id: "mistralai/ministral-8b",
name: "Mistral: Ministral 8B",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.09999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-3b": {
id: "mistralai/ministral-3b",
name: "Mistral: Ministral 3B",
@ -4813,6 +4847,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-8b": {
id: "mistralai/ministral-8b",
name: "Mistral: Ministral 8B",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.09999999999999999,
output: 0.09999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"qwen/qwen-2.5-7b-instruct": {
id: "qwen/qwen-2.5-7b-instruct",
name: "Qwen: Qwen2.5 7B Instruct",
@ -4839,8 +4890,8 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.6,
output: 0.6,
input: 1.2,
output: 1.2,
cacheRead: 0,
cacheWrite: 0,
},
@ -5017,22 +5068,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.02,
output: 0.03,
input: 0.39999999999999997,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 16384,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
@ -5051,22 +5102,22 @@ export const MODELS = {
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
"meta-llama/llama-3.1-8b-instruct": {
id: "meta-llama/llama-3.1-8b-instruct",
name: "Meta: Llama 3.1 8B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.39999999999999997,
output: 0.39999999999999997,
input: 0.02,
output: 0.03,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
@ -5085,23 +5136,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0.075,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
@ -5119,22 +5153,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"anthropic/claude-3.5-sonnet-20240620": {
id: "anthropic/claude-3.5-sonnet-20240620",
name: "Anthropic: Claude 3.5 Sonnet (2024-06-20)",
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 3,
output: 15,
cacheRead: 0.3,
cacheWrite: 3.75,
input: 0.15,
output: 0.6,
cacheRead: 0.075,
cacheWrite: 0,
},
contextWindow: 200000,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"sao10k/l3-euryale-70b": {
id: "sao10k/l3-euryale-70b",
@ -5221,23 +5255,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o": {
id: "openai/gpt-4o",
name: "OpenAI: GPT-4o",
@ -5272,6 +5289,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
@ -5391,23 +5425,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
@ -5425,6 +5442,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
@ -5493,23 +5527,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-7b-instruct-v0.1": {
id: "mistralai/mistral-7b-instruct-v0.1",
name: "Mistral: Mistral 7B Instruct v0.1",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.11,
output: 0.19,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 2824,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-16k": {
id: "openai/gpt-3.5-turbo-16k",
name: "OpenAI: GPT-3.5 Turbo 16k",
@ -5544,23 +5561,6 @@ export const MODELS = {
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4": {
id: "openai/gpt-4",
name: "OpenAI: GPT-4",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo": {
id: "openai/gpt-3.5-turbo",
name: "OpenAI: GPT-3.5 Turbo",
@ -5578,6 +5578,23 @@ export const MODELS = {
contextWindow: 16385,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4": {
id: "openai/gpt-4",
name: "OpenAI: GPT-4",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 30,
output: 60,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8191,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openrouter/auto": {
id: "openrouter/auto",
name: "OpenRouter: Auto Router",

View file

@ -460,11 +460,20 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
});
} else if (block.type === "thinking") {
if (block.thinking.trim().length === 0) continue;
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature || "",
});
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to text block to avoid API rejection
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
blocks.push({
type: "text",
text: sanitizeSurrogates(`<thinking>\n${block.thinking}\n</thinking>`),
});
} else {
blocks.push({
type: "thinking",
thinking: sanitizeSurrogates(block.thinking),
signature: block.thinkingSignature,
});
}
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",

View file

@ -162,6 +162,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
id: toolCallId,
name: part.functionCall.name || "",
arguments: part.functionCall.args as Record<string, any>,
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
// Validate tool arguments if tool definition is available
@ -361,13 +362,17 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
const part: Part = {
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
};
if (block.thoughtSignature) {
part.thoughtSignature = block.thoughtSignature;
}
parts.push(part);
}
}

View file

@ -273,7 +273,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
stream_options: { include_usage: true },
};
// Cerebras/xAI/Mistral/Chutes dont like the "store" field
// Cerebras/xAI/Mistral don't like the "store" field
if (
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
@ -284,8 +284,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
}
if (options?.maxTokens) {
// Mistral/Chutes use max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
// Mistral/Chutes use max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
(params as any).max_tokens = options?.maxTokens;
} else {
params.max_completion_tokens = options?.maxTokens;

View file

@ -74,6 +74,7 @@ export interface ToolCall {
id: string;
name: string;
arguments: Record<string, any>;
thoughtSignature?: string; // Google-specific: opaque signature for reusing thought context
}
export interface Usage {

View file

@ -0,0 +1,95 @@
import { type Static, Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context, Tool } from "../src/types.js";
/** Schema of the single `path` argument accepted by the read tool. */
const readPathField = Type.String({ description: "Path to the file to read" });

/** Parameter schema for the simple read tool: an object with one string field, `path`. */
const readSchema = Type.Object({ path: readPathField });

/** Static TypeScript type derived from `readSchema`. */
type ReadParams = Static<typeof readSchema>;
/** Tool definition handed to the model in the test context; its arguments are described by `readSchema`. */
const readTool: Tool = { name: "read", description: "Read contents of a file", parameters: readSchema };
/**
 * Round-trip test for `thoughtSignature` on tool calls.
 *
 * The suite is skipped when GEMINI_API_KEY is not set. It performs two
 * `complete` calls: the first should yield text plus a tool call, the second
 * replays that assistant message (followed by a tool result and a new user
 * message) and must not end with an error.
 */
describe("Google Thought Signature Tests", () => {
	describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini 3 Pro - Text + Tool Call", () => {
		const model = getModel("google", "gemini-3-pro-preview");

		it("should handle text + tool call in same response and preserve thoughtSignature on subsequent requests", async () => {
			// The opening user message asks for an explanation AND a tool call,
			// to encourage text and a tool call in the same response.
			const context: Context = {
				systemPrompt: "You are a helpful assistant. Think through your actions before using tools.",
				messages: [
					{
						role: "user",
						content:
							"I need you to read the file packages/coding-agent/CHANGELOG.md. First explain what you're going to do, then use the read tool.",
						timestamp: Date.now(),
					},
				],
				tools: [readTool],
			};

			// First turn: expected to contain text + tool call.
			const firstResponse = await complete(model, context);
			console.log("First response:", JSON.stringify(firstResponse, null, 2));

			const hasText = firstResponse.content.some((b) => b.type === "text");
			const toolCall = firstResponse.content.find((b) => b.type === "toolCall");

			// Model behavior varies; without both parts there is nothing to verify.
			if (!hasText || toolCall === undefined) {
				console.log("Model did not generate text + tool call in same response, skipping test");
				return;
			}
			// Narrows the content-block union to the tool-call variant.
			if (toolCall.type !== "toolCall") {
				throw new Error("Expected tool call");
			}

			// Show whether a thoughtSignature was captured on the tool call.
			console.log("Tool call thoughtSignature:", toolCall.thoughtSignature);

			// Replay the assistant turn, supply the tool result, then ask a follow-up.
			// The follow-up request converts the assistant message (text + tool call)
			// back to Google's format; per the test's premise, a missing
			// thoughtSignature would make that request fail.
			context.messages.push(
				firstResponse,
				{
					role: "toolResult",
					toolCallId: toolCall.id,
					toolName: toolCall.name,
					content: [{ type: "text", text: "# Changelog\n\n## [Unreleased]\n\n### Fixed\n\n- Some fix" }],
					isError: false,
					timestamp: Date.now(),
				},
				{
					role: "user",
					content: "Great, now tell me what version is unreleased?",
					timestamp: Date.now(),
				},
			);

			// Second turn: this is where an error would surface if the signature were lost.
			const secondResponse = await complete(model, context);
			console.log("Second response:", JSON.stringify(secondResponse, null, 2));

			// The request should succeed and produce content.
			expect(secondResponse.stopReason).not.toBe("error");
			expect(secondResponse.errorMessage).toBeUndefined();
			expect(secondResponse.content.length).toBeGreaterThan(0);
		}, 30000);
	});
});