Merge branch 'main' into fix/chutes-ai-provider-400-error

2026-04-17 07:03:25 +00:00 · 2025-11-20 15:04:29 +01:00 · 2025-11-20 15:04:29 +01:00 · b76f7a0f88
commit b76f7a0f88
parent bd1731c9ba 4ccc7d47e0
63 changed files with 4781 additions and 3540 deletions
--- a/packages/ai/package.json
+++ b/packages/ai/package.json
@ -1,6 +1,6 @@
 {
 	"name": "@mariozechner/pi-ai",
-	"version": "0.7.10",
+	"version": "0.7.25",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"type": "module",
 	"main": "./dist/index.js",
@ -21,7 +21,7 @@
 	},
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.61.0",
-		"@google/genai": "^1.17.0",
+		"@google/genai": "^1.30.0",
 		"@sinclair/typebox": "^0.34.41",
 		"ajv": "^8.17.1",
 		"ajv-formats": "^3.0.1",
--- a/packages/ai/scripts/generate-models.ts
+++ b/packages/ai/scripts/generate-models.ts
@ -295,7 +295,7 @@ async function generateModels() {
 	// Combine models (models.dev has priority)
 	const allModels = [...modelsDevModels, ...openRouterModels];

-	// Add missing gpt models (can't use tools)
+	// Add missing gpt models
 	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5-chat-latest")) {
 		allModels.push({
 			id: "gpt-5-chat-latest",
@ -316,6 +316,26 @@ async function generateModels() {
 		});
 	}

+	if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex")) { // fallback: only added when no upstream source lists it
+		allModels.push({
+			id: "gpt-5.1-codex",
+			name: "GPT-5.1 Codex",
+			api: "openai-responses",
+			baseUrl: "https://api.openai.com/v1",
+			provider: "openai",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10, // kept in sync with the gpt-5.1-codex entry in models.generated.ts
+				cacheRead: 0.125,
+				cacheWrite: 0, // generated catalog records no cache-write cost for this model
+			},
+			contextWindow: 400000,
+			maxTokens: 128000,
+		});
+	}
+
 	// Add missing Grok models
 	if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) {
 		allModels.push({
--- a/packages/ai/src/agent/agent-loop.ts
+++ b/packages/ai/src/agent/agent-loop.ts
@ -164,6 +164,9 @@ async function streamAssistantResponse(
 				} else {
 					context.messages.push(finalMessage);
 				}
+				if (!addedPartial) {
+					stream.push({ type: "message_start", message: { ...finalMessage } });
+				}
 				stream.push({ type: "message_end", message: finalMessage });
 				return finalMessage;
 			}
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -364,6 +364,23 @@ export const MODELS = {
 			contextWindow: 1048576,
 			maxTokens: 65536,
 		} satisfies Model<"google-generative-ai">,
+		"gemini-3-pro-preview": {
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview",
+			api: "google-generative-ai",
+			provider: "google",
+			baseUrl: "https://generativelanguage.googleapis.com/v1beta",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 2,
+				output: 12,
+				cacheRead: 0.2,
+				cacheWrite: 0,
+			},
+			contextWindow: 1000000,
+			maxTokens: 64000,
+		} satisfies Model<"google-generative-ai">,
 		"gemini-2.5-flash": {
 			id: "gemini-2.5-flash",
 			name: "Gemini 2.5 Flash",
@ -723,6 +740,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-responses">,
+		"gpt-5.1-codex": {
+			id: "gpt-5.1-codex",
+			name: "GPT-5.1 Codex",
+			api: "openai-responses",
+			provider: "openai",
+			baseUrl: "https://api.openai.com/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10,
+				cacheRead: 0.125,
+				cacheWrite: 0,
+			},
+			contextWindow: 400000,
+			maxTokens: 128000,
+		} satisfies Model<"openai-responses">,
 		"gpt-4o-2024-08-06": {
 			id: "gpt-4o-2024-08-06",
 			name: "GPT-4o (2024-08-06)",
@ -791,6 +825,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-responses">,
+		"gpt-5.1-codex-mini": {
+			id: "gpt-5.1-codex-mini",
+			name: "GPT-5.1 Codex mini",
+			api: "openai-responses",
+			provider: "openai",
+			baseUrl: "https://api.openai.com/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0.25,
+				output: 2,
+				cacheRead: 0.025,
+				cacheWrite: 0,
+			},
+			contextWindow: 400000,
+			maxTokens: 128000,
+		} satisfies Model<"openai-responses">,
 		"o3-mini": {
 			id: "o3-mini",
 			name: "o3-mini",
@ -1080,6 +1131,23 @@ export const MODELS = {
 			contextWindow: 400000,
 			maxTokens: 272000,
 		} satisfies Model<"openai-responses">,
+		"gpt-5.1-chat-latest": {
+			id: "gpt-5.1-chat-latest",
+			name: "GPT-5.1 Chat",
+			api: "openai-responses",
+			provider: "openai",
+			baseUrl: "https://api.openai.com/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10,
+				cacheRead: 0.125,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 16384,
+		} satisfies Model<"openai-responses">,
 		"gpt-5-chat-latest": {
 			id: "gpt-5-chat-latest",
 			name: "GPT-5 Chat Latest",
@ -1562,6 +1630,23 @@ export const MODELS = {
 			contextWindow: 8192,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"grok-4.1-fast-non-reasoning": {
+			id: "grok-4.1-fast-non-reasoning",
+			name: "Grok 4.1 Fast (Non-Reasoning)",
+			api: "openai-completions",
+			provider: "xai",
+			baseUrl: "https://api.x.ai/v1",
+			reasoning: false,
+			input: ["text", "image"],
+			cost: {
+				input: 0.2,
+				output: 0.5,
+				cacheRead: 0.05,
+				cacheWrite: 0,
+			},
+			contextWindow: 2000000,
+			maxTokens: 30000,
+		} satisfies Model<"openai-completions">,
 		"grok-3": {
 			id: "grok-3",
 			name: "Grok 3",
@ -1732,6 +1817,23 @@ export const MODELS = {
 			contextWindow: 131072,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"grok-4.1-fast": {
+			id: "grok-4.1-fast",
+			name: "Grok 4.1 Fast",
+			api: "openai-completions",
+			provider: "xai",
+			baseUrl: "https://api.x.ai/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0.2,
+				output: 0.5,
+				cacheRead: 0.05,
+				cacheWrite: 0,
+			},
+			contextWindow: 2000000,
+			maxTokens: 30000,
+		} satisfies Model<"openai-completions">,
 		"grok-3-mini-latest": {
 			id: "grok-3-mini-latest",
 			name: "Grok 3 Mini Latest",
@ -1855,6 +1957,40 @@ export const MODELS = {
 		} satisfies Model<"anthropic-messages">,
 	},
 	openrouter: {
+		"x-ai/grok-4.1-fast": {
+			id: "x-ai/grok-4.1-fast",
+			name: "xAI: Grok 4.1 Fast",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 2000000,
+			maxTokens: 30000,
+		} satisfies Model<"openai-completions">,
+		"google/gemini-3-pro-preview": {
+			id: "google/gemini-3-pro-preview",
+			name: "Google: Gemini 3 Pro Preview",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 2,
+				output: 12,
+				cacheRead: 0.19999999999999998,
+				cacheWrite: 2.375,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65536,
+		} satisfies Model<"openai-completions">,
 		"openai/gpt-5.1": {
 			id: "openai/gpt-5.1",
 			name: "OpenAI: GPT-5.1",
@ -1872,6 +2008,23 @@ export const MODELS = {
 			contextWindow: 400000,
 			maxTokens: 128000,
 		} satisfies Model<"openai-completions">,
+		"openai/gpt-5.1-chat": {
+			id: "openai/gpt-5.1-chat",
+			name: "OpenAI: GPT-5.1 Chat",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text", "image"],
+			cost: {
+				input: 1.25,
+				output: 10,
+				cacheRead: 0.125,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
 		"openai/gpt-5.1-codex": {
 			id: "openai/gpt-5.1-codex",
 			name: "OpenAI: GPT-5.1-Codex",
@ -1932,8 +2085,8 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.55,
-				output: 2.25,
+				input: 0.44999999999999996,
+				output: 2.35,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@ -2127,40 +2280,6 @@ export const MODELS = {
 			contextWindow: 400000,
 			maxTokens: 128000,
 		} satisfies Model<"openai-completions">,
-		"inclusionai/ring-1t": {
-			id: "inclusionai/ring-1t",
-			name: "inclusionAI: Ring 1T",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: true,
-			input: ["text"],
-			cost: {
-				input: 0.5700000000000001,
-				output: 2.2800000000000002,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 131072,
-			maxTokens: 131072,
-		} satisfies Model<"openai-completions">,
-		"inclusionai/ling-1t": {
-			id: "inclusionai/ling-1t",
-			name: "inclusionAI: Ling-1T",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.5700000000000001,
-				output: 2.2800000000000002,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 131072,
-			maxTokens: 131072,
-		} satisfies Model<"openai-completions">,
 		"openai/o3-deep-research": {
 			id: "openai/o3-deep-research",
 			name: "OpenAI: o3 Deep Research",
@ -2391,13 +2510,13 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 0.22,
-				output: 0.88,
+				input: 0.21,
+				output: 1.9,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
-			contextWindow: 262144,
-			maxTokens: 4096,
+			contextWindow: 131072,
+			maxTokens: 32768,
 		} satisfies Model<"openai-completions">,
 		"qwen/qwen3-max": {
 			id: "qwen/qwen3-max",
@ -2765,13 +2884,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.09,
-				output: 0.3,
+				input: 0.051,
+				output: 0.33999999999999997,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
-			contextWindow: 262144,
-			maxTokens: 131072,
+			contextWindow: 32768,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"x-ai/grok-code-fast-1": {
 			id: "x-ai/grok-code-fast-1",
@ -3020,13 +3139,13 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0,
-				output: 0,
+				input: 0.04,
+				output: 0.39999999999999997,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 4096,
+			maxTokens: 131072,
 		} satisfies Model<"openai-completions">,
 		"openai/gpt-oss-120b:exacto": {
 			id: "openai/gpt-oss-120b:exacto",
@ -3470,23 +3589,6 @@ export const MODELS = {
 			contextWindow: 1000000,
 			maxTokens: 40000,
 		} satisfies Model<"openai-completions">,
-		"google/gemini-2.5-flash-lite-preview-06-17": {
-			id: "google/gemini-2.5-flash-lite-preview-06-17",
-			name: "Google: Gemini 2.5 Flash Lite Preview 06-17",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: true,
-			input: ["text", "image"],
-			cost: {
-				input: 0.09999999999999999,
-				output: 0.39999999999999997,
-				cacheRead: 0.024999999999999998,
-				cacheWrite: 0.18330000000000002,
-			},
-			contextWindow: 1048576,
-			maxTokens: 65535,
-		} satisfies Model<"openai-completions">,
 		"google/gemini-2.5-flash": {
 			id: "google/gemini-2.5-flash",
 			name: "Google: Gemini 2.5 Flash",
@ -3649,8 +3751,8 @@ export const MODELS = {
 			reasoning: true,
 			input: ["text"],
 			cost: {
-				input: 0.39999999999999997,
-				output: 1.75,
+				input: 0.19999999999999998,
+				output: 4.5,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@ -3725,23 +3827,6 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 100000,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.3-8b-instruct:free": {
-			id: "meta-llama/llama-3.3-8b-instruct:free",
-			name: "Meta: Llama 3.3 8B Instruct (free)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4028,
-		} satisfies Model<"openai-completions">,
 		"nousresearch/deephermes-3-mistral-24b-preview": {
 			id: "nousresearch/deephermes-3-mistral-24b-preview",
 			name: "Nous: DeepHermes 3 Mistral 24B Preview",
@ -4082,23 +4167,6 @@ export const MODELS = {
 			contextWindow: 131072,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-4-maverick:free": {
-			id: "meta-llama/llama-4-maverick:free",
-			name: "Meta: Llama 4 Maverick (free)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text", "image"],
-			cost: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4028,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-4-maverick": {
 			id: "meta-llama/llama-4-maverick",
 			name: "Meta: Llama 4 Maverick",
@ -4116,23 +4184,6 @@ export const MODELS = {
 			contextWindow: 1048576,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-4-scout:free": {
-			id: "meta-llama/llama-4-scout:free",
-			name: "Meta: Llama 4 Scout (free)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text", "image"],
-			cost: {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4028,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-4-scout": {
 			id: "meta-llama/llama-4-scout",
 			name: "Meta: Llama 4 Scout",
@ -4227,13 +4278,13 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 0.09,
-				output: 0.16,
+				input: 0.07,
+				output: 0.5,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 16384,
+			maxTokens: 131072,
 		} satisfies Model<"openai-completions">,
 		"qwen/qwq-32b": {
 			id: "qwen/qwq-32b",
@ -4779,23 +4830,6 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 8192,
 		} satisfies Model<"openai-completions">,
-		"mistralai/ministral-8b": {
-			id: "mistralai/ministral-8b",
-			name: "Mistral: Ministral 8B",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.09999999999999999,
-				output: 0.09999999999999999,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 131072,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"mistralai/ministral-3b": {
 			id: "mistralai/ministral-3b",
 			name: "Mistral: Ministral 3B",
@ -4813,6 +4847,23 @@ export const MODELS = {
 			contextWindow: 131072,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"mistralai/ministral-8b": {
+			id: "mistralai/ministral-8b",
+			name: "Mistral: Ministral 8B",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.09999999999999999,
+				output: 0.09999999999999999,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131072,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"qwen/qwen-2.5-7b-instruct": {
 			id: "qwen/qwen-2.5-7b-instruct",
 			name: "Qwen: Qwen2.5 7B Instruct",
@ -4839,8 +4890,8 @@ export const MODELS = {
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.6,
-				output: 0.6,
+				input: 1.2,
+				output: 1.2,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
@ -5017,22 +5068,22 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.1-8b-instruct": {
-			id: "meta-llama/llama-3.1-8b-instruct",
-			name: "Meta: Llama 3.1 8B Instruct",
+		"meta-llama/llama-3.1-70b-instruct": {
+			id: "meta-llama/llama-3.1-70b-instruct",
+			name: "Meta: Llama 3.1 70B Instruct",
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.02,
-				output: 0.03,
+				input: 0.39999999999999997,
+				output: 0.39999999999999997,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 16384,
+			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-405b-instruct": {
 			id: "meta-llama/llama-3.1-405b-instruct",
@ -5051,22 +5102,22 @@ export const MODELS = {
 			contextWindow: 130815,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.1-70b-instruct": {
-			id: "meta-llama/llama-3.1-70b-instruct",
-			name: "Meta: Llama 3.1 70B Instruct",
+		"meta-llama/llama-3.1-8b-instruct": {
+			id: "meta-llama/llama-3.1-8b-instruct",
+			name: "Meta: Llama 3.1 8B Instruct",
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
 			reasoning: false,
 			input: ["text"],
 			cost: {
-				input: 0.39999999999999997,
-				output: 0.39999999999999997,
+				input: 0.02,
+				output: 0.03,
 				cacheRead: 0,
 				cacheWrite: 0,
 			},
 			contextWindow: 131072,
-			maxTokens: 4096,
+			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
 		"mistralai/mistral-nemo": {
 			id: "mistralai/mistral-nemo",
@ -5085,23 +5136,6 @@ export const MODELS = {
 			contextWindow: 131072,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"openai/gpt-4o-mini-2024-07-18": {
-			id: "openai/gpt-4o-mini-2024-07-18",
-			name: "OpenAI: GPT-4o-mini (2024-07-18)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text", "image"],
-			cost: {
-				input: 0.15,
-				output: 0.6,
-				cacheRead: 0.075,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 16384,
-		} satisfies Model<"openai-completions">,
 		"openai/gpt-4o-mini": {
 			id: "openai/gpt-4o-mini",
 			name: "OpenAI: GPT-4o-mini",
@ -5119,22 +5153,22 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"anthropic/claude-3.5-sonnet-20240620": {
-			id: "anthropic/claude-3.5-sonnet-20240620",
-			name: "Anthropic: Claude 3.5 Sonnet (2024-06-20)",
+		"openai/gpt-4o-mini-2024-07-18": {
+			id: "openai/gpt-4o-mini-2024-07-18",
+			name: "OpenAI: GPT-4o-mini (2024-07-18)",
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
 			reasoning: false,
 			input: ["text", "image"],
 			cost: {
-				input: 3,
-				output: 15,
-				cacheRead: 0.3,
-				cacheWrite: 3.75,
+				input: 0.15,
+				output: 0.6,
+				cacheRead: 0.075,
+				cacheWrite: 0,
 			},
-			contextWindow: 200000,
-			maxTokens: 8192,
+			contextWindow: 128000,
+			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
 		"sao10k/l3-euryale-70b": {
 			id: "sao10k/l3-euryale-70b",
@ -5221,23 +5255,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"openai/gpt-4o-2024-05-13": {
-			id: "openai/gpt-4o-2024-05-13",
-			name: "OpenAI: GPT-4o (2024-05-13)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text", "image"],
-			cost: {
-				input: 5,
-				output: 15,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"openai/gpt-4o": {
 			id: "openai/gpt-4o",
 			name: "OpenAI: GPT-4o",
@ -5272,6 +5289,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 64000,
 		} satisfies Model<"openai-completions">,
+		"openai/gpt-4o-2024-05-13": {
+			id: "openai/gpt-4o-2024-05-13",
+			name: "OpenAI: GPT-4o (2024-05-13)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text", "image"],
+			cost: {
+				input: 5,
+				output: 15,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3-70b-instruct": {
 			id: "meta-llama/llama-3-70b-instruct",
 			name: "Meta: Llama 3 70B Instruct",
@ -5391,23 +5425,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"openai/gpt-3.5-turbo-0613": {
-			id: "openai/gpt-3.5-turbo-0613",
-			name: "OpenAI: GPT-3.5 Turbo (older v0613)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 1,
-				output: 2,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 4095,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"openai/gpt-4-turbo-preview": {
 			id: "openai/gpt-4-turbo-preview",
 			name: "OpenAI: GPT-4 Turbo Preview",
@ -5425,6 +5442,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"openai/gpt-3.5-turbo-0613": {
+			id: "openai/gpt-3.5-turbo-0613",
+			name: "OpenAI: GPT-3.5 Turbo (older v0613)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 1,
+				output: 2,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 4095,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"mistralai/mistral-small": {
 			id: "mistralai/mistral-small",
 			name: "Mistral Small",
@ -5493,23 +5527,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"mistralai/mistral-7b-instruct-v0.1": {
-			id: "mistralai/mistral-7b-instruct-v0.1",
-			name: "Mistral: Mistral 7B Instruct v0.1",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.11,
-				output: 0.19,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 2824,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"openai/gpt-3.5-turbo-16k": {
 			id: "openai/gpt-3.5-turbo-16k",
 			name: "OpenAI: GPT-3.5 Turbo 16k",
@ -5544,23 +5561,6 @@ export const MODELS = {
 			contextWindow: 8191,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"openai/gpt-4": {
-			id: "openai/gpt-4",
-			name: "OpenAI: GPT-4",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 30,
-				output: 60,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 8191,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"openai/gpt-3.5-turbo": {
 			id: "openai/gpt-3.5-turbo",
 			name: "OpenAI: GPT-3.5 Turbo",
@ -5578,6 +5578,23 @@ export const MODELS = {
 			contextWindow: 16385,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"openai/gpt-4": {
+			id: "openai/gpt-4",
+			name: "OpenAI: GPT-4",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 30,
+				output: 60,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 8191,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"openrouter/auto": {
 			id: "openrouter/auto",
 			name: "OpenRouter: Auto Router",
--- a/packages/ai/src/providers/anthropic.ts
+++ b/packages/ai/src/providers/anthropic.ts
@ -460,11 +460,20 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
 					});
 				} else if (block.type === "thinking") {
 					if (block.thinking.trim().length === 0) continue;
-					blocks.push({
-						type: "thinking",
-						thinking: sanitizeSurrogates(block.thinking),
-						signature: block.thinkingSignature || "",
-					});
+					// If thinking signature is missing/empty (e.g., from aborted stream),
+					// convert to text block to avoid API rejection
+					if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
+						blocks.push({
+							type: "text",
+							text: sanitizeSurrogates(`<thinking>\n${block.thinking}\n</thinking>`),
+						});
+					} else {
+						blocks.push({
+							type: "thinking",
+							thinking: sanitizeSurrogates(block.thinking),
+							signature: block.thinkingSignature,
+						});
+					}
 				} else if (block.type === "toolCall") {
 					blocks.push({
 						type: "tool_use",
--- a/packages/ai/src/providers/google.ts
+++ b/packages/ai/src/providers/google.ts
@ -162,6 +162,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
 								id: toolCallId,
 								name: part.functionCall.name || "",
 								arguments: part.functionCall.args as Record<string, any>,
+								...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
 							};

 							// Validate tool arguments if tool definition is available
@ -361,13 +362,17 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
 					};
 					parts.push(thinkingPart);
 				} else if (block.type === "toolCall") {
-					parts.push({
+					const part: Part = {
 						functionCall: {
 							id: block.id,
 							name: block.name,
 							args: block.arguments,
 						},
-					});
+					};
+					if (block.thoughtSignature) {
+						part.thoughtSignature = block.thoughtSignature;
+					}
+					parts.push(part);
 				}
 			}

--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@ -273,7 +273,7 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 		stream_options: { include_usage: true },
 	};

-	// Cerebras/xAI/Mistral/Chutes dont like the "store" field
+	// Cerebras/xAI/Mistral don't like the "store" field
 	if (
 		!model.baseUrl.includes("cerebras.ai") &&
 		!model.baseUrl.includes("api.x.ai") &&
@ -284,8 +284,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
 	}

 	if (options?.maxTokens) {
-		// Mistral/Chutes use max_tokens instead of max_completion_tokens
-		if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
+		// Mistral and Chutes expect max_tokens instead of max_completion_tokens
+		if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
 			(params as any).max_tokens = options?.maxTokens;
 		} else {
 			params.max_completion_tokens = options?.maxTokens;
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@ -74,6 +74,7 @@ export interface ToolCall {
 	id: string;
 	name: string;
 	arguments: Record<string, any>;
+	thoughtSignature?: string; // Google-specific: opaque signature for reusing thought context
 }

 export interface Usage {
--- a/packages/ai/test/google-thought-signature.test.ts
+++ b/packages/ai/test/google-thought-signature.test.ts
@ -0,0 +1,95 @@
+import { type Static, Type } from "@sinclair/typebox";
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { complete } from "../src/stream.js";
+import type { Context, Tool } from "../src/types.js";
+
+// Simple read tool
+const readSchema = Type.Object({
+	path: Type.String({ description: "Path to the file to read" }),
+});
+
+type ReadParams = Static<typeof readSchema>;
+
+const readTool: Tool = {
+	name: "read",
+	description: "Read contents of a file",
+	parameters: readSchema,
+};
+
+describe("Google Thought Signature Tests", () => {
+	describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini 3 Pro - Text + Tool Call", () => {
+		const model = getModel("google", "gemini-3-pro-preview");
+
+		it("should handle text + tool call in same response and preserve thoughtSignature on subsequent requests", async () => {
+			// Create a prompt that encourages the model to generate text/thoughts AND a tool call
+			const context: Context = {
+				systemPrompt: "You are a helpful assistant. Think through your actions before using tools.",
+				messages: [],
+				tools: [readTool],
+			};
+
+			// Ask something that should trigger both explanation text and a tool call
+			context.messages.push({
+				role: "user",
+				content:
+					"I need you to read the file packages/coding-agent/CHANGELOG.md. First explain what you're going to do, then use the read tool.",
+				timestamp: Date.now(),
+			});
+
+			// Get first response - should contain text + tool call
+			const firstResponse = await complete(model, context);
+			console.log("First response:", JSON.stringify(firstResponse, null, 2));
+
+			// Verify it has both text and tool call
+			const hasText = firstResponse.content.some((b) => b.type === "text");
+			const hasToolCall = firstResponse.content.some((b) => b.type === "toolCall");
+
+			// If model didn't generate both, skip the test (model behavior varies)
+			if (!hasText || !hasToolCall) {
+				console.log("Model did not generate text + tool call in same response, skipping test");
+				return;
+			}
+
+			// Check if thoughtSignature was captured
+			const toolCall = firstResponse.content.find((b) => b.type === "toolCall");
+			if (toolCall && toolCall.type === "toolCall") {
+				console.log("Tool call thoughtSignature:", toolCall.thoughtSignature);
+			}
+
+			context.messages.push(firstResponse);
+
+			// Provide tool result
+			const toolCallBlock = firstResponse.content.find((b) => b.type === "toolCall");
+			if (!toolCallBlock || toolCallBlock.type !== "toolCall") {
+				throw new Error("Expected tool call");
+			}
+
+			context.messages.push({
+				role: "toolResult",
+				toolCallId: toolCallBlock.id,
+				toolName: toolCallBlock.name,
+				content: [{ type: "text", text: "# Changelog\n\n## [Unreleased]\n\n### Fixed\n\n- Some fix" }],
+				isError: false,
+				timestamp: Date.now(),
+			});
+
+			// Send follow-up message - this will convert the assistant message (with text + tool call)
+			// back to Google's format. If thoughtSignature is missing, Google will error.
+			context.messages.push({
+				role: "user",
+				content: "Great, now tell me what version is unreleased?",
+				timestamp: Date.now(),
+			});
+
+			// This is where the error would occur if thoughtSignature is not preserved
+			const secondResponse = await complete(model, context);
+			console.log("Second response:", JSON.stringify(secondResponse, null, 2));
+
+			// The request should succeed
+			expect(secondResponse.stopReason).not.toBe("error");
+			expect(secondResponse.errorMessage).toBeUndefined();
+			expect(secondResponse.content.length).toBeGreaterThan(0);
+		}, 30000);
+	});
+});