Merge PR #264: Add Gemini 3 preview models to google-gemini-cli provider

2026-04-20 20:01:06 +00:00 · 2025-12-21 20:31:19 +01:00 · 2025-12-21 20:31:19 +01:00 · 329b3a0a36
commit 329b3a0a36
parent ee9b498380 6683e06376
10 changed files with 166 additions and 38 deletions
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -6019,9 +6019,9 @@ export const MODELS = {
 			contextWindow: 32768,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"anthropic/claude-3.5-haiku": {
-			id: "anthropic/claude-3.5-haiku",
-			name: "Anthropic: Claude 3.5 Haiku",
+		"anthropic/claude-3.5-haiku-20241022": {
+			id: "anthropic/claude-3.5-haiku-20241022",
+			name: "Anthropic: Claude 3.5 Haiku (2024-10-22)",
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
@ -6036,9 +6036,9 @@ export const MODELS = {
 			contextWindow: 200000,
 			maxTokens: 8192,
 		} satisfies Model<"openai-completions">,
-		"anthropic/claude-3.5-haiku-20241022": {
-			id: "anthropic/claude-3.5-haiku-20241022",
-			name: "Anthropic: Claude 3.5 Haiku (2024-10-22)",
+		"anthropic/claude-3.5-haiku": {
+			id: "anthropic/claude-3.5-haiku",
+			name: "Anthropic: Claude 3.5 Haiku",
 			api: "openai-completions",
 			provider: "openrouter",
 			baseUrl: "https://openrouter.ai/api/v1",
@ -6274,23 +6274,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.1-405b-instruct": {
-			id: "meta-llama/llama-3.1-405b-instruct",
-			name: "Meta: Llama 3.1 405B Instruct",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 3.5,
-				output: 3.5,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 130815,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-8b-instruct": {
 			id: "meta-llama/llama-3.1-8b-instruct",
 			name: "Meta: Llama 3.1 8B Instruct",
@ -6308,6 +6291,23 @@ export const MODELS = {
 			contextWindow: 131072,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3.1-405b-instruct": {
+			id: "meta-llama/llama-3.1-405b-instruct",
+			name: "Meta: Llama 3.1 405B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 3.5,
+				output: 3.5,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 130815,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-70b-instruct": {
 			id: "meta-llama/llama-3.1-70b-instruct",
 			name: "Meta: Llama 3.1 70B Instruct",
--- a/packages/ai/src/providers/google-gemini-cli.ts
+++ b/packages/ai/src/providers/google-gemini-cli.ts
@ -23,10 +23,19 @@ import { convertMessages, convertTools, mapStopReasonString, mapToolChoice } fro

 export interface GoogleGeminiCliOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "any";
+	/**
+	 * Thinking/reasoning configuration.
+	 * - Gemini 2.x models: use `budgetTokens` to set the thinking budget
+	 * - Gemini 3 models (gemini-3-pro-*, gemini-3-flash-*): use `level` instead
+	 *
+	 * When using `streamSimple`, this is handled automatically based on the model.
+	 */
 	thinking?: {
 		enabled: boolean;
+		/** Thinking budget in tokens. Use for Gemini 2.x models. */
 		budgetTokens?: number;
-		level?: ThinkingLevel; // For Gemini 3 models
+		/** Thinking level. Use for Gemini 3 models (LOW/HIGH for Pro, MINIMAL/LOW/MEDIUM/HIGH for Flash). */
+		level?: ThinkingLevel;
 	};
 	projectId?: string;
 }