Add Gemini 3 preview models to google-gemini-cli provider

- Add gemini-3-pro-preview and gemini-3-flash-preview to Cloud Code Assist
- Handle thinkingLevel config for Gemini 3 (vs thinkingBudget for Gemini 2.x)
- Gemini 3 Pro: LOW/HIGH levels only
- Gemini 3 Flash: all four levels (MINIMAL/LOW/MEDIUM/HIGH)
2026-04-15 11:02:17 +00:00 · 2025-12-20 22:10:47 -06:00 · 2025-12-20 22:10:47 -06:00 · ee9b498380
commit ee9b498380
parent 299986f06b
5 changed files with 107 additions and 4 deletions
--- a/packages/ai/scripts/generate-models.ts
+++ b/packages/ai/scripts/generate-models.ts
@ -522,6 +522,30 @@ async function generateModels() {
 			contextWindow: 1048576,
 			maxTokens: 8192,
 		},
+		{
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
+		{
+			id: "gemini-3-flash-preview",
+			name: "Gemini 3 Flash Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: CLOUD_CODE_ASSIST_ENDPOINT,
+			reasoning: true,
+			input: ["text", "image"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		},
 	];
 	allModels.push(...cloudCodeAssistModels);

--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -3225,7 +3225,7 @@ export const MODELS = {
 			cost: {
 				input: 0.24,
 				output: 0.38,
-				cacheRead: 0.11,
+				cacheRead: 0.02,
 				cacheWrite: 0,
 			},
 			contextWindow: 163840,
@ -6854,6 +6854,40 @@ export const MODELS = {
 			contextWindow: 1048576,
 			maxTokens: 8192,
 		} satisfies Model<"google-gemini-cli">,
+		"gemini-3-pro-preview": {
+			id: "gemini-3-pro-preview",
+			name: "Gemini 3 Pro Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: "https://cloudcode-pa.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		} satisfies Model<"google-gemini-cli">,
+		"gemini-3-flash-preview": {
+			id: "gemini-3-flash-preview",
+			name: "Gemini 3 Flash Preview (Cloud Code Assist)",
+			api: "google-gemini-cli",
+			provider: "google-gemini-cli",
+			baseUrl: "https://cloudcode-pa.googleapis.com",
+			reasoning: true,
+			input: ["text", "image"],
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 1048576,
+			maxTokens: 65535,
+		} satisfies Model<"google-gemini-cli">,
 	},
 	"google-antigravity": {
 		"gemini-3-pro-high": {
--- a/packages/ai/src/providers/google-gemini-cli.ts
+++ b/packages/ai/src/providers/google-gemini-cli.ts
@ -4,7 +4,7 @@
 * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
 */

-import type { Content, ThinkingConfig } from "@google/genai";
+import type { Content, ThinkingConfig, ThinkingLevel } from "@google/genai";
 import { calculateCost } from "../models.js";
 import type {
 	Api,
@ -26,6 +26,7 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
 	thinking?: {
 		enabled: boolean;
 		budgetTokens?: number;
+		level?: ThinkingLevel; // For Gemini 3 models
 	};
 	projectId?: string;
 }
@ -424,7 +425,10 @@ function buildRequest(
 		generationConfig.thinkingConfig = {
 			includeThoughts: true,
 		};
-		if (options.thinking.budgetTokens !== undefined) {
+		// Gemini 3 models use thinkingLevel, older models use thinkingBudget
+		if (options.thinking.level !== undefined) {
+			generationConfig.thinkingConfig.thinkingLevel = options.thinking.level;
+		} else if (options.thinking.budgetTokens !== undefined) {
 			generationConfig.thinkingConfig.thinkingBudget = options.thinking.budgetTokens;
 		}
 	}
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@ -237,12 +237,24 @@ function mapOptionsForApi<TApi extends Api>(
 		}

 		case "google-gemini-cli": {
-			// Cloud Code Assist uses thinking budget tokens like Gemini 2.5
 			if (!options?.reasoning) {
 				return { ...base, thinking: { enabled: false } } satisfies GoogleGeminiCliOptions;
 			}

 			const effort = clampReasoning(options.reasoning)!;
+
+			// Gemini 3 models use thinkingLevel instead of thinkingBudget
+			if (model.id.includes("3-pro") || model.id.includes("3-flash")) {
+				return {
+					...base,
+					thinking: {
+						enabled: true,
+						level: getGeminiCliThinkingLevel(effort, model.id),
+					},
+				} satisfies GoogleGeminiCliOptions;
+			}
+
+			// Gemini 2.x models use thinkingBudget
 			const budgets: Record<ClampedReasoningEffort, number> = {
 				minimal: 1024,
 				low: 2048,
@ -304,6 +316,31 @@ function getGemini3ThinkingLevel(effort: ClampedReasoningEffort, model: Model<"g
 	}
 }

+function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): ThinkingLevel {
+	if (modelId.includes("3-pro")) {
+		// Gemini 3 Pro only supports LOW/HIGH (for now)
+		switch (effort) {
+			case "minimal":
+			case "low":
+				return ThinkingLevel.LOW;
+			case "medium":
+			case "high":
+				return ThinkingLevel.HIGH;
+		}
+	}
+	// Gemini 3 Flash supports all four levels
+	switch (effort) {
+		case "minimal":
+			return ThinkingLevel.MINIMAL;
+		case "low":
+			return ThinkingLevel.LOW;
+		case "medium":
+			return ThinkingLevel.MEDIUM;
+		case "high":
+			return ThinkingLevel.HIGH;
+	}
+}
+
 function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
 	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
 	if (model.id.includes("2.5-pro")) {
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@ -2,6 +2,10 @@

 ## [Unreleased]

+### Added
+
+- **Gemini 3 preview models**: Added `gemini-3-pro-preview` and `gemini-3-flash-preview` to the google-gemini-cli provider.
+
 ## [0.25.2] - 2025-12-21

 ### Fixed