From 3e1422d3d709749d1bd5d6d9cc2ce6bb5b36a694 Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Mon, 25 Aug 2025 10:26:23 +0200
Subject: [PATCH] feat(ai): Add proper thinking support for Gemini 2.5 models

- Add thinkingConfig with includeThoughts and thinkingBudget support
- Use part.thought boolean flag to detect thinking content per API docs
- Capture and preserve thought signatures for multi-turn function calling
- Add supportsThinking() check for Gemini 2.5 series models
- Update example to demonstrate thinking configuration
- Handle SDK type limitations with proper type assertions
---
 packages/ai/src/providers/gemini.ts | 60 +++++++++++++++++++++++------
 packages/ai/test/examples/gemini.ts |  9 ++++-
 2 files changed, 56 insertions(+), 13 deletions(-)
diff --git a/packages/ai/src/providers/gemini.ts b/packages/ai/src/providers/gemini.ts
index a3910fb0..dbe26a41 100644
--- a/packages/ai/src/providers/gemini.ts
+++ b/packages/ai/src/providers/gemini.ts
@@ -13,6 +13,10 @@ import type {
 
 export interface GeminiLLMOptions extends LLMOptions {
 	toolChoice?: "auto" | "none" | "any";
+	thinking?: {
+		enabled: boolean;
+		budgetTokens?: number; // Thinking token budget: -1 = dynamic (used when omitted), 0 = disable thinking
+	};
 }
 
 export class GeminiLLM implements LLM<GeminiLLMOptions> {
@@ -49,16 +53,29 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
 
 			const contents = this.convertMessages(context.messages);
 
-			const stream = await model.generateContentStream({
+			const config: any = {
 				contents,
 				generationConfig: {
 					temperature: options?.temperature,
 					maxOutputTokens: options?.maxTokens,
 				},
-			});
+			};
+
+			// Add thinking configuration if enabled
+			if (options?.thinking?.enabled && this.supportsThinking()) {
+				config.config = {
+					thinkingConfig: {
+						includeThoughts: true,
+						thinkingBudget: options.thinking.budgetTokens ?? -1, // Default to dynamic
+					},
+				};
+			}
+
+			const stream = await model.generateContentStream(config);
 
 			let content = "";
 			let thinking = "";
+			let thoughtSignature: string | undefined;
 			const toolCalls: ToolCall[] = [];
 			let usage: TokenUsage = {
 				input: 0,
@@ -76,24 +93,30 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
 				const candidate = chunk.candidates?.[0];
 				if (candidate?.content?.parts) {
 					for (const part of candidate.content.parts) {
-						if (part.text) {
-							// Check if it's thinking content
-							if ((part as any).thought) {
-								thinking += part.text;
-								options?.onThinking?.(part.text, false);
-								inThinkingBlock = true;
+						// Cast to any to access thinking properties not yet in SDK types
+						const partWithThinking = part as any;
+						if (partWithThinking.text !== undefined) {
+							// Check if it's thinking content using the thought boolean flag
+							if (partWithThinking.thought === true) {
 								if (inTextBlock) {
 									options?.onText?.("", true);
 									inTextBlock = false;
 								}
+								thinking += partWithThinking.text;
+								options?.onThinking?.(partWithThinking.text, false);
+								inThinkingBlock = true;
+								// Capture thought signature if present
+								if (partWithThinking.thoughtSignature) {
+									thoughtSignature = partWithThinking.thoughtSignature;
+								}
 							} else {
-								content += part.text;
-								options?.onText?.(part.text, false);
-								inTextBlock = true;
 								if (inThinkingBlock) {
 									options?.onThinking?.("", true);
 									inThinkingBlock = false;
 								}
+								content += partWithThinking.text;
+								options?.onText?.(partWithThinking.text, false);
+								inTextBlock = true;
 							}
 						}
 
@@ -146,6 +169,7 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
 				role: "assistant",
 				content: content || undefined,
 				thinking: thinking || undefined,
+				thinkingSignature: thoughtSignature,
 				toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
 				model: this.model,
 				usage,
@@ -179,6 +203,15 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
 			} else if (msg.role === "assistant") {
 				const parts: any[] = [];
 
+				// Add thinking if present (with thought signature for function calling)
+				if (msg.thinking && msg.thinkingSignature) {
+					parts.push({
+						text: msg.thinking,
+						thought: true,
+						thoughtSignature: msg.thinkingSignature,
+					});
+				}
+
 				if (msg.content) {
 					parts.push({ text: msg.content });
 				}
@@ -261,4 +294,9 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
 				return "stop";
 		}
 	}
+
+	private supportsThinking(): boolean {
+		// Thinking is a Gemini 2.5 series feature; a bare "gemini-2" match would also admit 2.0 models
+		return this.model.includes("2.5");
+	}
 }
diff --git a/packages/ai/test/examples/gemini.ts b/packages/ai/test/examples/gemini.ts
index 813358fb..9a5a2ab5 100644
--- a/packages/ai/test/examples/gemini.ts
+++ b/packages/ai/test/examples/gemini.ts
@@ -23,10 +23,15 @@ const tools: Tool[] = [
 const options: GeminiLLMOptions = {
     onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
     onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
-    toolChoice: "auto"
+    toolChoice: "auto",
+    // Enable thinking for Gemini 2.5 models
+    thinking: {
+        enabled: true,
+        budgetTokens: -1 // Dynamic thinking
+    }
 };
 
-const ai = new GeminiLLM("gemini-2.0-flash-exp", process.env.GEMINI_API_KEY || "fake-api-key-for-testing");
+const ai = new GeminiLLM("gemini-2.5-flash", process.env.GEMINI_API_KEY || "fake-api-key-for-testing");
 const context: Context = {
     systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
     messages: [