From 3e1422d3d709749d1bd5d6d9cc2ce6bb5b36a694 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Mon, 25 Aug 2025 10:26:23 +0200 Subject: [PATCH] feat(ai): Add proper thinking support for Gemini 2.5 models - Added thinkingConfig with includeThoughts and thinkingBudget support - Use part.thought boolean flag to detect thinking content per API docs - Capture and preserve thought signatures for multi-turn function calling - Added supportsThinking() check for Gemini 2.5 series models - Updated example to demonstrate thinking configuration - Handle SDK type limitations with proper type assertions --- packages/ai/src/providers/gemini.ts | 60 +++++++++++++++++++++++------ packages/ai/test/examples/gemini.ts | 9 ++++- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/packages/ai/src/providers/gemini.ts b/packages/ai/src/providers/gemini.ts index a3910fb0..dbe26a41 100644 --- a/packages/ai/src/providers/gemini.ts +++ b/packages/ai/src/providers/gemini.ts @@ -13,6 +13,10 @@ import type { export interface GeminiLLMOptions extends LLMOptions { toolChoice?: "auto" | "none" | "any"; + thinking?: { + enabled: boolean; + budgetTokens?: number; // -1 for dynamic, 0 to disable + }; } export class GeminiLLM implements LLM { @@ -49,16 +53,29 @@ export class GeminiLLM implements LLM { const contents = this.convertMessages(context.messages); - const stream = await model.generateContentStream({ + const config: any = { contents, generationConfig: { temperature: options?.temperature, maxOutputTokens: options?.maxTokens, }, - }); + }; + + // Add thinking configuration if enabled + if (options?.thinking?.enabled && this.supportsThinking()) { + config.config = { + thinkingConfig: { + includeThoughts: true, + thinkingBudget: options.thinking.budgetTokens ?? 
-1, // Default to dynamic + }, + }; + } + + const stream = await model.generateContentStream(config); let content = ""; let thinking = ""; + let thoughtSignature: string | undefined; const toolCalls: ToolCall[] = []; let usage: TokenUsage = { input: 0, @@ -76,24 +93,30 @@ export class GeminiLLM implements LLM { const candidate = chunk.candidates?.[0]; if (candidate?.content?.parts) { for (const part of candidate.content.parts) { - if (part.text) { - // Check if it's thinking content - if ((part as any).thought) { - thinking += part.text; - options?.onThinking?.(part.text, false); - inThinkingBlock = true; + // Cast to any to access thinking properties not yet in SDK types + const partWithThinking = part as any; + if (partWithThinking.text !== undefined) { + // Check if it's thinking content using the thought boolean flag + if (partWithThinking.thought === true) { if (inTextBlock) { options?.onText?.("", true); inTextBlock = false; } + thinking += partWithThinking.text; + options?.onThinking?.(partWithThinking.text, false); + inThinkingBlock = true; + // Capture thought signature if present + if (partWithThinking.thoughtSignature) { + thoughtSignature = partWithThinking.thoughtSignature; + } } else { - content += part.text; - options?.onText?.(part.text, false); - inTextBlock = true; if (inThinkingBlock) { options?.onThinking?.("", true); inThinkingBlock = false; } + content += partWithThinking.text; + options?.onText?.(partWithThinking.text, false); + inTextBlock = true; } } @@ -146,6 +169,7 @@ export class GeminiLLM implements LLM { role: "assistant", content: content || undefined, thinking: thinking || undefined, + thinkingSignature: thoughtSignature, toolCalls: toolCalls.length > 0 ? 
toolCalls : undefined, model: this.model, usage, @@ -179,6 +203,15 @@ export class GeminiLLM implements LLM { } else if (msg.role === "assistant") { const parts: any[] = []; + // Add thinking if present (with thought signature for function calling) + if (msg.thinking && msg.thinkingSignature) { + parts.push({ + text: msg.thinking, + thought: true, + thoughtSignature: msg.thinkingSignature, + }); + } + if (msg.content) { parts.push({ text: msg.content }); } @@ -261,4 +294,9 @@ export class GeminiLLM implements LLM { return "stop"; } } + + private supportsThinking(): boolean { + // Only Gemini 2.5 series ids enable thinkingConfig; a bare "gemini-2" match would also admit 2.0 models + return this.model.includes("2.5"); + } } diff --git a/packages/ai/test/examples/gemini.ts b/packages/ai/test/examples/gemini.ts index 813358fb..9a5a2ab5 100644 --- a/packages/ai/test/examples/gemini.ts +++ b/packages/ai/test/examples/gemini.ts @@ -23,10 +23,15 @@ const tools: Tool[] = [ const options: GeminiLLMOptions = { onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), - toolChoice: "auto" + toolChoice: "auto", + // Enable thinking for Gemini 2.5 models + thinking: { + enabled: true, + budgetTokens: -1 // Dynamic thinking + } }; -const ai = new GeminiLLM("gemini-2.0-flash-exp", process.env.GEMINI_API_KEY || "fake-api-key-for-testing"); +const ai = new GeminiLLM("gemini-2.5-flash", process.env.GEMINI_API_KEY || "fake-api-key-for-testing"); const context: Context = { systemPrompt: "You are a helpful assistant that can use tools to answer questions.", messages: [