feat(ai): Add proper thinking support for Gemini 2.5 models

- Added thinkingConfig with includeThoughts and thinkingBudget support
- Use part.thought boolean flag to detect thinking content per API docs
- Capture and preserve thought signatures for multi-turn function calling
- Added supportsThinking() check for Gemini 2.5 series models
- Updated example to demonstrate thinking configuration
- Handle SDK type limitations with proper type assertions
This commit is contained in:
Mario Zechner 2025-08-25 10:26:23 +02:00
parent a8ba19f0b4
commit 3e1422d3d7
2 changed files with 56 additions and 13 deletions

View file

@ -13,6 +13,10 @@ import type {
export interface GeminiLLMOptions extends LLMOptions { export interface GeminiLLMOptions extends LLMOptions {
toolChoice?: "auto" | "none" | "any"; toolChoice?: "auto" | "none" | "any";
thinking?: {
enabled: boolean;
budgetTokens?: number; // -1 for dynamic, 0 to disable
};
} }
export class GeminiLLM implements LLM<GeminiLLMOptions> { export class GeminiLLM implements LLM<GeminiLLMOptions> {
@ -49,16 +53,29 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
const contents = this.convertMessages(context.messages); const contents = this.convertMessages(context.messages);
const stream = await model.generateContentStream({ const config: any = {
contents, contents,
generationConfig: { generationConfig: {
temperature: options?.temperature, temperature: options?.temperature,
maxOutputTokens: options?.maxTokens, maxOutputTokens: options?.maxTokens,
}, },
}); };
// Add thinking configuration if enabled
if (options?.thinking?.enabled && this.supportsThinking()) {
config.config = {
thinkingConfig: {
includeThoughts: true,
thinkingBudget: options.thinking.budgetTokens ?? -1, // Default to dynamic
},
};
}
const stream = await model.generateContentStream(config);
let content = ""; let content = "";
let thinking = ""; let thinking = "";
let thoughtSignature: string | undefined;
const toolCalls: ToolCall[] = []; const toolCalls: ToolCall[] = [];
let usage: TokenUsage = { let usage: TokenUsage = {
input: 0, input: 0,
@ -76,24 +93,30 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
const candidate = chunk.candidates?.[0]; const candidate = chunk.candidates?.[0];
if (candidate?.content?.parts) { if (candidate?.content?.parts) {
for (const part of candidate.content.parts) { for (const part of candidate.content.parts) {
if (part.text) { // Cast to any to access thinking properties not yet in SDK types
// Check if it's thinking content const partWithThinking = part as any;
if ((part as any).thought) { if (partWithThinking.text !== undefined) {
thinking += part.text; // Check if it's thinking content using the thought boolean flag
options?.onThinking?.(part.text, false); if (partWithThinking.thought === true) {
inThinkingBlock = true;
if (inTextBlock) { if (inTextBlock) {
options?.onText?.("", true); options?.onText?.("", true);
inTextBlock = false; inTextBlock = false;
} }
thinking += partWithThinking.text;
options?.onThinking?.(partWithThinking.text, false);
inThinkingBlock = true;
// Capture thought signature if present
if (partWithThinking.thoughtSignature) {
thoughtSignature = partWithThinking.thoughtSignature;
}
} else { } else {
content += part.text;
options?.onText?.(part.text, false);
inTextBlock = true;
if (inThinkingBlock) { if (inThinkingBlock) {
options?.onThinking?.("", true); options?.onThinking?.("", true);
inThinkingBlock = false; inThinkingBlock = false;
} }
content += partWithThinking.text;
options?.onText?.(partWithThinking.text, false);
inTextBlock = true;
} }
} }
@ -146,6 +169,7 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
role: "assistant", role: "assistant",
content: content || undefined, content: content || undefined,
thinking: thinking || undefined, thinking: thinking || undefined,
thinkingSignature: thoughtSignature,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
model: this.model, model: this.model,
usage, usage,
@ -179,6 +203,15 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
} else if (msg.role === "assistant") { } else if (msg.role === "assistant") {
const parts: any[] = []; const parts: any[] = [];
// Add thinking if present (with thought signature for function calling)
if (msg.thinking && msg.thinkingSignature) {
parts.push({
text: msg.thinking,
thought: true,
thoughtSignature: msg.thinkingSignature,
});
}
if (msg.content) { if (msg.content) {
parts.push({ text: msg.content }); parts.push({ text: msg.content });
} }
@ -261,4 +294,9 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
return "stop"; return "stop";
} }
} }
/**
 * Whether the configured model accepts a `thinkingConfig` request field.
 *
 * Only the Gemini 2.5 series supports thinking per the commit intent
 * ("supportsThinking() check for Gemini 2.5 series models"). Matching the
 * bare "gemini-2" prefix was a bug: it also matched 2.0 models (e.g.
 * "gemini-2.0-flash-exp"), which reject thinking budgets, so we require
 * the explicit "2.5" version marker.
 *
 * @returns true when `this.model` names a Gemini 2.5 model.
 */
private supportsThinking(): boolean {
    return this.model.includes("2.5");
}
} }

View file

@ -23,10 +23,15 @@ const tools: Tool[] = [
const options: GeminiLLMOptions = { const options: GeminiLLMOptions = {
onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
toolChoice: "auto" toolChoice: "auto",
// Enable thinking for Gemini 2.5 models
thinking: {
enabled: true,
budgetTokens: -1 // Dynamic thinking
}
}; };
const ai = new GeminiLLM("gemini-2.0-flash-exp", process.env.GEMINI_API_KEY || "fake-api-key-for-testing"); const ai = new GeminiLLM("gemini-2.5-flash", process.env.GEMINI_API_KEY || "fake-api-key-for-testing");
const context: Context = { const context: Context = {
systemPrompt: "You are a helpful assistant that can use tools to answer questions.", systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
messages: [ messages: [