mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-17 05:00:16 +00:00
feat(ai): Add proper thinking support for Gemini 2.5 models
- Added thinkingConfig with includeThoughts and thinkingBudget support
- Use part.thought boolean flag to detect thinking content per API docs
- Capture and preserve thought signatures for multi-turn function calling
- Added supportsThinking() check for Gemini 2.5 series models
- Updated example to demonstrate thinking configuration
- Handle SDK type limitations with proper type assertions
This commit is contained in:
parent
a8ba19f0b4
commit
3e1422d3d7
2 changed files with 56 additions and 13 deletions
|
|
@ -13,6 +13,10 @@ import type {
|
||||||
|
|
||||||
export interface GeminiLLMOptions extends LLMOptions {
|
export interface GeminiLLMOptions extends LLMOptions {
|
||||||
toolChoice?: "auto" | "none" | "any";
|
toolChoice?: "auto" | "none" | "any";
|
||||||
|
thinking?: {
|
||||||
|
enabled: boolean;
|
||||||
|
budgetTokens?: number; // -1 for dynamic, 0 to disable
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
|
|
@ -49,16 +53,29 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
|
|
||||||
const contents = this.convertMessages(context.messages);
|
const contents = this.convertMessages(context.messages);
|
||||||
|
|
||||||
const stream = await model.generateContentStream({
|
const config: any = {
|
||||||
contents,
|
contents,
|
||||||
generationConfig: {
|
generationConfig: {
|
||||||
temperature: options?.temperature,
|
temperature: options?.temperature,
|
||||||
maxOutputTokens: options?.maxTokens,
|
maxOutputTokens: options?.maxTokens,
|
||||||
},
|
},
|
||||||
});
|
};
|
||||||
|
|
||||||
|
// Add thinking configuration if enabled
|
||||||
|
if (options?.thinking?.enabled && this.supportsThinking()) {
|
||||||
|
config.config = {
|
||||||
|
thinkingConfig: {
|
||||||
|
includeThoughts: true,
|
||||||
|
thinkingBudget: options.thinking.budgetTokens ?? -1, // Default to dynamic
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = await model.generateContentStream(config);
|
||||||
|
|
||||||
let content = "";
|
let content = "";
|
||||||
let thinking = "";
|
let thinking = "";
|
||||||
|
let thoughtSignature: string | undefined;
|
||||||
const toolCalls: ToolCall[] = [];
|
const toolCalls: ToolCall[] = [];
|
||||||
let usage: TokenUsage = {
|
let usage: TokenUsage = {
|
||||||
input: 0,
|
input: 0,
|
||||||
|
|
@ -76,24 +93,30 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
const candidate = chunk.candidates?.[0];
|
const candidate = chunk.candidates?.[0];
|
||||||
if (candidate?.content?.parts) {
|
if (candidate?.content?.parts) {
|
||||||
for (const part of candidate.content.parts) {
|
for (const part of candidate.content.parts) {
|
||||||
if (part.text) {
|
// Cast to any to access thinking properties not yet in SDK types
|
||||||
// Check if it's thinking content
|
const partWithThinking = part as any;
|
||||||
if ((part as any).thought) {
|
if (partWithThinking.text !== undefined) {
|
||||||
thinking += part.text;
|
// Check if it's thinking content using the thought boolean flag
|
||||||
options?.onThinking?.(part.text, false);
|
if (partWithThinking.thought === true) {
|
||||||
inThinkingBlock = true;
|
|
||||||
if (inTextBlock) {
|
if (inTextBlock) {
|
||||||
options?.onText?.("", true);
|
options?.onText?.("", true);
|
||||||
inTextBlock = false;
|
inTextBlock = false;
|
||||||
}
|
}
|
||||||
|
thinking += partWithThinking.text;
|
||||||
|
options?.onThinking?.(partWithThinking.text, false);
|
||||||
|
inThinkingBlock = true;
|
||||||
|
// Capture thought signature if present
|
||||||
|
if (partWithThinking.thoughtSignature) {
|
||||||
|
thoughtSignature = partWithThinking.thoughtSignature;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
content += part.text;
|
|
||||||
options?.onText?.(part.text, false);
|
|
||||||
inTextBlock = true;
|
|
||||||
if (inThinkingBlock) {
|
if (inThinkingBlock) {
|
||||||
options?.onThinking?.("", true);
|
options?.onThinking?.("", true);
|
||||||
inThinkingBlock = false;
|
inThinkingBlock = false;
|
||||||
}
|
}
|
||||||
|
content += partWithThinking.text;
|
||||||
|
options?.onText?.(partWithThinking.text, false);
|
||||||
|
inTextBlock = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -146,6 +169,7 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
role: "assistant",
|
role: "assistant",
|
||||||
content: content || undefined,
|
content: content || undefined,
|
||||||
thinking: thinking || undefined,
|
thinking: thinking || undefined,
|
||||||
|
thinkingSignature: thoughtSignature,
|
||||||
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
|
||||||
model: this.model,
|
model: this.model,
|
||||||
usage,
|
usage,
|
||||||
|
|
@ -179,6 +203,15 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
} else if (msg.role === "assistant") {
|
} else if (msg.role === "assistant") {
|
||||||
const parts: any[] = [];
|
const parts: any[] = [];
|
||||||
|
|
||||||
|
// Add thinking if present (with thought signature for function calling)
|
||||||
|
if (msg.thinking && msg.thinkingSignature) {
|
||||||
|
parts.push({
|
||||||
|
text: msg.thinking,
|
||||||
|
thought: true,
|
||||||
|
thoughtSignature: msg.thinkingSignature,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (msg.content) {
|
if (msg.content) {
|
||||||
parts.push({ text: msg.content });
|
parts.push({ text: msg.content });
|
||||||
}
|
}
|
||||||
|
|
@ -261,4 +294,9 @@ export class GeminiLLM implements LLM<GeminiLLMOptions> {
|
||||||
return "stop";
|
return "stop";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private supportsThinking(): boolean {
|
||||||
|
// Gemini 2.5 series models support thinking
|
||||||
|
return this.model.includes("2.5") || this.model.includes("gemini-2");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,10 +23,15 @@ const tools: Tool[] = [
|
||||||
const options: GeminiLLMOptions = {
|
const options: GeminiLLMOptions = {
|
||||||
onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
|
onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
|
||||||
onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
|
onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
|
||||||
toolChoice: "auto"
|
toolChoice: "auto",
|
||||||
|
// Enable thinking for Gemini 2.5 models
|
||||||
|
thinking: {
|
||||||
|
enabled: true,
|
||||||
|
budgetTokens: -1 // Dynamic thinking
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const ai = new GeminiLLM("gemini-2.0-flash-exp", process.env.GEMINI_API_KEY || "fake-api-key-for-testing");
|
const ai = new GeminiLLM("gemini-2.5-flash", process.env.GEMINI_API_KEY || "fake-api-key-for-testing");
|
||||||
const context: Context = {
|
const context: Context = {
|
||||||
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
|
||||||
messages: [
|
messages: [
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue