feat: add thinkingBudgets option to customize token budgets

2026-04-18 08:02:48 +00:00 · 2026-01-07 15:13:26 +03:00 · 2026-01-07 15:13:26 +03:00 · 0f27eae77e
commit 0f27eae77e
parent 10e651f99b
8 changed files with 75 additions and 7 deletions
--- a/packages/agent/CHANGELOG.md
+++ b/packages/agent/CHANGELOG.md
@ -2,6 +2,10 @@
 ## [Unreleased]
 ### Added
 - `thinkingBudgets` option on `Agent` and `AgentOptions` to customize token budgets per thinking level ([#521](https://github.com/badlogic/pi-mono/issues/521))
 ## [0.37.8] - 2026-01-07
 ## [0.37.7] - 2026-01-07
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@ -10,6 +10,7 @@ import {
 	type Model,
 	streamSimple,
 	type TextContent,
 	type ThinkingBudgets,
 } from "@mariozechner/pi-ai";
 import { agentLoop, agentLoopContinue } from "./agent-loop.js";
 import type {
@ -71,6 +72,11 @@ export interface AgentOptions {
 	 * Useful for expiring tokens (e.g., GitHub Copilot OAuth).
 	 */
 	getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
 	/**
 	 * Custom token budgets for thinking levels (token-based providers only).
 	 */
 	thinkingBudgets?: ThinkingBudgets;
 }
 export class Agent {
@ -99,6 +105,7 @@ export class Agent {
 	public getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined;
 	private runningPrompt?: Promise<void>;
 	private resolveRunningPrompt?: () => void;
 	private _thinkingBudgets?: ThinkingBudgets;
 	constructor(opts: AgentOptions = {}) {
 		this._state = { ...this._state, ...opts.initialState };
@ -109,6 +116,7 @@ export class Agent {
 		this.streamFn = opts.streamFn || streamSimple;
 		this._sessionId = opts.sessionId;
 		this.getApiKey = opts.getApiKey;
 		this._thinkingBudgets = opts.thinkingBudgets;
 	}
 	/**
@ -126,6 +134,20 @@ export class Agent {
 		this._sessionId = value;
 	}
 	/**
 	 * Get the current thinking budgets.
 	 *
 	 * @returns The budgets last stored on this agent (from
 	 * `AgentOptions.thinkingBudgets` at construction or a later assignment),
 	 * or `undefined` when none have been set. The stored object itself is
 	 * returned, not a copy.
 	 */
 	get thinkingBudgets(): ThinkingBudgets | undefined {
 		return this._thinkingBudgets;
 	}
 	/**
 	 * Set custom thinking budgets for token-based providers.
 	 *
 	 * The given value replaces whatever was stored before; it is not merged
 	 * with the previous budgets. Assigning `undefined` removes the custom
 	 * budgets.
 	 *
 	 * @param value New per-level token budgets, or `undefined` to clear them.
 	 */
 	set thinkingBudgets(value: ThinkingBudgets | undefined) {
 		this._thinkingBudgets = value;
 	}
 	/**
 	 * Get the agent's current state.
 	 *
 	 * @returns The internally held `AgentState` object (the same reference,
 	 * not a copy).
 	 */
 	get state(): AgentState {
 		return this._state;
 	}
@ -310,6 +332,7 @@ export class Agent {
 			model,
 			reasoning,
 			sessionId: this._sessionId,
 			thinkingBudgets: this._thinkingBudgets,
 			convertToLlm: this.convertToLlm,
 			transformContext: this.transformContext,
 			getApiKey: this.getApiKey,
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@ -2,6 +2,10 @@
 ## [Unreleased]
 ### Added
 - `thinkingBudgets` option in `SimpleStreamOptions` for customizing token budgets per thinking level on token-based providers ([#521](https://github.com/badlogic/pi-mono/issues/521))
 ## [0.37.8] - 2026-01-07
 ## [0.37.7] - 2026-01-07
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@ -22,6 +22,7 @@ import type {
 	Model,
 	OptionsForApi,
 	SimpleStreamOptions,
 	ThinkingBudgets,
 	ThinkingLevel,
 } from "./types.js";
@ -192,15 +193,17 @@ function mapOptionsForApi<TApi extends Api>(
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const anthropicBudgets = {
+			const defaultBudgets: ThinkingBudgets = {
 				minimal: 1024,
 				low: 2048,
 				medium: 8192,
 				high: 16384,
 			};
 			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
 			const minOutputTokens = 1024;
-			let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
+			const level = clampReasoning(options.reasoning)!;
 			let thinkingBudget = budgets[level]!;
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
@ -261,7 +264,7 @@ function mapOptionsForApi<TApi extends Api>(
 				...base,
 				thinking: {
 					enabled: true,
-					budgetTokens: getGoogleBudget(googleModel, effort),
+					budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
 				},
 			} satisfies GoogleOptions;
 		}
@ -287,15 +290,16 @@ function mapOptionsForApi<TApi extends Api>(
 			// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const budgets: Record<ClampedThinkingLevel, number> = {
+			const defaultBudgets: ThinkingBudgets = {
 				minimal: 1024,
 				low: 2048,
 				medium: 8192,
 				high: 16384,
 			};
 			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
 			const minOutputTokens = 1024;
-			let thinkingBudget = budgets[effort];
+			let thinkingBudget = budgets[effort]!;
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
@ -338,7 +342,7 @@ function mapOptionsForApi<TApi extends Api>(
 				...base,
 				thinking: {
 					enabled: true,
-					budgetTokens: getGoogleBudget(geminiModel, effort),
+					budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
 				},
 			} satisfies GoogleVertexOptions;
 		}
@ -416,7 +420,16 @@ function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string
 	}
 }
-function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedThinkingLevel): number {
+function getGoogleBudget(
 	model: Model<"google-generative-ai">,
 	effort: ClampedThinkingLevel,
 	customBudgets?: ThinkingBudgets,
 ): number {
 	// Custom budgets take precedence if provided for this level
 	if (customBudgets?.[effort] !== undefined) {
 		return customBudgets[effort]!;
 	}
 	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
 	if (model.id.includes("2.5-pro")) {
 		const budgets: Record<ClampedThinkingLevel, number> = {
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@ -58,6 +58,14 @@ export type Provider = KnownProvider | string;
 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
 /**
  * Token budgets for each thinking level (token-based providers only).
  *
  * Every key is optional: a level that is omitted keeps the provider's
  * built-in default budget, so callers can override a single level without
  * restating the others. There is no entry for the `"xhigh"` thinking level.
  */
 export interface ThinkingBudgets {
 	/** Token budget used for the `"minimal"` thinking level. */
 	minimal?: number;
 	/** Token budget used for the `"low"` thinking level. */
 	low?: number;
 	/** Token budget used for the `"medium"` thinking level. */
 	medium?: number;
 	/** Token budget used for the `"high"` thinking level. */
 	high?: number;
 }
 // Base options all providers share
 export interface StreamOptions {
 	temperature?: number;
@ -75,6 +83,8 @@ export interface StreamOptions {
 // Unified options with reasoning passed to streamSimple() and completeSimple()
 export interface SimpleStreamOptions extends StreamOptions {
 	/** Requested thinking level; optional. */
 	reasoning?: ThinkingLevel;
 	/**
 	 * Custom token budgets for thinking levels (token-based providers only).
 	 * Levels that are not listed keep the provider's default budget.
 	 */
 	thinkingBudgets?: ThinkingBudgets;
 }
 // Generic StreamFunction with typed options
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@ -5,6 +5,7 @@
 ### Added
 - Extension UI dialogs (`ctx.ui.select()`, `ctx.ui.confirm()`, `ctx.ui.input()`) now support a `timeout` option that auto-dismisses the dialog with a live countdown display. Simpler alternative to `AbortSignal` for timed dialogs.
 - `thinkingBudgets` setting to customize token budgets per thinking level for token-based providers ([#521](https://github.com/badlogic/pi-mono/issues/521))
 ## [0.37.8] - 2026-01-07
--- a/packages/coding-agent/src/core/sdk.ts
+++ b/packages/coding-agent/src/core/sdk.ts
@ -662,6 +662,7 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
 			: undefined,
 		steeringMode: settingsManager.getSteeringMode(),
 		followUpMode: settingsManager.getFollowUpMode(),
 		thinkingBudgets: settingsManager.getThinkingBudgets(),
 		getApiKey: async () => {
 			const currentModel = agent.state.model;
 			if (!currentModel) {
--- a/packages/coding-agent/src/core/settings-manager.ts
+++ b/packages/coding-agent/src/core/settings-manager.ts
@ -39,6 +39,13 @@ export interface ImageSettings {
 	blockImages?: boolean; // default: false - when true, prevents all images from being sent to LLM providers
 }
 /**
  * Custom token budgets for thinking levels as stored in settings.
  *
  * All keys are optional; only the levels the user wants to customize need
  * to be present.
  */
 export interface ThinkingBudgetsSettings {
 	/** Token budget for the "minimal" thinking level. */
 	minimal?: number;
 	/** Token budget for the "low" thinking level. */
 	low?: number;
 	/** Token budget for the "medium" thinking level. */
 	medium?: number;
 	/** Token budget for the "high" thinking level. */
 	high?: number;
 }
 export interface Settings {
 	lastChangelogVersion?: string;
 	defaultProvider?: string;
@ -59,6 +66,7 @@ export interface Settings {
 	images?: ImageSettings;
 	enabledModels?: string[]; // Model patterns for cycling (same format as --models CLI flag)
 	doubleEscapeAction?: "branch" | "tree"; // Action for double-escape with empty editor (default: "tree")
 	thinkingBudgets?: ThinkingBudgetsSettings; // Custom token budgets for thinking levels
 }
 /** Deep merge settings: project/overrides take precedence, nested objects merge recursively */
@ -381,6 +389,10 @@ export class SettingsManager {
 		};
 	}
 	/**
 	 * Get the custom thinking budgets from the current settings.
 	 *
 	 * @returns The `thinkingBudgets` settings entry as stored (not a copy),
 	 * or `undefined` when the setting is absent.
 	 */
 	getThinkingBudgets(): ThinkingBudgetsSettings | undefined {
 		return this.settings.thinkingBudgets;
 	}
 	/**
 	 * Get the `terminal.showImages` setting.
 	 *
 	 * @returns The configured value, or `true` when the `terminal` section or
 	 * its `showImages` entry is missing (`??` only falls back on
 	 * `null`/`undefined`, so an explicit `false` is preserved).
 	 */
 	getShowImages(): boolean {
 		return this.settings.terminal?.showImages ?? true;
 	}