fix(ai): ensure maxTokens > thinkingBudget for Claude thinking models

Claude requires max_tokens > thinking.budget_tokens. When caller specifies a small maxTokens (e.g. compaction with ~13k tokens) and reasoning is enabled with high budget (16k tokens), the constraint was violated. Fix: In mapOptionsForApi, add thinkingBudget on top of caller's maxTokens (capped at model.maxTokens). If still not enough room, reduce thinkingBudget to leave space for output. Applied to both anthropic-messages and google-gemini-cli APIs. Also adds test utilities for OAuth credential resolution and tests for compaction with thinking models. fixes #413
2026-04-16 22:03:45 +00:00 · 2026-01-03 02:45:30 +01:00 · 2026-01-03 02:45:30 +01:00 · 8df22faedf
commit 8df22faedf
parent 97af788344
4 changed files with 347 additions and 7 deletions
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@ -159,6 +159,8 @@ function mapOptionsForApi<TApi extends Api>(
 				return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
 			}
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
 			const anthropicBudgets = {
 				minimal: 1024,
 				low: 2048,
@ -166,10 +168,21 @@ function mapOptionsForApi<TApi extends Api>(
 				high: 16384,
 			};
 			const minOutputTokens = 1024;
 			let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 			// If not enough room for thinking + output, reduce thinking budget
 			if (maxTokens <= thinkingBudget) {
 				thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
 			}
 			return {
 				...base,
 				maxTokens,
 				thinkingEnabled: true,
-				thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
+				thinkingBudgetTokens: thinkingBudget,
 			} satisfies AnthropicOptions;
 		}
@ -234,7 +247,9 @@ function mapOptionsForApi<TApi extends Api>(
 				} satisfies GoogleGeminiCliOptions;
 			}
-			// Gemini 2.x models use thinkingBudget
+			// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
 			const budgets: Record<ClampedReasoningEffort, number> = {
 				minimal: 1024,
 				low: 2048,
@ -242,11 +257,22 @@ function mapOptionsForApi<TApi extends Api>(
 				high: 16384,
 			};
 			const minOutputTokens = 1024;
 			let thinkingBudget = budgets[effort];
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 			// If not enough room for thinking + output, reduce thinking budget
 			if (maxTokens <= thinkingBudget) {
 				thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
 			}
 			return {
 				...base,
 				maxTokens,
 				thinking: {
 					enabled: true,
-					budgetTokens: budgets[effort],
+					budgetTokens: thinkingBudget,
 				},
 			} satisfies GoogleGeminiCliOptions;
 		}
--- a/packages/coding-agent/README.md
+++ b/packages/coding-agent/README.md
@ -586,6 +586,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
 | `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
 | `terminal.showImages` | Render images inline (supported terminals) | `true` |
 | `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
 | `doubleEscapeAction` | Action for double-escape with empty editor: `tree` or `branch` | `tree` |
 | `hooks` | Additional hook file paths | `[]` |
 | `customTools` | Additional custom tool file paths | `[]` |
--- a/packages/coding-agent/test/compaction-thinking-model.test.ts
+++ b/packages/coding-agent/test/compaction-thinking-model.test.ts
@ -0,0 +1,213 @@
 /**
 * Test for compaction with thinking models.
 *
 * Tests both:
 * - Claude via Antigravity (google-gemini-cli API)
 * - Claude via real Anthropic API (anthropic-messages API)
 *
 * Reproduces issue where compact fails when maxTokens < thinkingBudget.
 */
 import { existsSync, mkdirSync, rmSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
 import { getModel, type Model } from "@mariozechner/pi-ai";
 import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
 import { AgentSession } from "../src/core/agent-session.js";
 import { ModelRegistry } from "../src/core/model-registry.js";
 import { SessionManager } from "../src/core/session-manager.js";
 import { SettingsManager } from "../src/core/settings-manager.js";
 import { codingTools } from "../src/core/tools/index.js";
 import { API_KEY, getRealAuthStorage, hasAuthForProvider, resolveApiKey } from "./utilities.js";
 // Check for auth
 const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity");
 const HAS_ANTHROPIC_AUTH = !!API_KEY;
 describe.skipIf(!HAS_ANTIGRAVITY_AUTH)("Compaction with thinking models (Antigravity)", () => {
 	let session: AgentSession;
 	let tempDir: string;
 	let apiKey: string;
 	beforeAll(async () => {
 		const key = await resolveApiKey("google-antigravity");
 		if (!key) throw new Error("Failed to resolve google-antigravity API key");
 		apiKey = key;
 	});
 	beforeEach(() => {
 		tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`);
 		mkdirSync(tempDir, { recursive: true });
 	});
 	afterEach(async () => {
 		if (session) {
 			session.dispose();
 		}
 		if (tempDir && existsSync(tempDir)) {
 			rmSync(tempDir, { recursive: true });
 		}
 	});
 	function createSession(
 		modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5",
 		thinkingLevel: ThinkingLevel = "high",
 	) {
 		const model = getModel("google-antigravity", modelId);
 		if (!model) {
 			throw new Error(`Model not found: google-antigravity/${modelId}`);
 		}
 		const agent = new Agent({
 			getApiKey: () => apiKey,
 			initialState: {
 				model,
 				systemPrompt: "You are a helpful assistant. Be concise.",
 				tools: codingTools,
 				thinkingLevel,
 			},
 		});
 		const sessionManager = SessionManager.inMemory();
 		const settingsManager = SettingsManager.create(tempDir, tempDir);
 		// Use minimal keepRecentTokens so small test conversations have something to summarize
 		// settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
 		const authStorage = getRealAuthStorage();
 		const modelRegistry = new ModelRegistry(authStorage);
 		session = new AgentSession({
 			agent,
 			sessionManager,
 			settingsManager,
 			modelRegistry,
 		});
 		session.subscribe(() => {});
 		return session;
 	}
 	it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => {
 		createSession("claude-opus-4-5-thinking", "high");
 		// Send a simple prompt
 		await session.prompt("Write down the first 10 prime numbers.");
 		await session.agent.waitForIdle();
 		// Verify we got a response
 		const messages = session.messages;
 		expect(messages.length).toBeGreaterThan(0);
 		const assistantMessages = messages.filter((m) => m.role === "assistant");
 		expect(assistantMessages.length).toBeGreaterThan(0);
 		// Now try to compact - this should not throw
 		const result = await session.compact();
 		expect(result.summary).toBeDefined();
 		expect(result.summary.length).toBeGreaterThan(0);
 		expect(result.tokensBefore).toBeGreaterThan(0);
 		// Verify session is still usable after compaction
 		const messagesAfterCompact = session.messages;
 		expect(messagesAfterCompact.length).toBeGreaterThan(0);
 		expect(messagesAfterCompact[0].role).toBe("compactionSummary");
 	}, 180000);
 	it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => {
 		createSession("claude-sonnet-4-5", "off");
 		await session.prompt("Write down the first 10 prime numbers.");
 		await session.agent.waitForIdle();
 		const messages = session.messages;
 		expect(messages.length).toBeGreaterThan(0);
 		const result = await session.compact();
 		expect(result.summary).toBeDefined();
 		expect(result.summary.length).toBeGreaterThan(0);
 	}, 180000);
 });
 // ============================================================================
 // Real Anthropic API tests (for comparison)
 // ============================================================================
 describe.skipIf(!HAS_ANTHROPIC_AUTH)("Compaction with thinking models (Anthropic)", () => {
 	let session: AgentSession;
 	let tempDir: string;
 	beforeEach(() => {
 		tempDir = join(tmpdir(), `pi-thinking-compaction-anthropic-test-${Date.now()}`);
 		mkdirSync(tempDir, { recursive: true });
 	});
 	afterEach(async () => {
 		if (session) {
 			session.dispose();
 		}
 		if (tempDir && existsSync(tempDir)) {
 			rmSync(tempDir, { recursive: true });
 		}
 	});
 	function createSession(model: Model<any>, thinkingLevel: ThinkingLevel = "high") {
 		const agent = new Agent({
 			getApiKey: () => API_KEY,
 			initialState: {
 				model,
 				systemPrompt: "You are a helpful assistant. Be concise.",
 				tools: codingTools,
 				thinkingLevel,
 			},
 		});
 		const sessionManager = SessionManager.inMemory();
 		const settingsManager = SettingsManager.create(tempDir, tempDir);
 		const authStorage = getRealAuthStorage();
 		const modelRegistry = new ModelRegistry(authStorage);
 		session = new AgentSession({
 			agent,
 			sessionManager,
 			settingsManager,
 			modelRegistry,
 		});
 		session.subscribe(() => {});
 		return session;
 	}
 	it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => {
 		const model = getModel("anthropic", "claude-3-7-sonnet-latest")!;
 		createSession(model, "high");
 		// Send a simple prompt
 		await session.prompt("Write down the first 10 prime numbers.");
 		await session.agent.waitForIdle();
 		// Verify we got a response
 		const messages = session.messages;
 		expect(messages.length).toBeGreaterThan(0);
 		const assistantMessages = messages.filter((m) => m.role === "assistant");
 		expect(assistantMessages.length).toBeGreaterThan(0);
 		// Now try to compact - this should not throw
 		const result = await session.compact();
 		expect(result.summary).toBeDefined();
 		expect(result.summary.length).toBeGreaterThan(0);
 		expect(result.tokensBefore).toBeGreaterThan(0);
 		// Verify session is still usable after compaction
 		const messagesAfterCompact = session.messages;
 		expect(messagesAfterCompact.length).toBeGreaterThan(0);
 		expect(messagesAfterCompact[0].role).toBe("compactionSummary");
 	}, 180000);
 });
--- a/packages/coding-agent/test/utilities.ts
+++ b/packages/coding-agent/test/utilities.ts
@ -2,11 +2,11 @@
 * Shared test utilities for coding-agent tests.
 */
-import { existsSync, mkdirSync, rmSync } from "node:fs";
+import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
-import { tmpdir } from "node:os";
+import { homedir, tmpdir } from "node:os";
-import { join } from "node:path";
+import { dirname, join } from "node:path";
 import { Agent } from "@mariozechner/pi-agent-core";
-import { getModel } from "@mariozechner/pi-ai";
+import { getModel, getOAuthApiKey, type OAuthCredentials, type OAuthProvider } from "@mariozechner/pi-ai";
 import { AgentSession } from "../src/core/agent-session.js";
 import { AuthStorage } from "../src/core/auth-storage.js";
 import { ModelRegistry } from "../src/core/model-registry.js";
@ -20,6 +20,106 @@ import { codingTools } from "../src/core/tools/index.js";
 */
 export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
 // ============================================================================
 // OAuth API key resolution from ~/.pi/agent/auth.json
 // ============================================================================
 const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");
 type ApiKeyCredential = {
 	type: "api_key";
 	key: string;
 };
 type OAuthCredentialEntry = {
 	type: "oauth";
 } & OAuthCredentials;
 type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;
 type AuthStorageData = Record<string, AuthCredential>;
 function loadAuthStorage(): AuthStorageData {
 	if (!existsSync(AUTH_PATH)) {
 		return {};
 	}
 	try {
 		const content = readFileSync(AUTH_PATH, "utf-8");
 		return JSON.parse(content);
 	} catch {
 		return {};
 	}
 }
 function saveAuthStorage(storage: AuthStorageData): void {
 	const configDir = dirname(AUTH_PATH);
 	if (!existsSync(configDir)) {
 		mkdirSync(configDir, { recursive: true, mode: 0o700 });
 	}
 	writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8");
 	chmodSync(AUTH_PATH, 0o600);
 }
 /**
 * Resolve API key for a provider from ~/.pi/agent/auth.json
 *
 * For API key credentials, returns the key directly.
 * For OAuth credentials, returns the access token (refreshing if expired and saving back).
 *
 * For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
 */
 export async function resolveApiKey(provider: string): Promise<string | undefined> {
 	const storage = loadAuthStorage();
 	const entry = storage[provider];
 	if (!entry) return undefined;
 	if (entry.type === "api_key") {
 		return entry.key;
 	}
 	if (entry.type === "oauth") {
 		// Build OAuthCredentials record for getOAuthApiKey
 		const oauthCredentials: Record<string, OAuthCredentials> = {};
 		for (const [key, value] of Object.entries(storage)) {
 			if (value.type === "oauth") {
 				const { type: _, ...creds } = value;
 				oauthCredentials[key] = creds;
 			}
 		}
 		const result = await getOAuthApiKey(provider as OAuthProvider, oauthCredentials);
 		if (!result) return undefined;
 		// Save refreshed credentials back to auth.json
 		storage[provider] = { type: "oauth", ...result.newCredentials };
 		saveAuthStorage(storage);
 		return result.apiKey;
 	}
 	return undefined;
 }
 /**
 * Check if a provider has credentials in ~/.pi/agent/auth.json
 */
 export function hasAuthForProvider(provider: string): boolean {
 	const storage = loadAuthStorage();
 	return provider in storage;
 }
 /** Path to the real pi agent config directory */
 export const PI_AGENT_DIR = join(homedir(), ".pi", "agent");
 /**
 * Get an AuthStorage instance backed by ~/.pi/agent/auth.json
 * Use this for tests that need real OAuth credentials.
 */
 export function getRealAuthStorage(): AuthStorage {
 	return new AuthStorage(AUTH_PATH);
 }
 /**
 * Create a minimal user message for testing.
 */