diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts index 05e742ba..5b870ae8 100644 --- a/packages/ai/src/stream.ts +++ b/packages/ai/src/stream.ts @@ -159,6 +159,8 @@ function mapOptionsForApi( return { ...base, thinkingEnabled: false } satisfies AnthropicOptions; } + // Claude requires max_tokens > thinking.budget_tokens + // So we need to ensure maxTokens accounts for both thinking and output const anthropicBudgets = { minimal: 1024, low: 2048, @@ -166,10 +168,21 @@ function mapOptionsForApi( high: 16384, }; + const minOutputTokens = 1024; + let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!]; + // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit + const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens); + + // If not enough room for thinking + output, reduce thinking budget + if (maxTokens <= thinkingBudget) { + thinkingBudget = Math.max(0, maxTokens - minOutputTokens); + } + return { ...base, + maxTokens, thinkingEnabled: true, - thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!], + thinkingBudgetTokens: thinkingBudget, } satisfies AnthropicOptions; } @@ -234,7 +247,9 @@ function mapOptionsForApi( } satisfies GoogleGeminiCliOptions; } - // Gemini 2.x models use thinkingBudget + // Models using thinkingBudget (Gemini 2.x, Claude via Antigravity) + // Claude requires max_tokens > thinking.budget_tokens + // So we need to ensure maxTokens accounts for both thinking and output const budgets: Record = { minimal: 1024, low: 2048, @@ -242,11 +257,22 @@ function mapOptionsForApi( high: 16384, }; + const minOutputTokens = 1024; + let thinkingBudget = budgets[effort]; + // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit + const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens); + + // If not enough room for thinking + output, reduce thinking budget + if (maxTokens <= thinkingBudget) { + thinkingBudget = Math.max(0, maxTokens - minOutputTokens); + } + return { ...base, + maxTokens, thinking: { enabled: true, - budgetTokens: budgets[effort], + budgetTokens: thinkingBudget, }, } satisfies GoogleGeminiCliOptions; } diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index 747d5b37..277131b6 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -586,6 +586,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences: | `retry.baseDelayMs` | Base delay for exponential backoff | `2000` | | `terminal.showImages` | Render images inline (supported terminals) | `true` | | `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` | + | `doubleEscapeAction` | Action for double-escape with empty editor: `tree` or `branch` | `tree` | | `hooks` | Additional hook file paths | `[]` | | `customTools` | Additional custom tool file paths | `[]` | diff --git a/packages/coding-agent/test/compaction-thinking-model.test.ts b/packages/coding-agent/test/compaction-thinking-model.test.ts new file mode 100644 index 00000000..85624d2f --- /dev/null +++ b/packages/coding-agent/test/compaction-thinking-model.test.ts @@ -0,0 +1,213 @@ +/** + * Test for compaction with thinking models. + * + * Tests both: + * - Claude via Antigravity (google-gemini-cli API) + * - Claude via real Anthropic API (anthropic-messages API) + * + * Reproduces issue where compact fails when maxTokens < thinkingBudget. + */ + +import { existsSync, mkdirSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core"; +import { getModel, type Model } from "@mariozechner/pi-ai"; +import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { AgentSession } from "../src/core/agent-session.js"; +import { ModelRegistry } from "../src/core/model-registry.js"; +import { SessionManager } from "../src/core/session-manager.js"; +import { SettingsManager } from "../src/core/settings-manager.js"; +import { codingTools } from "../src/core/tools/index.js"; +import { API_KEY, getRealAuthStorage, hasAuthForProvider, resolveApiKey } from "./utilities.js"; + +// Check for auth +const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity"); +const HAS_ANTHROPIC_AUTH = !!API_KEY; + +describe.skipIf(!HAS_ANTIGRAVITY_AUTH)("Compaction with thinking models (Antigravity)", () => { + let session: AgentSession; + let tempDir: string; + let apiKey: string; + + beforeAll(async () => { + const key = await resolveApiKey("google-antigravity"); + if (!key) throw new Error("Failed to resolve google-antigravity API key"); + apiKey = key; + }); + + beforeEach(() => { + tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`); + mkdirSync(tempDir, { recursive: true }); + }); + + afterEach(async () => { + if (session) { + session.dispose(); + } + if (tempDir && existsSync(tempDir)) { + rmSync(tempDir, { recursive: true }); + } + }); + + function createSession( + modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5", + thinkingLevel: ThinkingLevel = "high", + ) { + const model = getModel("google-antigravity", modelId); + if (!model) { + throw new Error(`Model not found: google-antigravity/${modelId}`); + } + + const agent = new Agent({ + getApiKey: () => apiKey, + initialState: { + model, + systemPrompt: "You are a helpful assistant. Be concise.", + tools: codingTools, + thinkingLevel, + }, + }); + + const sessionManager = SessionManager.inMemory(); + const settingsManager = SettingsManager.create(tempDir, tempDir); + // Use minimal keepRecentTokens so small test conversations have something to summarize + // settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } }); + + const authStorage = getRealAuthStorage(); + const modelRegistry = new ModelRegistry(authStorage); + + session = new AgentSession({ + agent, + sessionManager, + settingsManager, + modelRegistry, + }); + + session.subscribe(() => {}); + + return session; + } + + it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => { + createSession("claude-opus-4-5-thinking", "high"); + + // Send a simple prompt + await session.prompt("Write down the first 10 prime numbers."); + await session.agent.waitForIdle(); + + // Verify we got a response + const messages = session.messages; + expect(messages.length).toBeGreaterThan(0); + + const assistantMessages = messages.filter((m) => m.role === "assistant"); + expect(assistantMessages.length).toBeGreaterThan(0); + + // Now try to compact - this should not throw + const result = await session.compact(); + + expect(result.summary).toBeDefined(); + expect(result.summary.length).toBeGreaterThan(0); + expect(result.tokensBefore).toBeGreaterThan(0); + + // Verify session is still usable after compaction + const messagesAfterCompact = session.messages; + expect(messagesAfterCompact.length).toBeGreaterThan(0); + expect(messagesAfterCompact[0].role).toBe("compactionSummary"); + }, 180000); + + it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => { + createSession("claude-sonnet-4-5", "off"); + + await session.prompt("Write down the first 10 prime numbers."); + await session.agent.waitForIdle(); + + const messages = session.messages; + expect(messages.length).toBeGreaterThan(0); + + const result = await session.compact(); + + expect(result.summary).toBeDefined(); + expect(result.summary.length).toBeGreaterThan(0); + }, 180000); +}); + +// ============================================================================ +// Real Anthropic API tests (for comparison) +// ============================================================================ + +describe.skipIf(!HAS_ANTHROPIC_AUTH)("Compaction with thinking models (Anthropic)", () => { + let session: AgentSession; + let tempDir: string; + + beforeEach(() => { + tempDir = join(tmpdir(), `pi-thinking-compaction-anthropic-test-${Date.now()}`); + mkdirSync(tempDir, { recursive: true }); + }); + + afterEach(async () => { + if (session) { + session.dispose(); + } + if (tempDir && existsSync(tempDir)) { + rmSync(tempDir, { recursive: true }); + } + }); + + function createSession(model: Model, thinkingLevel: ThinkingLevel = "high") { + const agent = new Agent({ + getApiKey: () => API_KEY, + initialState: { + model, + systemPrompt: "You are a helpful assistant. Be concise.", + tools: codingTools, + thinkingLevel, + }, + }); + + const sessionManager = SessionManager.inMemory(); + const settingsManager = SettingsManager.create(tempDir, tempDir); + + const authStorage = getRealAuthStorage(); + const modelRegistry = new ModelRegistry(authStorage); + + session = new AgentSession({ + agent, + sessionManager, + settingsManager, + modelRegistry, + }); + + session.subscribe(() => {}); + + return session; + } + + it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => { + const model = getModel("anthropic", "claude-3-7-sonnet-latest")!; + createSession(model, "high"); + + // Send a simple prompt + await session.prompt("Write down the first 10 prime numbers."); + await session.agent.waitForIdle(); + + // Verify we got a response + const messages = session.messages; + expect(messages.length).toBeGreaterThan(0); + + const assistantMessages = messages.filter((m) => m.role === "assistant"); + expect(assistantMessages.length).toBeGreaterThan(0); + + // Now try to compact - this should not throw + const result = await session.compact(); + + expect(result.summary).toBeDefined(); + expect(result.summary.length).toBeGreaterThan(0); + expect(result.tokensBefore).toBeGreaterThan(0); + + // Verify session is still usable after compaction + const messagesAfterCompact = session.messages; + expect(messagesAfterCompact.length).toBeGreaterThan(0); + expect(messagesAfterCompact[0].role).toBe("compactionSummary"); + }, 180000); +}); diff --git a/packages/coding-agent/test/utilities.ts b/packages/coding-agent/test/utilities.ts index 3fe5d4cc..acea2e01 100644 --- a/packages/coding-agent/test/utilities.ts +++ b/packages/coding-agent/test/utilities.ts @@ -2,11 +2,11 @@ * Shared test utilities for coding-agent tests. */ -import { existsSync, mkdirSync, rmSync } from "node:fs"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; +import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { homedir, tmpdir } from "node:os"; +import { dirname, join } from "node:path"; import { Agent } from "@mariozechner/pi-agent-core"; -import { getModel } from "@mariozechner/pi-ai"; +import { getModel, getOAuthApiKey, type OAuthCredentials, type OAuthProvider } from "@mariozechner/pi-ai"; import { AgentSession } from "../src/core/agent-session.js"; import { AuthStorage } from "../src/core/auth-storage.js"; import { ModelRegistry } from "../src/core/model-registry.js"; @@ -20,6 +20,106 @@ import { codingTools } from "../src/core/tools/index.js"; */ export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; +// ============================================================================ +// OAuth API key resolution from ~/.pi/agent/auth.json +// ============================================================================ + +const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json"); + +type ApiKeyCredential = { + type: "api_key"; + key: string; +}; + +type OAuthCredentialEntry = { + type: "oauth"; +} & OAuthCredentials; + +type AuthCredential = ApiKeyCredential | OAuthCredentialEntry; + +type AuthStorageData = Record; + +function loadAuthStorage(): AuthStorageData { + if (!existsSync(AUTH_PATH)) { + return {}; + } + try { + const content = readFileSync(AUTH_PATH, "utf-8"); + return JSON.parse(content); + } catch { + return {}; + } +} + +function saveAuthStorage(storage: AuthStorageData): void { + const configDir = dirname(AUTH_PATH); + if (!existsSync(configDir)) { + mkdirSync(configDir, { recursive: true, mode: 0o700 }); + } + writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8"); + chmodSync(AUTH_PATH, 0o600); +} + +/** + * Resolve API key for a provider from ~/.pi/agent/auth.json + * + * For API key credentials, returns the key directly. + * For OAuth credentials, returns the access token (refreshing if expired and saving back). + * + * For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId } + */ +export async function resolveApiKey(provider: string): Promise { + const storage = loadAuthStorage(); + const entry = storage[provider]; + + if (!entry) return undefined; + + if (entry.type === "api_key") { + return entry.key; + } + + if (entry.type === "oauth") { + // Build OAuthCredentials record for getOAuthApiKey + const oauthCredentials: Record = {}; + for (const [key, value] of Object.entries(storage)) { + if (value.type === "oauth") { + const { type: _, ...creds } = value; + oauthCredentials[key] = creds; + } + } + + const result = await getOAuthApiKey(provider as OAuthProvider, oauthCredentials); + if (!result) return undefined; + + // Save refreshed credentials back to auth.json + storage[provider] = { type: "oauth", ...result.newCredentials }; + saveAuthStorage(storage); + + return result.apiKey; + } + + return undefined; +} + +/** + * Check if a provider has credentials in ~/.pi/agent/auth.json + */ +export function hasAuthForProvider(provider: string): boolean { + const storage = loadAuthStorage(); + return provider in storage; +} + +/** Path to the real pi agent config directory */ +export const PI_AGENT_DIR = join(homedir(), ".pi", "agent"); + +/** + * Get an AuthStorage instance backed by ~/.pi/agent/auth.json + * Use this for tests that need real OAuth credentials. + */ +export function getRealAuthStorage(): AuthStorage { + return new AuthStorage(AUTH_PATH); +} + /** * Create a minimal user message for testing. */