fix(ai): ensure maxTokens > thinkingBudget for Claude thinking models

Claude requires max_tokens > thinking.budget_tokens. When caller specifies
a small maxTokens (e.g. compaction with ~13k tokens) and reasoning is enabled
with high budget (16k tokens), the constraint was violated.

Fix: In mapOptionsForApi, add thinkingBudget on top of caller's maxTokens
(capped at model.maxTokens). If still not enough room, reduce thinkingBudget
to leave space for output.

Applied to both anthropic-messages and google-gemini-cli APIs.

Also adds test utilities for OAuth credential resolution and tests for
compaction with thinking models.

fixes #413
This commit is contained in:
Mario Zechner 2026-01-03 02:45:30 +01:00
parent 97af788344
commit 8df22faedf
4 changed files with 347 additions and 7 deletions

View file

@ -0,0 +1,213 @@
/**
* Test for compaction with thinking models.
*
* Tests both:
* - Claude via Antigravity (google-gemini-cli API)
* - Claude via real Anthropic API (anthropic-messages API)
*
* Reproduces issue where compact fails when maxTokens < thinkingBudget.
*/
import { existsSync, mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
import { getModel, type Model } from "@mariozechner/pi-ai";
import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
import { AgentSession } from "../src/core/agent-session.js";
import { ModelRegistry } from "../src/core/model-registry.js";
import { SessionManager } from "../src/core/session-manager.js";
import { SettingsManager } from "../src/core/settings-manager.js";
import { codingTools } from "../src/core/tools/index.js";
import { API_KEY, getRealAuthStorage, hasAuthForProvider, resolveApiKey } from "./utilities.js";
// Check for auth
const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity");
const HAS_ANTHROPIC_AUTH = !!API_KEY;
describe.skipIf(!HAS_ANTIGRAVITY_AUTH)("Compaction with thinking models (Antigravity)", () => {
let session: AgentSession;
let tempDir: string;
let apiKey: string;
beforeAll(async () => {
const key = await resolveApiKey("google-antigravity");
if (!key) throw new Error("Failed to resolve google-antigravity API key");
apiKey = key;
});
beforeEach(() => {
tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`);
mkdirSync(tempDir, { recursive: true });
});
afterEach(async () => {
if (session) {
session.dispose();
}
if (tempDir && existsSync(tempDir)) {
rmSync(tempDir, { recursive: true });
}
});
function createSession(
modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5",
thinkingLevel: ThinkingLevel = "high",
) {
const model = getModel("google-antigravity", modelId);
if (!model) {
throw new Error(`Model not found: google-antigravity/${modelId}`);
}
const agent = new Agent({
getApiKey: () => apiKey,
initialState: {
model,
systemPrompt: "You are a helpful assistant. Be concise.",
tools: codingTools,
thinkingLevel,
},
});
const sessionManager = SessionManager.inMemory();
const settingsManager = SettingsManager.create(tempDir, tempDir);
// Use minimal keepRecentTokens so small test conversations have something to summarize
// settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
const authStorage = getRealAuthStorage();
const modelRegistry = new ModelRegistry(authStorage);
session = new AgentSession({
agent,
sessionManager,
settingsManager,
modelRegistry,
});
session.subscribe(() => {});
return session;
}
it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => {
createSession("claude-opus-4-5-thinking", "high");
// Send a simple prompt
await session.prompt("Write down the first 10 prime numbers.");
await session.agent.waitForIdle();
// Verify we got a response
const messages = session.messages;
expect(messages.length).toBeGreaterThan(0);
const assistantMessages = messages.filter((m) => m.role === "assistant");
expect(assistantMessages.length).toBeGreaterThan(0);
// Now try to compact - this should not throw
const result = await session.compact();
expect(result.summary).toBeDefined();
expect(result.summary.length).toBeGreaterThan(0);
expect(result.tokensBefore).toBeGreaterThan(0);
// Verify session is still usable after compaction
const messagesAfterCompact = session.messages;
expect(messagesAfterCompact.length).toBeGreaterThan(0);
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
}, 180000);
it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => {
createSession("claude-sonnet-4-5", "off");
await session.prompt("Write down the first 10 prime numbers.");
await session.agent.waitForIdle();
const messages = session.messages;
expect(messages.length).toBeGreaterThan(0);
const result = await session.compact();
expect(result.summary).toBeDefined();
expect(result.summary.length).toBeGreaterThan(0);
}, 180000);
});
// ============================================================================
// Real Anthropic API tests (for comparison)
// ============================================================================
describe.skipIf(!HAS_ANTHROPIC_AUTH)("Compaction with thinking models (Anthropic)", () => {
let session: AgentSession;
let tempDir: string;
beforeEach(() => {
tempDir = join(tmpdir(), `pi-thinking-compaction-anthropic-test-${Date.now()}`);
mkdirSync(tempDir, { recursive: true });
});
afterEach(async () => {
if (session) {
session.dispose();
}
if (tempDir && existsSync(tempDir)) {
rmSync(tempDir, { recursive: true });
}
});
function createSession(model: Model<any>, thinkingLevel: ThinkingLevel = "high") {
const agent = new Agent({
getApiKey: () => API_KEY,
initialState: {
model,
systemPrompt: "You are a helpful assistant. Be concise.",
tools: codingTools,
thinkingLevel,
},
});
const sessionManager = SessionManager.inMemory();
const settingsManager = SettingsManager.create(tempDir, tempDir);
const authStorage = getRealAuthStorage();
const modelRegistry = new ModelRegistry(authStorage);
session = new AgentSession({
agent,
sessionManager,
settingsManager,
modelRegistry,
});
session.subscribe(() => {});
return session;
}
it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => {
const model = getModel("anthropic", "claude-3-7-sonnet-latest")!;
createSession(model, "high");
// Send a simple prompt
await session.prompt("Write down the first 10 prime numbers.");
await session.agent.waitForIdle();
// Verify we got a response
const messages = session.messages;
expect(messages.length).toBeGreaterThan(0);
const assistantMessages = messages.filter((m) => m.role === "assistant");
expect(assistantMessages.length).toBeGreaterThan(0);
// Now try to compact - this should not throw
const result = await session.compact();
expect(result.summary).toBeDefined();
expect(result.summary.length).toBeGreaterThan(0);
expect(result.tokensBefore).toBeGreaterThan(0);
// Verify session is still usable after compaction
const messagesAfterCompact = session.messages;
expect(messagesAfterCompact.length).toBeGreaterThan(0);
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
}, 180000);
});

View file

@ -2,11 +2,11 @@
* Shared test utilities for coding-agent tests.
*/
import { existsSync, mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
import { homedir, tmpdir } from "node:os";
import { dirname, join } from "node:path";
import { Agent } from "@mariozechner/pi-agent-core";
import { getModel } from "@mariozechner/pi-ai";
import { getModel, getOAuthApiKey, type OAuthCredentials, type OAuthProvider } from "@mariozechner/pi-ai";
import { AgentSession } from "../src/core/agent-session.js";
import { AuthStorage } from "../src/core/auth-storage.js";
import { ModelRegistry } from "../src/core/model-registry.js";
@ -20,6 +20,106 @@ import { codingTools } from "../src/core/tools/index.js";
*/
export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
// ============================================================================
// OAuth API key resolution from ~/.pi/agent/auth.json
// ============================================================================
const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");
type ApiKeyCredential = {
type: "api_key";
key: string;
};
type OAuthCredentialEntry = {
type: "oauth";
} & OAuthCredentials;
type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;
type AuthStorageData = Record<string, AuthCredential>;
function loadAuthStorage(): AuthStorageData {
if (!existsSync(AUTH_PATH)) {
return {};
}
try {
const content = readFileSync(AUTH_PATH, "utf-8");
return JSON.parse(content);
} catch {
return {};
}
}
function saveAuthStorage(storage: AuthStorageData): void {
const configDir = dirname(AUTH_PATH);
if (!existsSync(configDir)) {
mkdirSync(configDir, { recursive: true, mode: 0o700 });
}
writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8");
chmodSync(AUTH_PATH, 0o600);
}
/**
* Resolve API key for a provider from ~/.pi/agent/auth.json
*
* For API key credentials, returns the key directly.
* For OAuth credentials, returns the access token (refreshing if expired and saving back).
*
* For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
*/
export async function resolveApiKey(provider: string): Promise<string | undefined> {
const storage = loadAuthStorage();
const entry = storage[provider];
if (!entry) return undefined;
if (entry.type === "api_key") {
return entry.key;
}
if (entry.type === "oauth") {
// Build OAuthCredentials record for getOAuthApiKey
const oauthCredentials: Record<string, OAuthCredentials> = {};
for (const [key, value] of Object.entries(storage)) {
if (value.type === "oauth") {
const { type: _, ...creds } = value;
oauthCredentials[key] = creds;
}
}
const result = await getOAuthApiKey(provider as OAuthProvider, oauthCredentials);
if (!result) return undefined;
// Save refreshed credentials back to auth.json
storage[provider] = { type: "oauth", ...result.newCredentials };
saveAuthStorage(storage);
return result.apiKey;
}
return undefined;
}
/**
* Check if a provider has credentials in ~/.pi/agent/auth.json
*/
export function hasAuthForProvider(provider: string): boolean {
const storage = loadAuthStorage();
return provider in storage;
}
/** Path to the real pi agent config directory */
export const PI_AGENT_DIR = join(homedir(), ".pi", "agent");
/**
* Get an AuthStorage instance backed by ~/.pi/agent/auth.json
* Use this for tests that need real OAuth credentials.
*/
export function getRealAuthStorage(): AuthStorage {
return new AuthStorage(AUTH_PATH);
}
/**
* Create a minimal user message for testing.
*/