mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 10:05:14 +00:00
fix(ai): ensure maxTokens > thinkingBudget for Claude thinking models
Claude requires max_tokens > thinking.budget_tokens. When caller specifies a small maxTokens (e.g. compaction with ~13k tokens) and reasoning is enabled with high budget (16k tokens), the constraint was violated. Fix: In mapOptionsForApi, add thinkingBudget on top of caller's maxTokens (capped at model.maxTokens). If still not enough room, reduce thinkingBudget to leave space for output. Applied to both anthropic-messages and google-gemini-cli APIs. Also adds test utilities for OAuth credential resolution and tests for compaction with thinking models. fixes #413
This commit is contained in:
parent
97af788344
commit
8df22faedf
4 changed files with 347 additions and 7 deletions
|
|
@ -159,6 +159,8 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
|
||||
}
|
||||
|
||||
// Claude requires max_tokens > thinking.budget_tokens
|
||||
// So we need to ensure maxTokens accounts for both thinking and output
|
||||
const anthropicBudgets = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
|
|
@ -166,10 +168,21 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
high: 16384,
|
||||
};
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
|
||||
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||
|
||||
// If not enough room for thinking + output, reduce thinking budget
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return {
|
||||
...base,
|
||||
maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
|
||||
thinkingBudgetTokens: thinkingBudget,
|
||||
} satisfies AnthropicOptions;
|
||||
}
|
||||
|
||||
|
|
@ -234,7 +247,9 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
} satisfies GoogleGeminiCliOptions;
|
||||
}
|
||||
|
||||
// Gemini 2.x models use thinkingBudget
|
||||
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
||||
// Claude requires max_tokens > thinking.budget_tokens
|
||||
// So we need to ensure maxTokens accounts for both thinking and output
|
||||
const budgets: Record<ClampedReasoningEffort, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
|
|
@ -242,11 +257,22 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
high: 16384,
|
||||
};
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
let thinkingBudget = budgets[effort];
|
||||
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||
|
||||
// If not enough room for thinking + output, reduce thinking budget
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return {
|
||||
...base,
|
||||
maxTokens,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: budgets[effort],
|
||||
budgetTokens: thinkingBudget,
|
||||
},
|
||||
} satisfies GoogleGeminiCliOptions;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -586,6 +586,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
|
|||
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
|
||||
| `terminal.showImages` | Render images inline (supported terminals) | `true` |
|
||||
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
|
||||
|
||||
| `doubleEscapeAction` | Action for double-escape with empty editor: `tree` or `branch` | `tree` |
|
||||
| `hooks` | Additional hook file paths | `[]` |
|
||||
| `customTools` | Additional custom tool file paths | `[]` |
|
||||
|
|
|
|||
213
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
213
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* Test for compaction with thinking models.
|
||||
*
|
||||
* Tests both:
|
||||
* - Claude via Antigravity (google-gemini-cli API)
|
||||
* - Claude via real Anthropic API (anthropic-messages API)
|
||||
*
|
||||
* Reproduces issue where compact fails when maxTokens < thinkingBudget.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||
import { getModel, type Model } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
import { API_KEY, getRealAuthStorage, hasAuthForProvider, resolveApiKey } from "./utilities.js";
|
||||
|
||||
// Check for auth
|
||||
const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity");
|
||||
const HAS_ANTHROPIC_AUTH = !!API_KEY;
|
||||
|
||||
describe.skipIf(!HAS_ANTIGRAVITY_AUTH)("Compaction with thinking models (Antigravity)", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
let apiKey: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
const key = await resolveApiKey("google-antigravity");
|
||||
if (!key) throw new Error("Failed to resolve google-antigravity API key");
|
||||
apiKey = key;
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(
|
||||
modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5",
|
||||
thinkingLevel: ThinkingLevel = "high",
|
||||
) {
|
||||
const model = getModel("google-antigravity", modelId);
|
||||
if (!model) {
|
||||
throw new Error(`Model not found: google-antigravity/${modelId}`);
|
||||
}
|
||||
|
||||
const agent = new Agent({
|
||||
getApiKey: () => apiKey,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
thinkingLevel,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
// Use minimal keepRecentTokens so small test conversations have something to summarize
|
||||
// settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
|
||||
|
||||
const authStorage = getRealAuthStorage();
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
modelRegistry,
|
||||
});
|
||||
|
||||
session.subscribe(() => {});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => {
|
||||
createSession("claude-opus-4-5-thinking", "high");
|
||||
|
||||
// Send a simple prompt
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Verify we got a response
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||
|
||||
// Now try to compact - this should not throw
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Verify session is still usable after compaction
|
||||
const messagesAfterCompact = session.messages;
|
||||
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||
}, 180000);
|
||||
|
||||
it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => {
|
||||
createSession("claude-sonnet-4-5", "off");
|
||||
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
}, 180000);
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Real Anthropic API tests (for comparison)
|
||||
// ============================================================================
|
||||
|
||||
describe.skipIf(!HAS_ANTHROPIC_AUTH)("Compaction with thinking models (Anthropic)", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-thinking-compaction-anthropic-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(model: Model<any>, thinkingLevel: ThinkingLevel = "high") {
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
thinkingLevel,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
|
||||
const authStorage = getRealAuthStorage();
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
modelRegistry,
|
||||
});
|
||||
|
||||
session.subscribe(() => {});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => {
|
||||
const model = getModel("anthropic", "claude-3-7-sonnet-latest")!;
|
||||
createSession(model, "high");
|
||||
|
||||
// Send a simple prompt
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Verify we got a response
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||
|
||||
// Now try to compact - this should not throw
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Verify session is still usable after compaction
|
||||
const messagesAfterCompact = session.messages;
|
||||
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||
}, 180000);
|
||||
});
|
||||
|
|
@ -2,11 +2,11 @@
|
|||
* Shared test utilities for coding-agent tests.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { homedir, tmpdir } from "node:os";
|
||||
import { dirname, join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { getModel, getOAuthApiKey, type OAuthCredentials, type OAuthProvider } from "@mariozechner/pi-ai";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
|
|
@ -20,6 +20,106 @@ import { codingTools } from "../src/core/tools/index.js";
|
|||
*/
|
||||
export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
||||
|
||||
// ============================================================================
|
||||
// OAuth API key resolution from ~/.pi/agent/auth.json
|
||||
// ============================================================================
|
||||
|
||||
const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");
|
||||
|
||||
type ApiKeyCredential = {
|
||||
type: "api_key";
|
||||
key: string;
|
||||
};
|
||||
|
||||
type OAuthCredentialEntry = {
|
||||
type: "oauth";
|
||||
} & OAuthCredentials;
|
||||
|
||||
type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;
|
||||
|
||||
type AuthStorageData = Record<string, AuthCredential>;
|
||||
|
||||
function loadAuthStorage(): AuthStorageData {
|
||||
if (!existsSync(AUTH_PATH)) {
|
||||
return {};
|
||||
}
|
||||
try {
|
||||
const content = readFileSync(AUTH_PATH, "utf-8");
|
||||
return JSON.parse(content);
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
function saveAuthStorage(storage: AuthStorageData): void {
|
||||
const configDir = dirname(AUTH_PATH);
|
||||
if (!existsSync(configDir)) {
|
||||
mkdirSync(configDir, { recursive: true, mode: 0o700 });
|
||||
}
|
||||
writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8");
|
||||
chmodSync(AUTH_PATH, 0o600);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve API key for a provider from ~/.pi/agent/auth.json
|
||||
*
|
||||
* For API key credentials, returns the key directly.
|
||||
* For OAuth credentials, returns the access token (refreshing if expired and saving back).
|
||||
*
|
||||
* For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
|
||||
*/
|
||||
export async function resolveApiKey(provider: string): Promise<string | undefined> {
|
||||
const storage = loadAuthStorage();
|
||||
const entry = storage[provider];
|
||||
|
||||
if (!entry) return undefined;
|
||||
|
||||
if (entry.type === "api_key") {
|
||||
return entry.key;
|
||||
}
|
||||
|
||||
if (entry.type === "oauth") {
|
||||
// Build OAuthCredentials record for getOAuthApiKey
|
||||
const oauthCredentials: Record<string, OAuthCredentials> = {};
|
||||
for (const [key, value] of Object.entries(storage)) {
|
||||
if (value.type === "oauth") {
|
||||
const { type: _, ...creds } = value;
|
||||
oauthCredentials[key] = creds;
|
||||
}
|
||||
}
|
||||
|
||||
const result = await getOAuthApiKey(provider as OAuthProvider, oauthCredentials);
|
||||
if (!result) return undefined;
|
||||
|
||||
// Save refreshed credentials back to auth.json
|
||||
storage[provider] = { type: "oauth", ...result.newCredentials };
|
||||
saveAuthStorage(storage);
|
||||
|
||||
return result.apiKey;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a provider has credentials in ~/.pi/agent/auth.json
|
||||
*/
|
||||
export function hasAuthForProvider(provider: string): boolean {
|
||||
const storage = loadAuthStorage();
|
||||
return provider in storage;
|
||||
}
|
||||
|
||||
/** Path to the real pi agent config directory */
|
||||
export const PI_AGENT_DIR = join(homedir(), ".pi", "agent");
|
||||
|
||||
/**
|
||||
* Get an AuthStorage instance backed by ~/.pi/agent/auth.json
|
||||
* Use this for tests that need real OAuth credentials.
|
||||
*/
|
||||
export function getRealAuthStorage(): AuthStorage {
|
||||
return new AuthStorage(AUTH_PATH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal user message for testing.
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue