mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 22:03:45 +00:00
fix(ai): ensure maxTokens > thinkingBudget for Claude thinking models
Claude requires max_tokens > thinking.budget_tokens. When caller specifies a small maxTokens (e.g. compaction with ~13k tokens) and reasoning is enabled with high budget (16k tokens), the constraint was violated. Fix: In mapOptionsForApi, add thinkingBudget on top of caller's maxTokens (capped at model.maxTokens). If still not enough room, reduce thinkingBudget to leave space for output. Applied to both anthropic-messages and google-gemini-cli APIs. Also adds test utilities for OAuth credential resolution and tests for compaction with thinking models. fixes #413
This commit is contained in:
parent
97af788344
commit
8df22faedf
4 changed files with 347 additions and 7 deletions
|
|
@ -159,6 +159,8 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
|
return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Claude requires max_tokens > thinking.budget_tokens
|
||||||
|
// So we need to ensure maxTokens accounts for both thinking and output
|
||||||
const anthropicBudgets = {
|
const anthropicBudgets = {
|
||||||
minimal: 1024,
|
minimal: 1024,
|
||||||
low: 2048,
|
low: 2048,
|
||||||
|
|
@ -166,10 +168,21 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
high: 16384,
|
high: 16384,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const minOutputTokens = 1024;
|
||||||
|
let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
|
||||||
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||||
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||||
|
|
||||||
|
// If not enough room for thinking + output, reduce thinking budget
|
||||||
|
if (maxTokens <= thinkingBudget) {
|
||||||
|
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
|
maxTokens,
|
||||||
thinkingEnabled: true,
|
thinkingEnabled: true,
|
||||||
thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
|
thinkingBudgetTokens: thinkingBudget,
|
||||||
} satisfies AnthropicOptions;
|
} satisfies AnthropicOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -234,7 +247,9 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
} satisfies GoogleGeminiCliOptions;
|
} satisfies GoogleGeminiCliOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Gemini 2.x models use thinkingBudget
|
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
||||||
|
// Claude requires max_tokens > thinking.budget_tokens
|
||||||
|
// So we need to ensure maxTokens accounts for both thinking and output
|
||||||
const budgets: Record<ClampedReasoningEffort, number> = {
|
const budgets: Record<ClampedReasoningEffort, number> = {
|
||||||
minimal: 1024,
|
minimal: 1024,
|
||||||
low: 2048,
|
low: 2048,
|
||||||
|
|
@ -242,11 +257,22 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
high: 16384,
|
high: 16384,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const minOutputTokens = 1024;
|
||||||
|
let thinkingBudget = budgets[effort];
|
||||||
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||||
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||||
|
|
||||||
|
// If not enough room for thinking + output, reduce thinking budget
|
||||||
|
if (maxTokens <= thinkingBudget) {
|
||||||
|
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||||
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
|
maxTokens,
|
||||||
thinking: {
|
thinking: {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
budgetTokens: budgets[effort],
|
budgetTokens: thinkingBudget,
|
||||||
},
|
},
|
||||||
} satisfies GoogleGeminiCliOptions;
|
} satisfies GoogleGeminiCliOptions;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -586,6 +586,7 @@ Global `~/.pi/agent/settings.json` stores persistent preferences:
|
||||||
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
|
| `retry.baseDelayMs` | Base delay for exponential backoff | `2000` |
|
||||||
| `terminal.showImages` | Render images inline (supported terminals) | `true` |
|
| `terminal.showImages` | Render images inline (supported terminals) | `true` |
|
||||||
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
|
| `images.autoResize` | Auto-resize images to 2000x2000 max for better model compatibility | `true` |
|
||||||
|
|
||||||
| `doubleEscapeAction` | Action for double-escape with empty editor: `tree` or `branch` | `tree` |
|
| `doubleEscapeAction` | Action for double-escape with empty editor: `tree` or `branch` | `tree` |
|
||||||
| `hooks` | Additional hook file paths | `[]` |
|
| `hooks` | Additional hook file paths | `[]` |
|
||||||
| `customTools` | Additional custom tool file paths | `[]` |
|
| `customTools` | Additional custom tool file paths | `[]` |
|
||||||
|
|
|
||||||
213
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
213
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
||||||
|
/**
|
||||||
|
* Test for compaction with thinking models.
|
||||||
|
*
|
||||||
|
* Tests both:
|
||||||
|
* - Claude via Antigravity (google-gemini-cli API)
|
||||||
|
* - Claude via real Anthropic API (anthropic-messages API)
|
||||||
|
*
|
||||||
|
* Reproduces issue where compact fails when maxTokens < thinkingBudget.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||||
|
import { getModel, type Model } from "@mariozechner/pi-ai";
|
||||||
|
import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||||
|
import { AgentSession } from "../src/core/agent-session.js";
|
||||||
|
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||||
|
import { SessionManager } from "../src/core/session-manager.js";
|
||||||
|
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||||
|
import { codingTools } from "../src/core/tools/index.js";
|
||||||
|
import { API_KEY, getRealAuthStorage, hasAuthForProvider, resolveApiKey } from "./utilities.js";
|
||||||
|
|
||||||
|
// Check for auth
|
||||||
|
const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity");
|
||||||
|
const HAS_ANTHROPIC_AUTH = !!API_KEY;
|
||||||
|
|
||||||
|
describe.skipIf(!HAS_ANTIGRAVITY_AUTH)("Compaction with thinking models (Antigravity)", () => {
|
||||||
|
let session: AgentSession;
|
||||||
|
let tempDir: string;
|
||||||
|
let apiKey: string;
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
const key = await resolveApiKey("google-antigravity");
|
||||||
|
if (!key) throw new Error("Failed to resolve google-antigravity API key");
|
||||||
|
apiKey = key;
|
||||||
|
});
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`);
|
||||||
|
mkdirSync(tempDir, { recursive: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
if (session) {
|
||||||
|
session.dispose();
|
||||||
|
}
|
||||||
|
if (tempDir && existsSync(tempDir)) {
|
||||||
|
rmSync(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function createSession(
|
||||||
|
modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5",
|
||||||
|
thinkingLevel: ThinkingLevel = "high",
|
||||||
|
) {
|
||||||
|
const model = getModel("google-antigravity", modelId);
|
||||||
|
if (!model) {
|
||||||
|
throw new Error(`Model not found: google-antigravity/${modelId}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const agent = new Agent({
|
||||||
|
getApiKey: () => apiKey,
|
||||||
|
initialState: {
|
||||||
|
model,
|
||||||
|
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||||
|
tools: codingTools,
|
||||||
|
thinkingLevel,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const sessionManager = SessionManager.inMemory();
|
||||||
|
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||||
|
// Use minimal keepRecentTokens so small test conversations have something to summarize
|
||||||
|
// settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
|
||||||
|
|
||||||
|
const authStorage = getRealAuthStorage();
|
||||||
|
const modelRegistry = new ModelRegistry(authStorage);
|
||||||
|
|
||||||
|
session = new AgentSession({
|
||||||
|
agent,
|
||||||
|
sessionManager,
|
||||||
|
settingsManager,
|
||||||
|
modelRegistry,
|
||||||
|
});
|
||||||
|
|
||||||
|
session.subscribe(() => {});
|
||||||
|
|
||||||
|
return session;
|
||||||
|
}
|
||||||
|
|
||||||
|
it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => {
|
||||||
|
createSession("claude-opus-4-5-thinking", "high");
|
||||||
|
|
||||||
|
// Send a simple prompt
|
||||||
|
await session.prompt("Write down the first 10 prime numbers.");
|
||||||
|
await session.agent.waitForIdle();
|
||||||
|
|
||||||
|
// Verify we got a response
|
||||||
|
const messages = session.messages;
|
||||||
|
expect(messages.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||||
|
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Now try to compact - this should not throw
|
||||||
|
const result = await session.compact();
|
||||||
|
|
||||||
|
expect(result.summary).toBeDefined();
|
||||||
|
expect(result.summary.length).toBeGreaterThan(0);
|
||||||
|
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Verify session is still usable after compaction
|
||||||
|
const messagesAfterCompact = session.messages;
|
||||||
|
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||||
|
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||||
|
}, 180000);
|
||||||
|
|
||||||
|
it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => {
|
||||||
|
createSession("claude-sonnet-4-5", "off");
|
||||||
|
|
||||||
|
await session.prompt("Write down the first 10 prime numbers.");
|
||||||
|
await session.agent.waitForIdle();
|
||||||
|
|
||||||
|
const messages = session.messages;
|
||||||
|
expect(messages.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const result = await session.compact();
|
||||||
|
|
||||||
|
expect(result.summary).toBeDefined();
|
||||||
|
expect(result.summary.length).toBeGreaterThan(0);
|
||||||
|
}, 180000);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Real Anthropic API tests (for comparison)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
describe.skipIf(!HAS_ANTHROPIC_AUTH)("Compaction with thinking models (Anthropic)", () => {
|
||||||
|
let session: AgentSession;
|
||||||
|
let tempDir: string;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
tempDir = join(tmpdir(), `pi-thinking-compaction-anthropic-test-${Date.now()}`);
|
||||||
|
mkdirSync(tempDir, { recursive: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
if (session) {
|
||||||
|
session.dispose();
|
||||||
|
}
|
||||||
|
if (tempDir && existsSync(tempDir)) {
|
||||||
|
rmSync(tempDir, { recursive: true });
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
function createSession(model: Model<any>, thinkingLevel: ThinkingLevel = "high") {
|
||||||
|
const agent = new Agent({
|
||||||
|
getApiKey: () => API_KEY,
|
||||||
|
initialState: {
|
||||||
|
model,
|
||||||
|
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||||
|
tools: codingTools,
|
||||||
|
thinkingLevel,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
const sessionManager = SessionManager.inMemory();
|
||||||
|
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||||
|
|
||||||
|
const authStorage = getRealAuthStorage();
|
||||||
|
const modelRegistry = new ModelRegistry(authStorage);
|
||||||
|
|
||||||
|
session = new AgentSession({
|
||||||
|
agent,
|
||||||
|
sessionManager,
|
||||||
|
settingsManager,
|
||||||
|
modelRegistry,
|
||||||
|
});
|
||||||
|
|
||||||
|
session.subscribe(() => {});
|
||||||
|
|
||||||
|
return session;
|
||||||
|
}
|
||||||
|
|
||||||
|
it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => {
|
||||||
|
const model = getModel("anthropic", "claude-3-7-sonnet-latest")!;
|
||||||
|
createSession(model, "high");
|
||||||
|
|
||||||
|
// Send a simple prompt
|
||||||
|
await session.prompt("Write down the first 10 prime numbers.");
|
||||||
|
await session.agent.waitForIdle();
|
||||||
|
|
||||||
|
// Verify we got a response
|
||||||
|
const messages = session.messages;
|
||||||
|
expect(messages.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||||
|
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Now try to compact - this should not throw
|
||||||
|
const result = await session.compact();
|
||||||
|
|
||||||
|
expect(result.summary).toBeDefined();
|
||||||
|
expect(result.summary.length).toBeGreaterThan(0);
|
||||||
|
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||||
|
|
||||||
|
// Verify session is still usable after compaction
|
||||||
|
const messagesAfterCompact = session.messages;
|
||||||
|
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||||
|
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||||
|
}, 180000);
|
||||||
|
});
|
||||||
|
|
@ -2,11 +2,11 @@
|
||||||
* Shared test utilities for coding-agent tests.
|
* Shared test utilities for coding-agent tests.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
import { chmodSync, existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||||
import { tmpdir } from "node:os";
|
import { homedir, tmpdir } from "node:os";
|
||||||
import { join } from "node:path";
|
import { dirname, join } from "node:path";
|
||||||
import { Agent } from "@mariozechner/pi-agent-core";
|
import { Agent } from "@mariozechner/pi-agent-core";
|
||||||
import { getModel } from "@mariozechner/pi-ai";
|
import { getModel, getOAuthApiKey, type OAuthCredentials, type OAuthProvider } from "@mariozechner/pi-ai";
|
||||||
import { AgentSession } from "../src/core/agent-session.js";
|
import { AgentSession } from "../src/core/agent-session.js";
|
||||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||||
|
|
@ -20,6 +20,106 @@ import { codingTools } from "../src/core/tools/index.js";
|
||||||
*/
|
*/
|
||||||
export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// OAuth API key resolution from ~/.pi/agent/auth.json
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");
|
||||||
|
|
||||||
|
type ApiKeyCredential = {
|
||||||
|
type: "api_key";
|
||||||
|
key: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
type OAuthCredentialEntry = {
|
||||||
|
type: "oauth";
|
||||||
|
} & OAuthCredentials;
|
||||||
|
|
||||||
|
type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;
|
||||||
|
|
||||||
|
type AuthStorageData = Record<string, AuthCredential>;
|
||||||
|
|
||||||
|
function loadAuthStorage(): AuthStorageData {
|
||||||
|
if (!existsSync(AUTH_PATH)) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const content = readFileSync(AUTH_PATH, "utf-8");
|
||||||
|
return JSON.parse(content);
|
||||||
|
} catch {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function saveAuthStorage(storage: AuthStorageData): void {
|
||||||
|
const configDir = dirname(AUTH_PATH);
|
||||||
|
if (!existsSync(configDir)) {
|
||||||
|
mkdirSync(configDir, { recursive: true, mode: 0o700 });
|
||||||
|
}
|
||||||
|
writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8");
|
||||||
|
chmodSync(AUTH_PATH, 0o600);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve API key for a provider from ~/.pi/agent/auth.json
|
||||||
|
*
|
||||||
|
* For API key credentials, returns the key directly.
|
||||||
|
* For OAuth credentials, returns the access token (refreshing if expired and saving back).
|
||||||
|
*
|
||||||
|
* For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
|
||||||
|
*/
|
||||||
|
export async function resolveApiKey(provider: string): Promise<string | undefined> {
|
||||||
|
const storage = loadAuthStorage();
|
||||||
|
const entry = storage[provider];
|
||||||
|
|
||||||
|
if (!entry) return undefined;
|
||||||
|
|
||||||
|
if (entry.type === "api_key") {
|
||||||
|
return entry.key;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (entry.type === "oauth") {
|
||||||
|
// Build OAuthCredentials record for getOAuthApiKey
|
||||||
|
const oauthCredentials: Record<string, OAuthCredentials> = {};
|
||||||
|
for (const [key, value] of Object.entries(storage)) {
|
||||||
|
if (value.type === "oauth") {
|
||||||
|
const { type: _, ...creds } = value;
|
||||||
|
oauthCredentials[key] = creds;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await getOAuthApiKey(provider as OAuthProvider, oauthCredentials);
|
||||||
|
if (!result) return undefined;
|
||||||
|
|
||||||
|
// Save refreshed credentials back to auth.json
|
||||||
|
storage[provider] = { type: "oauth", ...result.newCredentials };
|
||||||
|
saveAuthStorage(storage);
|
||||||
|
|
||||||
|
return result.apiKey;
|
||||||
|
}
|
||||||
|
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if a provider has credentials in ~/.pi/agent/auth.json
|
||||||
|
*/
|
||||||
|
export function hasAuthForProvider(provider: string): boolean {
|
||||||
|
const storage = loadAuthStorage();
|
||||||
|
return provider in storage;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Path to the real pi agent config directory */
|
||||||
|
export const PI_AGENT_DIR = join(homedir(), ".pi", "agent");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an AuthStorage instance backed by ~/.pi/agent/auth.json
|
||||||
|
* Use this for tests that need real OAuth credentials.
|
||||||
|
*/
|
||||||
|
export function getRealAuthStorage(): AuthStorage {
|
||||||
|
return new AuthStorage(AUTH_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a minimal user message for testing.
|
* Create a minimal user message for testing.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue