mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 17:01:02 +00:00
fix(ai): ensure maxTokens > thinkingBudget for Claude thinking models
Claude requires max_tokens > thinking.budget_tokens. When the caller specifies a small maxTokens (e.g. compaction with ~13k tokens) and reasoning is enabled with a high budget (16k tokens), this constraint was violated. Fix: in mapOptionsForApi, add thinkingBudget on top of the caller's maxTokens (capped at model.maxTokens). If the capped total still does not exceed thinkingBudget, reduce thinkingBudget so that minOutputTokens (1024) remain available for output. Applied to both the anthropic-messages and google-gemini-cli APIs. Also adds test utilities for OAuth credential resolution and tests for compaction with thinking models. Fixes #413.
This commit is contained in:
parent
97af788344
commit
8df22faedf
4 changed files with 347 additions and 7 deletions
|
|
@ -159,6 +159,8 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
|
||||
}
|
||||
|
||||
// Claude requires max_tokens > thinking.budget_tokens
|
||||
// So we need to ensure maxTokens accounts for both thinking and output
|
||||
const anthropicBudgets = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
|
|
@ -166,10 +168,21 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
high: 16384,
|
||||
};
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
|
||||
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||
|
||||
// If not enough room for thinking + output, reduce thinking budget
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return {
|
||||
...base,
|
||||
maxTokens,
|
||||
thinkingEnabled: true,
|
||||
thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
|
||||
thinkingBudgetTokens: thinkingBudget,
|
||||
} satisfies AnthropicOptions;
|
||||
}
|
||||
|
||||
|
|
@ -234,7 +247,9 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
} satisfies GoogleGeminiCliOptions;
|
||||
}
|
||||
|
||||
// Gemini 2.x models use thinkingBudget
|
||||
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
||||
// Claude requires max_tokens > thinking.budget_tokens
|
||||
// So we need to ensure maxTokens accounts for both thinking and output
|
||||
const budgets: Record<ClampedReasoningEffort, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
|
|
@ -242,11 +257,22 @@ function mapOptionsForApi<TApi extends Api>(
|
|||
high: 16384,
|
||||
};
|
||||
|
||||
const minOutputTokens = 1024;
|
||||
let thinkingBudget = budgets[effort];
|
||||
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
||||
|
||||
// If not enough room for thinking + output, reduce thinking budget
|
||||
if (maxTokens <= thinkingBudget) {
|
||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||
}
|
||||
|
||||
return {
|
||||
...base,
|
||||
maxTokens,
|
||||
thinking: {
|
||||
enabled: true,
|
||||
budgetTokens: budgets[effort],
|
||||
budgetTokens: thinkingBudget,
|
||||
},
|
||||
} satisfies GoogleGeminiCliOptions;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue