mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-19 19:04:41 +00:00
fix: ensure max_tokens > thinking.budget_tokens for bedrock claude (#797)
Bedrock Claude models require max_tokens to exceed thinking.budget_tokens. This constraint was handled for the anthropic-messages API but was missing for bedrock-converse-stream, causing compaction failures. Extracted an adjustMaxTokensForThinking() helper that:
- adds the thinking budget on top of the desired output tokens;
- reduces the thinking budget if there is insufficient room (keeping a minimum of 1024 output tokens);
- is applied to both the anthropic-messages and bedrock-converse-stream APIs.
This commit is contained in:
parent
fc538f6ca1
commit
cd43b8a9ca
1 changed files with 70 additions and 21 deletions
|
|
@ -217,6 +217,39 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
// Helper to clamp xhigh to high for providers that don't support it
|
// Helper to clamp xhigh to high for providers that don't support it
|
||||||
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
|
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adjust maxTokens to account for thinking budget.
|
||||||
|
* APIs like Anthropic and Bedrock require max_tokens > thinking.budget_tokens.
|
||||||
|
* Returns { adjustedMaxTokens, adjustedThinkingBudget }
|
||||||
|
*/
|
||||||
|
const adjustMaxTokensForThinking = (
|
||||||
|
baseMaxTokens: number,
|
||||||
|
modelMaxTokens: number,
|
||||||
|
reasoningLevel: ThinkingLevel,
|
||||||
|
customBudgets?: ThinkingBudgets,
|
||||||
|
): { maxTokens: number; thinkingBudget: number } => {
|
||||||
|
const defaultBudgets: ThinkingBudgets = {
|
||||||
|
minimal: 1024,
|
||||||
|
low: 2048,
|
||||||
|
medium: 8192,
|
||||||
|
high: 16384,
|
||||||
|
};
|
||||||
|
const budgets = { ...defaultBudgets, ...customBudgets };
|
||||||
|
|
||||||
|
const minOutputTokens = 1024;
|
||||||
|
const level = clampReasoning(reasoningLevel)!;
|
||||||
|
let thinkingBudget = budgets[level]!;
|
||||||
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
||||||
|
const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);
|
||||||
|
|
||||||
|
// If not enough room for thinking + output, reduce thinking budget
|
||||||
|
if (maxTokens <= thinkingBudget) {
|
||||||
|
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
return { maxTokens, thinkingBudget };
|
||||||
|
};
|
||||||
|
|
||||||
switch (model.api) {
|
switch (model.api) {
|
||||||
case "anthropic-messages": {
|
case "anthropic-messages": {
|
||||||
// Explicitly disable thinking when reasoning is not specified
|
// Explicitly disable thinking when reasoning is not specified
|
||||||
|
|
@ -226,39 +259,55 @@ function mapOptionsForApi<TApi extends Api>(
|
||||||
|
|
||||||
// Claude requires max_tokens > thinking.budget_tokens
|
// Claude requires max_tokens > thinking.budget_tokens
|
||||||
// So we need to ensure maxTokens accounts for both thinking and output
|
// So we need to ensure maxTokens accounts for both thinking and output
|
||||||
const defaultBudgets: ThinkingBudgets = {
|
const adjusted = adjustMaxTokensForThinking(
|
||||||
minimal: 1024,
|
base.maxTokens || 0,
|
||||||
low: 2048,
|
model.maxTokens,
|
||||||
medium: 8192,
|
options.reasoning,
|
||||||
high: 16384,
|
options?.thinkingBudgets,
|
||||||
};
|
);
|
||||||
const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
|
|
||||||
|
|
||||||
const minOutputTokens = 1024;
|
|
||||||
const level = clampReasoning(options.reasoning)!;
|
|
||||||
let thinkingBudget = budgets[level]!;
|
|
||||||
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
||||||
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
||||||
|
|
||||||
// If not enough room for thinking + output, reduce thinking budget
|
|
||||||
if (maxTokens <= thinkingBudget) {
|
|
||||||
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
maxTokens,
|
maxTokens: adjusted.maxTokens,
|
||||||
thinkingEnabled: true,
|
thinkingEnabled: true,
|
||||||
thinkingBudgetTokens: thinkingBudget,
|
thinkingBudgetTokens: adjusted.thinkingBudget,
|
||||||
} satisfies AnthropicOptions;
|
} satisfies AnthropicOptions;
|
||||||
}
|
}
|
||||||
|
|
||||||
case "bedrock-converse-stream":
|
case "bedrock-converse-stream": {
|
||||||
|
// Explicitly disable thinking when reasoning is not specified
|
||||||
|
if (!options?.reasoning) {
|
||||||
|
return { ...base, reasoning: undefined } satisfies BedrockOptions;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Claude requires max_tokens > thinking.budget_tokens (same as Anthropic direct API)
|
||||||
|
// So we need to ensure maxTokens accounts for both thinking and output
|
||||||
|
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||||
|
const adjusted = adjustMaxTokensForThinking(
|
||||||
|
base.maxTokens || 0,
|
||||||
|
model.maxTokens,
|
||||||
|
options.reasoning,
|
||||||
|
options?.thinkingBudgets,
|
||||||
|
);
|
||||||
|
|
||||||
|
return {
|
||||||
|
...base,
|
||||||
|
maxTokens: adjusted.maxTokens,
|
||||||
|
reasoning: options.reasoning,
|
||||||
|
thinkingBudgets: {
|
||||||
|
...(options?.thinkingBudgets || {}),
|
||||||
|
[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
|
||||||
|
},
|
||||||
|
} satisfies BedrockOptions;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-Claude models - pass through
|
||||||
return {
|
return {
|
||||||
...base,
|
...base,
|
||||||
reasoning: options?.reasoning,
|
reasoning: options?.reasoning,
|
||||||
thinkingBudgets: options?.thinkingBudgets,
|
thinkingBudgets: options?.thinkingBudgets,
|
||||||
} satisfies BedrockOptions;
|
} satisfies BedrockOptions;
|
||||||
|
}
|
||||||
|
|
||||||
case "openai-completions":
|
case "openai-completions":
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue