From cd43b8a9ca61c66290ff5b59dd5ebd71e6dbfe7f Mon Sep 17 00:00:00 2001
From: Pablo Tovar
Date: Sat, 17 Jan 2026 10:55:30 +0100
Subject: [PATCH] fix: ensure max_tokens > thinking.budget_tokens for bedrock
 claude (#797)

Bedrock Claude models require max_tokens to exceed thinking.budget_tokens.
This constraint was handled for anthropic-messages API but missing for
bedrock-converse-stream, causing compaction failures.

Extracted adjustMaxTokensForThinking() helper that:
- Adds thinking budget on top of desired output tokens
- Reduces thinking budget if insufficient room (min 1024 output tokens)
- Applied to both anthropic-messages and bedrock-converse-stream APIs
---
 packages/ai/src/stream.ts | 91 ++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 21 deletions(-)

diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts
index e1548f40..d91820c6 100644
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@@ -217,6 +217,39 @@ function mapOptionsForApi(
 	// Helper to clamp xhigh to high for providers that don't support it
 	const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
 
+	/**
+	 * Adjust maxTokens to account for thinking budget.
+	 * APIs like Anthropic and Bedrock require max_tokens > thinking.budget_tokens.
+	 * Returns { maxTokens, thinkingBudget }
+	 */
+	const adjustMaxTokensForThinking = (
+		baseMaxTokens: number,
+		modelMaxTokens: number,
+		reasoningLevel: ThinkingLevel,
+		customBudgets?: ThinkingBudgets,
+	): { maxTokens: number; thinkingBudget: number } => {
+		const defaultBudgets: ThinkingBudgets = {
+			minimal: 1024,
+			low: 2048,
+			medium: 8192,
+			high: 16384,
+		};
+		const budgets = { ...defaultBudgets, ...customBudgets };
+
+		const minOutputTokens = 1024;
+		const level = clampReasoning(reasoningLevel)!;
+		let thinkingBudget = budgets[level]!;
+		// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
+		const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);
+
+		// If not enough room for thinking + output, reduce thinking budget
+		if (maxTokens <= thinkingBudget) {
+			thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+		}
+
+		return { maxTokens, thinkingBudget };
+	};
+
 	switch (model.api) {
 		case "anthropic-messages": {
 			// Explicitly disable thinking when reasoning is not specified
@@ -226,39 +259,55 @@ function mapOptionsForApi(
 
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const defaultBudgets: ThinkingBudgets = {
-				minimal: 1024,
-				low: 2048,
-				medium: 8192,
-				high: 16384,
-			};
-			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-			const minOutputTokens = 1024;
-			const level = clampReasoning(options.reasoning)!;
-			let thinkingBudget = budgets[level]!;
-			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
-			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
-
-			// If not enough room for thinking + output, reduce thinking budget
-			if (maxTokens <= thinkingBudget) {
-				thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
-			}
+			const adjusted = adjustMaxTokensForThinking(
+				base.maxTokens || 0,
+				model.maxTokens,
+				options.reasoning,
+				options?.thinkingBudgets,
+			);
 
 			return {
 				...base,
-				maxTokens,
+				maxTokens: adjusted.maxTokens,
 				thinkingEnabled: true,
-				thinkingBudgetTokens: thinkingBudget,
+				thinkingBudgetTokens: adjusted.thinkingBudget,
 			} satisfies AnthropicOptions;
 		}
 
-		case "bedrock-converse-stream":
+		case "bedrock-converse-stream": {
+			// Explicitly disable thinking when reasoning is not specified
+			if (!options?.reasoning) {
+				return { ...base, reasoning: undefined } satisfies BedrockOptions;
+			}
+
+			// Claude requires max_tokens > thinking.budget_tokens (same as Anthropic direct API)
+			// So we need to ensure maxTokens accounts for both thinking and output
+			if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
+				const adjusted = adjustMaxTokensForThinking(
+					base.maxTokens || 0,
+					model.maxTokens,
+					options.reasoning,
+					options?.thinkingBudgets,
+				);
+
+				return {
+					...base,
+					maxTokens: adjusted.maxTokens,
+					reasoning: options.reasoning,
+					thinkingBudgets: {
+						...(options?.thinkingBudgets || {}),
+						[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
+					},
+				} satisfies BedrockOptions;
+			}
+
+			// Non-Claude models - pass through
 			return {
 				...base,
 				reasoning: options?.reasoning,
 				thinkingBudgets: options?.thinkingBudgets,
 			} satisfies BedrockOptions;
+		}
 
 		case "openai-completions":
 			return {