fix: ensure max_tokens > thinking.budget_tokens for bedrock claude (#797)

Bedrock Claude models require max_tokens to exceed thinking.budget_tokens. This constraint was handled for the anthropic-messages API but was missing for bedrock-converse-stream, causing compaction failures. Extracted an adjustMaxTokensForThinking() helper that adds the thinking budget on top of the desired output tokens and reduces the thinking budget if there is insufficient room (reserving a minimum of 1024 output tokens). The helper is applied to both the anthropic-messages and bedrock-converse-stream APIs.
2026-04-15 14:03:49 +00:00 · 2026-01-17 10:55:30 +01:00 · 2026-01-17 10:55:30 +01:00 · cd43b8a9ca
commit cd43b8a9ca
parent fc538f6ca1
1 changed files with 70 additions and 21 deletions
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@ -217,6 +217,39 @@ function mapOptionsForApi<TApi extends Api>(
 	// Helper to clamp xhigh to high for providers that don't support it
 	const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);

+	/**
+	 * Compute a max-token limit and thinking budget that account for the thinking budget of the given reasoning level.
+	 * APIs like Anthropic and Bedrock require max_tokens > thinking.budget_tokens; the budget is added on top of baseMaxTokens and capped at modelMaxTokens.
+	 * Returns { maxTokens, thinkingBudget }; thinkingBudget is reduced when the capped maxTokens does not exceed it.
+	 */
+	const adjustMaxTokensForThinking = (
+		baseMaxTokens: number,
+		modelMaxTokens: number,
+		reasoningLevel: ThinkingLevel,
+		customBudgets?: ThinkingBudgets,
+	): { maxTokens: number; thinkingBudget: number } => {
+		const defaultBudgets: ThinkingBudgets = {
+			minimal: 1024,
+			low: 2048,
+			medium: 8192,
+			high: 16384,
+		};
+		const budgets = { ...defaultBudgets, ...customBudgets }; // entries in customBudgets take precedence over the defaults
+
+		const minOutputTokens = 1024; // output tokens left free when the thinking budget has to shrink
+		const level = clampReasoning(reasoningLevel)!; // "xhigh" is clamped to "high"
+		let thinkingBudget = budgets[level]!;
+		// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
+		const maxTokens = Math.min(baseMaxTokens + thinkingBudget, modelMaxTokens);
+
+		// If maxTokens does not exceed the thinking budget, shrink the budget to maxTokens - minOutputTokens (never below 0)
+		if (maxTokens <= thinkingBudget) {
+			thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+		}
+
+		return { maxTokens, thinkingBudget };
+	};
+
 	switch (model.api) {
 		case "anthropic-messages": {
 			// Explicitly disable thinking when reasoning is not specified
@ -226,39 +259,55 @@ function mapOptionsForApi<TApi extends Api>(

 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const defaultBudgets: ThinkingBudgets = {
-				minimal: 1024,
-				low: 2048,
-				medium: 8192,
-				high: 16384,
-			};
-			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-			const minOutputTokens = 1024;
-			const level = clampReasoning(options.reasoning)!;
-			let thinkingBudget = budgets[level]!;
-			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
-			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
-
-			// If not enough room for thinking + output, reduce thinking budget
-			if (maxTokens <= thinkingBudget) {
-				thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
-			}
+			const adjusted = adjustMaxTokensForThinking(
+				base.maxTokens || 0,
+				model.maxTokens,
+				options.reasoning,
+				options?.thinkingBudgets,
+			);

 			return {
 				...base,
-				maxTokens,
+				maxTokens: adjusted.maxTokens,
 				thinkingEnabled: true,
-				thinkingBudgetTokens: thinkingBudget,
+				thinkingBudgetTokens: adjusted.thinkingBudget,
 			} satisfies AnthropicOptions;
 		}

-		case "bedrock-converse-stream":
+		case "bedrock-converse-stream": {
+			// Explicitly disable thinking when reasoning is not specified
+			if (!options?.reasoning) {
+				return { ...base, reasoning: undefined } satisfies BedrockOptions;
+			}
+
+			// Claude requires max_tokens > thinking.budget_tokens (same as Anthropic direct API)
+			// So we need to ensure maxTokens accounts for both thinking and output
+			if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
+				const adjusted = adjustMaxTokensForThinking(
+					base.maxTokens || 0,
+					model.maxTokens,
+					options.reasoning,
+					options?.thinkingBudgets,
+				);
+
+				return {
+					...base,
+					maxTokens: adjusted.maxTokens,
+					reasoning: options.reasoning,
+					thinkingBudgets: {
+						...(options?.thinkingBudgets || {}),
+						[clampReasoning(options.reasoning)!]: adjusted.thinkingBudget,
+					},
+				} satisfies BedrockOptions;
+			}
+
+			// Non-Claude models - pass through
 			return {
 				...base,
 				reasoning: options?.reasoning,
 				thinkingBudgets: options?.thinkingBudgets,
 			} satisfies BedrockOptions;
+		}

 		case "openai-completions":
 			return {