fix: clean up Codex thinking level handling

- Remove per-thinking-level model variants (gpt-5.2-codex-high, etc.)
- Remove thinkingLevels from Model type
- Provider clamps reasoning effort internally
- Omit reasoning field when thinking is off

fixes #472
This commit is contained in:
Mario Zechner 2026-01-05 21:58:26 +01:00
parent 02b72b49d5
commit 0b9e3ada0c
11 changed files with 45 additions and 148 deletions

View file

@@ -21,8 +21,8 @@ import type {
KnownProvider,
Model,
OptionsForApi,
ReasoningEffort,
SimpleStreamOptions,
ThinkingLevel,
} from "./types.js";
const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
@@ -180,7 +180,7 @@ function mapOptionsForApi<TApi extends Api>(
};
// Helper to clamp xhigh to high for providers that don't support it
const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
switch (model.api) {
case "anthropic-messages": {
@@ -286,7 +286,7 @@ function mapOptionsForApi<TApi extends Api>(
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
// Claude requires max_tokens > thinking.budget_tokens
// So we need to ensure maxTokens accounts for both thinking and output
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 1024,
low: 2048,
medium: 8192,
@@ -350,7 +350,7 @@ function mapOptionsForApi<TApi extends Api>(
}
}
type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
// Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
@@ -363,7 +363,7 @@ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
}
function getGemini3ThinkingLevel(
effort: ClampedReasoningEffort,
effort: ClampedThinkingLevel,
model: Model<"google-generative-ai">,
): GoogleThinkingLevel {
if (isGemini3ProModel(model)) {
@@ -390,7 +390,7 @@ function getGemini3ThinkingLevel(
}
}
function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
if (modelId.includes("3-pro")) {
// Gemini 3 Pro only supports LOW/HIGH (for now)
switch (effort) {
@@ -415,10 +415,10 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
}
}
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedThinkingLevel): number {
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
if (model.id.includes("2.5-pro")) {
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 128,
low: 2048,
medium: 8192,
@@ -429,7 +429,7 @@ function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedRe
if (model.id.includes("2.5-flash")) {
// Covers 2.5-flash-lite as well
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 128,
low: 2048,
medium: 8192,