fix: clean up Codex thinking level handling

- Remove per-thinking-level model variants (gpt-5.2-codex-high, etc.)
- Remove thinkingLevels from Model type
- Provider clamps reasoning effort internally
- Omit reasoning field when thinking is off

fixes #472
This commit is contained in:
Mario Zechner 2026-01-05 21:58:26 +01:00
parent 02b72b49d5
commit 0b9e3ada0c
11 changed files with 45 additions and 148 deletions

View file

@@ -21,8 +21,8 @@ import type {
KnownProvider,
Model,
OptionsForApi,
ReasoningEffort,
SimpleStreamOptions,
ThinkingLevel,
} from "./types.js";
const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
@@ -180,7 +180,7 @@ function mapOptionsForApi<TApi extends Api>(
};
// Helper to clamp xhigh to high for providers that don't support it
const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
switch (model.api) {
case "anthropic-messages": {
@@ -286,7 +286,7 @@ function mapOptionsForApi<TApi extends Api>(
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
// Claude requires max_tokens > thinking.budget_tokens
// So we need to ensure maxTokens accounts for both thinking and output
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 1024,
low: 2048,
medium: 8192,
@@ -350,7 +350,7 @@ function mapOptionsForApi<TApi extends Api>(
}
}
type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
// Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
@@ -363,7 +363,7 @@ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
}
function getGemini3ThinkingLevel(
effort: ClampedReasoningEffort,
effort: ClampedThinkingLevel,
model: Model<"google-generative-ai">,
): GoogleThinkingLevel {
if (isGemini3ProModel(model)) {
@@ -390,7 +390,7 @@ function getGemini3ThinkingLevel(
}
}
function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
if (modelId.includes("3-pro")) {
// Gemini 3 Pro only supports LOW/HIGH (for now)
switch (effort) {
@@ -415,10 +415,10 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
}
}
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedThinkingLevel): number {
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
if (model.id.includes("2.5-pro")) {
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 128,
low: 2048,
medium: 8192,
@@ -429,7 +429,7 @@ function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedRe
if (model.id.includes("2.5-flash")) {
// Covers 2.5-flash-lite as well
const budgets: Record<ClampedReasoningEffort, number> = {
const budgets: Record<ClampedThinkingLevel, number> = {
minimal: 128,
low: 2048,
medium: 8192,