Add xhigh thinking level for OpenAI codex-max models

- Add 'xhigh' to ThinkingLevel type in ai and agent packages
- Map xhigh to reasoning_effort: 'max' for OpenAI providers
- Add thinkingXhigh color token to theme schema and built-in themes
- Show xhigh option only when using codex-max models
- Update CHANGELOG for both ai and coding-agent packages

Closes #143
2026-04-22 00:00:27 +00:00 · 2025-12-08 21:12:54 +01:00 · 2025-12-08 21:12:54 +01:00 · 00370cab39
commit 00370cab39
parent 87a1a9ded4
19 changed files with 300 additions and 54 deletions
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@ -12,6 +12,12 @@

 - **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)

+- **xhigh reasoning level**: Added `xhigh` to `ReasoningEffort` type for OpenAI codex-max models. For non-OpenAI providers (Anthropic, Google), `xhigh` is automatically mapped to `high`. ([#143](https://github.com/badlogic/pi-mono/issues/143))
+
+### Changed
+
+- **Updated SDK versions**: OpenAI SDK 5.21.0 → 6.10.0, Anthropic SDK 0.61.0 → 0.71.2, Google GenAI SDK 1.30.0 → 1.31.0
+
 ## [0.13.0] - 2025-12-06

 ### Breaking Changes
--- a/packages/ai/README.md
+++ b/packages/ai/README.md
@ -387,7 +387,7 @@ if (model.reasoning) {
 const response = await completeSimple(model, {
  messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
 }, {
-  reasoning: 'medium'  // 'minimal' | 'low' | 'medium' | 'high'
+  reasoning: 'medium'  // 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' (xhigh maps to high on non-OpenAI providers)
 });

 // Access thinking and text blocks
--- a/packages/ai/package.json
+++ b/packages/ai/package.json
@ -20,13 +20,13 @@
 		"prepublishOnly": "npm run clean && npm run build"
 	},
 	"dependencies": {
-		"@anthropic-ai/sdk": "^0.61.0",
-		"@google/genai": "^1.30.0",
+		"@anthropic-ai/sdk": "0.71.2",
+		"@google/genai": "1.31.0",
 		"@sinclair/typebox": "^0.34.41",
 		"ajv": "^8.17.1",
 		"ajv-formats": "^3.0.1",
 		"chalk": "^5.6.2",
-		"openai": "5.21.0",
+		"openai": "6.10.0",
 		"partial-json": "^0.1.7",
 		"zod-to-json-schema": "^3.24.6"
 	},
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@ -29,7 +29,7 @@ import { transformMessages } from "./transorm-messages.js";

 export interface OpenAICompletionsOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
-	reasoningEffort?: "minimal" | "low" | "medium" | "high";
+	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
 }

 export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@ -32,7 +32,7 @@ import { transformMessages } from "./transorm-messages.js";

 // OpenAI Responses-specific options
 export interface OpenAIResponsesOptions extends StreamOptions {
-	reasoningEffort?: "minimal" | "low" | "medium" | "high";
+	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
 	reasoningSummary?: "auto" | "detailed" | "concise" | null;
 }

@ -158,7 +158,10 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 				else if (event.type === "response.content_part.added") {
 					if (currentItem && currentItem.type === "message") {
 						currentItem.content = currentItem.content || [];
-						currentItem.content.push(event.part);
+						// Filter out ReasoningText, only accept output_text and refusal
+						if (event.part.type === "output_text" || event.part.type === "refusal") {
+							currentItem.content.push(event.part);
+						}
 					}
 				} else if (event.type === "response.output_text.delta") {
 					if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
--- a/packages/ai/src/stream.ts
+++ b/packages/ai/src/stream.ts
@ -122,6 +122,9 @@ function mapOptionsForApi<TApi extends Api>(
 		apiKey: apiKey || options?.apiKey,
 	};

+	// Helper to clamp xhigh to high for providers that don't support it
+	const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
+
 	switch (model.api) {
 		case "anthropic-messages": {
 			if (!options?.reasoning) return base satisfies AnthropicOptions;
@ -136,7 +139,7 @@ function mapOptionsForApi<TApi extends Api>(
 			return {
 				...base,
 				thinkingEnabled: true,
-				thinkingBudgetTokens: anthropicBudgets[options.reasoning],
+				thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
 			} satisfies AnthropicOptions;
 		}

@ -155,7 +158,10 @@ function mapOptionsForApi<TApi extends Api>(
 		case "google-generative-ai": {
 			if (!options?.reasoning) return base as any;

-			const googleBudget = getGoogleBudget(model as Model<"google-generative-ai">, options.reasoning);
+			const googleBudget = getGoogleBudget(
+				model as Model<"google-generative-ai">,
+				clampReasoning(options.reasoning)!,
+			);
 			return {
 				...base,
 				thinking: {
@ -173,10 +179,12 @@ function mapOptionsForApi<TApi extends Api>(
 	}
 }

-function getGoogleBudget(model: Model<"google-generative-ai">, effort: ReasoningEffort): number {
+type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
+
+function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
 	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
 	if (model.id.includes("2.5-pro")) {
-		const budgets = {
+		const budgets: Record<ClampedReasoningEffort, number> = {
 			minimal: 128,
 			low: 2048,
 			medium: 8192,
@ -187,7 +195,7 @@ function getGoogleBudget(model: Model<"google-generative-ai">, effort: Reasoning

 	if (model.id.includes("2.5-flash")) {
 		// Covers 2.5-flash-lite as well
-		const budgets = {
+		const budgets: Record<ClampedReasoningEffort, number> = {
 			minimal: 128,
 			low: 2048,
 			medium: 8192,
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@ -29,7 +29,7 @@ export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi];
 export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter" | "zai";
 export type Provider = KnownProvider | string;

-export type ReasoningEffort = "minimal" | "low" | "medium" | "high";
+export type ReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";

 // Base options all providers share
 export interface StreamOptions {
--- a/packages/ai/test/xhigh.test.ts
+++ b/packages/ai/test/xhigh.test.ts
@ -0,0 +1,69 @@
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { stream } from "../src/stream.js";
+import type { Context, Model } from "../src/types.js";
+
+function makeContext(): Context { // builds a new Context containing a single user message
+	return {
+		messages: [
+			{
+				role: "user",
+				content: `What is ${(Math.random() * 100) | 0} + ${(Math.random() * 100) | 0}? Think step by step.`, // each operand is a random integer in 0..99 (`| 0` truncates)
+				timestamp: Date.now(), // current time in epoch milliseconds
+			},
+		],
+	};
+}
+
+describe.skipIf(!process.env.OPENAI_API_KEY)("xhigh reasoning", () => { // the whole suite is skipped when OPENAI_API_KEY is unset
+	describe("codex-max (supports xhigh)", () => {
+		// Note: codex models only support the responses API, not chat completions
+		it("should work with openai-responses", async () => {
+			const model = getModel("openai", "gpt-5.1-codex-max");
+			const s = stream(model, makeContext(), { reasoningEffort: "xhigh" }); // request the xhigh effort level
+			let hasThinking = false; // flips to true on the first thinking_start / thinking_delta event
+
+			for await (const event of s) {
+				if (event.type === "thinking_start" || event.type === "thinking_delta") {
+					hasThinking = true;
+				}
+			}
+
+			const response = await s.result();
+			expect(response.stopReason, `Error: ${response.errorMessage}`).toBe("stop"); // errorMessage is passed as the assertion's failure message
+			expect(response.content.some((b) => b.type === "text")).toBe(true);
+			expect(hasThinking || response.content.some((b) => b.type === "thinking")).toBe(true); // thinking may be seen as a stream event or as a block in the final content
+		});
+	});
+
+	describe("gpt-5-mini (does not support xhigh)", () => {
+		it("should error with openai-responses when using xhigh", async () => {
+			const model = getModel("openai", "gpt-5-mini");
+			const s = stream(model, makeContext(), { reasoningEffort: "xhigh" });
+
+			for await (const _ of s) {
+				// drain events; only the final result is inspected below
+			}
+
+			const response = await s.result();
+			expect(response.stopReason).toBe("error"); // the request is expected to fail
+			expect(response.errorMessage).toContain("xhigh"); // and the error text is expected to mention "xhigh"
+		});
+
+		it("should error with openai-completions when using xhigh", async () => {
+			const model: Model<"openai-completions"> = { // gpt-5-mini definition with only the api field overridden
+				...getModel("openai", "gpt-5-mini"),
+				api: "openai-completions",
+			};
+			const s = stream(model, makeContext(), { reasoningEffort: "xhigh" });
+
+			for await (const _ of s) {
+				// drain events; only the final result is inspected below
+			}
+
+			const response = await s.result();
+			expect(response.stopReason).toBe("error"); // same expectations as the responses-API case above
+			expect(response.errorMessage).toContain("xhigh");
+		});
+	});
+});