Merge branch 'main' into feat/tui-overlay-options

This commit is contained in:
Mario Zechner 2026-01-13 22:06:02 +01:00
commit 7d45e434de
90 changed files with 10277 additions and 1700 deletions

View file

@ -28,7 +28,7 @@ jobs:
- name: Setup Bun - name: Setup Bun
uses: oven-sh/setup-bun@4bc047ad259df6fc24a6c9b0f9a0cb08cf17fbe5 # v2.0.1 uses: oven-sh/setup-bun@4bc047ad259df6fc24a6c9b0f9a0cb08cf17fbe5 # v2.0.1
with: with:
bun-version: 1.3.4 bun-version: 1.2.20
- name: Setup Node.js - name: Setup Node.js
uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0 uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
@ -108,9 +108,10 @@ jobs:
# Create archives # Create archives
cd binaries cd binaries
# Unix platforms (tar.gz) # Unix platforms (tar.gz) - use wrapper directory for mise compatibility
# mise auto-detects single-directory archives and strips one component
for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64; do for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64; do
tar -czf pi-$platform.tar.gz -C $platform . mv $platform pi && tar -czf pi-$platform.tar.gz pi && mv pi $platform
done done
# Windows (zip) # Windows (zip)

View file

@ -75,6 +75,46 @@ Use these sections under `## [Unreleased]`:
- **Internal changes (from issues)**: `Fixed foo bar ([#123](https://github.com/badlogic/pi-mono/issues/123))` - **Internal changes (from issues)**: `Fixed foo bar ([#123](https://github.com/badlogic/pi-mono/issues/123))`
- **External contributions**: `Added feature X ([#456](https://github.com/badlogic/pi-mono/pull/456) by [@username](https://github.com/username))` - **External contributions**: `Added feature X ([#456](https://github.com/badlogic/pi-mono/pull/456) by [@username](https://github.com/username))`
## Adding a New LLM Provider (packages/ai)
Adding a new provider requires changes across multiple files:
### 1. Core Types (`packages/ai/src/types.ts`)
- Add API identifier to `Api` type union (e.g., `"bedrock-converse-stream"`)
- Create options interface extending `StreamOptions`
- Add mapping to `ApiOptionsMap`
- Add provider name to `KnownProvider` type union
### 2. Provider Implementation (`packages/ai/src/providers/`)
Create provider file exporting:
- `stream<Provider>()` function returning `AssistantMessageEventStream`
- Message/tool conversion functions
- Response parsing emitting standardized events (`text`, `tool_call`, `thinking`, `usage`, `stop`)
### 3. Stream Integration (`packages/ai/src/stream.ts`)
- Import provider's stream function and options type
- Add credential detection in `getEnvApiKey()`
- Add case in `mapOptionsForApi()` for `SimpleStreamOptions` mapping
- Add provider to `streamFunctions` map
### 4. Model Generation (`packages/ai/scripts/generate-models.ts`)
- Add logic to fetch/parse models from provider source
- Map to standardized `Model` interface
### 5. Tests (`packages/ai/test/`)
Add provider to: `stream.test.ts`, `tokens.test.ts`, `abort.test.ts`, `empty.test.ts`, `context-overflow.test.ts`, `image-limits.test.ts`, `unicode-surrogate.test.ts`, `tool-call-without-result.test.ts`, `image-tool-result.test.ts`, `total-tokens.test.ts`
For non-standard auth, create utility (e.g., `bedrock-utils.ts`) with credential detection.
### 6. Coding Agent (`packages/coding-agent/`)
- `src/core/model-resolver.ts`: Add default model ID to `DEFAULT_MODELS`
- `src/cli/args.ts`: Add env var documentation
- `README.md`: Add provider setup instructions
### 7. Documentation
- `packages/ai/README.md`: Add to providers table, document options/auth, add env vars
- `packages/ai/CHANGELOG.md`: Add entry under `## [Unreleased]`
## Releasing ## Releasing
**Lockstep versioning**: All packages always share the same version number. Every release updates all packages together. **Lockstep versioning**: All packages always share the same version number. Every release updates all packages together.

2070
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -40,6 +40,7 @@
}, },
"version": "0.0.3", "version": "0.0.3",
"dependencies": { "dependencies": {
"@mariozechner/jiti": "^2.6.5",
"@mariozechner/pi-coding-agent": "^0.30.2", "@mariozechner/pi-coding-agent": "^0.30.2",
"get-east-asian-width": "^1.4.0" "get-east-asian-width": "^1.4.0"
} }

View file

@ -2,6 +2,20 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
## [0.45.4] - 2026-01-13
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
## [0.45.1] - 2026-01-13
## [0.45.0] - 2026-01-13
## [0.44.0] - 2026-01-12
## [0.43.0] - 2026-01-11 ## [0.43.0] - 2026-01-11
## [0.42.5] - 2026-01-11 ## [0.42.5] - 2026-01-11

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-agent-core", "name": "@mariozechner/pi-agent-core",
"version": "0.43.0", "version": "0.45.5",
"description": "General-purpose agent with transport abstraction, state management, and attachment support", "description": "General-purpose agent with transport abstraction, state management, and attachment support",
"type": "module", "type": "module",
"main": "./dist/index.js", "main": "./dist/index.js",
@ -17,8 +17,8 @@
"prepublishOnly": "npm run clean && npm run build" "prepublishOnly": "npm run clean && npm run build"
}, },
"dependencies": { "dependencies": {
"@mariozechner/pi-ai": "^0.43.0", "@mariozechner/pi-ai": "^0.45.5",
"@mariozechner/pi-tui": "^0.43.0" "@mariozechner/pi-tui": "^0.45.5"
}, },
"keywords": [ "keywords": [
"ai", "ai",

View file

@ -0,0 +1,18 @@
/**
 * Helpers for Amazon Bedrock test configuration.
 */

/**
 * Detect whether any AWS credentials usable by Bedrock are present in the
 * environment. Any one of the following is sufficient:
 * - AWS_PROFILE (named profile from ~/.aws/credentials)
 * - AWS_ACCESS_KEY_ID together with AWS_SECRET_ACCESS_KEY (IAM keys)
 * - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
 */
export function hasBedrockCredentials(): boolean {
	const env = process.env;
	const hasProfile = Boolean(env.AWS_PROFILE);
	const hasIamKeyPair = Boolean(env.AWS_ACCESS_KEY_ID) && Boolean(env.AWS_SECRET_ACCESS_KEY);
	const hasBearerToken = Boolean(env.AWS_BEARER_TOKEN_BEDROCK);
	return hasProfile || hasIamKeyPair || hasBearerToken;
}

View file

@ -2,6 +2,7 @@ import type { AssistantMessage, Model, ToolResultMessage, UserMessage } from "@m
import { getModel } from "@mariozechner/pi-ai"; import { getModel } from "@mariozechner/pi-ai";
import { describe, expect, it } from "vitest"; import { describe, expect, it } from "vitest";
import { Agent } from "../src/index.js"; import { Agent } from "../src/index.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { calculateTool } from "./utils/calculate.js"; import { calculateTool } from "./utils/calculate.js";
async function basicPrompt(model: Model<any>) { async function basicPrompt(model: Model<any>) {
@ -324,6 +325,30 @@ describe("Agent E2E Tests", () => {
await multiTurnConversation(model); await multiTurnConversation(model);
}); });
}); });
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle basic text prompt", async () => {
await basicPrompt(model);
});
it("should execute tools correctly", async () => {
await toolExecution(model);
});
it("should handle abort during execution", async () => {
await abortExecution(model);
});
it("should emit state updates during streaming", async () => {
await stateUpdates(model);
});
it("should maintain context across multiple turns", async () => {
await multiTurnConversation(model);
});
});
}); });
describe("Agent.continue()", () => { describe("Agent.continue()", () => {

View file

@ -2,6 +2,36 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
## [0.45.4] - 2026-01-13
### Added
- Added Vercel AI Gateway provider with model discovery and `AI_GATEWAY_API_KEY` env support ([#689](https://github.com/badlogic/pi-mono/pull/689) by [@timolins](https://github.com/timolins))
### Fixed
- Fixed z.ai thinking/reasoning: z.ai uses `thinking: { type: "enabled" }` instead of OpenAI's `reasoning_effort`. Added `thinkingFormat` compat flag to handle this. ([#688](https://github.com/badlogic/pi-mono/issues/688))
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
## [0.45.1] - 2026-01-13
## [0.45.0] - 2026-01-13
### Added
- MiniMax provider support with M2 and M2.1 models via Anthropic-compatible API ([#656](https://github.com/badlogic/pi-mono/pull/656) by [@dannote](https://github.com/dannote))
- Add Amazon Bedrock provider with prompt caching for Claude models (experimental, tested with Anthropic Claude models only) ([#494](https://github.com/badlogic/pi-mono/pull/494) by [@unexge](https://github.com/unexge))
- Added `serviceTier` option for OpenAI Responses requests ([#672](https://github.com/badlogic/pi-mono/pull/672) by [@markusylisiurunen](https://github.com/markusylisiurunen))
- **Anthropic caching on OpenRouter**: Interactions with Anthropic models via OpenRouter now set a 5-minute cache point using Anthropic-style `cache_control` breakpoints on the last assistant or user message. ([#584](https://github.com/badlogic/pi-mono/pull/584) by [@nathyong](https://github.com/nathyong))
- **Google Gemini CLI provider improvements**: Added Antigravity endpoint fallback (tries daily sandbox then prod when `baseUrl` is unset), header-based retry delay parsing (`Retry-After`, `x-ratelimit-reset`, `x-ratelimit-reset-after`), stable `sessionId` derivation from first user message for cache affinity, empty SSE stream retry with backoff, and `anthropic-beta` header for Claude thinking models ([#670](https://github.com/badlogic/pi-mono/pull/670) by [@kim0](https://github.com/kim0))
## [0.44.0] - 2026-01-12
## [0.43.0] - 2026-01-11 ## [0.43.0] - 2026-01-11
### Fixed ### Fixed

View file

@ -56,9 +56,12 @@ Unified LLM API with automatic model discovery, provider configuration, token an
- **Cerebras** - **Cerebras**
- **xAI** - **xAI**
- **OpenRouter** - **OpenRouter**
- **Vercel AI Gateway**
- **MiniMax**
- **GitHub Copilot** (requires OAuth, see below) - **GitHub Copilot** (requires OAuth, see below)
- **Google Gemini CLI** (requires OAuth, see below) - **Google Gemini CLI** (requires OAuth, see below)
- **Antigravity** (requires OAuth, see below) - **Antigravity** (requires OAuth, see below)
- **Amazon Bedrock**
- **Any OpenAI-compatible API**: Ollama, vLLM, LM Studio, etc. - **Any OpenAI-compatible API**: Ollama, vLLM, LM Studio, etc.
## Installation ## Installation
@ -708,6 +711,7 @@ interface OpenAICompat {
supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true) supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true) supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens) maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens)
thinkingFormat?: 'openai' | 'zai'; // Format for reasoning param: 'openai' uses reasoning_effort, 'zai' uses thinking: { type: "enabled" } (default: openai)
} }
``` ```
@ -860,7 +864,9 @@ In Node.js environments, you can set environment variables to avoid passing API
| Cerebras | `CEREBRAS_API_KEY` | | Cerebras | `CEREBRAS_API_KEY` |
| xAI | `XAI_API_KEY` | | xAI | `XAI_API_KEY` |
| OpenRouter | `OPENROUTER_API_KEY` | | OpenRouter | `OPENROUTER_API_KEY` |
| Vercel AI Gateway | `AI_GATEWAY_API_KEY` |
| zAI | `ZAI_API_KEY` | | zAI | `ZAI_API_KEY` |
| MiniMax | `MINIMAX_API_KEY` |
| GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` | | GitHub Copilot | `COPILOT_GITHUB_TOKEN` or `GH_TOKEN` or `GITHUB_TOKEN` |
When set, the library automatically uses these keys: When set, the library automatically uses these keys:
@ -1026,6 +1032,90 @@ const response = await complete(model, {
**Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically. **Google Gemini CLI / Antigravity**: These use Google Cloud OAuth. The `apiKey` returned by `getOAuthApiKey()` is a JSON string containing both the token and project ID, which the library handles automatically.
## Development
### Adding a New Provider
Adding a new LLM provider requires changes across multiple files. This checklist covers all necessary steps:
#### 1. Core Types (`src/types.ts`)
- Add the API identifier to the `Api` type union (e.g., `"bedrock-converse-stream"`)
- Create an options interface extending `StreamOptions` (e.g., `BedrockOptions`)
- Add the mapping to `ApiOptionsMap`
- Add the provider name to `KnownProvider` type union (e.g., `"amazon-bedrock"`)
#### 2. Provider Implementation (`src/providers/`)
Create a new provider file (e.g., `amazon-bedrock.ts`) that exports:
- `stream<Provider>()` function returning `AssistantMessageEventStream`
- Provider-specific options interface
- Message conversion functions to transform `Context` to provider format
- Tool conversion if the provider supports tools
- Response parsing to emit standardized events (`text`, `tool_call`, `thinking`, `usage`, `stop`)
#### 3. Stream Integration (`src/stream.ts`)
- Import the provider's stream function and options type
- Add credential detection in `getEnvApiKey()` for the new provider
- Add a case in `mapOptionsForApi()` to map `SimpleStreamOptions` to provider options
- Add the provider's stream function to the `streamFunctions` map
#### 4. Model Generation (`scripts/generate-models.ts`)
- Add logic to fetch and parse models from the provider's source (e.g., models.dev API)
- Map provider model data to the standardized `Model` interface
- Handle provider-specific quirks (pricing format, capability flags, model ID transformations)
#### 5. Tests (`test/`)
Create or update test files to cover the new provider:
- `stream.test.ts` - Basic streaming and tool use
- `tokens.test.ts` - Token usage reporting
- `abort.test.ts` - Request cancellation
- `empty.test.ts` - Empty message handling
- `context-overflow.test.ts` - Context limit errors
- `image-limits.test.ts` - Image support (if applicable)
- `unicode-surrogate.test.ts` - Unicode handling
- `tool-call-without-result.test.ts` - Orphaned tool calls
- `image-tool-result.test.ts` - Images in tool results
- `total-tokens.test.ts` - Token counting accuracy
For providers with non-standard auth (AWS, Google Vertex), create a utility like `bedrock-utils.ts` with credential detection helpers.
#### 6. Coding Agent Integration (`../coding-agent/`)
Update `src/core/model-resolver.ts`:
- Add a default model ID for the provider in `DEFAULT_MODELS`
Update `src/cli/args.ts`:
- Add environment variable documentation in the help text
Update `README.md`:
- Add the provider to the providers section with setup instructions
#### 7. Documentation
Update `packages/ai/README.md`:
- Add to the Supported Providers table
- Document any provider-specific options or authentication requirements
- Add environment variable to the Environment Variables section
#### 8. Changelog
Add an entry to `packages/ai/CHANGELOG.md` under `## [Unreleased]`:
```markdown
### Added
- Added support for [Provider Name] provider ([#PR](link) by [@author](link))
```
## License ## License
MIT MIT

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-ai", "name": "@mariozechner/pi-ai",
"version": "0.43.0", "version": "0.45.5",
"description": "Unified LLM API with automatic model discovery and provider configuration", "description": "Unified LLM API with automatic model discovery and provider configuration",
"type": "module", "type": "module",
"main": "./dist/index.js", "main": "./dist/index.js",
@ -23,6 +23,7 @@
}, },
"dependencies": { "dependencies": {
"@anthropic-ai/sdk": "0.71.2", "@anthropic-ai/sdk": "0.71.2",
"@aws-sdk/client-bedrock-runtime": "^3.966.0",
"@google/genai": "1.34.0", "@google/genai": "1.34.0",
"@mistralai/mistralai": "1.10.0", "@mistralai/mistralai": "1.10.0",
"@sinclair/typebox": "^0.34.41", "@sinclair/typebox": "^0.34.41",
@ -39,6 +40,7 @@
"openai", "openai",
"anthropic", "anthropic",
"gemini", "gemini",
"bedrock",
"unified", "unified",
"api" "api"
], ],

View file

@ -32,6 +32,20 @@ interface ModelsDevModel {
}; };
} }
/**
 * Shape of a single model entry returned by the Vercel AI Gateway
 * models endpoint. All fields except `id` are optional; pricing values
 * may arrive as either strings or numbers.
 */
interface AiGatewayModel {
	// Gateway model identifier.
	id: string;
	// Human-readable name; callers fall back to `id` when absent.
	name?: string;
	context_window?: number;
	max_tokens?: number;
	// Capability tags, e.g. "tool-use", "vision", "reasoning".
	tags?: string[];
	// Per-token pricing; converted to per-million-token costs downstream.
	pricing?: {
		input?: string | number;
		output?: string | number;
		input_cache_read?: string | number;
		input_cache_write?: string | number;
	};
}
const COPILOT_STATIC_HEADERS = { const COPILOT_STATIC_HEADERS = {
"User-Agent": "GitHubCopilotChat/0.35.0", "User-Agent": "GitHubCopilotChat/0.35.0",
"Editor-Version": "vscode/1.107.0", "Editor-Version": "vscode/1.107.0",
@ -39,6 +53,9 @@ const COPILOT_STATIC_HEADERS = {
"Copilot-Integration-Id": "vscode-chat", "Copilot-Integration-Id": "vscode-chat",
} as const; } as const;
const AI_GATEWAY_MODELS_URL = "https://ai-gateway.vercel.sh/v1";
const AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh";
async function fetchOpenRouterModels(): Promise<Model<any>[]> { async function fetchOpenRouterModels(): Promise<Model<any>[]> {
try { try {
console.log("Fetching models from OpenRouter API..."); console.log("Fetching models from OpenRouter API...");
@ -97,6 +114,64 @@ async function fetchOpenRouterModels(): Promise<Model<any>[]> {
} }
} }
/**
 * Fetch the model catalog from the Vercel AI Gateway and convert the
 * tool-capable entries into the unified Model shape.
 *
 * - Only models tagged "tool-use" are included.
 * - The "vision" tag enables image input; "reasoning" marks reasoning models.
 * - Gateway pricing is per token (string or number) and is converted to
 *   cost per million tokens.
 *
 * Returns an empty array on any failure so model generation can continue
 * with the remaining sources.
 */
async function fetchAiGatewayModels(): Promise<Model<any>[]> {
	try {
		console.log("Fetching models from Vercel AI Gateway API...");
		const response = await fetch(`${AI_GATEWAY_MODELS_URL}/models`);
		// Fail fast on HTTP errors instead of trying to parse an error page as JSON.
		if (!response.ok) {
			throw new Error(`Vercel AI Gateway request failed: HTTP ${response.status}`);
		}
		const data = await response.json();

		const models: Model<any>[] = [];

		// Pricing values may be strings, numbers, or absent; coerce to a finite number.
		const toNumber = (value: string | number | undefined): number => {
			if (typeof value === "number") {
				return Number.isFinite(value) ? value : 0;
			}
			const parsed = parseFloat(value ?? "0");
			return Number.isFinite(parsed) ? parsed : 0;
		};

		const items = Array.isArray(data.data) ? (data.data as AiGatewayModel[]) : [];
		for (const model of items) {
			const tags = Array.isArray(model.tags) ? model.tags : [];

			// Only include models that support tools
			if (!tags.includes("tool-use")) continue;

			const input: ("text" | "image")[] = ["text"];
			if (tags.includes("vision")) {
				input.push("image");
			}

			// Convert per-token pricing to cost per million tokens.
			const inputCost = toNumber(model.pricing?.input) * 1_000_000;
			const outputCost = toNumber(model.pricing?.output) * 1_000_000;
			const cacheReadCost = toNumber(model.pricing?.input_cache_read) * 1_000_000;
			const cacheWriteCost = toNumber(model.pricing?.input_cache_write) * 1_000_000;

			models.push({
				id: model.id,
				name: model.name || model.id,
				api: "anthropic-messages",
				baseUrl: AI_GATEWAY_BASE_URL,
				provider: "vercel-ai-gateway",
				reasoning: tags.includes("reasoning"),
				input,
				cost: {
					input: inputCost,
					output: outputCost,
					cacheRead: cacheReadCost,
					cacheWrite: cacheWriteCost,
				},
				contextWindow: model.context_window || 4096,
				maxTokens: model.max_tokens || 4096,
			});
		}

		console.log(`Fetched ${models.length} tool-capable models from Vercel AI Gateway`);
		return models;
	} catch (error) {
		console.error("Failed to fetch Vercel AI Gateway models:", error);
		return [];
	}
}
async function loadModelsDevData(): Promise<Model<any>[]> { async function loadModelsDevData(): Promise<Model<any>[]> {
try { try {
console.log("Fetching models from models.dev API..."); console.log("Fetching models from models.dev API...");
@ -105,6 +180,87 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
const models: Model<any>[] = []; const models: Model<any>[] = [];
// Process Amazon Bedrock models
if (data["amazon-bedrock"]?.models) {
for (const [modelId, model] of Object.entries(data["amazon-bedrock"].models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
let id = modelId;
if (id.startsWith("ai21.jamba")) {
// These models don't support tool use in streaming mode
continue;
}
if (id.startsWith("amazon.titan-text-express") ||
id.startsWith("mistral.mistral-7b-instruct-v0")) {
// These models don't support system messages
continue;
}
// Some Amazon Bedrock models require cross-region inference profiles to work.
// To use cross-region inference, we need to add a region prefix to the models.
// See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system
// TODO: Remove Claude models once https://github.com/anomalyco/models.dev/pull/607 is merged, and follow-up with other models.
// Models with global cross-region inference profiles
if (id.startsWith("anthropic.claude-haiku-4-5") ||
id.startsWith("anthropic.claude-sonnet-4") ||
id.startsWith("anthropic.claude-opus-4-5") ||
id.startsWith("amazon.nova-2-lite") ||
id.startsWith("cohere.embed-v4") ||
id.startsWith("twelvelabs.pegasus-1-2")) {
id = "global." + id;
}
// Models with US cross-region inference profiles
if (id.startsWith("amazon.nova-lite") ||
id.startsWith("amazon.nova-micro") ||
id.startsWith("amazon.nova-premier") ||
id.startsWith("amazon.nova-pro") ||
id.startsWith("anthropic.claude-3-7-sonnet") ||
id.startsWith("anthropic.claude-opus-4-1") ||
id.startsWith("anthropic.claude-opus-4-20250514") ||
id.startsWith("deepseek.r1") ||
id.startsWith("meta.llama3-2") ||
id.startsWith("meta.llama3-3") ||
id.startsWith("meta.llama4")) {
id = "us." + id;
}
const bedrockModel = {
id,
name: m.name || id,
api: "bedrock-converse-stream" as const,
provider: "amazon-bedrock" as const,
baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
reasoning: m.reasoning === true,
input: (m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"]) as ("text" | "image")[],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
};
models.push(bedrockModel);
// Add EU cross-region inference variants for Claude models
if (modelId.startsWith("anthropic.claude-haiku-4-5") ||
modelId.startsWith("anthropic.claude-sonnet-4-5") ||
modelId.startsWith("anthropic.claude-opus-4-5")) {
models.push({
...bedrockModel,
id: "eu." + modelId,
name: (m.name || modelId) + " (EU)",
});
}
}
}
// Process Anthropic models // Process Anthropic models
if (data.anthropic?.models) { if (data.anthropic?.models) {
for (const [modelId, model] of Object.entries(data.anthropic.models)) { for (const [modelId, model] of Object.entries(data.anthropic.models)) {
@ -284,6 +440,7 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
}, },
compat: { compat: {
supportsDeveloperRole: false, supportsDeveloperRole: false,
thinkingFormat: "zai",
}, },
contextWindow: m.limit?.context || 4096, contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096, maxTokens: m.limit?.output || 4096,
@ -409,6 +566,33 @@ async function loadModelsDevData(): Promise<Model<any>[]> {
} }
} }
// Process MiniMax models
if (data.minimax?.models) {
for (const [modelId, model] of Object.entries(data.minimax.models)) {
const m = model as ModelsDevModel;
if (m.tool_call !== true) continue;
models.push({
id: modelId,
name: m.name || modelId,
api: "anthropic-messages",
provider: "minimax",
// MiniMax's Anthropic-compatible API - SDK appends /v1/messages
baseUrl: "https://api.minimax.io/anthropic",
reasoning: m.reasoning === true,
input: m.modalities?.input?.includes("image") ? ["text", "image"] : ["text"],
cost: {
input: m.cost?.input || 0,
output: m.cost?.output || 0,
cacheRead: m.cost?.cache_read || 0,
cacheWrite: m.cost?.cache_write || 0,
},
contextWindow: m.limit?.context || 4096,
maxTokens: m.limit?.output || 4096,
});
}
}
console.log(`Loaded ${models.length} tool-capable models from models.dev`); console.log(`Loaded ${models.length} tool-capable models from models.dev`);
return models; return models;
} catch (error) { } catch (error) {
@ -421,11 +605,13 @@ async function generateModels() {
// Fetch models from both sources // Fetch models from both sources
// models.dev: Anthropic, Google, OpenAI, Groq, Cerebras // models.dev: Anthropic, Google, OpenAI, Groq, Cerebras
// OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI) // OpenRouter: xAI and other providers (excluding Anthropic, Google, OpenAI)
// AI Gateway: OpenAI-compatible catalog with tool-capable models
const modelsDevModels = await loadModelsDevData(); const modelsDevModels = await loadModelsDevData();
const openRouterModels = await fetchOpenRouterModels(); const openRouterModels = await fetchOpenRouterModels();
const aiGatewayModels = await fetchAiGatewayModels();
// Combine models (models.dev has priority) // Combine models (models.dev has priority)
const allModels = [...modelsDevModels, ...openRouterModels]; const allModels = [...modelsDevModels, ...openRouterModels, ...aiGatewayModels];
// Fix incorrect cache pricing for Claude Opus 4.5 from models.dev // Fix incorrect cache pricing for Claude Opus 4.5 from models.dev
// models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25) // models.dev has 3x the correct pricing (1.5/18.75 instead of 0.5/6.25)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,548 @@
import {
BedrockRuntimeClient,
StopReason as BedrockStopReason,
type Tool as BedrockTool,
CachePointType,
type ContentBlock,
type ContentBlockDeltaEvent,
type ContentBlockStartEvent,
type ContentBlockStopEvent,
ConversationRole,
ConverseStreamCommand,
type ConverseStreamMetadataEvent,
ImageFormat,
type Message,
type SystemContentBlock,
type ToolChoice,
type ToolConfiguration,
ToolResultStatus,
} from "@aws-sdk/client-bedrock-runtime";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
Model,
StopReason,
StreamFunction,
StreamOptions,
TextContent,
ThinkingBudgets,
ThinkingContent,
ThinkingLevel,
Tool,
ToolCall,
ToolResultMessage,
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
/**
 * Provider-specific stream options for Amazon Bedrock (Converse streaming API).
 */
export interface BedrockOptions extends StreamOptions {
	/** AWS region for the Bedrock runtime; falls back to AWS_REGION / AWS_DEFAULT_REGION, then "us-east-1". */
	region?: string;
	/** Named AWS credentials profile to use for the Bedrock client. */
	profile?: string;
	/** Tool selection strategy forwarded to the Converse API. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
	/** Thinking level. See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-reasoning.html for supported models. */
	reasoning?: ThinkingLevel;
	/** Custom token budgets per thinking level. Overrides default budgets. */
	thinkingBudgets?: ThinkingBudgets;
	/** Only supported by Claude 4.x models, see https://docs.aws.amazon.com/bedrock/latest/userguide/claude-messages-extended-thinking.html#claude-messages-extended-thinking-tool-use-interleaved */
	interleavedThinking?: boolean;
}
// Content block augmented with transient streaming bookkeeping: `index` is the
// provider-side content block index, `partialJson` accumulates streamed tool-call
// arguments. Both fields are deleted before the message is finalized.
type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };
/**
 * Stream an assistant message from Amazon Bedrock via the Converse streaming API.
 *
 * Builds a ConverseStreamCommand from the context (messages, system prompt,
 * tools, reasoning config), then translates Bedrock stream items into the
 * library's standardized events (`start`, text/thinking/toolcall events,
 * `done`, `error`). Usage and cost are folded in from the metadata event.
 *
 * On any failure (including abort) the partial message is stripped of its
 * streaming bookkeeping fields and emitted as an `error` event; the stream is
 * always ended.
 */
export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
	model: Model<"bedrock-converse-stream">,
	context: Context,
	options: BedrockOptions,
): AssistantMessageEventStream => {
	const stream = new AssistantMessageEventStream();

	(async () => {
		// Partial assistant message, mutated in place as stream items arrive.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "bedrock-converse-stream" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
			timestamp: Date.now(),
		};
		// Alias with bookkeeping fields (`index`, `partialJson`) visible.
		const blocks = output.content as Block[];
		try {
			const client = new BedrockRuntimeClient({
				region: options.region || process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || "us-east-1",
				profile: options.profile,
			});
			const command = new ConverseStreamCommand({
				modelId: model.id,
				messages: convertMessages(context, model),
				system: buildSystemPrompt(context.systemPrompt, model),
				inferenceConfig: { maxTokens: options.maxTokens, temperature: options.temperature },
				toolConfig: convertToolConfig(context.tools, options.toolChoice),
				additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
			});
			const response = await client.send(command, { abortSignal: options.signal });
			for await (const item of response.stream!) {
				if (item.messageStart) {
					if (item.messageStart.role !== ConversationRole.ASSISTANT) {
						throw new Error(`Expected assistant message start but got ${item.messageStart.role} instead`);
					}
					stream.push({ type: "start", partial: output });
				} else if (item.contentBlockStart) {
					handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
				} else if (item.contentBlockDelta) {
					handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
				} else if (item.contentBlockStop) {
					handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
				} else if (item.messageStop) {
					output.stopReason = mapStopReason(item.messageStop.stopReason);
				} else if (item.metadata) {
					handleMetadata(item.metadata, model, output);
				} else if (item.internalServerException) {
					throw new Error(`Internal server error: ${item.internalServerException.message}`);
				} else if (item.modelStreamErrorException) {
					throw new Error(`Model stream error: ${item.modelStreamErrorException.message}`);
				} else if (item.validationException) {
					throw new Error(`Validation error: ${item.validationException.message}`);
				} else if (item.throttlingException) {
					throw new Error(`Throttling error: ${item.throttlingException.message}`);
				} else if (item.serviceUnavailableException) {
					throw new Error(`Service unavailable: ${item.serviceUnavailableException.message}`);
				}
			}
			// The SDK may end the stream without raising when aborted mid-flight.
			if (options.signal?.aborted) {
				throw new Error("Request was aborted");
			}
			if (output.stopReason === "error" || output.stopReason === "aborted") {
				throw new Error("An unknown error occurred");
			}
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Strip streaming bookkeeping so the error payload contains clean content blocks.
			for (const block of output.content) {
				delete (block as Block).index;
				delete (block as Block).partialJson;
			}
			output.stopReason = options.signal?.aborted ? "aborted" : "error";
			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
			stream.push({ type: "error", reason: output.stopReason, error: output });
			stream.end();
		}
	})();

	return stream;
};
/**
 * Handle a Bedrock contentBlockStart event.
 *
 * Only tool-use blocks emit an explicit start event here; text and thinking
 * blocks are created lazily by handleContentBlockDelta on their first delta.
 */
function handleContentBlockStart(
	event: ContentBlockStartEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const toolUse = event.start?.toolUse;
	if (!toolUse) return;

	// Remember the provider-side block index so later deltas can locate this block.
	const toolBlock: Block = {
		type: "toolCall",
		id: toolUse.toolUseId || "",
		name: toolUse.name || "",
		arguments: {},
		partialJson: "",
		index: event.contentBlockIndex!,
	};
	output.content.push(toolBlock);
	stream.push({ type: "toolcall_start", contentIndex: blocks.length - 1, partial: output });
}
/**
 * Handle a Bedrock contentBlockDelta event, routing text, tool-call JSON,
 * and reasoning deltas into the matching content block of the partial
 * assistant message and emitting the corresponding stream events.
 *
 * Text and thinking blocks are created here on their first delta, because
 * Bedrock does not send contentBlockStart for them (only tool-use blocks
 * get one — see handleContentBlockStart).
 */
function handleContentBlockDelta(
	event: ContentBlockDeltaEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const contentBlockIndex = event.contentBlockIndex!;
	const delta = event.delta;
	// Map the provider-side block index back to our position in output.content.
	let index = blocks.findIndex((b) => b.index === contentBlockIndex);
	let block = blocks[index];
	if (delta?.text !== undefined) {
		// If no text block exists yet, create one, as `handleContentBlockStart` is not sent for text blocks
		if (!block) {
			const newBlock: Block = { type: "text", text: "", index: contentBlockIndex };
			output.content.push(newBlock);
			index = blocks.length - 1;
			block = blocks[index];
			stream.push({ type: "text_start", contentIndex: index, partial: output });
		}
		if (block.type === "text") {
			block.text += delta.text;
			stream.push({ type: "text_delta", contentIndex: index, delta: delta.text, partial: output });
		}
	} else if (delta?.toolUse && block?.type === "toolCall") {
		// Tool arguments stream in as partial JSON: keep the raw string and re-parse
		// on every delta so `arguments` always holds the best-effort parse so far.
		block.partialJson = (block.partialJson || "") + (delta.toolUse.input || "");
		block.arguments = parseStreamingJson(block.partialJson);
		stream.push({ type: "toolcall_delta", contentIndex: index, delta: delta.toolUse.input || "", partial: output });
	} else if (delta?.reasoningContent) {
		let thinkingBlock = block;
		let thinkingIndex = index;
		// Thinking blocks are likewise created lazily on their first delta.
		if (!thinkingBlock) {
			const newBlock: Block = { type: "thinking", thinking: "", thinkingSignature: "", index: contentBlockIndex };
			output.content.push(newBlock);
			thinkingIndex = blocks.length - 1;
			thinkingBlock = blocks[thinkingIndex];
			stream.push({ type: "thinking_start", contentIndex: thinkingIndex, partial: output });
		}
		if (thinkingBlock?.type === "thinking") {
			if (delta.reasoningContent.text) {
				thinkingBlock.thinking += delta.reasoningContent.text;
				stream.push({
					type: "thinking_delta",
					contentIndex: thinkingIndex,
					delta: delta.reasoningContent.text,
					partial: output,
				});
			}
			// Signature chunks arrive separately and are concatenated; presumably
			// needed to replay thinking blocks to the provider — TODO confirm against convertMessages.
			if (delta.reasoningContent.signature) {
				thinkingBlock.thinkingSignature =
					(thinkingBlock.thinkingSignature || "") + delta.reasoningContent.signature;
			}
		}
	}
}
/**
 * Apply usage metadata from a Bedrock `metadata` stream event to the
 * in-progress assistant message and recompute its cost.
 * Events without a usage payload are ignored.
 */
function handleMetadata(
	event: ConverseStreamMetadataEvent,
	model: Model<"bedrock-converse-stream">,
	output: AssistantMessage,
): void {
	const usage = event.usage;
	if (!usage) return;
	const inputTokens = usage.inputTokens || 0;
	const outputTokens = usage.outputTokens || 0;
	output.usage.input = inputTokens;
	output.usage.output = outputTokens;
	output.usage.cacheRead = usage.cacheReadInputTokens || 0;
	output.usage.cacheWrite = usage.cacheWriteInputTokens || 0;
	// Fall back to input + output when the API omits a total.
	output.usage.totalTokens = usage.totalTokens || inputTokens + outputTokens;
	calculateCost(model, output.usage);
}
/**
 * Handle a Bedrock `contentBlockStop` stream event: finalize the matching
 * block, strip streaming-only bookkeeping fields, and emit the `*_end`
 * event. Unmatched indices are ignored.
 */
function handleContentBlockStop(
	event: ContentBlockStopEvent,
	blocks: Block[],
	output: AssistantMessage,
	stream: AssistantMessageEventStream,
): void {
	const position = blocks.findIndex((b) => b.index === event.contentBlockIndex);
	const block = blocks[position];
	if (!block) return;
	// The index field only exists to correlate streaming events.
	delete (block as Block).index;
	if (block.type === "text") {
		stream.push({ type: "text_end", contentIndex: position, content: block.text, partial: output });
	} else if (block.type === "thinking") {
		stream.push({ type: "thinking_end", contentIndex: position, content: block.thinking, partial: output });
	} else if (block.type === "toolCall") {
		// Final parse of the accumulated JSON argument fragments.
		block.arguments = parseStreamingJson(block.partialJson);
		delete (block as Block).partialJson;
		stream.push({ type: "toolcall_end", contentIndex: position, toolCall: block, partial: output });
	}
}
/**
 * Whether Bedrock prompt caching can be enabled for this model.
 * Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, and Claude 4.x models.
 */
function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
	const id = model.id.toLowerCase();
	const isClaude = id.includes("claude");
	// Claude 4.x model ids contain "-4-" or "-4." (opus-4, sonnet-4, haiku-4).
	const isClaude4 = isClaude && (id.includes("-4-") || id.includes("-4."));
	return isClaude4 || id.includes("claude-3-7-sonnet") || id.includes("claude-3-5-haiku");
}
/**
 * Convert the system prompt into Bedrock system content blocks, appending a
 * cache point for models that support prompt caching.
 * Returns undefined when no system prompt is configured.
 */
function buildSystemPrompt(
	systemPrompt: string | undefined,
	model: Model<"bedrock-converse-stream">,
): SystemContentBlock[] | undefined {
	if (!systemPrompt) return undefined;
	const result: SystemContentBlock[] = [{ text: sanitizeSurrogates(systemPrompt) }];
	if (supportsPromptCaching(model)) {
		// Cache everything up to and including the system prompt.
		result.push({ cachePoint: { type: CachePointType.DEFAULT } });
	}
	return result;
}
/**
 * Convert the provider-agnostic message history into Bedrock Converse API
 * messages.
 *
 * Notable transformations:
 * - Assistant messages whose content is empty (or becomes empty after
 *   filtering blank text/thinking blocks) are dropped, because Bedrock
 *   rejects messages with empty content arrays.
 * - Consecutive toolResult messages are merged into a single USER message,
 *   since Bedrock requires all tool results for a turn in one message.
 * - For Claude models with prompt caching support, a cache point is appended
 *   to the last message when it is a user message.
 *
 * Throws on unknown message roles or unknown content block types.
 */
function convertMessages(context: Context, model: Model<"bedrock-converse-stream">): Message[] {
	const result: Message[] = [];
	const messages = context.messages;
	// Indexed loop (not for..of) so the toolResult case can look ahead and
	// advance `i` past messages it has already merged.
	for (let i = 0; i < messages.length; i++) {
		const m = messages[i];
		switch (m.role) {
			case "user":
				// String content becomes a single text block; array content is
				// mapped block-by-block (text and image only).
				result.push({
					role: ConversationRole.USER,
					content:
						typeof m.content === "string"
							? [{ text: sanitizeSurrogates(m.content) }]
							: m.content.map((c) => {
									switch (c.type) {
										case "text":
											return { text: sanitizeSurrogates(c.text) };
										case "image":
											return { image: createImageBlock(c.mimeType, c.data) };
										default:
											throw new Error("Unknown user content type");
									}
								}),
				});
				break;
			case "assistant": {
				// Skip assistant messages with empty content (e.g., from aborted requests)
				// Bedrock rejects messages with empty content arrays.
				// NOTE: `continue` inside the switch advances the outer for loop.
				if (m.content.length === 0) {
					continue;
				}
				const contentBlocks: ContentBlock[] = [];
				for (const c of m.content) {
					switch (c.type) {
						case "text":
							// Skip empty text blocks (`continue` targets the inner for..of)
							if (c.text.trim().length === 0) continue;
							contentBlocks.push({ text: sanitizeSurrogates(c.text) });
							break;
						case "toolCall":
							contentBlocks.push({
								toolUse: { toolUseId: c.id, name: c.name, input: c.arguments },
							});
							break;
						case "thinking":
							// Skip empty thinking blocks
							if (c.thinking.trim().length === 0) continue;
							// Signature is required by Bedrock to validate replayed thinking.
							contentBlocks.push({
								reasoningContent: {
									reasoningText: { text: sanitizeSurrogates(c.thinking), signature: c.thinkingSignature },
								},
							});
							break;
						default:
							throw new Error("Unknown assistant content type");
					}
				}
				// Skip if all content blocks were filtered out
				if (contentBlocks.length === 0) {
					continue;
				}
				result.push({
					role: ConversationRole.ASSISTANT,
					content: contentBlocks,
				});
				break;
			}
			case "toolResult": {
				// Collect all consecutive toolResult messages into a single user message
				// Bedrock requires all tool results to be in one message
				const toolResults: ContentBlock.ToolResultMember[] = [];
				// Add current tool result with all content blocks combined
				toolResults.push({
					toolResult: {
						toolUseId: m.toolCallId,
						content: m.content.map((c) =>
							c.type === "image"
								? { image: createImageBlock(c.mimeType, c.data) }
								: { text: sanitizeSurrogates(c.text) },
						),
						status: m.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,
					},
				});
				// Look ahead for consecutive toolResult messages
				let j = i + 1;
				while (j < messages.length && messages[j].role === "toolResult") {
					const nextMsg = messages[j] as ToolResultMessage;
					toolResults.push({
						toolResult: {
							toolUseId: nextMsg.toolCallId,
							content: nextMsg.content.map((c) =>
								c.type === "image"
									? { image: createImageBlock(c.mimeType, c.data) }
									: { text: sanitizeSurrogates(c.text) },
							),
							status: nextMsg.isError ? ToolResultStatus.ERROR : ToolResultStatus.SUCCESS,
						},
					});
					j++;
				}
				// Skip the messages we've already processed
				i = j - 1;
				result.push({
					role: ConversationRole.USER,
					content: toolResults,
				});
				break;
			}
			default:
				throw new Error("Unknown message role");
		}
	}
	// Add cache point to the last user message for supported Claude models
	if (supportsPromptCaching(model) && result.length > 0) {
		const lastMessage = result[result.length - 1];
		if (lastMessage.role === ConversationRole.USER && lastMessage.content) {
			(lastMessage.content as ContentBlock[]).push({ cachePoint: { type: CachePointType.DEFAULT } });
		}
	}
	return result;
}
/**
 * Build the Bedrock tool configuration from the generic tool list.
 * Returns undefined when there are no tools or tool use is disabled
 * (`toolChoice === "none"`); an unrecognized toolChoice leaves the
 * Bedrock toolChoice unset.
 */
function convertToolConfig(
	tools: Tool[] | undefined,
	toolChoice: BedrockOptions["toolChoice"],
): ToolConfiguration | undefined {
	if (!tools?.length || toolChoice === "none") return undefined;

	const specs: BedrockTool[] = [];
	for (const tool of tools) {
		specs.push({
			toolSpec: {
				name: tool.name,
				description: tool.description,
				inputSchema: { json: tool.parameters },
			},
		});
	}

	let choice: ToolChoice | undefined;
	if (toolChoice === "auto") {
		choice = { auto: {} };
	} else if (toolChoice === "any") {
		choice = { any: {} };
	} else if (toolChoice?.type === "tool") {
		// Force a specific tool by name.
		choice = { tool: { name: toolChoice.name } };
	}

	return { tools: specs, toolChoice: choice };
}
/**
 * Map a Bedrock stop reason onto the provider-agnostic StopReason.
 * Missing or unrecognized reasons are treated as errors.
 */
function mapStopReason(reason: string | undefined): StopReason {
	if (reason === BedrockStopReason.END_TURN || reason === BedrockStopReason.STOP_SEQUENCE) {
		return "stop";
	}
	if (reason === BedrockStopReason.MAX_TOKENS || reason === BedrockStopReason.MODEL_CONTEXT_WINDOW_EXCEEDED) {
		return "length";
	}
	if (reason === BedrockStopReason.TOOL_USE) {
		return "toolUse";
	}
	return "error";
}
/**
 * Build provider-specific request fields enabling extended thinking.
 *
 * Only Anthropic Claude models on Bedrock are handled; returns undefined
 * when reasoning is disabled, the model does not support it, or the model
 * is not a Claude model.
 */
function buildAdditionalModelRequestFields(
	model: Model<"bedrock-converse-stream">,
	options: BedrockOptions,
): Record<string, any> | undefined {
	if (!options.reasoning || !model.reasoning) return undefined;
	if (!model.id.includes("anthropic.claude")) return undefined;

	const defaultBudgets: Record<ThinkingLevel, number> = {
		minimal: 1024,
		low: 2048,
		medium: 8192,
		high: 16384,
		xhigh: 16384, // Claude doesn't support xhigh, clamp to high
	};
	// Custom budgets have no xhigh entry, so look them up at the clamped level;
	// defaults carry an explicit xhigh value and use the raw level.
	const budgetLevel = options.reasoning === "xhigh" ? "high" : options.reasoning;
	const budgetTokens = options.thinkingBudgets?.[budgetLevel] ?? defaultBudgets[options.reasoning];

	const fields: Record<string, any> = {
		thinking: {
			type: "enabled",
			budget_tokens: budgetTokens,
		},
	};
	if (options.interleavedThinking) {
		fields.anthropic_beta = ["interleaved-thinking-2025-05-14"];
	}
	return fields;
}
/**
 * Build a Bedrock image content block from a mime type and a base64 payload.
 *
 * @throws Error for mime types Bedrock does not accept.
 */
function createImageBlock(mimeType: string, data: string) {
	let format: ImageFormat;
	if (mimeType === "image/jpeg" || mimeType === "image/jpg") {
		format = ImageFormat.JPEG;
	} else if (mimeType === "image/png") {
		format = ImageFormat.PNG;
	} else if (mimeType === "image/gif") {
		format = ImageFormat.GIF;
	} else if (mimeType === "image/webp") {
		format = ImageFormat.WEBP;
	} else {
		throw new Error(`Unknown image type: ${mimeType}`);
	}
	// Decode base64 via atob (not Buffer) so this works outside Node too.
	// atob yields only U+0000–U+00FF chars, so per-char code units are the bytes.
	const decoded = atob(data);
	const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
	return { source: { bytes }, format };
}

View file

@ -287,7 +287,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
} }
if (output.stopReason === "aborted" || output.stopReason === "error") { if (output.stopReason === "aborted" || output.stopReason === "error") {
throw new Error("An unkown error ocurred"); throw new Error("An unknown error occurred");
} }
stream.push({ type: "done", reason: output.stopReason, message: output }); stream.push({ type: "done", reason: output.stopReason, message: output });

View file

@ -4,6 +4,7 @@
* Uses the Cloud Code Assist API endpoint to access Gemini and Claude models. * Uses the Cloud Code Assist API endpoint to access Gemini and Claude models.
*/ */
import { createHash } from "node:crypto";
import type { Content, ThinkingConfig } from "@google/genai"; import type { Content, ThinkingConfig } from "@google/genai";
import { calculateCost } from "../models.js"; import { calculateCost } from "../models.js";
import type { import type {
@ -54,6 +55,8 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
} }
const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com"; const DEFAULT_ENDPOINT = "https://cloudcode-pa.googleapis.com";
const ANTIGRAVITY_DAILY_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googleapis.com";
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, DEFAULT_ENDPOINT] as const;
// Headers for Gemini CLI (prod endpoint) // Headers for Gemini CLI (prod endpoint)
const GEMINI_CLI_HEADERS = { const GEMINI_CLI_HEADERS = {
"User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1", "User-Agent": "google-cloud-sdk vscode_cloudshelleditor/0.1",
@ -163,16 +166,66 @@ let toolCallCounter = 0;
// Retry configuration // Retry configuration
const MAX_RETRIES = 3; const MAX_RETRIES = 3;
const BASE_DELAY_MS = 1000; const BASE_DELAY_MS = 1000;
const MAX_EMPTY_STREAM_RETRIES = 2;
const EMPTY_STREAM_BASE_DELAY_MS = 500;
const CLAUDE_THINKING_BETA_HEADER = "interleaved-thinking-2025-05-14";
/** /**
* Extract retry delay from Gemini error response (in milliseconds). * Extract retry delay from Gemini error response (in milliseconds).
* Parses patterns like: * Checks headers first (Retry-After, x-ratelimit-reset, x-ratelimit-reset-after),
* then parses body patterns like:
* - "Your quota will reset after 39s" * - "Your quota will reset after 39s"
* - "Your quota will reset after 18h31m10s" * - "Your quota will reset after 18h31m10s"
* - "Please retry in Xs" or "Please retry in Xms" * - "Please retry in Xs" or "Please retry in Xms"
* - "retryDelay": "34.074824224s" (JSON field) * - "retryDelay": "34.074824224s" (JSON field)
*/ */
function extractRetryDelay(errorText: string): number | undefined { export function extractRetryDelay(errorText: string, response?: Response | Headers): number | undefined {
const normalizeDelay = (ms: number): number | undefined => (ms > 0 ? Math.ceil(ms + 1000) : undefined);
const headers = response instanceof Headers ? response : response?.headers;
if (headers) {
const retryAfter = headers.get("retry-after");
if (retryAfter) {
const retryAfterSeconds = Number(retryAfter);
if (Number.isFinite(retryAfterSeconds)) {
const delay = normalizeDelay(retryAfterSeconds * 1000);
if (delay !== undefined) {
return delay;
}
}
const retryAfterDate = new Date(retryAfter);
const retryAfterMs = retryAfterDate.getTime();
if (!Number.isNaN(retryAfterMs)) {
const delay = normalizeDelay(retryAfterMs - Date.now());
if (delay !== undefined) {
return delay;
}
}
}
const rateLimitReset = headers.get("x-ratelimit-reset");
if (rateLimitReset) {
const resetSeconds = Number.parseInt(rateLimitReset, 10);
if (!Number.isNaN(resetSeconds)) {
const delay = normalizeDelay(resetSeconds * 1000 - Date.now());
if (delay !== undefined) {
return delay;
}
}
}
const rateLimitResetAfter = headers.get("x-ratelimit-reset-after");
if (rateLimitResetAfter) {
const resetAfterSeconds = Number(rateLimitResetAfter);
if (Number.isFinite(resetAfterSeconds)) {
const delay = normalizeDelay(resetAfterSeconds * 1000);
if (delay !== undefined) {
return delay;
}
}
}
}
// Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s") // Pattern 1: "Your quota will reset after ..." (formats: "18h31m10s", "10m15s", "6s", "39s")
const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i); const durationMatch = errorText.match(/reset after (?:(\d+)h)?(?:(\d+)m)?(\d+(?:\.\d+)?)s/i);
if (durationMatch) { if (durationMatch) {
@ -181,8 +234,9 @@ function extractRetryDelay(errorText: string): number | undefined {
const seconds = parseFloat(durationMatch[3]); const seconds = parseFloat(durationMatch[3]);
if (!Number.isNaN(seconds)) { if (!Number.isNaN(seconds)) {
const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000; const totalMs = ((hours * 60 + minutes) * 60 + seconds) * 1000;
if (totalMs > 0) { const delay = normalizeDelay(totalMs);
return Math.ceil(totalMs + 1000); // Add 1s buffer if (delay !== undefined) {
return delay;
} }
} }
} }
@ -193,7 +247,10 @@ function extractRetryDelay(errorText: string): number | undefined {
const value = parseFloat(retryInMatch[1]); const value = parseFloat(retryInMatch[1]);
if (!Number.isNaN(value) && value > 0) { if (!Number.isNaN(value) && value > 0) {
const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000; const ms = retryInMatch[2].toLowerCase() === "ms" ? value : value * 1000;
return Math.ceil(ms + 1000); const delay = normalizeDelay(ms);
if (delay !== undefined) {
return delay;
}
} }
} }
@ -203,21 +260,45 @@ function extractRetryDelay(errorText: string): number | undefined {
const value = parseFloat(retryDelayMatch[1]); const value = parseFloat(retryDelayMatch[1]);
if (!Number.isNaN(value) && value > 0) { if (!Number.isNaN(value) && value > 0) {
const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000; const ms = retryDelayMatch[2].toLowerCase() === "ms" ? value : value * 1000;
return Math.ceil(ms + 1000); const delay = normalizeDelay(ms);
if (delay !== undefined) {
return delay;
}
} }
} }
return undefined; return undefined;
} }
function isClaudeThinkingModel(modelId: string): boolean {
const normalized = modelId.toLowerCase();
return normalized.includes("claude") && normalized.includes("thinking");
}
/** /**
* Check if an error is retryable (rate limit, server error, etc.) * Check if an error is retryable (rate limit, server error, network error, etc.)
*/ */
function isRetryableError(status: number, errorText: string): boolean { function isRetryableError(status: number, errorText: string): boolean {
if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) { if (status === 429 || status === 500 || status === 502 || status === 503 || status === 504) {
return true; return true;
} }
return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable/i.test(errorText); return /resource.?exhausted|rate.?limit|overloaded|service.?unavailable|other.?side.?closed/i.test(errorText);
}
/**
* Extract a clean, user-friendly error message from Google API error response.
* Parses JSON error responses and returns just the message field.
*/
function extractErrorMessage(errorText: string): string {
try {
const parsed = JSON.parse(errorText) as { error?: { message?: string } };
if (parsed.error?.message) {
return parsed.error.message;
}
} catch {
// Not JSON, return as-is
}
return errorText;
} }
/** /**
@ -242,6 +323,7 @@ interface CloudCodeAssistRequest {
model: string; model: string;
request: { request: {
contents: Content[]; contents: Content[];
sessionId?: string;
systemInstruction?: { role?: string; parts: { text: string }[] }; systemInstruction?: { role?: string; parts: { text: string }[] };
generationConfig?: { generationConfig?: {
maxOutputTokens?: number; maxOutputTokens?: number;
@ -339,17 +421,26 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate."); throw new Error("Missing token or projectId in Google Cloud credentials. Use /login to re-authenticate.");
} }
const endpoint = model.baseUrl || DEFAULT_ENDPOINT; const isAntigravity = model.provider === "google-antigravity";
const url = `${endpoint}/v1internal:streamGenerateContent?alt=sse`; const baseUrl = model.baseUrl?.trim();
const endpoints = baseUrl ? [baseUrl] : isAntigravity ? ANTIGRAVITY_ENDPOINT_FALLBACKS : [DEFAULT_ENDPOINT];
// Use Antigravity headers for sandbox endpoint, otherwise Gemini CLI headers
const isAntigravity = endpoint.includes("sandbox.googleapis.com");
const requestBody = buildRequest(model, context, projectId, options, isAntigravity); const requestBody = buildRequest(model, context, projectId, options, isAntigravity);
const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS; const headers = isAntigravity ? ANTIGRAVITY_HEADERS : GEMINI_CLI_HEADERS;
const requestHeaders = {
Authorization: `Bearer ${accessToken}`,
"Content-Type": "application/json",
Accept: "text/event-stream",
...headers,
...(isClaudeThinkingModel(model.id) ? { "anthropic-beta": CLAUDE_THINKING_BETA_HEADER } : {}),
};
const requestBodyJson = JSON.stringify(requestBody);
// Fetch with retry logic for rate limits and transient errors // Fetch with retry logic for rate limits and transient errors
let response: Response | undefined; let response: Response | undefined;
let lastError: Error | undefined; let lastError: Error | undefined;
let requestUrl: string | undefined;
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
if (options?.signal?.aborted) { if (options?.signal?.aborted) {
@ -357,15 +448,12 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
} }
try { try {
response = await fetch(url, { const endpoint = endpoints[Math.min(attempt, endpoints.length - 1)];
requestUrl = `${endpoint}/v1internal:streamGenerateContent?alt=sse`;
response = await fetch(requestUrl, {
method: "POST", method: "POST",
headers: { headers: requestHeaders,
Authorization: `Bearer ${accessToken}`, body: requestBodyJson,
"Content-Type": "application/json",
Accept: "text/event-stream",
...headers,
},
body: JSON.stringify(requestBody),
signal: options?.signal, signal: options?.signal,
}); });
@ -378,14 +466,14 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
// Check if retryable // Check if retryable
if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) { if (attempt < MAX_RETRIES && isRetryableError(response.status, errorText)) {
// Use server-provided delay or exponential backoff // Use server-provided delay or exponential backoff
const serverDelay = extractRetryDelay(errorText); const serverDelay = extractRetryDelay(errorText, response);
const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt; const delayMs = serverDelay ?? BASE_DELAY_MS * 2 ** attempt;
await sleep(delayMs, options?.signal); await sleep(delayMs, options?.signal);
continue; continue;
} }
// Not retryable or max retries exceeded // Not retryable or max retries exceeded
throw new Error(`Cloud Code Assist API error (${response.status}): ${errorText}`); throw new Error(`Cloud Code Assist API error (${response.status}): ${extractErrorMessage(errorText)}`);
} catch (error) { } catch (error) {
// Check for abort - fetch throws AbortError, our code throws "Request was aborted" // Check for abort - fetch throws AbortError, our code throws "Request was aborted"
if (error instanceof Error) { if (error instanceof Error) {
@ -393,7 +481,11 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
throw new Error("Request was aborted"); throw new Error("Request was aborted");
} }
} }
// Extract detailed error message from fetch errors (Node includes cause)
lastError = error instanceof Error ? error : new Error(String(error)); lastError = error instanceof Error ? error : new Error(String(error));
if (lastError.message === "fetch failed" && lastError.cause instanceof Error) {
lastError = new Error(`Network error: ${lastError.cause.message}`);
}
// Network errors are retryable // Network errors are retryable
if (attempt < MAX_RETRIES) { if (attempt < MAX_RETRIES) {
const delayMs = BASE_DELAY_MS * 2 ** attempt; const delayMs = BASE_DELAY_MS * 2 ** attempt;
@ -408,73 +500,160 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
throw lastError ?? new Error("Failed to get response after retries"); throw lastError ?? new Error("Failed to get response after retries");
} }
if (!response.body) { let started = false;
throw new Error("No response body"); const ensureStarted = () => {
} if (!started) {
stream.push({ type: "start", partial: output });
stream.push({ type: "start", partial: output }); started = true;
}
let currentBlock: TextContent | ThinkingContent | null = null;
const blocks = output.content;
const blockIndex = () => blocks.length - 1;
// Read SSE stream
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
// Set up abort handler to cancel reader when signal fires
const abortHandler = () => {
void reader.cancel().catch(() => {});
}; };
options?.signal?.addEventListener("abort", abortHandler);
try { const resetOutput = () => {
while (true) { output.content = [];
// Check abort signal before each read output.usage = {
if (options?.signal?.aborted) { input: 0,
throw new Error("Request was aborted"); output: 0,
} cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
output.stopReason = "stop";
output.errorMessage = undefined;
output.timestamp = Date.now();
started = false;
};
const { done, value } = await reader.read(); const streamResponse = async (activeResponse: Response): Promise<boolean> => {
if (done) break; if (!activeResponse.body) {
throw new Error("No response body");
}
buffer += decoder.decode(value, { stream: true }); let hasContent = false;
const lines = buffer.split("\n"); let currentBlock: TextContent | ThinkingContent | null = null;
buffer = lines.pop() || ""; const blocks = output.content;
const blockIndex = () => blocks.length - 1;
for (const line of lines) { // Read SSE stream
if (!line.startsWith("data:")) continue; const reader = activeResponse.body.getReader();
const decoder = new TextDecoder();
let buffer = "";
const jsonStr = line.slice(5).trim(); // Set up abort handler to cancel reader when signal fires
if (!jsonStr) continue; const abortHandler = () => {
void reader.cancel().catch(() => {});
};
options?.signal?.addEventListener("abort", abortHandler);
let chunk: CloudCodeAssistResponseChunk; try {
try { while (true) {
chunk = JSON.parse(jsonStr); // Check abort signal before each read
} catch { if (options?.signal?.aborted) {
continue; throw new Error("Request was aborted");
} }
// Unwrap the response const { done, value } = await reader.read();
const responseData = chunk.response; if (done) break;
if (!responseData) continue;
const candidate = responseData.candidates?.[0]; buffer += decoder.decode(value, { stream: true });
if (candidate?.content?.parts) { const lines = buffer.split("\n");
for (const part of candidate.content.parts) { buffer = lines.pop() || "";
if (part.text !== undefined) {
const isThinking = isThinkingPart(part); for (const line of lines) {
if ( if (!line.startsWith("data:")) continue;
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") || const jsonStr = line.slice(5).trim();
(!isThinking && currentBlock.type !== "text") if (!jsonStr) continue;
) {
let chunk: CloudCodeAssistResponseChunk;
try {
chunk = JSON.parse(jsonStr);
} catch {
continue;
}
// Unwrap the response
const responseData = chunk.response;
if (!responseData) continue;
const candidate = responseData.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.text !== undefined) {
hasContent = true;
const isThinking = isThinkingPart(part);
if (
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") ||
(!isThinking && currentBlock.type !== "text")
) {
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blocks.length - 1,
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
}
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
output.content.push(currentBlock);
ensureStarted();
stream.push({
type: "thinking_start",
contentIndex: blockIndex(),
partial: output,
});
} else {
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
ensureStarted();
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
}
}
if (currentBlock.type === "thinking") {
currentBlock.thinking += part.text;
currentBlock.thinkingSignature = retainThoughtSignature(
currentBlock.thinkingSignature,
part.thoughtSignature,
);
stream.push({
type: "thinking_delta",
contentIndex: blockIndex(),
delta: part.text,
partial: output,
});
} else {
currentBlock.text += part.text;
currentBlock.textSignature = retainThoughtSignature(
currentBlock.textSignature,
part.thoughtSignature,
);
stream.push({
type: "text_delta",
contentIndex: blockIndex(),
delta: part.text,
partial: output,
});
}
}
if (part.functionCall) {
hasContent = true;
if (currentBlock) { if (currentBlock) {
if (currentBlock.type === "text") { if (currentBlock.type === "text") {
stream.push({ stream.push({
type: "text_end", type: "text_end",
contentIndex: blocks.length - 1, contentIndex: blockIndex(),
content: currentBlock.text, content: currentBlock.text,
partial: output, partial: output,
}); });
@ -486,143 +665,142 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
partial: output, partial: output,
}); });
} }
currentBlock = null;
} }
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined }; const providedId = part.functionCall.id;
output.content.push(currentBlock); const needsNewId =
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output }); !providedId ||
} else { output.content.some((b) => b.type === "toolCall" && b.id === providedId);
currentBlock = { type: "text", text: "" }; const toolCallId = needsNewId
output.content.push(currentBlock); ? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output }); : providedId;
}
} const toolCall: ToolCall = {
if (currentBlock.type === "thinking") { type: "toolCall",
currentBlock.thinking += part.text; id: toolCallId,
currentBlock.thinkingSignature = retainThoughtSignature( name: part.functionCall.name || "",
currentBlock.thinkingSignature, arguments: part.functionCall.args as Record<string, unknown>,
part.thoughtSignature, ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
); };
output.content.push(toolCall);
ensureStarted();
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({ stream.push({
type: "thinking_delta", type: "toolcall_delta",
contentIndex: blockIndex(), contentIndex: blockIndex(),
delta: part.text, delta: JSON.stringify(toolCall.arguments),
partial: output, partial: output,
}); });
} else {
currentBlock.text += part.text;
currentBlock.textSignature = retainThoughtSignature(
currentBlock.textSignature,
part.thoughtSignature,
);
stream.push({ stream.push({
type: "text_delta", type: "toolcall_end",
contentIndex: blockIndex(), contentIndex: blockIndex(),
delta: part.text, toolCall,
partial: output, partial: output,
}); });
} }
} }
}
if (part.functionCall) { if (candidate?.finishReason) {
if (currentBlock) { output.stopReason = mapStopReasonString(candidate.finishReason);
if (currentBlock.type === "text") { if (output.content.some((b) => b.type === "toolCall")) {
stream.push({ output.stopReason = "toolUse";
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
currentBlock = null;
}
const providedId = part.functionCall.id;
const needsNewId =
!providedId || output.content.some((b) => b.type === "toolCall" && b.id === providedId);
const toolCallId = needsNewId
? `${part.functionCall.name}_${Date.now()}_${++toolCallCounter}`
: providedId;
const toolCall: ToolCall = {
type: "toolCall",
id: toolCallId,
name: part.functionCall.name || "",
arguments: part.functionCall.args as Record<string, unknown>,
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
output.content.push(toolCall);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
type: "toolcall_delta",
contentIndex: blockIndex(),
delta: JSON.stringify(toolCall.arguments),
partial: output,
});
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
} }
} }
}
if (candidate?.finishReason) { if (responseData.usageMetadata) {
output.stopReason = mapStopReasonString(candidate.finishReason); // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input
if (output.content.some((b) => b.type === "toolCall")) { const promptTokens = responseData.usageMetadata.promptTokenCount || 0;
output.stopReason = "toolUse"; const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
} output.usage = {
} input: promptTokens - cacheReadTokens,
output:
if (responseData.usageMetadata) { (responseData.usageMetadata.candidatesTokenCount || 0) +
// promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input (responseData.usageMetadata.thoughtsTokenCount || 0),
const promptTokens = responseData.usageMetadata.promptTokenCount || 0; cacheRead: cacheReadTokens,
const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0;
output.usage = {
input: promptTokens - cacheReadTokens,
output:
(responseData.usageMetadata.candidatesTokenCount || 0) +
(responseData.usageMetadata.thoughtsTokenCount || 0),
cacheRead: cacheReadTokens,
cacheWrite: 0,
totalTokens: responseData.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0, cacheWrite: 0,
total: 0, totalTokens: responseData.usageMetadata.totalTokenCount || 0,
}, cost: {
}; input: 0,
calculateCost(model, output.usage); output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
},
};
calculateCost(model, output.usage);
}
} }
} }
} finally {
options?.signal?.removeEventListener("abort", abortHandler);
}
if (currentBlock) {
if (currentBlock.type === "text") {
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
}
return hasContent;
};
let receivedContent = false;
let currentResponse = response;
for (let emptyAttempt = 0; emptyAttempt <= MAX_EMPTY_STREAM_RETRIES; emptyAttempt++) {
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
if (emptyAttempt > 0) {
const backoffMs = EMPTY_STREAM_BASE_DELAY_MS * 2 ** (emptyAttempt - 1);
await sleep(backoffMs, options?.signal);
if (!requestUrl) {
throw new Error("Missing request URL");
}
currentResponse = await fetch(requestUrl, {
method: "POST",
headers: requestHeaders,
body: requestBodyJson,
signal: options?.signal,
});
if (!currentResponse.ok) {
const retryErrorText = await currentResponse.text();
throw new Error(`Cloud Code Assist API error (${currentResponse.status}): ${retryErrorText}`);
}
}
const streamed = await streamResponse(currentResponse);
if (streamed) {
receivedContent = true;
break;
}
if (emptyAttempt < MAX_EMPTY_STREAM_RETRIES) {
resetOutput();
} }
} finally {
options?.signal?.removeEventListener("abort", abortHandler);
} }
if (currentBlock) { if (!receivedContent) {
if (currentBlock.type === "text") { throw new Error("Cloud Code Assist API returned an empty response");
stream.push({
type: "text_end",
contentIndex: blockIndex(),
content: currentBlock.text,
partial: output,
});
} else {
stream.push({
type: "thinking_end",
contentIndex: blockIndex(),
content: currentBlock.thinking,
partial: output,
});
}
} }
if (options?.signal?.aborted) { if (options?.signal?.aborted) {
@ -651,7 +829,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
return stream; return stream;
}; };
function buildRequest( function deriveSessionId(context: Context): string | undefined {
for (const message of context.messages) {
if (message.role !== "user") {
continue;
}
let text = "";
if (typeof message.content === "string") {
text = message.content;
} else if (Array.isArray(message.content)) {
text = message.content
.filter((item): item is TextContent => item.type === "text")
.map((item) => item.text)
.join("\n");
}
if (!text || text.trim().length === 0) {
return undefined;
}
const hash = createHash("sha256").update(text).digest("hex");
return hash.slice(0, 32);
}
return undefined;
}
export function buildRequest(
model: Model<"google-gemini-cli">, model: Model<"google-gemini-cli">,
context: Context, context: Context,
projectId: string, projectId: string,
@ -686,6 +891,11 @@ function buildRequest(
contents, contents,
}; };
const sessionId = deriveSessionId(context);
if (sessionId) {
request.sessionId = sessionId;
}
// System instruction must be object with parts, not plain string // System instruction must be object with parts, not plain string
if (context.systemPrompt) { if (context.systemPrompt) {
request.systemInstruction = { request.systemInstruction = {

View file

@ -365,6 +365,7 @@ function createClient(model: Model<"openai-completions">, context: Context, apiK
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) { function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
const compat = getCompat(model); const compat = getCompat(model);
const messages = convertMessages(model, context, compat); const messages = convertMessages(model, context, compat);
maybeAddOpenRouterAnthropicCacheControl(model, messages);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: model.id, model: model.id,
@ -403,13 +404,51 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
params.tool_choice = options.toolChoice; params.tool_choice = options.toolChoice;
} }
if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) { if (compat.thinkingFormat === "zai" && model.reasoning) {
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
// Must explicitly disable since z.ai defaults to thinking enabled
(params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort
params.reasoning_effort = options.reasoningEffort; params.reasoning_effort = options.reasoningEffort;
} }
return params; return params;
} }
/**
 * Opt OpenRouter-routed Anthropic models into prompt caching.
 *
 * Anthropic-style caching needs a `cache_control` marker on a text part, so we
 * walk the conversation backwards and tag the last user/assistant message that
 * carries text content. Plain string content is promoted to a single text part.
 * Mutates `messages` in place; does nothing for other providers/models.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	const isOpenRouterAnthropic = model.provider === "openrouter" && model.id.startsWith("anthropic/");
	if (!isOpenRouterAnthropic) return;
	let idx = messages.length;
	while (idx-- > 0) {
		const message = messages[idx];
		if (message.role !== "user" && message.role !== "assistant") continue;
		const body = message.content;
		if (typeof body === "string") {
			// Promote string content to a single tagged text part.
			const tagged = Object.assign({ type: "text" as const, text: body }, { cache_control: { type: "ephemeral" } });
			message.content = [tagged];
			return;
		}
		if (!Array.isArray(body)) continue;
		// Tag the last text part of this message, if it has one.
		for (let j = body.length - 1; j >= 0; j--) {
			const segment = body[j];
			if (segment?.type === "text") {
				Object.assign(segment, { cache_control: { type: "ephemeral" } });
				return;
			}
		}
	}
}
function convertMessages( function convertMessages(
model: Model<"openai-completions">, model: Model<"openai-completions">,
context: Context, context: Context,
@ -644,11 +683,14 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
* Returns a fully resolved OpenAICompat object with all fields set. * Returns a fully resolved OpenAICompat object with all fields set.
*/ */
function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> { function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
const isZai = baseUrl.includes("api.z.ai");
const isNonStandard = const isNonStandard =
baseUrl.includes("cerebras.ai") || baseUrl.includes("cerebras.ai") ||
baseUrl.includes("api.x.ai") || baseUrl.includes("api.x.ai") ||
baseUrl.includes("mistral.ai") || baseUrl.includes("mistral.ai") ||
baseUrl.includes("chutes.ai"); baseUrl.includes("chutes.ai") ||
isZai;
const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai"); const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
@ -659,13 +701,14 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
return { return {
supportsStore: !isNonStandard, supportsStore: !isNonStandard,
supportsDeveloperRole: !isNonStandard, supportsDeveloperRole: !isNonStandard,
supportsReasoningEffort: !isGrok, supportsReasoningEffort: !isGrok && !isZai,
supportsUsageInStreaming: true, supportsUsageInStreaming: true,
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens", maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
requiresToolResultName: isMistral, requiresToolResultName: isMistral,
requiresAssistantAfterToolResult: false, // Mistral no longer requires this as of Dec 2024 requiresAssistantAfterToolResult: false, // Mistral no longer requires this as of Dec 2024
requiresThinkingAsText: isMistral, requiresThinkingAsText: isMistral,
requiresMistralToolIds: isMistral, requiresMistralToolIds: isMistral,
thinkingFormat: isZai ? "zai" : "openai",
}; };
} }
@ -688,5 +731,6 @@ function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult, model.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,
requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText, requiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,
requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds, requiresMistralToolIds: model.compat.requiresMistralToolIds ?? detected.requiresMistralToolIds,
thinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,
}; };
} }

View file

@ -24,6 +24,7 @@ import type {
ThinkingContent, ThinkingContent,
Tool, Tool,
ToolCall, ToolCall,
Usage,
} from "../types.js"; } from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js"; import { parseStreamingJson } from "../utils/json-parse.js";
@ -48,6 +49,7 @@ function shortHash(str: string): string {
export interface OpenAIResponsesOptions extends StreamOptions { export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh"; reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null; reasoningSummary?: "auto" | "detailed" | "concise" | null;
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
} }
/** /**
@ -85,7 +87,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || ""; const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
const client = createClient(model, context, apiKey); const client = createClient(model, context, apiKey);
const params = buildParams(model, context, options); const params = buildParams(model, context, options);
const openaiStream = await client.responses.create(params, { signal: options?.signal }); const openaiStream = await client.responses.create(params, { signal: options?.signal, timeout: undefined });
stream.push({ type: "start", partial: output }); stream.push({ type: "start", partial: output });
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null; let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
@ -276,6 +278,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
}; };
} }
calculateCost(model, output.usage); calculateCost(model, output.usage);
applyServiceTierPricing(output.usage, response?.service_tier ?? options?.serviceTier);
// Map status to stop reason // Map status to stop reason
output.stopReason = mapStopReason(response?.status); output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") { if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
@ -363,6 +366,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
model: model.id, model: model.id,
input: messages, input: messages,
stream: true, stream: true,
prompt_cache_key: options?.sessionId,
}; };
if (options?.maxTokens) { if (options?.maxTokens) {
@ -373,6 +377,10 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
params.temperature = options?.temperature; params.temperature = options?.temperature;
} }
if (options?.serviceTier !== undefined) {
params.service_tier = options.serviceTier;
}
if (context.tools) { if (context.tools) {
params.tools = convertTools(context.tools); params.tools = convertTools(context.tools);
} }
@ -547,6 +555,28 @@ function convertTools(tools: Tool[]): OpenAITool[] {
})); }));
} }
/** Cost multiplier for OpenAI service tiers: flex is half price, priority is double, everything else is standard. */
function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined): number {
	if (serviceTier === "flex") return 0.5;
	if (serviceTier === "priority") return 2;
	return 1;
}

/**
 * Scale an already-calculated usage cost by the service-tier multiplier.
 * No-op for the standard tier; otherwise every cost component is scaled and
 * the total is recomputed from the scaled components. Mutates `usage` in place.
 */
function applyServiceTierPricing(usage: Usage, serviceTier: ResponseCreateParamsStreaming["service_tier"] | undefined) {
	const factor = getServiceTierCostMultiplier(serviceTier);
	if (factor === 1) return;
	const cost = usage.cost;
	cost.input *= factor;
	cost.output *= factor;
	cost.cacheRead *= factor;
	cost.cacheWrite *= factor;
	cost.total = cost.input + cost.output + cost.cacheRead + cost.cacheWrite;
}
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason { function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
if (!status) return "stop"; if (!status) return "stop";
switch (status) { switch (status) {

View file

@ -1,11 +1,11 @@
import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js"; import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js";
/** /**
* Normalize tool call ID for GitHub Copilot cross-API compatibility. * Normalize tool call ID for cross-provider compatibility.
* OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`. * OpenAI Responses API generates IDs that are 450+ chars with special characters like `|`.
* Other APIs (Claude, etc.) require max 40 chars and only alphanumeric + underscore + hyphen. * Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars).
*/ */
function normalizeCopilotToolCallId(id: string): string { function normalizeToolCallId(id: string): string {
return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40); return id.replace(/[^a-zA-Z0-9_-]/g, "").slice(0, 40);
} }
@ -38,11 +38,17 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
return msg; return msg;
} }
// Check if we need to normalize tool call IDs (github-copilot cross-API) // Check if we need to normalize tool call IDs
const needsToolCallIdNormalization = // Anthropic APIs require IDs matching ^[a-zA-Z0-9_-]+$ (max 64 chars)
// OpenAI Responses API generates IDs with `|` and 450+ chars
// GitHub Copilot routes to Anthropic for Claude models
const targetRequiresStrictIds = model.api === "anthropic-messages" || model.provider === "github-copilot";
const crossProviderSwitch = assistantMsg.provider !== model.provider;
const copilotCrossApiSwitch =
assistantMsg.provider === "github-copilot" && assistantMsg.provider === "github-copilot" &&
model.provider === "github-copilot" && model.provider === "github-copilot" &&
assistantMsg.api !== model.api; assistantMsg.api !== model.api;
const needsToolCallIdNormalization = targetRequiresStrictIds && (crossProviderSwitch || copilotCrossApiSwitch);
// Transform message from different provider/model // Transform message from different provider/model
const transformedContent = assistantMsg.content.flatMap((block) => { const transformedContent = assistantMsg.content.flatMap((block) => {
@ -54,10 +60,10 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
text: block.thinking, text: block.thinking,
}; };
} }
// Normalize tool call IDs for github-copilot cross-API switches // Normalize tool call IDs when target API requires strict format
if (block.type === "toolCall" && needsToolCallIdNormalization) { if (block.type === "toolCall" && needsToolCallIdNormalization) {
const toolCall = block as ToolCall; const toolCall = block as ToolCall;
const normalizedId = normalizeCopilotToolCallId(toolCall.id); const normalizedId = normalizeToolCallId(toolCall.id);
if (normalizedId !== toolCall.id) { if (normalizedId !== toolCall.id) {
toolCallIdMap.set(toolCall.id, normalizedId); toolCallIdMap.set(toolCall.id, normalizedId);
return { ...toolCall, id: normalizedId }; return { ...toolCall, id: normalizedId };

View file

@ -2,6 +2,7 @@ import { existsSync } from "node:fs";
import { homedir } from "node:os"; import { homedir } from "node:os";
import { join } from "node:path"; import { join } from "node:path";
import { supportsXhigh } from "./models.js"; import { supportsXhigh } from "./models.js";
import { type BedrockOptions, streamBedrock } from "./providers/amazon-bedrock.js";
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js"; import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
import { type GoogleOptions, streamGoogle } from "./providers/google.js"; import { type GoogleOptions, streamGoogle } from "./providers/google.js";
import { import {
@ -74,6 +75,20 @@ export function getEnvApiKey(provider: any): string | undefined {
} }
} }
if (provider === "amazon-bedrock") {
// Amazon Bedrock supports multiple credential sources:
// 1. AWS_PROFILE - named profile from ~/.aws/credentials
// 2. AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY - standard IAM keys
// 3. AWS_BEARER_TOKEN_BEDROCK - Bedrock API keys (bearer token)
if (
process.env.AWS_PROFILE ||
(process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY) ||
process.env.AWS_BEARER_TOKEN_BEDROCK
) {
return "<authenticated>";
}
}
const envMap: Record<string, string> = { const envMap: Record<string, string> = {
openai: "OPENAI_API_KEY", openai: "OPENAI_API_KEY",
google: "GEMINI_API_KEY", google: "GEMINI_API_KEY",
@ -81,8 +96,10 @@ export function getEnvApiKey(provider: any): string | undefined {
cerebras: "CEREBRAS_API_KEY", cerebras: "CEREBRAS_API_KEY",
xai: "XAI_API_KEY", xai: "XAI_API_KEY",
openrouter: "OPENROUTER_API_KEY", openrouter: "OPENROUTER_API_KEY",
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
zai: "ZAI_API_KEY", zai: "ZAI_API_KEY",
mistral: "MISTRAL_API_KEY", mistral: "MISTRAL_API_KEY",
minimax: "MINIMAX_API_KEY",
opencode: "OPENCODE_API_KEY", opencode: "OPENCODE_API_KEY",
}; };
@ -98,6 +115,9 @@ export function stream<TApi extends Api>(
// Vertex AI uses Application Default Credentials, not API keys // Vertex AI uses Application Default Credentials, not API keys
if (model.api === "google-vertex") { if (model.api === "google-vertex") {
return streamGoogleVertex(model as Model<"google-vertex">, context, options as GoogleVertexOptions); return streamGoogleVertex(model as Model<"google-vertex">, context, options as GoogleVertexOptions);
} else if (model.api === "bedrock-converse-stream") {
// Bedrock doesn't use API keys; instead it sources credentials from standard AWS env variables or from a given AWS profile.
return streamBedrock(model as Model<"bedrock-converse-stream">, context, (options || {}) as BedrockOptions);
} }
const apiKey = options?.apiKey || getEnvApiKey(model.provider); const apiKey = options?.apiKey || getEnvApiKey(model.provider);
@ -156,6 +176,10 @@ export function streamSimple<TApi extends Api>(
if (model.api === "google-vertex") { if (model.api === "google-vertex") {
const providerOptions = mapOptionsForApi(model, options, undefined); const providerOptions = mapOptionsForApi(model, options, undefined);
return stream(model, context, providerOptions); return stream(model, context, providerOptions);
} else if (model.api === "bedrock-converse-stream") {
// Bedrock doesn't use API keys; instead it sources credentials from standard AWS env variables or from a given AWS profile.
const providerOptions = mapOptionsForApi(model, options, undefined);
return stream(model, context, providerOptions);
} }
const apiKey = options?.apiKey || getEnvApiKey(model.provider); const apiKey = options?.apiKey || getEnvApiKey(model.provider);
@ -228,6 +252,13 @@ function mapOptionsForApi<TApi extends Api>(
} satisfies AnthropicOptions; } satisfies AnthropicOptions;
} }
case "bedrock-converse-stream":
return {
...base,
reasoning: options?.reasoning,
thinkingBudgets: options?.thinkingBudgets,
} satisfies BedrockOptions;
case "openai-completions": case "openai-completions":
return { return {
...base, ...base,

View file

@ -1,3 +1,4 @@
import type { BedrockOptions } from "./providers/amazon-bedrock.js";
import type { AnthropicOptions } from "./providers/anthropic.js"; import type { AnthropicOptions } from "./providers/anthropic.js";
import type { GoogleOptions } from "./providers/google.js"; import type { GoogleOptions } from "./providers/google.js";
import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js"; import type { GoogleGeminiCliOptions } from "./providers/google-gemini-cli.js";
@ -14,12 +15,14 @@ export type Api =
| "openai-responses" | "openai-responses"
| "openai-codex-responses" | "openai-codex-responses"
| "anthropic-messages" | "anthropic-messages"
| "bedrock-converse-stream"
| "google-generative-ai" | "google-generative-ai"
| "google-gemini-cli" | "google-gemini-cli"
| "google-vertex"; | "google-vertex";
export interface ApiOptionsMap { export interface ApiOptionsMap {
"anthropic-messages": AnthropicOptions; "anthropic-messages": AnthropicOptions;
"bedrock-converse-stream": BedrockOptions;
"openai-completions": OpenAICompletionsOptions; "openai-completions": OpenAICompletionsOptions;
"openai-responses": OpenAIResponsesOptions; "openai-responses": OpenAIResponsesOptions;
"openai-codex-responses": OpenAICodexResponsesOptions; "openai-codex-responses": OpenAICodexResponsesOptions;
@ -40,6 +43,7 @@ const _exhaustive: _CheckExhaustive = true;
export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi]; export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi];
export type KnownProvider = export type KnownProvider =
| "amazon-bedrock"
| "anthropic" | "anthropic"
| "google" | "google"
| "google-gemini-cli" | "google-gemini-cli"
@ -52,8 +56,10 @@ export type KnownProvider =
| "groq" | "groq"
| "cerebras" | "cerebras"
| "openrouter" | "openrouter"
| "vercel-ai-gateway"
| "zai" | "zai"
| "mistral" | "mistral"
| "minimax"
| "opencode"; | "opencode";
export type Provider = KnownProvider | string; export type Provider = KnownProvider | string;
@ -219,6 +225,8 @@ export interface OpenAICompat {
requiresThinkingAsText?: boolean; requiresThinkingAsText?: boolean;
/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */ /** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
requiresMistralToolIds?: boolean; requiresMistralToolIds?: boolean;
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }. Default: "openai". */
thinkingFormat?: "openai" | "zai";
} }
// Model interface for the unified model system // Model interface for the unified model system

View file

@ -17,6 +17,7 @@ import type { AssistantMessage } from "../types.js";
* - llama.cpp: "the request exceeds the available context size, try increasing it" * - llama.cpp: "the request exceeds the available context size, try increasing it"
* - LM Studio: "tokens to keep from the initial prompt is greater than the context length" * - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
* - GitHub Copilot: "prompt token count of X exceeds the limit of Y" * - GitHub Copilot: "prompt token count of X exceeds the limit of Y"
* - MiniMax: "invalid params, context window exceeds limit"
* - Cerebras: Returns "400 status code (no body)" - handled separately below * - Cerebras: Returns "400 status code (no body)" - handled separately below
* - Mistral: Returns "400 status code (no body)" - handled separately below * - Mistral: Returns "400 status code (no body)" - handled separately below
* - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
@ -24,6 +25,7 @@ import type { AssistantMessage } from "../types.js";
*/ */
const OVERFLOW_PATTERNS = [ const OVERFLOW_PATTERNS = [
/prompt is too long/i, // Anthropic /prompt is too long/i, // Anthropic
/input is too long for requested model/i, // Amazon Bedrock
/exceeds the context window/i, // OpenAI (Completions & Responses API) /exceeds the context window/i, // OpenAI (Completions & Responses API)
/input token count.*exceeds the maximum/i, // Google (Gemini) /input token count.*exceeds the maximum/i, // Google (Gemini)
/maximum prompt length is \d+/i, // xAI (Grok) /maximum prompt length is \d+/i, // xAI (Grok)
@ -32,6 +34,7 @@ const OVERFLOW_PATTERNS = [
/exceeds the limit of \d+/i, // GitHub Copilot /exceeds the limit of \d+/i, // GitHub Copilot
/exceeds the available context size/i, // llama.cpp server /exceeds the available context size/i, // llama.cpp server
/greater than the context length/i, // LM Studio /greater than the context length/i, // LM Studio
/context window exceeds limit/i, // MiniMax
/context[_ ]length[_ ]exceeded/i, // Generic fallback /context[_ ]length[_ ]exceeded/i, // Generic fallback
/too many tokens/i, // Generic fallback /too many tokens/i, // Generic fallback
/token limit exceeded/i, // Generic fallback /token limit exceeded/i, // Generic fallback

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js"; import { complete, stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -66,6 +67,35 @@ async function testImmediateAbort<TApi extends Api>(llm: Model<TApi>, options: O
expect(response.stopReason).toBe("aborted"); expect(response.stopReason).toBe("aborted");
} }
/**
 * Regression scenario: an immediately-aborted request must not poison the
 * conversation. The (empty) aborted assistant message is kept in the context —
 * exactly what the coding agent does — and a follow-up request must still succeed.
 */
async function testAbortThenNewMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	const abortController = new AbortController();
	abortController.abort(); // abort before the request is even issued

	const ctx: Context = {
		messages: [{ role: "user", content: "Hello, how are you?", timestamp: Date.now() }],
	};

	const aborted = await complete(llm, ctx, { ...options, signal: abortController.signal });
	expect(aborted.stopReason).toBe("aborted");
	// Nothing arrived before the abort, so the assistant message is empty.
	expect(aborted.content.length).toBe(0);

	// Keep the aborted assistant turn in the history, then ask something new.
	ctx.messages.push(aborted);
	ctx.messages.push({ role: "user", content: "What is 2 + 2?", timestamp: Date.now() });

	const followUp = await complete(llm, ctx, options);
	expect(followUp.stopReason).toBe("stop");
	expect(followUp.content.length).toBeGreaterThan(0);
}
describe("AI Providers Abort Tests", () => { describe("AI Providers Abort Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
const llm = getModel("google", "gemini-2.5-flash"); const llm = getModel("google", "gemini-2.5-flash");
@ -130,6 +160,30 @@ describe("AI Providers Abort Tests", () => {
}); });
}); });
// MiniMax abort behaviour (skipped without an API key).
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Abort", () => {
	const minimaxModel = getModel("minimax", "MiniMax-M2.1");

	it("should abort mid-stream", { retry: 3 }, async () => {
		await testAbortSignal(minimaxModel);
	});

	it("should handle immediate abort", { retry: 3 }, async () => {
		await testImmediateAbort(minimaxModel);
	});
});
// Vercel AI Gateway abort behaviour (skipped without an API key).
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Abort", () => {
	const gatewayModel = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");

	it("should abort mid-stream", { retry: 3 }, async () => {
		await testAbortSignal(gatewayModel);
	});

	it("should handle immediate abort", { retry: 3 }, async () => {
		await testImmediateAbort(gatewayModel);
	});
});
// Google Gemini CLI / Antigravity share the same provider, so one test covers both // Google Gemini CLI / Antigravity share the same provider, so one test covers both
describe("Google Gemini CLI Provider Abort", () => { describe("Google Gemini CLI Provider Abort", () => {
it.skipIf(!geminiCliToken)("should abort mid-stream", { retry: 3 }, async () => { it.skipIf(!geminiCliToken)("should abort mid-stream", { retry: 3 }, async () => {
@ -154,4 +208,20 @@ describe("AI Providers Abort Tests", () => {
await testImmediateAbort(llm, { apiKey: openaiCodexToken }); await testImmediateAbort(llm, { apiKey: openaiCodexToken });
}); });
}); });
// Bedrock abort behaviour (skipped unless AWS credentials are configured).
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Abort", () => {
	const bedrockModel = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");

	it("should abort mid-stream", { retry: 3 }, async () => {
		await testAbortSignal(bedrockModel, { reasoning: "medium" });
	});

	it("should handle immediate abort", { retry: 3 }, async () => {
		await testImmediateAbort(bedrockModel);
	});

	it("should handle abort then new message", { retry: 3 }, async () => {
		await testAbortThenNewMessage(bedrockModel);
	});
});
}); });

View file

@ -0,0 +1,66 @@
/**
* A test suite to ensure all configured Amazon Bedrock models are usable.
*
* This is here to make sure we got correct model identifiers from models.dev and other sources.
 * Because Amazon Bedrock requires cross-region inference for some models,
 * plain model identifiers are not always usable; the identifiers must be tweaked to use cross-region inference.
* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system for more details.
*
* This test suite is not enabled by default unless AWS credentials and `BEDROCK_EXTENSIVE_MODEL_TEST` environment variables are set.
* This test suite takes ~2 minutes to run. Because not all models are available in all regions,
* it's recommended to use `us-west-2` region for best coverage for running this test suite.
*
* You can run this test suite with:
* ```bash
* $ AWS_REGION=us-west-2 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=... npm test -- ./test/bedrock-models.test.ts
* ```
*/
import { describe, expect, it } from "vitest";
import { getModels } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
describe("Amazon Bedrock Models", () => {
const models = getModels("amazon-bedrock");
it("should get all available Bedrock models", () => {
expect(models.length).toBeGreaterThan(0);
console.log(`Found ${models.length} Bedrock models`);
});
if (hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST) {
for (const model of models) {
it(`should make a simple request with ${model.id}`, { timeout: 10_000 }, async () => {
const context: Context = {
systemPrompt: "You are a helpful assistant. Be extremely concise.",
messages: [
{
role: "user",
content: "Reply with exactly: 'OK'",
timestamp: Date.now(),
},
],
};
const response = await complete(model, context);
expect(response.role).toBe("assistant");
expect(response.content).toBeTruthy();
expect(response.content.length).toBeGreaterThan(0);
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
expect(response.usage.output).toBeGreaterThan(0);
expect(response.errorMessage).toBeFalsy();
const textContent = response.content
.filter((b) => b.type === "text")
.map((b) => (b.type === "text" ? b.text : ""))
.join("")
.trim();
expect(textContent).toBeTruthy();
console.log(`${model.id}: ${textContent.substring(0, 100)}`);
});
}
}
});

View file

@ -0,0 +1,18 @@
/**
* Utility functions for Amazon Bedrock tests
*/
/**
 * Check if any valid AWS credentials are configured for Bedrock.
 * Returns true if any of the following are set:
 * - AWS_PROFILE (named profile from ~/.aws/credentials)
 * - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
 * - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
 */
export function hasBedrockCredentials(): boolean {
	const env = process.env;
	if (env.AWS_PROFILE) return true;
	if (env.AWS_ACCESS_KEY_ID && env.AWS_SECRET_ACCESS_KEY) return true;
	return !!env.AWS_BEARER_TOKEN_BEDROCK;
}

View file

@ -18,6 +18,7 @@ import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js"; import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js"; import { isContextOverflow } from "../src/utils/overflow.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -284,6 +285,22 @@ describe("Context overflow error handling", () => {
); );
}); });
// =============================================================================
// Amazon Bedrock
// Expected pattern: "Input is too long for requested model"
// =============================================================================
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
	// Bedrock takes no API key string — credentials come from the environment,
	// so an empty string is passed to the shared overflow helper.
	const modelId = "global.anthropic.claude-sonnet-4-5-20250929-v1:0";

	it("claude-sonnet-4-5 - should detect overflow via isContextOverflow", async () => {
		const model = getModel("amazon-bedrock", modelId);
		const result = await testContextOverflow(model, "");
		logResult(result);
		expect(result.stopReason).toBe("error");
		expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
	}, 120000);
});
// ============================================================================= // =============================================================================
// xAI // xAI
// Expected pattern: "maximum prompt length is X but the request contains Y" // Expected pattern: "maximum prompt length is X but the request contains Y"
@ -379,6 +396,37 @@ describe("Context overflow error handling", () => {
}, 120000); }, 120000);
}); });
// =============================================================================
// MiniMax
// Expected pattern: TBD - need to test actual error message
// =============================================================================
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
	it("MiniMax-M2.1 - should detect overflow via isContextOverflow", async () => {
		// Suite is skipped unless the key is set, so the non-null assertion is safe.
		const apiKey = process.env.MINIMAX_API_KEY!;
		const model = getModel("minimax", "MiniMax-M2.1");
		const result = await testContextOverflow(model, apiKey);
		logResult(result);
		expect(result.stopReason).toBe("error");
		expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
	}, 120000);
});
// =============================================================================
// Vercel AI Gateway - Unified API for multiple providers
// =============================================================================
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
	it("google/gemini-2.5-flash via AI Gateway - should detect overflow via isContextOverflow", async () => {
		// Suite is skipped unless the key is set, so the non-null assertion is safe.
		const apiKey = process.env.AI_GATEWAY_API_KEY!;
		const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
		const result = await testContextOverflow(model, apiKey);
		logResult(result);
		expect(result.stopReason).toBe("error");
		expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
	}, 120000);
});
// ============================================================================= // =============================================================================
// OpenRouter - Multiple backend providers // OpenRouter - Multiple backend providers
// Expected pattern: "maximum context length is X tokens" // Expected pattern: "maximum context length is X tokens"

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js"; import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -321,6 +322,66 @@ describe("AI Providers Empty Message Tests", () => {
}); });
}); });
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Empty Messages", () => {
	const llm = getModel("minimax", "MiniMax-M2.1");
	// Same four degenerate-message cases as the other providers, data-driven.
	const cases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should handle empty content array", testEmptyMessage],
		["should handle empty string content", testEmptyStringMessage],
		["should handle whitespace-only content", testWhitespaceOnlyMessage],
		["should handle empty assistant message in conversation", testEmptyAssistantMessage],
	];
	for (const [title, run] of cases) {
		it(title, { retry: 3, timeout: 30000 }, async () => {
			await run(llm);
		});
	}
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Empty Messages", () => {
	const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	// Same four degenerate-message cases as the other providers, data-driven.
	const cases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should handle empty content array", testEmptyMessage],
		["should handle empty string content", testEmptyStringMessage],
		["should handle whitespace-only content", testWhitespaceOnlyMessage],
		["should handle empty assistant message in conversation", testEmptyAssistantMessage],
	];
	for (const [title, run] of cases) {
		it(title, { retry: 3, timeout: 30000 }, async () => {
			await run(llm);
		});
	}
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Empty Messages", () => {
	const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	// Same four degenerate-message cases as the other providers, data-driven.
	const cases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should handle empty content array", testEmptyMessage],
		["should handle empty string content", testEmptyStringMessage],
		["should handle whitespace-only content", testWhitespaceOnlyMessage],
		["should handle empty assistant message in conversation", testEmptyAssistantMessage],
	];
	for (const [title, run] of cases) {
		it(title, { retry: 3, timeout: 30000 }, async () => {
			await run(llm);
		});
	}
});
// ========================================================================= // =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json) // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// ========================================================================= // =========================================================================

View file

@ -0,0 +1,103 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Keep a reference to the real fetch so afterEach can restore it after each
// test replaces global.fetch with a mock.
const originalFetch = global.fetch;
// The apiKey option is a JSON blob of { token, projectId } — the shape
// streamGoogleGeminiCli is invoked with throughout these tests.
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
// Build a minimal Gemini CLI SSE response: a single event carrying one
// "Hello" text part with finishReason STOP, delivered as a one-chunk
// text/event-stream body. A fresh stream is created per call so each mocked
// fetch gets an unconsumed body.
const createSseResponse = () => {
	const payload = {
		response: {
			candidates: [
				{
					content: { role: "model", parts: [{ text: "Hello" }] },
					finishReason: "STOP",
				},
			],
		},
	};
	const chunk = new TextEncoder().encode(`data: ${JSON.stringify(payload)}\n\n`);
	const stream = new ReadableStream<Uint8Array>({
		start(controller) {
			controller.enqueue(chunk);
			controller.close();
		},
	});
	return new Response(stream, {
		status: 200,
		headers: { "content-type": "text/event-stream" },
	});
};
// Undo the per-test fetch mock and clear all vi mocks/spies between tests.
afterEach(() => {
	global.fetch = originalFetch;
	vi.restoreAllMocks();
});
// Verifies that the anthropic-beta interleaved-thinking header is sent for
// Claude thinking models routed through the Gemini CLI endpoint, and only
// for those models. The header assertions live inside the mocked fetch.
describe("google-gemini-cli Claude thinking header", () => {
	const context: Context = {
		messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
	};

	// Fields shared by both test models; only identity/reasoning differ.
	const baseModel: Omit<Model<"google-gemini-cli">, "id" | "name" | "provider" | "reasoning"> = {
		api: "google-gemini-cli",
		baseUrl: "https://cloudcode-pa.googleapis.com",
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 8192,
	};

	// Drive the stream to completion so the mocked fetch (and the header
	// expectations inside it) actually runs.
	const exhaust = async (model: Model<"google-gemini-cli">) => {
		const stream = streamGoogleGeminiCli(model, context, { apiKey });
		for await (const _event of stream) {
			// exhaust stream
		}
		await stream.result();
	};

	it("adds anthropic-beta for Claude thinking models", async () => {
		const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
			const headers = new Headers(init?.headers);
			expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
			return createSseResponse();
		});
		global.fetch = fetchMock as typeof fetch;

		await exhaust({
			...baseModel,
			id: "claude-opus-4-5-thinking",
			name: "Claude Opus 4.5 Thinking",
			provider: "google-antigravity",
			reasoning: true,
		});
	});

	it("does not add anthropic-beta for Gemini models", async () => {
		const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
			const headers = new Headers(init?.headers);
			expect(headers.has("anthropic-beta")).toBe(false);
			return createSseResponse();
		});
		global.fetch = fetchMock as typeof fetch;

		await exhaust({
			...baseModel,
			id: "gemini-2.5-flash",
			name: "Gemini 2.5 Flash",
			provider: "google-gemini-cli",
			reasoning: false,
		});
	});
});

View file

@ -0,0 +1,108 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Keep a reference to the real fetch so afterEach can restore it after each
// test replaces global.fetch with a mock.
const originalFetch = global.fetch;
// Restore fetch and clear all vi mocks/spies between tests.
afterEach(() => {
	global.fetch = originalFetch;
	vi.restoreAllMocks();
});
// Verifies that an SSE response which closes without emitting any events is
// retried, and that the retry is transparent to consumers: exactly one
// start/done pair and the text from the second (successful) response.
describe("google-gemini-cli empty stream retry", () => {
	it("retries empty SSE responses without duplicate start", async () => {
		const encoder = new TextEncoder();

		// First response body: closes immediately without any events.
		const emptyStream = new ReadableStream<Uint8Array>({
			start(controller) {
				controller.close();
			},
		});

		// Second response body: one complete SSE event with text and usage.
		const payload = {
			response: {
				candidates: [
					{
						content: { role: "model", parts: [{ text: "Hello" }] },
						finishReason: "STOP",
					},
				],
				usageMetadata: {
					promptTokenCount: 1,
					candidatesTokenCount: 1,
					totalTokenCount: 2,
				},
			},
		};
		const dataStream = new ReadableStream<Uint8Array>({
			start(controller) {
				controller.enqueue(encoder.encode(`data: ${JSON.stringify(payload)}\n\n`));
				controller.close();
			},
		});

		// fetch serves the empty body once, then the real data.
		let callCount = 0;
		const fetchMock = vi.fn(async () => {
			callCount += 1;
			const body = callCount === 1 ? emptyStream : dataStream;
			return new Response(body, {
				status: 200,
				headers: { "content-type": "text/event-stream" },
			});
		});
		global.fetch = fetchMock as typeof fetch;

		const model: Model<"google-gemini-cli"> = {
			id: "gemini-2.5-flash",
			name: "Gemini 2.5 Flash",
			api: "google-gemini-cli",
			provider: "google-gemini-cli",
			baseUrl: "https://cloudcode-pa.googleapis.com",
			reasoning: false,
			input: ["text"],
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			contextWindow: 128000,
			maxTokens: 8192,
		};
		const context: Context = {
			messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
		};

		const stream = streamGoogleGeminiCli(model, context, {
			apiKey: JSON.stringify({ token: "token", projectId: "project" }),
		});

		// Tally lifecycle events while collecting the streamed text.
		const counts = { start: 0, done: 0 };
		let text = "";
		for await (const event of stream) {
			if (event.type === "start") counts.start += 1;
			else if (event.type === "done") counts.done += 1;
			else if (event.type === "text_delta") text += event.delta;
		}
		const result = await stream.result();

		expect(text).toBe("Hello");
		expect(result.stopReason).toBe("stop");
		expect(counts.start).toBe(1);
		expect(counts.done).toBe(1);
		expect(fetchMock).toHaveBeenCalledTimes(2);
	});
});

View file

@ -0,0 +1,53 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
// Header-based retry-delay extraction. All four expectations are exactly
// one second above the header value, i.e. the implementation adds a 1s
// safety margin on top of the server-provided delay.
describe("extractRetryDelay header parsing", () => {
	afterEach(() => {
		vi.useRealTimers();
	});

	// Freeze the clock at a fixed instant and return it for date math.
	const freezeClock = () => {
		vi.useFakeTimers();
		const now = new Date("2025-01-01T00:00:00Z");
		vi.setSystemTime(now);
		return now;
	};

	// Response carrying a single retry-related header.
	const responseWith = (name: string, value: string) => new Response("", { headers: { [name]: value } });

	it("prefers Retry-After seconds header", () => {
		freezeClock();
		// The header (5s) wins over the "retry in 1s" hint in the message body.
		const delay = extractRetryDelay("Please retry in 1s", responseWith("Retry-After", "5"));
		expect(delay).toBe(6000);
	});

	it("parses Retry-After HTTP date header", () => {
		const now = freezeClock();
		const retryAt = new Date(now.getTime() + 12000).toUTCString();
		const delay = extractRetryDelay("", responseWith("Retry-After", retryAt));
		expect(delay).toBe(13000);
	});

	it("parses x-ratelimit-reset header", () => {
		const now = freezeClock();
		// This header carries an absolute epoch timestamp in seconds.
		const resetSeconds = Math.floor((now.getTime() + 20000) / 1000).toString();
		const delay = extractRetryDelay("", responseWith("x-ratelimit-reset", resetSeconds));
		expect(delay).toBe(21000);
	});

	it("parses x-ratelimit-reset-after header", () => {
		freezeClock();
		const delay = extractRetryDelay("", responseWith("x-ratelimit-reset-after", "30"));
		expect(delay).toBe(31000);
	});
});

View file

@ -0,0 +1,50 @@
import { createHash } from "node:crypto";
import { describe, expect, it } from "vitest";
import { buildRequest } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
// Shared Gemini CLI model fixture for the buildRequest tests below.
// Cost/limit values are zero placeholders — these tests exercise the
// request shape (sessionId derivation), not pricing or token accounting.
const model: Model<"google-gemini-cli"> = {
	id: "gemini-2.5-flash",
	name: "Gemini 2.5 Flash",
	api: "google-gemini-cli",
	provider: "google-gemini-cli",
	baseUrl: "https://cloudcode-pa.googleapis.com",
	reasoning: false,
	input: ["text"],
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
	contextWindow: 128000,
	maxTokens: 8192,
};
describe("buildRequest sessionId", () => {
	it("derives sessionId from the first user message", () => {
		const context: Context = {
			messages: [
				{ role: "user", content: "First message", timestamp: Date.now() },
				{ role: "user", content: "Second message", timestamp: Date.now() },
			],
		};
		// sessionId is the first 32 hex chars of sha256(first user text);
		// later messages must not influence it.
		const expected = createHash("sha256").update("First message").digest("hex").slice(0, 32);
		const { request } = buildRequest(model, context, "project-id");
		expect(request.sessionId).toBe(expected);
	});

	it("omits sessionId when the first user message has no text", () => {
		// An image-only first message has no text to hash.
		const context: Context = {
			messages: [
				{
					role: "user",
					content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
					timestamp: Date.now(),
				},
				{ role: "user", content: "Later text", timestamp: Date.now() },
			],
		};
		const { request } = buildRequest(model, context, "project-id");
		expect(request.sessionId).toBeUndefined();
	});
});

View file

@ -75,6 +75,7 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, UserMessage } from "../src/types.js"; import type { Api, Context, ImageContent, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename); const __dirname = dirname(__filename);
@ -840,6 +841,122 @@ describe("Image Limits E2E Tests", () => {
}); });
}); });
// -------------------------------------------------------------------------
// Vercel AI Gateway (google/gemini-2.5-flash)
// -------------------------------------------------------------------------
// Probe the gateway's image-count and image-size limits empirically: grow the
// input until the provider rejects it and report the last accepted value.
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway (google/gemini-2.5-flash)", () => {
	const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	it("should accept a small number of images (5)", async () => {
		const result = await testImageCount(model, 5, smallImage);
		expect(result.success, result.error).toBe(true);
	});
	it("should find maximum image count limit", { timeout: 600000 }, async () => {
		// Binary/step search between 10 and 100 images in steps of 10.
		const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 100, 10);
		console.log(`\n Vercel AI Gateway max images: ~${limit} (last error: ${lastError})`);
		expect(limit).toBeGreaterThanOrEqual(5);
	});
	it("should find maximum image size limit", { timeout: 600000 }, async () => {
		const MB = 1024 * 1024;
		// Increase single-image size until the first rejection, then stop.
		const sizes = [5, 10, 15, 20];
		let lastSuccess = 0;
		let lastError: string | undefined;
		for (const sizeMB of sizes) {
			console.log(` Testing size: ${sizeMB}MB...`);
			const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`);
			const result = await testImageSize(model, imageBase64);
			if (result.success) {
				lastSuccess = sizeMB;
				console.log(` SUCCESS`);
			} else {
				lastError = result.error;
				console.log(` FAILED: ${result.error?.substring(0, 100)}`);
				break;
			}
		}
		console.log(`\n Vercel AI Gateway max image size: ~${lastSuccess}MB (last error: ${lastError})`);
		expect(lastSuccess).toBeGreaterThanOrEqual(5);
	});
});
// -------------------------------------------------------------------------
// Amazon Bedrock (claude-sonnet-4-5)
// Limits: 100 images (Anthropic), 5MB per image, 8000px max dimension
// -------------------------------------------------------------------------
// Probe Bedrock's Claude image limits (documented Anthropic limits: 100
// images per request, 5MB per image, 8000px max dimension) and assert the
// empirical findings fall inside the documented bounds.
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock (claude-sonnet-4-5)", () => {
	const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	it("should accept a small number of images (5)", async () => {
		const result = await testImageCount(model, 5, smallImage);
		expect(result.success, result.error).toBe(true);
	});
	it("should find maximum image count limit", { timeout: 600000 }, async () => {
		// Anthropic limit: 100 images
		const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 20, 120, 20);
		console.log(`\n Bedrock max images: ~${limit} (last error: ${lastError})`);
		expect(limit).toBeGreaterThanOrEqual(80);
		expect(limit).toBeLessThanOrEqual(100);
	});
	it("should find maximum image size limit", { timeout: 600000 }, async () => {
		const MB = 1024 * 1024;
		// Anthropic limit: 5MB per image
		const sizes = [1, 2, 3, 4, 5, 6];
		let lastSuccess = 0;
		let lastError: string | undefined;
		for (const sizeMB of sizes) {
			console.log(` Testing size: ${sizeMB}MB...`);
			const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`);
			const result = await testImageSize(model, imageBase64);
			if (result.success) {
				lastSuccess = sizeMB;
				console.log(` SUCCESS`);
			} else {
				// First rejection marks the limit; stop probing.
				lastError = result.error;
				console.log(` FAILED: ${result.error?.substring(0, 100)}`);
				break;
			}
		}
		console.log(`\n Bedrock max image size: ~${lastSuccess}MB (last error: ${lastError})`);
		expect(lastSuccess).toBeGreaterThanOrEqual(1);
	});
	it("should find maximum image dimension limit", { timeout: 600000 }, async () => {
		// Anthropic limit: 8000px
		const dimensions = [1000, 2000, 4000, 6000, 8000, 10000];
		let lastSuccess = 0;
		let lastError: string | undefined;
		for (const dim of dimensions) {
			console.log(` Testing dimension: ${dim}x${dim}...`);
			const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`);
			const result = await testImageDimensions(model, imageBase64);
			if (result.success) {
				lastSuccess = dim;
				console.log(` SUCCESS`);
			} else {
				// First rejection marks the limit; stop probing.
				lastError = result.error;
				console.log(` FAILED: ${result.error?.substring(0, 100)}`);
				break;
			}
		}
		console.log(`\n Bedrock max dimension: ~${lastSuccess}px (last error: ${lastError})`);
		expect(lastSuccess).toBeGreaterThanOrEqual(6000);
		expect(lastSuccess).toBeLessThanOrEqual(8000);
	});
});
// ========================================================================= // =========================================================================
// MAX SIZE IMAGES TEST // MAX SIZE IMAGES TEST
// ========================================================================= // =========================================================================
@ -898,6 +1015,38 @@ describe("Image Limits E2E Tests", () => {
}, },
); );
// Amazon Bedrock (Claude) - 5MB per image limit, same as Anthropic direct
// Using 3MB to stay under 5MB limit
it.skipIf(!hasBedrockCredentials())(
	"Bedrock: max ~3MB images before rejection",
	{ timeout: 900000 },
	async () => {
		const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
		// ~3MB stays under the 5MB per-image limit so the count, not the
		// per-image size, is what eventually fails.
		const image3mb = getImageAtSize(3);
		// Similar to Anthropic, test progressively
		const counts = [1, 2, 4, 6, 8, 10, 12];
		let lastSuccess = 0;
		let lastError: string | undefined;
		for (const count of counts) {
			console.log(` Testing ${count} x ~3MB images...`);
			const result = await testImageCount(model, count, image3mb);
			if (result.success) {
				lastSuccess = count;
				console.log(` SUCCESS`);
			} else {
				// First rejection marks the limit; stop probing.
				lastError = result.error;
				console.log(` FAILED: ${result.error?.substring(0, 150)}`);
				break;
			}
		}
		console.log(`\n Bedrock max ~3MB images: ${lastSuccess} (last error: ${lastError})`);
		expect(lastSuccess).toBeGreaterThanOrEqual(1);
	},
);
// OpenAI - 20MB per image documented, we found ≥25MB works // OpenAI - 20MB per image documented, we found ≥25MB works
// Test with 15MB images to stay safely under limit // Test with 15MB images to stay safely under limit
it.skipIf(!process.env.OPENAI_API_KEY)( it.skipIf(!process.env.OPENAI_API_KEY)(

View file

@ -5,6 +5,7 @@ import { describe, expect, it } from "vitest";
import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js"; import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js";
import { complete, getModel } from "../src/index.js"; import { complete, getModel } from "../src/index.js";
import type { OptionsForApi } from "../src/types.js"; import type { OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -273,6 +274,30 @@ describe("Tool Results with Images", () => {
}); });
}); });
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider (google/gemini-2.5-flash)", () => {
	const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	// Both tool-result shapes: image-only and text+image.
	const cases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should handle tool result with only image", handleToolWithImageResult],
		["should handle tool result with text and image", handleToolWithTextAndImageResult],
	];
	for (const [title, run] of cases) {
		it(title, { retry: 3, timeout: 30000 }, async () => {
			await run(llm);
		});
	}
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
	const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	// Both tool-result shapes: image-only and text+image.
	const cases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should handle tool result with only image", handleToolWithImageResult],
		["should handle tool result with text and image", handleToolWithTextAndImageResult],
	];
	for (const [title, run] of cases) {
		it(title, { retry: 3, timeout: 30000 }, async () => {
			await run(llm);
		});
	}
});
// ========================================================================= // =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json) // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// ========================================================================= // =========================================================================

View file

@ -8,6 +8,7 @@ import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js"; import { complete, stream } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js"; import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js"; import { StringEnum } from "../src/utils/typebox-helpers.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
const __filename = fileURLToPath(import.meta.url); const __filename = fileURLToPath(import.meta.url);
@ -356,7 +357,7 @@ describe("Generate E2E Tests", () => {
await handleStreaming(llm); await handleStreaming(llm);
}); });
it("should handle ", { retry: 3 }, async () => { it("should handle thinking", { retry: 3 }, async () => {
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } }); await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
}); });
@ -597,6 +598,87 @@ describe("Generate E2E Tests", () => {
}); });
}); });
// The gateway speaks the Anthropic Messages protocol for every backend, so
// the identical suite is registered for a Google, an Anthropic, and an
// OpenAI model. Suite titles are reproduced exactly via the template below.
const gatewayModelIds = ["google/gemini-2.5-flash", "anthropic/claude-opus-4.5", "openai/gpt-5.1-codex-max"] as const;
for (const modelId of gatewayModelIds) {
	describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
		`Vercel AI Gateway Provider (${modelId} via Anthropic Messages)`,
		() => {
			const llm = getModel("vercel-ai-gateway", modelId);
			it("should complete basic text generation", { retry: 3 }, async () => {
				await basicTextGeneration(llm);
			});
			it("should handle tool calling", { retry: 3 }, async () => {
				await handleToolCall(llm);
			});
			it("should handle streaming", { retry: 3 }, async () => {
				await handleStreaming(llm);
			});
			it("should handle image input", { retry: 3 }, async () => {
				await handleImage(llm);
			});
			it("should handle multi-turn with tools", { retry: 3 }, async () => {
				await multiTurn(llm);
			});
		},
	);
}
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => { describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => {
const llm = getModel("zai", "glm-4.5-air"); const llm = getModel("zai", "glm-4.5-air");
@ -698,6 +780,30 @@ describe("Generate E2E Tests", () => {
}); });
}); });
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider (MiniMax-M2.1 via Anthropic Messages)", () => {
	const llm = getModel("minimax", "MiniMax-M2.1");
	// Capability tests that take no extra options, registered data-driven.
	const simpleCases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should complete basic text generation", basicTextGeneration],
		["should handle tool calling", handleToolCall],
		["should handle streaming", handleStreaming],
	];
	for (const [title, run] of simpleCases) {
		it(title, { retry: 3 }, async () => {
			await run(llm);
		});
	}
	// NOTE(review): thinking options use flat fields here, while other suites
	// in this file use the nested { thinking: { enabled, budgetTokens } }
	// shape — confirm this matches OptionsForApi for minimax.
	it("should handle thinking mode", { retry: 3 }, async () => {
		await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
	});
	it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
		await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
	});
});
// ========================================================================= // =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json) // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// Tokens are resolved at module level (see oauthTokens above) // Tokens are resolved at module level (see oauthTokens above)
@ -907,6 +1013,34 @@ describe("Generate E2E Tests", () => {
}); });
}); });
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
	const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	// Capability tests that take no extra options, registered data-driven.
	const simpleCases: Array<[string, (m: typeof llm) => Promise<void>]> = [
		["should complete basic text generation", basicTextGeneration],
		["should handle tool calling", handleToolCall],
		["should handle streaming", handleStreaming],
	];
	for (const [title, run] of simpleCases) {
		it(title, { retry: 3 }, async () => {
			await run(llm);
		});
	}
	// Reasoning effort levels exercise Bedrock's thinking passthrough.
	it("should handle thinking", { retry: 3 }, async () => {
		await handleThinking(llm, { reasoning: "medium" });
	});
	it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
		await multiTurn(llm, { reasoning: "high" });
	});
	it("should handle image input", { retry: 3 }, async () => {
		await handleImage(llm);
	});
});
// Check if ollama is installed and local LLM tests are enabled // Check if ollama is installed and local LLM tests are enabled
let ollamaInstalled = false; let ollamaInstalled = false;
if (!process.env.PI_NO_LOCAL_LLM) { if (!process.env.PI_NO_LOCAL_LLM) {

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js"; import { stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -44,18 +45,25 @@ async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: Op
expect(msg.stopReason).toBe("aborted"); expect(msg.stopReason).toBe("aborted");
// OpenAI providers, OpenAI Codex, Gemini CLI, zai, and the GPT-OSS model on Antigravity only send usage in the final chunk, // OpenAI providers, OpenAI Codex, Gemini CLI, zai, Amazon Bedrock, and the GPT-OSS model on Antigravity only send usage in the final chunk,
// so when aborted they have no token stats Anthropic and Google send usage information early in the stream // so when aborted they have no token stats. Anthropic and Google send usage information early in the stream.
// MiniMax reports input tokens but not output tokens when aborted.
if ( if (
llm.api === "openai-completions" || llm.api === "openai-completions" ||
llm.api === "openai-responses" || llm.api === "openai-responses" ||
llm.api === "openai-codex-responses" || llm.api === "openai-codex-responses" ||
llm.provider === "google-gemini-cli" || llm.provider === "google-gemini-cli" ||
llm.provider === "zai" || llm.provider === "zai" ||
llm.provider === "amazon-bedrock" ||
llm.provider === "vercel-ai-gateway" ||
(llm.provider === "google-antigravity" && llm.id.includes("gpt-oss")) (llm.provider === "google-antigravity" && llm.id.includes("gpt-oss"))
) { ) {
expect(msg.usage.input).toBe(0); expect(msg.usage.input).toBe(0);
expect(msg.usage.output).toBe(0); expect(msg.usage.output).toBe(0);
} else if (llm.provider === "minimax") {
// MiniMax reports input tokens early but output tokens only in final chunk
expect(msg.usage.input).toBeGreaterThan(0);
expect(msg.usage.output).toBe(0);
} else { } else {
expect(msg.usage.input).toBeGreaterThan(0); expect(msg.usage.input).toBeGreaterThan(0);
expect(msg.usage.output).toBeGreaterThan(0); expect(msg.usage.output).toBeGreaterThan(0);
@ -144,6 +152,22 @@ describe("Token Statistics on Abort", () => {
}); });
}); });
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider", () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
// ========================================================================= // =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json) // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// ========================================================================= // =========================================================================
@ -230,4 +254,12 @@ describe("Token Statistics on Abort", () => {
}, },
); );
}); });
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
}); });

View file

@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js"; import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -170,6 +171,30 @@ describe("Tool Call Without Result Tests", () => {
}); });
}); });
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
const model = getModel("minimax", "MiniMax-M2.1");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider", () => {
const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
// ========================================================================= // =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json) // OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// ========================================================================= // =========================================================================

View file

@ -16,6 +16,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js"; import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests) // Resolve OAuth tokens at module level (async, runs before tests)
@ -324,6 +325,52 @@ describe("totalTokens field", () => {
); );
}); });
// =========================================================================
// MiniMax
// =========================================================================
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
it(
"MiniMax-M2.1 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("minimax", "MiniMax-M2.1");
console.log(`\nMiniMax / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MINIMAX_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Vercel AI Gateway
// =========================================================================
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
it(
"google/gemini-2.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
console.log(`\nVercel AI Gateway / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.AI_GATEWAY_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// ========================================================================= // =========================================================================
// OpenRouter - Multiple backend providers // OpenRouter - Multiple backend providers
// ========================================================================= // =========================================================================
@ -535,6 +582,25 @@ describe("totalTokens field", () => {
); );
}); });
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
it(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
console.log(`\nAmazon Bedrock / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// ========================================================================= // =========================================================================
// OpenAI Codex (OAuth) // OpenAI Codex (OAuth)
// ========================================================================= // =========================================================================

View file

@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js"; import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js"; import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js"; import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js"; import { resolveApiKey } from "./oauth.js";
// Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist // Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist
@ -617,6 +618,54 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
}); });
}); });
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Unicode Handling", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Unicode Handling", () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Unicode Handling", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
describe("OpenAI Codex Provider Unicode Handling", () => { describe("OpenAI Codex Provider Unicode Handling", () => {
it.skipIf(!openaiCodexToken)( it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should handle emoji in tool results", "gpt-5.2-codex - should handle emoji in tool results",

1
packages/coding-agent/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
*.bun-build

View file

@ -2,6 +2,60 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
### Fixed
- Skip changelog display on fresh install (only show on upgrades)
## [0.45.4] - 2026-01-13
### Changed
- Light theme colors adjusted for WCAG AA compliance (4.5:1 contrast ratio against white backgrounds)
- Replaced `sharp` with `wasm-vips` for image processing (resize, PNG conversion). Eliminates native build requirements that caused installation failures on some systems. ([#696](https://github.com/badlogic/pi-mono/issues/696))
### Added
- Extension example: `summarize.ts` for summarizing conversations using custom UI and an external model ([#684](https://github.com/badlogic/pi-mono/pull/684) by [@scutifer](https://github.com/scutifer))
- Extension example: `question.ts` enhanced with custom UI for asking user questions ([#693](https://github.com/badlogic/pi-mono/pull/693) by [@ferologics](https://github.com/ferologics))
- Extension example: `plan-mode/` enhanced with explicit step tracking and progress widget ([#694](https://github.com/badlogic/pi-mono/pull/694) by [@ferologics](https://github.com/ferologics))
- Extension example: `questionnaire.ts` for multi-question input with tab bar navigation ([#695](https://github.com/badlogic/pi-mono/pull/695) by [@ferologics](https://github.com/ferologics))
- Experimental Vercel AI Gateway provider support: set `AI_GATEWAY_API_KEY` and use `--provider vercel-ai-gateway`. Token usage is currently reported incorrectly by Anthropic Messages compatible endpoint. ([#689](https://github.com/badlogic/pi-mono/pull/689) by [@timolins](https://github.com/timolins))
### Fixed
- Fix API key resolution after model switches by using provider argument ([#691](https://github.com/badlogic/pi-mono/pull/691) by [@joshp123](https://github.com/joshp123))
- Fixed z.ai thinking/reasoning: thinking toggle now correctly enables/disables thinking for z.ai models ([#688](https://github.com/badlogic/pi-mono/issues/688))
- Fixed extension loading in compiled Bun binary: extensions with local file imports now work correctly. Updated `@mariozechner/jiti` to v2.6.5 which bundles babel for Bun binary compatibility. ([#681](https://github.com/badlogic/pi-mono/issues/681))
- Fixed theme loading when installed via mise: use wrapper directory in release tarballs for compatibility with mise's `strip_components=1` extraction. ([#681](https://github.com/badlogic/pi-mono/issues/681))
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
### Fixed
- Extensions now load correctly in compiled Bun binary using `@mariozechner/jiti` fork with `virtualModules` support. Bundled packages (`@sinclair/typebox`, `@mariozechner/pi-tui`, `@mariozechner/pi-ai`, `@mariozechner/pi-coding-agent`) are accessible to extensions without filesystem node_modules.
## [0.45.1] - 2026-01-13
### Changed
- `/share` now outputs `buildwithpi.ai` session preview URLs instead of `shittycodingagent.ai`
## [0.45.0] - 2026-01-13
### Added
- MiniMax provider support: set `MINIMAX_API_KEY` and use `minimax/MiniMax-M2.1` ([#656](https://github.com/badlogic/pi-mono/pull/656) by [@dannote](https://github.com/dannote))
- `/scoped-models`: Alt+Up/Down to reorder enabled models. Order is preserved when saving with Ctrl+S and determines Ctrl+P cycling order. ([#676](https://github.com/badlogic/pi-mono/pull/676) by [@thomasmhr](https://github.com/thomasmhr))
- Amazon Bedrock provider support (experimental, tested with Anthropic Claude models only) ([#494](https://github.com/badlogic/pi-mono/pull/494) by [@unexge](https://github.com/unexge))
- Extension example: `sandbox/` for OS-level bash sandboxing using `@anthropic-ai/sandbox-runtime` with per-project config ([#673](https://github.com/badlogic/pi-mono/pull/673) by [@dannote](https://github.com/dannote))
- Print mode JSON output now emits the session header as the first line.
## [0.44.0] - 2026-01-12
### Breaking Changes ### Breaking Changes
- `pi.getAllTools()` now returns `ToolInfo[]` (with `name` and `description`) instead of `string[]`. Extensions that only need names can use `.map(t => t.name)`. ([#648](https://github.com/badlogic/pi-mono/pull/648) by [@carsonfarmer](https://github.com/carsonfarmer)) - `pi.getAllTools()` now returns `ToolInfo[]` (with `name` and `description`) instead of `string[]`. Extensions that only need names can use `.map(t => t.name)`. ([#648](https://github.com/badlogic/pi-mono/pull/648) by [@carsonfarmer](https://github.com/carsonfarmer))

View file

@ -166,7 +166,9 @@ Add API keys to `~/.pi/agent/auth.json`:
| Cerebras | `cerebras` | `CEREBRAS_API_KEY` | | Cerebras | `cerebras` | `CEREBRAS_API_KEY` |
| xAI | `xai` | `XAI_API_KEY` | | xAI | `xai` | `XAI_API_KEY` |
| OpenRouter | `openrouter` | `OPENROUTER_API_KEY` | | OpenRouter | `openrouter` | `OPENROUTER_API_KEY` |
| Vercel AI Gateway | `vercel-ai-gateway` | `AI_GATEWAY_API_KEY` |
| ZAI | `zai` | `ZAI_API_KEY` | | ZAI | `zai` | `ZAI_API_KEY` |
| MiniMax | `minimax` | `MINIMAX_API_KEY` |
Auth file keys take priority over environment variables. Auth file keys take priority over environment variables.
@ -211,6 +213,29 @@ Credentials stored in `~/.pi/agent/auth.json`. Use `/logout` to clear.
- **Token expired / refresh failed:** Run `/login` again for the provider to refresh credentials. - **Token expired / refresh failed:** Run `/login` again for the provider to refresh credentials.
- **Usage limits (429):** Wait for the reset window; pi will surface a friendly message with the approximate retry time. - **Usage limits (429):** Wait for the reset window; pi will surface a friendly message with the approximate retry time.
**Amazon Bedrock:**
Amazon Bedrock supports multiple authentication methods:
```bash
# Option 1: AWS Profile (from ~/.aws/credentials)
export AWS_PROFILE=your-profile-name
# Option 2: IAM Access Keys
export AWS_ACCESS_KEY_ID=AKIA...
export AWS_SECRET_ACCESS_KEY=...
# Option 3: Bedrock API Key (bearer token)
export AWS_BEARER_TOKEN_BEDROCK=...
# Optional: Set region (defaults to us-east-1)
export AWS_REGION=us-east-1
pi --provider amazon-bedrock --model global.anthropic.claude-sonnet-4-5-20250929-v1:0
```
See [Supported foundation models in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html).
### Quick Start ### Quick Start
```bash ```bash
@ -1119,7 +1144,7 @@ pi [options] [@files...] [messages...]
| Option | Description | | Option | Description |
|--------|-------------| |--------|-------------|
| `--provider <name>` | Provider: `anthropic`, `openai`, `openai-codex`, `google`, `mistral`, `xai`, `groq`, `cerebras`, `openrouter`, `zai`, `github-copilot`, `google-gemini-cli`, `google-antigravity`, or custom | | `--provider <name>` | Provider: `anthropic`, `openai`, `openai-codex`, `google`, `google-vertex`, `amazon-bedrock`, `mistral`, `xai`, `groq`, `cerebras`, `openrouter`, `vercel-ai-gateway`, `zai`, `minimax`, `github-copilot`, `google-gemini-cli`, `google-antigravity`, or custom |
| `--model <id>` | Model ID | | `--model <id>` | Model ID |
| `--api-key <key>` | API key (overrides environment) | | `--api-key <key>` | API key (overrides environment) |
| `--system-prompt <text\|file>` | Custom system prompt (text or file path) | | `--system-prompt <text\|file>` | Custom system prompt (text or file path) |

View file

@ -18,6 +18,7 @@ Extensions are TypeScript modules that extend pi's behavior. They can subscribe
- Git checkpointing (stash at each turn, restore on branch) - Git checkpointing (stash at each turn, restore on branch)
- Path protection (block writes to `.env`, `node_modules/`) - Path protection (block writes to `.env`, `node_modules/`)
- Custom compaction (summarize conversation your way) - Custom compaction (summarize conversation your way)
- Conversation summaries (see `summarize.ts` example)
- Interactive tools (questions, wizards, custom dialogs) - Interactive tools (questions, wizards, custom dialogs)
- Stateful tools (todo lists, connection pools) - Stateful tools (todo lists, connection pools)
- External integrations (file watchers, webhooks, CI triggers) - External integrations (file watchers, webhooks, CI triggers)
@ -438,7 +439,7 @@ pi.on("before_agent_start", async (event, ctx) => {
}); });
``` ```
**Examples:** [claude-rules.ts](../examples/extensions/claude-rules.ts), [pirate.ts](../examples/extensions/pirate.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [ssh.ts](../examples/extensions/ssh.ts) **Examples:** [claude-rules.ts](../examples/extensions/claude-rules.ts), [pirate.ts](../examples/extensions/pirate.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [ssh.ts](../examples/extensions/ssh.ts)
#### agent_start / agent_end #### agent_start / agent_end
@ -452,7 +453,7 @@ pi.on("agent_end", async (event, ctx) => {
}); });
``` ```
**Examples:** [chalk-logger.ts](../examples/extensions/chalk-logger.ts), [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts) **Examples:** [chalk-logger.ts](../examples/extensions/chalk-logger.ts), [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts)
#### turn_start / turn_end #### turn_start / turn_end
@ -468,7 +469,7 @@ pi.on("turn_end", async (event, ctx) => {
}); });
``` ```
**Examples:** [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [status-line.ts](../examples/extensions/status-line.ts) **Examples:** [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [status-line.ts](../examples/extensions/status-line.ts)
#### context #### context
@ -482,7 +483,7 @@ pi.on("context", async (event, ctx) => {
}); });
``` ```
**Examples:** [plan-mode.ts](../examples/extensions/plan-mode.ts) **Examples:** [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts)
### Model Events ### Model Events
@ -527,7 +528,7 @@ pi.on("tool_call", async (event, ctx) => {
}); });
``` ```
**Examples:** [chalk-logger.ts](../examples/extensions/chalk-logger.ts), [permission-gate.ts](../examples/extensions/permission-gate.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [protected-paths.ts](../examples/extensions/protected-paths.ts) **Examples:** [chalk-logger.ts](../examples/extensions/chalk-logger.ts), [permission-gate.ts](../examples/extensions/permission-gate.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [protected-paths.ts](../examples/extensions/protected-paths.ts)
#### tool_result #### tool_result
@ -549,7 +550,7 @@ pi.on("tool_result", async (event, ctx) => {
}); });
``` ```
**Examples:** [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts) **Examples:** [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts)
### User Bash Events ### User Bash Events
@ -723,7 +724,7 @@ pi.registerTool({
}); });
``` ```
**Examples:** [hello.ts](../examples/extensions/hello.ts), [question.ts](../examples/extensions/question.ts), [todo.ts](../examples/extensions/todo.ts), [truncated-tool.ts](../examples/extensions/truncated-tool.ts) **Examples:** [hello.ts](../examples/extensions/hello.ts), [question.ts](../examples/extensions/question.ts), [questionnaire.ts](../examples/extensions/questionnaire.ts), [todo.ts](../examples/extensions/todo.ts), [truncated-tool.ts](../examples/extensions/truncated-tool.ts)
### pi.sendMessage(message, options?) ### pi.sendMessage(message, options?)
@ -748,7 +749,7 @@ pi.sendMessage({
- `"nextTurn"` - Queued for next user prompt. Does not interrupt or trigger anything. - `"nextTurn"` - Queued for next user prompt. Does not interrupt or trigger anything.
- `triggerTurn: true` - If agent is idle, trigger an LLM response immediately. Only applies to `"steer"` and `"followUp"` modes (ignored for `"nextTurn"`). - `triggerTurn: true` - If agent is idle, trigger an LLM response immediately. Only applies to `"steer"` and `"followUp"` modes (ignored for `"nextTurn"`).
**Examples:** [file-trigger.ts](../examples/extensions/file-trigger.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts) **Examples:** [file-trigger.ts](../examples/extensions/file-trigger.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts)
### pi.sendUserMessage(content, options?) ### pi.sendUserMessage(content, options?)
@ -795,7 +796,7 @@ pi.on("session_start", async (_event, ctx) => {
}); });
``` ```
**Examples:** [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [snake.ts](../examples/extensions/snake.ts), [tools.ts](../examples/extensions/tools.ts) **Examples:** [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [snake.ts](../examples/extensions/snake.ts), [tools.ts](../examples/extensions/tools.ts)
### pi.setSessionName(name) ### pi.setSessionName(name)
@ -830,7 +831,7 @@ pi.registerCommand("stats", {
}); });
``` ```
**Examples:** [custom-footer.ts](../examples/extensions/custom-footer.ts), [custom-header.ts](../examples/extensions/custom-header.ts), [handoff.ts](../examples/extensions/handoff.ts), [pirate.ts](../examples/extensions/pirate.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [qna.ts](../examples/extensions/qna.ts), [send-user-message.ts](../examples/extensions/send-user-message.ts), [snake.ts](../examples/extensions/snake.ts), [todo.ts](../examples/extensions/todo.ts), [tools.ts](../examples/extensions/tools.ts) **Examples:** [custom-footer.ts](../examples/extensions/custom-footer.ts), [custom-header.ts](../examples/extensions/custom-header.ts), [handoff.ts](../examples/extensions/handoff.ts), [pirate.ts](../examples/extensions/pirate.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [qna.ts](../examples/extensions/qna.ts), [send-user-message.ts](../examples/extensions/send-user-message.ts), [snake.ts](../examples/extensions/snake.ts), [summarize.ts](../examples/extensions/summarize.ts), [todo.ts](../examples/extensions/todo.ts), [tools.ts](../examples/extensions/tools.ts)
### pi.registerMessageRenderer(customType, renderer) ### pi.registerMessageRenderer(customType, renderer)
@ -849,7 +850,7 @@ pi.registerShortcut("ctrl+shift+p", {
}); });
``` ```
**Examples:** [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts) **Examples:** [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts)
### pi.registerFlag(name, options) ### pi.registerFlag(name, options)
@ -868,7 +869,7 @@ if (pi.getFlag("--plan")) {
} }
``` ```
**Examples:** [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts) **Examples:** [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts)
### pi.exec(command, args, options?) ### pi.exec(command, args, options?)
@ -892,7 +893,7 @@ const names = all.map(t => t.name); // Just names if needed
pi.setActiveTools(["read", "bash"]); // Switch to read-only pi.setActiveTools(["read", "bash"]); // Switch to read-only
``` ```
**Examples:** [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [tools.ts](../examples/extensions/tools.ts) **Examples:** [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [tools.ts](../examples/extensions/tools.ts)
### pi.setModel(model) ### pi.setModel(model)
@ -1243,7 +1244,7 @@ ctx.ui.notify("Done!", "info"); // "info" | "warning" | "error"
``` ```
**Examples:** **Examples:**
- `ctx.ui.select()`: [confirm-destructive.ts](../examples/extensions/confirm-destructive.ts), [dirty-repo-guard.ts](../examples/extensions/dirty-repo-guard.ts), [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [permission-gate.ts](../examples/extensions/permission-gate.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [question.ts](../examples/extensions/question.ts) - `ctx.ui.select()`: [confirm-destructive.ts](../examples/extensions/confirm-destructive.ts), [dirty-repo-guard.ts](../examples/extensions/dirty-repo-guard.ts), [git-checkpoint.ts](../examples/extensions/git-checkpoint.ts), [permission-gate.ts](../examples/extensions/permission-gate.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [question.ts](../examples/extensions/question.ts), [questionnaire.ts](../examples/extensions/questionnaire.ts)
- `ctx.ui.confirm()`: [confirm-destructive.ts](../examples/extensions/confirm-destructive.ts) - `ctx.ui.confirm()`: [confirm-destructive.ts](../examples/extensions/confirm-destructive.ts)
- `ctx.ui.editor()`: [handoff.ts](../examples/extensions/handoff.ts) - `ctx.ui.editor()`: [handoff.ts](../examples/extensions/handoff.ts)
- `ctx.ui.setEditorText()`: [handoff.ts](../examples/extensions/handoff.ts), [qna.ts](../examples/extensions/qna.ts) - `ctx.ui.setEditorText()`: [handoff.ts](../examples/extensions/handoff.ts), [qna.ts](../examples/extensions/qna.ts)
@ -1345,8 +1346,8 @@ ctx.ui.theme.fg("accent", "styled text"); // Access current theme
``` ```
**Examples:** **Examples:**
- `ctx.ui.setStatus()`: [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [status-line.ts](../examples/extensions/status-line.ts) - `ctx.ui.setStatus()`: [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [status-line.ts](../examples/extensions/status-line.ts)
- `ctx.ui.setWidget()`: [plan-mode.ts](../examples/extensions/plan-mode.ts) - `ctx.ui.setWidget()`: [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts)
- `ctx.ui.setFooter()`: [custom-footer.ts](../examples/extensions/custom-footer.ts) - `ctx.ui.setFooter()`: [custom-footer.ts](../examples/extensions/custom-footer.ts)
- `ctx.ui.setHeader()`: [custom-header.ts](../examples/extensions/custom-header.ts) - `ctx.ui.setHeader()`: [custom-header.ts](../examples/extensions/custom-header.ts)
- `ctx.ui.setEditorComponent()`: [modal-editor.ts](../examples/extensions/modal-editor.ts) - `ctx.ui.setEditorComponent()`: [modal-editor.ts](../examples/extensions/modal-editor.ts)
@ -1395,9 +1396,22 @@ const result = await ctx.ui.custom<string | null>(
); );
``` ```
Overlay components should define a `width` property to control their size. The overlay is centered by default. See [overlay-test.ts](../examples/extensions/overlay-test.ts) for a complete example. For advanced positioning (anchors, margins, percentages, responsive visibility), pass `overlayOptions`. Use `onHandle` to control visibility programmatically:
**Examples:** [handoff.ts](../examples/extensions/handoff.ts), [plan-mode.ts](../examples/extensions/plan-mode.ts), [preset.ts](../examples/extensions/preset.ts), [qna.ts](../examples/extensions/qna.ts), [snake.ts](../examples/extensions/snake.ts), [todo.ts](../examples/extensions/todo.ts), [tools.ts](../examples/extensions/tools.ts), [overlay-test.ts](../examples/extensions/overlay-test.ts) ```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, keybindings, done) => new MyOverlayComponent({ onClose: done }),
{
overlay: true,
overlayOptions: { anchor: "top-right", width: "50%", margin: 2 },
onHandle: (handle) => { /* handle.setHidden(true/false) */ }
}
);
```
See [tui.md](tui.md) for the full `OverlayOptions` API and [overlay-qa-tests.ts](../examples/extensions/overlay-qa-tests.ts) for examples.
**Examples:** [handoff.ts](../examples/extensions/handoff.ts), [plan-mode/index.ts](../examples/extensions/plan-mode/index.ts), [preset.ts](../examples/extensions/preset.ts), [qna.ts](../examples/extensions/qna.ts), [snake.ts](../examples/extensions/snake.ts), [summarize.ts](../examples/extensions/summarize.ts), [todo.ts](../examples/extensions/todo.ts), [tools.ts](../examples/extensions/tools.ts), [overlay-test.ts](../examples/extensions/overlay-test.ts)
### Custom Editor ### Custom Editor

View file

@ -735,12 +735,12 @@ import {
discoverAuthStorage, discoverAuthStorage,
discoverModels, discoverModels,
discoverSkills, discoverSkills,
discoverHooks, discoverExtensions,
discoverCustomTools,
discoverContextFiles, discoverContextFiles,
discoverPromptTemplates, discoverPromptTemplates,
loadSettings, loadSettings,
buildSystemPrompt, buildSystemPrompt,
createEventBus,
} from "@mariozechner/pi-coding-agent"; } from "@mariozechner/pi-coding-agent";
// Auth and Models // Auth and Models
@ -754,19 +754,16 @@ const builtIn = getModel("anthropic", "claude-opus-4-5"); // Built-in only
// Skills // Skills
const { skills, warnings } = discoverSkills(cwd, agentDir, skillsSettings); const { skills, warnings } = discoverSkills(cwd, agentDir, skillsSettings);
// Hooks (async - loads TypeScript) // Extensions (async - loads TypeScript)
// Pass eventBus to share pi.events across hooks/tools // Pass eventBus to share pi.events across extensions
const eventBus = createEventBus(); const eventBus = createEventBus();
const hooks = await discoverHooks(eventBus, cwd, agentDir); const { extensions, errors } = await discoverExtensions(eventBus, cwd, agentDir);
// Custom tools (async - loads TypeScript)
const tools = await discoverCustomTools(eventBus, cwd, agentDir);
// Context files // Context files
const contextFiles = discoverContextFiles(cwd, agentDir); const contextFiles = discoverContextFiles(cwd, agentDir);
// Prompt templates // Prompt templates
const commands = discoverPromptTemplates(cwd, agentDir); const templates = discoverPromptTemplates(cwd, agentDir);
// Settings (global + project merged) // Settings (global + project merged)
const settings = loadSettings(cwd, agentDir); const settings = loadSettings(cwd, agentDir);
@ -816,8 +813,8 @@ import {
SettingsManager, SettingsManager,
readTool, readTool,
bashTool, bashTool,
type HookFactory, type ExtensionFactory,
type CustomTool, type ToolDefinition,
} from "@mariozechner/pi-coding-agent"; } from "@mariozechner/pi-coding-agent";
// Set up auth storage (custom location) // Set up auth storage (custom location)
@ -831,16 +828,16 @@ if (process.env.MY_KEY) {
// Model registry (no custom models.json) // Model registry (no custom models.json)
const modelRegistry = new ModelRegistry(authStorage); const modelRegistry = new ModelRegistry(authStorage);
// Inline hook // Inline extension
const auditHook: HookFactory = (api) => { const auditExtension: ExtensionFactory = (pi) => {
api.on("tool_call", async (event) => { pi.on("tool_call", async (event) => {
console.log(`[Audit] ${event.toolName}`); console.log(`[Audit] ${event.toolName}`);
return undefined; return undefined;
}); });
}; };
// Inline tool // Inline tool
const statusTool: CustomTool = { const statusTool: ToolDefinition = {
name: "status", name: "status",
label: "Status", label: "Status",
description: "Get system status", description: "Get system status",
@ -872,8 +869,8 @@ const { session } = await createAgentSession({
systemPrompt: "You are a minimal assistant. Be concise.", systemPrompt: "You are a minimal assistant. Be concise.",
tools: [readTool, bashTool], tools: [readTool, bashTool],
customTools: [{ tool: statusTool }], customTools: [statusTool],
hooks: [{ factory: auditHook }], extensions: [auditExtension],
skills: [], skills: [],
contextFiles: [], contextFiles: [],
promptTemplates: [], promptTemplates: [],
@ -961,7 +958,7 @@ The SDK is preferred when:
- You want type safety - You want type safety
- You're in the same Node.js process - You're in the same Node.js process
- You need direct access to agent state - You need direct access to agent state
- You want to customize tools/hooks programmatically - You want to customize tools/extensions programmatically
RPC mode is preferred when: RPC mode is preferred when:
- You're integrating from another language - You're integrating from another language
@ -984,12 +981,11 @@ discoverModels
// Discovery // Discovery
discoverSkills discoverSkills
discoverHooks discoverExtensions
discoverCustomTools
discoverContextFiles discoverContextFiles
discoverPromptTemplates discoverPromptTemplates
// Event Bus (for shared hook/tool communication) // Event Bus (for shared extension communication)
createEventBus createEventBus
// Helpers // Helpers
@ -1015,8 +1011,9 @@ createGrepTool, createFindTool, createLsTool
// Types // Types
type CreateAgentSessionOptions type CreateAgentSessionOptions
type CreateAgentSessionResult type CreateAgentSessionResult
type CustomTool type ExtensionFactory
type HookFactory type ExtensionAPI
type ToolDefinition
type Skill type Skill
type PromptTemplate type PromptTemplate
type Settings type Settings
@ -1024,28 +1021,4 @@ type SkillsSettings
type Tool type Tool
``` ```
For hook types, import from the hooks subpath: For extension types, see [extensions.md](extensions.md) for the full API.
```typescript
import type {
HookAPI,
HookMessage,
HookFactory,
HookEventContext,
HookCommandContext,
ToolCallEvent,
ToolResultEvent,
} from "@mariozechner/pi-coding-agent/hooks";
```
For message utilities:
```typescript
import { isHookMessage, createHookMessage } from "@mariozechner/pi-coding-agent";
```
For config utilities:
```typescript
import { getAgentDir } from "@mariozechner/pi-coding-agent/config";
```

View file

@ -48,6 +48,56 @@ async execute(toolCallId, params, onUpdate, ctx, signal) {
} }
``` ```
## Overlays
Overlays render components on top of existing content without clearing the screen. Pass `{ overlay: true }` to `ctx.ui.custom()`:
```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, keybindings, done) => new MyDialog({ onClose: done }),
{ overlay: true }
);
```
For positioning and sizing, use `overlayOptions`:
```typescript
const result = await ctx.ui.custom<string | null>(
(tui, theme, keybindings, done) => new SidePanel({ onClose: done }),
{
overlay: true,
overlayOptions: {
// Size: number or percentage string
width: "50%", // 50% of terminal width
minWidth: 40, // minimum 40 columns
maxHeight: "80%", // max 80% of terminal height
// Position: anchor-based (default: "center")
anchor: "right-center", // 9 positions: center, top-left, top-center, etc.
offsetX: -2, // offset from anchor
offsetY: 0,
// Or percentage/absolute positioning
row: "25%", // 25% from top
col: 10, // column 10
// Margins
margin: 2, // all sides, or { top, right, bottom, left }
// Responsive: hide on narrow terminals
visible: (termWidth, termHeight) => termWidth >= 80,
},
// Get handle for programmatic visibility control
onHandle: (handle) => {
// handle.setHidden(true/false) - toggle visibility
// handle.hide() - permanently remove
},
}
);
```
See [overlay-qa-tests.ts](../examples/extensions/overlay-qa-tests.ts) for comprehensive examples covering anchors, margins, stacking, responsive visibility, and animation.
## Built-in Components ## Built-in Components
Import from `@mariozechner/pi-tui`: Import from `@mariozechner/pi-tui`:

View file

@ -10,9 +10,12 @@ Programmatic usage via `createAgentSession()`. Shows how to customize models, pr
### [extensions/](extensions/) ### [extensions/](extensions/)
Example extensions demonstrating: Example extensions demonstrating:
- Lifecycle event handlers (tool interception, safety gates, context modifications) - Lifecycle event handlers (tool interception, safety gates, context modifications)
- Custom tools (todo lists, subagents) - Custom tools (todo lists, questions, subagents, output truncation)
- Commands and keyboard shortcuts - Commands and keyboard shortcuts
- External integrations (git, file watchers) - Custom UI (footers, headers, editors, overlays)
- Git integration (checkpoints, auto-commit)
- System prompt modifications and custom compaction
- External integrations (SSH, file watchers, system theme sync)
## Documentation ## Documentation

View file

@ -22,6 +22,7 @@ cp permission-gate.ts ~/.pi/agent/extensions/
| `protected-paths.ts` | Blocks writes to protected paths (.env, .git/, node_modules/) | | `protected-paths.ts` | Blocks writes to protected paths (.env, .git/, node_modules/) |
| `confirm-destructive.ts` | Confirms before destructive session actions (clear, switch, fork) | | `confirm-destructive.ts` | Confirms before destructive session actions (clear, switch, fork) |
| `dirty-repo-guard.ts` | Prevents session changes with uncommitted git changes | | `dirty-repo-guard.ts` | Prevents session changes with uncommitted git changes |
| `sandbox/` | OS-level sandboxing using `@anthropic-ai/sandbox-runtime` with per-project config |
### Custom Tools ### Custom Tools
@ -29,8 +30,10 @@ cp permission-gate.ts ~/.pi/agent/extensions/
|-----------|-------------| |-----------|-------------|
| `todo.ts` | Todo list tool + `/todos` command with custom rendering and state persistence | | `todo.ts` | Todo list tool + `/todos` command with custom rendering and state persistence |
| `hello.ts` | Minimal custom tool example | | `hello.ts` | Minimal custom tool example |
| `question.ts` | Demonstrates `ctx.ui.select()` for asking the user questions | | `question.ts` | Demonstrates `ctx.ui.select()` for asking the user questions with custom UI |
| `questionnaire.ts` | Multi-question input with tab bar navigation between questions |
| `tool-override.ts` | Override built-in tools (e.g., add logging/access control to `read`) | | `tool-override.ts` | Override built-in tools (e.g., add logging/access control to `read`) |
| `truncated-tool.ts` | Wraps ripgrep with proper output truncation (50KB/2000 lines) |
| `ssh.ts` | Delegate all tools to a remote machine via SSH using pluggable operations | | `ssh.ts` | Delegate all tools to a remote machine via SSH using pluggable operations |
| `subagent/` | Delegate tasks to specialized subagents with isolated context windows | | `subagent/` | Delegate tasks to specialized subagents with isolated context windows |
@ -39,19 +42,26 @@ cp permission-gate.ts ~/.pi/agent/extensions/
| Extension | Description | | Extension | Description |
|-----------|-------------| |-----------|-------------|
| `preset.ts` | Named presets for model, thinking level, tools, and instructions via `--preset` flag and `/preset` command | | `preset.ts` | Named presets for model, thinking level, tools, and instructions via `--preset` flag and `/preset` command |
| `plan-mode.ts` | Claude Code-style plan mode for read-only exploration with `/plan` command | | `plan-mode/` | Claude Code-style plan mode for read-only exploration with `/plan` command and step tracking |
| `tools.ts` | Interactive `/tools` command to enable/disable tools with session persistence | | `tools.ts` | Interactive `/tools` command to enable/disable tools with session persistence |
| `handoff.ts` | Transfer context to a new focused session via `/handoff <goal>` | | `handoff.ts` | Transfer context to a new focused session via `/handoff <goal>` |
| `qna.ts` | Extracts questions from last response into editor via `ctx.ui.setEditorText()` | | `qna.ts` | Extracts questions from last response into editor via `ctx.ui.setEditorText()` |
| `status-line.ts` | Shows turn progress in footer via `ctx.ui.setStatus()` with themed colors | | `status-line.ts` | Shows turn progress in footer via `ctx.ui.setStatus()` with themed colors |
| `model-status.ts` | Shows model changes in status bar via `model_select` hook |
| `snake.ts` | Snake game with custom UI, keyboard handling, and session persistence | | `snake.ts` | Snake game with custom UI, keyboard handling, and session persistence |
| `send-user-message.ts` | Demonstrates `pi.sendUserMessage()` for sending user messages from extensions | | `send-user-message.ts` | Demonstrates `pi.sendUserMessage()` for sending user messages from extensions |
| `timed-confirm.ts` | Demonstrates AbortSignal for auto-dismissing `ctx.ui.confirm()` and `ctx.ui.select()` dialogs | | `timed-confirm.ts` | Demonstrates AbortSignal for auto-dismissing `ctx.ui.confirm()` and `ctx.ui.select()` dialogs |
| `modal-editor.ts` | Custom vim-like modal editor via `ctx.ui.setEditorComponent()` | | `modal-editor.ts` | Custom vim-like modal editor via `ctx.ui.setEditorComponent()` |
| `rainbow-editor.ts` | Animated rainbow text effect via custom editor |
| `notify.ts` | Desktop notifications via OSC 777 when agent finishes (Ghostty, iTerm2, WezTerm) | | `notify.ts` | Desktop notifications via OSC 777 when agent finishes (Ghostty, iTerm2, WezTerm) |
| `summarize.ts` | Summarize conversation with GPT-5.2 and show in transient UI |
| `custom-footer.ts` | Custom footer with git branch and token stats via `ctx.ui.setFooter()` |
| `custom-header.ts` | Custom header via `ctx.ui.setHeader()` |
| `overlay-test.ts` | Test overlay compositing with inline text inputs and edge cases | | `overlay-test.ts` | Test overlay compositing with inline text inputs and edge cases |
| `overlay-qa-tests.ts` | Comprehensive overlay QA tests: anchors, margins, stacking, overflow, animation | | `overlay-qa-tests.ts` | Comprehensive overlay QA tests: anchors, margins, stacking, overflow, animation |
| `doom-overlay/` | DOOM game running as an overlay at 35 FPS (demonstrates real-time game rendering) | | `doom-overlay/` | DOOM game running as an overlay at 35 FPS (demonstrates real-time game rendering) |
| `shutdown-command.ts` | Adds `/quit` command demonstrating `ctx.shutdown()` |
| `interactive-shell.ts` | Run interactive commands (vim, htop) with full terminal via `user_bash` hook |
### Git Integration ### Git Integration
@ -65,8 +75,15 @@ cp permission-gate.ts ~/.pi/agent/extensions/
| Extension | Description | | Extension | Description |
|-----------|-------------| |-----------|-------------|
| `pirate.ts` | Demonstrates `systemPromptAppend` to dynamically modify system prompt | | `pirate.ts` | Demonstrates `systemPromptAppend` to dynamically modify system prompt |
| `claude-rules.ts` | Scans `.claude/rules/` folder and lists rules in system prompt |
| `custom-compaction.ts` | Custom compaction that summarizes entire conversation | | `custom-compaction.ts` | Custom compaction that summarizes entire conversation |
### System Integration
| Extension | Description |
|-----------|-------------|
| `mac-system-theme.ts` | Syncs pi theme with macOS dark/light mode |
### External Dependencies ### External Dependencies
| Extension | Description | | Extension | Description |

View file

@ -1,548 +0,0 @@
/**
* Plan Mode Extension
*
* Provides a Claude Code-style "plan mode" for safe code exploration.
* When enabled, the agent can only use read-only tools and cannot modify files.
*
* Features:
* - /plan command to toggle plan mode
* - In plan mode: only read, bash (read-only), grep, find, ls are available
* - Injects system context telling the agent about the restrictions
* - After each agent response, prompts to execute the plan or continue planning
* - Shows "plan" indicator in footer when active
* - Extracts todo list from plan and tracks progress during execution
* - Uses ID-based tracking: agent outputs [DONE:id] to mark steps complete
*
* Usage:
* 1. Copy this file to ~/.pi/agent/extensions/ or your project's .pi/extensions/
* 2. Use /plan to toggle plan mode on/off
* 3. Or start in plan mode with --plan flag
*/
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
import { Key } from "@mariozechner/pi-tui";
// Read-only tools for plan mode
const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls"];
// Full set of tools for normal mode
const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"];
// Patterns for destructive bash commands that should be blocked in plan mode
// Matched case-insensitively anywhere in the command string (see isSafeCommand).
const DESTRUCTIVE_PATTERNS = [
	// File/directory mutation
	/\brm\b/i,
	/\brmdir\b/i,
	/\bmv\b/i,
	/\bcp\b/i,
	/\bmkdir\b/i,
	/\btouch\b/i,
	/\bchmod\b/i,
	/\bchown\b/i,
	/\bchgrp\b/i,
	/\bln\b/i,
	/\btee\b/i,
	/\btruncate\b/i,
	/\bdd\b/i,
	/\bshred\b/i,
	// Output redirection: single ">" not preceded by "<" (lookahead excludes ">>"), and ">>"
	/[^<]>(?!>)/,
	/>>/,
	// Package-manager writes
	/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
	/\byarn\s+(add|remove|install|publish)/i,
	/\bpnpm\s+(add|remove|install|publish)/i,
	/\bpip\s+(install|uninstall)/i,
	/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
	/\bbrew\s+(install|uninstall|upgrade)/i,
	// Git operations that mutate repository state
	/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
	// Privilege escalation and process/system control
	/\bsudo\b/i,
	/\bsu\b/i,
	/\bkill\b/i,
	/\bpkill\b/i,
	/\bkillall\b/i,
	/\breboot\b/i,
	/\bshutdown\b/i,
	/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
	/\bservice\s+\S+\s+(start|stop|restart)/i,
	// Interactive editors (would hang or mutate files)
	/\b(vim?|nano|emacs|code|subl)\b/i,
];
// Read-only commands that are always safe
// Anchored at the start of the command (after optional whitespace).
// NOTE(review): isSafeCommand's allowlist branch is redundant today
// (default-allow — see that function); this list documents intent and
// would matter if the policy ever switched to default-deny.
const SAFE_COMMANDS = [
	// File inspection
	/^\s*cat\b/,
	/^\s*head\b/,
	/^\s*tail\b/,
	/^\s*less\b/,
	/^\s*more\b/,
	// Search
	/^\s*grep\b/,
	/^\s*find\b/,
	// Directory/file metadata
	/^\s*ls\b/,
	/^\s*pwd\b/,
	/^\s*echo\b/,
	/^\s*printf\b/,
	/^\s*wc\b/,
	/^\s*sort\b/,
	/^\s*uniq\b/,
	/^\s*diff\b/,
	/^\s*file\b/,
	/^\s*stat\b/,
	/^\s*du\b/,
	/^\s*df\b/,
	/^\s*tree\b/,
	/^\s*which\b/,
	/^\s*whereis\b/,
	/^\s*type\b/,
	// Environment / system info
	/^\s*env\b/,
	/^\s*printenv\b/,
	/^\s*uname\b/,
	/^\s*whoami\b/,
	/^\s*id\b/,
	/^\s*date\b/,
	/^\s*cal\b/,
	/^\s*uptime\b/,
	/^\s*ps\b/,
	/^\s*top\b/,
	/^\s*htop\b/,
	/^\s*free\b/,
	// Git read-only subcommands
	/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
	/^\s*git\s+ls-/i,
	// Package-manager queries
	/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
	/^\s*yarn\s+(list|info|why|audit)/i,
	/^\s*node\s+--version/i,
	/^\s*python\s+--version/i,
	// Network reads and text tools
	/^\s*curl\s/i,
	/^\s*wget\s+-O\s*-/i,
	/^\s*jq\b/,
	/^\s*sed\s+-n/i,
	/^\s*awk\b/,
	/^\s*rg\b/,
	/^\s*fd\b/,
	/^\s*bat\b/,
	/^\s*exa\b/,
];
/**
 * Decide whether a bash command may run while plan mode is active.
 *
 * Semantics are default-allow: a command is safe unless it matches one of
 * DESTRUCTIVE_PATTERNS (file mutation, redirection, package installs, git
 * writes, privilege escalation, editors, ...).
 *
 * The previous implementation also consulted SAFE_COMMANDS first, but that
 * branch was dead logic: a command matching SAFE_COMMANDS and no destructive
 * pattern returned true — exactly what the final fallthrough already did,
 * and destructive matches returned false regardless. Case analysis shows the
 * whole function reduces to the single expression below; behavior is
 * unchanged. SAFE_COMMANDS is retained as documentation of intent.
 *
 * @param command - the raw bash command string the agent wants to run
 * @returns true if the command is allowed in plan mode
 */
function isSafeCommand(command: string): boolean {
	return !DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command));
}
// A single extracted plan step with completion state.
interface TodoItem {
	step: number; // 1-based step number (used in injected execution context)
	text: string; // cleaned, truncated step description for display
	completed: boolean; // true once the step has been marked done
}
/**
 * Normalize an extracted plan step for display.
 *
 * Strips markdown bold/italic and inline-code markers, drops a redundant
 * leading action verb ("Run the ...", "Create a ..."), collapses internal
 * whitespace, capitalizes the first letter, and truncates anything longer
 * than 50 characters to 47 characters plus "...".
 */
function cleanStepText(text: string): string {
	const stripped = text
		// Markdown bold/italic markers
		.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1")
		// Inline code markers
		.replace(/`([^`]+)`/g, "$1")
		// Redundant leading action verb (optionally followed by "the")
		.replace(
			/^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
			"",
		)
		// Collapse runs of whitespace
		.replace(/\s+/g, " ")
		.trim();
	const capitalized = stripped.length > 0 ? stripped.charAt(0).toUpperCase() + stripped.slice(1) : stripped;
	return capitalized.length > 50 ? `${capitalized.slice(0, 47)}...` : capitalized;
}
/**
 * Pull an ordered todo list out of an assistant message.
 *
 * Prefers numbered lists ("1. Task" / "2) Task", tolerating **bold**
 * prefixes); falls back to bullet/step lists only when no numbered items
 * were found. Candidates that are too short or look like code/commands
 * are discarded.
 */
function extractTodoItems(message: string): TodoItem[] {
	const result: TodoItem[] = [];
	// Append a candidate step if it survives trimming, cleanup, and filters.
	const pushCandidate = (raw: string, minLength: number, rejectPrefixes: string[]): void => {
		const trimmed = raw.trim().replace(/\*{1,2}$/, "").trim();
		if (trimmed.length <= minLength) return;
		if (rejectPrefixes.some((prefix) => trimmed.startsWith(prefix))) return;
		const cleaned = cleanStepText(trimmed);
		if (cleaned.length > 3) {
			result.push({ step: result.length + 1, text: cleaned, completed: false });
		}
	};
	// Numbered lists: "1. Task" or "1) Task" - also handle **bold** prefixes
	for (const match of message.matchAll(/^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm)) {
		pushCandidate(match[2], 5, ["`", "/", "-"]);
	}
	// If no numbered items, try bullet points ("- Task" / "* Step 1: Task")
	if (result.length === 0) {
		for (const match of message.matchAll(/^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim)) {
			pushCandidate(match[1], 10, ["`"]);
		}
	}
	return result;
}
export default function planModeExtension(pi: ExtensionAPI) {
	// --- Extension state (persisted via "plan-mode" custom session entries) ---
	let planModeEnabled = false; // true while in read-only planning mode
	let toolsCalledThisTurn = false; // set by tool_result; consumed by turn_end
	let executionMode = false; // true while executing an extracted plan
	let todoItems: TodoItem[] = []; // steps extracted from the latest plan
	// Register --plan CLI flag
	pi.registerFlag("plan", {
		description: "Start in plan mode (read-only exploration)",
		type: "boolean",
		default: false,
	});
	// Helper to update status displays (footer status + todo widget)
	function updateStatus(ctx: ExtensionContext) {
		if (executionMode && todoItems.length > 0) {
			const completed = todoItems.filter((t) => t.completed).length;
			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
		} else if (planModeEnabled) {
			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
		} else {
			ctx.ui.setStatus("plan-mode", undefined);
		}
		// Show widget during execution (no IDs shown to user)
		if (executionMode && todoItems.length > 0) {
			const lines: string[] = [];
			for (const item of todoItems) {
				if (item.completed) {
					lines.push(
						ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text)),
					);
				} else {
					lines.push(ctx.ui.theme.fg("muted", "☐ ") + item.text);
				}
			}
			ctx.ui.setWidget("plan-todos", lines);
		} else {
			// Clear the widget whenever not actively executing a plan
			ctx.ui.setWidget("plan-todos", undefined);
		}
	}
	// Toggle plan mode on/off; toggling always resets execution state and todos.
	function togglePlanMode(ctx: ExtensionContext) {
		planModeEnabled = !planModeEnabled;
		executionMode = false;
		todoItems = [];
		if (planModeEnabled) {
			pi.setActiveTools(PLAN_MODE_TOOLS);
			ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
		} else {
			pi.setActiveTools(NORMAL_MODE_TOOLS);
			ctx.ui.notify("Plan mode disabled. Full access restored.");
		}
		updateStatus(ctx);
	}
	// Register /plan command
	pi.registerCommand("plan", {
		description: "Toggle plan mode (read-only exploration)",
		handler: async (_args, ctx) => {
			togglePlanMode(ctx);
		},
	});
	// Register /todos command
	pi.registerCommand("todos", {
		description: "Show current plan todo list",
		handler: async (_args, ctx) => {
			if (todoItems.length === 0) {
				ctx.ui.notify("No todos. Create a plan first with /plan", "info");
				return;
			}
			const todoList = todoItems
				.map((item, i) => {
					const checkbox = item.completed ? "✓" : "○";
					return `${i + 1}. ${checkbox} ${item.text}`;
				})
				.join("\n");
			ctx.ui.notify(`Plan Progress:\n${todoList}`, "info");
		},
	});
	// Register Shift+P shortcut
	pi.registerShortcut(Key.shift("p"), {
		description: "Toggle plan mode",
		handler: async (ctx) => {
			togglePlanMode(ctx);
		},
	});
	// Block destructive bash in plan mode (default-allow; see isSafeCommand)
	pi.on("tool_call", async (event) => {
		if (!planModeEnabled) return;
		if (event.toolName !== "bash") return;
		const command = event.input.command as string;
		if (!isSafeCommand(command)) {
			return {
				block: true,
				reason: `Plan mode: destructive command blocked. Use /plan to disable plan mode first.\nCommand: ${command}`,
			};
		}
	});
	// Track step completion based on tool results
	// NOTE(review): marks a step done on ANY tool result, not just successes —
	// confirm this is the intended heuristic.
	pi.on("tool_result", async (_event, ctx) => {
		toolsCalledThisTurn = true;
		if (!executionMode || todoItems.length === 0) return;
		// Mark the first uncompleted step as done when any tool succeeds
		const nextStep = todoItems.find((t) => !t.completed);
		if (nextStep) {
			nextStep.completed = true;
			updateStatus(ctx);
		}
	});
	// Filter out stale plan mode context messages from LLM context
	// This ensures the agent only sees the CURRENT state (plan mode on/off)
	pi.on("context", async (event) => {
		// Only filter when NOT in plan mode (i.e., when executing)
		if (planModeEnabled) {
			return;
		}
		// Remove any previous plan-mode-context messages
		const _beforeCount = event.messages.length;
		const filtered = event.messages.filter((m) => {
			if (m.role === "user" && Array.isArray(m.content)) {
				const hasOldContext = m.content.some((c) => c.type === "text" && c.text.includes("[PLAN MODE ACTIVE]"));
				if (hasOldContext) {
					return false;
				}
			}
			return true;
		});
		return { messages: filtered };
	});
	// Inject plan mode context (hidden user message) before the agent starts
	pi.on("before_agent_start", async () => {
		if (!planModeEnabled && !executionMode) {
			return;
		}
		if (planModeEnabled) {
			return {
				message: {
					customType: "plan-mode-context",
					content: `[PLAN MODE ACTIVE]
You are in plan mode - a read-only exploration mode for safe code analysis.
Restrictions:
- You can only use: read, bash, grep, find, ls
- You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to READ-ONLY commands
- Focus on analysis, planning, and understanding the codebase
Create a detailed numbered plan:
1. First step description
2. Second step description
...
Do NOT attempt to make changes - just describe what you would do.`,
					display: false,
				},
			};
		}
		if (executionMode && todoItems.length > 0) {
			// Remind the agent only of steps not yet completed
			const remaining = todoItems.filter((t) => !t.completed);
			const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
			return {
				message: {
					customType: "plan-execution-context",
					content: `[EXECUTING PLAN - Full tool access enabled]
Remaining steps:
${todoList}
Execute each step in order.`,
					display: false,
				},
			};
		}
	});
	// After agent finishes: either wrap up execution, or (in plan mode)
	// extract todos from the response and prompt the user for next action.
	pi.on("agent_end", async (event, ctx) => {
		// In execution mode, check if all steps complete
		if (executionMode && todoItems.length > 0) {
			const allComplete = todoItems.every((t) => t.completed);
			if (allComplete) {
				// Show final completed list in chat
				const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
				pi.sendMessage(
					{
						customType: "plan-complete",
						content: `**Plan Complete!** ✓\n\n${completedList}`,
						display: true,
					},
					{ triggerTurn: false },
				);
				executionMode = false;
				todoItems = [];
				pi.setActiveTools(NORMAL_MODE_TOOLS);
				updateStatus(ctx);
			}
			return;
		}
		if (!planModeEnabled) return;
		if (!ctx.hasUI) return;
		// Extract todos from last message
		const messages = event.messages;
		const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
		if (lastAssistant && Array.isArray(lastAssistant.content)) {
			const textContent = lastAssistant.content
				.filter((block): block is { type: "text"; text: string } => block.type === "text")
				.map((block) => block.text)
				.join("\n");
			if (textContent) {
				const extracted = extractTodoItems(textContent);
				if (extracted.length > 0) {
					todoItems = extracted;
				}
			}
		}
		const hasTodos = todoItems.length > 0;
		// Show todo list in chat (no IDs shown to user, just numbered)
		if (hasTodos) {
			const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
			pi.sendMessage(
				{
					customType: "plan-todo-list",
					content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
					display: true,
				},
				{ triggerTurn: false },
			);
		}
		const choice = await ctx.ui.select("Plan mode - what next?", [
			hasTodos ? "Execute the plan (track progress)" : "Execute the plan",
			"Stay in plan mode",
			"Refine the plan",
		]);
		// Matches both "Execute the plan" variants
		if (choice?.startsWith("Execute")) {
			planModeEnabled = false;
			executionMode = hasTodos;
			pi.setActiveTools(NORMAL_MODE_TOOLS);
			updateStatus(ctx);
			// Simple execution message - context event filters old plan mode messages
			// and before_agent_start injects fresh execution context with IDs
			const execMessage = hasTodos
				? `Execute the plan. Start with: ${todoItems[0].text}`
				: "Execute the plan you just created.";
			pi.sendMessage(
				{
					customType: "plan-mode-execute",
					content: execMessage,
					display: true,
				},
				{ triggerTurn: true },
			);
		} else if (choice === "Refine the plan") {
			const refinement = await ctx.ui.input("What should be refined?");
			if (refinement) {
				// Pre-fill the editor so the user can send the refinement as a prompt
				ctx.ui.setEditorText(refinement);
			}
		}
	});
	// Initialize state on session start (--plan flag, then persisted entries)
	pi.on("session_start", async (_event, ctx) => {
		if (pi.getFlag("plan") === true) {
			planModeEnabled = true;
		}
		// Restore the most recent persisted "plan-mode" entry, if any
		const entries = ctx.sessionManager.getEntries();
		const planModeEntry = entries
			.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
			.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
		if (planModeEntry?.data) {
			if (planModeEntry.data.enabled !== undefined) {
				planModeEnabled = planModeEntry.data.enabled;
			}
			if (planModeEntry.data.todos) {
				todoItems = planModeEntry.data.todos;
			}
			if (planModeEntry.data.executing) {
				executionMode = planModeEntry.data.executing;
			}
		}
		if (planModeEnabled) {
			pi.setActiveTools(PLAN_MODE_TOOLS);
		}
		updateStatus(ctx);
	});
	// Reset tool tracking at start of each turn and persist state
	pi.on("turn_start", async () => {
		toolsCalledThisTurn = false;
		pi.appendEntry("plan-mode", {
			enabled: planModeEnabled,
			todos: todoItems,
			executing: executionMode,
		});
	});
	// Handle non-tool turns (e.g., analysis, explanation steps)
	pi.on("turn_end", async (_event, ctx) => {
		if (!executionMode || todoItems.length === 0) return;
		// If no tools were called this turn, the agent was doing analysis/explanation
		// Mark the next uncompleted step as done
		if (!toolsCalledThisTurn) {
			const nextStep = todoItems.find((t) => !t.completed);
			if (nextStep) {
				nextStep.completed = true;
				updateStatus(ctx);
			}
		}
	});
}

View file

@ -0,0 +1,65 @@
# Plan Mode Extension
Read-only exploration mode for safe code analysis.
## Features
- **Read-only tools**: Restricts available tools to read, bash, grep, find, ls, questionnaire
- **Bash allowlist**: Only read-only bash commands are allowed
- **Plan extraction**: Extracts numbered steps from `Plan:` sections
- **Progress tracking**: Widget shows completion status during execution
- **[DONE:n] markers**: Explicit step completion tracking
- **Session persistence**: State survives session resume
## Commands
- `/plan` - Toggle plan mode
- `/todos` - Show current plan progress
- `Shift+P` - Toggle plan mode (shortcut)
## Usage
1. Enable plan mode with `/plan` or `--plan` flag
2. Ask the agent to analyze code and create a plan
3. The agent should output a numbered plan under a `Plan:` header:
```
Plan:
1. First step description
2. Second step description
3. Third step description
```
4. Choose "Execute the plan" when prompted
5. During execution, the agent marks steps complete with `[DONE:n]` tags
6. Progress widget shows completion status
## How It Works
### Plan Mode (Read-Only)
- Only read-only tools available
- Bash commands filtered through allowlist
- Agent creates a plan without making changes
### Execution Mode
- Full tool access restored
- Agent executes steps in order
- `[DONE:n]` markers track completion
- Widget shows progress
### Command Allowlist
Safe commands (allowed):
- File inspection: `cat`, `head`, `tail`, `less`, `more`
- Search: `grep`, `find`, `rg`, `fd`
- Directory: `ls`, `pwd`, `tree`
- Git read: `git status`, `git log`, `git diff`, `git branch`
- Package info: `npm list`, `npm outdated`, `yarn info`
- System info: `uname`, `whoami`, `date`, `uptime`
Blocked commands:
- File modification: `rm`, `mv`, `cp`, `mkdir`, `touch`
- Git write: `git add`, `git commit`, `git push`
- Package install: `npm install`, `yarn add`, `pip install`
- System: `sudo`, `kill`, `reboot`
- Editors: `vim`, `nano`, `code`

View file

@ -0,0 +1,340 @@
/**
* Plan Mode Extension
*
* Read-only exploration mode for safe code analysis.
* When enabled, only read-only tools are available.
*
* Features:
* - /plan command or Shift+P to toggle
* - Bash restricted to allowlisted read-only commands
* - Extracts numbered plan steps from "Plan:" sections
* - [DONE:n] markers to complete steps during execution
* - Progress tracking widget during execution
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, TextContent } from "@mariozechner/pi-ai";
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
import { Key } from "@mariozechner/pi-tui";
import { extractTodoItems, isSafeCommand, markCompletedSteps, type TodoItem } from "./utils.js";
// Tools
const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls", "questionnaire"]; // read-only set while planning
const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"]; // full set restored for execution
// Type guard: true when the message is an assistant message whose content
// is the array form (required before calling getTextContent).
function isAssistantMessage(m: AgentMessage): m is AssistantMessage {
	if (m.role !== "assistant") return false;
	return Array.isArray(m.content);
}
// Concatenate every text block of an assistant message, newline-separated.
// Non-text blocks (tool calls, thinking, etc.) are skipped.
function getTextContent(message: AssistantMessage): string {
	const parts: string[] = [];
	for (const block of message.content) {
		if (block.type === "text") {
			parts.push((block as TextContent).text);
		}
	}
	return parts.join("\n");
}
/**
 * Plan mode extension.
 *
 * Adds a read-only "plan mode" (restricted tool set plus a bash allowlist)
 * in which the model explores the codebase and produces a numbered plan
 * under a "Plan:" header. The plan is parsed into todo items; on execution,
 * full tool access is restored and progress is tracked via [DONE:n] tags in
 * assistant responses. State is persisted with appendEntry and restored on
 * session start/resume.
 */
export default function planModeExtension(pi: ExtensionAPI): void {
// Shared mutable state; saved by persistState() and restored in session_start.
let planModeEnabled = false;
let executionMode = false;
let todoItems: TodoItem[] = [];
pi.registerFlag("plan", {
description: "Start in plan mode (read-only exploration)",
type: "boolean",
default: false,
});
// Sync the footer status indicator and the todo-list widget with current state.
function updateStatus(ctx: ExtensionContext): void {
// Footer status
if (executionMode && todoItems.length > 0) {
const completed = todoItems.filter((t) => t.completed).length;
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
} else if (planModeEnabled) {
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
} else {
ctx.ui.setStatus("plan-mode", undefined);
}
// Widget showing todo list
if (executionMode && todoItems.length > 0) {
const lines = todoItems.map((item) => {
if (item.completed) {
return (
ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text))
);
}
return `${ctx.ui.theme.fg("muted", "☐ ")}${item.text}`;
});
ctx.ui.setWidget("plan-todos", lines);
} else {
ctx.ui.setWidget("plan-todos", undefined);
}
}
// Toggle plan mode on/off. Always discards any in-flight execution state.
function togglePlanMode(ctx: ExtensionContext): void {
planModeEnabled = !planModeEnabled;
executionMode = false;
todoItems = [];
if (planModeEnabled) {
pi.setActiveTools(PLAN_MODE_TOOLS);
ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
} else {
pi.setActiveTools(NORMAL_MODE_TOOLS);
ctx.ui.notify("Plan mode disabled. Full access restored.");
}
updateStatus(ctx);
}
// Append the current state to the session log so resume can restore it.
function persistState(): void {
pi.appendEntry("plan-mode", {
enabled: planModeEnabled,
todos: todoItems,
executing: executionMode,
});
}
pi.registerCommand("plan", {
description: "Toggle plan mode (read-only exploration)",
handler: async (_args, ctx) => togglePlanMode(ctx),
});
pi.registerCommand("todos", {
description: "Show current plan todo list",
handler: async (_args, ctx) => {
if (todoItems.length === 0) {
ctx.ui.notify("No todos. Create a plan first with /plan", "info");
return;
}
const list = todoItems.map((item, i) => `${i + 1}. ${item.completed ? "✓" : "○"} ${item.text}`).join("\n");
ctx.ui.notify(`Plan Progress:\n${list}`, "info");
},
});
pi.registerShortcut(Key.shift("p"), {
description: "Toggle plan mode",
handler: async (ctx) => togglePlanMode(ctx),
});
// Block destructive bash commands in plan mode
pi.on("tool_call", async (event) => {
if (!planModeEnabled || event.toolName !== "bash") return;
// NOTE(review): assumes the bash tool always supplies a string `command` — confirm against tool schema.
const command = event.input.command as string;
if (!isSafeCommand(command)) {
return {
block: true,
reason: `Plan mode: command blocked (not allowlisted). Use /plan to disable plan mode first.\nCommand: ${command}`,
};
}
});
// Filter out stale plan mode context when not in plan mode
pi.on("context", async (event) => {
if (planModeEnabled) return;
return {
messages: event.messages.filter((m) => {
const msg = m as AgentMessage & { customType?: string };
// Drop injected plan-mode context messages and any user text carrying the marker.
if (msg.customType === "plan-mode-context") return false;
if (msg.role !== "user") return true;
const content = msg.content;
if (typeof content === "string") {
return !content.includes("[PLAN MODE ACTIVE]");
}
if (Array.isArray(content)) {
return !content.some(
(c) => c.type === "text" && (c as TextContent).text?.includes("[PLAN MODE ACTIVE]"),
);
}
return true;
}),
};
});
// Inject plan/execution context before agent starts
pi.on("before_agent_start", async () => {
if (planModeEnabled) {
return {
message: {
customType: "plan-mode-context",
content: `[PLAN MODE ACTIVE]
You are in plan mode - a read-only exploration mode for safe code analysis.
Restrictions:
- You can only use: read, bash, grep, find, ls, questionnaire
- You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to an allowlist of read-only commands
Ask clarifying questions using the questionnaire tool.
Use brave-search skill via bash for web research.
Create a detailed numbered plan under a "Plan:" header:
Plan:
1. First step description
2. Second step description
...
Do NOT attempt to make changes - just describe what you would do.`,
display: false,
},
};
}
// During execution, remind the model of the steps still open.
if (executionMode && todoItems.length > 0) {
const remaining = todoItems.filter((t) => !t.completed);
const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
return {
message: {
customType: "plan-execution-context",
content: `[EXECUTING PLAN - Full tool access enabled]
Remaining steps:
${todoList}
Execute each step in order.
After completing a step, include a [DONE:n] tag in your response.`,
display: false,
},
};
}
});
// Track progress after each turn
pi.on("turn_end", async (event, ctx) => {
if (!executionMode || todoItems.length === 0) return;
if (!isAssistantMessage(event.message)) return;
// Scan the assistant's text for [DONE:n] tags and mark matching steps.
const text = getTextContent(event.message);
if (markCompletedSteps(text, todoItems) > 0) {
updateStatus(ctx);
}
persistState();
});
// Handle plan completion and plan mode UI
pi.on("agent_end", async (event, ctx) => {
// Check if execution is complete
if (executionMode && todoItems.length > 0) {
if (todoItems.every((t) => t.completed)) {
const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
pi.sendMessage(
{ customType: "plan-complete", content: `**Plan Complete!** ✓\n\n${completedList}`, display: true },
{ triggerTurn: false },
);
executionMode = false;
todoItems = [];
pi.setActiveTools(NORMAL_MODE_TOOLS);
updateStatus(ctx);
persistState(); // Save cleared state so resume doesn't restore old execution mode
}
return;
}
if (!planModeEnabled || !ctx.hasUI) return;
// Extract todos from last assistant message
const lastAssistant = [...event.messages].reverse().find(isAssistantMessage);
if (lastAssistant) {
const extracted = extractTodoItems(getTextContent(lastAssistant));
if (extracted.length > 0) {
todoItems = extracted;
}
}
// Show plan steps and prompt for next action
if (todoItems.length > 0) {
const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
pi.sendMessage(
{
customType: "plan-todo-list",
content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
display: true,
},
{ triggerTurn: false },
);
}
const choice = await ctx.ui.select("Plan mode - what next?", [
todoItems.length > 0 ? "Execute the plan (track progress)" : "Execute the plan",
"Stay in plan mode",
"Refine the plan",
]);
// "Execute…" leaves plan mode; execution tracking only if steps were extracted.
if (choice?.startsWith("Execute")) {
planModeEnabled = false;
executionMode = todoItems.length > 0;
pi.setActiveTools(NORMAL_MODE_TOOLS);
updateStatus(ctx);
const execMessage =
todoItems.length > 0
? `Execute the plan. Start with: ${todoItems[0].text}`
: "Execute the plan you just created.";
pi.sendMessage(
{ customType: "plan-mode-execute", content: execMessage, display: true },
{ triggerTurn: true },
);
} else if (choice === "Refine the plan") {
const refinement = await ctx.ui.editor("Refine the plan:", "");
if (refinement?.trim()) {
pi.sendUserMessage(refinement.trim());
}
}
});
// Restore state on session start/resume
pi.on("session_start", async (_event, ctx) => {
if (pi.getFlag("plan") === true) {
planModeEnabled = true;
}
const entries = ctx.sessionManager.getEntries();
// Restore persisted state
const planModeEntry = entries
.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
if (planModeEntry?.data) {
planModeEnabled = planModeEntry.data.enabled ?? planModeEnabled;
todoItems = planModeEntry.data.todos ?? todoItems;
executionMode = planModeEntry.data.executing ?? executionMode;
}
// On resume: re-scan messages to rebuild completion state
// Only scan messages AFTER the last "plan-mode-execute" to avoid picking up [DONE:n] from previous plans
const isResume = planModeEntry !== undefined;
if (isResume && executionMode && todoItems.length > 0) {
// Find the index of the last plan-mode-execute entry (marks when current execution started)
let executeIndex = -1;
for (let i = entries.length - 1; i >= 0; i--) {
const entry = entries[i] as { type: string; customType?: string };
if (entry.customType === "plan-mode-execute") {
executeIndex = i;
break;
}
}
// Only scan messages after the execute marker
const messages: AssistantMessage[] = [];
for (let i = executeIndex + 1; i < entries.length; i++) {
const entry = entries[i];
if (entry.type === "message" && "message" in entry && isAssistantMessage(entry.message as AgentMessage)) {
messages.push(entry.message as AssistantMessage);
}
}
const allText = messages.map(getTextContent).join("\n");
markCompletedSteps(allText, todoItems);
}
if (planModeEnabled) {
pi.setActiveTools(PLAN_MODE_TOOLS);
}
updateStatus(ctx);
});
}

View file

@ -0,0 +1,168 @@
/**
* Pure utility functions for plan mode.
* Extracted for testability.
*/
// Destructive commands blocked in plan mode
// (file mutation, redirections, package managers, mutating git, privilege
// escalation, process/system control, and interactive editors).
const DESTRUCTIVE_PATTERNS = [
  /\brm\b/i,
  /\brmdir\b/i,
  /\bmv\b/i,
  /\bcp\b/i,
  /\bmkdir\b/i,
  /\btouch\b/i,
  /\bchmod\b/i,
  /\bchown\b/i,
  /\bchgrp\b/i,
  /\bln\b/i,
  /\btee\b/i,
  /\btruncate\b/i,
  /\bdd\b/i,
  /\bshred\b/i,
  /(^|[^<])>(?!>)/,
  />>/,
  /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
  /\byarn\s+(add|remove|install|publish)/i,
  /\bpnpm\s+(add|remove|install|publish)/i,
  /\bpip\s+(install|uninstall)/i,
  /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
  /\bbrew\s+(install|uninstall|upgrade)/i,
  /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
  /\bsudo\b/i,
  /\bsu\b/i,
  /\bkill\b/i,
  /\bpkill\b/i,
  /\bkillall\b/i,
  /\breboot\b/i,
  /\bshutdown\b/i,
  /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
  /\bservice\s+\S+\s+(start|stop|restart)/i,
  /\b(vim?|nano|emacs|code|subl)\b/i,
];
// Safe read-only commands allowed in plan mode
// (anchored at the start of the command line).
const SAFE_PATTERNS = [
  /^\s*cat\b/,
  /^\s*head\b/,
  /^\s*tail\b/,
  /^\s*less\b/,
  /^\s*more\b/,
  /^\s*grep\b/,
  /^\s*find\b/,
  /^\s*ls\b/,
  /^\s*pwd\b/,
  /^\s*echo\b/,
  /^\s*printf\b/,
  /^\s*wc\b/,
  /^\s*sort\b/,
  /^\s*uniq\b/,
  /^\s*diff\b/,
  /^\s*file\b/,
  /^\s*stat\b/,
  /^\s*du\b/,
  /^\s*df\b/,
  /^\s*tree\b/,
  /^\s*which\b/,
  /^\s*whereis\b/,
  /^\s*type\b/,
  /^\s*env\b/,
  /^\s*printenv\b/,
  /^\s*uname\b/,
  /^\s*whoami\b/,
  /^\s*id\b/,
  /^\s*date\b/,
  /^\s*cal\b/,
  /^\s*uptime\b/,
  /^\s*ps\b/,
  /^\s*top\b/,
  /^\s*htop\b/,
  /^\s*free\b/,
  /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
  /^\s*git\s+ls-/i,
  /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
  /^\s*yarn\s+(list|info|why|audit)/i,
  /^\s*node\s+--version/i,
  /^\s*python\s+--version/i,
  /^\s*curl\s/i,
  /^\s*wget\s+-O\s*-/i,
  /^\s*jq\b/,
  /^\s*sed\s+-n/i,
  /^\s*awk\b/,
  /^\s*rg\b/,
  /^\s*fd\b/,
  /^\s*bat\b/,
  /^\s*exa\b/,
];
/**
 * Decide whether a bash command may run in plan mode.
 * Deny wins over allow: any destructive match rejects the command, and a
 * command that matches neither list is rejected as well.
 */
export function isSafeCommand(command: string): boolean {
  if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
    return false;
  }
  return SAFE_PATTERNS.some((pattern) => pattern.test(command));
}
// One extracted plan step, tracked during execution.
export interface TodoItem {
// 1-based sequential position of the step in the plan.
step: number;
// Cleaned, truncated step description (see cleanStepText).
text: string;
// True once a matching [DONE:n] tag has been observed.
completed: boolean;
}
/**
 * Normalize a raw plan-step string: strips markdown emphasis and inline
 * code markers, drops a leading imperative verb ("Run the ...", "Create ...",
 * etc.), collapses whitespace, capitalizes the first letter, and truncates
 * anything longer than 50 characters to 47 characters plus "...".
 */
export function cleanStepText(text: string): string {
  let result = text.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1"); // drop **bold** / *italic*
  result = result.replace(/`([^`]+)`/g, "$1"); // drop `code` markers
  result = result.replace(
    /^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
    "",
  );
  result = result.replace(/\s+/g, " ").trim();
  if (result.length > 0) {
    result = result.charAt(0).toUpperCase() + result.slice(1);
  }
  return result.length > 50 ? `${result.slice(0, 47)}...` : result;
}
/**
 * Extract numbered plan steps from an assistant message.
 * Looks for a "Plan:" header (optionally bold, case-insensitive) and then
 * collects lines of the form "1. step" / "1) step" after it. Steps are
 * renumbered sequentially starting from 1.
 */
export function extractTodoItems(message: string): TodoItem[] {
  const header = message.match(/\*{0,2}Plan:\*{0,2}\s*\n/i);
  if (!header) return [];
  const start = message.indexOf(header[0]) + header[0].length;
  const planSection = message.slice(start);
  const items: TodoItem[] = [];
  for (const match of planSection.matchAll(/^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm)) {
    const raw = match[2]
      .trim()
      .replace(/\*{1,2}$/, "")
      .trim();
    // Skip fragments that are too short or look like code, paths/commands, or bullets.
    if (raw.length <= 5 || raw.startsWith("`") || raw.startsWith("/") || raw.startsWith("-")) {
      continue;
    }
    const cleaned = cleanStepText(raw);
    if (cleaned.length > 3) {
      items.push({ step: items.length + 1, text: cleaned, completed: false });
    }
  }
  return items;
}
/**
 * Collect step numbers from [DONE:n] tags (case-insensitive) in a message.
 */
export function extractDoneSteps(message: string): number[] {
  const steps: number[] = [];
  for (const m of message.matchAll(/\[DONE:(\d+)\]/gi)) {
    const n = Number(m[1]);
    if (Number.isFinite(n)) {
      steps.push(n);
    }
  }
  return steps;
}
/**
 * Mark todo items whose step number appears in a [DONE:n] tag as completed.
 * Mutates `items` in place; returns the number of tags found in `text`.
 */
export function markCompletedSteps(text: string, items: TodoItem[]): number {
  const done = extractDoneSteps(text);
  for (const step of done) {
    const target = items.find((t) => t.step === step);
    if (target) {
      target.completed = true;
    }
  }
  return done.length;
}

View file

@ -1,23 +1,50 @@
/** /**
* Question Tool - Let the LLM ask the user a question with options * Question Tool - Single question with options
* Full custom UI: options list + inline editor for "Type something..."
* Escape in editor returns to options, Escape in options cancels
*/ */
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Text } from "@mariozechner/pi-tui"; import { Editor, type EditorTheme, Key, matchesKey, Text, truncateToWidth } from "@mariozechner/pi-tui";
import { Type } from "@sinclair/typebox"; import { Type } from "@sinclair/typebox";
interface OptionWithDesc {
label: string;
description?: string;
}
type DisplayOption = OptionWithDesc & { isOther?: boolean };
interface QuestionDetails { interface QuestionDetails {
question: string; question: string;
options: string[]; options: string[];
answer: string | null; answer: string | null;
wasCustom?: boolean;
} }
// Support both simple strings and objects with descriptions
const OptionSchema = Type.Union([
Type.String(),
Type.Object({
label: Type.String({ description: "Display label for the option" }),
description: Type.Optional(Type.String({ description: "Optional description shown below label" })),
}),
]);
const QuestionParams = Type.Object({ const QuestionParams = Type.Object({
question: Type.String({ description: "The question to ask the user" }), question: Type.String({ description: "The question to ask the user" }),
options: Type.Array(Type.String(), { description: "Options for the user to choose from" }), options: Type.Array(OptionSchema, { description: "Options for the user to choose from" }),
}); });
export default function (pi: ExtensionAPI) { // Normalize option to { label, description? }
function normalizeOption(opt: string | { label: string; description?: string }): OptionWithDesc {
if (typeof opt === "string") {
return { label: opt };
}
return opt;
}
export default function question(pi: ExtensionAPI) {
pi.registerTool({ pi.registerTool({
name: "question", name: "question",
label: "Question", label: "Question",
@ -28,7 +55,11 @@ export default function (pi: ExtensionAPI) {
if (!ctx.hasUI) { if (!ctx.hasUI) {
return { return {
content: [{ type: "text", text: "Error: UI not available (running in non-interactive mode)" }], content: [{ type: "text", text: "Error: UI not available (running in non-interactive mode)" }],
details: { question: params.question, options: params.options, answer: null } as QuestionDetails, details: {
question: params.question,
options: params.options.map((o) => (typeof o === "string" ? o : o.label)),
answer: null,
} as QuestionDetails,
}; };
} }
@ -39,25 +70,183 @@ export default function (pi: ExtensionAPI) {
}; };
} }
const answer = await ctx.ui.select(params.question, params.options); // Normalize options
const normalizedOptions = params.options.map(normalizeOption);
const allOptions: DisplayOption[] = [...normalizedOptions, { label: "Type something.", isOther: true }];
if (answer === undefined) { const result = await ctx.ui.custom<{ answer: string; wasCustom: boolean; index?: number } | null>(
(tui, theme, _kb, done) => {
let optionIndex = 0;
let editMode = false;
let cachedLines: string[] | undefined;
const editorTheme: EditorTheme = {
borderColor: (s) => theme.fg("accent", s),
selectList: {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
},
};
const editor = new Editor(editorTheme);
editor.onSubmit = (value) => {
const trimmed = value.trim();
if (trimmed) {
done({ answer: trimmed, wasCustom: true });
} else {
editMode = false;
editor.setText("");
refresh();
}
};
function refresh() {
cachedLines = undefined;
tui.requestRender();
}
function handleInput(data: string) {
if (editMode) {
if (matchesKey(data, Key.escape)) {
editMode = false;
editor.setText("");
refresh();
return;
}
editor.handleInput(data);
refresh();
return;
}
if (matchesKey(data, Key.up)) {
optionIndex = Math.max(0, optionIndex - 1);
refresh();
return;
}
if (matchesKey(data, Key.down)) {
optionIndex = Math.min(allOptions.length - 1, optionIndex + 1);
refresh();
return;
}
if (matchesKey(data, Key.enter)) {
const selected = allOptions[optionIndex];
if (selected.isOther) {
editMode = true;
refresh();
} else {
done({ answer: selected.label, wasCustom: false, index: optionIndex + 1 });
}
return;
}
if (matchesKey(data, Key.escape)) {
done(null);
}
}
function render(width: number): string[] {
if (cachedLines) return cachedLines;
const lines: string[] = [];
const add = (s: string) => lines.push(truncateToWidth(s, width));
add(theme.fg("accent", "─".repeat(width)));
add(theme.fg("text", ` ${params.question}`));
lines.push("");
for (let i = 0; i < allOptions.length; i++) {
const opt = allOptions[i];
const selected = i === optionIndex;
const isOther = opt.isOther === true;
const prefix = selected ? theme.fg("accent", "> ") : " ";
if (isOther && editMode) {
add(prefix + theme.fg("accent", `${i + 1}. ${opt.label}`));
} else if (selected) {
add(prefix + theme.fg("accent", `${i + 1}. ${opt.label}`));
} else {
add(` ${theme.fg("text", `${i + 1}. ${opt.label}`)}`);
}
// Show description if present
if (opt.description) {
add(` ${theme.fg("muted", opt.description)}`);
}
}
if (editMode) {
lines.push("");
add(theme.fg("muted", " Your answer:"));
for (const line of editor.render(width - 2)) {
add(` ${line}`);
}
}
lines.push("");
if (editMode) {
add(theme.fg("dim", " Enter to submit • Esc to go back"));
} else {
add(theme.fg("dim", " ↑↓ navigate • Enter to select • Esc to cancel"));
}
add(theme.fg("accent", "─".repeat(width)));
cachedLines = lines;
return lines;
}
return {
render,
invalidate: () => {
cachedLines = undefined;
},
handleInput,
};
},
);
// Build simple options list for details
const simpleOptions = normalizedOptions.map((o) => o.label);
if (!result) {
return { return {
content: [{ type: "text", text: "User cancelled the selection" }], content: [{ type: "text", text: "User cancelled the selection" }],
details: { question: params.question, options: params.options, answer: null } as QuestionDetails, details: { question: params.question, options: simpleOptions, answer: null } as QuestionDetails,
}; };
} }
if (result.wasCustom) {
return {
content: [{ type: "text", text: `User wrote: ${result.answer}` }],
details: {
question: params.question,
options: simpleOptions,
answer: result.answer,
wasCustom: true,
} as QuestionDetails,
};
}
return { return {
content: [{ type: "text", text: `User selected: ${answer}` }], content: [{ type: "text", text: `User selected: ${result.index}. ${result.answer}` }],
details: { question: params.question, options: params.options, answer } as QuestionDetails, details: {
question: params.question,
options: simpleOptions,
answer: result.answer,
wasCustom: false,
} as QuestionDetails,
}; };
}, },
renderCall(args, theme) { renderCall(args, theme) {
let text = theme.fg("toolTitle", theme.bold("question ")) + theme.fg("muted", args.question); let text = theme.fg("toolTitle", theme.bold("question ")) + theme.fg("muted", args.question);
if (args.options?.length) { const opts = Array.isArray(args.options) ? args.options : [];
text += `\n${theme.fg("dim", ` Options: ${args.options.join(", ")}`)}`; if (opts.length) {
const labels = opts.map((o: string | { label: string }) => (typeof o === "string" ? o : o.label));
const numbered = [...labels, "Type something."].map((o, i) => `${i + 1}. ${o}`);
text += `\n${theme.fg("dim", ` Options: ${numbered.join(", ")}`)}`;
} }
return new Text(text, 0, 0); return new Text(text, 0, 0);
}, },
@ -73,7 +262,16 @@ export default function (pi: ExtensionAPI) {
return new Text(theme.fg("warning", "Cancelled"), 0, 0); return new Text(theme.fg("warning", "Cancelled"), 0, 0);
} }
return new Text(theme.fg("success", "✓ ") + theme.fg("accent", details.answer), 0, 0); if (details.wasCustom) {
return new Text(
theme.fg("success", "✓ ") + theme.fg("muted", "(wrote) ") + theme.fg("accent", details.answer),
0,
0,
);
}
const idx = details.options.indexOf(details.answer) + 1;
const display = idx > 0 ? `${idx}. ${details.answer}` : details.answer;
return new Text(theme.fg("success", "✓ ") + theme.fg("accent", display), 0, 0);
}, },
}); });
} }

View file

@ -0,0 +1,427 @@
/**
* Questionnaire Tool - Unified tool for asking single or multiple questions
*
* Single question: simple options list
* Multiple questions: tab bar navigation between questions
*/
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { Editor, type EditorTheme, Key, matchesKey, Text, truncateToWidth } from "@mariozechner/pi-tui";
import { Type } from "@sinclair/typebox";
// Types
// A selectable option within a question.
interface QuestionOption {
value: string;
label: string;
description?: string;
}
// Option as rendered; the synthetic "Type something." entry sets isOther.
type RenderOption = QuestionOption & { isOther?: boolean };
// A single normalized question (label/allowOther defaults applied).
interface Question {
id: string;
label: string;
prompt: string;
options: QuestionOption[];
allowOther: boolean;
}
// The user's answer to one question.
interface Answer {
id: string;
value: string;
label: string;
// True when the user typed a free-form answer instead of picking an option.
wasCustom: boolean;
// 1-based option index when a listed option was chosen.
index?: number;
}
// Overall tool result: returned as details and consumed by renderResult.
interface QuestionnaireResult {
questions: Question[];
answers: Answer[];
cancelled: boolean;
}
// Schema
// TypeBox schemas describing the tool's input parameters.
const QuestionOptionSchema = Type.Object({
value: Type.String({ description: "The value returned when selected" }),
label: Type.String({ description: "Display label for the option" }),
description: Type.Optional(Type.String({ description: "Optional description shown below label" })),
});
const QuestionSchema = Type.Object({
id: Type.String({ description: "Unique identifier for this question" }),
label: Type.Optional(
Type.String({
description: "Short contextual label for tab bar, e.g. 'Scope', 'Priority' (defaults to Q1, Q2)",
}),
),
prompt: Type.String({ description: "The full question text to display" }),
options: Type.Array(QuestionOptionSchema, { description: "Available options to choose from" }),
allowOther: Type.Optional(Type.Boolean({ description: "Allow 'Type something' option (default: true)" })),
});
const QuestionnaireParams = Type.Object({
questions: Type.Array(QuestionSchema, { description: "Questions to ask the user" }),
});
// Build a cancelled tool result that carries a plain-text error message.
function errorResult(
  message: string,
  questions: Question[] = [],
): { content: { type: "text"; text: string }[]; details: QuestionnaireResult } {
  const details: QuestionnaireResult = { questions, answers: [], cancelled: true };
  return { content: [{ type: "text", text: message }], details };
}
/**
 * Questionnaire tool registration.
 *
 * Single question: simple option list. Multiple questions: tab-bar UI with
 * one tab per question plus a Submit tab. Each question can offer a
 * free-form "Type something." entry (allowOther, default on) that opens an
 * inline editor. Escape in the editor returns to the options; Escape in the
 * options (or on the Submit tab) cancels the whole questionnaire.
 */
export default function questionnaire(pi: ExtensionAPI) {
pi.registerTool({
name: "questionnaire",
label: "Questionnaire",
description:
"Ask the user one or more questions. Use for clarifying requirements, getting preferences, or confirming decisions. For single questions, shows a simple option list. For multiple questions, shows a tab-based interface.",
parameters: QuestionnaireParams,
async execute(_toolCallId, params, _onUpdate, ctx, _signal) {
if (!ctx.hasUI) {
return errorResult("Error: UI not available (running in non-interactive mode)");
}
if (params.questions.length === 0) {
return errorResult("Error: No questions provided");
}
// Normalize questions with defaults
const questions: Question[] = params.questions.map((q, i) => ({
...q,
label: q.label || `Q${i + 1}`,
allowOther: q.allowOther !== false,
}));
const isMulti = questions.length > 1;
const totalTabs = questions.length + 1; // questions + Submit
const result = await ctx.ui.custom<QuestionnaireResult>((tui, theme, _kb, done) => {
// State
// currentTab: 0..questions.length-1 = question tabs; questions.length = Submit tab.
let currentTab = 0;
let optionIndex = 0;
let inputMode = false;
let inputQuestionId: string | null = null;
// Render cache, invalidated by refresh()/invalidate().
let cachedLines: string[] | undefined;
const answers = new Map<string, Answer>();
// Editor for "Type something" option
const editorTheme: EditorTheme = {
borderColor: (s) => theme.fg("accent", s),
selectList: {
selectedPrefix: (t) => theme.fg("accent", t),
selectedText: (t) => theme.fg("accent", t),
description: (t) => theme.fg("muted", t),
scrollInfo: (t) => theme.fg("dim", t),
noMatch: (t) => theme.fg("warning", t),
},
};
const editor = new Editor(editorTheme);
// Helpers
function refresh() {
cachedLines = undefined;
tui.requestRender();
}
function submit(cancelled: boolean) {
done({ questions, answers: Array.from(answers.values()), cancelled });
}
function currentQuestion(): Question | undefined {
return questions[currentTab];
}
// Options for the current question, plus the synthetic "Type something." entry.
function currentOptions(): RenderOption[] {
const q = currentQuestion();
if (!q) return [];
const opts: RenderOption[] = [...q.options];
if (q.allowOther) {
opts.push({ value: "__other__", label: "Type something.", isOther: true });
}
return opts;
}
function allAnswered(): boolean {
return questions.every((q) => answers.has(q.id));
}
// After answering: single question submits immediately; multi moves to the
// next question tab, or to the Submit tab after the last one.
function advanceAfterAnswer() {
if (!isMulti) {
submit(false);
return;
}
if (currentTab < questions.length - 1) {
currentTab++;
} else {
currentTab = questions.length; // Submit tab
}
optionIndex = 0;
refresh();
}
function saveAnswer(questionId: string, value: string, label: string, wasCustom: boolean, index?: number) {
answers.set(questionId, { id: questionId, value, label, wasCustom, index });
}
// Editor submit callback
editor.onSubmit = (value) => {
if (!inputQuestionId) return;
const trimmed = value.trim() || "(no response)";
saveAnswer(inputQuestionId, trimmed, trimmed, true);
inputMode = false;
inputQuestionId = null;
editor.setText("");
advanceAfterAnswer();
};
function handleInput(data: string) {
// Input mode: route to editor
if (inputMode) {
if (matchesKey(data, Key.escape)) {
inputMode = false;
inputQuestionId = null;
editor.setText("");
refresh();
return;
}
editor.handleInput(data);
refresh();
return;
}
const q = currentQuestion();
const opts = currentOptions();
// Tab navigation (multi-question only)
if (isMulti) {
if (matchesKey(data, Key.tab) || matchesKey(data, Key.right)) {
currentTab = (currentTab + 1) % totalTabs;
optionIndex = 0;
refresh();
return;
}
if (matchesKey(data, Key.shift("tab")) || matchesKey(data, Key.left)) {
currentTab = (currentTab - 1 + totalTabs) % totalTabs;
optionIndex = 0;
refresh();
return;
}
}
// Submit tab
if (currentTab === questions.length) {
if (matchesKey(data, Key.enter) && allAnswered()) {
submit(false);
} else if (matchesKey(data, Key.escape)) {
submit(true);
}
return;
}
// Option navigation
if (matchesKey(data, Key.up)) {
optionIndex = Math.max(0, optionIndex - 1);
refresh();
return;
}
if (matchesKey(data, Key.down)) {
optionIndex = Math.min(opts.length - 1, optionIndex + 1);
refresh();
return;
}
// Select option
if (matchesKey(data, Key.enter) && q) {
const opt = opts[optionIndex];
if (opt.isOther) {
inputMode = true;
inputQuestionId = q.id;
editor.setText("");
refresh();
return;
}
saveAnswer(q.id, opt.value, opt.label, false, optionIndex + 1);
advanceAfterAnswer();
return;
}
// Cancel
if (matchesKey(data, Key.escape)) {
submit(true);
}
}
function render(width: number): string[] {
if (cachedLines) return cachedLines;
const lines: string[] = [];
const q = currentQuestion();
const opts = currentOptions();
// Helper to add truncated line
const add = (s: string) => lines.push(truncateToWidth(s, width));
add(theme.fg("accent", "─".repeat(width)));
// Tab bar (multi-question only)
if (isMulti) {
const tabs: string[] = ["← "];
for (let i = 0; i < questions.length; i++) {
const isActive = i === currentTab;
const isAnswered = answers.has(questions[i].id);
const lbl = questions[i].label;
const box = isAnswered ? "■" : "□";
const color = isAnswered ? "success" : "muted";
const text = ` ${box} ${lbl} `;
const styled = isActive ? theme.bg("selectedBg", theme.fg("text", text)) : theme.fg(color, text);
tabs.push(`${styled} `);
}
const canSubmit = allAnswered();
const isSubmitTab = currentTab === questions.length;
const submitText = " ✓ Submit ";
const submitStyled = isSubmitTab
? theme.bg("selectedBg", theme.fg("text", submitText))
: theme.fg(canSubmit ? "success" : "dim", submitText);
tabs.push(`${submitStyled}`);
add(` ${tabs.join("")}`);
lines.push("");
}
// Helper to render options list
function renderOptions() {
for (let i = 0; i < opts.length; i++) {
const opt = opts[i];
const selected = i === optionIndex;
const isOther = opt.isOther === true;
const prefix = selected ? theme.fg("accent", "> ") : " ";
const color = selected ? "accent" : "text";
// Mark "Type something" differently when in input mode
if (isOther && inputMode) {
add(prefix + theme.fg("accent", `${i + 1}. ${opt.label}`));
} else {
add(prefix + theme.fg(color, `${i + 1}. ${opt.label}`));
}
if (opt.description) {
add(` ${theme.fg("muted", opt.description)}`);
}
}
}
// Content
if (inputMode && q) {
add(theme.fg("text", ` ${q.prompt}`));
lines.push("");
// Show options for reference
renderOptions();
lines.push("");
add(theme.fg("muted", " Your answer:"));
for (const line of editor.render(width - 2)) {
add(` ${line}`);
}
lines.push("");
add(theme.fg("dim", " Enter to submit • Esc to cancel"));
} else if (currentTab === questions.length) {
// Submit tab: summary of all answers plus submit/missing hint.
add(theme.fg("accent", theme.bold(" Ready to submit")));
lines.push("");
for (const question of questions) {
const answer = answers.get(question.id);
if (answer) {
const prefix = answer.wasCustom ? "(wrote) " : "";
add(`${theme.fg("muted", ` ${question.label}: `)}${theme.fg("text", prefix + answer.label)}`);
}
}
lines.push("");
if (allAnswered()) {
add(theme.fg("success", " Press Enter to submit"));
} else {
const missing = questions
.filter((q) => !answers.has(q.id))
.map((q) => q.label)
.join(", ");
add(theme.fg("warning", ` Unanswered: ${missing}`));
}
} else if (q) {
add(theme.fg("text", ` ${q.prompt}`));
lines.push("");
renderOptions();
}
lines.push("");
if (!inputMode) {
const help = isMulti
? " Tab/←→ navigate • ↑↓ select • Enter confirm • Esc cancel"
: " ↑↓ navigate • Enter select • Esc cancel";
add(theme.fg("dim", help));
}
add(theme.fg("accent", "─".repeat(width)));
cachedLines = lines;
return lines;
}
return {
render,
invalidate: () => {
cachedLines = undefined;
},
handleInput,
};
});
if (result.cancelled) {
return {
content: [{ type: "text", text: "User cancelled the questionnaire" }],
details: result,
};
}
// Summarize answers for the model: one line per question.
const answerLines = result.answers.map((a) => {
const qLabel = questions.find((q) => q.id === a.id)?.label || a.id;
if (a.wasCustom) {
return `${qLabel}: user wrote: ${a.label}`;
}
return `${qLabel}: user selected: ${a.index}. ${a.label}`;
});
return {
content: [{ type: "text", text: answerLines.join("\n") }],
details: result,
};
},
renderCall(args, theme) {
const qs = (args.questions as Question[]) || [];
const count = qs.length;
const labels = qs.map((q) => q.label || q.id).join(", ");
let text = theme.fg("toolTitle", theme.bold("questionnaire "));
text += theme.fg("muted", `${count} question${count !== 1 ? "s" : ""}`);
if (labels) {
text += theme.fg("dim", ` (${truncateToWidth(labels, 40)})`);
}
return new Text(text, 0, 0);
},
renderResult(result, _options, theme) {
const details = result.details as QuestionnaireResult | undefined;
if (!details) {
const text = result.content[0];
return new Text(text?.type === "text" ? text.text : "", 0, 0);
}
if (details.cancelled) {
return new Text(theme.fg("warning", "Cancelled"), 0, 0);
}
const lines = details.answers.map((a) => {
if (a.wasCustom) {
return `${theme.fg("success", "✓ ")}${theme.fg("accent", a.id)}: ${theme.fg("muted", "(wrote) ")}${a.label}`;
}
const display = a.index ? `${a.index}. ${a.label}` : a.label;
return `${theme.fg("success", "✓ ")}${theme.fg("accent", a.id)}: ${display}`;
});
return new Text(lines.join("\n"), 0, 0);
},
});
}

View file

@ -0,0 +1 @@
node_modules

View file

@ -0,0 +1,318 @@
/**
* Sandbox Extension - OS-level sandboxing for bash commands
*
* Uses @anthropic-ai/sandbox-runtime to enforce filesystem and network
* restrictions on bash commands at the OS level (sandbox-exec on macOS,
* bubblewrap on Linux).
*
* Config files (merged, project takes precedence):
* - ~/.pi/agent/sandbox.json (global)
* - <cwd>/.pi/sandbox.json (project-local)
*
* Example .pi/sandbox.json:
* ```json
* {
* "enabled": true,
* "network": {
* "allowedDomains": ["github.com", "*.github.com"],
* "deniedDomains": []
* },
* "filesystem": {
* "denyRead": ["~/.ssh", "~/.aws"],
* "allowWrite": [".", "/tmp"],
* "denyWrite": [".env"]
* }
* }
* ```
*
* Usage:
* - `pi -e ./sandbox` - sandbox enabled with default/config settings
* - `pi -e ./sandbox --no-sandbox` - disable sandboxing
* - `/sandbox` - show current sandbox configuration
*
* Setup:
* 1. Copy sandbox/ directory to ~/.pi/agent/extensions/
* 2. Run `npm install` in ~/.pi/agent/extensions/sandbox/
*
* Linux also requires: bubblewrap, socat, ripgrep
*/
import { spawn } from "node:child_process";
import { existsSync, readFileSync } from "node:fs";
import { homedir } from "node:os";
import { join } from "node:path";
import { SandboxManager, type SandboxRuntimeConfig } from "@anthropic-ai/sandbox-runtime";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { type BashOperations, createBashTool } from "@mariozechner/pi-coding-agent";
/** Sandbox runtime options plus an on/off switch read from sandbox.json. */
interface SandboxConfig extends SandboxRuntimeConfig {
	// When false, the extension falls back to the unsandboxed bash tool.
	enabled?: boolean;
}
/**
 * Built-in sandbox policy, used as the base layer before the global and
 * project config files are merged on top (see loadConfig/deepMerge).
 */
const DEFAULT_CONFIG: SandboxConfig = {
	enabled: true,
	network: {
		// Common package registries and GitHub are reachable by default;
		// other domains are blocked unless a config file allows them.
		allowedDomains: [
			"npmjs.org",
			"*.npmjs.org",
			"registry.npmjs.org",
			"registry.yarnpkg.com",
			"pypi.org",
			"*.pypi.org",
			"github.com",
			"*.github.com",
			"api.github.com",
			"raw.githubusercontent.com",
		],
		deniedDomains: [],
	},
	filesystem: {
		// Credential stores are unreadable by sandboxed commands.
		denyRead: ["~/.ssh", "~/.aws", "~/.gnupg"],
		// Writes are limited to the project directory and /tmp ...
		allowWrite: [".", "/tmp"],
		// ... except secret-looking files, which stay write-protected.
		denyWrite: [".env", ".env.*", "*.pem", "*.key"],
	},
};
/**
 * Load the effective sandbox config for a working directory.
 *
 * Precedence (lowest to highest): built-in defaults, ~/.pi/agent/sandbox.json,
 * <cwd>/.pi/sandbox.json. A missing or unparseable file contributes nothing;
 * parse failures are reported as warnings on stderr.
 */
function loadConfig(cwd: string): SandboxConfig {
	// Best-effort JSON reader: missing file or bad JSON yields an empty overlay.
	const readJson = (configPath: string): Partial<SandboxConfig> => {
		if (!existsSync(configPath)) return {};
		try {
			return JSON.parse(readFileSync(configPath, "utf-8"));
		} catch (e) {
			console.error(`Warning: Could not parse ${configPath}: ${e}`);
			return {};
		}
	};
	const globalConfig = readJson(join(homedir(), ".pi", "agent", "sandbox.json"));
	const projectConfig = readJson(join(cwd, ".pi", "sandbox.json"));
	return deepMerge(deepMerge(DEFAULT_CONFIG, globalConfig), projectConfig);
}
/**
 * Merge sandbox config overrides onto a base config.
 *
 * The top-level `enabled` flag is replaced when set; `network` and
 * `filesystem` are merged one level deep with `overrides` winning per key.
 * The extra runtime options `ignoreViolations` and
 * `enableWeakerNestedSandbox` (not part of the declared shape) are copied
 * through when present. Neither input is mutated.
 */
function deepMerge(base: SandboxConfig, overrides: Partial<SandboxConfig>): SandboxConfig {
	type ExtraOptions = {
		ignoreViolations?: Record<string, string[]>;
		enableWeakerNestedSandbox?: boolean;
	};
	const merged: SandboxConfig = { ...base };
	if (overrides.enabled !== undefined) merged.enabled = overrides.enabled;
	if (overrides.network) merged.network = { ...base.network, ...overrides.network };
	if (overrides.filesystem) merged.filesystem = { ...base.filesystem, ...overrides.filesystem };
	const extra = overrides as ExtraOptions;
	const target = merged as SandboxConfig & ExtraOptions;
	if (extra.ignoreViolations) target.ignoreViolations = extra.ignoreViolations;
	if (extra.enableWeakerNestedSandbox !== undefined) {
		target.enableWeakerNestedSandbox = extra.enableWeakerNestedSandbox;
	}
	return merged;
}
/**
 * Build BashOperations that run every command wrapped by the sandbox runtime.
 *
 * Each exec spawns `bash -c <wrapped command>` detached (its own process
 * group) so the whole process tree can be killed on timeout or abort.
 * Rejects with Error("aborted") on abort, Error(`timeout:<seconds>`) on
 * timeout; otherwise resolves with the child's exit code.
 */
function createSandboxedBashOps(): BashOperations {
	return {
		async exec(command, cwd, { onData, signal, timeout }) {
			if (!existsSync(cwd)) {
				throw new Error(`Working directory does not exist: ${cwd}`);
			}
			// Rewrites the command so it executes under the OS-level sandbox
			// (sandbox-exec on macOS, bubblewrap on Linux per the file header).
			const wrappedCommand = await SandboxManager.wrapWithSandbox(command);
			return new Promise((resolve, reject) => {
				const child = spawn("bash", ["-c", wrappedCommand], {
					cwd,
					detached: true, // own process group => killable via -pid
					stdio: ["ignore", "pipe", "pipe"],
				});
				let timedOut = false;
				let timeoutHandle: NodeJS.Timeout | undefined;
				if (timeout !== undefined && timeout > 0) {
					timeoutHandle = setTimeout(() => {
						timedOut = true;
						if (child.pid) {
							try {
								// Negative pid kills the entire process group.
								process.kill(-child.pid, "SIGKILL");
							} catch {
								// Group kill can fail (e.g. group gone); fall back to the child.
								child.kill("SIGKILL");
							}
						}
					}, timeout * 1000); // timeout is in seconds
				}
				child.stdout?.on("data", onData);
				child.stderr?.on("data", onData);
				child.on("error", (err) => {
					if (timeoutHandle) clearTimeout(timeoutHandle);
					reject(err);
				});
				// Abort kills the whole group; the close handler decides the outcome.
				const onAbort = () => {
					if (child.pid) {
						try {
							process.kill(-child.pid, "SIGKILL");
						} catch {
							child.kill("SIGKILL");
						}
					}
				};
				signal?.addEventListener("abort", onAbort, { once: true });
				child.on("close", (code) => {
					if (timeoutHandle) clearTimeout(timeoutHandle);
					signal?.removeEventListener("abort", onAbort);
					// Abort takes precedence over timeout when both occurred.
					if (signal?.aborted) {
						reject(new Error("aborted"));
					} else if (timedOut) {
						reject(new Error(`timeout:${timeout}`));
					} else {
						resolve({ exitCode: code });
					}
				});
			});
		},
	};
}
/**
 * Sandbox extension entry point.
 *
 * Registers a sandboxed replacement for the bash tool, a `/sandbox` command,
 * and a `--no-sandbox` flag. Sandboxing is initialized on session start and
 * torn down on shutdown; until initialization succeeds, bash execution falls
 * back to the unsandboxed local tool.
 */
export default function (pi: ExtensionAPI) {
	pi.registerFlag("no-sandbox", {
		description: "Disable OS-level sandboxing for bash commands",
		type: "boolean",
		default: false,
	});
	const localCwd = process.cwd();
	const localBash = createBashTool(localCwd);
	// Two gates: sandboxEnabled reflects flags/config/platform checks,
	// sandboxInitialized reflects that SandboxManager.initialize() succeeded.
	let sandboxEnabled = false;
	let sandboxInitialized = false;
	pi.registerTool({
		...localBash,
		label: "bash (sandboxed)",
		async execute(id, params, onUpdate, _ctx, signal) {
			// NOTE(review): this wrapper receives (id, params, onUpdate, ctx, signal)
			// but forwards (id, params, signal, onUpdate) to the bash tool —
			// presumably the two interfaces differ; confirm against their types.
			if (!sandboxEnabled || !sandboxInitialized) {
				return localBash.execute(id, params, signal, onUpdate);
			}
			// Fresh tool instance per call so it picks up sandboxed operations.
			const sandboxedBash = createBashTool(localCwd, {
				operations: createSandboxedBashOps(),
			});
			return sandboxedBash.execute(id, params, signal, onUpdate);
		},
	});
	// User-initiated bash commands get the same sandboxed operations.
	pi.on("user_bash", () => {
		if (!sandboxEnabled || !sandboxInitialized) return;
		return { operations: createSandboxedBashOps() };
	});
	pi.on("session_start", async (_event, ctx) => {
		const noSandbox = pi.getFlag("no-sandbox") as boolean;
		if (noSandbox) {
			sandboxEnabled = false;
			ctx.ui.notify("Sandbox disabled via --no-sandbox", "warning");
			return;
		}
		const config = loadConfig(ctx.cwd);
		if (!config.enabled) {
			sandboxEnabled = false;
			ctx.ui.notify("Sandbox disabled via config", "info");
			return;
		}
		// Only macOS and Linux are supported (see file header).
		const platform = process.platform;
		if (platform !== "darwin" && platform !== "linux") {
			sandboxEnabled = false;
			ctx.ui.notify(`Sandbox not supported on ${platform}`, "warning");
			return;
		}
		try {
			// Extra runtime options are not part of SandboxConfig's declared shape.
			const configExt = config as unknown as {
				ignoreViolations?: Record<string, string[]>;
				enableWeakerNestedSandbox?: boolean;
			};
			await SandboxManager.initialize({
				network: config.network,
				filesystem: config.filesystem,
				ignoreViolations: configExt.ignoreViolations,
				enableWeakerNestedSandbox: configExt.enableWeakerNestedSandbox,
			});
			sandboxEnabled = true;
			sandboxInitialized = true;
			const networkCount = config.network?.allowedDomains?.length ?? 0;
			const writeCount = config.filesystem?.allowWrite?.length ?? 0;
			ctx.ui.setStatus(
				"sandbox",
				ctx.ui.theme.fg("accent", `🔒 Sandbox: ${networkCount} domains, ${writeCount} write paths`),
			);
			ctx.ui.notify("Sandbox initialized", "info");
		} catch (err) {
			sandboxEnabled = false;
			ctx.ui.notify(`Sandbox initialization failed: ${err instanceof Error ? err.message : err}`, "error");
		}
	});
	pi.on("session_shutdown", async () => {
		if (sandboxInitialized) {
			try {
				await SandboxManager.reset();
			} catch {
				// Ignore cleanup errors
			}
		}
	});
	pi.registerCommand("sandbox", {
		description: "Show sandbox configuration",
		handler: async (_args, ctx) => {
			if (!sandboxEnabled) {
				ctx.ui.notify("Sandbox is disabled", "info");
				return;
			}
			// Re-read config from disk so the display reflects current files.
			const config = loadConfig(ctx.cwd);
			const lines = [
				"Sandbox Configuration:",
				"",
				"Network:",
				` Allowed: ${config.network?.allowedDomains?.join(", ") || "(none)"}`,
				` Denied: ${config.network?.deniedDomains?.join(", ") || "(none)"}`,
				"",
				"Filesystem:",
				` Deny Read: ${config.filesystem?.denyRead?.join(", ") || "(none)"}`,
				` Allow Write: ${config.filesystem?.allowWrite?.join(", ") || "(none)"}`,
				` Deny Write: ${config.filesystem?.denyWrite?.join(", ") || "(none)"}`,
			];
			ctx.ui.notify(lines.join("\n"), "info");
		},
	});
}

View file

@ -0,0 +1,92 @@
{
"name": "pi-extension-sandbox",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "pi-extension-sandbox",
"version": "1.0.0",
"dependencies": {
"@anthropic-ai/sandbox-runtime": "^0.0.26"
}
},
"node_modules/@anthropic-ai/sandbox-runtime": {
"version": "0.0.26",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sandbox-runtime/-/sandbox-runtime-0.0.26.tgz",
"integrity": "sha512-DYV5LSsVMnzq0lbfaYMSpxZPUMAx4+hy343dRss+pVCLIfF62qOhxpYfZ5TmOk1GTDQm5f9wPprMNSStmnsV4w==",
"license": "Apache-2.0",
"dependencies": {
"@pondwader/socks5-server": "^1.0.10",
"@types/lodash-es": "^4.17.12",
"commander": "^12.1.0",
"lodash-es": "^4.17.21",
"shell-quote": "^1.8.3",
"zod": "^3.24.1"
},
"bin": {
"srt": "dist/cli.js"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@pondwader/socks5-server": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/@pondwader/socks5-server/-/socks5-server-1.0.10.tgz",
"integrity": "sha512-bQY06wzzR8D2+vVCUoBsr5QS2U6UgPUQRmErNwtsuI6vLcyRKkafjkr3KxbtGFf9aBBIV2mcvlsKD1UYaIV+sg==",
"license": "MIT"
},
"node_modules/@types/lodash": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/@types/lodash/-/lodash-4.17.23.tgz",
"integrity": "sha512-RDvF6wTulMPjrNdCoYRC8gNR880JNGT8uB+REUpC2Ns4pRqQJhGz90wh7rgdXDPpCczF3VGktDuFGVnz8zP7HA==",
"license": "MIT"
},
"node_modules/@types/lodash-es": {
"version": "4.17.12",
"resolved": "https://registry.npmjs.org/@types/lodash-es/-/lodash-es-4.17.12.tgz",
"integrity": "sha512-0NgftHUcV4v34VhXm8QBSftKVXtbkBG3ViCjs6+eJ5a6y6Mi/jiFGPc1sC7QK+9BFhWrURE3EOggmWaSxL9OzQ==",
"license": "MIT",
"dependencies": {
"@types/lodash": "*"
}
},
"node_modules/commander": {
"version": "12.1.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz",
"integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==",
"license": "MIT",
"engines": {
"node": ">=18"
}
},
"node_modules/lodash-es": {
"version": "4.17.22",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.22.tgz",
"integrity": "sha512-XEawp1t0gxSi9x01glktRZ5HDy0HXqrM0x5pXQM98EaI0NxO6jVM7omDOxsuEo5UIASAnm2bRp1Jt/e0a2XU8Q==",
"license": "MIT"
},
"node_modules/shell-quote": {
"version": "1.8.3",
"resolved": "https://registry.npmjs.org/shell-quote/-/shell-quote-1.8.3.tgz",
"integrity": "sha512-ObmnIF4hXNg1BqhnHmgbDETF8dLPCggZWBjkQfhZpbszZnYur5DUljTcCHii5LC3J5E0yeO/1LIMyH+UvHQgyw==",
"license": "MIT",
"engines": {
"node": ">= 0.4"
},
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/zod": {
"version": "3.25.76",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
"integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
"license": "MIT",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
}
}
}

View file

@ -0,0 +1,19 @@
{
"name": "pi-extension-sandbox",
"private": true,
"version": "1.0.0",
"type": "module",
"scripts": {
"clean": "echo 'nothing to clean'",
"build": "echo 'nothing to build'",
"check": "echo 'nothing to check'"
},
"pi": {
"extensions": [
"./index.ts"
]
},
"dependencies": {
"@anthropic-ai/sandbox-runtime": "^0.0.26"
}
}

View file

@ -0,0 +1,195 @@
import { complete, getModel } from "@mariozechner/pi-ai";
import type { ExtensionAPI, ExtensionCommandContext } from "@mariozechner/pi-coding-agent";
import { DynamicBorder, getMarkdownTheme } from "@mariozechner/pi-coding-agent";
import { Container, Markdown, matchesKey, Text } from "@mariozechner/pi-tui";
/** Loosely-typed view of one message content part (text or tool call). */
type ContentBlock = {
	type?: string;
	text?: string;
	name?: string;
	arguments?: Record<string, unknown>;
};
/** Minimal shape of a session log entry; only "message" entries are used here. */
type SessionEntry = {
	type: string;
	message?: {
		role?: string;
		content?: unknown;
	};
};
/**
 * Pull plain text out of message content.
 *
 * A bare string yields a single-element array; an array yields the text of
 * every well-formed { type: "text" } block; anything else yields nothing.
 */
const extractTextParts = (content: unknown): string[] => {
	if (typeof content === "string") return [content];
	if (!Array.isArray(content)) return [];
	return content.flatMap((raw) => {
		if (typeof raw !== "object" || raw === null) return [];
		const { type, text } = raw as ContentBlock;
		return type === "text" && typeof text === "string" ? [text] : [];
	});
};
/**
 * Collect human-readable descriptions of tool calls from message content.
 *
 * Non-array content yields nothing; each well-formed toolCall block becomes
 * one "Tool <name> was called with args <json>" line, with missing arguments
 * rendered as an empty object.
 */
const extractToolCallLines = (content: unknown): string[] => {
	if (!Array.isArray(content)) return [];
	const lines: string[] = [];
	for (const raw of content) {
		if (typeof raw !== "object" || raw === null) continue;
		const { type, name, arguments: args } = raw as ContentBlock;
		if (type === "toolCall" && typeof name === "string") {
			lines.push(`Tool ${name} was called with args ${JSON.stringify(args ?? {})}`);
		}
	}
	return lines;
};
/**
 * Flatten session entries into a plain-text transcript.
 *
 * Only user/assistant message entries contribute. Each section holds the
 * role-labelled text (when non-empty after trimming) and, for assistant
 * messages, one line per tool call. Empty sections are dropped and the rest
 * are separated by blank lines.
 */
const buildConversationText = (entries: SessionEntry[]): string => {
	const sections: string[] = [];
	for (const entry of entries) {
		const role = entry.type === "message" ? entry.message?.role : undefined;
		if (role !== "user" && role !== "assistant") continue;
		const content = entry.message?.content;
		const lines: string[] = [];
		const text = extractTextParts(content).join("\n").trim();
		if (text.length > 0) {
			lines.push(`${role === "user" ? "User" : "Assistant"}: ${text}`);
		}
		if (role === "assistant") {
			lines.push(...extractToolCallLines(content));
		}
		if (lines.length > 0) {
			sections.push(lines.join("\n"));
		}
	}
	return sections.join("\n\n");
};
/**
 * Build the LLM prompt for summarizing a conversation, embedding the
 * transcript inside a <conversation> wrapper.
 */
const buildSummaryPrompt = (conversationText: string): string =>
	`Summarize this conversation so I can resume it later.
Include goals, key decisions, progress, open questions, and next steps.
Keep it concise and structured with headings.

<conversation>
${conversationText}
</conversation>`;
const showSummaryUi = async (summary: string, ctx: ExtensionCommandContext) => {
if (!ctx.hasUI) {
return;
}
await ctx.ui.custom((_tui, theme, _kb, done) => {
const container = new Container();
const border = new DynamicBorder((s: string) => theme.fg("accent", s));
const mdTheme = getMarkdownTheme();
container.addChild(border);
container.addChild(new Text(theme.fg("accent", theme.bold("Conversation Summary")), 1, 0));
container.addChild(new Markdown(summary, 1, 1, mdTheme));
container.addChild(new Text(theme.fg("dim", "Press Enter or Esc to close"), 1, 0));
container.addChild(border);
return {
render: (width: number) => container.render(width),
invalidate: () => container.invalidate(),
handleInput: (data: string) => {
if (matchesKey(data, "enter") || matchesKey(data, "escape")) {
done(undefined);
}
},
};
});
};
/**
 * Summarize extension entry point: registers /summarize, which condenses the
 * current session branch into a markdown summary via openai/gpt-5.2 and shows
 * it in a custom TUI overlay.
 */
export default function (pi: ExtensionAPI) {
	pi.registerCommand("summarize", {
		description: "Summarize the current conversation in a custom UI",
		handler: async (_args, ctx) => {
			const branch = ctx.sessionManager.getBranch();
			const conversationText = buildConversationText(branch);
			if (!conversationText.trim()) {
				if (ctx.hasUI) {
					ctx.ui.notify("No conversation text found", "warning");
				}
				return;
			}
			if (ctx.hasUI) {
				ctx.ui.notify("Preparing summary...", "info");
			}
			// Model is hard-coded; a missing model or API key is reported
			// (UI only) and aborts the command silently in headless mode.
			const model = getModel("openai", "gpt-5.2");
			if (!model && ctx.hasUI) {
				ctx.ui.notify("Model openai/gpt-5.2 not found", "warning");
			}
			const apiKey = model ? await ctx.modelRegistry.getApiKey(model) : undefined;
			if (!apiKey && ctx.hasUI) {
				ctx.ui.notify("No API key for openai/gpt-5.2", "warning");
			}
			if (!model || !apiKey) {
				return;
			}
			const summaryMessages = [
				{
					role: "user" as const,
					content: [{ type: "text" as const, text: buildSummaryPrompt(conversationText) }],
					timestamp: Date.now(),
				},
			];
			// NOTE(review): no try/catch around the completion call — a network
			// or API failure surfaces as an unhandled command error; confirm
			// that is the intended behavior for this example extension.
			const response = await complete(model, { messages: summaryMessages }, { apiKey, reasoningEffort: "high" });
			// Keep only the text blocks from the response content.
			const summary = response.content
				.filter((c): c is { type: "text"; text: string } => c.type === "text")
				.map((c) => c.text)
				.join("\n");
			await showSummaryUi(summary, ctx);
		},
	});
}

View file

@ -1,12 +1,12 @@
{ {
"name": "pi-extension-with-deps", "name": "pi-extension-with-deps",
"version": "1.7.0", "version": "1.9.5",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "pi-extension-with-deps", "name": "pi-extension-with-deps",
"version": "1.7.0", "version": "1.9.5",
"dependencies": { "dependencies": {
"ms": "^2.1.3" "ms": "^2.1.3"
}, },

View file

@ -1,7 +1,7 @@
{ {
"name": "pi-extension-with-deps", "name": "pi-extension-with-deps",
"private": true, "private": true,
"version": "1.7.0", "version": "1.9.5",
"type": "module", "type": "module",
"scripts": { "scripts": {
"clean": "echo 'nothing to clean'", "clean": "echo 'nothing to clean'",

View file

@ -37,9 +37,8 @@ import {
discoverModels, discoverModels,
discoverSkills, discoverSkills,
discoverExtensions, discoverExtensions,
discoverCustomTools,
discoverContextFiles, discoverContextFiles,
discoverSlashCommands, discoverPromptTemplates,
loadSettings, loadSettings,
buildSystemPrompt, buildSystemPrompt,
ModelRegistry, ModelRegistry,
@ -92,7 +91,7 @@ const { session } = await createAgentSession({
extensions: [{ factory: myExtension }], extensions: [{ factory: myExtension }],
skills: [], skills: [],
contextFiles: [], contextFiles: [],
slashCommands: [], promptTemplates: [],
sessionManager: SessionManager.inMemory(), sessionManager: SessionManager.inMemory(),
}); });
@ -123,7 +122,7 @@ await session.prompt("Hello");
| `additionalExtensionPaths` | `[]` | Merge with discovery | | `additionalExtensionPaths` | `[]` | Merge with discovery |
| `skills` | Discovered | Skills for prompt | | `skills` | Discovered | Skills for prompt |
| `contextFiles` | Discovered | AGENTS.md files | | `contextFiles` | Discovered | AGENTS.md files |
| `slashCommands` | Discovered | File commands | | `promptTemplates` | Discovered | Prompt templates (slash commands) |
| `sessionManager` | `SessionManager.create(cwd)` | Persistence | | `sessionManager` | `SessionManager.create(cwd)` | Persistence |
| `settingsManager` | From agentDir | Settings overrides | | `settingsManager` | From agentDir | Settings overrides |

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-coding-agent", "name": "@mariozechner/pi-coding-agent",
"version": "0.43.0", "version": "0.45.5",
"description": "Coding agent CLI with read, bash, edit, write tools and session management", "description": "Coding agent CLI with read, bash, edit, write tools and session management",
"type": "module", "type": "module",
"piConfig": { "piConfig": {
@ -39,19 +39,19 @@
}, },
"dependencies": { "dependencies": {
"@mariozechner/clipboard": "^0.3.0", "@mariozechner/clipboard": "^0.3.0",
"@mariozechner/pi-agent-core": "^0.43.0", "@mariozechner/jiti": "^2.6.2",
"@mariozechner/pi-ai": "^0.43.0", "@mariozechner/pi-agent-core": "^0.45.5",
"@mariozechner/pi-tui": "^0.43.0", "@mariozechner/pi-ai": "^0.45.5",
"@mariozechner/pi-tui": "^0.45.5",
"chalk": "^5.5.0", "chalk": "^5.5.0",
"cli-highlight": "^2.1.11", "cli-highlight": "^2.1.11",
"diff": "^8.0.2", "diff": "^8.0.2",
"file-type": "^21.1.1", "file-type": "^21.1.1",
"glob": "^11.0.3", "glob": "^11.0.3",
"jiti": "^2.6.1",
"marked": "^15.0.12", "marked": "^15.0.12",
"minimatch": "^10.1.1", "minimatch": "^10.1.1",
"proper-lockfile": "^4.1.2", "proper-lockfile": "^4.1.2",
"sharp": "^0.34.2" "wasm-vips": "^0.0.16"
}, },
"devDependencies": { "devDependencies": {
"@types/diff": "^7.0.2", "@types/diff": "^7.0.2",

View file

@ -242,7 +242,15 @@ ${chalk.bold("Environment Variables:")}
CEREBRAS_API_KEY - Cerebras API key CEREBRAS_API_KEY - Cerebras API key
XAI_API_KEY - xAI Grok API key XAI_API_KEY - xAI Grok API key
OPENROUTER_API_KEY - OpenRouter API key OPENROUTER_API_KEY - OpenRouter API key
AI_GATEWAY_API_KEY - Vercel AI Gateway API key
ZAI_API_KEY - ZAI API key ZAI_API_KEY - ZAI API key
MISTRAL_API_KEY - Mistral API key
MINIMAX_API_KEY - MiniMax API key
AWS_PROFILE - AWS profile for Amazon Bedrock
AWS_ACCESS_KEY_ID - AWS access key for Amazon Bedrock
AWS_SECRET_ACCESS_KEY - AWS secret key for Amazon Bedrock
AWS_BEARER_TOKEN_BEDROCK - Bedrock API key (bearer token)
AWS_REGION - AWS region for Amazon Bedrock (e.g., us-east-1)
${ENV_AGENT_DIR.padEnd(23)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent) ${ENV_AGENT_DIR.padEnd(23)} - Session storage directory (default: ~/${CONFIG_DIR_NAME}/agent)
${chalk.bold("Available Tools (default: read, bash, edit, write):")} ${chalk.bold("Available Tools (default: read, bash, edit, write):")}

View file

@ -25,7 +25,7 @@ function formatTokenCount(count: number): string {
* List available models, optionally filtered by search pattern * List available models, optionally filtered by search pattern
*/ */
export async function listModels(modelRegistry: ModelRegistry, searchPattern?: string): Promise<void> { export async function listModels(modelRegistry: ModelRegistry, searchPattern?: string): Promise<void> {
const models = await modelRegistry.getAvailable(); const models = modelRegistry.getAvailable();
if (models.length === 0) { if (models.length === 0) {
console.log("No models available. Set API keys in environment variables."); console.log("No models available. Set API keys in environment variables.");

View file

@ -1520,8 +1520,8 @@ export class AgentSession {
if (isContextOverflow(message, contextWindow)) return false; if (isContextOverflow(message, contextWindow)) return false;
const err = message.errorMessage; const err = message.errorMessage;
// Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection error // Match: overloaded_error, rate limit, 429, 500, 502, 503, 504, service unavailable, connection error, other side closed
return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error/i.test( return /overloaded|rate.?limit|too many requests|429|500|502|503|504|service.?unavailable|server error|internal error|connection.?error|other side closed/i.test(
err, err,
); );
} }

View file

@ -1,5 +1,7 @@
/** /**
* Extension loader - loads TypeScript extension modules using jiti. * Extension loader - loads TypeScript extension modules using jiti.
*
* Uses @mariozechner/jiti fork with virtualModules support for compiled Bun binaries.
*/ */
import * as fs from "node:fs"; import * as fs from "node:fs";
@ -7,9 +9,19 @@ import { createRequire } from "node:module";
import * as os from "node:os"; import * as os from "node:os";
import * as path from "node:path"; import * as path from "node:path";
import { fileURLToPath } from "node:url"; import { fileURLToPath } from "node:url";
import { createJiti } from "@mariozechner/jiti";
import * as _bundledPiAgentCore from "@mariozechner/pi-agent-core";
import * as _bundledPiAi from "@mariozechner/pi-ai";
import type { KeyId } from "@mariozechner/pi-tui"; import type { KeyId } from "@mariozechner/pi-tui";
import { createJiti } from "jiti"; import * as _bundledPiTui from "@mariozechner/pi-tui";
// Static imports of packages that extensions may use.
// These MUST be static so Bun bundles them into the compiled binary.
// The virtualModules option then makes them available to extensions.
import * as _bundledTypebox from "@sinclair/typebox";
import { getAgentDir, isBunBinary } from "../../config.js"; import { getAgentDir, isBunBinary } from "../../config.js";
// NOTE: This import works because loader.ts exports are NOT re-exported from index.ts,
// avoiding a circular dependency. Extensions can import from @mariozechner/pi-coding-agent.
import * as _bundledPiCodingAgent from "../../index.js";
import { createEventBus, type EventBus } from "../event-bus.js"; import { createEventBus, type EventBus } from "../event-bus.js";
import type { ExecOptions } from "../exec.js"; import type { ExecOptions } from "../exec.js";
import { execCommand } from "../exec.js"; import { execCommand } from "../exec.js";
@ -24,8 +36,21 @@ import type {
ToolDefinition, ToolDefinition,
} from "./types.js"; } from "./types.js";
/** Modules available to extensions via virtualModules (for compiled Bun binary) */
const VIRTUAL_MODULES: Record<string, unknown> = {
"@sinclair/typebox": _bundledTypebox,
"@mariozechner/pi-agent-core": _bundledPiAgentCore,
"@mariozechner/pi-tui": _bundledPiTui,
"@mariozechner/pi-ai": _bundledPiAi,
"@mariozechner/pi-coding-agent": _bundledPiCodingAgent,
};
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
/**
* Get aliases for jiti (used in Node.js/development mode).
* In Bun binary mode, virtualModules is used instead.
*/
let _aliases: Record<string, string> | null = null; let _aliases: Record<string, string> | null = null;
function getAliases(): Record<string, string> { function getAliases(): Record<string, string> {
if (_aliases) return _aliases; if (_aliases) return _aliases;
@ -38,11 +63,12 @@ function getAliases(): Record<string, string> {
_aliases = { _aliases = {
"@mariozechner/pi-coding-agent": packageIndex, "@mariozechner/pi-coding-agent": packageIndex,
"@mariozechner/pi-coding-agent/extensions": path.resolve(__dirname, "index.js"), "@mariozechner/pi-agent-core": require.resolve("@mariozechner/pi-agent-core"),
"@mariozechner/pi-tui": require.resolve("@mariozechner/pi-tui"), "@mariozechner/pi-tui": require.resolve("@mariozechner/pi-tui"),
"@mariozechner/pi-ai": require.resolve("@mariozechner/pi-ai"), "@mariozechner/pi-ai": require.resolve("@mariozechner/pi-ai"),
"@sinclair/typebox": typeboxRoot, "@sinclair/typebox": typeboxRoot,
}; };
return _aliases; return _aliases;
} }
@ -213,18 +239,15 @@ function createExtensionAPI(
return api; return api;
} }
async function loadBun(path: string) { async function loadExtensionModule(extensionPath: string) {
const module = await import(path);
const factory = (module.default ?? module) as ExtensionFactory;
return typeof factory !== "function" ? undefined : factory;
}
async function loadJiti(path: string) {
const jiti = createJiti(import.meta.url, { const jiti = createJiti(import.meta.url, {
alias: getAliases(), // In Bun binary: use virtualModules for bundled packages (no filesystem resolution)
// Also disable tryNative so jiti handles ALL imports (not just the entry point)
// In Node.js/dev: use aliases to resolve to node_modules paths
...(isBunBinary ? { virtualModules: VIRTUAL_MODULES, tryNative: false } : { alias: getAliases() }),
}); });
const module = await jiti.import(path, { default: true }); const module = await jiti.import(extensionPath, { default: true });
const factory = module as ExtensionFactory; const factory = module as ExtensionFactory;
return typeof factory !== "function" ? undefined : factory; return typeof factory !== "function" ? undefined : factory;
} }
@ -254,7 +277,7 @@ async function loadExtension(
const resolvedPath = resolvePath(extensionPath, cwd); const resolvedPath = resolvePath(extensionPath, cwd);
try { try {
const factory = isBunBinary ? await loadBun(resolvedPath) : await loadJiti(resolvedPath); const factory = await loadExtensionModule(resolvedPath);
if (!factory) { if (!factory) {
return { extension: null, error: `Extension does not export a valid factory function: ${extensionPath}` }; return { extension: null, error: `Extension does not export a valid factory function: ${extensionPath}` };
} }

View file

@ -37,6 +37,7 @@ const ModelDefinitionSchema = Type.Object({
Type.Literal("openai-codex-responses"), Type.Literal("openai-codex-responses"),
Type.Literal("anthropic-messages"), Type.Literal("anthropic-messages"),
Type.Literal("google-generative-ai"), Type.Literal("google-generative-ai"),
Type.Literal("bedrock-converse-stream"),
]), ]),
), ),
reasoning: Type.Boolean(), reasoning: Type.Boolean(),
@ -63,6 +64,7 @@ const ProviderConfigSchema = Type.Object({
Type.Literal("openai-codex-responses"), Type.Literal("openai-codex-responses"),
Type.Literal("anthropic-messages"), Type.Literal("anthropic-messages"),
Type.Literal("google-generative-ai"), Type.Literal("google-generative-ai"),
Type.Literal("bedrock-converse-stream"),
]), ]),
), ),
headers: Type.Optional(Type.Record(Type.String(), Type.String())), headers: Type.Optional(Type.Record(Type.String(), Type.String())),
@ -373,6 +375,13 @@ export class ModelRegistry {
return this.authStorage.getApiKey(model.provider); return this.authStorage.getApiKey(model.provider);
} }
/**
* Get API key for a provider.
*/
async getApiKeyForProvider(provider: string): Promise<string | undefined> {
return this.authStorage.getApiKey(provider);
}
/** /**
* Check if a model is using OAuth credentials (subscription). * Check if a model is using OAuth credentials (subscription).
*/ */

View file

@ -11,6 +11,7 @@ import type { ModelRegistry } from "./model-registry.js";
/** Default model IDs for each known provider */ /** Default model IDs for each known provider */
export const defaultModelPerProvider: Record<KnownProvider, string> = { export const defaultModelPerProvider: Record<KnownProvider, string> = {
"amazon-bedrock": "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
anthropic: "claude-sonnet-4-5", anthropic: "claude-sonnet-4-5",
openai: "gpt-5.1-codex", openai: "gpt-5.1-codex",
"openai-codex": "gpt-5.2-codex", "openai-codex": "gpt-5.2-codex",
@ -20,11 +21,13 @@ export const defaultModelPerProvider: Record<KnownProvider, string> = {
"google-vertex": "gemini-3-pro-preview", "google-vertex": "gemini-3-pro-preview",
"github-copilot": "gpt-4o", "github-copilot": "gpt-4o",
openrouter: "openai/gpt-5.1-codex", openrouter: "openai/gpt-5.1-codex",
"vercel-ai-gateway": "anthropic/claude-opus-4.5",
xai: "grok-4-fast-non-reasoning", xai: "grok-4-fast-non-reasoning",
groq: "openai/gpt-oss-120b", groq: "openai/gpt-oss-120b",
cerebras: "zai-glm-4.6", cerebras: "zai-glm-4.6",
zai: "glm-4.6", zai: "glm-4.6",
mistral: "devstral-medium-latest", mistral: "devstral-medium-latest",
minimax: "MiniMax-M2.1",
opencode: "claude-opus-4-5", opencode: "claude-opus-4-5",
}; };

View file

@ -628,14 +628,16 @@ export async function createAgentSession(options: CreateAgentSessionOptions = {}
steeringMode: settingsManager.getSteeringMode(), steeringMode: settingsManager.getSteeringMode(),
followUpMode: settingsManager.getFollowUpMode(), followUpMode: settingsManager.getFollowUpMode(),
thinkingBudgets: settingsManager.getThinkingBudgets(), thinkingBudgets: settingsManager.getThinkingBudgets(),
getApiKey: async () => { getApiKey: async (provider) => {
const currentModel = agent.state.model; // Use the provider argument from the in-flight request;
if (!currentModel) { // agent.state.model may already be switched mid-turn.
const resolvedProvider = provider || agent.state.model?.provider;
if (!resolvedProvider) {
throw new Error("No model selected"); throw new Error("No model selected");
} }
const key = await modelRegistry.getApiKey(currentModel); const key = await modelRegistry.getApiKeyForProvider(resolvedProvider);
if (!key) { if (!key) {
throw new Error(`No API key found for provider "${currentModel.provider}"`); throw new Error(`No API key found for provider "${resolvedProvider}"`);
} }
return key; return key;
}, },

View file

@ -88,8 +88,6 @@ export type {
UserBashEventResult, UserBashEventResult,
} from "./core/extensions/index.js"; } from "./core/extensions/index.js";
export { export {
createExtensionRuntime,
discoverAndLoadExtensions,
ExtensionRunner, ExtensionRunner,
isBashToolResult, isBashToolResult,
isEditToolResult, isEditToolResult,
@ -98,7 +96,6 @@ export {
isLsToolResult, isLsToolResult,
isReadToolResult, isReadToolResult,
isWriteToolResult, isWriteToolResult,
loadExtensions,
wrapRegisteredTool, wrapRegisteredTool,
wrapRegisteredTools, wrapRegisteredTools,
wrapToolsWithExtensions, wrapToolsWithExtensions,

View file

@ -12,6 +12,55 @@ import {
import { theme } from "../theme/theme.js"; import { theme } from "../theme/theme.js";
import { DynamicBorder } from "./dynamic-border.js"; import { DynamicBorder } from "./dynamic-border.js";
/**
 * Enabled-model filter state: `null` means no filter (every id enabled),
 * while a `string[]` is the explicit, ordered list of enabled ids.
 */
type EnabledIds = string[] | null;
/** Whether `id` is enabled: true under the no-filter state or when listed. */
function isEnabled(enabledIds: EnabledIds, id: string): boolean {
	if (enabledIds === null) return true; // null = no filter, everything enabled
	return enabledIds.indexOf(id) >= 0;
}
/**
 * Toggle one id. Leaving the no-filter state yields a list containing only
 * the toggled id; otherwise the first occurrence is removed, or the id is
 * appended if absent. Returns a new array, never mutating the input.
 */
function toggle(enabledIds: EnabledIds, id: string): EnabledIds {
	if (enabledIds === null) return [id]; // First toggle narrows "all" down to just this one
	const at = enabledIds.indexOf(id);
	if (at < 0) return [...enabledIds, id];
	const next = [...enabledIds];
	next.splice(at, 1);
	return next;
}
/**
 * Enable the target ids (or every id when no targets are given), preserving
 * the existing order and appending newcomers. Collapses back to `null`
 * (no filter) once every known id is enabled.
 */
function enableAll(enabledIds: EnabledIds, allIds: string[], targetIds?: string[]): EnabledIds {
	if (enabledIds === null) return null; // Nothing to do: all already enabled
	const next = [...enabledIds];
	for (const id of targetIds ?? allIds) {
		if (!next.includes(id)) next.push(id);
	}
	return next.length === allIds.length ? null : next;
}
/**
 * Disable the target ids, or everything when no targets are given. From the
 * no-filter state this materializes an explicit list (all ids minus targets,
 * or empty). Returns a new array, never mutating the inputs.
 */
function clearAll(enabledIds: EnabledIds, allIds: string[], targetIds?: string[]): EnabledIds {
	if (enabledIds === null) {
		if (!targetIds) return [];
		return allIds.filter((id) => !targetIds.includes(id));
	}
	const toRemove = new Set(targetIds ?? enabledIds);
	return enabledIds.filter((id) => !toRemove.has(id));
}
/**
 * Move `id` by `delta` positions within the ordered list (materializing the
 * full id order when in the no-filter state). When the id is missing or the
 * move would fall off either end, the current order is returned unchanged.
 */
function move(enabledIds: EnabledIds, allIds: string[], id: string, delta: number): EnabledIds {
	const order = enabledIds === null ? [...allIds] : enabledIds;
	const from = order.indexOf(id);
	if (from < 0) return order;
	const to = from + delta;
	if (to < 0 || to >= order.length) return order;
	const next = [...order];
	const moved = next[from];
	next[from] = next[to];
	next[to] = moved;
	return next;
}
/**
 * Display order: enabled ids first (in their explicit order), then the
 * disabled remainder in `allIds` order. With no filter, `allIds` is returned as-is.
 */
function getSortedIds(enabledIds: EnabledIds, allIds: string[]): string[] {
	if (enabledIds === null) return allIds;
	const enabled = new Set(enabledIds);
	const disabled = allIds.filter((id) => !enabled.has(id));
	return enabledIds.concat(disabled);
}
interface ModelItem { interface ModelItem {
fullId: string; fullId: string;
model: Model<any>; model: Model<any>;
@ -44,7 +93,9 @@ export interface ModelsCallbacks {
* Changes are session-only until explicitly persisted with Ctrl+S. * Changes are session-only until explicitly persisted with Ctrl+S.
*/ */
export class ScopedModelsSelectorComponent extends Container { export class ScopedModelsSelectorComponent extends Container {
private items: ModelItem[] = []; private modelsById: Map<string, Model<any>> = new Map();
private allIds: string[] = [];
private enabledIds: EnabledIds = null;
private filteredItems: ModelItem[] = []; private filteredItems: ModelItem[] = [];
private selectedIndex = 0; private selectedIndex = 0;
private searchInput: Input; private searchInput: Input;
@ -58,28 +109,14 @@ export class ScopedModelsSelectorComponent extends Container {
super(); super();
this.callbacks = callbacks; this.callbacks = callbacks;
// Group models by provider for organized display
const modelsByProvider = new Map<string, Model<any>[]>();
for (const model of config.allModels) { for (const model of config.allModels) {
const list = modelsByProvider.get(model.provider) ?? []; const fullId = `${model.provider}/${model.id}`;
list.push(model); this.modelsById.set(fullId, model);
modelsByProvider.set(model.provider, list); this.allIds.push(fullId);
} }
// Build items - group by provider this.enabledIds = config.hasEnabledModelsFilter ? [...config.enabledModelIds] : null;
for (const [provider, models] of modelsByProvider) { this.filteredItems = this.buildItems();
for (const model of models) {
const fullId = `${provider}/${model.id}`;
// If no filter defined, all models are enabled by default
const isEnabled = !config.hasEnabledModelsFilter || config.enabledModelIds.has(fullId);
this.items.push({
fullId,
model,
enabled: isEnabled,
});
}
}
this.filteredItems = this.getSortedItems();
// Header // Header
this.addChild(new DynamicBorder()); this.addChild(new DynamicBorder());
@ -103,41 +140,34 @@ export class ScopedModelsSelectorComponent extends Container {
this.addChild(this.footerText); this.addChild(this.footerText);
this.addChild(new DynamicBorder()); this.addChild(new DynamicBorder());
this.updateList(); this.updateList();
} }
/** Get items sorted with enabled items first */ private buildItems(): ModelItem[] {
private getSortedItems(): ModelItem[] { return getSortedIds(this.enabledIds, this.allIds).map((id) => ({
const enabled = this.items.filter((i) => i.enabled); fullId: id,
const disabled = this.items.filter((i) => !i.enabled); model: this.modelsById.get(id)!,
return [...enabled, ...disabled]; enabled: isEnabled(this.enabledIds, id),
}));
} }
private getFooterText(): string { private getFooterText(): string {
const enabledCount = this.items.filter((i) => i.enabled).length; const enabledCount = this.enabledIds?.length ?? this.allIds.length;
const allEnabled = enabledCount === this.items.length; const allEnabled = this.enabledIds === null;
const countText = allEnabled ? "all enabled" : `${enabledCount}/${this.items.length} enabled`; const countText = allEnabled ? "all enabled" : `${enabledCount}/${this.allIds.length} enabled`;
const parts = ["Enter toggle", "^A all", "^X clear", "^P provider", "^S save", countText]; const parts = ["Enter toggle", "^A all", "^X clear", "^P provider", "Alt+↑↓ reorder", "^S save", countText];
if (this.isDirty) { return this.isDirty
return theme.fg("dim", ` ${parts.join(" · ")} `) + theme.fg("warning", "(unsaved)"); ? theme.fg("dim", ` ${parts.join(" · ")} `) + theme.fg("warning", "(unsaved)")
} : theme.fg("dim", ` ${parts.join(" · ")}`);
return theme.fg("dim", ` ${parts.join(" · ")}`);
} }
private updateFooter(): void { private refresh(): void {
this.footerText.setText(this.getFooterText()); const query = this.searchInput.getValue();
} const items = this.buildItems();
this.filteredItems = query ? fuzzyFilter(items, query, (i) => `${i.model.id} ${i.model.provider}`) : items;
private filterItems(query: string): void {
const sorted = this.getSortedItems();
if (!query) {
this.filteredItems = sorted;
} else {
this.filteredItems = fuzzyFilter(sorted, query, (item) => `${item.model.id} ${item.model.provider}`);
}
this.selectedIndex = Math.min(this.selectedIndex, Math.max(0, this.filteredItems.length - 1)); this.selectedIndex = Math.min(this.selectedIndex, Math.max(0, this.filteredItems.length - 1));
this.updateList(); this.updateList();
this.footerText.setText(this.getFooterText());
} }
private updateList(): void { private updateList(): void {
@ -153,53 +183,26 @@ export class ScopedModelsSelectorComponent extends Container {
Math.min(this.selectedIndex - Math.floor(this.maxVisible / 2), this.filteredItems.length - this.maxVisible), Math.min(this.selectedIndex - Math.floor(this.maxVisible / 2), this.filteredItems.length - this.maxVisible),
); );
const endIndex = Math.min(startIndex + this.maxVisible, this.filteredItems.length); const endIndex = Math.min(startIndex + this.maxVisible, this.filteredItems.length);
const allEnabled = this.enabledIds === null;
// Only show status if there's a filter (not all models enabled)
const allEnabled = this.items.every((i) => i.enabled);
for (let i = startIndex; i < endIndex; i++) { for (let i = startIndex; i < endIndex; i++) {
const item = this.filteredItems[i]; const item = this.filteredItems[i]!;
if (!item) continue;
const isSelected = i === this.selectedIndex; const isSelected = i === this.selectedIndex;
const prefix = isSelected ? theme.fg("accent", "→ ") : " "; const prefix = isSelected ? theme.fg("accent", "→ ") : " ";
const modelText = isSelected ? theme.fg("accent", item.model.id) : item.model.id; const modelText = isSelected ? theme.fg("accent", item.model.id) : item.model.id;
const providerBadge = theme.fg("muted", ` [${item.model.provider}]`); const providerBadge = theme.fg("muted", ` [${item.model.provider}]`);
// Only show checkmarks when there's actually a filter
const status = allEnabled ? "" : item.enabled ? theme.fg("success", " ✓") : theme.fg("dim", " ✗"); const status = allEnabled ? "" : item.enabled ? theme.fg("success", " ✓") : theme.fg("dim", " ✗");
this.listContainer.addChild(new Text(`${prefix}${modelText}${providerBadge}${status}`, 0, 0)); this.listContainer.addChild(new Text(`${prefix}${modelText}${providerBadge}${status}`, 0, 0));
} }
// Add scroll indicator if needed // Add scroll indicator if needed
if (startIndex > 0 || endIndex < this.filteredItems.length) { if (startIndex > 0 || endIndex < this.filteredItems.length) {
const scrollInfo = theme.fg("muted", ` (${this.selectedIndex + 1}/${this.filteredItems.length})`); this.listContainer.addChild(
this.listContainer.addChild(new Text(scrollInfo, 0, 0)); new Text(theme.fg("muted", ` (${this.selectedIndex + 1}/${this.filteredItems.length})`), 0, 0),
);
} }
} }
private toggleItem(item: ModelItem): void {
// If all models are currently enabled (no scope yet), first toggle starts fresh:
// clear all and enable only the selected model
const allEnabled = this.items.every((i) => i.enabled);
if (allEnabled) {
for (const i of this.items) {
i.enabled = false;
}
item.enabled = true;
this.isDirty = true;
this.callbacks.onClearAll();
this.callbacks.onModelToggle(item.fullId, true);
} else {
item.enabled = !item.enabled;
this.isDirty = true;
this.callbacks.onModelToggle(item.fullId, item.enabled);
}
// Re-sort and re-filter to move item to correct section
this.filterItems(this.searchInput.getValue());
this.updateFooter();
}
handleInput(data: string): void { handleInput(data: string): void {
const kb = getEditorKeybindings(); const kb = getEditorKeybindings();
@ -217,70 +220,81 @@ export class ScopedModelsSelectorComponent extends Container {
return; return;
} }
// Alt+Up/Down - Reorder enabled models
if (matchesKey(data, Key.alt("up")) || matchesKey(data, Key.alt("down"))) {
const item = this.filteredItems[this.selectedIndex];
if (item && isEnabled(this.enabledIds, item.fullId)) {
const delta = matchesKey(data, Key.alt("up")) ? -1 : 1;
const enabledList = this.enabledIds ?? this.allIds;
const currentIndex = enabledList.indexOf(item.fullId);
const newIndex = currentIndex + delta;
// Only move if within bounds
if (newIndex >= 0 && newIndex < enabledList.length) {
this.enabledIds = move(this.enabledIds, this.allIds, item.fullId, delta);
this.isDirty = true;
this.selectedIndex += delta;
this.refresh();
}
}
return;
}
// Toggle on Enter // Toggle on Enter
if (matchesKey(data, Key.enter)) { if (matchesKey(data, Key.enter)) {
const item = this.filteredItems[this.selectedIndex]; const item = this.filteredItems[this.selectedIndex];
if (item) { if (item) {
this.toggleItem(item); const wasAllEnabled = this.enabledIds === null;
this.enabledIds = toggle(this.enabledIds, item.fullId);
this.isDirty = true;
if (wasAllEnabled) this.callbacks.onClearAll();
this.callbacks.onModelToggle(item.fullId, isEnabled(this.enabledIds, item.fullId));
this.refresh();
} }
return; return;
} }
// Ctrl+A - Enable all (filtered if search active, otherwise all) // Ctrl+A - Enable all (filtered if search active, otherwise all)
if (matchesKey(data, Key.ctrl("a"))) { if (matchesKey(data, Key.ctrl("a"))) {
const targets = this.searchInput.getValue() ? this.filteredItems : this.items; const targetIds = this.searchInput.getValue() ? this.filteredItems.map((i) => i.fullId) : undefined;
for (const item of targets) { this.enabledIds = enableAll(this.enabledIds, this.allIds, targetIds);
item.enabled = true;
}
this.isDirty = true; this.isDirty = true;
this.callbacks.onEnableAll(targets.map((i) => i.fullId)); this.callbacks.onEnableAll(targetIds ?? this.allIds);
this.filterItems(this.searchInput.getValue()); this.refresh();
this.updateFooter();
return; return;
} }
// Ctrl+X - Clear all (filtered if search active, otherwise all) // Ctrl+X - Clear all (filtered if search active, otherwise all)
if (matchesKey(data, Key.ctrl("x"))) { if (matchesKey(data, Key.ctrl("x"))) {
const targets = this.searchInput.getValue() ? this.filteredItems : this.items; const targetIds = this.searchInput.getValue() ? this.filteredItems.map((i) => i.fullId) : undefined;
for (const item of targets) { this.enabledIds = clearAll(this.enabledIds, this.allIds, targetIds);
item.enabled = false;
}
this.isDirty = true; this.isDirty = true;
this.callbacks.onClearAll(); this.callbacks.onClearAll();
this.filterItems(this.searchInput.getValue()); this.refresh();
this.updateFooter();
return; return;
} }
// Ctrl+P - Toggle provider of current item // Ctrl+P - Toggle provider of current item
if (matchesKey(data, Key.ctrl("p"))) { if (matchesKey(data, Key.ctrl("p"))) {
const currentItem = this.filteredItems[this.selectedIndex]; const item = this.filteredItems[this.selectedIndex];
if (currentItem) { if (item) {
const provider = currentItem.model.provider; const provider = item.model.provider;
const providerItems = this.items.filter((i) => i.model.provider === provider); const providerIds = this.allIds.filter((id) => this.modelsById.get(id)!.provider === provider);
const allEnabled = providerItems.every((i) => i.enabled); const allEnabled = providerIds.every((id) => isEnabled(this.enabledIds, id));
const newState = !allEnabled; this.enabledIds = allEnabled
for (const item of providerItems) { ? clearAll(this.enabledIds, this.allIds, providerIds)
item.enabled = newState; : enableAll(this.enabledIds, this.allIds, providerIds);
}
this.isDirty = true; this.isDirty = true;
this.callbacks.onToggleProvider( this.callbacks.onToggleProvider(provider, providerIds, !allEnabled);
provider, this.refresh();
providerItems.map((i) => i.fullId),
newState,
);
this.filterItems(this.searchInput.getValue());
this.updateFooter();
} }
return; return;
} }
// Ctrl+S - Save/persist to settings // Ctrl+S - Save/persist to settings
if (matchesKey(data, Key.ctrl("s"))) { if (matchesKey(data, Key.ctrl("s"))) {
const enabledIds = this.items.filter((i) => i.enabled).map((i) => i.fullId); this.callbacks.onPersist(this.enabledIds ?? [...this.allIds]);
this.callbacks.onPersist(enabledIds);
this.isDirty = false; this.isDirty = false;
this.updateFooter(); this.footerText.setText(this.getFooterText());
return; return;
} }
@ -288,7 +302,7 @@ export class ScopedModelsSelectorComponent extends Container {
if (matchesKey(data, Key.ctrl("c"))) { if (matchesKey(data, Key.ctrl("c"))) {
if (this.searchInput.getValue()) { if (this.searchInput.getValue()) {
this.searchInput.setValue(""); this.searchInput.setValue("");
this.filterItems(""); this.refresh();
} else { } else {
this.callbacks.onCancel(); this.callbacks.onCancel();
} }
@ -303,7 +317,7 @@ export class ScopedModelsSelectorComponent extends Container {
// Pass everything else to search input // Pass everything else to search input
this.searchInput.handleInput(data); this.searchInput.handleInput(data);
this.filterItems(this.searchInput.getValue()); this.refresh();
} }
getSearchInput(): Input { getSearchInput(): Input {

View file

@ -603,10 +603,9 @@ export class InteractiveMode {
const entries = parseChangelog(changelogPath); const entries = parseChangelog(changelogPath);
if (!lastVersion) { if (!lastVersion) {
if (entries.length > 0) { // Fresh install - just record the version, don't show changelog
this.settingsManager.setLastChangelogVersion(VERSION); this.settingsManager.setLastChangelogVersion(VERSION);
return entries.map((e) => e.content).join("\n\n"); return undefined;
}
} else { } else {
const newEntries = getNewEntries(entries, lastVersion); const newEntries = getNewEntries(entries, lastVersion);
if (newEntries.length > 0) { if (newEntries.length > 0) {
@ -3271,7 +3270,7 @@ export class InteractiveMode {
} }
// Create the preview URL // Create the preview URL
const previewUrl = `https://shittycodingagent.ai/session?${gistId}`; const previewUrl = `https://buildwithpi.ai/session?${gistId}`;
this.showStatus(`Share URL: ${previewUrl}\nGist: ${gistUrl}`); this.showStatus(`Share URL: ${previewUrl}\nGist: ${gistUrl}`);
} catch (error: unknown) { } catch (error: unknown) {
if (!loader.signal.aborted) { if (!loader.signal.aborted) {

View file

@ -2,13 +2,13 @@
"$schema": "https://raw.githubusercontent.com/badlogic/pi-mono/main/packages/coding-agent/theme-schema.json", "$schema": "https://raw.githubusercontent.com/badlogic/pi-mono/main/packages/coding-agent/theme-schema.json",
"name": "light", "name": "light",
"vars": { "vars": {
"teal": "#5f8787", "teal": "#5a8080",
"blue": "#5f87af", "blue": "#547da7",
"green": "#87af87", "green": "#588458",
"red": "#af5f5f", "red": "#aa5555",
"yellow": "#d7af5f", "yellow": "#9a7326",
"mediumGray": "#6c6c6c", "mediumGray": "#6c6c6c",
"dimGray": "#8a8a8a", "dimGray": "#767676",
"lightGray": "#b0b0b0", "lightGray": "#b0b0b0",
"selectedBg": "#d0d0e0", "selectedBg": "#d0d0e0",
"userMsgBg": "#e8e8e8", "userMsgBg": "#e8e8e8",
@ -68,9 +68,9 @@
"syntaxPunctuation": "#000000", "syntaxPunctuation": "#000000",
"thinkingOff": "lightGray", "thinkingOff": "lightGray",
"thinkingMinimal": "#9e9e9e", "thinkingMinimal": "#767676",
"thinkingLow": "#5f87af", "thinkingLow": "blue",
"thinkingMedium": "#5f8787", "thinkingMedium": "teal",
"thinkingHigh": "#875f87", "thinkingHigh": "#875f87",
"thinkingXhigh": "#8b008b", "thinkingXhigh": "#8b008b",

View file

@ -29,6 +29,12 @@ export interface PrintModeOptions {
*/ */
export async function runPrintMode(session: AgentSession, options: PrintModeOptions): Promise<void> { export async function runPrintMode(session: AgentSession, options: PrintModeOptions): Promise<void> {
const { mode, messages = [], initialMessage, initialImages } = options; const { mode, messages = [], initialMessage, initialImages } = options;
if (mode === "json") {
const header = session.sessionManager.getHeader();
if (header) {
console.log(JSON.stringify(header));
}
}
// Set up extensions for print mode (no UI, no command context) // Set up extensions for print mode (no UI, no command context)
const extensionRunner = session.extensionRunner; const extensionRunner = session.extensionRunner;
if (extensionRunner) { if (extensionRunner) {

View file

@ -1,3 +1,5 @@
import { getVips } from "./vips.js";
/** /**
* Convert image to PNG format for terminal display. * Convert image to PNG format for terminal display.
* Kitty graphics protocol requires PNG format (f=100). * Kitty graphics protocol requires PNG format (f=100).
@ -11,16 +13,23 @@ export async function convertToPng(
return { data: base64Data, mimeType }; return { data: base64Data, mimeType };
} }
const vips = await getVips();
if (!vips) {
// wasm-vips not available
return null;
}
try { try {
const sharp = (await import("sharp")).default;
const buffer = Buffer.from(base64Data, "base64"); const buffer = Buffer.from(base64Data, "base64");
const pngBuffer = await sharp(buffer).png().toBuffer(); const img = vips.Image.newFromBuffer(buffer);
const pngBuffer = img.writeToBuffer(".png");
img.delete();
return { return {
data: pngBuffer.toString("base64"), data: Buffer.from(pngBuffer).toString("base64"),
mimeType: "image/png", mimeType: "image/png",
}; };
} catch { } catch {
// Sharp not available or conversion failed // Conversion failed
return null; return null;
} }
} }

View file

@ -1,4 +1,5 @@
import type { ImageContent } from "@mariozechner/pi-ai"; import type { ImageContent } from "@mariozechner/pi-ai";
import { getVips } from "./vips.js";
export interface ImageResizeOptions { export interface ImageResizeOptions {
maxWidth?: number; // Default: 2000 maxWidth?: number; // Default: 2000
@ -29,9 +30,9 @@ const DEFAULT_OPTIONS: Required<ImageResizeOptions> = {
/** Helper to pick the smaller of two buffers */ /** Helper to pick the smaller of two buffers */
function pickSmaller( function pickSmaller(
a: { buffer: Buffer; mimeType: string }, a: { buffer: Uint8Array; mimeType: string },
b: { buffer: Buffer; mimeType: string }, b: { buffer: Uint8Array; mimeType: string },
): { buffer: Buffer; mimeType: string } { ): { buffer: Uint8Array; mimeType: string } {
return a.buffer.length <= b.buffer.length ? a : b; return a.buffer.length <= b.buffer.length ? a : b;
} }
@ -39,7 +40,7 @@ function pickSmaller(
* Resize an image to fit within the specified max dimensions and file size. * Resize an image to fit within the specified max dimensions and file size.
* Returns the original image if it already fits within the limits. * Returns the original image if it already fits within the limits.
* *
* Uses sharp for image processing. If sharp is not available (e.g., in some * Uses wasm-vips for image processing. If wasm-vips is not available (e.g., in some
* environments), returns the original image unchanged. * environments), returns the original image unchanged.
* *
* Strategy for staying under maxBytes: * Strategy for staying under maxBytes:
@ -52,12 +53,29 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
const opts = { ...DEFAULT_OPTIONS, ...options }; const opts = { ...DEFAULT_OPTIONS, ...options };
const buffer = Buffer.from(img.data, "base64"); const buffer = Buffer.from(img.data, "base64");
let sharp: typeof import("sharp") | undefined; const vipsOrNull = await getVips();
if (!vipsOrNull) {
// wasm-vips not available - return original image
// We can't get dimensions without vips, so return 0s
return {
data: img.data,
mimeType: img.mimeType,
originalWidth: 0,
originalHeight: 0,
width: 0,
height: 0,
wasResized: false,
};
}
// Capture non-null reference for use in nested functions
const vips = vipsOrNull;
// Load image to get metadata
let sourceImg: InstanceType<typeof vips.Image>;
try { try {
sharp = (await import("sharp")).default; sourceImg = vips.Image.newFromBuffer(buffer);
} catch { } catch {
// Sharp not available - return original image // Failed to load image
// We can't get dimensions without sharp, so return 0s
return { return {
data: img.data, data: img.data,
mimeType: img.mimeType, mimeType: img.mimeType,
@ -69,16 +87,14 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
}; };
} }
const sharpImg = sharp(buffer); const originalWidth = sourceImg.width;
const metadata = await sharpImg.metadata(); const originalHeight = sourceImg.height;
const originalWidth = metadata.width ?? 0;
const originalHeight = metadata.height ?? 0;
const format = metadata.format ?? img.mimeType?.split("/")[1] ?? "png";
// Check if already within all limits (dimensions AND size) // Check if already within all limits (dimensions AND size)
const originalSize = buffer.length; const originalSize = buffer.length;
if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= opts.maxBytes) { if (originalWidth <= opts.maxWidth && originalHeight <= opts.maxHeight && originalSize <= opts.maxBytes) {
sourceImg.delete();
const format = img.mimeType?.split("/")[1] ?? "png";
return { return {
data: img.data, data: img.data,
mimeType: img.mimeType ?? `image/${format}`, mimeType: img.mimeType ?? `image/${format}`,
@ -104,37 +120,45 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
} }
// Helper to resize and encode in both formats, returning the smaller one // Helper to resize and encode in both formats, returning the smaller one
async function tryBothFormats( function tryBothFormats(
width: number, width: number,
height: number, height: number,
jpegQuality: number, jpegQuality: number,
): Promise<{ buffer: Buffer; mimeType: string }> { ): { buffer: Uint8Array; mimeType: string } {
const resized = await sharp!(buffer) // Load image fresh and resize using scale factor
.resize(width, height, { fit: "inside", withoutEnlargement: true }) // (Using newFromBuffer + resize instead of thumbnailBuffer to avoid lazy re-read issues)
.toBuffer(); const img = vips.Image.newFromBuffer(buffer);
const scale = Math.min(width / img.width, height / img.height);
const resized = scale < 1 ? img.resize(scale) : img;
const [pngBuffer, jpegBuffer] = await Promise.all([ const pngBuffer = resized.writeToBuffer(".png");
sharp!(resized).png({ compressionLevel: 9 }).toBuffer(), const jpegBuffer = resized.writeToBuffer(".jpg", { Q: jpegQuality });
sharp!(resized).jpeg({ quality: jpegQuality }).toBuffer(),
]); if (resized !== img) {
resized.delete();
}
img.delete();
return pickSmaller({ buffer: pngBuffer, mimeType: "image/png" }, { buffer: jpegBuffer, mimeType: "image/jpeg" }); return pickSmaller({ buffer: pngBuffer, mimeType: "image/png" }, { buffer: jpegBuffer, mimeType: "image/jpeg" });
} }
// Clean up the source image
sourceImg.delete();
// Try to produce an image under maxBytes // Try to produce an image under maxBytes
const qualitySteps = [85, 70, 55, 40]; const qualitySteps = [85, 70, 55, 40];
const scaleSteps = [1.0, 0.75, 0.5, 0.35, 0.25]; const scaleSteps = [1.0, 0.75, 0.5, 0.35, 0.25];
let best: { buffer: Buffer; mimeType: string }; let best: { buffer: Uint8Array; mimeType: string };
let finalWidth = targetWidth; let finalWidth = targetWidth;
let finalHeight = targetHeight; let finalHeight = targetHeight;
// First attempt: resize to target dimensions, try both formats // First attempt: resize to target dimensions, try both formats
best = await tryBothFormats(targetWidth, targetHeight, opts.jpegQuality); best = tryBothFormats(targetWidth, targetHeight, opts.jpegQuality);
if (best.buffer.length <= opts.maxBytes) { if (best.buffer.length <= opts.maxBytes) {
return { return {
data: best.buffer.toString("base64"), data: Buffer.from(best.buffer).toString("base64"),
mimeType: best.mimeType, mimeType: best.mimeType,
originalWidth, originalWidth,
originalHeight, originalHeight,
@ -146,11 +170,11 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
// Still too large - try JPEG with decreasing quality (and compare to PNG each time) // Still too large - try JPEG with decreasing quality (and compare to PNG each time)
for (const quality of qualitySteps) { for (const quality of qualitySteps) {
best = await tryBothFormats(targetWidth, targetHeight, quality); best = tryBothFormats(targetWidth, targetHeight, quality);
if (best.buffer.length <= opts.maxBytes) { if (best.buffer.length <= opts.maxBytes) {
return { return {
data: best.buffer.toString("base64"), data: Buffer.from(best.buffer).toString("base64"),
mimeType: best.mimeType, mimeType: best.mimeType,
originalWidth, originalWidth,
originalHeight, originalHeight,
@ -172,11 +196,11 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
} }
for (const quality of qualitySteps) { for (const quality of qualitySteps) {
best = await tryBothFormats(finalWidth, finalHeight, quality); best = tryBothFormats(finalWidth, finalHeight, quality);
if (best.buffer.length <= opts.maxBytes) { if (best.buffer.length <= opts.maxBytes) {
return { return {
data: best.buffer.toString("base64"), data: Buffer.from(best.buffer).toString("base64"),
mimeType: best.mimeType, mimeType: best.mimeType,
originalWidth, originalWidth,
originalHeight, originalHeight,
@ -191,7 +215,7 @@ export async function resizeImage(img: ImageContent, options?: ImageResizeOption
// Last resort: return smallest version we produced even if over limit // Last resort: return smallest version we produced even if over limit
// (the API will reject it, but at least we tried everything) // (the API will reject it, but at least we tried everything)
return { return {
data: best.buffer.toString("base64"), data: Buffer.from(best.buffer).toString("base64"),
mimeType: best.mimeType, mimeType: best.mimeType,
originalWidth, originalWidth,
originalHeight, originalHeight,

View file

@ -0,0 +1,40 @@
/**
* Singleton wrapper for wasm-vips initialization.
* wasm-vips requires async initialization, so we cache the instance.
*/
import type Vips from "wasm-vips";
// Cached wasm-vips module; set only after initialization succeeds.
let vipsInstance: Awaited<ReturnType<typeof Vips>> | null = null;
// In-flight (or resolved) initialization, shared by concurrent callers.
let vipsInitPromise: Promise<Awaited<ReturnType<typeof Vips>> | null> | null = null;

/**
 * Get the initialized wasm-vips instance.
 * Returns null if wasm-vips is not available or fails to initialize.
 * Initialization is performed at most once at a time; a failed attempt
 * clears the cached promise so a later call can retry.
 */
export async function getVips(): Promise<Awaited<ReturnType<typeof Vips>> | null> {
	if (vipsInstance) return vipsInstance;

	if (!vipsInitPromise) {
		vipsInitPromise = import("wasm-vips")
			.then(async (mod) => {
				vipsInstance = await mod.default();
				return vipsInstance;
			})
			// wasm-vips not available
			.catch(() => null);
	}

	const instance = await vipsInitPromise;
	if (instance === null) {
		vipsInitPromise = null; // Allow retry on failure
	}
	return instance;
}

View file

@ -0,0 +1,144 @@
/**
* Tests for image processing utilities using wasm-vips.
*/
import { describe, expect, it } from "vitest";
import { convertToPng } from "../src/utils/image-convert.js";
import { formatDimensionNote, resizeImage } from "../src/utils/image-resize.js";
import { getVips } from "../src/utils/vips.js";
// Small 2x2 red PNG image (base64)
const TINY_PNG = "iVBORw0KGgoAAAANSUhEUgAAAAIAAAACCAIAAAD91JpzAAAADklEQVQI12P4z8DAwMAAAA0BA/m5sb9AAAAAAElFTkSuQmCC";
// Small 2x2 blue JPEG image (base64)
const TINY_JPEG =
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAACAAIDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAn/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwCwAB//2Q==";
// 100x100 gray PNG (generated with wasm-vips)
const MEDIUM_PNG_100x100 =
"iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAIAAAD/gAIDAAAACXBIWXMAAAPoAAAD6AG1e1JrAAAAtGVYSWZJSSoACAAAAAYAEgEDAAEAAAABAAAAGgEFAAEAAABWAAAAGwEFAAEAAABeAAAAKAEDAAEAAAACAAAAEwIDAAEAAAABAAAAaYcEAAEAAABmAAAAAAAAADhjAADoAwAAOGMAAOgDAAAGAACQBwAEAAAAMDIxMAGRBwAEAAAAAQIDAACgBwAEAAAAMDEwMAGgAwABAAAA//8AAAKgBAABAAAAZAAAAAOgBAABAAAAZAAAAAAAAAC1xMTxAAAA4klEQVR4nO3QoQEAAAiAME/3dF+QvmUSs7zNP8WswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKzArMCswKz9zzpHfptnWvrkoQAAAABJRU5ErkJggg==";
// 200x200 colored PNG (generated with wasm-vips)
const LARGE_PNG_200x200 =
"iVBORw0KGgoAAAANSUhEUgAAAMgAAADICAIAAAAiOjnJAAAACXBIWXMAAAPoAAAD6AG1e1JrAAAAtGVYSWZJSSoACAAAAAYAEgEDAAEAAAABAAAAGgEFAAEAAABWAAAAGwEFAAEAAABeAAAAKAEDAAEAAAACAAAAEwIDAAEAAAABAAAAaYcEAAEAAABmAAAAAAAAADhjAADoAwAAOGMAAOgDAAAGAACQBwAEAAAAMDIxMAGRBwAEAAAAAQIDAACgBwAEAAAAMDEwMAGgAwABAAAA//8AAAKgBAABAAAAyAAAAAOgBAABAAAAyAAAAAAAAADqHRv+AAAD8UlEQVR4nO2UAQnAQACEFtZMy/SxVmJDdggmOOUu7hMtwNsZXG3aAnxwLoVVWKewiuD85V97LN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN8BixSW74BFCst3wCKF5TtgkcLyHbBIYfkOWKSwfAcsUli+AxYpLN/BJIXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5YpLB8ByxSWL4DFiks3wGLFJbvgEUKy3fAIoXlO2CRwvIdsEhh+Q5Y5AHNA7iPx5BmcQAAAABJRU5ErkJggg==";
describe("wasm-vips initialization", () => {
	it("should initialize wasm-vips successfully", async () => {
		const vips = await getVips();
		expect(vips).not.toBeNull();
	});
	it("should return cached instance on subsequent calls", async () => {
		// getVips memoizes the initialized module, so both calls must yield the same object.
		const vips1 = await getVips();
		const vips2 = await getVips();
		expect(vips1).toBe(vips2);
	});
});
describe("convertToPng", () => {
	it("should return original data for PNG input", async () => {
		// Already PNG: convertToPng should pass the data through untouched.
		const result = await convertToPng(TINY_PNG, "image/png");
		expect(result).not.toBeNull();
		expect(result!.data).toBe(TINY_PNG);
		expect(result!.mimeType).toBe("image/png");
	});
	it("should convert JPEG to PNG", async () => {
		const result = await convertToPng(TINY_JPEG, "image/jpeg");
		expect(result).not.toBeNull();
		expect(result!.mimeType).toBe("image/png");
		// Result should be valid base64
		expect(() => Buffer.from(result!.data, "base64")).not.toThrow();
		// PNG magic bytes
		const buffer = Buffer.from(result!.data, "base64");
		expect(buffer[0]).toBe(0x89);
		expect(buffer[1]).toBe(0x50); // 'P'
		expect(buffer[2]).toBe(0x4e); // 'N'
		expect(buffer[3]).toBe(0x47); // 'G'
	});
});
describe("resizeImage", () => {
	it("should return original image if within limits", async () => {
		// 2x2 image fits well inside 100x100 / 1 MiB, so the data must be returned unchanged.
		const result = await resizeImage(
			{ type: "image", data: TINY_PNG, mimeType: "image/png" },
			{ maxWidth: 100, maxHeight: 100, maxBytes: 1024 * 1024 },
		);
		expect(result.wasResized).toBe(false);
		expect(result.data).toBe(TINY_PNG);
		expect(result.originalWidth).toBe(2);
		expect(result.originalHeight).toBe(2);
		expect(result.width).toBe(2);
		expect(result.height).toBe(2);
	});
	it("should resize image exceeding dimension limits", async () => {
		// 100x100 source against a 50x50 cap must be scaled down.
		const result = await resizeImage(
			{ type: "image", data: MEDIUM_PNG_100x100, mimeType: "image/png" },
			{ maxWidth: 50, maxHeight: 50, maxBytes: 1024 * 1024 },
		);
		expect(result.wasResized).toBe(true);
		expect(result.originalWidth).toBe(100);
		expect(result.originalHeight).toBe(100);
		expect(result.width).toBeLessThanOrEqual(50);
		expect(result.height).toBeLessThanOrEqual(50);
	});
	it("should resize image exceeding byte limit", async () => {
		const originalBuffer = Buffer.from(LARGE_PNG_200x200, "base64");
		const originalSize = originalBuffer.length;
		// Set maxBytes to less than the original image size
		const result = await resizeImage(
			{ type: "image", data: LARGE_PNG_200x200, mimeType: "image/png" },
			{ maxWidth: 2000, maxHeight: 2000, maxBytes: Math.floor(originalSize / 2) },
		);
		// Should have tried to reduce size
		const resultBuffer = Buffer.from(result.data, "base64");
		expect(resultBuffer.length).toBeLessThan(originalSize);
	});
	it("should handle JPEG input", async () => {
		// JPEG decoding path: dimensions must still be read correctly.
		const result = await resizeImage(
			{ type: "image", data: TINY_JPEG, mimeType: "image/jpeg" },
			{ maxWidth: 100, maxHeight: 100, maxBytes: 1024 * 1024 },
		);
		expect(result.wasResized).toBe(false);
		expect(result.originalWidth).toBe(2);
		expect(result.originalHeight).toBe(2);
	});
});
describe("formatDimensionNote", () => {
	it("should return undefined for non-resized images", () => {
		expect(
			formatDimensionNote({
				data: "",
				mimeType: "image/png",
				originalWidth: 100,
				originalHeight: 100,
				width: 100,
				height: 100,
				wasResized: false,
			}),
		).toBeUndefined();
	});
	it("should return formatted note for resized images", () => {
		// 2000x1000 shown at 1000x500 is a uniform 2x downscale.
		const note = formatDimensionNote({
			data: "",
			mimeType: "image/png",
			originalWidth: 2000,
			originalHeight: 1000,
			width: 1000,
			height: 500,
			wasResized: true,
		});
		expect(note).toContain("original 2000x1000");
		expect(note).toContain("displayed at 1000x500");
		expect(note).toContain("2.00"); // scale factor
	});
});

View file

@ -1,6 +1,6 @@
import type { Model } from "@mariozechner/pi-ai"; import type { Model } from "@mariozechner/pi-ai";
import { describe, expect, test } from "vitest"; import { describe, expect, test } from "vitest";
import { parseModelPattern } from "../src/core/model-resolver.js"; import { defaultModelPerProvider, findInitialModel, parseModelPattern } from "../src/core/model-resolver.js";
// Mock models for testing // Mock models for testing
const mockModels: Model<"anthropic-messages">[] = [ const mockModels: Model<"anthropic-messages">[] = [
@ -200,3 +200,37 @@ describe("parseModelPattern", () => {
}); });
}); });
}); });
describe("default model selection", () => {
	test("ai-gateway default is opus 4.5", () => {
		expect(defaultModelPerProvider["vercel-ai-gateway"]).toBe("anthropic/claude-opus-4.5");
	});
	test("findInitialModel selects ai-gateway default when available", async () => {
		// Minimal gateway model entry satisfying the Model shape.
		const gatewayModel: Model<"anthropic-messages"> = {
			id: "anthropic/claude-opus-4.5",
			name: "Claude Opus 4.5",
			api: "anthropic-messages",
			provider: "vercel-ai-gateway",
			baseUrl: "https://ai-gateway.vercel.sh",
			reasoning: true,
			input: ["text", "image"],
			cost: { input: 5, output: 15, cacheRead: 0.5, cacheWrite: 5 },
			contextWindow: 200000,
			maxTokens: 8192,
		};
		// Stub registry exposing only the method findInitialModel consults.
		const stubRegistry = {
			getAvailable: async () => [gatewayModel],
		} as unknown as Parameters<typeof findInitialModel>[0]["modelRegistry"];
		const { model } = await findInitialModel({
			scopedModels: [],
			isContinuing: false,
			modelRegistry: stubRegistry,
		});
		expect(model?.provider).toBe("vercel-ai-gateway");
		expect(model?.id).toBe("anthropic/claude-opus-4.5");
	});
});

View file

@ -0,0 +1,261 @@
import { describe, expect, it } from "vitest";
import {
cleanStepText,
extractDoneSteps,
extractTodoItems,
isSafeCommand,
markCompletedSteps,
type TodoItem,
} from "../examples/extensions/plan-mode/utils.js";
describe("isSafeCommand", () => {
	// Table-driven helpers: assert a verdict for a batch of commands.
	const allows = (...commands: string[]) => {
		for (const command of commands) expect(isSafeCommand(command)).toBe(true);
	};
	const blocks = (...commands: string[]) => {
		for (const command of commands) expect(isSafeCommand(command)).toBe(false);
	};

	describe("safe commands", () => {
		it("allows basic read commands", () => {
			allows("ls -la", "cat file.txt", "head -n 10 file.txt", "tail -f log.txt", "grep pattern file", "find . -name '*.ts'");
		});
		it("allows git read commands", () => {
			allows("git status", "git log --oneline", "git diff", "git branch");
		});
		it("allows npm/yarn read commands", () => {
			allows("npm list", "npm outdated", "yarn info react");
		});
		it("allows other safe commands", () => {
			allows("pwd", "echo hello", "wc -l file.txt", "du -sh .", "df -h");
		});
	});

	describe("destructive commands", () => {
		it("blocks file modification commands", () => {
			blocks("rm file.txt", "rm -rf dir", "mv old new", "cp src dst", "mkdir newdir", "touch newfile");
		});
		it("blocks git write commands", () => {
			blocks("git add .", "git commit -m 'msg'", "git push", "git checkout main", "git reset --hard");
		});
		it("blocks package manager installs", () => {
			blocks("npm install lodash", "yarn add react", "pip install requests", "brew install node");
		});
		it("blocks redirects", () => {
			blocks("echo hello > file.txt", "cat foo >> bar", ">file.txt");
		});
		it("blocks dangerous commands", () => {
			blocks("sudo rm -rf /", "kill -9 1234", "reboot");
		});
		it("blocks editors", () => {
			blocks("vim file.txt", "nano file.txt", "code .");
		});
	});

	describe("edge cases", () => {
		it("requires command to be in safe list (not just non-destructive)", () => {
			blocks("unknown-command", "my-script.sh");
		});
		it("handles commands with leading whitespace", () => {
			allows(" ls -la");
			blocks(" rm file");
		});
	});
});
describe("cleanStepText", () => {
it("removes markdown bold/italic", () => {
expect(cleanStepText("**bold text**")).toBe("Bold text");
expect(cleanStepText("*italic text*")).toBe("Italic text");
});
it("removes markdown code", () => {
expect(cleanStepText("run `npm install`")).toBe("Npm install"); // "run" is stripped as action word
expect(cleanStepText("check the `config.json` file")).toBe("Config.json file");
});
it("removes leading action words", () => {
expect(cleanStepText("Create the new file")).toBe("New file");
expect(cleanStepText("Run the tests")).toBe("Tests");
expect(cleanStepText("Check the status")).toBe("Status");
});
it("capitalizes first letter", () => {
expect(cleanStepText("update config")).toBe("Config");
});
it("truncates long text", () => {
const longText = "This is a very long step description that exceeds the maximum allowed length for display";
const result = cleanStepText(longText);
expect(result.length).toBe(50);
expect(result.endsWith("...")).toBe(true);
});
it("normalizes whitespace", () => {
expect(cleanStepText("multiple spaces here")).toBe("Multiple spaces here");
});
});
describe("extractTodoItems", () => {
	it("extracts numbered items after Plan: header", () => {
		const input = `Here's what we'll do:
Plan:
1. First step here
2. Second step here
3. Third step here`;
		const todos = extractTodoItems(input);
		expect(todos).toHaveLength(3);
		expect(todos[0].step).toBe(1);
		expect(todos[0].text).toBe("First step here");
		expect(todos[0].completed).toBe(false);
	});
	it("handles bold Plan header", () => {
		const input = `**Plan:**
1. Do something`;
		expect(extractTodoItems(input)).toHaveLength(1);
	});
	it("handles parenthesis-style numbering", () => {
		const input = `Plan:
1) First item
2) Second item`;
		expect(extractTodoItems(input)).toHaveLength(2);
	});
	it("returns empty array without Plan header", () => {
		// Numbered lists alone are not enough; the Plan: header is required.
		const input = `Here are some steps:
1. First step
2. Second step`;
		expect(extractTodoItems(input)).toHaveLength(0);
	});
	it("filters out short items", () => {
		const input = `Plan:
1. OK
2. This is a proper step`;
		const todos = extractTodoItems(input);
		expect(todos).toHaveLength(1);
		expect(todos[0].text).toContain("proper");
	});
	it("filters out code-like items", () => {
		const input = `Plan:
1. \`npm install\`
2. Run the build process`;
		expect(extractTodoItems(input)).toHaveLength(1);
	});
});
describe("extractDoneSteps", () => {
	it("extracts single DONE marker", () => {
		expect(extractDoneSteps("I've completed the first step [DONE:1]")).toEqual([1]);
	});
	it("extracts multiple DONE markers", () => {
		expect(extractDoneSteps("Did steps [DONE:1] and [DONE:2] and [DONE:3]")).toEqual([1, 2, 3]);
	});
	it("handles case insensitivity", () => {
		expect(extractDoneSteps("[done:1] [DONE:2] [Done:3]")).toEqual([1, 2, 3]);
	});
	it("returns empty array with no markers", () => {
		expect(extractDoneSteps("No markers here")).toEqual([]);
	});
	it("ignores malformed markers", () => {
		// Only numeric step ids count; empty or alphabetic ids are dropped.
		expect(extractDoneSteps("[DONE:abc] [DONE:] [DONE:1]")).toEqual([1]);
	});
});
describe("markCompletedSteps", () => {
	it("marks matching items as completed", () => {
		const items: TodoItem[] = [
			{ step: 1, text: "First", completed: false },
			{ step: 2, text: "Second", completed: false },
			{ step: 3, text: "Third", completed: false },
		];
		expect(markCompletedSteps("[DONE:1] [DONE:3]", items)).toBe(2);
		expect(items.map((item) => item.completed)).toEqual([true, false, true]);
	});
	it("returns count of completed items", () => {
		const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];
		expect(markCompletedSteps("[DONE:1]", items)).toBe(1);
		expect(markCompletedSteps("no markers", items)).toBe(0);
	});
	it("ignores markers for non-existent steps", () => {
		const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];
		// The marker itself is still counted, but no item gets flipped.
		expect(markCompletedSteps("[DONE:99]", items)).toBe(1);
		expect(items[0].completed).toBe(false);
	});
	it("doesn't double-complete already completed items", () => {
		const items: TodoItem[] = [{ step: 1, text: "First", completed: true }];
		markCompletedSteps("[DONE:1]", items);
		expect(items[0].completed).toBe(true);
	});
});

View file

@ -1,75 +1,246 @@
import fs from "fs";
import { initTheme, theme } from "../src/modes/interactive/theme/theme.js"; import { initTheme, theme } from "../src/modes/interactive/theme/theme.js";
// Initialize with dark theme explicitly // --- Color utilities ---
process.env.COLORTERM = "truecolor";
initTheme("dark");
console.log("\n=== Foreground Colors ===\n"); function hexToRgb(hex: string): [number, number, number] {
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
return result ? [parseInt(result[1], 16), parseInt(result[2], 16), parseInt(result[3], 16)] : [0, 0, 0];
}
// Core UI colors function rgbToHex(r: number, g: number, b: number): string {
console.log("accent:", theme.fg("accent", "Sample text")); return (
console.log("border:", theme.fg("border", "Sample text")); "#" +
console.log("borderAccent:", theme.fg("borderAccent", "Sample text")); [r, g, b]
console.log("borderMuted:", theme.fg("borderMuted", "Sample text")); .map((x) =>
console.log("success:", theme.fg("success", "Sample text")); Math.round(Math.max(0, Math.min(255, x)))
console.log("error:", theme.fg("error", "Sample text")); .toString(16)
console.log("warning:", theme.fg("warning", "Sample text")); .padStart(2, "0"),
console.log("muted:", theme.fg("muted", "Sample text")); )
console.log("dim:", theme.fg("dim", "Sample text")); .join("")
console.log("text:", theme.fg("text", "Sample text")); );
}
console.log("\n=== Message Text Colors ===\n"); function rgbToHsl(r: number, g: number, b: number): [number, number, number] {
console.log("userMessageText:", theme.fg("userMessageText", "Sample text")); r /= 255;
console.log("toolTitle:", theme.fg("toolTitle", "Sample text")); g /= 255;
console.log("toolOutput:", theme.fg("toolOutput", "Sample text")); b /= 255;
const max = Math.max(r, g, b),
min = Math.min(r, g, b);
let h = 0,
s = 0;
const l = (max + min) / 2;
if (max !== min) {
const d = max - min;
s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
switch (max) {
case r:
h = ((g - b) / d + (g < b ? 6 : 0)) / 6;
break;
case g:
h = ((b - r) / d + 2) / 6;
break;
case b:
h = ((r - g) / d + 4) / 6;
break;
}
}
return [h, s, l];
}
console.log("\n=== Markdown Colors ===\n"); function hslToRgb(h: number, s: number, l: number): [number, number, number] {
console.log("mdHeading:", theme.fg("mdHeading", "Sample text")); let r: number, g: number, b: number;
console.log("mdLink:", theme.fg("mdLink", "Sample text")); if (s === 0) {
console.log("mdCode:", theme.fg("mdCode", "Sample text")); r = g = b = l;
console.log("mdCodeBlock:", theme.fg("mdCodeBlock", "Sample text")); } else {
console.log("mdCodeBlockBorder:", theme.fg("mdCodeBlockBorder", "Sample text")); const hue2rgb = (p: number, q: number, t: number) => {
console.log("mdQuote:", theme.fg("mdQuote", "Sample text")); if (t < 0) t += 1;
console.log("mdQuoteBorder:", theme.fg("mdQuoteBorder", "Sample text")); if (t > 1) t -= 1;
console.log("mdHr:", theme.fg("mdHr", "Sample text")); if (t < 1 / 6) return p + (q - p) * 6 * t;
console.log("mdListBullet:", theme.fg("mdListBullet", "Sample text")); if (t < 1 / 2) return q;
if (t < 2 / 3) return p + (q - p) * (2 / 3 - t) * 6;
return p;
};
const q = l < 0.5 ? l * (1 + s) : l + s - l * s;
const p = 2 * l - q;
r = hue2rgb(p, q, h + 1 / 3);
g = hue2rgb(p, q, h);
b = hue2rgb(p, q, h - 1 / 3);
}
return [Math.round(r * 255), Math.round(g * 255), Math.round(b * 255)];
}
console.log("\n=== Tool Diff Colors ===\n"); function getLuminance(r: number, g: number, b: number): number {
console.log("toolDiffAdded:", theme.fg("toolDiffAdded", "Sample text")); const lin = (c: number) => {
console.log("toolDiffRemoved:", theme.fg("toolDiffRemoved", "Sample text")); c = c / 255;
console.log("toolDiffContext:", theme.fg("toolDiffContext", "Sample text")); return c <= 0.03928 ? c / 12.92 : ((c + 0.055) / 1.055) ** 2.4;
};
return 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b);
}
console.log("\n=== Thinking Border Colors ===\n"); function getContrast(rgb: [number, number, number], bgLum: number): number {
console.log("thinkingOff:", theme.fg("thinkingOff", "Sample text")); const fgLum = getLuminance(...rgb);
console.log("thinkingMinimal:", theme.fg("thinkingMinimal", "Sample text")); const lighter = Math.max(fgLum, bgLum);
console.log("thinkingLow:", theme.fg("thinkingLow", "Sample text")); const darker = Math.min(fgLum, bgLum);
console.log("thinkingMedium:", theme.fg("thinkingMedium", "Sample text")); return (lighter + 0.05) / (darker + 0.05);
console.log("thinkingHigh:", theme.fg("thinkingHigh", "Sample text")); }
console.log("\n=== Background Colors ===\n"); function adjustColorToContrast(hex: string, targetContrast: number, againstWhite: boolean): string {
console.log("userMessageBg:", theme.bg("userMessageBg", " Sample background text ")); const rgb = hexToRgb(hex);
console.log("toolPendingBg:", theme.bg("toolPendingBg", " Sample background text ")); const [h, s] = rgbToHsl(...rgb);
console.log("toolSuccessBg:", theme.bg("toolSuccessBg", " Sample background text ")); const bgLum = againstWhite ? 1.0 : 0.0;
console.log("toolErrorBg:", theme.bg("toolErrorBg", " Sample background text "));
console.log("\n=== Raw ANSI Codes ===\n"); let lo = againstWhite ? 0 : 0.5;
console.log("thinkingMedium ANSI:", JSON.stringify(theme.getFgAnsi("thinkingMedium"))); let hi = againstWhite ? 0.5 : 1.0;
console.log("accent ANSI:", JSON.stringify(theme.getFgAnsi("accent")));
console.log("muted ANSI:", JSON.stringify(theme.getFgAnsi("muted")));
console.log("dim ANSI:", JSON.stringify(theme.getFgAnsi("dim")));
console.log("\n=== Direct RGB Test ===\n"); for (let i = 0; i < 50; i++) {
console.log("Gray #6c6c6c: \x1b[38;2;108;108;108mSample text\x1b[0m"); const mid = (lo + hi) / 2;
console.log("Gray #444444: \x1b[38;2;68;68;68mSample text\x1b[0m"); const testRgb = hslToRgb(h, s, mid);
console.log("Gray #303030: \x1b[38;2;48;48;48mSample text\x1b[0m"); const contrast = getContrast(testRgb, bgLum);
console.log("\n=== Hex Color Test ===\n"); if (againstWhite) {
console.log("Direct #00d7ff test: \x1b[38;2;0;215;255mBRIGHT CYAN\x1b[0m"); if (contrast < targetContrast) hi = mid;
console.log("Theme cyan (should match above):", theme.fg("accent", "BRIGHT CYAN")); else lo = mid;
} else {
if (contrast < targetContrast) lo = mid;
else hi = mid;
}
}
console.log("\n=== Environment ===\n"); const finalL = againstWhite ? lo : hi;
console.log("TERM:", process.env.TERM); return rgbToHex(...hslToRgb(h, s, finalL));
console.log("COLORTERM:", process.env.COLORTERM); }
console.log("Color mode:", theme.getColorMode());
console.log("\n"); function fgAnsi(hex: string): string {
const rgb = hexToRgb(hex);
return `\x1b[38;2;${rgb[0]};${rgb[1]};${rgb[2]}m`;
}
const reset = "\x1b[0m";
// --- Commands ---
function cmdContrast(targetContrast: number): void {
const baseColors = {
teal: "#5f8787",
blue: "#5f87af",
green: "#87af87",
yellow: "#d7af5f",
red: "#af5f5f",
};
console.log(`\n=== Colors adjusted to ${targetContrast}:1 contrast ===\n`);
console.log("For LIGHT theme (vs white):");
for (const [name, hex] of Object.entries(baseColors)) {
const adjusted = adjustColorToContrast(hex, targetContrast, true);
const rgb = hexToRgb(adjusted);
const contrast = getContrast(rgb, 1.0);
console.log(` ${name.padEnd(8)} ${fgAnsi(adjusted)}Sample${reset} ${adjusted} (${contrast.toFixed(2)}:1)`);
}
console.log("\nFor DARK theme (vs black):");
for (const [name, hex] of Object.entries(baseColors)) {
const adjusted = adjustColorToContrast(hex, targetContrast, false);
const rgb = hexToRgb(adjusted);
const contrast = getContrast(rgb, 0.0);
console.log(` ${name.padEnd(8)} ${fgAnsi(adjusted)}Sample${reset} ${adjusted} (${contrast.toFixed(2)}:1)`);
}
}
// Validate an arbitrary theme JSON file: print every hex color with its WCAG
// contrast ratio against pure white and pure black backgrounds.
// AA = passes 4.5:1 (normal text), AA-lg = passes 3.0:1 (large text).
function cmdTest(filePath: string): void {
	if (!fs.existsSync(filePath)) {
		console.error(`File not found: ${filePath}`);
		process.exit(1);
	}
	// Fail with a readable message instead of an unhandled SyntaxError stack.
	let data: unknown;
	try {
		data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
	} catch (e) {
		console.error(`Invalid JSON in ${filePath}: ${e instanceof Error ? e.message : String(e)}`);
		process.exit(1);
	}
	// Accept either a flat color map or a { vars: { ... } } wrapper.
	const vars = (data as { vars?: Record<string, unknown> }).vars ?? data;
	console.log(`\n=== Testing ${filePath} ===\n`);
	for (const [name, hex] of Object.entries(vars as Record<string, unknown>)) {
		// Skip nested objects / non-color entries instead of crashing on
		// hex.startsWith when a value is not a string.
		if (typeof hex !== "string" || !hex.startsWith("#")) continue;
		const rgb = hexToRgb(hex);
		const vsWhite = getContrast(rgb, 1.0);
		const vsBlack = getContrast(rgb, 0.0);
		const passW = vsWhite >= 4.5 ? "AA" : vsWhite >= 3.0 ? "AA-lg" : "FAIL";
		const passB = vsBlack >= 4.5 ? "AA" : vsBlack >= 3.0 ? "AA-lg" : "FAIL";
		console.log(
			`${name.padEnd(14)} ${fgAnsi(hex)}Sample text${reset} ${hex} white: ${vsWhite.toFixed(2)}:1 ${passW.padEnd(5)} black: ${vsBlack.toFixed(2)}:1 ${passB}`,
		);
	}
}
// Render every themed color with its WCAG contrast ratios after initializing
// the named built-in theme in truecolor mode.
function cmdTheme(themeName: string): void {
	process.env.COLORTERM = "truecolor";
	initTheme(themeName);

	// Pull the 24-bit RGB triple back out of a "38;2;r;g;b" escape sequence.
	const parseAnsiRgb = (ansi: string): [number, number, number] | null => {
		const match = ansi.match(/38;2;(\d+);(\d+);(\d+)/);
		return match ? [parseInt(match[1], 10), parseInt(match[2], 10), parseInt(match[3], 10)] : null;
	};

	// "ratio:1 VERDICT" for one theme color against a background luminance
	// (1.0 = white, 0.0 = black); "(default)" when the color has no RGB code.
	const contrastLabel = (colorName: string, bgLum: number): string => {
		const ansi = theme.getFgAnsi(colorName as Parameters<typeof theme.getFgAnsi>[0]);
		const rgb = parseAnsiRgb(ansi);
		if (!rgb) return "(default)";
		const ratio = getContrast(rgb, bgLum);
		const pass = ratio >= 4.5 ? "AA" : ratio >= 3.0 ? "AA-lg" : "FAIL";
		return `${ratio.toFixed(2)}:1 ${pass}`;
	};

	const logColor = (name: string): void => {
		const sample = theme.fg(name as Parameters<typeof theme.fg>[0], "Sample text");
		const cw = contrastLabel(name, 1.0);
		const cb = contrastLabel(name, 0.0);
		console.log(`${name.padEnd(20)} ${sample} white: ${cw.padEnd(12)} black: ${cb}`);
	};

	console.log(`\n=== ${themeName} theme (WCAG AA = 4.5:1) ===`);
	console.log("\n--- Core UI ---");
	for (const name of ["accent", "border", "borderAccent", "borderMuted", "success", "error", "warning", "muted", "dim"]) {
		logColor(name);
	}
	console.log("\n--- Markdown ---");
	for (const name of ["mdHeading", "mdLink", "mdCode", "mdCodeBlock", "mdCodeBlockBorder", "mdQuote", "mdListBullet"]) {
		logColor(name);
	}
	console.log("\n--- Diff ---");
	for (const name of ["toolDiffAdded", "toolDiffRemoved", "toolDiffContext"]) {
		logColor(name);
	}
	console.log("\n--- Thinking ---");
	for (const name of ["thinkingOff", "thinkingMinimal", "thinkingLow", "thinkingMedium", "thinkingHigh"]) {
		logColor(name);
	}
	console.log("\n--- Backgrounds ---");
	console.log("userMessageBg:", theme.bg("userMessageBg", " Sample "));
	console.log("toolPendingBg:", theme.bg("toolPendingBg", " Sample "));
	console.log("toolSuccessBg:", theme.bg("toolSuccessBg", " Sample "));
	console.log("toolErrorBg:", theme.bg("toolErrorBg", " Sample "));
	console.log();
}
// --- Main ---
// CLI dispatch: first arg selects the command, second is its parameter.
const [cmd, arg] = process.argv.slice(2);

switch (cmd) {
	case "contrast":
		// Fall back to the WCAG AA ratio when no/invalid number is given.
		cmdContrast(parseFloat(arg) || 4.5);
		break;
	case "test":
		cmdTest(arg);
		break;
	case "light":
	case "dark":
		cmdTheme(cmd);
		break;
	default:
		console.log("Usage:");
		console.log(" npx tsx test-theme-colors.ts light|dark Test built-in theme");
		console.log(" npx tsx test-theme-colors.ts contrast 4.5 Compute colors at ratio");
		console.log(" npx tsx test-theme-colors.ts test file.json Test any JSON file");
}

View file

@ -2,6 +2,20 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
## [0.45.4] - 2026-01-13
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
## [0.45.1] - 2026-01-13
## [0.45.0] - 2026-01-13
## [0.44.0] - 2026-01-12
## [0.43.0] - 2026-01-11 ## [0.43.0] - 2026-01-11
## [0.42.5] - 2026-01-11 ## [0.42.5] - 2026-01-11

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-mom", "name": "@mariozechner/pi-mom",
"version": "0.43.0", "version": "0.45.5",
"description": "Slack bot that delegates messages to the pi coding agent", "description": "Slack bot that delegates messages to the pi coding agent",
"type": "module", "type": "module",
"bin": { "bin": {
@ -20,9 +20,9 @@
}, },
"dependencies": { "dependencies": {
"@anthropic-ai/sandbox-runtime": "^0.0.16", "@anthropic-ai/sandbox-runtime": "^0.0.16",
"@mariozechner/pi-agent-core": "^0.43.0", "@mariozechner/pi-agent-core": "^0.45.5",
"@mariozechner/pi-ai": "^0.43.0", "@mariozechner/pi-ai": "^0.45.5",
"@mariozechner/pi-coding-agent": "^0.43.0", "@mariozechner/pi-coding-agent": "^0.45.5",
"@sinclair/typebox": "^0.34.0", "@sinclair/typebox": "^0.34.0",
"@slack/socket-mode": "^2.0.0", "@slack/socket-mode": "^2.0.0",
"@slack/web-api": "^7.0.0", "@slack/web-api": "^7.0.0",

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi", "name": "@mariozechner/pi",
"version": "0.43.0", "version": "0.45.5",
"description": "CLI tool for managing vLLM deployments on GPU pods", "description": "CLI tool for managing vLLM deployments on GPU pods",
"type": "module", "type": "module",
"bin": { "bin": {
@ -33,7 +33,7 @@
"node": ">=20.0.0" "node": ">=20.0.0"
}, },
"dependencies": { "dependencies": {
"@mariozechner/pi-agent-core": "^0.43.0", "@mariozechner/pi-agent-core": "^0.45.5",
"chalk": "^5.5.0" "chalk": "^5.5.0"
}, },
"devDependencies": {} "devDependencies": {}

View file

@ -2,6 +2,20 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
## [0.45.4] - 2026-01-13
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
## [0.45.1] - 2026-01-13
## [0.45.0] - 2026-01-13
## [0.44.0] - 2026-01-12
### Added ### Added
- `SettingsListOptions` with `enableSearch` for fuzzy filtering in `SettingsList` ([#643](https://github.com/badlogic/pi-mono/pull/643) by [@ninlds](https://github.com/ninlds)) - `SettingsListOptions` with `enableSearch` for fuzzy filtering in `SettingsList` ([#643](https://github.com/badlogic/pi-mono/pull/643) by [@ninlds](https://github.com/ninlds))

View file

@ -62,16 +62,17 @@ Overlays render components on top of existing content without replacing it. Usef
```typescript ```typescript
// Show overlay with default options (centered, max 80 cols) // Show overlay with default options (centered, max 80 cols)
tui.showOverlay(component); const handle = tui.showOverlay(component);
// Show overlay with custom positioning and sizing // Show overlay with custom positioning and sizing
tui.showOverlay(component, { // Values can be numbers (absolute) or percentage strings (e.g., "50%")
const handle = tui.showOverlay(component, {
// Sizing // Sizing
width: 60, // Fixed width in columns width: 60, // Fixed width in columns
widthPercent: 80, // Width as percentage of terminal (0-100) width: "80%", // Width as percentage of terminal
minWidth: 40, // Minimum width floor minWidth: 40, // Minimum width floor
maxHeight: 20, // Maximum height in rows maxHeight: 20, // Maximum height in rows
maxHeightPercent: 50, // Maximum height as percentage of terminal maxHeight: "50%", // Maximum height as percentage of terminal
// Anchor-based positioning (default: 'center') // Anchor-based positioning (default: 'center')
anchor: 'bottom-right', // Position relative to anchor point anchor: 'bottom-right', // Position relative to anchor point
@ -79,8 +80,8 @@ tui.showOverlay(component, {
offsetY: -1, // Vertical offset from anchor offsetY: -1, // Vertical offset from anchor
// Percentage-based positioning (alternative to anchor) // Percentage-based positioning (alternative to anchor)
rowPercent: 25, // Vertical position (0=top, 100=bottom) row: "25%", // Vertical position (0%=top, 100%=bottom)
colPercent: 50, // Horizontal position (0=left, 100=right) col: "50%", // Horizontal position (0%=left, 100%=right)
// Absolute positioning (overrides anchor/percent) // Absolute positioning (overrides anchor/percent)
row: 5, // Exact row position row: 5, // Exact row position
@ -88,23 +89,32 @@ tui.showOverlay(component, {
// Margin from terminal edges // Margin from terminal edges
margin: 2, // All sides margin: 2, // All sides
margin: { top: 1, right: 2, bottom: 1, left: 2 } margin: { top: 1, right: 2, bottom: 1, left: 2 },
// Responsive visibility
visible: (termWidth, termHeight) => termWidth >= 100 // Hide on narrow terminals
}); });
// OverlayHandle methods
handle.hide(); // Permanently remove the overlay
handle.setHidden(true); // Temporarily hide (can show again)
handle.setHidden(false); // Show again after hiding
handle.isHidden(); // Check if temporarily hidden
// Hide topmost overlay // Hide topmost overlay
tui.hideOverlay(); tui.hideOverlay();
// Check if any overlay is active // Check if any visible overlay is active
tui.hasOverlay(); tui.hasOverlay();
``` ```
**Anchor values**: `'center'`, `'top-left'`, `'top-right'`, `'bottom-left'`, `'bottom-right'`, `'top-center'`, `'bottom-center'`, `'left-center'`, `'right-center'` **Anchor values**: `'center'`, `'top-left'`, `'top-right'`, `'bottom-left'`, `'bottom-right'`, `'top-center'`, `'bottom-center'`, `'left-center'`, `'right-center'`
**Resolution order**: **Resolution order**:
1. `width` takes precedence over `widthPercent` 1. `minWidth` is applied as a floor after width calculation
2. `minWidth` is applied as a floor after width calculation 2. For position: absolute `row`/`col` > percentage `row`/`col` > `anchor`
3. For position: `row`/`col` > `rowPercent`/`colPercent` > `anchor` 3. `margin` clamps final position to stay within terminal bounds
4. `margin` clamps final position to stay within terminal bounds 4. `visible` callback controls whether overlay renders (called each frame)
### Component Interface ### Component Interface

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-tui", "name": "@mariozechner/pi-tui",
"version": "0.43.0", "version": "0.45.5",
"description": "Terminal User Interface library with differential rendering for efficient text-based applications", "description": "Terminal User Interface library with differential rendering for efficient text-based applications",
"type": "module", "type": "module",
"main": "dist/index.js", "main": "dist/index.js",

View file

@ -2,6 +2,20 @@
## [Unreleased] ## [Unreleased]
## [0.45.5] - 2026-01-13
## [0.45.4] - 2026-01-13
## [0.45.3] - 2026-01-13
## [0.45.2] - 2026-01-13
## [0.45.1] - 2026-01-13
## [0.45.0] - 2026-01-13
## [0.44.0] - 2026-01-12
## [0.43.0] - 2026-01-11 ## [0.43.0] - 2026-01-11
## [0.42.5] - 2026-01-11 ## [0.42.5] - 2026-01-11

View file

@ -1,6 +1,6 @@
{ {
"name": "pi-web-ui-example", "name": "pi-web-ui-example",
"version": "1.31.0", "version": "1.33.5",
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {

View file

@ -1,6 +1,6 @@
{ {
"name": "@mariozechner/pi-web-ui", "name": "@mariozechner/pi-web-ui",
"version": "0.43.0", "version": "0.45.5",
"description": "Reusable web UI components for AI chat interfaces powered by @mariozechner/pi-ai", "description": "Reusable web UI components for AI chat interfaces powered by @mariozechner/pi-ai",
"type": "module", "type": "module",
"main": "dist/index.js", "main": "dist/index.js",
@ -18,8 +18,8 @@
}, },
"dependencies": { "dependencies": {
"@lmstudio/sdk": "^1.5.0", "@lmstudio/sdk": "^1.5.0",
"@mariozechner/pi-ai": "^0.43.0", "@mariozechner/pi-ai": "^0.45.5",
"@mariozechner/pi-tui": "^0.43.0", "@mariozechner/pi-tui": "^0.45.5",
"docx-preview": "^0.3.7", "docx-preview": "^0.3.7",
"jszip": "^3.10.1", "jszip": "^3.10.1",
"lucide": "^0.544.0", "lucide": "^0.544.0",

View file

@ -15,6 +15,7 @@ const TEST_MODELS: Record<string, string> = {
google: "gemini-2.5-flash", google: "gemini-2.5-flash",
groq: "openai/gpt-oss-20b", groq: "openai/gpt-oss-20b",
openrouter: "z-ai/glm-4.6", openrouter: "z-ai/glm-4.6",
"vercel-ai-gateway": "anthropic/claude-opus-4.5",
cerebras: "gpt-oss-120b", cerebras: "gpt-oss-120b",
xai: "grok-4-fast-non-reasoning", xai: "grok-4-fast-non-reasoning",
zai: "glm-4.5-air", zai: "glm-4.5-air",