Merge main into bash-mode

This commit is contained in:
Mario Zechner 2025-12-08 21:39:01 +01:00
commit 1608da8770
91 changed files with 6083 additions and 1554 deletions

3
.gitignore vendored
View file

@ -13,6 +13,7 @@ packages/*/dist-firefox/
# Editor files
.vscode/
.zed/
.idea/
*.swp
*.swo
@ -23,4 +24,4 @@ packages/*/dist-firefox/
coverage/
.nyc_output/
.pi_config/
tui-debug.log
tui-debug.log

View file

@ -49,3 +49,30 @@ When closing issues via commit:
- NEVER modify already-released version sections (e.g., `## [0.12.2]`)
- Each version section is immutable once released
- When releasing: rename `[Unreleased]` to the new version, then add a fresh empty `[Unreleased]` section
## Releasing
1. **Bump version** (all packages use lockstep versioning):
```bash
npm run version:patch # For bug fixes
npm run version:minor # For new features
npm run version:major # For breaking changes
```
2. **Finalize CHANGELOG.md**: Change `[Unreleased]` to the new version with today's date (e.g., `## [0.12.12] - 2025-12-05`)
3. **Commit and tag**:
```bash
git add .
git commit -m "Release v0.12.12"
git tag v0.12.12
git push origin main
git push origin v0.12.12
```
4. **Publish to npm**:
```bash
npm run publish
```
5. **Add new [Unreleased] section** at top of CHANGELOG.md for next cycle, commit it

979
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-agent-core",
"version": "0.12.9",
"version": "0.13.2",
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
"type": "module",
"main": "./dist/index.js",
@ -18,8 +18,8 @@
"prepublishOnly": "npm run clean && npm run build"
},
"dependencies": {
"@mariozechner/pi-ai": "^0.12.9",
"@mariozechner/pi-tui": "^0.12.9"
"@mariozechner/pi-ai": "^0.13.2",
"@mariozechner/pi-tui": "^0.13.2"
},
"keywords": [
"ai",

View file

@ -335,6 +335,7 @@ export class Agent {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: this.abortController?.signal.aborted ? "aborted" : "error",

View file

@ -44,6 +44,7 @@ function streamSimpleProxy(
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
timestamp: Date.now(),

View file

@ -7,7 +7,7 @@ export interface AgentRunConfig {
systemPrompt: string;
tools: AgentTool<any>[];
model: Model<any>;
reasoning?: "low" | "medium" | "high";
reasoning?: "low" | "medium" | "high" | "xhigh";
getQueuedMessages?: <T>() => Promise<QueuedMessage<T>[]>;
}

View file

@ -24,8 +24,9 @@ export interface Attachment {
/**
* Thinking/reasoning level for models that support it.
* Note: "xhigh" is only supported by OpenAI codex-max models.
*/
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high";
export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
/**
* User message with optional attachments.

View file

@ -2,6 +2,34 @@
## [Unreleased]
### Breaking Changes
- Removed provider-level tool argument validation. Validation now happens in `agentLoop` via `executeToolCalls`, allowing models to retry on validation errors. For manual tool execution, use `validateToolCall(tools, toolCall)` or `validateToolArguments(tool, toolCall)`.
### Added
- Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
- **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
- **xhigh reasoning level**: Added `xhigh` to `ReasoningEffort` type for OpenAI codex-max models. For non-OpenAI providers (Anthropic, Google), `xhigh` is automatically mapped to `high`. ([#143](https://github.com/badlogic/pi-mono/issues/143))
### Changed
- **Updated SDK versions**: OpenAI SDK 5.21.0 → 6.10.0, Anthropic SDK 0.61.0 → 0.71.2, Google GenAI SDK 1.30.0 → 1.31.0
## [0.13.0] - 2025-12-06
### Breaking Changes
- **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). For OpenAI and Google, this uses native API values (`total_tokens`, `totalTokenCount`). For Anthropic, it's computed as `input + output + cacheRead + cacheWrite`.
## [0.12.10] - 2025-12-04
### Added
- Added `gpt-5.1-codex-max` model support
### Fixed
- **OpenAI Token Counting**: Fixed `usage.input` to exclude cached tokens for OpenAI providers. Previously, `input` included cached tokens, causing double-counting when calculating total context size via `input + cacheRead`. Now `input` represents non-cached input tokens across all providers, making `input + output + cacheRead + cacheWrite` the correct formula for total context size.

View file

@ -194,8 +194,8 @@ const response = await complete(model, context);
// Check for tool calls in the response
for (const block of response.content) {
if (block.type === 'toolCall') {
// Arguments are automatically validated against the TypeBox schema using AJV
// If validation fails, an error event is emitted
// Execute your tool with the arguments
// See "Validating Tool Arguments" section for validation
const result = await executeWeatherApi(block.arguments);
// Add tool result with text content
@ -253,7 +253,7 @@ for await (const event of s) {
}
if (event.type === 'toolcall_end') {
// Here toolCall.arguments is complete and validated
// Here toolCall.arguments is complete (but not yet validated)
const toolCall = event.toolCall;
console.log(`Tool completed: ${toolCall.name}`, toolCall.arguments);
}
@ -267,9 +267,44 @@ for await (const event of s) {
- Arrays may be incomplete
- Nested objects may be partially populated
- At minimum, `arguments` will be an empty object `{}`, never `undefined`
- Full validation only occurs at `toolcall_end` when arguments are complete
- The Google provider does not support function call streaming. Instead, you will receive a single `toolcall_delta` event with the full arguments.
### Validating Tool Arguments
When using `agentLoop`, tool arguments are automatically validated against your TypeBox schemas before execution. If validation fails, the error is returned to the model as a tool result, allowing it to retry.
When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools:
```typescript
import { stream, validateToolCall, Tool } from '@mariozechner/pi-ai';
const tools: Tool[] = [weatherTool, calculatorTool];
const s = stream(model, { messages, tools });
for await (const event of s) {
if (event.type === 'toolcall_end') {
const toolCall = event.toolCall;
try {
// Validate arguments against the tool's schema (throws on invalid args)
const validatedArgs = validateToolCall(tools, toolCall);
const result = await executeMyTool(toolCall.name, validatedArgs);
// ... add tool result to context
} catch (error) {
// Validation failed - return error as tool result so model can retry
context.messages.push({
role: 'toolResult',
toolCallId: toolCall.id,
toolName: toolCall.name,
content: [{ type: 'text', text: error.message }],
isError: true,
timestamp: Date.now()
});
}
}
}
```
### Complete Event Reference
All streaming events emitted during assistant message generation:
@ -352,7 +387,7 @@ if (model.reasoning) {
const response = await completeSimple(model, {
messages: [{ role: 'user', content: 'Solve: 2x + 5 = 13' }]
}, {
reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high'
reasoning: 'medium' // 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' (xhigh maps to high on non-OpenAI providers)
});
// Access thinking and text blocks
@ -576,6 +611,23 @@ const ollamaModel: Model<'openai-completions'> = {
maxTokens: 32000
};
// Example: LiteLLM proxy with explicit compat settings
const litellmModel: Model<'openai-completions'> = {
id: 'gpt-4o',
name: 'GPT-4o (via LiteLLM)',
api: 'openai-completions',
provider: 'litellm',
baseUrl: 'http://localhost:4000/v1',
reasoning: false,
input: ['text', 'image'],
cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 16384,
compat: {
supportsStore: false, // LiteLLM doesn't support the store field
}
};
// Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
const proxyModel: Model<'anthropic-messages'> = {
id: 'claude-sonnet-4',
@ -600,6 +652,25 @@ const response = await stream(ollamaModel, context, {
});
```
### OpenAI Compatibility Settings
The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field:
```typescript
interface OpenAICompat {
supportsStore?: boolean; // Whether provider supports the `store` field (default: true)
supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens)
}
```
If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. This is useful for:
- **LiteLLM proxies**: May not support `store` field
- **Custom inference servers**: May use non-standard field names
- **Self-hosted endpoints**: May have different feature support
### Type Safety
Models are typed by their API, ensuring type-safe options:

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-ai",
"version": "0.12.9",
"version": "0.13.2",
"description": "Unified LLM API with automatic model discovery and provider configuration",
"type": "module",
"main": "./dist/index.js",
@ -20,13 +20,13 @@
"prepublishOnly": "npm run clean && npm run build"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.61.0",
"@google/genai": "^1.30.0",
"@anthropic-ai/sdk": "0.71.2",
"@google/genai": "1.31.0",
"@sinclair/typebox": "^0.34.41",
"ajv": "^8.17.1",
"ajv-formats": "^3.0.1",
"chalk": "^5.6.2",
"openai": "5.21.0",
"openai": "6.10.0",
"partial-json": "^0.1.7",
"zod-to-json-schema": "^3.24.6"
},

View file

@ -344,6 +344,26 @@ async function generateModels() {
});
}
if (!allModels.some(m => m.provider === "openai" && m.id === "gpt-5.1-codex-max")) {
allModels.push({
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "openai-responses",
baseUrl: "https://api.openai.com/v1",
provider: "openai",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
});
}
// Add missing Grok models
if (!allModels.some(m => m.provider === "xai" && m.id === "grok-code-fast-1")) {
allModels.push({

View file

@ -6,4 +6,6 @@ export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./stream.js";
export * from "./types.js";
export * from "./utils/overflow.js";
export * from "./utils/typebox-helpers.js";
export * from "./utils/validation.js";

View file

@ -1080,6 +1080,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-responses">,
"gpt-5.1-codex-max": {
id: "gpt-5.1-codex-max",
name: "GPT-5.1 Codex Max",
api: "openai-responses",
provider: "openai",
baseUrl: "https://api.openai.com/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-responses">,
o3: {
id: "o3",
name: "o3",
@ -1974,6 +1991,23 @@ export const MODELS = {
} satisfies Model<"anthropic-messages">,
},
openrouter: {
"openai/gpt-5.1-codex-max": {
id: "openai/gpt-5.1-codex-max",
name: "OpenAI: GPT-5.1-Codex-Max",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text", "image"],
cost: {
input: 1.25,
output: 10,
cacheRead: 0.125,
cacheWrite: 0,
},
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"amazon/nova-2-lite-v1:free": {
id: "amazon/nova-2-lite-v1:free",
name: "Amazon: Nova 2 Lite (free)",
@ -2008,6 +2042,57 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 65535,
} satisfies Model<"openai-completions">,
"mistralai/ministral-14b-2512": {
id: "mistralai/ministral-14b-2512",
name: "Mistral: Ministral 3 14B 2512",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.19999999999999998,
output: 0.19999999999999998,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-8b-2512": {
id: "mistralai/ministral-8b-2512",
name: "Mistral: Ministral 3 8B 2512",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.15,
output: 0.15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 262144,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/ministral-3b-2512": {
id: "mistralai/ministral-3b-2512",
name: "Mistral: Ministral 3 3B 2512",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 0.09999999999999999,
output: 0.09999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-large-2512": {
id: "mistralai/mistral-large-2512",
name: "Mistral: Mistral Large 3 2512",
@ -2142,7 +2227,7 @@ export const MODELS = {
cacheWrite: 6.25,
},
contextWindow: 200000,
maxTokens: 64000,
maxTokens: 32000,
} satisfies Model<"openai-completions">,
"allenai/olmo-3-7b-instruct": {
id: "allenai/olmo-3-7b-instruct",
@ -3156,13 +3241,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.7999999999999999,
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 163840,
contextWindow: 131072,
maxTokens: 131072,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-audio-preview": {
id: "openai/gpt-4o-audio-preview",
@ -3334,22 +3419,22 @@ export const MODELS = {
contextWindow: 400000,
maxTokens: 128000,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-120b:exacto": {
id: "openai/gpt-oss-120b:exacto",
name: "OpenAI: gpt-oss-120b (exacto)",
"openai/gpt-oss-120b:free": {
id: "openai/gpt-oss-120b:free",
name: "OpenAI: gpt-oss-120b (free)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.04,
output: 0.19999999999999998,
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-120b": {
id: "openai/gpt-oss-120b",
@ -3360,13 +3445,30 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.04,
output: 0.19999999999999998,
input: 0.039,
output: 0.19,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-120b:exacto": {
id: "openai/gpt-oss-120b:exacto",
name: "OpenAI: gpt-oss-120b (exacto)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.039,
output: 0.19,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-oss-20b:free": {
id: "openai/gpt-oss-20b:free",
@ -3861,23 +3963,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/magistral-small-2506": {
id: "mistralai/magistral-small-2506",
name: "Mistral: Magistral Small 2506",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 0.5,
output: 1.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 40000,
maxTokens: 40000,
} satisfies Model<"openai-completions">,
"mistralai/magistral-medium-2506:thinking": {
id: "mistralai/magistral-medium-2506:thinking",
name: "Mistral: Magistral Medium 2506 (thinking)",
@ -3895,23 +3980,6 @@ export const MODELS = {
contextWindow: 40960,
maxTokens: 40000,
} satisfies Model<"openai-completions">,
"mistralai/magistral-medium-2506": {
id: "mistralai/magistral-medium-2506",
name: "Mistral: Magistral Medium 2506",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: true,
input: ["text"],
cost: {
input: 2,
output: 5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 40960,
maxTokens: 40000,
} satisfies Model<"openai-completions">,
"google/gemini-2.5-pro-preview": {
id: "google/gemini-2.5-pro-preview",
name: "Google: Gemini 2.5 Pro Preview 06-05",
@ -3980,23 +4048,6 @@ export const MODELS = {
contextWindow: 1000000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"mistralai/devstral-small-2505": {
id: "mistralai/devstral-small-2505",
name: "Mistral: Devstral Small 2505",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.06,
output: 0.12,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/codex-mini": {
id: "openai/codex-mini",
name: "OpenAI: Codex Mini",
@ -4397,13 +4448,13 @@ export const MODELS = {
reasoning: true,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.88,
cacheRead: 0.106,
input: 0.15,
output: 0.7,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 163840,
maxTokens: 4096,
contextWindow: 8192,
maxTokens: 7168,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small-3.1-24b-instruct:free": {
id: "mistralai/mistral-small-3.1-24b-instruct:free",
@ -4669,13 +4720,13 @@ export const MODELS = {
reasoning: false,
input: ["text"],
cost: {
input: 0.049999999999999996,
output: 0.08,
input: 0.03,
output: 0.11,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
maxTokens: 16384,
maxTokens: 32768,
} satisfies Model<"openai-completions">,
"deepseek/deepseek-r1-distill-llama-70b": {
id: "deepseek/deepseek-r1-distill-llama-70b",
@ -4711,23 +4762,6 @@ export const MODELS = {
contextWindow: 163840,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/codestral-2501": {
id: "mistralai/codestral-2501",
name: "Mistral: Codestral 2501",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.3,
output: 0.8999999999999999,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 256000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"deepseek/deepseek-chat": {
id: "deepseek/deepseek-chat",
name: "DeepSeek: DeepSeek V3",
@ -5119,23 +5153,6 @@ export const MODELS = {
contextWindow: 32768,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"cohere/command-r-plus-08-2024": {
id: "cohere/command-r-plus-08-2024",
name: "Cohere: Command R+ (08-2024)",
@ -5153,6 +5170,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"cohere/command-r-08-2024": {
id: "cohere/command-r-08-2024",
name: "Cohere: Command R (08-2024)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.15,
output: 0.6,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4000,
} satisfies Model<"openai-completions">,
"sao10k/l3.1-euryale-70b": {
id: "sao10k/l3.1-euryale-70b",
name: "Sao10K: Llama 3.1 Euryale 70B v2.2",
@ -5221,23 +5255,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 3.5,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
@ -5255,6 +5272,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 3.5,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
name: "Mistral: Mistral Nemo",
@ -5272,9 +5306,9 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -5289,9 +5323,9 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -5391,23 +5425,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o": {
id: "openai/gpt-4o",
name: "OpenAI: GPT-4o",
@ -5442,22 +5459,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.3,
output: 0.39999999999999997,
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-8b-instruct": {
id: "meta-llama/llama-3-8b-instruct",
@ -5476,6 +5493,23 @@ export const MODELS = {
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.3,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mixtral-8x22b-instruct": {
id: "mistralai/mixtral-8x22b-instruct",
name: "Mistral: Mixtral 8x22B Instruct",
@ -5561,23 +5595,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
@ -5595,21 +5612,21 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-small": {
id: "mistralai/mistral-small",
name: "Mistral Small",
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.19999999999999998,
output: 0.6,
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 32768,
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-tiny": {

View file

@ -25,7 +25,7 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { validateToolArguments } from "../utils/validation.js";
import { transformMessages } from "./transorm-messages.js";
/**
@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -129,6 +130,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
@ -227,15 +231,6 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
});
} else if (block.type === "toolCall") {
block.arguments = parseStreamingJson(block.partialJson);
// Validate tool arguments if tool definition is available
if (context.tools) {
const tool = context.tools.find((t) => t.name === block.name);
if (tool) {
block.arguments = validateToolArguments(tool, block);
}
}
delete (block as any).partialJson;
stream.push({
type: "toolcall_end",
@ -253,6 +248,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.usage.output_tokens || 0;
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
}
}

View file

@ -23,7 +23,7 @@ import type {
} from "../types.js";
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { validateToolArguments } from "../utils/validation.js";
import { transformMessages } from "./transorm-messages.js";
export interface GoogleOptions extends StreamOptions {
@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -165,14 +166,6 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }),
};
// Validate tool arguments if tool definition is available
if (context.tools) {
const tool = context.tools.find((t) => t.name === toolCall.name);
if (tool) {
toolCall.arguments = validateToolArguments(tool, toolCall);
}
}
output.content.push(toolCall);
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
stream.push({
@ -200,6 +193,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
cacheWrite: 0,
totalTokens: chunk.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,

View file

@ -12,6 +12,7 @@ import type {
AssistantMessage,
Context,
Model,
OpenAICompat,
StopReason,
StreamFunction,
StreamOptions,
@ -23,12 +24,12 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { validateToolArguments } from "../utils/validation.js";
import { transformMessages } from "./transorm-messages.js";
export interface OpenAICompletionsOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
reasoningEffort?: "minimal" | "low" | "medium" | "high";
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
}
export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
@ -50,6 +51,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -83,15 +85,6 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
});
} else if (block.type === "toolCall") {
block.arguments = JSON.parse(block.partialArgs || "{}");
// Validate tool arguments if tool definition is available
if (context.tools) {
const tool = context.tools.find((t) => t.name === block.name);
if (tool) {
block.arguments = validateToolArguments(tool, block);
}
}
delete block.partialArgs;
stream.push({
type: "toolcall_end",
@ -106,14 +99,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
for await (const chunk of openaiStream) {
if (chunk.usage) {
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
output.usage = {
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
output:
(chunk.usage.completion_tokens || 0) +
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
input,
output: outputTokens,
cacheRead: cachedTokens,
cacheWrite: 0,
// Compute totalTokens ourselves since we add reasoning_tokens to output
// and some providers (e.g., Groq) don't include them in total_tokens
totalTokens: input + outputTokens + cachedTokens,
cost: {
input: 0,
output: 0,
@ -271,7 +268,8 @@ function createClient(model: Model<"openai-completions">, apiKey?: string) {
}
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
const messages = convertMessages(model, context);
const compat = getCompat(model);
const messages = convertMessages(model, context, compat);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: model.id,
@ -280,27 +278,20 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
stream_options: { include_usage: true },
};
// Cerebras/xAI/Mistral dont like the "store" field
if (
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
!model.baseUrl.includes("mistral.ai") &&
!model.baseUrl.includes("chutes.ai")
) {
if (compat.supportsStore) {
params.store = false;
}
if (options?.maxTokens) {
// Mistral/Chutes uses max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
(params as any).max_tokens = options?.maxTokens;
if (compat.maxTokensField === "max_tokens") {
(params as any).max_tokens = options.maxTokens;
} else {
params.max_completion_tokens = options?.maxTokens;
params.max_completion_tokens = options.maxTokens;
}
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
params.temperature = options.temperature;
}
if (context.tools) {
@ -311,27 +302,24 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
params.tool_choice = options.toolChoice;
}
// Grok models don't like reasoning_effort
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
params.reasoning_effort = options.reasoningEffort;
}
return params;
}
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
function convertMessages(
model: Model<"openai-completions">,
context: Context,
compat: Required<OpenAICompat>,
): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
// Cerebras/xAi/Mistral/Chutes don't like the "developer" role
const useDeveloperRole =
model.reasoning &&
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
!model.baseUrl.includes("mistral.ai") &&
!model.baseUrl.includes("chutes.ai");
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
}
@ -486,3 +474,42 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
}
}
}
/**
 * Detect compatibility settings from baseUrl for known providers.
 * Returns a fully resolved OpenAICompat object with all fields set.
 */
function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
	const has = (fragment: string) => baseUrl.includes(fragment);
	// Cerebras/xAI/Mistral/Chutes deviate from the OpenAI reference behavior
	// (reject `store` and the `developer` role).
	const nonStandard = has("cerebras.ai") || has("api.x.ai") || has("mistral.ai") || has("chutes.ai");
	return {
		supportsStore: !nonStandard,
		supportsDeveloperRole: !nonStandard,
		// Grok (x.ai) rejects the reasoning_effort parameter.
		supportsReasoningEffort: !has("api.x.ai"),
		// Mistral/Chutes expect max_tokens rather than max_completion_tokens.
		maxTokensField: has("mistral.ai") || has("chutes.ai") ? "max_tokens" : "max_completion_tokens",
	};
}
/**
 * Get resolved compatibility settings for a model.
 * Uses explicit model.compat if provided, otherwise auto-detects from URL.
 * Overrides are merged field-by-field: any unset field falls back to detection.
 */
function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
	const fallback = detectCompatFromUrl(model.baseUrl);
	const overrides = model.compat;
	if (!overrides) return fallback;
	return {
		supportsStore: overrides.supportsStore ?? fallback.supportsStore,
		supportsDeveloperRole: overrides.supportsDeveloperRole ?? fallback.supportsDeveloperRole,
		supportsReasoningEffort: overrides.supportsReasoningEffort ?? fallback.supportsReasoningEffort,
		maxTokensField: overrides.maxTokensField ?? fallback.maxTokensField,
	};
}

View file

@ -27,12 +27,12 @@ import type {
import { AssistantMessageEventStream } from "../utils/event-stream.js";
import { parseStreamingJson } from "../utils/json-parse.js";
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { validateToolArguments } from "../utils/validation.js";
import { transformMessages } from "./transorm-messages.js";
// OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high";
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
reasoningSummary?: "auto" | "detailed" | "concise" | null;
}
@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -157,7 +158,10 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
else if (event.type === "response.content_part.added") {
if (currentItem && currentItem.type === "message") {
currentItem.content = currentItem.content || [];
currentItem.content.push(event.part);
// Filter out ReasoningText, only accept output_text and refusal
if (event.part.type === "output_text" || event.part.type === "refusal") {
currentItem.content.push(event.part);
}
}
} else if (event.type === "response.output_text.delta") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
@ -238,14 +242,6 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
arguments: JSON.parse(item.arguments),
};
// Validate tool arguments if tool definition is available
if (context.tools) {
const tool = context.tools.find((t) => t.name === toolCall.name);
if (tool) {
toolCall.arguments = validateToolArguments(tool, toolCall);
}
}
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
}
}
@ -260,6 +256,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}

View file

@ -122,6 +122,9 @@ function mapOptionsForApi<TApi extends Api>(
apiKey: apiKey || options?.apiKey,
};
// Helper to clamp xhigh to high for providers that don't support it
const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
switch (model.api) {
case "anthropic-messages": {
if (!options?.reasoning) return base satisfies AnthropicOptions;
@ -136,7 +139,7 @@ function mapOptionsForApi<TApi extends Api>(
return {
...base,
thinkingEnabled: true,
thinkingBudgetTokens: anthropicBudgets[options.reasoning],
thinkingBudgetTokens: anthropicBudgets[clampReasoning(options.reasoning)!],
} satisfies AnthropicOptions;
}
@ -155,7 +158,10 @@ function mapOptionsForApi<TApi extends Api>(
case "google-generative-ai": {
if (!options?.reasoning) return base as any;
const googleBudget = getGoogleBudget(model as Model<"google-generative-ai">, options.reasoning);
const googleBudget = getGoogleBudget(
model as Model<"google-generative-ai">,
clampReasoning(options.reasoning)!,
);
return {
...base,
thinking: {
@ -173,10 +179,12 @@ function mapOptionsForApi<TApi extends Api>(
}
}
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ReasoningEffort): number {
type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
if (model.id.includes("2.5-pro")) {
const budgets = {
const budgets: Record<ClampedReasoningEffort, number> = {
minimal: 128,
low: 2048,
medium: 8192,
@ -187,7 +195,7 @@ function getGoogleBudget(model: Model<"google-generative-ai">, effort: Reasoning
if (model.id.includes("2.5-flash")) {
// Covers 2.5-flash-lite as well
const budgets = {
const budgets: Record<ClampedReasoningEffort, number> = {
minimal: 128,
low: 2048,
medium: 8192,

View file

@ -29,7 +29,7 @@ export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi];
export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter" | "zai";
export type Provider = KnownProvider | string;
export type ReasoningEffort = "minimal" | "low" | "medium" | "high";
export type ReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
// Base options all providers share
export interface StreamOptions {
@ -82,6 +82,7 @@ export interface Usage {
output: number;
cacheRead: number;
cacheWrite: number;
totalTokens: number;
cost: {
input: number;
output: number;
@ -151,6 +152,21 @@ export type AssistantMessageEvent =
| { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage }
| { type: "error"; reason: Extract<StopReason, "aborted" | "error">; error: AssistantMessage };
/**
 * Compatibility settings for openai-completions API.
 * Use this to override URL-based auto-detection for custom providers.
 * Resolution is per-field: any field left unset falls back to the value
 * auto-detected from the model's baseUrl.
 */
export interface OpenAICompat {
	/** Whether the provider supports the `store` field. Default: auto-detected from URL. */
	supportsStore?: boolean;
	/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
	supportsDeveloperRole?: boolean;
	/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
	supportsReasoningEffort?: boolean;
	/** Which field to use for max tokens. Default: auto-detected from URL. */
	maxTokensField?: "max_completion_tokens" | "max_tokens";
}
// Model interface for the unified model system
export interface Model<TApi extends Api> {
id: string;
@ -169,4 +185,6 @@ export interface Model<TApi extends Api> {
contextWindow: number;
maxTokens: number;
headers?: Record<string, string>;
/** Compatibility overrides for openai-completions API. If not set, auto-detected from baseUrl. */
compat?: TApi extends "openai-completions" ? OpenAICompat : never;
}

View file

@ -0,0 +1,110 @@
import type { AssistantMessage } from "../types.js";
/**
* Regex patterns to detect context overflow errors from different providers.
*
* These patterns match error messages returned when the input exceeds
* the model's context window.
*
* Provider-specific patterns (with example error messages):
*
* - Anthropic: "prompt is too long: 213462 tokens > 200000 maximum"
* - OpenAI: "Your input exceeds the context window of this model"
* - Google: "The input token count (1196265) exceeds the maximum number of tokens allowed (1048575)"
* - xAI: "This model's maximum prompt length is 131072 but the request contains 537812 tokens"
* - Groq: "Please reduce the length of the messages or completion"
* - OpenRouter: "This endpoint's maximum context length is X tokens. However, you requested about Y tokens"
* - llama.cpp: "the request exceeds the available context size, try increasing it"
* - LM Studio: "tokens to keep from the initial prompt is greater than the context length"
* - Cerebras: Returns "400 status code (no body)" - handled separately below
 * - z.ai: Usually does NOT error and accepts overflow silently (occasionally rate-limits instead) - handled via usage.input > contextWindow
* - Ollama: Silently truncates input - not detectable via error message
*/
// Matching is case-insensitive; callers test with Array.prototype.some, so the
// order of entries does not affect the boolean result, only how quickly a
// match is found. Provider-specific patterns first, generic fallbacks last.
const OVERFLOW_PATTERNS = [
	/prompt is too long/i, // Anthropic
	/exceeds the context window/i, // OpenAI (Completions & Responses API)
	/input token count.*exceeds the maximum/i, // Google (Gemini)
	/maximum prompt length is \d+/i, // xAI (Grok)
	/reduce the length of the messages/i, // Groq
	/maximum context length is \d+ tokens/i, // OpenRouter (all backends)
	/exceeds the available context size/i, // llama.cpp server
	/greater than the context length/i, // LM Studio
	/context length exceeded/i, // Generic fallback
	/too many tokens/i, // Generic fallback
	/token limit exceeded/i, // Generic fallback
];
/**
* Check if an assistant message represents a context overflow error.
*
* This handles two cases:
* 1. Error-based overflow: Most providers return stopReason "error" with a
* specific error message pattern.
* 2. Silent overflow: Some providers accept overflow requests and return
* successfully. For these, we check if usage.input exceeds the context window.
*
* ## Reliability by Provider
*
* **Reliable detection (returns error with detectable message):**
* - Anthropic: "prompt is too long: X tokens > Y maximum"
* - OpenAI (Completions & Responses): "exceeds the context window"
* - Google Gemini: "input token count exceeds the maximum"
* - xAI (Grok): "maximum prompt length is X but request contains Y"
* - Groq: "reduce the length of the messages"
* - Cerebras: 400/413 status code (no body)
* - OpenRouter (all backends): "maximum context length is X tokens"
* - llama.cpp: "exceeds the available context size"
* - LM Studio: "greater than the context length"
*
* **Unreliable detection:**
* - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
* sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
* - Ollama: Silently truncates input without error. Cannot be detected via this function.
* The response will have usage.input < expected, but we don't know the expected value.
*
* ## Custom Providers
*
* If you've added custom models via settings.json, this function may not detect
* overflow errors from those providers. To add support:
*
* 1. Send a request that exceeds the model's context window
* 2. Check the errorMessage in the response
* 3. Create a regex pattern that matches the error
* 4. The pattern should be added to OVERFLOW_PATTERNS in this file, or
* check the errorMessage yourself before calling this function
*
* @param message - The assistant message to check
* @param contextWindow - Optional context window size for detecting silent overflow (z.ai)
* @returns true if the message indicates a context overflow
*/
export function isContextOverflow(message: AssistantMessage, contextWindow?: number): boolean {
	// Case 1: the provider rejected the request with an error we can pattern-match.
	if (message.stopReason === "error" && message.errorMessage) {
		const text = message.errorMessage;
		for (const pattern of OVERFLOW_PATTERNS) {
			if (pattern.test(text)) return true;
		}
		// Cerebras special case: a bare 400/413 status with no response body.
		if (/^4(00|13)\s*(status code)?\s*\(no body\)/i.test(text)) return true;
	}

	// Case 2: silent overflow (z.ai style) — the request "succeeded" but the
	// reported input tokens (fresh + cached) exceed the model's context window.
	if (contextWindow && message.stopReason === "stop") {
		if (message.usage.input + message.usage.cacheRead > contextWindow) return true;
	}

	return false;
}
/**
 * Get the overflow patterns for testing purposes.
 * Returns a fresh array so callers cannot mutate the module-level list.
 */
export function getOverflowPatterns(): RegExp[] {
	return OVERFLOW_PATTERNS.slice();
}

View file

@ -27,6 +27,21 @@ if (!isBrowserExtension) {
}
}
/**
 * Finds a tool by name and validates the tool call arguments against its TypeBox schema
 * @param tools Array of tool definitions to search
 * @param toolCall The tool call from the LLM
 * @returns The validated arguments
 * @throws Error if no tool matches the call's name, or if validation fails
 */
export function validateToolCall(tools: Tool[], toolCall: ToolCall): any {
	// Linear scan; tool lists are small, so a lookup map would be overkill.
	for (const candidate of tools) {
		if (candidate.name === toolCall.name) {
			return validateToolArguments(candidate, toolCall);
		}
	}
	throw new Error(`Tool "${toolCall.name}" not found`);
}
/**
* Validates tool call arguments against the tool's TypeBox schema
* @param tool The tool definition with TypeBox schema

View file

@ -0,0 +1,465 @@
/**
* Test context overflow error handling across providers.
*
* Context overflow occurs when the input (prompt + history) exceeds
* the model's context window. This is different from output token limits.
*
* Expected behavior: All providers should return stopReason: "error"
* with an errorMessage that indicates the context was too large,
* OR (for z.ai) return successfully with usage.input > contextWindow.
*
* The isContextOverflow() function must return true for all providers.
*/
import type { ChildProcess } from "child_process";
import { execSync, spawn } from "child_process";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js";
// Lorem ipsum paragraph for realistic token estimation
const LOREM_IPSUM = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. `;

// Generate a string that will exceed the context window.
// Estimates ~4 chars per token (works better with varied text than repeated chars).
function generateOverflowContent(contextWindow: number): string {
	const tokensNeeded = contextWindow + 10000; // overshoot the window by 10k tokens
	const charsNeeded = tokensNeeded * 4 * 1.5; // ~4 chars/token plus a 1.5x safety margin
	const copies = Math.ceil(charsNeeded / LOREM_IPSUM.length);
	return LOREM_IPSUM.repeat(copies);
}
/** Snapshot of one provider's response to an intentionally oversized request. */
interface OverflowResult {
	provider: string; // model.provider (e.g. "anthropic")
	model: string; // model.id
	contextWindow: number; // model.contextWindow used to size the overflow input
	stopReason: string; // response.stopReason ("error", "stop", ...)
	errorMessage: string | undefined; // provider error text, if any
	usage: Usage; // token usage as reported by the provider
	hasUsageData: boolean; // true when usage.input > 0 or usage.cacheRead > 0
	response: AssistantMessage; // full response, fed to isContextOverflow()
}
/**
 * Send a deliberately oversized prompt to the given model and collect how the
 * provider reacted (stop reason, error text, usage) for later assertions.
 */
async function testContextOverflow(model: Model<any>, apiKey: string): Promise<OverflowResult> {
	// A single user message big enough to blow past the model's context window.
	const context: Context = {
		systemPrompt: "You are a helpful assistant.",
		messages: [
			{
				role: "user",
				content: generateOverflowContent(model.contextWindow),
				timestamp: Date.now(),
			},
		],
	};

	const response = await complete(model, context, { apiKey });

	return {
		provider: model.provider,
		model: model.id,
		contextWindow: model.contextWindow,
		stopReason: response.stopReason,
		errorMessage: response.errorMessage,
		usage: response.usage,
		hasUsageData: response.usage.input > 0 || response.usage.cacheRead > 0,
		response,
	};
}
/** Pretty-print a single overflow test result to the console. */
function logResult(result: OverflowResult) {
	const lines = [
		`\n${result.provider} / ${result.model}:`,
		` contextWindow: ${result.contextWindow}`,
		` stopReason: ${result.stopReason}`,
		` errorMessage: ${result.errorMessage}`,
		` usage: ${JSON.stringify(result.usage)}`,
		` hasUsageData: ${result.hasUsageData}`,
	];
	for (const line of lines) console.log(line);
}
// =============================================================================
// Anthropic
// Expected pattern: "prompt is too long: X tokens > Y maximum"
// =============================================================================
describe("Context overflow error handling", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
it("claude-3-5-haiku - should detect overflow via isContextOverflow", async () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
const result = await testContextOverflow(model, process.env.ANTHROPIC_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
it("claude-sonnet-4 - should detect overflow via isContextOverflow", async () => {
const model = getModel("anthropic", "claude-sonnet-4-20250514");
const result = await testContextOverflow(model, process.env.ANTHROPIC_OAUTH_TOKEN!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// OpenAI
// Expected pattern: "exceeds the context window"
// =============================================================================
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
it("gpt-4o-mini - should detect overflow via isContextOverflow", async () => {
const model = { ...getModel("openai", "gpt-4o-mini") };
model.api = "openai-completions" as any;
const result = await testContextOverflow(model, process.env.OPENAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the context window/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
it("gpt-4o - should detect overflow via isContextOverflow", async () => {
const model = getModel("openai", "gpt-4o");
const result = await testContextOverflow(model, process.env.OPENAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the context window/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// Google
// Expected pattern: "input token count (X) exceeds the maximum"
// =============================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
it("gemini-2.0-flash - should detect overflow via isContextOverflow", async () => {
const model = getModel("google", "gemini-2.0-flash");
const result = await testContextOverflow(model, process.env.GEMINI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// xAI
// Expected pattern: "maximum prompt length is X but the request contains Y"
// =============================================================================
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it("grok-3-fast - should detect overflow via isContextOverflow", async () => {
const model = getModel("xai", "grok-3-fast");
const result = await testContextOverflow(model, process.env.XAI_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum prompt length is \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// Groq
// Expected pattern: "reduce the length of the messages"
// =============================================================================
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
it("llama-3.3-70b-versatile - should detect overflow via isContextOverflow", async () => {
const model = getModel("groq", "llama-3.3-70b-versatile");
const result = await testContextOverflow(model, process.env.GROQ_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/reduce the length of the messages/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// Cerebras
// Expected: 400/413 status code with no body
// =============================================================================
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
it("qwen-3-235b - should detect overflow via isContextOverflow", async () => {
const model = getModel("cerebras", "qwen-3-235b-a22b-instruct-2507");
const result = await testContextOverflow(model, process.env.CEREBRAS_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
// Cerebras returns status code with no body
expect(result.errorMessage).toMatch(/4(00|13).*\(no body\)/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// z.ai
// Special case: Sometimes accepts overflow silently, sometimes rate limits
// Detection via usage.input > contextWindow when successful
// =============================================================================
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it("glm-4.5-flash - should detect overflow via isContextOverflow (silent overflow or rate limit)", async () => {
const model = getModel("zai", "glm-4.5-flash");
const result = await testContextOverflow(model, process.env.ZAI_API_KEY!);
logResult(result);
// z.ai behavior is inconsistent:
// - Sometimes accepts overflow and returns successfully with usage.input > contextWindow
// - Sometimes returns rate limit error
// Either way, isContextOverflow should detect it (via usage check or we skip if rate limited)
if (result.stopReason === "stop") {
expect(result.hasUsageData).toBe(true);
expect(result.usage.input).toBeGreaterThan(model.contextWindow);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
} else {
// Rate limited or other error - just log and pass
console.log(" z.ai returned error (possibly rate limited), skipping overflow detection");
}
}, 120000);
});
// =============================================================================
// OpenRouter - Multiple backend providers
// Expected pattern: "maximum context length is X tokens"
// =============================================================================
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
// Anthropic backend
it("anthropic/claude-sonnet-4 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "anthropic/claude-sonnet-4");
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
// DeepSeek backend
it("deepseek/deepseek-v3.2 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "deepseek/deepseek-v3.2");
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
// Mistral backend
it("mistralai/mistral-large-2512 via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "mistralai/mistral-large-2512");
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
// Google backend
it("google/gemini-2.5-flash via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "google/gemini-2.5-flash");
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
// Meta/Llama backend
it("meta-llama/llama-4-maverick via OpenRouter - should detect overflow via isContextOverflow", async () => {
const model = getModel("openrouter", "meta-llama/llama-4-maverick");
const result = await testContextOverflow(model, process.env.OPENROUTER_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/maximum context length is \d+ tokens/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// Ollama (local)
// =============================================================================
// Check if ollama is installed
let ollamaInstalled = false;
try {
execSync("which ollama", { stdio: "ignore" });
ollamaInstalled = true;
} catch {
ollamaInstalled = false;
}
describe.skipIf(!ollamaInstalled)("Ollama (local)", () => {
let ollamaProcess: ChildProcess | null = null;
let model: Model<"openai-completions">;
beforeAll(async () => {
// Check if model is available, if not pull it
try {
execSync("ollama list | grep -q 'gpt-oss:20b'", { stdio: "ignore" });
} catch {
console.log("Pulling gpt-oss:20b model for Ollama overflow tests...");
try {
execSync("ollama pull gpt-oss:20b", { stdio: "inherit" });
} catch (e) {
console.warn("Failed to pull gpt-oss:20b model, tests will be skipped");
return;
}
}
// Start ollama server
ollamaProcess = spawn("ollama", ["serve"], {
detached: false,
stdio: "ignore",
});
// Wait for server to be ready
await new Promise<void>((resolve) => {
const checkServer = async () => {
try {
const response = await fetch("http://localhost:11434/api/tags");
if (response.ok) {
resolve();
} else {
setTimeout(checkServer, 500);
}
} catch {
setTimeout(checkServer, 500);
}
};
setTimeout(checkServer, 1000);
});
model = {
id: "gpt-oss:20b",
api: "openai-completions",
provider: "ollama",
baseUrl: "http://localhost:11434/v1",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 16000,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
name: "Ollama GPT-OSS 20B",
};
}, 60000);
afterAll(() => {
if (ollamaProcess) {
ollamaProcess.kill("SIGTERM");
ollamaProcess = null;
}
});
it("gpt-oss:20b - should detect overflow via isContextOverflow (ollama silently truncates)", async () => {
const result = await testContextOverflow(model, "ollama");
logResult(result);
// Ollama silently truncates input instead of erroring
// It returns stopReason "stop" with truncated usage
// We cannot detect overflow via error message, only via usage comparison
if (result.stopReason === "stop" && result.hasUsageData) {
// Ollama truncated - check if reported usage is less than what we sent
// This is a "silent overflow" - we can detect it if we know expected input size
console.log(" Ollama silently truncated input to", result.usage.input, "tokens");
// For now, we accept this behavior - Ollama doesn't give us a way to detect overflow
} else if (result.stopReason === "error") {
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}
}, 300000); // 5 min timeout for local model
});
// =============================================================================
// LM Studio (local) - Skip if not running
// =============================================================================
const lmStudioRunning = (() => {
	// Probe the local LM Studio endpoint; a failed or timed-out curl means "not running".
	try {
		execSync("curl -s --max-time 1 http://localhost:1234/v1/models > /dev/null", { stdio: "ignore" });
		return true;
	} catch {
		return false;
	}
})();
describe.skipIf(!lmStudioRunning)("LM Studio (local)", () => {
	it("should detect overflow via isContextOverflow", async () => {
		const lmStudioModel: Model<"openai-completions"> = {
			id: "local-model",
			api: "openai-completions",
			provider: "lm-studio",
			baseUrl: "http://localhost:1234/v1",
			reasoning: false,
			input: ["text"],
			contextWindow: 8192,
			maxTokens: 2048,
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			name: "LM Studio Local Model",
		};
		const result = await testContextOverflow(lmStudioModel, "lm-studio");
		logResult(result);
		expect(result.stopReason).toBe("error");
		expect(isContextOverflow(result.response, lmStudioModel.contextWindow)).toBe(true);
	}, 120000);
});
// =============================================================================
// llama.cpp server (local) - Skip if not running
// =============================================================================
const llamaCppRunning = (() => {
	// Probe the local llama.cpp health endpoint; failure or timeout means "not running".
	try {
		execSync("curl -s --max-time 1 http://localhost:8081/health > /dev/null", { stdio: "ignore" });
		return true;
	} catch {
		return false;
	}
})();
describe.skipIf(!llamaCppRunning)("llama.cpp (local)", () => {
	it("should detect overflow via isContextOverflow", async () => {
		// Using small context (4096) to match server --ctx-size setting
		const llamaCppModel: Model<"openai-completions"> = {
			id: "local-model",
			api: "openai-completions",
			provider: "llama.cpp",
			baseUrl: "http://localhost:8081/v1",
			reasoning: false,
			input: ["text"],
			contextWindow: 4096,
			maxTokens: 2048,
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
			name: "llama.cpp Local Model",
		};
		const result = await testContextOverflow(llamaCppModel, "llama.cpp");
		logResult(result);
		expect(result.stopReason).toBe("error");
		expect(isContextOverflow(result.response, llamaCppModel.contextWindow)).toBe(true);
	}, 120000);
});
});

View file

@ -92,6 +92,7 @@ async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 10,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",

View file

@ -46,6 +46,7 @@ const providerContexts = {
output: 50,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 150,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -97,6 +98,7 @@ const providerContexts = {
output: 60,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 180,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -147,6 +149,7 @@ const providerContexts = {
output: 55,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 165,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -199,6 +202,7 @@ const providerContexts = {
output: 58,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 173,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -243,6 +247,7 @@ const providerContexts = {
output: 25,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 75,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "error",

View file

@ -0,0 +1,331 @@
/**
* Test totalTokens field across all providers.
*
* totalTokens represents the total number of tokens processed by the LLM,
* including input (with cache) and output (with thinking). This is the
* base for calculating context size for the next request.
*
* - OpenAI Completions: Uses native total_tokens field
* - OpenAI Responses: Uses native total_tokens field
* - Google: Uses native totalTokenCount field
* - Anthropic: Computed as input + output + cacheRead + cacheWrite
* - Other OpenAI-compatible providers: Uses native total_tokens field
*/
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
// Generate a long system prompt to trigger caching (>2k bytes for most providers)
// NOTE(review): 50 repetitions of ~190 bytes of lorem ipsum yields roughly 9.5KB,
// presumably sized with ample margin over provider cache minimums — confirm against
// each provider's minimum cacheable prompt size if thresholds change.
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
Here is some additional context that makes this system prompt long enough to trigger caching:
${Array(50)
.fill(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.",
)
.join("\n\n")}
Remember: Always be helpful and concise.`;
/**
 * Runs two chained completions against `llm` sharing the same long system
 * prompt: the first with a cold cache, the second extending the conversation
 * so the provider can serve the prompt from cache. Both must stop normally.
 * Returns the usage records of both responses for totalTokens assertions.
 */
async function testTotalTokensWithCache<TApi extends Api>(
	llm: Model<TApi>,
	options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
): Promise<{ first: Usage; second: Usage }> {
	// Request 1: cold cache — long system prompt plus a single user turn.
	const firstContext: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [
			{
				role: "user",
				content: "What is 2 + 2? Reply with just the number.",
				timestamp: Date.now(),
			},
		],
	};
	const firstResponse = await complete(llm, firstContext, options);
	expect(firstResponse.stopReason).toBe("stop");
	// Request 2: identical system prompt with the conversation extended by the
	// previous assistant response — should trigger a cache read.
	const followUp = {
		role: "user" as const,
		content: "What is 3 + 3? Reply with just the number.",
		timestamp: Date.now(),
	};
	const secondContext: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [...firstContext.messages, firstResponse, followUp],
	};
	const secondResponse = await complete(llm, secondContext, options);
	expect(secondResponse.stopReason).toBe("stop");
	return { first: firstResponse.usage, second: secondResponse.usage };
}
// Print one labeled usage record plus the locally computed component sum,
// so test logs show totalTokens next to input+output+cacheRead+cacheWrite.
function logUsage(label: string, usage: Usage) {
	const { input, output, cacheRead, cacheWrite, totalTokens } = usage;
	const computed = input + output + cacheRead + cacheWrite;
	console.log(` ${label}:`);
	console.log(` input: ${input}, output: ${output}, cacheRead: ${cacheRead}, cacheWrite: ${cacheWrite}`);
	console.log(` totalTokens: ${totalTokens}, computed: ${computed}`);
}
// Assert the invariant under test: totalTokens equals the exact sum of
// input + output + cacheRead + cacheWrite.
function assertTotalTokensEqualsComponents(usage: Usage) {
	const { input, output, cacheRead, cacheWrite, totalTokens } = usage;
	expect(totalTokens).toBe(input + output + cacheRead + cacheWrite);
}
describe("totalTokens field", () => {
// =========================================================================
// Anthropic
// =========================================================================
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
	it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
		const m = getModel("anthropic", "claude-3-5-haiku-20241022");
		console.log(`\nAnthropic / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.ANTHROPIC_API_KEY });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
		// Anthropic should show cache activity on at least one of the two requests.
		const hasCache = usages.second.cacheRead > 0 || usages.second.cacheWrite > 0 || usages.first.cacheWrite > 0;
		expect(hasCache).toBe(true);
	}, 60000);
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
	it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
		const m = getModel("anthropic", "claude-sonnet-4-20250514");
		console.log(`\nAnthropic OAuth / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
		// Anthropic should show cache activity on at least one of the two requests.
		const hasCache = usages.second.cacheRead > 0 || usages.second.cacheWrite > 0 || usages.first.cacheWrite > 0;
		expect(hasCache).toBe(true);
	}, 60000);
});
// =========================================================================
// OpenAI
// =========================================================================
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
	it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
		// Force the chat-completions API instead of the model's default responses API.
		const base = getModel("openai", "gpt-4o-mini")!;
		const m: Model<"openai-completions"> = { ...base, api: "openai-completions" };
		console.log(`\nOpenAI Completions / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m);
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
	it("gpt-4o - should return totalTokens equal to sum of components", async () => {
		const m = getModel("openai", "gpt-4o");
		console.log(`\nOpenAI Responses / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m);
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// Google
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
	it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
		const m = getModel("google", "gemini-2.0-flash");
		console.log(`\nGoogle / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m);
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// xAI
// =========================================================================
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
	it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
		const m = getModel("xai", "grok-3-fast");
		console.log(`\nxAI / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.XAI_API_KEY });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// Groq
// =========================================================================
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
	it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
		const m = getModel("groq", "openai/gpt-oss-120b");
		console.log(`\nGroq / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.GROQ_API_KEY });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// Cerebras
// =========================================================================
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
	it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
		const m = getModel("cerebras", "gpt-oss-120b");
		console.log(`\nCerebras / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.CEREBRAS_API_KEY });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// z.ai
// =========================================================================
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
	it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
		const m = getModel("zai", "glm-4.5-flash");
		console.log(`\nz.ai / ${m.id}:`);
		const usages = await testTotalTokensWithCache(m, { apiKey: process.env.ZAI_API_KEY });
		logUsage("First request", usages.first);
		logUsage("Second request", usages.second);
		assertTotalTokensEqualsComponents(usages.first);
		assertTotalTokensEqualsComponents(usages.second);
	}, 60000);
});
// =========================================================================
// OpenRouter - Multiple backend providers
// =========================================================================
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
	// All OpenRouter cases share an identical flow; register them from a table
	// instead of five duplicated test bodies (DRY). Test titles are generated
	// in the exact original format so CI name filters keep matching.
	const modelIds = [
		"anthropic/claude-sonnet-4",
		"deepseek/deepseek-chat",
		"mistralai/mistral-small-3.1-24b-instruct",
		"google/gemini-2.0-flash-001",
		"meta-llama/llama-4-maverick",
	] as const;
	for (const modelId of modelIds) {
		it(`${modelId} - should return totalTokens equal to sum of components`, async () => {
			const llm = getModel("openrouter", modelId);
			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
			logUsage("First request", first);
			logUsage("Second request", second);
			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	}
});
});

View file

@ -42,6 +42,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -126,6 +127,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -213,6 +215,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",

View file

@ -0,0 +1,69 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Context, Model } from "../src/types.js";
// Build a one-turn context with a randomized arithmetic prompt so repeated
// runs are not served from provider-side caches.
function makeContext(): Context {
	const a = Math.floor(Math.random() * 100);
	const b = Math.floor(Math.random() * 100);
	return {
		messages: [
			{
				role: "user",
				content: `What is ${a} + ${b}? Think step by step.`,
				timestamp: Date.now(),
			},
		],
	};
}
describe.skipIf(!process.env.OPENAI_API_KEY)("xhigh reasoning", () => {
	describe("codex-max (supports xhigh)", () => {
		// Note: codex models only support the responses API, not chat completions
		it("should work with openai-responses", async () => {
			const model = getModel("openai", "gpt-5.1-codex-max");
			const eventStream = stream(model, makeContext(), { reasoningEffort: "xhigh" });
			let sawThinking = false;
			for await (const event of eventStream) {
				if (event.type === "thinking_start" || event.type === "thinking_delta") {
					sawThinking = true;
				}
			}
			const response = await eventStream.result();
			expect(response.stopReason, `Error: ${response.errorMessage}`).toBe("stop");
			expect(response.content.some((b) => b.type === "text")).toBe(true);
			// Thinking may surface as stream events or as a content block.
			expect(sawThinking || response.content.some((b) => b.type === "thinking")).toBe(true);
		});
	});
	describe("gpt-5-mini (does not support xhigh)", () => {
		it("should error with openai-responses when using xhigh", async () => {
			const model = getModel("openai", "gpt-5-mini");
			const eventStream = stream(model, makeContext(), { reasoningEffort: "xhigh" });
			for await (const _ of eventStream) {
				// drain events
			}
			const response = await eventStream.result();
			expect(response.stopReason).toBe("error");
			expect(response.errorMessage).toContain("xhigh");
		});
		it("should error with openai-completions when using xhigh", async () => {
			// Force the chat-completions API to cover both request paths.
			const base = getModel("openai", "gpt-5-mini");
			const model: Model<"openai-completions"> = { ...base, api: "openai-completions" };
			const eventStream = stream(model, makeContext(), { reasoningEffort: "xhigh" });
			for await (const _ of eventStream) {
				// drain events
			}
			const response = await eventStream.result();
			expect(response.stopReason).toBe("error");
			expect(response.errorMessage).toContain("xhigh");
		});
	});
});

View file

@ -2,6 +2,100 @@
## [Unreleased]
### Breaking Changes
- **Custom themes require new color tokens**: Themes must now include `thinkingXhigh` and `bashMode` color tokens. The theme loader provides helpful error messages listing missing tokens. See built-in themes (dark.json, light.json) for reference values.
### Added
- **OpenAI compatibility overrides in models.json**: Custom models using `openai-completions` API can now specify a `compat` object to override provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
- **xhigh thinking level**: Added `xhigh` thinking level for OpenAI codex-max models. Cycle through thinking levels with Shift+Tab; `xhigh` appears only when using a codex-max model. ([#143](https://github.com/badlogic/pi-mono/issues/143))
- **Collapse changelog setting**: Add `"collapseChangelog": true` to `~/.pi/agent/settings.json` to show a condensed "Updated to vX.Y.Z" message instead of the full changelog after updates. Use `/changelog` to view the full changelog. ([#148](https://github.com/badlogic/pi-mono/issues/148))
## [0.13.2] - 2025-12-07
### Changed
- **Tool output truncation**: All tools now enforce consistent truncation limits with actionable notices for the LLM. ([#134](https://github.com/badlogic/pi-mono/issues/134))
- **Limits**: 2000 lines OR 50KB (whichever hits first), never partial lines
- **read**: Shows `[Showing lines X-Y of Z. Use offset=N to continue]`. If first line exceeds 50KB, suggests bash command
- **bash**: Tail truncation with temp file. Shows `[Showing lines X-Y of Z. Full output: /tmp/...]`
- **grep**: Pre-truncates match lines to 500 chars. Shows match limit and line truncation notices
- **find/ls**: Shows result/entry limit notices
- TUI displays truncation warnings in yellow at bottom of tool output (visible even when collapsed)
## [0.13.1] - 2025-12-06
### Added
- **Flexible Windows shell configuration**: The bash tool now supports multiple shell sources beyond Git Bash. Resolution order: (1) custom `shellPath` in settings.json, (2) Git Bash in standard locations, (3) any bash.exe on PATH. This enables Cygwin, MSYS2, and other bash environments. Configure with `~/.pi/agent/settings.json`: `{"shellPath": "C:\\cygwin64\\bin\\bash.exe"}`.
### Fixed
- **Windows binary detection**: Fixed Bun compiled binary detection on Windows by checking for URL-encoded `%7EBUN` in addition to `$bunfs` and `~BUN` in `import.meta.url`. This ensures the binary correctly locates supporting files (package.json, themes, etc.) next to the executable.
## [0.12.15] - 2025-12-06
### Fixed
- **Editor crash with emojis/CJK characters**: Fixed crash when pasting or typing text containing wide characters (emojis like ✅, CJK characters) that caused line width to exceed terminal width. The editor now uses grapheme-aware text wrapping with proper visible width calculation.
## [0.12.14] - 2025-12-06
### Added
- **Double-Escape Branch Shortcut**: Press Escape twice with an empty editor to quickly open the `/branch` selector for conversation branching.
## [0.12.13] - 2025-12-05
### Changed
- **Faster startup**: Version check now runs in parallel with TUI initialization instead of blocking startup for up to 1 second. Update notifications appear in chat when the check completes.
## [0.12.12] - 2025-12-05
### Changed
- **Footer display**: Token counts now use M suffix for millions (e.g., `10.2M` instead of `10184k`). Context display shortened from `61.3% of 200k` to `61.3%/200k`.
### Fixed
- **Multi-key sequences in inputs**: Inputs like model search now handle multi-key sequences identically to the main prompt editor. ([#122](https://github.com/badlogic/pi-mono/pull/122) by [@markusylisiurunen](https://github.com/markusylisiurunen))
- **Line wrapping escape codes**: Fixed underline style bleeding into padding when wrapping long URLs. ANSI codes now attach to the correct content, and line-end resets only turn off underline (preserving background colors). ([#109](https://github.com/badlogic/pi-mono/issues/109))
### Added
- **Fuzzy search models and sessions**: Implemented a simple fuzzy search for models and sessions (e.g., `codexmax` now finds `gpt-5.1-codex-max`). ([#122](https://github.com/badlogic/pi-mono/pull/122) by [@markusylisiurunen](https://github.com/markusylisiurunen))
- **Prompt History Navigation**: Browse previously submitted prompts using Up/Down arrow keys when the editor is empty. Press Up to cycle through older prompts, Down to return to newer ones or clear the editor. Similar to shell history and Claude Code's prompt history feature. History is session-scoped and stores up to 100 entries. ([#121](https://github.com/badlogic/pi-mono/pull/121) by [@nicobailon](https://github.com/nicobailon))
- **`/resume` Command**: Switch to a different session mid-conversation. Opens an interactive selector showing all available sessions. Equivalent to the `--resume` CLI flag but can be used without restarting the agent. ([#117](https://github.com/badlogic/pi-mono/pull/117) by [@hewliyang](https://github.com/hewliyang))
## [0.12.11] - 2025-12-05
### Changed
- **Compaction UI**: Simplified collapsed compaction indicator to show warning-colored text with token count instead of styled banner. Removed redundant success message after compaction. ([#108](https://github.com/badlogic/pi-mono/issues/108))
### Fixed
- **Print mode error handling**: `-p` flag now outputs error messages and exits with code 1 when requests fail, instead of silently producing no output.
- **Branch selector crash**: Fixed TUI crash when user messages contained wide Unicode characters (like `✔` and similar glyphs) that caused line width to exceed terminal width. Now uses proper `truncateToWidth` instead of `substring`.
- **Bash output escape sequences**: Fixed incomplete stripping of terminal escape sequences in bash tool output. `stripAnsi` misses some sequences like standalone String Terminator (`ESC \`), which could cause rendering issues when displaying captured TUI output.
- **Footer overflow crash**: Fixed TUI crash when terminal width is too narrow for the footer stats line. The footer now truncates gracefully instead of overflowing.
### Added
- **`authHeader` option in models.json**: Custom providers can set `"authHeader": true` to automatically add `Authorization: Bearer <apiKey>` header. Useful for providers that require explicit auth headers. ([#81](https://github.com/badlogic/pi-mono/issues/81))
- **`--append-system-prompt` Flag**: Append additional text or file contents to the system prompt. Supports both inline text and file paths. Complements `--system-prompt` for layering custom instructions without replacing the base system prompt. ([#114](https://github.com/badlogic/pi-mono/pull/114) by [@markusylisiurunen](https://github.com/markusylisiurunen))
- **Thinking Block Toggle**: Added `Ctrl+T` shortcut to toggle visibility of LLM thinking blocks. When toggled off, shows a static "Thinking..." label instead of full content. Useful for reducing visual clutter during long conversations. ([#113](https://github.com/badlogic/pi-mono/pull/113) by [@markusylisiurunen](https://github.com/markusylisiurunen))
## [0.12.10] - 2025-12-04
### Added
- Added `gpt-5.1-codex-max` model support
## [0.12.9] - 2025-12-04
### Added
@ -40,378 +134,214 @@
### Added
- **`/debug` Command**: New slash command that writes debug information to `~/.pi/agent/pi-debug.log`, including terminal width and all rendered lines with their visible widths. Useful for diagnosing rendering issues.
### Fixed
- **Windows Carriage Return Handling**: Fixed tool output containing stray `\r` characters on Windows, which could cause rendering issues. Carriage returns are now stripped from bash output.
- **RPC Termination Safeguard**: When running as an RPC worker (stdin pipe detected), the CLI now exits immediately if the parent process terminates unexpectedly. Prevents orphaned RPC workers from persisting indefinitely and consuming system resources.
## [0.12.3] - 2025-12-02
### Fixed
- **Windows Terminal Truecolor Support**: Fixed theme colors appearing washed out in Windows Terminal. The color mode detection now checks for `WT_SESSION` environment variable to enable truecolor (24-bit RGB) support instead of falling back to 256-color mode.
- **Rate limit handling**: Anthropic rate limit errors now trigger automatic retry with exponential backoff (base 10s, max 5 retries). Previously these errors would abort the request immediately.
- **Usage tracking during retries**: Retried requests now correctly accumulate token usage from all attempts, not just the final successful one. Fixes artificially low token counts when requests were retried.
## [0.12.2] - 2025-12-02
### Fixed
### Changed
- **Windows Binary Path Resolution**: Fixed Bun compiled binary on Windows failing to find `package.json` and other assets. The binary was incorrectly using the Bun runtime's virtual filesystem path (`B:\~BUN\`) instead of the actual executable location. Now uses `process.execPath` which correctly points to the compiled binary, and updated detection to check for `%7EBUN` (URL-encoded `~BUN`) in `import.meta.url`.
- Removed support for gpt-4.5-preview and o3 models (not yet available)
## [0.12.1] - 2025-12-02
### Changed
### Added
- **Binary Archive Structure**: Binary downloads now contain `pi` (or `pi.exe` on Windows) instead of platform-suffixed names like `pi-darwin-arm64`, making it easier to use after extraction.
- **Models**: Added support for OpenAI's new models:
- `gpt-4.1` (128K context)
- `gpt-4.1-mini` (128K context)
- `gpt-4.1-nano` (128K context)
- `o3` (200K context, reasoning model)
- `o4-mini` (200K context, reasoning model)
## [0.12.0] - 2025-12-02
### Added
- **Standalone Binary Support**: Build a self-contained binary using Bun with `npm run build:binary`. Pre-built binaries for macOS (arm64/x64), Linux (x64/arm64), and Windows (x64) are available on GitHub Releases. Based on [#89](https://github.com/badlogic/pi-mono/pull/89) by [@steipete](https://github.com/steipete), extended with cross-platform path resolution and GitHub Actions for automated release builds.
- **`-p, --print` Flag**: Run in non-interactive batch mode. Processes input message or piped stdin without TUI, prints agent response directly to stdout. Ideal for scripting, piping, and CI/CD integration. Exits after first response.
- **`-P, --print-streaming` Flag**: Like `-p`, but streams response tokens as they arrive. Use `--print-streaming --no-markdown` for raw unformatted output.
- **`--print-turn` Flag**: Continue processing tool calls and agent turns until the agent naturally finishes or requires user input. Combine with `-p` for complete multi-turn conversations.
- **`--no-markdown` Flag**: Output raw text without Markdown formatting. Useful when piping output to tools that expect plain text.
- **Streaming Print Mode**: Added internal `printStreaming` option for streaming output in non-TUI mode.
- **RPC Mode `print` Command**: Send `{"type":"print","content":"text"}` to get formatted print output via `print_output` events.
- **Auto-Save in Print Mode**: Print mode conversations are automatically saved to the session directory, allowing later resumption with `--continue`.
- **Thinking level options**: Added `--thinking-off`, `--thinking-minimal`, `--thinking-low`, `--thinking-medium`, `--thinking-high` flags for directly specifying thinking level without the selector UI.
## [0.11.6] - 2025-12-02
### Changed
### Added
- **Simplified RPC Protocol**: Replaced the `prompt` wrapper command with direct message objects. Send `{"role":"user","content":"text"}` instead of `{"type":"prompt","message":"text"}`. Better aligns with message format throughout the codebase.
- **RPC Message Handling**: Agent now processes raw message objects directly, with `timestamp` auto-populated if missing.
- **OAuth Login Status Indicator**: The `/login` provider selector now shows "✓ logged in" next to providers where you're already authenticated. This makes it clear at a glance whether you're using your Claude Pro/Max subscription. ([#88](https://github.com/badlogic/pi-mono/pull/88) by [@steipete](https://github.com/steipete))
- **Subscription Cost Indicator**: The footer now shows "(sub)" next to the cost when using an OAuth subscription (e.g., `$0.123 (sub)`). This makes it visible at a glance—without needing to run `/login`—that you're using your Claude Pro/Max subscription.
## [0.11.9] - 2025-12-02
## [0.11.5] - 2025-12-01
### Changed
### Added
- Change Ctrl+I to Ctrl+P for model cycling shortcut to avoid collision with Tab key in some terminals
- **Custom Slash Commands**: Define reusable prompt templates as Markdown files. Place files in `~/.pi/agent/commands/` (global) or `.pi/commands/` (project-specific). Commands appear in `/` autocomplete with source indicators like `(user)` or `(project)`. Supports bash-style arguments (`$1`, `$2`, `$@`) with quote-aware parsing. Subdirectories create namespaced commands (e.g., `.pi/commands/frontend/component.md` shows as `(project:frontend)`). Optional `description` field in YAML frontmatter. Works from CLI as well (`pi -p "/review"`). ([#86](https://github.com/badlogic/pi-mono/issues/86))
## [0.11.4] - 2025-12-01
### Improved
- **TUI Crash Diagnostics**: When a render error occurs (line exceeds terminal width), all rendered lines are now written to `~/.pi/agent/pi-crash.log` with their indices and visible widths for easier debugging.
## [0.11.8] - 2025-12-01
### Fixed
- **Session Selector Crash with Wide Characters**: Fixed crash when running `pi -r` to resume sessions containing emojis, CJK characters, or other wide Unicode characters. The session list was using character count instead of visible terminal width for truncation, causing lines to exceed terminal width. Added `truncateToWidth()` utility that properly handles ANSI codes and wide characters. ([#85](https://github.com/badlogic/pi-mono/issues/85))
- Absolute glob patterns (e.g., `/Users/foo/**/*.ts`) are now handled correctly. Previously the leading `/` was being stripped, causing the pattern to be interpreted relative to the current directory.
## [0.11.3] - 2025-12-01
### Added
- **Circular Menu Navigation**: All menus (model selector, message history, file picker) now wrap around when navigating past the first or last item. Pressing up at the top jumps to the bottom, and pressing down at the bottom jumps to the top. ([#82](https://github.com/badlogic/pi-mono/pull/82) by [@butelo](https://github.com/butelo))
## [0.11.7] - 2025-12-01
### Fixed
- **RPC Mode Session Management**: Fixed session files not being saved in RPC mode (`--mode rpc`). Since version 0.9.0, the `agent.subscribe()` call with session management logic was only present in the TUI renderer, causing RPC mode to skip saving messages to session files. RPC mode now properly saves sessions just like interactive mode. ([#83](https://github.com/badlogic/pi-mono/issues/83))
- Fix read path traversal vulnerability. Paths are now validated to prevent reading outside the working directory or its parents. The `read` tool can read from `cwd`, its ancestors (for config files), and all descendants. Symlinks are resolved before validation.
## [0.11.6] - 2025-12-01
### Fixed
- Fix `--system-prompt <path>` allowing the path argument to be captured by the message collection, causing "file not found" errors.
## [0.11.5] - 2025-11-30
### Fixed
- Fixed fatal error "Cannot set properties of undefined (setting '0')" when editing empty files in the `edit` tool.
- Simplified `edit` tool output: Shows only "Edited file.txt" for successful edits instead of verbose search/replace details.
- Fixed fatal error in footer rendering when token counts contain NaN values due to missing usage data.
## [0.11.4] - 2025-11-30
### Fixed
- Fixed chat rendering crash when messages contain preformatted/styled text (e.g., thinking traces with gray italic styling). The markdown renderer now preserves existing ANSI escape codes when they appear before inline elements.
## [0.11.3] - 2025-11-29
### Fixed
- Fix file drop functionality for absolute paths
## [0.11.2] - 2025-11-29
### Fixed
- Fixed TUI crash when pasting content containing tab characters. Tabs are now converted to 4 spaces before insertion.
- Fixed terminal corruption after exit when shell integration sequences (OSC 133) appeared in bash output. These sequences are now stripped along with other ANSI codes.
## [0.11.1] - 2025-11-29
### Added
- **`--export` CLI Flag**: Export session files to self-contained HTML files from the command line. Auto-detects format (session manager format or streaming event format). Usage: `pi --export session.jsonl` or `pi --export session.jsonl output.html`. Note: Streaming event logs (from `--mode json`) don't contain system prompt or tool definitions, so those sections are omitted with a notice in the HTML. ([#80](https://github.com/badlogic/pi-mono/issues/80))
- **Git Branch File Watcher**: Footer now auto-updates when the git branch changes externally (e.g., running `git checkout` in another terminal). Watches `.git/HEAD` for changes and refreshes the branch display automatically. ([#79](https://github.com/badlogic/pi-mono/pull/79) by [@fightbulc](https://github.com/fightbulc))
- **Read-Only Exploration Tools**: Added `grep`, `find`, and `ls` tools for safe code exploration without modification risk. These tools are available via the new `--tools` flag.
- `grep`: Uses `ripgrep` (auto-downloaded) for fast regex searching. Respects `.gitignore` (including nested), supports glob filtering, context lines, and hidden files.
- `find`: Uses `fd` (auto-downloaded) for fast file finding. Respects `.gitignore`, supports glob patterns, and hidden files.
- `ls`: Lists directory contents with proper sorting and indicators.
- **`--tools` Flag**: New CLI flag to specify available tools (e.g., `--tools read,grep,find,ls` for read-only mode). Default behavior remains unchanged (`read,bash,edit,write`).
- **Dynamic System Prompt**: The system prompt now adapts to the selected tools, showing relevant guidelines and warnings (e.g., "READ-ONLY mode" when write tools are disabled).
- Added `fd` integration for file path autocompletion. Now uses `fd` for faster fuzzy file search
### Fixed
- **Prompt Restoration on API Key Error**: When submitting a message fails due to missing API key, the prompt is now restored to the editor instead of being lost. ([#77](https://github.com/badlogic/pi-mono/issues/77))
- **File `@` Autocomplete Performance**: Fixed severe UI jank when using `@` for file attachment in large repositories. The file picker now uses `fd` (a fast file finder) instead of synchronous directory walking with minimatch. On a 55k file repo, search time dropped from ~900ms to ~10ms per keystroke. If `fd` is not installed, it will be automatically downloaded to `~/.pi/agent/tools/` on first use. ([#69](https://github.com/badlogic/pi-mono/issues/69))
- **File Selector Styling**: Selected items in file autocomplete (`@` and Tab) now use consistent accent color for the entire line instead of mixed colors.
- Fixed keyboard shortcuts Ctrl+A, Ctrl+E, Ctrl+K, Ctrl+U, Ctrl+W, and word navigation (Option+Arrow) not working in VS Code integrated terminal and some other terminal emulators
## [0.10.2] - 2025-11-27
## [0.11.0] - 2025-11-29
### Added
- **File-based Slash Commands**: Create custom reusable prompts as `.txt` files in `~/.pi/slash-commands/`. Files become `/filename` commands with first-line descriptions. Supports `{{selection}}` placeholder for referencing selected/attached content.
- **`/branch` Command**: Create conversation branches from any previous user message. Opens a selector to pick a message, then creates a new session file starting from that point. Original message text is placed in the editor for modification.
- **Unified Content References**: Both `@path` in messages and `--file path` CLI arguments now use the same attachment system with consistent MIME type detection.
- **Drag & Drop Files**: Drop files onto the terminal to attach them to your message. Supports multiple files and both text and image content.
### Changed
- **HTML Export Prefix**: Exported session files now use `pi-session-` prefix (e.g., `pi-session-2025-11-13T12-27-53-866Z_xxx.html`) for easier `.gitignore` filtering ([#72](https://github.com/badlogic/pi-mono/issues/72))
- **Native Model Identity**: Removed "You are actually not Claude, you are Pi" from system prompt, allowing models to use their native identity ([#73](https://github.com/badlogic/pi-mono/issues/73))
## [0.10.1] - 2025-11-27
### Added
- **CLI File Arguments (`@file`)**: Include files in your initial message using the `@` prefix (e.g., `pi @prompt.md @image.png "Do this"`). All `@file` arguments are combined into the first message. Text files are wrapped in `<file name="path">content</file>` tags. Images (`.jpg`, `.jpeg`, `.png`, `.gif`, `.webp`) are attached as base64-encoded attachments. Supports `~` expansion, relative/absolute paths. Empty files are skipped. Works in interactive, `--print`, and `--mode text/json` modes. Not supported in `--mode rpc`. ([#54](https://github.com/badlogic/pi-mono/issues/54))
- **Model Selector with Search**: The `/model` command now opens a searchable list. Type to filter models by name, use arrows to navigate, Enter to select.
- **Improved File Autocomplete**: File path completion after `@` now supports fuzzy matching and shows file/directory indicators.
- **Session Selector with Search**: The `--resume` and `--session` flags now open a searchable session list with fuzzy filtering.
- **Attachment Display**: Files added via `@path` are now shown as "Attached: filename" in the user message, separate from the prompt text.
- **Tab Completion**: Tab key now triggers file path autocompletion anywhere in the editor, not just after `@` symbol.
### Fixed
- **Editor Cursor Navigation**: Fixed broken up/down arrow key navigation in the editor when lines wrap. Previously, pressing up/down would move between logical lines instead of visual (wrapped) lines, causing the cursor to jump unexpectedly. Now cursor navigation is based on rendered lines. Also fixed a bug where the cursor would appear on two lines simultaneously when positioned at a wrap boundary. Added word by word navigation via Option+Left/Right or Ctrl+Left/Right. ([#61](https://github.com/badlogic/pi-mono/pull/61))
- **Edit Diff Line Number Alignment**: Fixed two issues with diff display in the edit tool:
1. Line numbers were incorrect for edits far from the start of a file (e.g., showing 1, 2, 3 instead of 336, 337, 338). The skip count for context lines was being added after displaying lines instead of before.
2. When diff lines wrapped due to terminal width, the line number prefix lost its leading space alignment, and code indentation (spaces/tabs after line numbers) was lost. Rewrote `splitIntoTokensWithAnsi` in `pi-tui` to preserve whitespace as separate tokens instead of discarding it, so wrapped lines maintain proper alignment and indentation.
- Fixed autocomplete z-order issue where dropdown could appear behind chat messages
- Fixed cursor position when navigating through wrapped lines in the editor
- Fixed attachment handling for continued sessions to preserve file references
### Improved
- **Git Branch Display**: Footer now shows the active git branch after the directory path (e.g., `~/project (main)`). Branch is detected by reading `.git/HEAD` directly (fast, synchronous). Cache is refreshed after each assistant message to detect branch changes from git commands executed by the agent. ([#55](https://github.com/badlogic/pi-mono/issues/55))
- **HTML Export**: Added timestamps to each message, fixed text clipping with proper word-wrapping CSS, improved font selection (`ui-monospace`, `Cascadia Code`, `Source Code Pro`), reduced font sizes for more compact display (12px base), added model switch indicators in conversation timeline, created dedicated Tokens & Cost section with cumulative statistics (input/output/cache tokens, cost breakdown by type), added context usage display showing token count and percentage for the last model used, and now displays all models used during the session. ([#51](https://github.com/badlogic/pi-mono/issues/51), [#52](https://github.com/badlogic/pi-mono/issues/52))
## [0.10.0] - 2025-11-27
### Added
- **Fuzzy File Search (`@`)**: Type `@` followed by a search term to fuzzy-search files and folders across your project. Respects `.gitignore` and skips hidden files. Directories are prioritized in results. Based on [PR #60](https://github.com/badlogic/pi-mono/pull/60) by [@fightbulc](https://github.com/fightbulc), reimplemented with pure Node.js for fast, dependency-free searching.
### Fixed
- **Emoji Text Wrapping Crash**: Fixed crash when rendering text containing emojis (e.g., 😂) followed by long content like URLs. The `breakLongWord` function in `pi-tui` was iterating over UTF-16 code units instead of grapheme clusters, causing emojis (which are surrogate pairs) to be miscounted during line wrapping. Now uses `Intl.Segmenter` to properly handle multi-codepoint characters.
- **Footer Cost Display**: Added `$` prefix to cost display in footer. Now shows `$0.078` instead of `0.078`. ([#53](https://github.com/badlogic/pi-mono/issues/53))
## [0.9.3] - 2025-11-24
### Added
- Added Anthropic Claude Opus 4.5 support
## [0.9.2] - 2025-11-24
### Fixed
- **Edit Tool Dollar Sign Bug**: Fixed critical bug in the `edit` tool where `String.replace()` was interpreting `$` as a special replacement pattern (e.g., `$$`, `$&`, `$'`). When trying to insert `$` into code (like adding a dollar sign to a template literal), the replacement would silently fail and produce unchanged content, but the tool would incorrectly report success. Now uses `indexOf` + `substring` for raw string replacement without special character interpretation. Also added verification that content actually changed, rejecting with a clear error if the replacement produces identical content. ([#53](https://github.com/badlogic/pi-mono/issues/53))
## [0.9.0] - 2025-11-21
### Added
- **`/clear` Command**: New slash command to reset the conversation context and start a fresh session. Aborts any in-flight agent work, clears all messages, and creates a new session file. ([#48](https://github.com/badlogic/pi-mono/pull/48))
- **Model Cycling with Thinking Levels**: The `--models` flag now supports thinking level syntax (e.g., `--models sonnet:high,haiku:low`). When cycling models with `Ctrl+P`, the associated thinking level is automatically applied. The first model in the scope is used as the initial model when starting a new session. Both model and thinking level changes are now saved to session and settings for persistence. ([#47](https://github.com/badlogic/pi-mono/pull/47))
- **`--thinking` Flag**: New CLI flag to set thinking level directly (e.g., `--thinking high`). Valid values: `off`, `minimal`, `low`, `medium`, `high`. Takes highest priority over all other thinking level sources. ([#45](https://github.com/badlogic/pi-mono/issues/45))
### Breaking
- **Interactive Mode with Initial Prompt**: Passing a prompt on the command line (e.g., `pi "List files"`) now starts interactive mode with the prompt pre-submitted, instead of exiting after completion. Use `--print` or `-p` to get the previous non-interactive behavior (e.g., `pi -p "List files"`). This matches Claude CLI (`-p`) and Codex (`exec`) behavior. ([#46](https://github.com/badlogic/pi-mono/issues/46))
### Fixed
- **Slash Command Autocomplete**: Fixed issue where pressing Enter on a highlighted slash command suggestion (e.g., typing `/mod` with `/model` highlighted) would submit the partial text instead of executing the selected command. Now Enter applies the completion and submits in one action. ([#49](https://github.com/badlogic/pi-mono/issues/49))
- **Model Matching Priority**: The `--models` flag now prioritizes exact matches over partial matches. Supports `provider/modelId` format (e.g., `openrouter/openai/gpt-5.1-codex`) for precise selection. Exact ID matches are tried before partial matching, so `--models gpt-5.1-codex` correctly selects `gpt-5.1-codex` instead of `openai/gpt-5.1-codex-mini`.
- **Markdown Link Rendering**: Fixed links with identical text and href (e.g., `https://github.com/badlogic/pi-mono/pull/48/files`) being rendered twice. Now correctly compares raw text instead of styled text (which contains ANSI codes) when determining if link text matches href.
## [0.8.5] - 2025-11-21
### Fixed
- **Path Completion Hanging**: Fixed catastrophic regex backtracking in path completion that caused the terminal to hang when text contained many `/` characters (e.g., URLs). Replaced complex regex with simple string operations. ([#18](https://github.com/badlogic/pi-mono/issues/18))
- **Autocomplete Arrow Keys**: Fixed issue where arrow keys would move both the autocomplete selection and the editor cursor simultaneously when the file selector list was shown.
## [0.8.4] - 2025-11-21
### Fixed
- **Read Tool Error Handling**: Fixed issue where the `read` tool would return errors as successful text content instead of throwing. Now properly throws errors for file not found and offset out of bounds conditions.
## [0.8.3] - 2025-11-21
### Improved
- **Export HTML**: Limited container width to 700px for better readability. Fixed message statistics to match `/session` command output with proper breakdown of User/Assistant/Tool Calls/Tool Results/Total messages.
- **Dark Theme**: Increased visibility of editor border (darkGray from #303030 to #505050) and thinking minimal indicator (from #4e4e4e to #6e6e6e).
## [0.8.0] - 2025-11-21
### Added
- **Theme System**: Full theming support with 44 customizable color tokens. Two built-in themes (`dark`, `light`) with auto-detection based on terminal background. Use `/theme` command to select themes interactively. Custom themes in `~/.pi/agent/themes/*.json` support live editing - changes apply immediately when the file is saved. Themes use RGB hex values for consistent rendering across terminals. VS Code users: set `terminal.integrated.minimumContrastRatio` to `1` for proper color rendering. See [Theme Documentation](docs/theme.md) for details.
## [0.7.29] - 2025-11-20
### Improved
- **Read Tool Display**: When the `read` tool is called with offset/limit parameters, the tool execution now displays the line range in a compact format (e.g., `read src/main.ts:100-200` for offset=100, limit=100).
## [0.7.28] - 2025-11-20
### Added
- **Message Queuing**: You can now send multiple messages while the agent is processing without waiting for the previous response to complete. Messages submitted during streaming are queued and processed based on your queue mode setting. Queued messages are shown in a pending area below the chat. Press Escape to abort and restore all queued messages to the editor. Use `/queue` to select between "one-at-a-time" (process queued messages sequentially, recommended) or "all" (process all queued messages at once). The queue mode setting is saved and persists across sessions. ([#15](https://github.com/badlogic/pi-mono/issues/15))
## [0.7.27] - 2025-11-20
### Fixed
- **Slash Command Submission**: Fixed issue where slash commands required two Enter presses to execute. Now pressing Enter on a slash command autocomplete suggestion immediately submits the command, while Tab still applies the completion for adding arguments. ([#30](https://github.com/badlogic/pi-mono/issues/30))
- **Slash Command Autocomplete**: Fixed issue where typing a typo then correcting it would not show autocomplete suggestions. Autocomplete now re-triggers when typing or backspacing in a slash command context. ([#29](https://github.com/badlogic/pi-mono/issues/29))
## [0.7.26] - 2025-11-20
### Added
- **Tool Output Expansion**: Press `Ctrl+O` to toggle between collapsed and expanded tool output display. Expands all tool call outputs (bash, read, write, etc.) to show full content instead of truncated previews. ([#31](https://github.com/badlogic/pi-mono/issues/31))
- **Custom Headers**: Added support for custom HTTP headers in `models.json` configuration. Headers can be specified at both provider and model level, with model-level headers overriding provider-level ones. This enables bypassing Cloudflare bot detection and other proxy requirements. ([#39](https://github.com/badlogic/pi-mono/issues/39))
### Fixed
- **Chutes AI Provider**: Fixed 400 errors when using Chutes AI provider. Added compatibility fixes for `store` field exclusion, `max_tokens` parameter usage, and system prompt role handling. ([#42](https://github.com/badlogic/pi-mono/pull/42) by [@butelo](https://github.com/butelo))
- **Mistral/Chutes Syntax Error**: Fixed syntax error in merged PR that used `iif` instead of `if`.
- **Anthropic OAuth Bug**: Fixed bug where `process.env.ANTHROPIC_API_KEY = undefined` set the env var to string "undefined" instead of deleting it. Now uses `delete` operator.
## [0.7.25] - 2025-11-20
### Added
- **Model Cycling**: Press `Ctrl+P` to quickly cycle through models. Use `--models` CLI argument to scope to specific models (e.g., `--models claude-sonnet,gpt-4o`). Supports pattern matching and smart version selection (prefers aliases over dated versions). ([#37](https://github.com/badlogic/pi-mono/pull/37) by [@fightbulc](https://github.com/fightbulc))
## [0.7.24] - 2025-11-20
### Added
- **Thinking Level Cycling**: Press `Shift+Tab` to cycle through thinking levels (off → minimal → low → medium → high) for reasoning-capable models. Editor border color changes to indicate current level (gray → blue → cyan → magenta). ([#36](https://github.com/badlogic/pi-mono/pull/36) by [@fightbulc](https://github.com/fightbulc))
## [0.7.23] - 2025-11-20
### Added
- **Update Notifications**: Interactive mode now checks for new versions on startup and displays a notification if an update is available.
## [0.10.6] - 2025-11-28
### Changed
- **System Prompt**: Updated system prompt to instruct agent to output plain text summaries directly instead of using cat or bash commands to display what it did.
- Show base64-truncated indicator for large images in tool output
### Fixed
- **File Path Completion**: Removed 10-file limit in tab completion selector. All matching files and directories now appear in the completion list.
- **Absolute Path Completion**: Fixed tab completion for absolute paths (e.g., `/Applications`). Absolute paths in the middle of text (like "hey /") now complete correctly. Also fixed crashes when trying to stat inaccessible files (like macOS `.VolumeIcon.icns`) during directory traversal.
- Fixed image dimensions not being read correctly from PNG/JPEG/GIF files
- Fixed PDF images being incorrectly base64-truncated in display
- Allow reading files from ancestor directories (needed for monorepo configs)
## [0.7.22] - 2025-11-19
### Fixed
- **Long Line Wrapping**: Fixed crash when rendering long lines without spaces (e.g., file paths). Long words now break character-by-character to fit within terminal width.
## [0.7.21] - 2025-11-19
### Fixed
- **Terminal Flicker**: Fixed flicker at bottom of viewport (especially editor component) in xterm.js-based terminals (VS Code, etc.) by using per-line clear instead of clear-to-end sequence.
- **Background Color Rendering**: Fixed black cells appearing at end of wrapped lines when using background colors. Completely rewrote text wrapping and background application to properly handle ANSI reset codes.
- **Tool Output**: Strip ANSI codes from bash/tool output before rendering to prevent conflicts with TUI styling.
## [0.7.20] - 2025-11-18
### Fixed
- **Message Wrapping**: Fixed word-based text wrapping for long lines in chat messages. Text now properly wraps at word boundaries while preserving ANSI styling (colors, bold, italic, etc.) across wrapped lines. Background colors now extend to the full width of each line. Empty lines in messages now render correctly with full-width background.
## [0.7.18] - 2025-11-18
### Fixed
- **Bash Tool Error Handling**: Bash tool now properly throws errors for failed commands (non-zero exit codes), timeouts, and aborted executions. This ensures tool execution components display with red background when bash commands fail.
- **Thinking Traces Styling**: Thinking traces now maintain gray italic styling throughout, even when containing inline code blocks, bold text, or other inline formatting
## [0.7.17] - 2025-11-18
## [0.10.5] - 2025-11-28
### Added
- **New Model**: Added `gemini-3-pro-preview` to Google provider.
- **OAuth Authentication**: Added `/login` and `/logout` commands for OAuth-based authentication with Claude Pro/Max subscriptions. Tokens are stored in `~/.pi/agent/oauth.json` with 0600 permissions and automatically refreshed when expired. OAuth tokens take priority over API keys for Anthropic models.
- Full multimodal support: attach images (PNG, JPEG, GIF, WebP) and PDFs to prompts using `@path` syntax or `--file` flag
### Fixed
- **Anthropic Aborted Thinking**: Fixed error when resubmitting assistant messages with incomplete thinking blocks (from aborted streams). Thinking blocks without valid signatures are now converted to text blocks with `<thinking>` delimiters, preventing API rejection.
- **Model Selector Loading**: Fixed models not appearing in `/model` selector until user started typing. Models now load asynchronously and re-render when available.
- **Input Paste Support**: Added bracketed paste mode support to `Input` component, enabling paste of long OAuth authorization codes.
- `@`-references now handle special characters in file names (spaces, quotes, unicode)
- Fixed cursor positioning issues with multi-byte unicode characters in editor
## [0.7.16] - 2025-11-17
## [0.10.4] - 2025-11-28
### Fixed
- **Tool Error Display**: Fixed edit tool (and all other tools) not showing error state correctly in TUI. Failed tool executions now properly display with red background and show the error message. Previously, the `isError` flag from tool execution events was not being passed to the UI component, causing all tool results to show with green (success) background regardless of whether they succeeded or failed.
- Removed padding on first user message in TUI to improve visual consistency.
## [0.7.15] - 2025-11-17
### Fixed
- **Anthropic OAuth Support**: Added support for `ANTHROPIC_OAUTH_TOKEN` environment variable. The agent now checks for OAuth tokens before falling back to API keys for Anthropic models, enabling OAuth-based authentication.
## [0.7.14] - 2025-11-17
### Fixed
- **Mistral API Compatibility**: Fixed compatibility with Mistral API by excluding the `store` field and using `max_tokens` instead of `max_completion_tokens`, and avoiding the `developer` role in system prompts.
- **Error Display**: Fixed error message display in assistant messages to include proper spacing before the error text.
- **Message Streaming**: Fixed missing `message_start` event when no partial message chunks were received during streaming.
## [0.7.13] - 2025-11-16
### Fixed
- **TUI Editor**: Fixed unicode input support for umlauts (äöü), emojis (😀), and other extended characters. Previously the editor only accepted ASCII characters (32-126). Now properly handles all printable unicode while still filtering out control characters. ([#20](https://github.com/badlogic/pi-mono/pull/20))
## [0.7.12] - 2025-11-16
## [0.10.3] - 2025-11-28
### Added
- **Custom Models and Providers**: Support for custom models and providers via `~/.pi/agent/models.json` configuration file. Add local models (Ollama, vLLM, LM Studio) or any OpenAI-compatible, Anthropic-compatible, or Google-compatible API. File is reloaded on every `/model` selector open, allowing live updates without restart. ([#21](https://github.com/badlogic/pi-mono/issues/21))
- Added `gpt-5.1-codex` model to OpenAI provider (400k context, 128k max output, reasoning-capable).
- Added RPC mode (`--rpc`) for programmatic integration. Accepts JSON commands on stdin, emits JSON events on stdout. See [RPC mode documentation](https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/README.md#rpc-mode) for protocol details.
### Changed
- **Breaking**: No longer hardcodes Anthropic/Claude as default provider/model. Now prefers sensible defaults per provider (e.g., `claude-sonnet-4-5` for Anthropic, `gpt-5.1-codex` for OpenAI), or requires explicit selection in interactive mode.
- Interactive mode now allows starting without a model, showing helpful error on message submission instead of failing at startup.
- Non-interactive mode (CLI messages, JSON, RPC) still fails early if no model or API key is available.
- Model selector now saves selected model as default in settings.json.
- `models.json` validation errors (syntax + schema) now surface with precise file/field info in both CLI and `/model` selector.
- Agent system prompt now includes absolute path to its own README.md for self-documentation.
- Refactored internal architecture to support multiple frontends (TUI, RPC) with shared agent logic.
### Fixed
- Fixed crash when restoring a session with a custom model that no longer exists or lost credentials. Now gracefully falls back to default model, logs the reason, and appends a warning message to the restored chat.
- Footer no longer crashes when no model is selected.
## [0.7.11] - 2025-11-16
### Changed
- The `/model` selector now filters models based on available API keys. Only models for which API keys are configured in environment variables are shown. This prevents selecting models that would fail due to missing credentials. A yellow hint is displayed at the top of the selector explaining this behavior. ([#19](https://github.com/badlogic/pi-mono/pull/19))
## [0.7.10] - 2025-11-14
## [0.10.2] - 2025-11-26
### Added
- `/branch` command for creating conversation branches. Opens a selector showing all user messages in chronological order. Selecting a message creates a new session with all messages before the selected one, and places the selected message in the editor for modification or resubmission. This allows exploring alternative conversation paths without losing the current session. (fixes [#16](https://github.com/badlogic/pi-mono/issues/16))
## [0.7.9] - 2025-11-14
- Added thinking level persistence. Default level stored in `~/.pi/settings.json`, restored on startup. Per-session overrides saved in session files.
- Added model cycling shortcut: `Ctrl+I` cycles through available models (or scoped models with `-m` flag).
- Added automatic retry with exponential backoff for transient API errors (network issues, 500s, overload).
- Cumulative token usage now shown in footer (total tokens used across all messages in session).
- Added `--system-prompt` flag to override default system prompt with custom text or file contents.
- Footer now shows estimated total cost in USD based on model pricing.
### Changed
- Editor: updated keyboard shortcuts to follow Unix conventions:
- **Ctrl+W** deletes the previous word (stopping at whitespace or punctuation)
- **Ctrl+U** deletes from cursor to start of line (at line start, merges with previous line)
- **Ctrl+K** deletes from cursor to end of line (at line end, merges with next line)
- **Option+Backspace** in Ghostty now behaves like **Ctrl+W** (delete word backwards)
- **Cmd+Backspace** in Ghostty now behaves like **Ctrl+U** (delete to start of line)
- Replaced `--models` flag with `-m/--model` supporting multiple values. Specify models as `provider/model@thinking` (e.g., `anthropic/claude-sonnet-4-20250514@high`). Multiple `-m` flags scope available models for the session.
- Thinking level border now persists visually after selector closes.
- Improved tool result display with collapsible output (default collapsed, expand with `Ctrl+O`).
## [0.7.8] - 2025-11-13
### Changed
- Updated README.md with `/changelog` slash command documentation
## [0.7.7] - 2025-11-13
## [0.10.1] - 2025-11-25
### Added
- Automatic changelog display on startup in interactive mode. When starting a new session (not continuing/resuming), the agent will display all changelog entries since the last version you used. The last shown version is tracked in `~/.pi/agent/settings.json`.
- `/changelog` command to display the changelog in the TUI
- OpenRouter Auto Router model support ([#5](https://github.com/badlogic/pi-mono/pull/5))
- Windows Git Bash support with automatic detection and process tree termination ([#1](https://github.com/badlogic/pi-mono/pull/1))
- Add custom model configuration via `~/.pi/models.json`
### Changed
## [0.10.0] - 2025-11-25
- **BREAKING**: Renamed project context file from `AGENT.md` to `AGENTS.md`. The system now looks for `AGENTS.md` or `CLAUDE.md` (with `AGENTS.md` preferred). Existing `AGENT.md` files will need to be renamed to `AGENTS.md` to continue working. (fixes [#9](https://github.com/badlogic/pi-mono/pull/9))
- **BREAKING**: Session file format changed to store provider and model ID separately instead of as a single `provider/modelId` string. Existing sessions will not restore the model correctly when resumed - you'll need to manually set the model again using `/model`. (fixes [#4](https://github.com/badlogic/pi-mono/pull/4))
- Improved Windows Git Bash detection logic with better error messages showing actual paths searched ([#13](https://github.com/badlogic/pi-mono/pull/13))
Initial public release.
### Fixed
### Added
- Fixed markdown list rendering bug where bullets were not displayed when list items contained inline code with cyan color formatting
- Fixed context percentage showing 0% in footer when last assistant message was aborted ([#12](https://github.com/badlogic/pi-mono/issues/12))
- Fixed error message loss when `turn_end` event contains an error. Previously, errors in `turn_end` events (e.g., "Provider returned error" from OpenRouter Auto Router) were not captured in `agent.state.error`, making it appear as if the agent completed successfully. ([#6](https://github.com/badlogic/pi-mono/issues/6))
## [0.7.6] - 2025-11-13
Previous releases did not maintain a changelog.
- Interactive TUI with streaming responses
- Conversation session management with `--continue`, `--resume`, and `--session` flags
- Multi-line input support (Shift+Enter or Option+Enter for new lines)
- Tool execution: `read`, `write`, `edit`, `bash`, `glob`, `grep`, `think`
- Thinking mode support for Claude with visual indicator and `/thinking` selector
- File path autocompletion with `@` prefix
- Slash command autocompletion
- `/export` command for HTML session export
- `/model` command for runtime model switching
- `/session` command for session statistics
- Model provider support: Anthropic (Claude), OpenAI, Google (Gemini)
- Git branch display in footer
- Message queueing during streaming responses
- OAuth integration for Gmail and Google Calendar access
- HTML export with syntax highlighting and collapsible sections

View file

@ -2,11 +2,12 @@
A radically simple and opinionated coding agent with multi-model support (including mid-session switching), a simple yet powerful CLI for headless coding tasks, and many creature comforts you might be used to from other coding agents.
Works on Linux, macOS, and Windows (barely tested, needs Git Bash running in the "modern" Windows Terminal).
Works on Linux, macOS, and Windows (needs a bash shell, see [Windows Shell Configuration](#windows-shell-configuration)).
## Table of Contents
- [Installation](#installation)
- [Windows Shell Configuration](#windows-shell-configuration)
- [Quick Start](#quick-start)
- [API Keys](#api-keys)
- [OAuth Authentication (Optional)](#oauth-authentication-optional)
@ -81,6 +82,29 @@ npm run build:binary
./dist/pi
```
## Windows Shell Configuration
On Windows, pi requires a bash shell. The following locations are checked in order:
1. **Custom shell path** from `~/.pi/agent/settings.json` (if configured)
2. **Git Bash** in standard locations (`C:\Program Files\Git\bin\bash.exe`)
3. **bash.exe on PATH** (Cygwin, MSYS2, WSL, etc.)
For most users, installing [Git for Windows](https://git-scm.com/download/win) is sufficient.
### Custom Shell Path
If you use Cygwin, MSYS2, or have bash in a non-standard location, add the path to your settings:
```json
// ~/.pi/agent/settings.json
{
"shellPath": "C:\\cygwin64\\bin\\bash.exe"
}
```
Alternatively, ensure your bash is on the system PATH.
## Quick Start
```bash
@ -291,6 +315,77 @@ You can add custom HTTP headers to bypass Cloudflare bot detection, add authenti
- **Model-level `headers`**: Additional headers for specific models (merged with provider headers)
- Model headers override provider headers when keys conflict
### OpenAI Compatibility Settings
The `openai-completions` API is implemented by many providers with minor differences (Ollama, vLLM, LiteLLM, llama.cpp, etc.). By default, compatibility settings are auto-detected from the `baseUrl`. For custom proxies or unknown endpoints, you can override these via the `compat` field on models:
```json
{
"providers": {
"litellm": {
"baseUrl": "http://localhost:4000/v1",
"apiKey": "LITELLM_API_KEY",
"api": "openai-completions",
"models": [
{
"id": "gpt-4o",
"name": "GPT-4o (via LiteLLM)",
"reasoning": false,
"input": ["text", "image"],
"cost": {"input": 2.5, "output": 10, "cacheRead": 0, "cacheWrite": 0},
"contextWindow": 128000,
"maxTokens": 16384,
"compat": {
"supportsStore": false
}
}
]
}
}
}
```
Available `compat` fields (all optional, auto-detected if not set):
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `supportsStore` | boolean | auto | Whether provider supports the `store` field |
| `supportsDeveloperRole` | boolean | auto | Whether provider supports `developer` role (vs `system`) |
| `supportsReasoningEffort` | boolean | auto | Whether provider supports `reasoning_effort` parameter |
| `maxTokensField` | string | auto | Use `"max_completion_tokens"` or `"max_tokens"` |
If `compat` is partially set, unspecified fields use auto-detected values.
### Authorization Header
Some providers require an explicit `Authorization: Bearer <token>` header. Set `authHeader: true` to automatically add this header using the resolved `apiKey`:
```json
{
"providers": {
"qwen": {
"baseUrl": "https://dashscope.aliyuncs.com/compatible-mode/v1",
"apiKey": "QWEN_API_KEY",
"authHeader": true,
"api": "openai-completions",
"models": [
{
"id": "qwen3-coder-plus",
"name": "Qwen3 Coder Plus",
"reasoning": true,
"input": ["text"],
"cost": {"input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0},
"contextWindow": 1000000,
"maxTokens": 65536
}
]
}
}
}
```
When `authHeader: true`, the resolved API key is added as `Authorization: Bearer <apiKey>` to the model headers. This is useful for providers that don't use the standard OpenAI authentication mechanism.
### Model Selection Priority
When starting `pi`, models are selected in this order:
@ -472,6 +567,16 @@ This allows you to explore alternative conversation paths without losing your cu
/branch
```
### /resume
Switch to a different session. Opens an interactive selector showing all available sessions. Select a session to load it and continue where you left off.
This is equivalent to the `--resume` CLI flag but can be used mid-session.
```
/resume
```
### /login
Login with OAuth to use subscription-based models (Claude Pro/Max):
@ -661,6 +766,8 @@ Change queue mode with `/queue` command. Setting is saved in `~/.pi/agent/settin
**Navigation:**
- **Arrow keys**: Move cursor (Up/Down navigate visual lines, Left/Right move by character)
- **Up Arrow** (empty editor): Browse previous prompts (history)
- **Down Arrow** (browsing history): Browse newer prompts or return to empty editor
- **Option+Left** / **Ctrl+Left**: Move word backwards
- **Option+Right** / **Ctrl+Right**: Move word forwards
- **Ctrl+A** / **Home**: Jump to start of line
@ -684,6 +791,7 @@ Change queue mode with `/queue` command. Setting is saved in `~/.pi/agent/settin
- **Shift+Tab**: Cycle thinking level (for reasoning-capable models)
- **Ctrl+P**: Cycle models (use `-m/--model` to scope)
- **Ctrl+O**: Toggle tool output expansion (collapsed ↔ full output)
- **Ctrl+T**: Toggle thinking block visibility (shows full content ↔ static "Thinking..." label)
## Project Context Files
@ -926,6 +1034,13 @@ Custom system prompt. Can be:
If the argument is a valid file path, the file contents will be used as the system prompt. Otherwise, the text is used directly. Project context files and datetime are automatically appended.
**--append-system-prompt <text|file>**
Append additional text or file contents to the system prompt. Can be:
- Inline text: `--append-system-prompt "Also consider edge cases"`
- File path: `--append-system-prompt ./extra-instructions.txt`
If the argument is a valid file path, the file contents will be appended. Otherwise, the text is appended directly. This complements `--system-prompt` for layering custom instructions without replacing the base system prompt. Works in both custom and default system prompts.
**--mode <mode>**
Output mode for non-interactive usage (implies `--print`). Options:
- `text` (default): Output only the final assistant message text

View file

@ -0,0 +1,235 @@
# Tool Output Truncation
## Limits
- **Line limit**: 2000 lines
- **Byte limit**: 30KB
- **Grep line limit**: 500 chars per match line
Whichever limit is hit first wins. **Never return partial lines** (except bash edge case).
---
## read
Head truncation (first N lines). Has offset/limit params for continuation.
### Scenarios
**First line > 30KB:**
```
LLM sees:
[Line 1 is 50KB, exceeds 30KB limit. Use bash to read: head -c 30000 path/to/file]
Details:
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0, ... } }
```
**Hit line limit (2000 lines, < 30KB):**
```
LLM sees:
[lines 1-2000 content]
[Showing lines 1-2000 of 5000. Use offset=2001 to continue]
Details:
{ truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 5000 } }
```
**Hit byte limit (< 2000 lines, 30KB):**
```
LLM sees:
[lines 1-500 content]
[Showing lines 1-500 of 5000 (30KB limit). Use offset=501 to continue]
Details:
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 500, totalLines: 5000 } }
```
**With offset, hit line limit (e.g., offset=1000):**
```
LLM sees:
[lines 1000-2999 content]
[Showing lines 1000-2999 of 5000. Use offset=3000 to continue]
Details:
{ truncation: { truncatedBy: "lines", ... } }
```
**With offset, hit byte limit (e.g., offset=1000, 30KB after 500 lines):**
```
LLM sees:
[lines 1000-1499 content]
[Showing lines 1000-1499 of 5000 (30KB limit). Use offset=1500 to continue]
Details:
{ truncation: { truncatedBy: "bytes", outputLines: 500, ... } }
```
**With offset, first line at offset > 30KB (e.g., offset=1000, line 1000 is 50KB):**
```
LLM sees:
[Line 1000 is 50KB, exceeds 30KB limit. Use bash: sed -n '1000p' file | head -c 30000]
Details:
{ truncation: { truncated: true, truncatedBy: "bytes", outputLines: 0 } }
```
---
## bash
Tail truncation (last N lines). Writes full output to temp file if truncated.
### Scenarios
**Hit line limit (2000 lines):**
```
LLM sees:
[lines 48001-50000 content]
[Showing lines 48001-50000 of 50000. Full output: /tmp/pi-bash-xxx.log]
Details:
{ truncation: { truncated: true, truncatedBy: "lines", outputLines: 2000, totalLines: 50000 }, fullOutputPath: "/tmp/..." }
```
**Hit byte limit (< 2000 lines, 30KB):**
```
LLM sees:
[lines 49501-50000 content]
[Showing lines 49501-50000 of 50000 (30KB limit). Full output: /tmp/pi-bash-xxx.log]
Details:
{ truncation: { truncatedBy: "bytes", ... }, fullOutputPath: "/tmp/..." }
```
**Last line alone > 30KB (edge case, partial OK here):**
```
LLM sees:
[last 30KB of final line]
[Showing last 30KB of line 50000 (line is 100KB). Full output: /tmp/pi-bash-xxx.log]
Details:
{ truncation: { truncatedBy: "bytes", lastLinePartial: true }, fullOutputPath: "/tmp/..." }
```
---
## grep
Head truncation. Primary limit: 100 matches. Each match line truncated to 500 chars.
### Scenarios
**Hit match limit (100 matches):**
```
LLM sees:
file.ts:10: matching content here...
file.ts:25: another match...
...
[100 matches limit reached. Use limit=200 for more, or refine pattern]
Details:
{ matchLimitReached: 100 }
```
**Hit byte limit (< 100 matches, 30KB):**
```
LLM sees:
[matches that fit in 30KB]
[30KB limit reached (50 of 100+ matches shown)]
Details:
{ truncation: { truncatedBy: "bytes", ... } }
```
**Match lines truncated (any line > 500 chars):**
```
LLM sees:
file.ts:10: very long matching content that exceeds 500 chars gets cut off here... [truncated]
file.ts:25: normal match
[Some lines truncated to 500 chars. Use read tool to see full lines]
Details:
{ linesTruncated: true }
```
---
## find
Head truncation. Primary limit: 1000 results. File paths only (never > 30KB each).
### Scenarios
**Hit result limit (1000 results):**
```
LLM sees:
src/file1.ts
src/file2.ts
[998 more paths]
[1000 results limit reached. Use limit=2000 for more, or refine pattern]
Details:
{ resultLimitReached: 1000 }
```
**Hit byte limit (unlikely, < 1000 results, 30KB):**
```
LLM sees:
[paths that fit]
[30KB limit reached]
Details:
{ truncation: { truncatedBy: "bytes", ... } }
```
---
## ls
Head truncation. Primary limit: 500 entries. Entry names only (never > 30KB each).
### Scenarios
**Hit entry limit (500 entries):**
```
LLM sees:
.gitignore
README.md
src/
[497 more entries]
[500 entries limit reached. Use limit=1000 for more]
Details:
{ entryLimitReached: 500 }
```
**Hit byte limit (unlikely):**
```
LLM sees:
[entries that fit]
[30KB limit reached]
Details:
{ truncation: { truncatedBy: "bytes", ... } }
```
---
## TUI Display
`tool-execution.ts` reads `details.truncation` and related fields to display truncation notices in warning color. The LLM text content and TUI display show the same information.

View file

@ -0,0 +1,313 @@
# Under-Compaction Analysis
## Problem Statement
Auto-compaction triggers too late, causing context window overflows that result in failed LLM calls with `stopReason == "length"`.
## Architecture Overview
### Event Flow
```
User prompt
agent.prompt()
agentLoop() in packages/ai/src/agent/agent-loop.ts
├─► streamAssistantResponse()
│ │
│ ▼
│ LLM provider (Anthropic, OpenAI, etc.)
│ │
│ ▼
│ Events: message_start → message_update* → message_end
│ │
│ ▼
│ AssistantMessage with usage stats (input, output, cacheRead, cacheWrite)
├─► If assistant has tool calls:
│ │
│ ▼
│ executeToolCalls()
│ │
│ ├─► tool_execution_start (toolCallId, toolName, args)
│ │
│ ├─► tool.execute() runs (read, bash, write, edit, etc.)
│ │
│ ├─► tool_execution_end (toolCallId, toolName, result, isError)
│ │
│ └─► message_start + message_end for ToolResultMessage
└─► Loop continues until no more tool calls
agent_end
```
### Token Usage Reporting
Token usage is ONLY available in `AssistantMessage.usage` after the LLM responds:
```typescript
// From packages/ai/src/types.ts
export interface Usage {
input: number; // Tokens in the request
output: number; // Tokens generated
cacheRead: number; // Cached tokens read
cacheWrite: number; // Cached tokens written
cost: Cost;
}
```
The `input` field represents the total context size sent to the LLM, which includes:
- System prompt
- All conversation messages
- All tool results from previous calls
### Current Compaction Check
Both TUI (`tui-renderer.ts`) and RPC (`main.ts`) modes check compaction identically:
```typescript
// In agent.subscribe() callback:
if (event.type === "message_end") {
// ...
if (event.message.role === "assistant") {
await checkAutoCompaction();
}
}
async function checkAutoCompaction() {
// Get last non-aborted assistant message
const messages = agent.state.messages;
let lastAssistant = findLastNonAbortedAssistant(messages);
if (!lastAssistant) return;
const contextTokens = calculateContextTokens(lastAssistant.usage);
const contextWindow = agent.state.model.contextWindow;
if (!shouldCompact(contextTokens, contextWindow, settings)) return;
// Trigger compaction...
}
```
**The check happens on `message_end` for assistant messages only.**
## The Under-Compaction Problem
### Failure Scenario
```
Context window: 200,000 tokens
Reserve tokens: 16,384 (default)
Threshold: 200,000 - 16,384 = 183,616
Turn N:
1. Assistant message received, usage shows 180,000 tokens
2. shouldCompact(180000, 200000, settings) → 180000 > 183616 → FALSE
3. Tool executes: `cat large-file.txt` → outputs 100KB (~25,000 tokens)
4. Context now effectively 205,000 tokens, but we don't know this
5. Next LLM call fails: context exceeds 200,000 window
```
The problem occurs when:
1. Context is below threshold (so compaction doesn't trigger)
2. A tool adds enough content to push it over the window limit
3. We only discover this when the next LLM call fails
### Root Cause
1. **Token counts are retrospective**: We only learn the context size AFTER the LLM processes it
2. **Tool results are blind spots**: When a tool executes and returns a large result, we don't know how many tokens it adds until the next LLM call
3. **No estimation before submission**: We submit the context and hope it fits
## Current Tool Output Limits
| Tool | Our Limit | Worst Case |
|------|-----------|------------|
| bash | 10MB per stream | 20MB (~5M tokens) |
| read | 2000 lines × 2000 chars | 4MB (~1M tokens) |
| write | Byte count only | Minimal |
| edit | Diff output | Variable |
## How Other Tools Handle This
### SST/OpenCode
**Tool Output Limits (during execution):**
| Tool | Limit | Details |
|------|-------|---------|
| bash | 30KB chars | `MAX_OUTPUT_LENGTH = 30_000`, truncates with notice |
| read | 2000 lines × 2000 chars/line | No total cap, theoretically 4MB |
| grep | 100 matches, 2000 chars/line | Truncates with notice |
| ls | 100 files | Truncates with notice |
| glob | 100 results | Truncates with notice |
| webfetch | 5MB | `MAX_RESPONSE_SIZE` |
**Overflow Detection:**
- `isOverflow()` runs BEFORE each turn (not during)
- Uses last LLM-reported token count: `tokens.input + tokens.cache.read + tokens.output`
- Triggers if `count > context - maxOutput`
- Does NOT detect overflow from tool results in current turn
**Recovery - Pruning:**
- `prune()` runs AFTER each turn completes
- Walks backwards through completed tool results
- Keeps last 40k tokens of tool outputs (`PRUNE_PROTECT`)
- Removes content from older tool results (marks `time.compacted`)
- Only prunes if savings > 20k tokens (`PRUNE_MINIMUM`)
- Token estimation: `chars / 4`
**Recovery - Compaction:**
- Triggered when `isOverflow()` returns true before a turn
- LLM generates summary of conversation
- Replaces old messages with summary
**Gap:** No mid-turn protection. A single read returning 4MB would overflow. The 30KB bash limit is their primary practical protection.
### OpenAI/Codex
**Tool Output Limits (during execution):**
| Tool | Limit | Details |
|------|-------|---------|
| shell/exec | 10k tokens or 10k bytes | Per-model `TruncationPolicy`, user-configurable |
| read_file | 2000 lines, 500 chars/line | `MAX_LINE_LENGTH = 500`, ~1MB max |
| grep_files | 100 matches | Default limit |
| list_dir | Configurable | BFS with depth limits |
**Truncation Policy:**
- Per-model family setting: `TruncationPolicy::Bytes(10_000)` or `TruncationPolicy::Tokens(10_000)`
- User can override via `tool_output_token_limit` config
- Applied to ALL tool outputs uniformly via `truncate_function_output_items_with_policy()`
- Preserves beginning and end, removes middle with `"…N tokens truncated…"` marker
**Overflow Detection:**
- After each successful turn: `if total_usage_tokens >= auto_compact_token_limit { compact() }`
- Per-model thresholds (e.g., 180k for 200k context window)
- `ContextWindowExceeded` error caught and handled
**Recovery - Compaction:**
- If tokens exceed threshold after turn, triggers `run_inline_auto_compact_task()`
- During compaction, if `ContextWindowExceeded`: removes oldest history item and retries
- Loop: `history.remove_first_item()` until it fits
- Notifies user: "Trimmed N older conversation item(s)"
**Recovery - Turn Error:**
- On `ContextWindowExceeded` during normal turn: marks tokens as full, returns error to user
- Does NOT auto-retry the failed turn
- User must manually continue
**Gap:** Still no mid-turn protection, but aggressive 10k token truncation on all tool outputs prevents most issues in practice.
### Comparison
| Feature | pi-coding-agent | OpenCode | Codex |
|---------|-----------------|----------|-------|
| Bash limit | 10MB | 30KB | ~40KB (10k tokens) |
| Read limit | 2000×2000 (4MB) | 2000×2000 (4MB) | 2000×500 (1MB) |
| Truncation policy | None | Per-tool | Per-model, uniform |
| Token estimation | None | chars/4 | chars/4 |
| Pre-turn check | No | Yes (last tokens) | Yes (threshold) |
| Mid-turn check | No | No | No |
| Post-turn pruning | No | Yes (removes old tool output) | No |
| Overflow recovery | No | Compaction | Trim oldest + compact |
**Key insight:** None of these tools protect against mid-turn overflow. Their practical protection is aggressive static limits on tool output, especially bash. OpenCode's 30KB bash limit vs our 10MB is the critical difference.
## Recommended Solution
### Phase 1: Static Limits (immediate)
Add hard limits to tool outputs matching industry practice:
```typescript
// packages/coding-agent/src/tools/limits.ts
export const MAX_TOOL_OUTPUT_CHARS = 30_000; // ~7.5k tokens, matches OpenCode bash
export const MAX_TOOL_OUTPUT_NOTICE = "\n\n...(truncated, output exceeded limit)...";
```
Apply to all tools:
- bash: 10MB → 30KB
- read: Add 100KB total output cap
- edit: Cap diff output
### Phase 2: Post-Tool Estimation
After `tool_execution_end`, estimate and flag:
```typescript
let needsCompactionAfterTurn = false;
agent.subscribe(async (event) => {
if (event.type === "tool_execution_end") {
const resultChars = extractTextLength(event.result);
const estimatedTokens = Math.ceil(resultChars / 4);
const lastUsage = getLastAssistantUsage(agent.state.messages);
if (lastUsage) {
const current = calculateContextTokens(lastUsage);
const projected = current + estimatedTokens;
const threshold = agent.state.model.contextWindow - settings.reserveTokens;
if (projected > threshold) {
needsCompactionAfterTurn = true;
}
}
}
if (event.type === "turn_end" && needsCompactionAfterTurn) {
needsCompactionAfterTurn = false;
await triggerCompaction();
}
});
```
### Phase 3: Overflow Recovery (like Codex)
Handle `stopReason === "length"` gracefully:
```typescript
if (event.type === "message_end" && event.message.role === "assistant") {
if (event.message.stopReason === "length") {
// Context overflow occurred
await triggerCompaction();
// Optionally: retry the turn
}
}
```
During compaction, if it also overflows, trim oldest messages:
```typescript
async function compactWithRetry() {
while (true) {
try {
await compact();
break;
} catch (e) {
if (isContextOverflow(e) && messages.length > 1) {
messages.shift(); // Remove oldest
continue;
}
throw e;
}
}
}
```
## Summary
The under-compaction problem occurs because:
1. We only check context size after assistant messages
2. Tool results can add arbitrary amounts of content
3. We discover overflows only when the next LLM call fails
The fix requires:
1. Aggressive static limits on tool output (immediate safety net)
2. Token estimation after tool execution (proactive detection)
3. Graceful handling of overflow errors (fallback recovery)

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-coding-agent",
"version": "0.12.9",
"version": "0.13.2",
"description": "Coding agent CLI with read, bash, edit, write tools and session management",
"type": "module",
"piConfig": {
@ -19,7 +19,7 @@
"scripts": {
"clean": "rm -rf dist",
"build": "tsgo -p tsconfig.build.json && chmod +x dist/cli.js && npm run copy-assets",
"build:binary": "command -v bun >/dev/null 2>&1 || { echo 'Error: Bun is required for building the binary. Install it from https://bun.sh'; exit 1; } && npm run build && bun build --compile ./dist/cli.js --outfile dist/pi && npm run copy-binary-assets",
"build:binary": "npm run build && bun build --compile ./dist/cli.js --outfile dist/pi && npm run copy-binary-assets",
"copy-assets": "cp src/theme/*.json dist/theme/",
"copy-binary-assets": "cp package.json dist/ && cp README.md dist/ && cp CHANGELOG.md dist/",
"dev": "tsgo -p tsconfig.build.json --watch --preserveWatchOutput",
@ -28,9 +28,9 @@
"prepublishOnly": "npm run clean && npm run build"
},
"dependencies": {
"@mariozechner/pi-agent-core": "^0.12.9",
"@mariozechner/pi-ai": "^0.12.9",
"@mariozechner/pi-tui": "^0.12.9",
"@mariozechner/pi-agent-core": "^0.13.2",
"@mariozechner/pi-ai": "^0.13.2",
"@mariozechner/pi-tui": "^0.13.2",
"chalk": "^5.5.0",
"diff": "^8.0.2",
"glob": "^11.0.3"

View file

@ -32,9 +32,10 @@ export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
/**
* Calculate total context tokens from usage.
* Uses the native totalTokens field when available, falls back to computing from components.
*/
export function calculateContextTokens(usage: Usage): number {
return usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
return usage.totalTokens || usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
}
/**

View file

@ -12,9 +12,10 @@ const __dirname = dirname(__filename);
/**
* Detect if we're running as a Bun compiled binary.
* Bun binaries have import.meta.url containing "$bunfs" (Bun's virtual filesystem path)
* Bun binaries have import.meta.url containing "$bunfs", "~BUN", or "%7EBUN" (Bun's virtual filesystem path)
*/
export const isBunBinary = import.meta.url.includes("$bunfs");
export const isBunBinary =
import.meta.url.includes("$bunfs") || import.meta.url.includes("~BUN") || import.meta.url.includes("%7EBUN");
// =============================================================================
// Package Asset Paths (shipped with executable)

View file

@ -0,0 +1,92 @@
import { describe, expect, test } from "vitest";
import { fuzzyFilter, fuzzyMatch } from "../src/fuzzy.js";
// Unit tests for the fuzzyMatch scoring primitive (lower score = better match).
describe("fuzzyMatch", () => {
	test("empty query matches everything with score 0", () => {
		const { matches, score } = fuzzyMatch("", "anything");
		expect(matches).toBe(true);
		expect(score).toBe(0);
	});

	test("query longer than text does not match", () => {
		const { matches } = fuzzyMatch("longquery", "short");
		expect(matches).toBe(false);
	});

	test("exact match has good score", () => {
		const exact = fuzzyMatch("test", "test");
		expect(exact.matches).toBe(true);
		// Consecutive-character bonuses should drive the score below zero.
		expect(exact.score).toBeLessThan(0);
	});

	test("characters must appear in order", () => {
		const ordered = fuzzyMatch("abc", "aXbXc");
		const reversed = fuzzyMatch("abc", "cba");
		expect(ordered.matches).toBe(true);
		expect(reversed.matches).toBe(false);
	});

	test("case insensitive matching", () => {
		expect(fuzzyMatch("ABC", "abc").matches).toBe(true);
		expect(fuzzyMatch("abc", "ABC").matches).toBe(true);
	});

	test("consecutive matches score better than scattered matches", () => {
		const adjacent = fuzzyMatch("foo", "foobar");
		const spread = fuzzyMatch("foo", "f_o_o_bar");
		expect(adjacent.matches).toBe(true);
		expect(spread.matches).toBe(true);
		// Lower score is better, so the consecutive run must rank ahead.
		expect(adjacent.score).toBeLessThan(spread.score);
	});

	test("word boundary matches score better", () => {
		const boundary = fuzzyMatch("fb", "foo-bar");
		const interior = fuzzyMatch("fb", "afbx");
		expect(boundary.matches).toBe(true);
		expect(interior.matches).toBe(true);
		expect(boundary.score).toBeLessThan(interior.score);
	});
});
describe("fuzzyFilter", () => {
test("empty query returns all items unchanged", () => {
const items = ["apple", "banana", "cherry"];
const result = fuzzyFilter(items, "", (x) => x);
expect(result).toEqual(items);
});
test("filters out non-matching items", () => {
const items = ["apple", "banana", "cherry"];
const result = fuzzyFilter(items, "an", (x) => x);
expect(result).toContain("banana");
expect(result).not.toContain("apple");
expect(result).not.toContain("cherry");
});
test("sorts results by match quality", () => {
const items = ["a_p_p", "app", "application"];
const result = fuzzyFilter(items, "app", (x) => x);
// "app" should be first (exact consecutive match at start)
expect(result[0]).toBe("app");
});
test("works with custom getText function", () => {
const items = [
{ name: "foo", id: 1 },
{ name: "bar", id: 2 },
{ name: "foobar", id: 3 },
];
const result = fuzzyFilter(items, "foo", (item) => item.name);
expect(result.length).toBe(2);
expect(result.map((r) => r.name)).toContain("foo");
expect(result.map((r) => r.name)).toContain("foobar");
});
});

View file

@ -0,0 +1,83 @@
// Fuzzy search. Matches if all query characters appear in order (not necessarily consecutive).
// Lower score = better match.
// Result of a fuzzy comparison. `score` is only meaningful when `matches` is
// true; lower scores indicate better matches.
export interface FuzzyMatch {
	matches: boolean;
	score: number;
}

/**
 * Subsequence-style fuzzy match: succeeds when every character of `query`
 * appears in `text` in order (case-insensitive), not necessarily adjacent.
 *
 * Scoring (lower = better): consecutive runs earn a growing bonus, hits at
 * word boundaries earn a fixed bonus, gaps between hits and late positions
 * in the text incur penalties. An empty query matches with score 0.
 */
export function fuzzyMatch(query: string, text: string): FuzzyMatch {
	const q = query.toLowerCase();
	const t = text.toLowerCase();

	if (q.length === 0) {
		return { matches: true, score: 0 };
	}
	if (q.length > t.length) {
		return { matches: false, score: 0 };
	}

	let score = 0;
	let qi = 0; // next query character to locate
	let prevHit = -1; // text index of the previous hit
	let runLength = 0; // length of the current consecutive run

	for (let ti = 0; ti < t.length; ti++) {
		if (qi >= q.length) {
			break;
		}
		if (t[ti] !== q[qi]) {
			continue;
		}

		const atWordBoundary = ti === 0 || /[\s\-_./]/.test(t[ti - 1]!);

		if (prevHit === ti - 1) {
			// Extending a consecutive run (a hit at index 0 also lands here,
			// since prevHit starts at -1): growing bonus per run character.
			runLength++;
			score -= runLength * 5;
		} else {
			runLength = 0;
			if (prevHit >= 0) {
				// Penalize the gap since the previous matched character.
				score += (ti - prevHit - 1) * 2;
			}
		}

		if (atWordBoundary) {
			// Word starts are more likely intentional targets.
			score -= 10;
		}

		// Small bias toward matches near the start of the string.
		score += ti * 0.1;
		prevHit = ti;
		qi++;
	}

	// Some query characters were never found in order.
	if (qi < q.length) {
		return { matches: false, score: 0 };
	}
	return { matches: true, score };
}
/**
 * Filter `items` to those whose text fuzzy-matches `query`, ordered
 * best-first (ascending score). An empty or whitespace-only query returns
 * the input array unchanged (same reference, original order).
 */
export function fuzzyFilter<T>(items: T[], query: string, getText: (item: T) => string): T[] {
	if (query.trim().length === 0) {
		return items;
	}

	const scored = items
		.map((item) => ({ item, result: fuzzyMatch(query, getText(item)) }))
		.filter((entry) => entry.result.matches);

	// Stable sort: equal scores keep their original relative order.
	scored.sort((a, b) => a.result.score - b.result.score);

	return scored.map((entry) => entry.item);
}

View file

@ -45,6 +45,7 @@ interface Args {
model?: string;
apiKey?: string;
systemPrompt?: string;
appendSystemPrompt?: string;
thinking?: ThinkingLevel;
continue?: boolean;
resume?: boolean;
@ -88,6 +89,8 @@ function parseArgs(args: string[]): Args {
result.apiKey = args[++i];
} else if (arg === "--system-prompt" && i + 1 < args.length) {
result.systemPrompt = args[++i];
} else if (arg === "--append-system-prompt" && i + 1 < args.length) {
result.appendSystemPrompt = args[++i];
} else if (arg === "--no-session") {
result.noSession = true;
} else if (arg === "--session" && i + 1 < args.length) {
@ -109,12 +112,19 @@ function parseArgs(args: string[]): Args {
result.tools = validTools;
} else if (arg === "--thinking" && i + 1 < args.length) {
const level = args[++i];
if (level === "off" || level === "minimal" || level === "low" || level === "medium" || level === "high") {
if (
level === "off" ||
level === "minimal" ||
level === "low" ||
level === "medium" ||
level === "high" ||
level === "xhigh"
) {
result.thinking = level;
} else {
console.error(
chalk.yellow(
`Warning: Invalid thinking level "${level}". Valid values: off, minimal, low, medium, high`,
`Warning: Invalid thinking level "${level}". Valid values: off, minimal, low, medium, high, xhigh`,
),
);
}
@ -231,22 +241,23 @@ ${chalk.bold("Usage:")}
${APP_NAME} [options] [@files...] [messages...]
${chalk.bold("Options:")}
--provider <name> Provider name (default: google)
--model <id> Model ID (default: gemini-2.5-flash)
--api-key <key> API key (defaults to env vars)
--system-prompt <text> System prompt (default: coding assistant prompt)
--mode <mode> Output mode: text (default), json, or rpc
--print, -p Non-interactive mode: process prompt and exit
--continue, -c Continue previous session
--resume, -r Select a session to resume
--session <path> Use specific session file
--no-session Don't save session (ephemeral)
--models <patterns> Comma-separated model patterns for quick cycling with Ctrl+P
--tools <tools> Comma-separated list of tools to enable (default: read,bash,edit,write)
Available: read, bash, edit, write, grep, find, ls
--thinking <level> Set thinking level: off, minimal, low, medium, high
--export <file> Export session file to HTML and exit
--help, -h Show this help
--provider <name> Provider name (default: google)
--model <id> Model ID (default: gemini-2.5-flash)
--api-key <key> API key (defaults to env vars)
--system-prompt <text> System prompt (default: coding assistant prompt)
--append-system-prompt <text> Append text or file contents to the system prompt
--mode <mode> Output mode: text (default), json, or rpc
--print, -p Non-interactive mode: process prompt and exit
--continue, -c Continue previous session
--resume, -r Select a session to resume
--session <path> Use specific session file
--no-session Don't save session (ephemeral)
--models <patterns> Comma-separated model patterns for quick cycling with Ctrl+P
--tools <tools> Comma-separated list of tools to enable (default: read,bash,edit,write)
Available: read, bash, edit, write, grep, find, ls
--thinking <level> Set thinking level: off, minimal, low, medium, high, xhigh
--export <file> Export session file to HTML and exit
--help, -h Show this help
${chalk.bold("Examples:")}
# Interactive mode
@ -320,32 +331,47 @@ const toolDescriptions: Record<ToolName, string> = {
ls: "List directory contents",
};
function buildSystemPrompt(customPrompt?: string, selectedTools?: ToolName[]): string {
// Check if customPrompt is a file path that exists
if (customPrompt && existsSync(customPrompt)) {
function resolvePromptInput(input: string | undefined, description: string): string | undefined {
if (!input) {
return undefined;
}
if (existsSync(input)) {
try {
customPrompt = readFileSync(customPrompt, "utf-8");
return readFileSync(input, "utf-8");
} catch (error) {
console.error(chalk.yellow(`Warning: Could not read system prompt file ${customPrompt}: ${error}`));
// Fall through to use as literal string
console.error(chalk.yellow(`Warning: Could not read ${description} file ${input}: ${error}`));
return input;
}
}
if (customPrompt) {
// Use custom prompt as base, then add context/datetime
const now = new Date();
const dateTime = now.toLocaleString("en-US", {
weekday: "long",
year: "numeric",
month: "long",
day: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
timeZoneName: "short",
});
return input;
}
let prompt = customPrompt;
function buildSystemPrompt(customPrompt?: string, selectedTools?: ToolName[], appendSystemPrompt?: string): string {
const resolvedCustomPrompt = resolvePromptInput(customPrompt, "system prompt");
const resolvedAppendPrompt = resolvePromptInput(appendSystemPrompt, "append system prompt");
const now = new Date();
const dateTime = now.toLocaleString("en-US", {
weekday: "long",
year: "numeric",
month: "long",
day: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
timeZoneName: "short",
});
const appendSection = resolvedAppendPrompt ? `\n\n${resolvedAppendPrompt}` : "";
if (resolvedCustomPrompt) {
let prompt = resolvedCustomPrompt;
if (appendSection) {
prompt += appendSection;
}
// Append project context files
const contextFiles = loadProjectContextFiles();
@ -364,18 +390,6 @@ function buildSystemPrompt(customPrompt?: string, selectedTools?: ToolName[]): s
return prompt;
}
const now = new Date();
const dateTime = now.toLocaleString("en-US", {
weekday: "long",
year: "numeric",
month: "long",
day: "numeric",
hour: "2-digit",
minute: "2-digit",
second: "2-digit",
timeZoneName: "short",
});
// Get absolute path to README.md
const readmePath = getReadmePath();
@ -453,6 +467,10 @@ Documentation:
- Your own documentation (including custom model setup and theme creation) is at: ${readmePath}
- Read it when users ask about features, configuration, or setup, and especially if the user asks you to add a custom model or provider, or create a custom theme.`;
if (appendSection) {
prompt += appendSection;
}
// Append project context files
const contextFiles = loadProjectContextFiles();
if (contextFiles.length > 0) {
@ -582,7 +600,14 @@ async function resolveModelScope(
if (parts.length > 1) {
const level = parts[1];
if (level === "off" || level === "minimal" || level === "low" || level === "medium" || level === "high") {
if (
level === "off" ||
level === "minimal" ||
level === "low" ||
level === "medium" ||
level === "high" ||
level === "xhigh"
) {
thinkingLevel = level;
} else {
console.warn(
@ -705,8 +730,9 @@ async function runInteractiveMode(
settingsManager: SettingsManager,
version: string,
changelogMarkdown: string | null = null,
collapseChangelog = false,
modelFallbackMessage: string | null = null,
newVersion: string | null = null,
versionCheckPromise: Promise<string | null>,
scopedModels: Array<{ model: Model<Api>; thinkingLevel: ThinkingLevel }> = [],
initialMessages: string[] = [],
initialMessage?: string,
@ -719,7 +745,7 @@ async function runInteractiveMode(
settingsManager,
version,
changelogMarkdown,
newVersion,
collapseChangelog,
scopedModels,
fdPath,
);
@ -727,6 +753,13 @@ async function runInteractiveMode(
// Initialize TUI (subscribes to agent events internally)
await renderer.init();
// Handle version check result when it completes (don't block)
versionCheckPromise.then((newVersion) => {
if (newVersion) {
renderer.showNewVersionNotification(newVersion);
}
});
// Render any existing messages (from --continue mode)
renderer.renderInitialMessages(agent.state);
@ -806,7 +839,15 @@ async function runSingleShotMode(
if (mode === "text") {
const lastMessage = agent.state.messages[agent.state.messages.length - 1];
if (lastMessage.role === "assistant") {
for (const content of lastMessage.content) {
const assistantMsg = lastMessage as AssistantMessage;
// Check for error/aborted and output error message
if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
console.error(assistantMsg.errorMessage || `Request ${assistantMsg.stopReason}`);
process.exit(1);
}
for (const content of assistantMsg.content) {
if (content.type === "text") {
console.log(content.text);
}
@ -1138,7 +1179,7 @@ export async function main(args: string[]) {
}
}
const systemPrompt = buildSystemPrompt(parsed.systemPrompt, parsed.tools);
const systemPrompt = buildSystemPrompt(parsed.systemPrompt, parsed.tools, parsed.appendSystemPrompt);
// Load previous messages if continuing or resuming
// This may update initialModel if restoring from session
@ -1315,16 +1356,8 @@ export async function main(args: string[]) {
// RPC mode - headless operation
await runRpcMode(agent, sessionManager, settingsManager);
} else if (isInteractive) {
// Check for new version (don't block startup if it takes too long)
let newVersion: string | null = null;
try {
newVersion = await Promise.race([
checkForNewVersion(VERSION),
new Promise<null>((resolve) => setTimeout(() => resolve(null), 1000)), // 1 second timeout
]);
} catch (e) {
// Ignore errors
}
// Check for new version in the background (don't block startup)
const versionCheckPromise = checkForNewVersion(VERSION).catch(() => null);
// Check if we should show changelog (only in interactive mode, only for new sessions)
let changelogMarkdown: string | null = null;
@ -1368,14 +1401,16 @@ export async function main(args: string[]) {
const fdPath = await ensureTool("fd");
// Interactive mode - use TUI (may have initial messages from CLI args)
const collapseChangelog = settingsManager.getCollapseChangelog();
await runInteractiveMode(
agent,
sessionManager,
settingsManager,
VERSION,
changelogMarkdown,
collapseChangelog,
modelFallbackMessage,
newVersion,
versionCheckPromise,
scopedModels,
parsed.messages,
initialMessage,

View file

@ -9,6 +9,14 @@ import { loadOAuthCredentials } from "./oauth/storage.js";
// Handle both default and named exports
const Ajv = (AjvModule as any).default || AjvModule;
// Schema for OpenAI compatibility settings
const OpenAICompatSchema = Type.Object({
supportsStore: Type.Optional(Type.Boolean()),
supportsDeveloperRole: Type.Optional(Type.Boolean()),
supportsReasoningEffort: Type.Optional(Type.Boolean()),
maxTokensField: Type.Optional(Type.Union([Type.Literal("max_completion_tokens"), Type.Literal("max_tokens")])),
});
// Schema for custom model definition
const ModelDefinitionSchema = Type.Object({
id: Type.String({ minLength: 1 }),
@ -32,6 +40,7 @@ const ModelDefinitionSchema = Type.Object({
contextWindow: Type.Number(),
maxTokens: Type.Number(),
headers: Type.Optional(Type.Record(Type.String(), Type.String())),
compat: Type.Optional(OpenAICompatSchema),
});
const ProviderConfigSchema = Type.Object({
@ -46,6 +55,7 @@ const ProviderConfigSchema = Type.Object({
]),
),
headers: Type.Optional(Type.Record(Type.String(), Type.String())),
authHeader: Type.Optional(Type.Boolean()),
models: Type.Array(ModelDefinitionSchema),
});
@ -177,9 +187,17 @@ function parseModels(config: ModelsConfig): Model<Api>[] {
}
// Merge headers: provider headers are base, model headers override
const headers =
let headers =
providerConfig.headers || modelDef.headers ? { ...providerConfig.headers, ...modelDef.headers } : undefined;
// If authHeader is true, add Authorization header with resolved API key
if (providerConfig.authHeader) {
const resolvedKey = resolveApiKey(providerConfig.apiKey);
if (resolvedKey) {
headers = { ...headers, Authorization: `Bearer ${resolvedKey}` };
}
}
models.push({
id: modelDef.id,
name: modelDef.name,
@ -192,7 +210,8 @@ function parseModels(config: ModelsConfig): Model<Api>[] {
contextWindow: modelDef.contextWindow,
maxTokens: modelDef.maxTokens,
headers,
});
compat: modelDef.compat,
} as Model<Api>);
}
}

View file

@ -12,10 +12,13 @@ export interface Settings {
lastChangelogVersion?: string;
defaultProvider?: string;
defaultModel?: string;
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high";
defaultThinkingLevel?: "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
queueMode?: "all" | "one-at-a-time";
theme?: string;
compaction?: CompactionSettings;
hideThinkingBlock?: boolean;
shellPath?: string; // Custom shell path (e.g., for Cygwin users on Windows)
collapseChangelog?: boolean; // Show condensed changelog after update (use /changelog for full)
}
export class SettingsManager {
@ -107,11 +110,11 @@ export class SettingsManager {
this.save();
}
getDefaultThinkingLevel(): "off" | "minimal" | "low" | "medium" | "high" | undefined {
getDefaultThinkingLevel(): "off" | "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
return this.settings.defaultThinkingLevel;
}
setDefaultThinkingLevel(level: "off" | "minimal" | "low" | "medium" | "high"): void {
setDefaultThinkingLevel(level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"): void {
this.settings.defaultThinkingLevel = level;
this.save();
}
@ -143,4 +146,31 @@ export class SettingsManager {
keepRecentTokens: this.getCompactionKeepRecentTokens(),
};
}
	/** Whether the model's thinking block should be hidden (default: false). */
	getHideThinkingBlock(): boolean {
		return this.settings.hideThinkingBlock ?? false;
	}
	/** Set and persist the hide-thinking-block preference. */
	setHideThinkingBlock(hide: boolean): void {
		this.settings.hideThinkingBlock = hide;
		this.save();
	}
	/** Custom shell executable path if configured (e.g. for Cygwin users on Windows), else undefined. */
	getShellPath(): string | undefined {
		return this.settings.shellPath;
	}
	/** Set and persist the custom shell path; pass undefined to clear it. */
	setShellPath(path: string | undefined): void {
		this.settings.shellPath = path;
		this.save();
	}
	/** Whether to show the condensed changelog after an update (default: false). */
	getCollapseChangelog(): boolean {
		return this.settings.collapseChangelog ?? false;
	}
	/** Set and persist the collapse-changelog preference. */
	setCollapseChangelog(collapse: boolean): void {
		this.settings.collapseChangelog = collapse;
		this.save();
	}
}

View file

@ -1,30 +0,0 @@
import { existsSync } from "fs";
/**
 * Resolve the shell used to execute commands on the current platform.
 *
 * On Windows, looks for Git Bash under the standard Program Files locations;
 * elsewhere, plain `sh` is used.
 *
 * @returns The shell executable plus the argument list that precedes the command string.
 * @throws Error on Windows when Git Bash is not found in any searched location.
 */
export function getShellConfig(): { shell: string; args: string[] } {
	if (process.platform !== "win32") {
		// POSIX platforms: rely on the ubiquitous Bourne shell.
		return { shell: "sh", args: ["-c"] };
	}

	// Candidate Git Bash locations derived from the Program Files env vars.
	const paths = [process.env.ProgramFiles, process.env["ProgramFiles(x86)"]]
		.filter((base): base is string => Boolean(base))
		.map((base) => `${base}\\Git\\bin\\bash.exe`);

	const found = paths.find((p) => existsSync(p));
	if (found) {
		return { shell: found, args: ["-c"] };
	}

	throw new Error(
		`Git Bash not found. Please install Git for Windows from https://git-scm.com/download/win\n` +
			`Searched in:\n${paths.map((p) => ` ${p}`).join("\n")}`,
	);
}

View file

@ -0,0 +1,117 @@
import { existsSync } from "node:fs";
import { spawn, spawnSync } from "child_process";
import { SettingsManager } from "./settings-manager.js";
let cachedShellConfig: { shell: string; args: string[] } | null = null;
/**
 * Locate bash.exe via the Windows `where` utility.
 *
 * @returns Absolute path of the first bash.exe reported by `where` that also
 *          exists on disk, or null when `where` fails, times out, finds
 *          nothing, or could not be spawned at all.
 */
function findBashOnPath(): string | null {
	try {
		const lookup = spawnSync("where", ["bash.exe"], { encoding: "utf-8", timeout: 5000 });
		if (lookup.status !== 0 || !lookup.stdout) {
			return null;
		}
		// `where` may print several hits, one per line; take the first.
		const [candidate] = lookup.stdout.trim().split(/\r?\n/);
		return candidate && existsSync(candidate) ? candidate : null;
	} catch {
		// Spawning `where` itself failed; treat as not found.
		return null;
	}
}
/**
 * Resolve (and memoize) the shell used to run bash commands.
 * Resolution order:
 *   1. User-specified shellPath in settings.json (error if set but missing)
 *   2. On Windows: Git Bash in known Program Files locations
 *   3. On Windows: any bash.exe discoverable on PATH (Cygwin, MSYS2, etc.)
 *   4. Elsewhere: plain `sh`
 *
 * A successful result is cached for the process lifetime; error paths do not
 * populate the cache, so a corrected configuration takes effect on retry.
 *
 * @throws Error when a configured shellPath does not exist, or when no bash
 *         can be located on Windows.
 */
export function getShellConfig(): { shell: string; args: string[] } {
	if (!cachedShellConfig) {
		cachedShellConfig = resolveShellConfig();
	}
	return cachedShellConfig;
}

/** Compute the shell configuration without consulting or touching the cache. */
function resolveShellConfig(): { shell: string; args: string[] } {
	const customShellPath = new SettingsManager().getShellPath();

	// 1. Explicit user override from settings.
	if (customShellPath) {
		if (!existsSync(customShellPath)) {
			throw new Error(
				`Custom shell path not found: ${customShellPath}\n` + `Please update shellPath in ~/.pi/agent/settings.json`,
			);
		}
		return { shell: customShellPath, args: ["-c"] };
	}

	if (process.platform !== "win32") {
		return { shell: "sh", args: ["-c"] };
	}

	// 2. Git Bash in the conventional install locations.
	const paths = [process.env.ProgramFiles, process.env["ProgramFiles(x86)"]]
		.filter((base): base is string => Boolean(base))
		.map((base) => `${base}\\Git\\bin\\bash.exe`);
	const gitBash = paths.find((p) => existsSync(p));
	if (gitBash) {
		return { shell: gitBash, args: ["-c"] };
	}

	// 3. Any bash.exe reachable via PATH.
	const bashOnPath = findBashOnPath();
	if (bashOnPath) {
		return { shell: bashOnPath, args: ["-c"] };
	}

	throw new Error(
		`No bash shell found. Options:\n` +
			` 1. Install Git for Windows: https://git-scm.com/download/win\n` +
			` 2. Add your bash to PATH (Cygwin, MSYS2, etc.)\n` +
			` 3. Set shellPath in ~/.pi/agent/settings.json\n\n` +
			`Searched Git Bash in:\n${paths.map((p) => ` ${p}`).join("\n")}`,
	);
}
/**
 * Terminate a process and its descendants, best-effort and cross-platform.
 *
 * Windows delegates to `taskkill /F /T` (walks the child tree); POSIX sends
 * SIGKILL to the process group (negative pid) and falls back to the single
 * pid if that fails. All failures are swallowed — the target may already be gone.
 */
export function killProcessTree(pid: number): void {
	if (process.platform === "win32") {
		try {
			spawn("taskkill", ["/F", "/T", "/PID", String(pid)], {
				stdio: "ignore",
				detached: true,
			});
		} catch {
			// Best effort: taskkill unavailable or the process is gone.
		}
		return;
	}

	try {
		// Negative pid addresses the whole process group.
		process.kill(-pid, "SIGKILL");
		return;
	} catch {
		// Group kill failed; fall through to the single process.
	}
	try {
		process.kill(pid, "SIGKILL");
	} catch {
		// Process already dead.
	}
}

View file

@ -66,6 +66,7 @@
"thinkingLow": "#5f87af",
"thinkingMedium": "#81a2be",
"thinkingHigh": "#b294bb",
"thinkingXhigh": "#d183e8",
"bashMode": "green"
}

View file

@ -65,6 +65,7 @@
"thinkingLow": "#5f87af",
"thinkingMedium": "#5f8787",
"thinkingHigh": "#875f87",
"thinkingXhigh": "#8b008b",
"bashMode": "green"
}

View file

@ -242,6 +242,10 @@
"$ref": "#/$defs/colorValue",
"description": "Thinking level border: high"
},
"thinkingXhigh": {
"$ref": "#/$defs/colorValue",
"description": "Thinking level border: xhigh (OpenAI codex-max only)"
},
"bashMode": {
"$ref": "#/$defs/colorValue",
"description": "Editor border color in bash mode"

View file

@ -66,12 +66,13 @@ const ThemeJsonSchema = Type.Object({
syntaxType: ColorValueSchema,
syntaxOperator: ColorValueSchema,
syntaxPunctuation: ColorValueSchema,
// Thinking Level Borders (5 colors)
// Thinking Level Borders (6 colors)
thinkingOff: ColorValueSchema,
thinkingMinimal: ColorValueSchema,
thinkingLow: ColorValueSchema,
thinkingMedium: ColorValueSchema,
thinkingHigh: ColorValueSchema,
thinkingXhigh: ColorValueSchema,
// Bash Mode (1 color)
bashMode: ColorValueSchema,
}),
@ -122,6 +123,7 @@ export type ThemeColor =
| "thinkingLow"
| "thinkingMedium"
| "thinkingHigh"
| "thinkingXhigh"
| "bashMode";
export type ThemeBg = "userMessageBg" | "toolPendingBg" | "toolSuccessBg" | "toolErrorBg";
@ -298,7 +300,7 @@ export class Theme {
return this.mode;
}
getThinkingBorderColor(level: "off" | "minimal" | "low" | "medium" | "high"): (str: string) => string {
getThinkingBorderColor(level: "off" | "minimal" | "low" | "medium" | "high" | "xhigh"): (str: string) => string {
// Map thinking levels to dedicated theme colors
switch (level) {
case "off":
@ -311,6 +313,8 @@ export class Theme {
return (str: string) => this.fg("thinkingMedium", str);
case "high":
return (str: string) => this.fg("thinkingHigh", str);
case "xhigh":
return (str: string) => this.fg("thinkingXhigh", str);
default:
return (str: string) => this.fg("thinkingOff", str);
}
@ -373,8 +377,31 @@ function loadThemeJson(name: string): ThemeJson {
}
if (!validateThemeJson.Check(json)) {
const errors = Array.from(validateThemeJson.Errors(json));
const errorMessages = errors.map((e) => ` - ${e.path}: ${e.message}`).join("\n");
throw new Error(`Invalid theme ${name}:\n${errorMessages}`);
const missingColors: string[] = [];
const otherErrors: string[] = [];
for (const e of errors) {
// Check for missing required color properties
const match = e.path.match(/^\/colors\/(\w+)$/);
if (match && e.message.includes("Required")) {
missingColors.push(match[1]);
} else {
otherErrors.push(` - ${e.path}: ${e.message}`);
}
}
let errorMessage = `Invalid theme "${name}":\n`;
if (missingColors.length > 0) {
errorMessage += `\nMissing required color tokens:\n`;
errorMessage += missingColors.map((c) => ` - ${c}`).join("\n");
errorMessage += `\n\nPlease add these colors to your theme's "colors" object.`;
errorMessage += `\nSee the built-in themes (dark.json, light.json) for reference values.`;
}
if (otherErrors.length > 0) {
errorMessage += `\n\nOther errors:\n${otherErrors.join("\n")}`;
}
throw new Error(errorMessage);
}
return json as ThemeJson;
}

View file

@ -1,35 +1,19 @@
import { randomBytes } from "node:crypto";
import { createWriteStream } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import type { AgentTool } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import { spawn } from "child_process";
import { getShellConfig } from "../shell-config.js";
import { getShellConfig, killProcessTree } from "../shell.js";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateTail } from "./truncate.js";
/**
* Kill a process and all its children
* Generate a unique temp file path for bash output
*/
function killProcessTree(pid: number): void {
if (process.platform === "win32") {
// Use taskkill on Windows to kill process tree
try {
spawn("taskkill", ["/F", "/T", "/PID", String(pid)], {
stdio: "ignore",
detached: true,
});
} catch (e) {
// Ignore errors if taskkill fails
}
} else {
// Use SIGKILL on Unix/Linux/Mac
try {
process.kill(-pid, "SIGKILL");
} catch (e) {
// Fallback to killing just the child if process group kill fails
try {
process.kill(pid, "SIGKILL");
} catch (e2) {
// Process already dead
}
}
}
function getTempFilePath(): string {
const id = randomBytes(8).toString("hex");
return join(tmpdir(), `pi-bash-${id}.log`);
}
const bashSchema = Type.Object({
@ -37,26 +21,39 @@ const bashSchema = Type.Object({
timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional, no default timeout)" })),
});
interface BashToolDetails {
truncation?: TruncationResult;
fullOutputPath?: string;
}
export const bashTool: AgentTool<typeof bashSchema> = {
name: "bash",
label: "bash",
description:
"Execute a bash command in the current working directory. Returns stdout and stderr. Optionally provide a timeout in seconds.",
description: `Execute a bash command in the current working directory. Returns stdout and stderr. Output is truncated to last ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). If truncated, full output is saved to a temp file. Optionally provide a timeout in seconds.`,
parameters: bashSchema,
execute: async (
_toolCallId: string,
{ command, timeout }: { command: string; timeout?: number },
signal?: AbortSignal,
) => {
return new Promise((resolve, _reject) => {
return new Promise((resolve, reject) => {
const { shell, args } = getShellConfig();
const child = spawn(shell, [...args, command], {
detached: true,
stdio: ["ignore", "pipe", "pipe"],
});
let stdout = "";
let stderr = "";
// We'll stream to a temp file if output gets large
let tempFilePath: string | undefined;
let tempFileStream: ReturnType<typeof createWriteStream> | undefined;
let totalBytes = 0;
// Keep a rolling buffer of the last chunk for tail truncation
const chunks: Buffer[] = [];
let chunksBytes = 0;
// Keep more than we need so we have enough for truncation
const maxChunksBytes = DEFAULT_MAX_BYTES * 2;
let timedOut = false;
// Set timeout if provided
@ -68,26 +65,41 @@ export const bashTool: AgentTool<typeof bashSchema> = {
}, timeout * 1000);
}
// Collect stdout
if (child.stdout) {
child.stdout.on("data", (data) => {
stdout += data.toString();
// Limit buffer size
if (stdout.length > 10 * 1024 * 1024) {
stdout = stdout.slice(0, 10 * 1024 * 1024);
}
});
}
const handleData = (data: Buffer) => {
totalBytes += data.length;
// Collect stderr
if (child.stderr) {
child.stderr.on("data", (data) => {
stderr += data.toString();
// Limit buffer size
if (stderr.length > 10 * 1024 * 1024) {
stderr = stderr.slice(0, 10 * 1024 * 1024);
// Start writing to temp file once we exceed the threshold
if (totalBytes > DEFAULT_MAX_BYTES && !tempFilePath) {
tempFilePath = getTempFilePath();
tempFileStream = createWriteStream(tempFilePath);
// Write all buffered chunks to the file
for (const chunk of chunks) {
tempFileStream.write(chunk);
}
});
}
// Write to temp file if we have one
if (tempFileStream) {
tempFileStream.write(data);
}
// Keep rolling buffer of recent data
chunks.push(data);
chunksBytes += data.length;
// Trim old chunks if buffer is too large
while (chunksBytes > maxChunksBytes && chunks.length > 1) {
const removed = chunks.shift()!;
chunksBytes -= removed.length;
}
};
// Collect stdout and stderr together
if (child.stdout) {
child.stdout.on("data", handleData);
}
if (child.stderr) {
child.stderr.on("data", handleData);
}
// Handle process exit
@ -99,44 +111,64 @@ export const bashTool: AgentTool<typeof bashSchema> = {
signal.removeEventListener("abort", onAbort);
}
// Close temp file stream
if (tempFileStream) {
tempFileStream.end();
}
// Combine all buffered chunks
const fullBuffer = Buffer.concat(chunks);
const fullOutput = fullBuffer.toString("utf-8");
if (signal?.aborted) {
let output = "";
if (stdout) output += stdout;
if (stderr) {
if (output) output += "\n";
output += stderr;
}
let output = fullOutput;
if (output) output += "\n\n";
output += "Command aborted";
_reject(new Error(output));
reject(new Error(output));
return;
}
if (timedOut) {
let output = "";
if (stdout) output += stdout;
if (stderr) {
if (output) output += "\n";
output += stderr;
}
let output = fullOutput;
if (output) output += "\n\n";
output += `Command timed out after ${timeout} seconds`;
_reject(new Error(output));
reject(new Error(output));
return;
}
let output = "";
if (stdout) output += stdout;
if (stderr) {
if (output) output += "\n";
output += stderr;
// Apply tail truncation
const truncation = truncateTail(fullOutput);
let outputText = truncation.content || "(no output)";
// Build details with truncation info
let details: BashToolDetails | undefined;
if (truncation.truncated) {
details = {
truncation,
fullOutputPath: tempFilePath,
};
// Build actionable notice
const startLine = truncation.totalLines - truncation.outputLines + 1;
const endLine = truncation.totalLines;
if (truncation.lastLinePartial) {
// Edge case: last line alone > 30KB
const lastLineSize = formatSize(Buffer.byteLength(fullOutput.split("\n").pop() || "", "utf-8"));
outputText += `\n\n[Showing last ${formatSize(truncation.outputBytes)} of line ${endLine} (line is ${lastLineSize}). Full output: ${tempFilePath}]`;
} else if (truncation.truncatedBy === "lines") {
outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines}. Full output: ${tempFilePath}]`;
} else {
outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Full output: ${tempFilePath}]`;
}
}
if (code !== 0 && code !== null) {
if (output) output += "\n\n";
_reject(new Error(`${output}Command exited with code ${code}`));
outputText += `\n\nCommand exited with code ${code}`;
reject(new Error(outputText));
} else {
resolve({ content: [{ type: "text", text: output || "(no output)" }], details: undefined });
resolve({ content: [{ type: "text", text: outputText }], details });
}
});

View file

@ -6,6 +6,7 @@ import { globSync } from "glob";
import { homedir } from "os";
import path from "path";
import { ensureTool } from "../tools-manager.js";
import { DEFAULT_MAX_BYTES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
/**
* Expand ~ to home directory
@ -30,11 +31,15 @@ const findSchema = Type.Object({
const DEFAULT_LIMIT = 1000;
interface FindToolDetails {
truncation?: TruncationResult;
resultLimitReached?: number;
}
export const findTool: AgentTool<typeof findSchema> = {
name: "find",
label: "find",
description:
"Search for files by glob pattern. Returns matching file paths relative to the search directory. Respects .gitignore.",
description: `Search for files by glob pattern. Returns matching file paths relative to the search directory. Respects .gitignore. Output is truncated to ${DEFAULT_LIMIT} results or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first).`,
parameters: findSchema,
execute: async (
_toolCallId: string,
@ -112,7 +117,7 @@ export const findTool: AgentTool<typeof findSchema> = {
return;
}
let output = result.stdout?.trim() || "";
const output = result.stdout?.trim() || "";
if (result.status !== 0) {
const errorMsg = result.stderr?.trim() || `fd exited with code ${result.status}`;
@ -124,41 +129,70 @@ export const findTool: AgentTool<typeof findSchema> = {
}
if (!output) {
output = "No files found matching pattern";
} else {
const lines = output.split("\n");
const relativized: string[] = [];
for (const rawLine of lines) {
const line = rawLine.replace(/\r$/, "").trim();
if (!line) {
continue;
}
const hadTrailingSlash = line.endsWith("/") || line.endsWith("\\");
let relativePath = line;
if (line.startsWith(searchPath)) {
relativePath = line.slice(searchPath.length + 1); // +1 for the /
} else {
relativePath = path.relative(searchPath, line);
}
if (hadTrailingSlash && !relativePath.endsWith("/")) {
relativePath += "/";
}
relativized.push(relativePath);
}
output = relativized.join("\n");
const count = relativized.length;
if (count >= effectiveLimit) {
output += `\n\n(truncated, ${effectiveLimit} results shown)`;
}
resolve({
content: [{ type: "text", text: "No files found matching pattern" }],
details: undefined,
});
return;
}
resolve({ content: [{ type: "text", text: output }], details: undefined });
const lines = output.split("\n");
const relativized: string[] = [];
for (const rawLine of lines) {
const line = rawLine.replace(/\r$/, "").trim();
if (!line) {
continue;
}
const hadTrailingSlash = line.endsWith("/") || line.endsWith("\\");
let relativePath = line;
if (line.startsWith(searchPath)) {
relativePath = line.slice(searchPath.length + 1); // +1 for the /
} else {
relativePath = path.relative(searchPath, line);
}
if (hadTrailingSlash && !relativePath.endsWith("/")) {
relativePath += "/";
}
relativized.push(relativePath);
}
// Check if we hit the result limit
const resultLimitReached = relativized.length >= effectiveLimit;
// Apply byte truncation (no line limit since we already have result limit)
const rawOutput = relativized.join("\n");
const truncation = truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER });
let resultOutput = truncation.content;
const details: FindToolDetails = {};
// Build notices
const notices: string[] = [];
if (resultLimitReached) {
notices.push(
`${effectiveLimit} results limit reached. Use limit=${effectiveLimit * 2} for more, or refine pattern`,
);
details.resultLimitReached = effectiveLimit;
}
if (truncation.truncated) {
notices.push(`${formatSize(DEFAULT_MAX_BYTES)} limit reached`);
details.truncation = truncation;
}
if (notices.length > 0) {
resultOutput += `\n\n[${notices.join(". ")}]`;
}
resolve({
content: [{ type: "text", text: resultOutput }],
details: Object.keys(details).length > 0 ? details : undefined,
});
} catch (e: any) {
signal?.removeEventListener("abort", onAbort);
reject(e);

View file

@ -6,6 +6,14 @@ import { readFileSync, type Stats, statSync } from "fs";
import { homedir } from "os";
import path from "path";
import { ensureTool } from "../tools-manager.js";
import {
DEFAULT_MAX_BYTES,
formatSize,
GREP_MAX_LINE_LENGTH,
type TruncationResult,
truncateHead,
truncateLine,
} from "./truncate.js";
/**
* Expand ~ to home directory
@ -36,11 +44,16 @@ const grepSchema = Type.Object({
const DEFAULT_LIMIT = 100;
interface GrepToolDetails {
truncation?: TruncationResult;
matchLimitReached?: number;
linesTruncated?: boolean;
}
export const grepTool: AgentTool<typeof grepSchema> = {
name: "grep",
label: "grep",
description:
"Search file contents for a pattern. Returns matching lines with file paths and line numbers. Respects .gitignore.",
description: `Search file contents for a pattern. Returns matching lines with file paths and line numbers. Respects .gitignore. Output is truncated to ${DEFAULT_LIMIT} matches or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). Long lines are truncated to ${GREP_MAX_LINE_LENGTH} chars.`,
parameters: grepSchema,
execute: async (
_toolCallId: string,
@ -143,7 +156,8 @@ export const grepTool: AgentTool<typeof grepSchema> = {
const rl = createInterface({ input: child.stdout });
let stderr = "";
let matchCount = 0;
let truncated = false;
let matchLimitReached = false;
let linesTruncated = false;
let aborted = false;
let killedDueToLimit = false;
const outputLines: string[] = [];
@ -171,7 +185,7 @@ export const grepTool: AgentTool<typeof grepSchema> = {
stderr += chunk.toString();
});
const formatBlock = (filePath: string, lineNumber: number) => {
const formatBlock = (filePath: string, lineNumber: number): string[] => {
const relativePath = formatPath(filePath);
const lines = getFileLines(filePath);
if (!lines.length) {
@ -187,10 +201,16 @@ export const grepTool: AgentTool<typeof grepSchema> = {
const sanitized = lineText.replace(/\r/g, "");
const isMatchLine = current === lineNumber;
// Truncate long lines
const { text: truncatedText, wasTruncated } = truncateLine(sanitized);
if (wasTruncated) {
linesTruncated = true;
}
if (isMatchLine) {
block.push(`${relativePath}:${current}: ${sanitized}`);
block.push(`${relativePath}:${current}: ${truncatedText}`);
} else {
block.push(`${relativePath}-${current}- ${sanitized}`);
block.push(`${relativePath}-${current}- ${truncatedText}`);
}
}
@ -219,7 +239,7 @@ export const grepTool: AgentTool<typeof grepSchema> = {
}
if (matchCount >= effectiveLimit) {
truncated = true;
matchLimitReached = true;
stopChild(true);
}
}
@ -251,12 +271,45 @@ export const grepTool: AgentTool<typeof grepSchema> = {
return;
}
let output = outputLines.join("\n");
if (truncated) {
output += `\n\n(truncated, limit of ${effectiveLimit} matches reached)`;
// Apply byte truncation (no line limit since we already have match limit)
const rawOutput = outputLines.join("\n");
const truncation = truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER });
let output = truncation.content;
const details: GrepToolDetails = {};
// Build notices
const notices: string[] = [];
if (matchLimitReached) {
notices.push(
`${effectiveLimit} matches limit reached. Use limit=${effectiveLimit * 2} for more, or refine pattern`,
);
details.matchLimitReached = effectiveLimit;
}
settle(() => resolve({ content: [{ type: "text", text: output }], details: undefined }));
if (truncation.truncated) {
notices.push(`${formatSize(DEFAULT_MAX_BYTES)} limit reached`);
details.truncation = truncation;
}
if (linesTruncated) {
notices.push(
`Some lines truncated to ${GREP_MAX_LINE_LENGTH} chars. Use read tool to see full lines`,
);
details.linesTruncated = true;
}
if (notices.length > 0) {
output += `\n\n[${notices.join(". ")}]`;
}
settle(() =>
resolve({
content: [{ type: "text", text: output }],
details: Object.keys(details).length > 0 ? details : undefined,
}),
);
});
} catch (err) {
settle(() => reject(err as Error));

View file

@ -3,6 +3,7 @@ import { Type } from "@sinclair/typebox";
import { existsSync, readdirSync, statSync } from "fs";
import { homedir } from "os";
import nodePath from "path";
import { DEFAULT_MAX_BYTES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
/**
* Expand ~ to home directory
@ -24,11 +25,15 @@ const lsSchema = Type.Object({
const DEFAULT_LIMIT = 500;
interface LsToolDetails {
truncation?: TruncationResult;
entryLimitReached?: number;
}
export const lsTool: AgentTool<typeof lsSchema> = {
name: "ls",
label: "ls",
description:
"List directory contents. Returns entries sorted alphabetically, with '/' suffix for directories. Includes dotfiles.",
description: `List directory contents. Returns entries sorted alphabetically, with '/' suffix for directories. Includes dotfiles. Output is truncated to ${DEFAULT_LIMIT} entries or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first).`,
parameters: lsSchema,
execute: async (_toolCallId: string, { path, limit }: { path?: string; limit?: number }, signal?: AbortSignal) => {
return new Promise((resolve, reject) => {
@ -71,11 +76,11 @@ export const lsTool: AgentTool<typeof lsSchema> = {
// Format entries with directory indicators
const results: string[] = [];
let truncated = false;
let entryLimitReached = false;
for (const entry of entries) {
if (results.length >= effectiveLimit) {
truncated = true;
entryLimitReached = true;
break;
}
@ -97,16 +102,39 @@ export const lsTool: AgentTool<typeof lsSchema> = {
signal?.removeEventListener("abort", onAbort);
let output = results.join("\n");
if (truncated) {
const remaining = entries.length - effectiveLimit;
output += `\n\n(truncated, ${remaining} more entries)`;
}
if (results.length === 0) {
output = "(empty directory)";
resolve({ content: [{ type: "text", text: "(empty directory)" }], details: undefined });
return;
}
resolve({ content: [{ type: "text", text: output }], details: undefined });
// Apply byte truncation (no line limit since we already have entry limit)
const rawOutput = results.join("\n");
const truncation = truncateHead(rawOutput, { maxLines: Number.MAX_SAFE_INTEGER });
let output = truncation.content;
const details: LsToolDetails = {};
// Build notices
const notices: string[] = [];
if (entryLimitReached) {
notices.push(`${effectiveLimit} entries limit reached. Use limit=${effectiveLimit * 2} for more`);
details.entryLimitReached = effectiveLimit;
}
if (truncation.truncated) {
notices.push(`${formatSize(DEFAULT_MAX_BYTES)} limit reached`);
details.truncation = truncation;
}
if (notices.length > 0) {
output += `\n\n[${notices.join(". ")}]`;
}
resolve({
content: [{ type: "text", text: output }],
details: Object.keys(details).length > 0 ? details : undefined,
});
} catch (e: any) {
signal?.removeEventListener("abort", onAbort);
reject(e);

View file

@ -4,6 +4,7 @@ import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { extname, resolve as resolvePath } from "path";
import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";
/**
* Expand ~ to home directory
@ -43,14 +44,14 @@ const readSchema = Type.Object({
limit: Type.Optional(Type.Number({ description: "Maximum number of lines to read" })),
});
const MAX_LINES = 2000;
const MAX_LINE_LENGTH = 2000;
interface ReadToolDetails {
truncation?: TruncationResult;
}
export const readTool: AgentTool<typeof readSchema> = {
name: "read",
label: "read",
description:
"Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, defaults to first 2000 lines. Use offset/limit for large files.",
description: `Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). Use offset/limit for large files.`,
parameters: readSchema,
execute: async (
_toolCallId: string,
@ -60,119 +61,138 @@ export const readTool: AgentTool<typeof readSchema> = {
const absolutePath = resolvePath(expandPath(path));
const mimeType = isImageFile(absolutePath);
return new Promise<{ content: (TextContent | ImageContent)[]; details: undefined }>((resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error("Operation aborted"));
return;
}
let aborted = false;
// Set up abort handler
const onAbort = () => {
aborted = true;
reject(new Error("Operation aborted"));
};
if (signal) {
signal.addEventListener("abort", onAbort, { once: true });
}
// Perform the read operation
(async () => {
try {
// Check if file exists
await access(absolutePath, constants.R_OK);
// Check if aborted before reading
if (aborted) {
return;
}
// Read the file based on type
let content: (TextContent | ImageContent)[];
if (mimeType) {
// Read as image (binary)
const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64");
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
} else {
// Read as text
const textContent = await readFile(absolutePath, "utf-8");
const lines = textContent.split("\n");
// Apply offset and limit (matching Claude Code Read tool behavior)
const startLine = offset ? Math.max(0, offset - 1) : 0; // 1-indexed to 0-indexed
const maxLines = limit || MAX_LINES;
const endLine = Math.min(startLine + maxLines, lines.length);
// Check if offset is out of bounds
if (startLine >= lines.length) {
throw new Error(`Offset ${offset} is beyond end of file (${lines.length} lines total)`);
}
// Get the relevant lines
const selectedLines = lines.slice(startLine, endLine);
// Truncate long lines and track which were truncated
let hadTruncatedLines = false;
const formattedLines = selectedLines.map((line) => {
if (line.length > MAX_LINE_LENGTH) {
hadTruncatedLines = true;
return line.slice(0, MAX_LINE_LENGTH);
}
return line;
});
let outputText = formattedLines.join("\n");
// Add notices
const notices: string[] = [];
if (hadTruncatedLines) {
notices.push(`Some lines were truncated to ${MAX_LINE_LENGTH} characters for display`);
}
if (endLine < lines.length) {
const remaining = lines.length - endLine;
notices.push(`${remaining} more lines not shown. Use offset=${endLine + 1} to continue reading`);
}
if (notices.length > 0) {
outputText += `\n\n... (${notices.join(". ")})`;
}
content = [{ type: "text", text: outputText }];
}
// Check if aborted after reading
if (aborted) {
return;
}
// Clean up abort handler
if (signal) {
signal.removeEventListener("abort", onAbort);
}
resolve({ content, details: undefined });
} catch (error: any) {
// Clean up abort handler
if (signal) {
signal.removeEventListener("abort", onAbort);
}
if (!aborted) {
reject(error);
}
return new Promise<{ content: (TextContent | ImageContent)[]; details: ReadToolDetails | undefined }>(
(resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error("Operation aborted"));
return;
}
})();
});
let aborted = false;
// Set up abort handler
const onAbort = () => {
aborted = true;
reject(new Error("Operation aborted"));
};
if (signal) {
signal.addEventListener("abort", onAbort, { once: true });
}
// Perform the read operation
(async () => {
try {
// Check if file exists
await access(absolutePath, constants.R_OK);
// Check if aborted before reading
if (aborted) {
return;
}
// Read the file based on type
let content: (TextContent | ImageContent)[];
let details: ReadToolDetails | undefined;
if (mimeType) {
// Read as image (binary)
const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64");
content = [
{ type: "text", text: `Read image file [${mimeType}]` },
{ type: "image", data: base64, mimeType },
];
} else {
// Read as text
const textContent = await readFile(absolutePath, "utf-8");
const allLines = textContent.split("\n");
const totalFileLines = allLines.length;
// Apply offset if specified (1-indexed to 0-indexed)
const startLine = offset ? Math.max(0, offset - 1) : 0;
const startLineDisplay = startLine + 1; // For display (1-indexed)
// Check if offset is out of bounds
if (startLine >= allLines.length) {
throw new Error(`Offset ${offset} is beyond end of file (${allLines.length} lines total)`);
}
// If limit is specified by user, use it; otherwise we'll let truncateHead decide
let selectedContent: string;
let userLimitedLines: number | undefined;
if (limit !== undefined) {
const endLine = Math.min(startLine + limit, allLines.length);
selectedContent = allLines.slice(startLine, endLine).join("\n");
userLimitedLines = endLine - startLine;
} else {
selectedContent = allLines.slice(startLine).join("\n");
}
// Apply truncation (respects both line and byte limits)
const truncation = truncateHead(selectedContent);
let outputText: string;
if (truncation.firstLineExceedsLimit) {
// First line at offset exceeds 30KB - tell model to use bash
const firstLineSize = formatSize(Buffer.byteLength(allLines[startLine], "utf-8"));
outputText = `[Line ${startLineDisplay} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${startLineDisplay}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`;
details = { truncation };
} else if (truncation.truncated) {
// Truncation occurred - build actionable notice
const endLineDisplay = startLineDisplay + truncation.outputLines - 1;
const nextOffset = endLineDisplay + 1;
outputText = truncation.content;
if (truncation.truncatedBy === "lines") {
outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue]`;
} else {
outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue]`;
}
details = { truncation };
} else if (userLimitedLines !== undefined && startLine + userLimitedLines < allLines.length) {
// User specified limit, there's more content, but no truncation
const endLineDisplay = startLineDisplay + userLimitedLines - 1;
const remaining = allLines.length - (startLine + userLimitedLines);
const nextOffset = startLine + userLimitedLines + 1;
outputText = truncation.content;
outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue]`;
} else {
// No truncation, no user limit exceeded
outputText = truncation.content;
}
content = [{ type: "text", text: outputText }];
}
// Check if aborted after reading
if (aborted) {
return;
}
// Clean up abort handler
if (signal) {
signal.removeEventListener("abort", onAbort);
}
resolve({ content, details });
} catch (error: any) {
// Clean up abort handler
if (signal) {
signal.removeEventListener("abort", onAbort);
}
if (!aborted) {
reject(error);
}
}
})();
},
);
},
};

View file

@ -0,0 +1,251 @@
/**
 * Shared truncation utilities for tool outputs.
 *
 * Truncation is based on two independent limits - whichever is hit first wins:
 * - Line limit (default: 2000 lines, see DEFAULT_MAX_LINES)
 * - Byte limit (default: 50KB, see DEFAULT_MAX_BYTES)
 *
 * Never returns partial lines (except bash tail truncation edge case).
 */
export const DEFAULT_MAX_LINES = 2000;
export const DEFAULT_MAX_BYTES = 50 * 1024; // 50KB
export const GREP_MAX_LINE_LENGTH = 500; // Max chars per grep match line
export interface TruncationResult {
	/** The truncated content */
	content: string;
	/** Whether truncation occurred */
	truncated: boolean;
	/** Which limit was hit: "lines", "bytes", or null if not truncated */
	truncatedBy: "lines" | "bytes" | null;
	/** Total number of lines in the original content */
	totalLines: number;
	/** Total number of bytes in the original content */
	totalBytes: number;
	/** Number of complete lines in the truncated output */
	outputLines: number;
	/** Number of bytes in the truncated output */
	outputBytes: number;
	/** Whether the last line was partially truncated (only for tail truncation edge case) */
	lastLinePartial: boolean;
	/** Whether the first line exceeded the byte limit (for head truncation) */
	firstLineExceedsLimit: boolean;
}
export interface TruncationOptions {
	/** Maximum number of lines (default: 2000) */
	maxLines?: number;
	/** Maximum number of bytes (default: 50KB) */
	maxBytes?: number;
}
/**
 * Render a byte count as a human-readable size string (B, KB, or MB).
 * KB and MB values are shown with exactly one decimal place.
 */
export function formatSize(bytes: number): string {
	const KB = 1024;
	const MB = KB * 1024;
	if (bytes < KB) return `${bytes}B`;
	if (bytes < MB) return `${(bytes / KB).toFixed(1)}KB`;
	return `${(bytes / MB).toFixed(1)}MB`;
}
/**
 * Head-truncate content: keep the leading lines that fit within both the
 * line budget and the byte budget (whichever runs out first).
 *
 * Only whole lines are ever emitted. If even the very first line is larger
 * than the byte budget, the result is empty content with
 * firstLineExceedsLimit=true so callers can suggest an alternative.
 */
export function truncateHead(content: string, options: TruncationOptions = {}): TruncationResult {
	const lineBudget = options.maxLines ?? DEFAULT_MAX_LINES;
	const byteBudget = options.maxBytes ?? DEFAULT_MAX_BYTES;

	const allLines = content.split("\n");
	const totalLines = allLines.length;
	const totalBytes = Buffer.byteLength(content, "utf-8");

	// Fast path: everything fits, return the content untouched.
	if (totalLines <= lineBudget && totalBytes <= byteBudget) {
		return {
			content,
			truncated: false,
			truncatedBy: null,
			totalLines,
			totalBytes,
			outputLines: totalLines,
			outputBytes: totalBytes,
			lastLinePartial: false,
			firstLineExceedsLimit: false,
		};
	}

	// A single oversized first line cannot be emitted without splitting it,
	// which head truncation never does — report it instead.
	if (Buffer.byteLength(allLines[0], "utf-8") > byteBudget) {
		return {
			content: "",
			truncated: true,
			truncatedBy: "bytes",
			totalLines,
			totalBytes,
			outputLines: 0,
			outputBytes: 0,
			lastLinePartial: false,
			firstLineExceedsLimit: true,
		};
	}

	// Greedily take whole lines from the top until one of the budgets runs out.
	const kept: string[] = [];
	let keptBytes = 0;
	let limitHit: "lines" | "bytes" = "lines";
	for (const line of allLines) {
		if (kept.length >= lineBudget) {
			limitHit = "lines";
			break;
		}
		// +1 accounts for the "\n" that joins this line to the previous one.
		const cost = Buffer.byteLength(line, "utf-8") + (kept.length > 0 ? 1 : 0);
		if (keptBytes + cost > byteBudget) {
			limitHit = "bytes";
			break;
		}
		kept.push(line);
		keptBytes += cost;
	}

	const joined = kept.join("\n");
	return {
		content: joined,
		truncated: true,
		truncatedBy: limitHit,
		totalLines,
		totalBytes,
		outputLines: kept.length,
		outputBytes: Buffer.byteLength(joined, "utf-8"),
		lastLinePartial: false,
		firstLineExceedsLimit: false,
	};
}
/**
 * Tail-truncate content: keep the trailing lines that fit within both the
 * line budget and the byte budget (whichever runs out first). Suited to
 * command output, where the end (errors, final results) matters most.
 *
 * Unlike head truncation this may emit one partial line: when even the
 * final line alone exceeds the byte budget, its trailing bytes are kept
 * and lastLinePartial is set.
 */
export function truncateTail(content: string, options: TruncationOptions = {}): TruncationResult {
	const lineBudget = options.maxLines ?? DEFAULT_MAX_LINES;
	const byteBudget = options.maxBytes ?? DEFAULT_MAX_BYTES;

	const allLines = content.split("\n");
	const totalLines = allLines.length;
	const totalBytes = Buffer.byteLength(content, "utf-8");

	// Fast path: nothing needs trimming.
	if (totalLines <= lineBudget && totalBytes <= byteBudget) {
		return {
			content,
			truncated: false,
			truncatedBy: null,
			totalLines,
			totalBytes,
			outputLines: totalLines,
			outputBytes: totalBytes,
			lastLinePartial: false,
			firstLineExceedsLimit: false,
		};
	}

	// Walk backwards from the end, prepending whole lines while budgets allow.
	const kept: string[] = [];
	let keptBytes = 0;
	let limitHit: "lines" | "bytes" = "lines";
	let partialTail = false;
	for (let i = allLines.length - 1; i >= 0; i--) {
		if (kept.length >= lineBudget) break;
		const line = allLines[i];
		// +1 accounts for the "\n" that joins this line to the one after it.
		const cost = Buffer.byteLength(line, "utf-8") + (kept.length > 0 ? 1 : 0);
		if (keptBytes + cost > byteBudget) {
			limitHit = "bytes";
			// Degenerate case: nothing kept yet and this (the last) line alone
			// is over budget — keep its trailing bytes as a partial line.
			if (kept.length === 0) {
				const tail = truncateStringToBytesFromEnd(line, byteBudget);
				kept.unshift(tail);
				keptBytes = Buffer.byteLength(tail, "utf-8");
				partialTail = true;
			}
			break;
		}
		kept.unshift(line);
		keptBytes += cost;
	}

	// Matches the original behavior: when the output fills the line budget and
	// still fits in bytes, the line limit is reported as the one that was hit.
	if (kept.length >= lineBudget && keptBytes <= byteBudget) {
		limitHit = "lines";
	}

	const joined = kept.join("\n");
	return {
		content: joined,
		truncated: true,
		truncatedBy: limitHit,
		totalLines,
		totalBytes,
		outputLines: kept.length,
		outputBytes: Buffer.byteLength(joined, "utf-8"),
		lastLinePartial: partialTail,
		firstLineExceedsLimit: false,
	};
}
/**
 * Truncate a string to fit within a byte limit, keeping its end.
 * Handles multi-byte UTF-8 characters correctly: when the cut would land
 * inside a character, the whole character is dropped rather than emitting
 * an invalid partial sequence.
 *
 * @param str - The string to truncate.
 * @param maxBytes - Maximum number of UTF-8 bytes to keep from the end.
 * @returns The trailing portion of `str` that fits in `maxBytes`, starting
 *          on a character boundary (may be empty if no full character fits).
 */
function truncateStringToBytesFromEnd(str: string, maxBytes: number): string {
	const buf = Buffer.from(str, "utf-8");
	if (buf.length <= maxBytes) {
		return str;
	}
	// Jump maxBytes back from the end of the buffer...
	let start = buf.length - maxBytes;
	// ...then advance past any UTF-8 continuation bytes (10xxxxxx) so we
	// start decoding on a character boundary.
	while (start < buf.length && (buf[start] & 0xc0) === 0x80) {
		start++;
	}
	// Decode directly from the boundary; avoids the deprecated Buffer#slice.
	return buf.toString("utf-8", start);
}
/**
 * Cap a single line at maxChars characters, appending a "... [truncated]"
 * marker when anything was cut. Used for grep match lines.
 */
export function truncateLine(
	line: string,
	maxChars: number = GREP_MAX_LINE_LENGTH,
): { text: string; wasTruncated: boolean } {
	const wasTruncated = line.length > maxChars;
	const text = wasTruncated ? `${line.slice(0, maxChars)}... [truncated]` : line;
	return { text, wasTruncated };
}

View file

@ -7,10 +7,13 @@ import { getMarkdownTheme, theme } from "../theme/theme.js";
*/
export class AssistantMessageComponent extends Container {
private contentContainer: Container;
private hideThinkingBlock: boolean;
constructor(message?: AssistantMessage) {
constructor(message?: AssistantMessage, hideThinkingBlock = false) {
super();
this.hideThinkingBlock = hideThinkingBlock;
// Container for text/thinking content
this.contentContainer = new Container();
this.addChild(this.contentContainer);
@ -20,6 +23,10 @@ export class AssistantMessageComponent extends Container {
}
}
setHideThinkingBlock(hide: boolean): void {
this.hideThinkingBlock = hide;
}
updateContent(message: AssistantMessage): void {
// Clear content container
this.contentContainer.clear();
@ -34,21 +41,33 @@ export class AssistantMessageComponent extends Container {
}
// Render content in order
for (const content of message.content) {
for (let i = 0; i < message.content.length; i++) {
const content = message.content[i];
if (content.type === "text" && content.text.trim()) {
// Assistant text messages with no background - trim the text
// Set paddingY=0 to avoid extra spacing before tool executions
this.contentContainer.addChild(new Markdown(content.text.trim(), 1, 0, getMarkdownTheme()));
} else if (content.type === "thinking" && content.thinking.trim()) {
// Thinking traces in muted color, italic
// Use Markdown component with default text style for consistent styling
this.contentContainer.addChild(
new Markdown(content.thinking.trim(), 1, 0, getMarkdownTheme(), {
color: (text: string) => theme.fg("muted", text),
italic: true,
}),
);
this.contentContainer.addChild(new Spacer(1));
// Check if there's text content after this thinking block
const hasTextAfter = message.content.slice(i + 1).some((c) => c.type === "text" && c.text.trim());
if (this.hideThinkingBlock) {
// Show static "Thinking..." label when hidden
this.contentContainer.addChild(new Text(theme.fg("muted", "Thinking..."), 1, 0));
if (hasTextAfter) {
this.contentContainer.addChild(new Spacer(1));
}
} else {
// Thinking traces in muted color, italic
// Use Markdown component with default text style for consistent styling
this.contentContainer.addChild(
new Markdown(content.thinking.trim(), 1, 0, getMarkdownTheme(), {
color: (text: string) => theme.fg("muted", text),
italic: true,
}),
);
this.contentContainer.addChild(new Spacer(1));
}
}
}

View file

@ -25,10 +25,10 @@ export class CompactionComponent extends Container {
private updateDisplay(): void {
this.clear();
this.addChild(new Spacer(1));
if (this.expanded) {
// Show header + summary as markdown (like user message)
this.addChild(new Spacer(1));
const header = `**Context compacted from ${this.tokensBefore.toLocaleString()} tokens**\n\n`;
this.addChild(
new Markdown(header + this.summary, 1, 1, getMarkdownTheme(), {
@ -36,17 +36,17 @@ export class CompactionComponent extends Container {
color: (text: string) => theme.fg("userMessageText", text),
}),
);
this.addChild(new Spacer(1));
} else {
// Collapsed: just show the header line with user message styling
// Collapsed: simple text in warning color with token count
const tokenStr = this.tokensBefore.toLocaleString();
this.addChild(
new Text(
theme.fg("userMessageText", `--- Earlier messages compacted (CTRL+O to expand) ---`),
theme.fg("warning", `Earlier messages compacted from ${tokenStr} tokens (ctrl+o to expand)`),
1,
1,
(text: string) => theme.bg("userMessageBg", text),
),
);
}
this.addChild(new Spacer(1));
}
}

View file

@ -9,8 +9,15 @@ export class CustomEditor extends Editor {
public onShiftTab?: () => void;
public onCtrlP?: () => void;
public onCtrlO?: () => void;
public onCtrlT?: () => void;
handleInput(data: string): void {
// Intercept Ctrl+T for thinking block visibility toggle
if (data === "\x14" && this.onCtrlT) {
this.onCtrlT();
return;
}
// Intercept Ctrl+O for tool output expansion
if (data === "\x0f" && this.onCtrlO) {
this.onCtrlO();

View file

@ -145,7 +145,9 @@ export class FooterComponent implements Component {
const formatTokens = (count: number): string => {
if (count < 1000) return count.toString();
if (count < 10000) return (count / 1000).toFixed(1) + "k";
return Math.round(count / 1000) + "k";
if (count < 1000000) return Math.round(count / 1000) + "k";
if (count < 10000000) return (count / 1000000).toFixed(1) + "M";
return Math.round(count / 1000000) + "M";
};
// Replace home directory with ~
@ -186,16 +188,17 @@ export class FooterComponent implements Component {
// Colorize context percentage based on usage
let contextPercentStr: string;
const autoIndicator = this.autoCompactEnabled ? " (auto)" : "";
const contextPercentDisplay = `${contextPercent}%/${formatTokens(contextWindow)}${autoIndicator}`;
if (contextPercentValue > 90) {
contextPercentStr = theme.fg("error", `${contextPercent}%${autoIndicator}`);
contextPercentStr = theme.fg("error", contextPercentDisplay);
} else if (contextPercentValue > 70) {
contextPercentStr = theme.fg("warning", `${contextPercent}%${autoIndicator}`);
contextPercentStr = theme.fg("warning", contextPercentDisplay);
} else {
contextPercentStr = `${contextPercent}%${autoIndicator}`;
contextPercentStr = contextPercentDisplay;
}
statsParts.push(contextPercentStr);
const statsLeft = statsParts.join(" ");
let statsLeft = statsParts.join(" ");
// Add model name on the right side, plus thinking level if model supports it
const modelName = this.state.model?.id || "no-model";
@ -209,9 +212,17 @@ export class FooterComponent implements Component {
}
}
const statsLeftWidth = visibleWidth(statsLeft);
let statsLeftWidth = visibleWidth(statsLeft);
const rightSideWidth = visibleWidth(rightSide);
// If statsLeft is too wide, truncate it
if (statsLeftWidth > width) {
// Truncate statsLeft to fit width (no room for right side)
const plainStatsLeft = statsLeft.replace(/\x1b\[[0-9;]*m/g, "");
statsLeft = plainStatsLeft.substring(0, width - 3) + "...";
statsLeftWidth = visibleWidth(statsLeft);
}
// Calculate available space for padding (minimum 2 spaces between stats and model)
const minPadding = 2;
const totalNeeded = statsLeftWidth + minPadding + rightSideWidth;

View file

@ -1,5 +1,6 @@
import type { Model } from "@mariozechner/pi-ai";
import { Container, Input, Spacer, Text, type TUI } from "@mariozechner/pi-tui";
import { fuzzyFilter } from "../fuzzy.js";
import { getAvailableModels } from "../model-config.js";
import type { SettingsManager } from "../settings-manager.js";
import { theme } from "../theme/theme.js";
@ -114,19 +115,7 @@ export class ModelSelectorComponent extends Container {
}
private filterModels(query: string): void {
if (!query.trim()) {
this.filteredModels = this.allModels;
} else {
const searchTokens = query
.toLowerCase()
.split(/\s+/)
.filter((t) => t);
this.filteredModels = this.allModels.filter(({ provider, id, model }) => {
const searchText = `${provider} ${id} ${model.name}`.toLowerCase();
return searchTokens.every((token) => searchText.includes(token));
});
}
this.filteredModels = fuzzyFilter(this.allModels, query, ({ provider, id }) => `${provider} ${id}`);
this.selectedIndex = Math.min(this.selectedIndex, Math.max(0, this.filteredModels.length - 1));
this.updateList();
}

View file

@ -1,4 +1,5 @@
import { type Component, Container, Input, Spacer, Text, truncateToWidth } from "@mariozechner/pi-tui";
import { fuzzyFilter } from "../fuzzy.js";
import type { SessionManager } from "../session-manager.js";
import { theme } from "../theme/theme.js";
import { DynamicBorder } from "./dynamic-border.js";
@ -42,20 +43,7 @@ class SessionList implements Component {
}
private filterSessions(query: string): void {
if (!query.trim()) {
this.filteredSessions = this.allSessions;
} else {
const searchTokens = query
.toLowerCase()
.split(/\s+/)
.filter((t) => t);
this.filteredSessions = this.allSessions.filter((session) => {
// Search through all messages in the session
const searchText = session.allMessagesText.toLowerCase();
return searchTokens.every((token) => searchText.includes(token));
});
}
this.filteredSessions = fuzzyFilter(this.allSessions, query, (session) => session.allMessagesText);
this.selectedIndex = Math.min(this.selectedIndex, Math.max(0, this.filteredSessions.length - 1));
}

View file

@ -85,7 +85,17 @@ export class ToolExecutionComponent extends Container {
// Strip ANSI codes and carriage returns from raw output
// (bash may emit colors/formatting, and Windows may include \r)
let output = textBlocks.map((c: any) => stripAnsi(c.text || "").replace(/\r/g, "")).join("\n");
let output = textBlocks
.map((c: any) => {
let text = stripAnsi(c.text || "").replace(/\r/g, "");
// stripAnsi misses some escape sequences like standalone ESC \ (String Terminator)
// and leaves orphaned fragments from malformed sequences (e.g. TUI output captured to file)
// Clean up: remove ESC + any following char, and control chars except newline/tab
text = text.replace(/\x1b./g, "");
text = text.replace(/[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]/g, "");
return text;
})
.join("\n");
// Add indicator for images
if (imageBlocks.length > 0) {
@ -105,7 +115,6 @@ export class ToolExecutionComponent extends Container {
text = theme.fg("toolTitle", theme.bold(`$ ${command || theme.fg("toolOutput", "...")}`));
if (this.result) {
// Show output without code fences - more minimal
const output = this.getTextOutput().trim();
if (output) {
const lines = output.split("\n");
@ -118,17 +127,36 @@ export class ToolExecutionComponent extends Container {
text += theme.fg("toolOutput", `\n... (${remaining} more lines)`);
}
}
// Show truncation warning at the bottom (outside collapsed area)
const truncation = this.result.details?.truncation;
const fullOutputPath = this.result.details?.fullOutputPath;
if (truncation?.truncated || fullOutputPath) {
const warnings: string[] = [];
if (fullOutputPath) {
warnings.push(`Full output: ${fullOutputPath}`);
}
if (truncation?.truncated) {
if (truncation.truncatedBy === "lines") {
warnings.push(`Truncated: showing ${truncation.outputLines} of ${truncation.totalLines} lines`);
} else {
warnings.push(`Truncated: ${truncation.outputLines} lines shown (30KB limit)`);
}
}
text += "\n" + theme.fg("warning", `[${warnings.join(". ")}]`);
}
}
} else if (this.toolName === "read") {
const path = shortenPath(this.args?.file_path || this.args?.path || "");
const offset = this.args?.offset;
const limit = this.args?.limit;
// Build path display with offset/limit suffix
// Build path display with offset/limit suffix (in warning color if offset/limit used)
let pathDisplay = path ? theme.fg("accent", path) : theme.fg("toolOutput", "...");
if (offset !== undefined) {
const endLine = limit !== undefined ? offset + limit : "";
pathDisplay += theme.fg("toolOutput", `:${offset}${endLine ? `-${endLine}` : ""}`);
if (offset !== undefined || limit !== undefined) {
const startLine = offset ?? 1;
const endLine = limit !== undefined ? startLine + limit - 1 : "";
pathDisplay += theme.fg("warning", `:${startLine}${endLine ? `-${endLine}` : ""}`);
}
text = theme.fg("toolTitle", theme.bold("read")) + " " + pathDisplay;
@ -136,6 +164,7 @@ export class ToolExecutionComponent extends Container {
if (this.result) {
const output = this.getTextOutput();
const lines = output.split("\n");
const maxLines = this.expanded ? lines.length : 10;
const displayLines = lines.slice(0, maxLines);
const remaining = lines.length - maxLines;
@ -144,6 +173,23 @@ export class ToolExecutionComponent extends Container {
if (remaining > 0) {
text += theme.fg("toolOutput", `\n... (${remaining} more lines)`);
}
// Show truncation warning at the bottom (outside collapsed area)
const truncation = this.result.details?.truncation;
if (truncation?.truncated) {
if (truncation.firstLineExceedsLimit) {
text += "\n" + theme.fg("warning", `[First line exceeds 30KB limit]`);
} else if (truncation.truncatedBy === "lines") {
text +=
"\n" +
theme.fg(
"warning",
`[Truncated: showing ${truncation.outputLines} of ${truncation.totalLines} lines]`,
);
} else {
text += "\n" + theme.fg("warning", `[Truncated: ${truncation.outputLines} lines shown (30KB limit)]`);
}
}
}
} else if (this.toolName === "write") {
const path = shortenPath(this.args?.file_path || this.args?.path || "");
@ -221,6 +267,20 @@ export class ToolExecutionComponent extends Container {
text += theme.fg("toolOutput", `\n... (${remaining} more lines)`);
}
}
// Show truncation warning at the bottom (outside collapsed area)
const entryLimit = this.result.details?.entryLimitReached;
const truncation = this.result.details?.truncation;
if (entryLimit || truncation?.truncated) {
const warnings: string[] = [];
if (entryLimit) {
warnings.push(`${entryLimit} entries limit`);
}
if (truncation?.truncated) {
warnings.push("30KB limit");
}
text += "\n" + theme.fg("warning", `[Truncated: ${warnings.join(", ")}]`);
}
}
} else if (this.toolName === "find") {
const pattern = this.args?.pattern || "";
@ -249,6 +309,20 @@ export class ToolExecutionComponent extends Container {
text += theme.fg("toolOutput", `\n... (${remaining} more lines)`);
}
}
// Show truncation warning at the bottom (outside collapsed area)
const resultLimit = this.result.details?.resultLimitReached;
const truncation = this.result.details?.truncation;
if (resultLimit || truncation?.truncated) {
const warnings: string[] = [];
if (resultLimit) {
warnings.push(`${resultLimit} results limit`);
}
if (truncation?.truncated) {
warnings.push("30KB limit");
}
text += "\n" + theme.fg("warning", `[Truncated: ${warnings.join(", ")}]`);
}
}
} else if (this.toolName === "grep") {
const pattern = this.args?.pattern || "";
@ -281,6 +355,24 @@ export class ToolExecutionComponent extends Container {
text += theme.fg("toolOutput", `\n... (${remaining} more lines)`);
}
}
// Show truncation warning at the bottom (outside collapsed area)
const matchLimit = this.result.details?.matchLimitReached;
const truncation = this.result.details?.truncation;
const linesTruncated = this.result.details?.linesTruncated;
if (matchLimit || truncation?.truncated || linesTruncated) {
const warnings: string[] = [];
if (matchLimit) {
warnings.push(`${matchLimit} matches limit`);
}
if (truncation?.truncated) {
warnings.push("30KB limit");
}
if (linesTruncated) {
warnings.push("some lines truncated");
}
text += "\n" + theme.fg("warning", `[Truncated: ${warnings.join(", ")}]`);
}
}
} else {
// Generic tool

View file

@ -32,7 +32,7 @@ import {
SUMMARY_SUFFIX,
} from "../session-manager.js";
import type { SettingsManager } from "../settings-manager.js";
import { getShellConfig } from "../shell-config.js";
import { getShellConfig, killProcessTree } from "../shell.js";
import { expandSlashCommand, type FileSlashCommand, loadSlashCommands } from "../slash-commands.js";
import { getEditorTheme, getMarkdownTheme, onThemeChange, setTheme, theme } from "../theme/theme.js";
import { AssistantMessageComponent } from "./assistant-message.js";
@ -43,6 +43,7 @@ import { FooterComponent } from "./footer.js";
import { ModelSelectorComponent } from "./model-selector.js";
import { OAuthSelectorComponent } from "./oauth-selector.js";
import { QueueModeSelectorComponent } from "./queue-mode-selector.js";
import { SessionSelectorComponent } from "./session-selector.js";
import { ThemeSelectorComponent } from "./theme-selector.js";
import { ThinkingSelectorComponent } from "./thinking-selector.js";
import { ToolExecutionComponent } from "./tool-execution.js";
@ -69,8 +70,9 @@ export class TuiRenderer {
private loadingAnimation: Loader | null = null;
private lastSigintTime = 0;
private lastEscapeTime = 0;
private changelogMarkdown: string | null = null;
private newVersion: string | null = null;
private collapseChangelog = false;
// Message queueing
private queuedMessages: string[] = [];
@ -96,6 +98,9 @@ export class TuiRenderer {
// User message selector (for branching)
private userMessageSelector: UserMessageSelectorComponent | null = null;
// Session selector (for resume)
private sessionSelector: SessionSelectorComponent | null = null;
// OAuth selector
private oauthSelector: any | null = null;
@ -108,6 +113,9 @@ export class TuiRenderer {
// Tool output expansion state
private toolOutputExpanded = false;
// Thinking block visibility state
private hideThinkingBlock = false;
// Agent subscription unsubscribe function
private unsubscribe?: () => void;
@ -126,7 +134,7 @@ export class TuiRenderer {
settingsManager: SettingsManager,
version: string,
changelogMarkdown: string | null = null,
newVersion: string | null = null,
collapseChangelog = false,
scopedModels: Array<{ model: Model<any>; thinkingLevel: ThinkingLevel }> = [],
fdPath: string | null = null,
) {
@ -134,8 +142,8 @@ export class TuiRenderer {
this.sessionManager = sessionManager;
this.settingsManager = settingsManager;
this.version = version;
this.newVersion = newVersion;
this.changelogMarkdown = changelogMarkdown;
this.collapseChangelog = collapseChangelog;
this.scopedModels = scopedModels;
this.ui = new TUI(new ProcessTerminal());
this.chatContainer = new Container();
@ -218,6 +226,14 @@ export class TuiRenderer {
description: "Toggle automatic context compaction",
};
const resumeCommand: SlashCommand = {
name: "resume",
description: "Resume a different session",
};
// Load hide thinking block setting
this.hideThinkingBlock = settingsManager.getHideThinkingBlock();
// Load file-based slash commands
this.fileCommands = loadSlashCommands();
@ -244,6 +260,7 @@ export class TuiRenderer {
clearCommand,
compactCommand,
autocompactCommand,
resumeCommand,
...fileSlashCommands,
],
process.cwd(),
@ -279,6 +296,9 @@ export class TuiRenderer {
theme.fg("dim", "ctrl+o") +
theme.fg("muted", " to expand tools") +
"\n" +
theme.fg("dim", "ctrl+t") +
theme.fg("muted", " to toggle thinking") +
"\n" +
theme.fg("dim", "/") +
theme.fg("muted", " for commands") +
"\n" +
@ -294,29 +314,21 @@ export class TuiRenderer {
this.ui.addChild(header);
this.ui.addChild(new Spacer(1));
// Add new version notification if available
if (this.newVersion) {
this.ui.addChild(new DynamicBorder((text) => theme.fg("warning", text)));
this.ui.addChild(
new Text(
theme.bold(theme.fg("warning", "Update Available")) +
"\n" +
theme.fg("muted", `New version ${this.newVersion} is available. Run: `) +
theme.fg("accent", "npm install -g @mariozechner/pi-coding-agent"),
1,
0,
),
);
this.ui.addChild(new DynamicBorder((text) => theme.fg("warning", text)));
}
// Add changelog if provided
if (this.changelogMarkdown) {
this.ui.addChild(new DynamicBorder());
this.ui.addChild(new Text(theme.bold(theme.fg("accent", "What's New")), 1, 0));
this.ui.addChild(new Spacer(1));
this.ui.addChild(new Markdown(this.changelogMarkdown.trim(), 1, 0, getMarkdownTheme()));
this.ui.addChild(new Spacer(1));
if (this.collapseChangelog) {
// Show condensed version with hint to use /changelog
const versionMatch = this.changelogMarkdown.match(/##\s+\[?(\d+\.\d+\.\d+)\]?/);
const latestVersion = versionMatch ? versionMatch[1] : this.version;
const condensedText = `Updated to v${latestVersion}. Use ${theme.bold("/changelog")} to view full changelog.`;
this.ui.addChild(new Text(condensedText, 1, 0));
} else {
this.ui.addChild(new Text(theme.bold(theme.fg("accent", "What's New")), 1, 0));
this.ui.addChild(new Spacer(1));
this.ui.addChild(new Markdown(this.changelogMarkdown.trim(), 1, 0, getMarkdownTheme()));
this.ui.addChild(new Spacer(1));
}
this.ui.addChild(new DynamicBorder());
}
@ -364,6 +376,15 @@ export class TuiRenderer {
this.editor.setText("");
this.isBashMode = false;
this.updateEditorBorderColor();
} else if (!this.editor.getText().trim()) {
// Double-escape with empty editor triggers /branch
const now = Date.now();
if (now - this.lastEscapeTime < 500) {
this.showUserMessageSelector();
this.lastEscapeTime = 0; // Reset to prevent triple-escape
} else {
this.lastEscapeTime = now;
}
}
};
@ -383,6 +404,10 @@ export class TuiRenderer {
this.toggleToolOutputExpansion();
};
this.editor.onCtrlT = () => {
this.toggleThinkingBlockVisibility();
};
// Handle editor text changes for bash mode detection
this.editor.onChange = (text: string) => {
const wasBashMode = this.isBashMode;
@ -505,6 +530,13 @@ export class TuiRenderer {
return;
}
// Check for /resume command
if (text === "/resume") {
this.showSessionSelector();
this.editor.setText("");
return;
}
// Check for bash command (!<command>)
if (text.startsWith("!")) {
const command = text.slice(1).trim();
@ -559,6 +591,9 @@ export class TuiRenderer {
// Update pending messages display
this.updatePendingMessagesDisplay();
// Add to history for up/down arrow navigation
this.editor.addToHistory(text);
// Clear editor
this.editor.setText("");
this.ui.requestRender();
@ -569,6 +604,9 @@ export class TuiRenderer {
if (this.onInputCallback) {
this.onInputCallback(text);
}
// Add to history for up/down arrow navigation
this.editor.addToHistory(text);
};
// Start the UI
@ -691,7 +729,7 @@ export class TuiRenderer {
this.ui.requestRender();
} else if (event.message.role === "assistant") {
// Create assistant component for streaming
this.streamingComponent = new AssistantMessageComponent();
this.streamingComponent = new AssistantMessageComponent(undefined, this.hideThinkingBlock);
this.chatContainer.addChild(this.streamingComponent);
this.streamingComponent.updateContent(event.message as AssistantMessage);
this.ui.requestRender();
@ -831,7 +869,7 @@ export class TuiRenderer {
const assistantMsg = message;
// Add assistant message component
const assistantComponent = new AssistantMessageComponent(assistantMsg);
const assistantComponent = new AssistantMessageComponent(assistantMsg, this.hideThinkingBlock);
this.chatContainer.addChild(assistantComponent);
}
// Note: tool calls and results are now handled via tool_execution_start/end events
@ -877,7 +915,7 @@ export class TuiRenderer {
}
} else if (message.role === "assistant") {
const assistantMsg = message as AssistantMessage;
const assistantComponent = new AssistantMessageComponent(assistantMsg);
const assistantComponent = new AssistantMessageComponent(assistantMsg, this.hideThinkingBlock);
this.chatContainer.addChild(assistantComponent);
// Create tool execution components for any tool calls
@ -918,6 +956,22 @@ export class TuiRenderer {
}
// Clear pending tools after rendering initial messages
this.pendingTools.clear();
// Populate editor history with user messages from the session (oldest first so newest is at index 0)
for (const message of state.messages) {
if (message.role === "user") {
const textBlocks =
typeof message.content === "string"
? [{ type: "text", text: message.content }]
: message.content.filter((c) => c.type === "text");
const textContent = textBlocks.map((c) => c.text).join("");
// Skip compaction summary messages
if (textContent && !textContent.startsWith(SUMMARY_PREFIX)) {
this.editor.addToHistory(textContent);
}
}
}
this.ui.requestRender();
}
@ -961,7 +1015,7 @@ export class TuiRenderer {
}
} else if (message.role === "assistant") {
const assistantMsg = message;
const assistantComponent = new AssistantMessageComponent(assistantMsg);
const assistantComponent = new AssistantMessageComponent(assistantMsg, this.hideThinkingBlock);
this.chatContainer.addChild(assistantComponent);
for (const content of assistantMsg.content) {
@ -1023,7 +1077,12 @@ export class TuiRenderer {
return;
}
const levels: ThinkingLevel[] = ["off", "minimal", "low", "medium", "high"];
// xhigh is only available for codex-max models
const modelId = this.agent.state.model?.id || "";
const supportsXhigh = modelId.includes("codex-max");
const levels: ThinkingLevel[] = supportsXhigh
? ["off", "minimal", "low", "medium", "high", "xhigh"]
: ["off", "minimal", "low", "medium", "high"];
const currentLevel = this.agent.state.thinkingLevel || "off";
const currentIndex = levels.indexOf(currentLevel);
const nextIndex = (currentIndex + 1) % levels.length;
@ -1168,6 +1227,28 @@ export class TuiRenderer {
this.ui.requestRender();
}
/** Flip thinking-block visibility, persist the choice, and redraw the chat. */
private toggleThinkingBlockVisibility(): void {
	const hidden = !this.hideThinkingBlock;
	this.hideThinkingBlock = hidden;
	this.settingsManager.setHideThinkingBlock(hidden);

	// Push the new visibility down to every assistant message component.
	for (const child of this.chatContainer.children) {
		if (child instanceof AssistantMessageComponent) {
			child.setHideThinkingBlock(hidden);
		}
	}

	// Rebuild the chat so the visibility change actually takes effect.
	this.chatContainer.clear();
	this.rebuildChatFromMessages();

	// Brief inline notification about the new state.
	const label = hidden ? "hidden" : "visible";
	this.chatContainer.addChild(new Spacer(1));
	this.chatContainer.addChild(new Text(theme.fg("dim", `Thinking blocks: ${label}`), 1, 0));
	this.ui.requestRender();
}
clearEditor(): void {
this.editor.setText("");
this.ui.requestRender();
@ -1187,12 +1268,21 @@ export class TuiRenderer {
this.ui.requestRender();
}
private showSuccess(message: string, detail?: string): void {
showNewVersionNotification(newVersion: string): void {
// Show new version notification in the chat
this.chatContainer.addChild(new Spacer(1));
const text = detail
? `${theme.fg("success", message)}\n${theme.fg("muted", detail)}`
: theme.fg("success", message);
this.chatContainer.addChild(new Text(text, 1, 1));
this.chatContainer.addChild(new DynamicBorder((text) => theme.fg("warning", text)));
this.chatContainer.addChild(
new Text(
theme.bold(theme.fg("warning", "Update Available")) +
"\n" +
theme.fg("muted", `New version ${newVersion} is available. Run: `) +
theme.fg("accent", "npm install -g @mariozechner/pi-coding-agent"),
1,
0,
),
);
this.chatContainer.addChild(new DynamicBorder((text) => theme.fg("warning", text)));
this.ui.requestRender();
}
@ -1489,6 +1579,95 @@ export class TuiRenderer {
this.ui.setFocus(this.editor);
}
/** Replace the editor with the session selector (triggered by /resume). */
private showSessionSelector(): void {
	const selector = new SessionSelectorComponent(
		this.sessionManager,
		// A session was picked: restore the editor, then switch sessions.
		async (sessionPath) => {
			this.hideSessionSelector();
			await this.handleResumeSession(sessionPath);
		},
		// Cancelled: just restore the editor.
		() => {
			this.hideSessionSelector();
			this.ui.requestRender();
		},
	);
	this.sessionSelector = selector;

	// Swap the selector in where the editor lives and give it keyboard focus.
	this.editorContainer.clear();
	this.editorContainer.addChild(selector);
	this.ui.setFocus(selector.getSessionList());
	this.ui.requestRender();
}
/**
 * Switch the UI and agent over to a previously saved session.
 *
 * The teardown order matters: unsubscribe before aborting so no stale agent
 * events are processed mid-transition, and wait for the agent to go idle
 * before swapping session state. Resubscription happens only after the new
 * session's messages are in place.
 *
 * @param sessionPath Path of the session file to resume.
 */
private async handleResumeSession(sessionPath: string): Promise<void> {
	// Unsubscribe first to prevent processing events during transition
	this.unsubscribe?.();

	// Abort and wait for completion
	this.agent.abort();
	await this.agent.waitForIdle();

	// Stop loading animation
	if (this.loadingAnimation) {
		this.loadingAnimation.stop();
		this.loadingAnimation = null;
	}
	this.statusContainer.clear();

	// Clear UI state (queued input, pending tools, any in-flight stream)
	this.queuedMessages = [];
	this.pendingMessagesContainer.clear();
	this.streamingComponent = null;
	this.pendingTools.clear();

	// Set the selected session as active
	this.sessionManager.setSessionFile(sessionPath);

	// Reload the session and replace the agent's message history
	const loaded = loadSessionFromEntries(this.sessionManager.loadEntries());
	this.agent.replaceMessages(loaded.messages);

	// Restore model if saved in session (only if it is still available)
	const savedModel = this.sessionManager.loadModel();
	if (savedModel) {
		const availableModels = (await getAvailableModels()).models;
		const match = availableModels.find((m) => m.provider === savedModel.provider && m.id === savedModel.modelId);
		if (match) {
			this.agent.setModel(match);
		}
	}

	// Restore thinking level if saved in session
	const savedThinking = this.sessionManager.loadThinkingLevel();
	if (savedThinking) {
		this.agent.setThinkingLevel(savedThinking as ThinkingLevel);
	}

	// Resubscribe to agent
	this.subscribeToAgent();

	// Clear and re-render the chat from the resumed session's messages
	this.chatContainer.clear();
	this.isFirstUserMessage = true;
	this.renderInitialMessages(this.agent.state);

	// Show confirmation message
	this.chatContainer.addChild(new Spacer(1));
	this.chatContainer.addChild(new Text(theme.fg("dim", "Resumed session"), 1, 0));
	this.ui.requestRender();
}
/** Tear down the session selector and put the editor back in its place. */
private hideSessionSelector(): void {
	this.sessionSelector = null;
	const { editorContainer, editor } = this;
	editorContainer.clear();
	editorContainer.addChild(editor);
	this.ui.setFocus(editor);
}
private async showOAuthSelector(mode: "login" | "logout"): Promise<void> {
// For logout mode, filter to only show logged-in providers
let providersToShow: string[] = [];
@ -2036,10 +2215,6 @@ export class TuiRenderer {
// Update footer with new state (fixes context % display)
this.footer.updateState(this.agent.state);
// Show success message
const successTitle = isAuto ? "✓ Context auto-compacted" : "✓ Context compacted";
this.showSuccess(successTitle, `Reduced from ${compactionEntry.tokensBefore.toLocaleString()} tokens`);
} catch (error) {
const message = error instanceof Error ? error.message : String(error);
if (message === "Compaction cancelled" || (error instanceof Error && error.name === "AbortError")) {
@ -2109,32 +2284,3 @@ export class TuiRenderer {
}
}
}
/**
 * Kill a process and all its children (cross-platform).
 *
 * On Windows this delegates to `taskkill /F /T`, which walks the process
 * tree. On POSIX systems it sends SIGKILL to the process group (`-pid`) and
 * falls back to signalling just the process if no group kill is possible.
 *
 * Best-effort: all failures (process already dead, taskkill missing, ESRCH)
 * are deliberately swallowed.
 *
 * @param pid Process id whose tree should be terminated.
 */
function killProcessTree(pid: number): void {
	if (process.platform === "win32") {
		// Use taskkill on Windows to kill process tree
		try {
			const child = spawn("taskkill", ["/F", "/T", "/PID", String(pid)], {
				stdio: "ignore",
				detached: true,
			});
			// A detached child keeps the parent's event loop alive until it
			// exits; unref so a slow taskkill cannot delay our own shutdown.
			child.unref();
		} catch {
			// Ignore errors if taskkill fails
		}
	} else {
		// Use SIGKILL on Unix/Linux/Mac
		try {
			process.kill(-pid, "SIGKILL");
		} catch {
			// Fallback to killing just the child if process group kill fails
			try {
				process.kill(pid, "SIGKILL");
			} catch {
				// Process already dead
			}
		}
	}
}

View file

@ -1,4 +1,4 @@
import { type Component, Container, Spacer, Text } from "@mariozechner/pi-tui";
import { type Component, Container, Spacer, Text, truncateToWidth } from "@mariozechner/pi-tui";
import { theme } from "../theme/theme.js";
import { DynamicBorder } from "./dynamic-border.js";
@ -54,8 +54,8 @@ class UserMessageList implements Component {
// First line: cursor + message
const cursor = isSelected ? theme.fg("accent", " ") : " ";
const maxMsgWidth = width - 2; // Account for cursor
const truncatedMsg = normalizedMessage.substring(0, maxMsgWidth);
const maxMsgWidth = width - 2; // Account for cursor (2 chars)
const truncatedMsg = truncateToWidth(normalizedMessage, maxMsgWidth);
const messageLine = cursor + (isSelected ? theme.bold(truncatedMsg) : truncatedMsg);
lines.push(messageLine);

View file

@ -38,6 +38,7 @@ function createMockUsage(input: number, output: number, cacheRead = 0, cacheWrit
output,
cacheRead,
cacheWrite,
totalTokens: input + output + cacheRead + cacheWrite,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}

View file

@ -43,7 +43,8 @@ describe("Coding Agent Tools", () => {
const result = await readTool.execute("test-call-1", { path: testFile });
expect(getTextOutput(result)).toBe(content);
expect(getTextOutput(result)).not.toContain("more lines not shown");
// No truncation message since file fits within limits
expect(getTextOutput(result)).not.toContain("Use offset=");
expect(result.details).toBeUndefined();
});
@ -64,23 +65,21 @@ describe("Coding Agent Tools", () => {
expect(output).toContain("Line 1");
expect(output).toContain("Line 2000");
expect(output).not.toContain("Line 2001");
expect(output).toContain("500 more lines not shown");
expect(output).toContain("Use offset=2001 to continue reading");
expect(output).toContain("[Showing lines 1-2000 of 2500. Use offset=2001 to continue]");
});
it("should truncate long lines and show notice", async () => {
const testFile = join(testDir, "long-lines.txt");
const longLine = "a".repeat(3000);
const content = `Short line\n${longLine}\nAnother short line`;
writeFileSync(testFile, content);
it("should truncate when byte limit exceeded", async () => {
const testFile = join(testDir, "large-bytes.txt");
// Create file that exceeds 50KB byte limit but has fewer than 2000 lines
const lines = Array.from({ length: 500 }, (_, i) => `Line ${i + 1}: ${"x".repeat(200)}`);
writeFileSync(testFile, lines.join("\n"));
const result = await readTool.execute("test-call-4", { path: testFile });
const output = getTextOutput(result);
expect(output).toContain("Short line");
expect(output).toContain("Another short line");
expect(output).toContain("Some lines were truncated to 2000 characters");
expect(output.split("\n")[1].length).toBe(2000);
expect(output).toContain("Line 1:");
// Should show byte limit message
expect(output).toMatch(/\[Showing lines 1-\d+ of 500 \(.* limit\)\. Use offset=\d+ to continue\]/);
});
it("should handle offset parameter", async () => {
@ -94,7 +93,8 @@ describe("Coding Agent Tools", () => {
expect(output).not.toContain("Line 50");
expect(output).toContain("Line 51");
expect(output).toContain("Line 100");
expect(output).not.toContain("more lines not shown");
// No truncation message since file fits within limits
expect(output).not.toContain("Use offset=");
});
it("should handle limit parameter", async () => {
@ -108,8 +108,7 @@ describe("Coding Agent Tools", () => {
expect(output).toContain("Line 1");
expect(output).toContain("Line 10");
expect(output).not.toContain("Line 11");
expect(output).toContain("90 more lines not shown");
expect(output).toContain("Use offset=11 to continue reading");
expect(output).toContain("[90 more lines in file. Use offset=11 to continue]");
});
it("should handle offset + limit together", async () => {
@ -128,8 +127,7 @@ describe("Coding Agent Tools", () => {
expect(output).toContain("Line 41");
expect(output).toContain("Line 60");
expect(output).not.toContain("Line 61");
expect(output).toContain("40 more lines not shown");
expect(output).toContain("Use offset=61 to continue reading");
expect(output).toContain("[40 more lines in file. Use offset=61 to continue]");
});
it("should show error when offset is beyond file length", async () => {
@ -141,17 +139,19 @@ describe("Coding Agent Tools", () => {
);
});
it("should show both truncation notices when applicable", async () => {
const testFile = join(testDir, "both-truncations.txt");
const longLine = "b".repeat(3000);
const lines = Array.from({ length: 2500 }, (_, i) => (i === 500 ? longLine : `Line ${i + 1}`));
it("should include truncation details when truncated", async () => {
const testFile = join(testDir, "large-file.txt");
const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`);
writeFileSync(testFile, lines.join("\n"));
const result = await readTool.execute("test-call-9", { path: testFile });
const output = getTextOutput(result);
expect(output).toContain("Some lines were truncated to 2000 characters");
expect(output).toContain("500 more lines not shown");
expect(result.details).toBeDefined();
expect(result.details?.truncation).toBeDefined();
expect(result.details?.truncation?.truncated).toBe(true);
expect(result.details?.truncation?.truncatedBy).toBe("lines");
expect(result.details?.truncation?.totalLines).toBe(2500);
expect(result.details?.truncation?.outputLines).toBe(2000);
});
});
@ -276,7 +276,7 @@ describe("Coding Agent Tools", () => {
expect(output).toContain("context.txt-1- before");
expect(output).toContain("context.txt:2: match one");
expect(output).toContain("context.txt-3- after");
expect(output).toContain("(truncated, limit of 1 matches reached)");
expect(output).toContain("[1 matches limit reached. Use limit=2 for more, or refine pattern]");
// Ensure second match is not present
expect(output).not.toContain("match two");
});

View file

@ -0,0 +1,81 @@
import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui";
import { describe, expect, it } from "vitest";
/**
* Tests for truncateToWidth behavior with Unicode characters.
*
* These tests verify that truncateToWidth properly handles text with
* Unicode characters that have different byte vs display widths.
*/
describe("truncateToWidth", () => {
	// Shared helper: truncate `text` to (width - reserved) columns and return
	// the result alongside the limit and the measured display width.
	const clip = (text: string, width: number, reserved = 2) => {
		const limit = width - reserved;
		const out = truncateToWidth(text, limit);
		return { out, limit, measured: visibleWidth(out) };
	};

	it("should truncate messages with Unicode characters correctly", () => {
		// This message contains a checkmark (✔) which may have display width > 1 byte
		const { measured, limit } = clip('✔ script to run dev $ concurrently "vite" "node --import tsx ./', 67);
		expect(measured).toBeLessThanOrEqual(limit);
	});

	it("should handle emoji characters", () => {
		const { measured, limit } = clip("🎉 Celebration! 🚀 Launch 📦 Package ready for deployment now", 40);
		expect(measured).toBeLessThanOrEqual(limit);
	});

	it("should handle mixed ASCII and wide characters", () => {
		const { measured, limit } = clip("Hello 世界 Test 你好 More text here that is long", 30);
		expect(measured).toBeLessThanOrEqual(limit);
	});

	it("should not truncate messages that fit", () => {
		const original = "Short message";
		const { out, measured, limit } = clip(original, 50);
		expect(out).toBe(original);
		expect(measured).toBeLessThanOrEqual(limit);
	});

	it("should add ellipsis when truncating", () => {
		const { out, measured, limit } = clip("This is a very long message that needs to be truncated", 30);
		expect(out).toContain("...");
		expect(measured).toBeLessThanOrEqual(limit);
	});

	it("should handle the exact crash case from issue report", () => {
		// Terminal width was 67, line had visible width 68
		// The problematic text contained "✔" and "" characters
		const terminalWidth = 67;
		const cursorWidth = 2; // " " or " "
		const { measured } = clip(
			'✔ script to run dev $ concurrently "vite" "node --import tsx ./server.ts"',
			terminalWidth,
			cursorWidth,
		);
		// The final line (cursor + message) must not exceed terminal width
		expect(measured + cursorWidth).toBeLessThanOrEqual(terminalWidth);
	});
});

View file

@ -4,6 +4,12 @@
### Fixed
- Slack API errors (msg_too_long) no longer crash the process
- Added try/catch error handling to all Slack API calls in the message queue
- Main channel messages truncated at 35K with note to ask for elaboration
- Thread messages truncated at 20K
- replaceMessage also truncated at 35K
- Private channel messages not being logged
- Added `message.groups` to required bot events in README
- Added `groups:history` and `groups:read` to required scopes in README

66
packages/mom/out.html Normal file

File diff suppressed because one or more lines are too long

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-mom",
"version": "0.12.9",
"version": "0.13.2",
"description": "Slack bot that delegates messages to the pi coding agent",
"type": "module",
"bin": {
@ -21,8 +21,8 @@
},
"dependencies": {
"@anthropic-ai/sandbox-runtime": "^0.0.16",
"@mariozechner/pi-agent-core": "^0.12.9",
"@mariozechner/pi-ai": "^0.12.9",
"@mariozechner/pi-agent-core": "^0.13.2",
"@mariozechner/pi-ai": "^0.13.2",
"@sinclair/typebox": "^0.34.0",
"@slack/socket-mode": "^2.0.0",
"@slack/web-api": "^7.0.0",

View file

@ -127,7 +127,11 @@ function getRecentMessages(channelDir: string, turnCount: number): string {
for (const msg of turn) {
const date = (msg.date || "").substring(0, 19);
const user = msg.userName || msg.user || "";
const text = msg.text || "";
let text = msg.text || "";
// Truncate bot messages (tool results can be huge)
if (msg.isBot) {
text = truncateForContext(text, 50000, 2000, msg.ts);
}
const attachments = (msg.attachments || []).map((a) => a.local).join(",");
formatted.push(`${date}\t${user}\t${text}\t${attachments}`);
}
@ -136,6 +140,43 @@ function getRecentMessages(channelDir: string, turnCount: number): string {
return formatted.join("\n");
}
/**
 * Truncate text to maxChars or maxLines, whichever comes first.
 * Adds a note with stats and instructions if truncation occurred.
 *
 * @param text     Raw message text (tool results can be huge).
 * @param maxChars Hard upper bound on characters kept from `text`.
 * @param maxLines Hard upper bound on lines kept from `text`.
 * @param ts       Optional Slack timestamp of the source log entry; when
 *                 present the note includes a jq recipe to recover the
 *                 full content from log.jsonl.
 * @returns The (possibly truncated) text, with a trailing note when cut.
 */
function truncateForContext(text: string, maxChars: number, maxLines: number, ts?: string): string {
	const lines = text.split("\n");
	const originalLines = lines.length;
	const originalChars = text.length;
	let truncated = false;
	let result = text;

	// Check line limit first
	if (lines.length > maxLines) {
		result = lines.slice(0, maxLines).join("\n");
		truncated = true;
	}

	// Check char limit
	if (result.length > maxChars) {
		result = result.substring(0, maxChars);
		// Don't leave a dangling UTF-16 high surrogate behind: a cut that
		// lands mid code point (emoji, astral-plane chars) would emit an
		// invalid lone surrogate into the context.
		const lastCode = result.charCodeAt(result.length - 1);
		if (lastCode >= 0xd800 && lastCode <= 0xdbff) {
			result = result.substring(0, result.length - 1);
		}
		truncated = true;
	}

	if (truncated) {
		// Note: if the char cut landed mid-line, the partial line counts as
		// "kept", so remainingLines is a close approximation, not exact.
		const remainingLines = originalLines - result.split("\n").length;
		const remainingChars = originalChars - result.length;
		result += `\n[... truncated ${remainingLines} more lines, ${remainingChars} more chars. `;
		if (ts) {
			result += `To get full content: jq -r 'select(.ts=="${ts}") | .text' log.jsonl > /tmp/msg.txt, then read /tmp/msg.txt in segments]`;
		} else {
			result += `Search log.jsonl for full content]`;
		}
	}

	return result;
}
function getMemory(channelDir: string): string {
const parts: string[] = [];
@ -249,6 +290,15 @@ Update when you learn something important or when asked to remember something.
### Current Memory
${memory}
## System Configuration Log
Maintain ${workspacePath}/SYSTEM.md to log all environment modifications:
- Installed packages (apk add, npm install, pip install)
- Environment variables set
- Config files modified (~/.gitconfig, cron jobs, etc.)
- Skill dependencies installed
Update this file whenever you modify the environment. On fresh container, read it first to restore your setup.
## Log Queries (CRITICAL: limit output to avoid context overflow)
Format: \`{"date":"...","ts":"...","user":"...","userName":"...","text":"...","isBot":false}\`
The log contains user messages AND your tool calls/results. Filter appropriately.
@ -536,7 +586,7 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
date: new Date().toISOString(),
ts: toSlackTs(),
user: "bot",
text: `[Tool Result] ${event.toolName}: ${event.isError ? "ERROR: " : ""}${truncate(resultStr, 1000)}`,
text: `[Tool Result] ${event.toolName}: ${event.isError ? "ERROR: " : ""}${resultStr}`,
attachments: [],
isBot: true,
});

View file

@ -24,7 +24,7 @@ export interface SlackContext {
/** All known users in the workspace */
users: UserInfo[];
/** Send/update the main message (accumulates text). Set log=false to skip logging. */
respond(text: string, log?: boolean): Promise<void>;
respond(text: string, shouldLog?: boolean): Promise<void>;
/** Replace the entire message text (not append) */
replaceMessage(text: string): Promise<void>;
/** Post a message in the thread under the main message (for verbose details) */
@ -352,40 +352,52 @@ export class MomBot {
store: this.store,
channels: this.getChannels(),
users: this.getUsers(),
respond: async (responseText: string, log = true) => {
respond: async (responseText: string, shouldLog = true) => {
// Queue updates to avoid race conditions
updatePromise = updatePromise.then(async () => {
if (isThinking) {
// First real response replaces "Thinking..."
accumulatedText = responseText;
isThinking = false;
} else {
// Subsequent responses get appended
accumulatedText += "\n" + responseText;
}
try {
if (isThinking) {
// First real response replaces "Thinking..."
accumulatedText = responseText;
isThinking = false;
} else {
// Subsequent responses get appended
accumulatedText += "\n" + responseText;
}
// Add working indicator if still working
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
// Truncate accumulated text if too long (Slack limit is 40K, we use 35K for safety)
const MAX_MAIN_LENGTH = 35000;
const truncationNote = "\n\n_(message truncated, ask me to elaborate on specific parts)_";
if (accumulatedText.length > MAX_MAIN_LENGTH) {
accumulatedText =
accumulatedText.substring(0, MAX_MAIN_LENGTH - truncationNote.length) + truncationNote;
}
if (messageTs) {
// Update existing message
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
} else {
// Post initial message
const result = await this.webClient.chat.postMessage({
channel: event.channel,
text: displayText,
});
messageTs = result.ts as string;
}
// Add working indicator if still working
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
// Log the response if requested
if (log) {
await this.store.logBotResponse(event.channel, responseText, messageTs!);
if (messageTs) {
// Update existing message
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
} else {
// Post initial message
const result = await this.webClient.chat.postMessage({
channel: event.channel,
text: displayText,
});
messageTs = result.ts as string;
}
// Log the response if requested
if (shouldLog) {
await this.store.logBotResponse(event.channel, responseText, messageTs!);
}
} catch (err) {
log.logWarning("Slack respond error", err instanceof Error ? err.message : String(err));
}
});
@ -394,18 +406,29 @@ export class MomBot {
respondInThread: async (threadText: string) => {
// Queue thread posts to maintain order
updatePromise = updatePromise.then(async () => {
if (!messageTs) {
// No main message yet, just skip
return;
try {
if (!messageTs) {
// No main message yet, just skip
return;
}
// Obfuscate usernames to avoid pinging people in thread details
let obfuscatedText = this.obfuscateUsernames(threadText);
// Truncate thread messages if too long (20K limit for safety)
const MAX_THREAD_LENGTH = 20000;
if (obfuscatedText.length > MAX_THREAD_LENGTH) {
obfuscatedText = obfuscatedText.substring(0, MAX_THREAD_LENGTH - 50) + "\n\n_(truncated)_";
}
// Post in thread under the main message
await this.webClient.chat.postMessage({
channel: event.channel,
thread_ts: messageTs,
text: obfuscatedText,
});
} catch (err) {
log.logWarning("Slack respondInThread error", err instanceof Error ? err.message : String(err));
}
// Obfuscate usernames to avoid pinging people in thread details
const obfuscatedText = this.obfuscateUsernames(threadText);
// Post in thread under the main message
await this.webClient.chat.postMessage({
channel: event.channel,
thread_ts: messageTs,
text: obfuscatedText,
});
});
await updatePromise;
},
@ -434,40 +457,54 @@ export class MomBot {
},
replaceMessage: async (text: string) => {
updatePromise = updatePromise.then(async () => {
// Replace the accumulated text entirely
accumulatedText = text;
try {
// Replace the accumulated text entirely, with truncation
const MAX_MAIN_LENGTH = 35000;
const truncationNote = "\n\n_(message truncated, ask me to elaborate on specific parts)_";
if (text.length > MAX_MAIN_LENGTH) {
accumulatedText = text.substring(0, MAX_MAIN_LENGTH - truncationNote.length) + truncationNote;
} else {
accumulatedText = text;
}
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
if (messageTs) {
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
} else {
// Post initial message
const result = await this.webClient.chat.postMessage({
channel: event.channel,
text: displayText,
});
messageTs = result.ts as string;
if (messageTs) {
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
} else {
// Post initial message
const result = await this.webClient.chat.postMessage({
channel: event.channel,
text: displayText,
});
messageTs = result.ts as string;
}
} catch (err) {
log.logWarning("Slack replaceMessage error", err instanceof Error ? err.message : String(err));
}
});
await updatePromise;
},
setWorking: async (working: boolean) => {
updatePromise = updatePromise.then(async () => {
isWorking = working;
try {
isWorking = working;
// If we have a message, update it to add/remove indicator
if (messageTs) {
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
// If we have a message, update it to add/remove indicator
if (messageTs) {
const displayText = isWorking ? accumulatedText + workingIndicator : accumulatedText;
await this.webClient.chat.update({
channel: event.channel,
ts: messageTs,
text: displayText,
});
}
} catch (err) {
log.logWarning("Slack setWorking error", err instanceof Error ? err.message : String(err));
}
});
await updatePromise;

View file

@ -18,7 +18,8 @@ const attachSchema = Type.Object({
export const attachTool: AgentTool<typeof attachSchema> = {
name: "attach",
label: "attach",
description: "Attach a file to your response. Use this to share files, images, or documents with the user.",
description:
"Attach a file to your response. Use this to share files, images, or documents with the user. Only files from /workspace/ can be attached.",
parameters: attachSchema,
execute: async (
_toolCallId: string,

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi",
"version": "0.12.9",
"version": "0.13.2",
"description": "CLI tool for managing vLLM deployments on GPU pods",
"type": "module",
"bin": {
@ -34,7 +34,7 @@
"node": ">=20.0.0"
},
"dependencies": {
"@mariozechner/pi-agent-core": "^0.12.9",
"@mariozechner/pi-agent-core": "^0.13.2",
"chalk": "^5.5.0"
},
"devDependencies": {}

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-proxy",
"version": "0.12.9",
"version": "0.13.2",
"type": "module",
"description": "CORS and authentication proxy for pi-ai",
"main": "dist/index.js",

View file

@ -93,6 +93,14 @@ input.onSubmit = (value) => console.log(value);
input.setValue("initial");
```
**Key Bindings:**
- `Enter` - Submit
- `Ctrl+A` / `Ctrl+E` - Line start/end
- `Ctrl+W` or `Option+Backspace` - Delete word backwards
- `Ctrl+U` - Delete to start of line
- `Ctrl+K` - Delete to end of line
- Arrow keys, Backspace, Delete work as expected
### Editor
Multi-line text editor with autocomplete, file completion, and paste handling.

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-tui",
"version": "0.12.9",
"version": "0.13.2",
"description": "Terminal User Interface library with differential rendering for efficient text-based applications",
"type": "module",
"main": "dist/index.js",

View file

@ -1,7 +1,11 @@
import type { AutocompleteProvider, CombinedAutocompleteProvider } from "../autocomplete.js";
import type { Component } from "../tui.js";
import { visibleWidth } from "../utils.js";
import { SelectList, type SelectListTheme } from "./select-list.js";
// Grapheme segmenter for proper Unicode iteration (handles emojis, etc.)
const segmenter = new Intl.Segmenter();
interface EditorState {
lines: string[];
cursorLine: number;
@ -48,6 +52,10 @@ export class Editor implements Component {
private pasteBuffer: string = "";
private isInPaste: boolean = false;
// Prompt history for up/down navigation
private history: string[] = [];
private historyIndex: number = -1; // -1 = not browsing, 0 = most recent, 1 = older, etc.
public onSubmit?: (text: string) => void;
public onChange?: (text: string) => void;
public disableSubmit: boolean = false;
@ -61,6 +69,66 @@ export class Editor implements Component {
this.autocompleteProvider = provider;
}
/**
 * Record a submitted prompt so the user can recall it with the up/down
 * arrow keys. Blank prompts and immediate repeats of the most recent
 * entry are ignored; the list is capped at 100 entries (newest first).
 */
addToHistory(text: string): void {
	const entry = text.trim();
	// Skip blanks and back-to-back duplicates of the newest entry.
	if (entry === "" || this.history[0] === entry) {
		return;
	}
	this.history.unshift(entry);
	// Enforce the 100-entry cap by dropping the oldest.
	while (this.history.length > 100) {
		this.history.pop();
	}
}
/** True when the buffer is a single empty line (nothing typed yet). */
private isEditorEmpty(): boolean {
return this.state.lines.length === 1 && this.state.lines[0] === "";
}
/** True when the cursor sits on the first wrapped (visual) line of the buffer. */
private isOnFirstVisualLine(): boolean {
const visualLines = this.buildVisualLineMap(this.lastWidth);
const currentVisualLine = this.findCurrentVisualLine(visualLines);
return currentVisualLine === 0;
}
/** True when the cursor sits on the last wrapped (visual) line of the buffer. */
private isOnLastVisualLine(): boolean {
const visualLines = this.buildVisualLineMap(this.lastWidth);
const currentVisualLine = this.findCurrentVisualLine(visualLines);
return currentVisualLine === visualLines.length - 1;
}
/**
 * Step through prompt history. direction: -1 = older entry (Up key),
 * 1 = newer entry (Down key). historyIndex -1 means "not browsing";
 * stepping back down to -1 clears the editor to its blank state.
 */
private navigateHistory(direction: 1 | -1): void {
if (this.history.length === 0) return;
const newIndex = this.historyIndex - direction; // Up(-1) increases index, Down(1) decreases
if (newIndex < -1 || newIndex >= this.history.length) return;
this.historyIndex = newIndex;
if (this.historyIndex === -1) {
// Returned to "current" state - clear editor
this.setTextInternal("");
} else {
this.setTextInternal(this.history[this.historyIndex] || "");
}
}
/**
 * Replace the buffer contents without touching history-browsing state
 * (setText resets historyIndex; navigateHistory must not). Normalizes
 * CRLF/CR line endings and places the cursor at the end of the text.
 */
private setTextInternal(text: string): void {
	const normalized = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
	const lines = normalized.split("\n");
	this.state.lines = lines.length === 0 ? [""] : lines;
	// Cursor goes after the last character of the last line.
	const lastLine = this.state.lines.length - 1;
	this.state.cursorLine = lastLine;
	this.state.cursorCol = this.state.lines[lastLine]?.length || 0;
	this.onChange?.(this.getText());
}
invalidate(): void {
// No cached state to invalidate currently
}
@ -82,7 +150,7 @@ export class Editor implements Component {
// Render each layout line
for (const layoutLine of layoutLines) {
let displayText = layoutLine.text;
let visibleLength = layoutLine.text.length;
let lineVisibleWidth = visibleWidth(layoutLine.text);
// Add cursor if this line has it
if (layoutLine.hasCursor && layoutLine.cursorPos !== undefined) {
@ -90,34 +158,43 @@ export class Editor implements Component {
const after = displayText.slice(layoutLine.cursorPos);
if (after.length > 0) {
// Cursor is on a character - replace it with highlighted version
const cursor = `\x1b[7m${after[0]}\x1b[0m`;
const restAfter = after.slice(1);
// Cursor is on a character (grapheme) - replace it with highlighted version
// Get the first grapheme from 'after'
const afterGraphemes = [...segmenter.segment(after)];
const firstGrapheme = afterGraphemes[0]?.segment || "";
const restAfter = after.slice(firstGrapheme.length);
const cursor = `\x1b[7m${firstGrapheme}\x1b[0m`;
displayText = before + cursor + restAfter;
// visibleLength stays the same - we're replacing, not adding
// lineVisibleWidth stays the same - we're replacing, not adding
} else {
// Cursor is at the end - check if we have room for the space
if (layoutLine.text.length < width) {
if (lineVisibleWidth < width) {
// We have room - add highlighted space
const cursor = "\x1b[7m \x1b[0m";
displayText = before + cursor;
// visibleLength increases by 1 - we're adding a space
visibleLength = layoutLine.text.length + 1;
// lineVisibleWidth increases by 1 - we're adding a space
lineVisibleWidth = lineVisibleWidth + 1;
} else {
// Line is at full width - use reverse video on last character if possible
// Line is at full width - use reverse video on last grapheme if possible
// or just show cursor at the end without adding space
if (before.length > 0) {
const lastChar = before[before.length - 1];
const cursor = `\x1b[7m${lastChar}\x1b[0m`;
displayText = before.slice(0, -1) + cursor;
const beforeGraphemes = [...segmenter.segment(before)];
if (beforeGraphemes.length > 0) {
const lastGrapheme = beforeGraphemes[beforeGraphemes.length - 1]?.segment || "";
const cursor = `\x1b[7m${lastGrapheme}\x1b[0m`;
// Rebuild 'before' without the last grapheme
const beforeWithoutLast = beforeGraphemes
.slice(0, -1)
.map((g) => g.segment)
.join("");
displayText = beforeWithoutLast + cursor;
}
// visibleLength stays the same
// lineVisibleWidth stays the same
}
}
}
// Calculate padding based on actual visible length
const padding = " ".repeat(Math.max(0, width - visibleLength));
// Calculate padding based on actual visible width
const padding = " ".repeat(Math.max(0, width - lineVisibleWidth));
// Render the line (no side borders, just horizontal lines above and below)
result.push(displayText + padding);
@ -342,6 +419,7 @@ export class Editor implements Component {
};
this.pastes.clear();
this.pasteCounter = 0;
this.historyIndex = -1; // Exit history browsing mode
// Notify that editor is now empty
if (this.onChange) {
@ -383,11 +461,21 @@ export class Editor implements Component {
}
// Arrow keys
else if (data === "\x1b[A") {
// Up
this.moveCursor(-1, 0);
// Up - history navigation or cursor movement
if (this.isEditorEmpty()) {
this.navigateHistory(-1); // Start browsing history
} else if (this.historyIndex > -1 && this.isOnFirstVisualLine()) {
this.navigateHistory(-1); // Navigate to older history entry
} else {
this.moveCursor(-1, 0); // Cursor movement (within text or history entry)
}
} else if (data === "\x1b[B") {
// Down
this.moveCursor(1, 0);
// Down - history navigation or cursor movement
if (this.historyIndex > -1 && this.isOnLastVisualLine()) {
this.navigateHistory(1); // Navigate to newer history entry or clear
} else {
this.moveCursor(1, 0); // Cursor movement (within text or history entry)
}
} else if (data === "\x1b[C") {
// Right
this.moveCursor(0, 1);
@ -418,9 +506,9 @@ export class Editor implements Component {
for (let i = 0; i < this.state.lines.length; i++) {
const line = this.state.lines[i] || "";
const isCurrentLine = i === this.state.cursorLine;
const maxLineLength = contentWidth;
const lineVisibleWidth = visibleWidth(line);
if (line.length <= maxLineLength) {
if (lineVisibleWidth <= contentWidth) {
// Line fits in one layout line
if (isCurrentLine) {
layoutLines.push({
@ -435,35 +523,64 @@ export class Editor implements Component {
});
}
} else {
// Line needs wrapping
const chunks = [];
for (let pos = 0; pos < line.length; pos += maxLineLength) {
chunks.push(line.slice(pos, pos + maxLineLength));
// Line needs wrapping - use grapheme-aware chunking
const chunks: { text: string; startIndex: number; endIndex: number }[] = [];
let currentChunk = "";
let currentWidth = 0;
let chunkStartIndex = 0;
let currentIndex = 0;
for (const seg of segmenter.segment(line)) {
const grapheme = seg.segment;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > contentWidth && currentChunk !== "") {
// Start a new chunk
chunks.push({
text: currentChunk,
startIndex: chunkStartIndex,
endIndex: currentIndex,
});
currentChunk = grapheme;
currentWidth = graphemeWidth;
chunkStartIndex = currentIndex;
} else {
currentChunk += grapheme;
currentWidth += graphemeWidth;
}
currentIndex += grapheme.length;
}
// Push the last chunk
if (currentChunk !== "") {
chunks.push({
text: currentChunk,
startIndex: chunkStartIndex,
endIndex: currentIndex,
});
}
for (let chunkIndex = 0; chunkIndex < chunks.length; chunkIndex++) {
const chunk = chunks[chunkIndex];
if (!chunk) continue;
const chunkStart = chunkIndex * maxLineLength;
const chunkEnd = chunkStart + chunk.length;
const cursorPos = this.state.cursorCol;
const isLastChunk = chunkIndex === chunks.length - 1;
// For non-last chunks, cursor at chunkEnd belongs to the next chunk
// For non-last chunks, cursor at endIndex belongs to the next chunk
const hasCursorInChunk =
isCurrentLine &&
cursorPos >= chunkStart &&
(isLastChunk ? cursorPos <= chunkEnd : cursorPos < chunkEnd);
cursorPos >= chunk.startIndex &&
(isLastChunk ? cursorPos <= chunk.endIndex : cursorPos < chunk.endIndex);
if (hasCursorInChunk) {
layoutLines.push({
text: chunk,
text: chunk.text,
hasCursor: true,
cursorPos: cursorPos - chunkStart,
cursorPos: cursorPos - chunk.startIndex,
});
} else {
layoutLines.push({
text: chunk,
text: chunk.text,
hasCursor: false,
});
}
@ -479,24 +596,14 @@ export class Editor implements Component {
}
setText(text: string): void {
// Split text into lines, handling different line endings
const lines = text.replace(/\r\n/g, "\n").replace(/\r/g, "\n").split("\n");
// Ensure at least one empty line
this.state.lines = lines.length === 0 ? [""] : lines;
// Reset cursor to end of text
this.state.cursorLine = this.state.lines.length - 1;
this.state.cursorCol = this.state.lines[this.state.cursorLine]?.length || 0;
// Notify of change
if (this.onChange) {
this.onChange(this.getText());
}
this.historyIndex = -1; // Exit history browsing mode
this.setTextInternal(text);
}
// All the editor methods from before...
private insertCharacter(char: string): void {
this.historyIndex = -1; // Exit history browsing mode
const line = this.state.lines[this.state.cursorLine] || "";
const before = line.slice(0, this.state.cursorCol);
@ -544,6 +651,8 @@ export class Editor implements Component {
}
private handlePaste(pastedText: string): void {
this.historyIndex = -1; // Exit history browsing mode
// Clean the pasted text
const cleanText = pastedText.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
@ -632,6 +741,8 @@ export class Editor implements Component {
}
private addNewLine(): void {
this.historyIndex = -1; // Exit history browsing mode
const currentLine = this.state.lines[this.state.cursorLine] || "";
const before = currentLine.slice(0, this.state.cursorCol);
@ -651,6 +762,8 @@ export class Editor implements Component {
}
private handleBackspace(): void {
this.historyIndex = -1; // Exit history browsing mode
if (this.state.cursorCol > 0) {
// Delete character in current line
const line = this.state.lines[this.state.cursorLine] || "";
@ -704,6 +817,8 @@ export class Editor implements Component {
}
private deleteToStartOfLine(): void {
this.historyIndex = -1; // Exit history browsing mode
const currentLine = this.state.lines[this.state.cursorLine] || "";
if (this.state.cursorCol > 0) {
@ -725,6 +840,8 @@ export class Editor implements Component {
}
private deleteToEndOfLine(): void {
this.historyIndex = -1; // Exit history browsing mode
const currentLine = this.state.lines[this.state.cursorLine] || "";
if (this.state.cursorCol < currentLine.length) {
@ -743,6 +860,8 @@ export class Editor implements Component {
}
private deleteWordBackwards(): void {
this.historyIndex = -1; // Exit history browsing mode
const currentLine = this.state.lines[this.state.cursorLine] || "";
// If at start of line, behave like backspace at column 0 (merge with previous line)
@ -791,6 +910,8 @@ export class Editor implements Component {
}
private handleForwardDelete(): void {
this.historyIndex = -1; // Exit history browsing mode
const currentLine = this.state.lines[this.state.cursorLine] || "";
if (this.state.cursorCol < currentLine.length) {
@ -838,16 +959,44 @@ export class Editor implements Component {
for (let i = 0; i < this.state.lines.length; i++) {
const line = this.state.lines[i] || "";
const lineVisWidth = visibleWidth(line);
if (line.length === 0) {
// Empty line still takes one visual line
visualLines.push({ logicalLine: i, startCol: 0, length: 0 });
} else if (line.length <= width) {
} else if (lineVisWidth <= width) {
visualLines.push({ logicalLine: i, startCol: 0, length: line.length });
} else {
// Line needs wrapping
for (let pos = 0; pos < line.length; pos += width) {
const segmentLength = Math.min(width, line.length - pos);
visualLines.push({ logicalLine: i, startCol: pos, length: segmentLength });
// Line needs wrapping - use grapheme-aware chunking
let currentWidth = 0;
let chunkStartIndex = 0;
let currentIndex = 0;
for (const seg of segmenter.segment(line)) {
const grapheme = seg.segment;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > width && currentIndex > chunkStartIndex) {
// Start a new chunk
visualLines.push({
logicalLine: i,
startCol: chunkStartIndex,
length: currentIndex - chunkStartIndex,
});
chunkStartIndex = currentIndex;
currentWidth = graphemeWidth;
} else {
currentWidth += graphemeWidth;
}
currentIndex += grapheme.length;
}
// Push the last chunk
if (currentIndex > chunkStartIndex) {
visualLines.push({
logicalLine: i,
startCol: chunkStartIndex,
length: currentIndex - chunkStartIndex,
});
}
}
}

View file

@ -113,6 +113,31 @@ export class Input implements Component {
return;
}
if (data.charCodeAt(0) === 23) {
// Ctrl+W - delete word backwards
this.deleteWordBackwards();
return;
}
if (data === "\x1b\x7f") {
// Option/Alt+Backspace - delete word backwards
this.deleteWordBackwards();
return;
}
if (data.charCodeAt(0) === 21) {
// Ctrl+U - delete from cursor to start of line
this.value = this.value.slice(this.cursor);
this.cursor = 0;
return;
}
if (data.charCodeAt(0) === 11) {
// Ctrl+K - delete from cursor to end of line
this.value = this.value.slice(0, this.cursor);
return;
}
// Regular character input
if (data.length === 1 && data >= " " && data <= "~") {
this.value = this.value.slice(0, this.cursor) + data + this.value.slice(this.cursor);
@ -120,6 +145,37 @@ export class Input implements Component {
}
}
/**
 * Delete one "word" to the left of the cursor (Ctrl+W / Option+Backspace).
 * If the character immediately before the cursor is whitespace or
 * punctuation, only that single boundary character is removed; otherwise
 * the whole run of word characters up to the previous boundary is removed.
 */
private deleteWordBackwards(): void {
if (this.cursor === 0) {
return;
}
const text = this.value.slice(0, this.cursor);
let deleteFrom = this.cursor;
const isWhitespace = (char: string): boolean => /\s/.test(char);
const isPunctuation = (char: string): boolean => /[(){}[\]<>.,;:'"!?+\-=*/\\|&%^$#@~`]/.test(char);
const charBeforeCursor = text[deleteFrom - 1] ?? "";
// If immediately on whitespace or punctuation, delete that single boundary char
if (isWhitespace(charBeforeCursor) || isPunctuation(charBeforeCursor)) {
deleteFrom -= 1;
} else {
// Otherwise, delete a run of non-boundary characters (the "word")
while (deleteFrom > 0) {
const ch = text[deleteFrom - 1] ?? "";
if (isWhitespace(ch) || isPunctuation(ch)) {
break;
}
deleteFrom -= 1;
}
}
// Splice out [deleteFrom, cursor) and move the cursor to the cut point.
this.value = text.slice(0, deleteFrom) + this.value.slice(this.cursor);
this.cursor = deleteFrom;
}
private handlePaste(pastedText: string): void {
// Clean the pasted text - remove newlines and carriage returns
const cleanText = pastedText.replace(/\r\n/g, "").replace(/\r/g, "").replace(/\n/g, "");

View file

@ -35,27 +35,199 @@ function extractAnsiCode(str: string, pos: number): { code: string; length: numb
* Track active ANSI SGR codes to preserve styling across line breaks.
*/
class AnsiCodeTracker {
private activeAnsiCodes: string[] = [];
// Track individual attributes separately so we can reset them specifically
private bold = false;
private dim = false;
private italic = false;
private underline = false;
private blink = false;
private inverse = false;
private hidden = false;
private strikethrough = false;
private fgColor: string | null = null; // Stores the full code like "31" or "38;5;240"
private bgColor: string | null = null; // Stores the full code like "41" or "48;5;240"
process(ansiCode: string): void {
if (!ansiCode.endsWith("m")) {
return;
}
// Full reset clears everything
if (ansiCode === "\x1b[0m" || ansiCode === "\x1b[m") {
this.activeAnsiCodes.length = 0;
} else {
this.activeAnsiCodes.push(ansiCode);
// Extract the parameters between \x1b[ and m
const match = ansiCode.match(/\x1b\[([\d;]*)m/);
if (!match) return;
const params = match[1];
if (params === "" || params === "0") {
// Full reset
this.reset();
return;
}
// Parse parameters (can be semicolon-separated)
const parts = params.split(";");
let i = 0;
while (i < parts.length) {
const code = Number.parseInt(parts[i], 10);
// Handle 256-color and RGB codes which consume multiple parameters
if (code === 38 || code === 48) {
// 38;5;N (256 color fg) or 38;2;R;G;B (RGB fg)
// 48;5;N (256 color bg) or 48;2;R;G;B (RGB bg)
if (parts[i + 1] === "5" && parts[i + 2] !== undefined) {
// 256 color: 38;5;N or 48;5;N
const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]}`;
if (code === 38) {
this.fgColor = colorCode;
} else {
this.bgColor = colorCode;
}
i += 3;
continue;
} else if (parts[i + 1] === "2" && parts[i + 4] !== undefined) {
// RGB color: 38;2;R;G;B or 48;2;R;G;B
const colorCode = `${parts[i]};${parts[i + 1]};${parts[i + 2]};${parts[i + 3]};${parts[i + 4]}`;
if (code === 38) {
this.fgColor = colorCode;
} else {
this.bgColor = colorCode;
}
i += 5;
continue;
}
}
// Standard SGR codes
switch (code) {
case 0:
this.reset();
break;
case 1:
this.bold = true;
break;
case 2:
this.dim = true;
break;
case 3:
this.italic = true;
break;
case 4:
this.underline = true;
break;
case 5:
this.blink = true;
break;
case 7:
this.inverse = true;
break;
case 8:
this.hidden = true;
break;
case 9:
this.strikethrough = true;
break;
case 21:
this.bold = false;
break; // Some terminals
case 22:
this.bold = false;
this.dim = false;
break;
case 23:
this.italic = false;
break;
case 24:
this.underline = false;
break;
case 25:
this.blink = false;
break;
case 27:
this.inverse = false;
break;
case 28:
this.hidden = false;
break;
case 29:
this.strikethrough = false;
break;
case 39:
this.fgColor = null;
break; // Default fg
case 49:
this.bgColor = null;
break; // Default bg
default:
// Standard foreground colors 30-37, 90-97
if ((code >= 30 && code <= 37) || (code >= 90 && code <= 97)) {
this.fgColor = String(code);
}
// Standard background colors 40-47, 100-107
else if ((code >= 40 && code <= 47) || (code >= 100 && code <= 107)) {
this.bgColor = String(code);
}
break;
}
i++;
}
}
/** Clear every tracked SGR attribute back to terminal defaults (as after SGR 0). */
private reset(): void {
this.bold = false;
this.dim = false;
this.italic = false;
this.underline = false;
this.blink = false;
this.inverse = false;
this.hidden = false;
this.strikethrough = false;
this.fgColor = null;
this.bgColor = null;
}
getActiveCodes(): string {
return this.activeAnsiCodes.join("");
const codes: string[] = [];
if (this.bold) codes.push("1");
if (this.dim) codes.push("2");
if (this.italic) codes.push("3");
if (this.underline) codes.push("4");
if (this.blink) codes.push("5");
if (this.inverse) codes.push("7");
if (this.hidden) codes.push("8");
if (this.strikethrough) codes.push("9");
if (this.fgColor) codes.push(this.fgColor);
if (this.bgColor) codes.push(this.bgColor);
if (codes.length === 0) return "";
return `\x1b[${codes.join(";")}m`;
}
hasActiveCodes(): boolean {
return this.activeAnsiCodes.length > 0;
return (
this.bold ||
this.dim ||
this.italic ||
this.underline ||
this.blink ||
this.inverse ||
this.hidden ||
this.strikethrough ||
this.fgColor !== null ||
this.bgColor !== null
);
}
/**
 * Reset code to emit before end-of-line padding. Underline is the only
 * tracked SGR attribute that visibly bleeds into trailing padding, so
 * only the underline-off code is returned; colors and other attributes
 * are left intact. Empty string when underline is not active.
 */
getLineEndReset(): string {
	// \x1b[24m turns underline off without disturbing any other attribute.
	return this.underline ? "\x1b[24m" : "";
}
}
@ -78,13 +250,15 @@ function updateTrackerFromText(text: string, tracker: AnsiCodeTracker): void {
function splitIntoTokensWithAnsi(text: string): string[] {
const tokens: string[] = [];
let current = "";
let pendingAnsi = ""; // ANSI codes waiting to be attached to next visible content
let inWhitespace = false;
let i = 0;
while (i < text.length) {
const ansiResult = extractAnsiCode(text, i);
if (ansiResult) {
current += ansiResult.code;
// Hold ANSI codes separately - they'll be attached to the next visible char
pendingAnsi += ansiResult.code;
i += ansiResult.length;
continue;
}
@ -98,11 +272,22 @@ function splitIntoTokensWithAnsi(text: string): string[] {
current = "";
}
// Attach any pending ANSI codes to this visible character
if (pendingAnsi) {
current += pendingAnsi;
pendingAnsi = "";
}
inWhitespace = charIsSpace;
current += char;
i++;
}
// Handle any remaining pending ANSI codes (attach to last token)
if (pendingAnsi) {
current += pendingAnsi;
}
if (current) {
tokens.push(current);
}
@ -161,12 +346,17 @@ function wrapSingleLine(line: string, width: number): string[] {
// Token itself is too long - break it character by character
if (tokenVisibleLength > width && !isWhitespace) {
if (currentLine) {
// Add specific reset for underline only (preserves background)
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
currentLine += lineEndReset;
}
wrapped.push(currentLine);
currentLine = "";
currentVisibleLength = 0;
}
// Break long token
// Break long token - breakLongWord handles its own resets
const broken = breakLongWord(token, width, tracker);
wrapped.push(...broken.slice(0, -1));
currentLine = broken[broken.length - 1];
@ -178,8 +368,13 @@ function wrapSingleLine(line: string, width: number): string[] {
const totalNeeded = currentVisibleLength + tokenVisibleLength;
if (totalNeeded > width && currentVisibleLength > 0) {
// Wrap to next line - don't carry trailing whitespace
wrapped.push(currentLine.trimEnd());
// Add specific reset for underline only (preserves background)
let lineToWrap = currentLine.trimEnd();
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
lineToWrap += lineEndReset;
}
wrapped.push(lineToWrap);
if (isWhitespace) {
// Don't start new line with whitespace
currentLine = tracker.getActiveCodes();
@ -198,6 +393,7 @@ function wrapSingleLine(line: string, width: number): string[] {
}
if (currentLine) {
// No reset at end of final line - let caller handle it
wrapped.push(currentLine);
}
@ -251,6 +447,11 @@ function breakLongWord(word: string, width: number, tracker: AnsiCodeTracker): s
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + graphemeWidth > width) {
// Add specific reset for underline only (preserves background)
const lineEndReset = tracker.getLineEndReset();
if (lineEndReset) {
currentLine += lineEndReset;
}
lines.push(currentLine);
currentLine = tracker.getActiveCodes();
currentWidth = 0;
@ -261,6 +462,7 @@ function breakLongWord(word: string, width: number, tracker: AnsiCodeTracker): s
}
if (currentLine) {
// No reset at end of final segment - caller handles continuation
lines.push(currentLine);
}
@ -309,36 +511,53 @@ export function truncateToWidth(text: string, maxWidth: number, ellipsis: string
return ellipsis.substring(0, maxWidth);
}
let currentWidth = 0;
let truncateAt = 0;
// Separate ANSI codes from visible content using grapheme segmentation
let i = 0;
const segments: Array<{ type: "ansi" | "grapheme"; value: string }> = [];
while (i < text.length && currentWidth < targetWidth) {
// Skip ANSI escape sequences (include them in output but don't count width)
if (text[i] === "\x1b" && text[i + 1] === "[") {
let j = i + 2;
while (j < text.length && !/[a-zA-Z]/.test(text[j]!)) {
j++;
while (i < text.length) {
const ansiResult = extractAnsiCode(text, i);
if (ansiResult) {
segments.push({ type: "ansi", value: ansiResult.code });
i += ansiResult.length;
} else {
// Find the next ANSI code or end of string
let end = i;
while (end < text.length) {
const nextAnsi = extractAnsiCode(text, end);
if (nextAnsi) break;
end++;
}
// Include the final letter of the escape sequence
j++;
truncateAt = j;
i = j;
// Segment this non-ANSI portion into graphemes
const textPortion = text.slice(i, end);
for (const seg of segmenter.segment(textPortion)) {
segments.push({ type: "grapheme", value: seg.segment });
}
i = end;
}
}
// Build truncated string from segments
let result = "";
let currentWidth = 0;
for (const seg of segments) {
if (seg.type === "ansi") {
result += seg.value;
continue;
}
const char = text[i]!;
const charWidth = visibleWidth(char);
const grapheme = seg.value;
const graphemeWidth = visibleWidth(grapheme);
if (currentWidth + charWidth > targetWidth) {
if (currentWidth + graphemeWidth > targetWidth) {
break;
}
currentWidth += charWidth;
truncateAt = i + 1;
i++;
result += grapheme;
currentWidth += graphemeWidth;
}
// Add reset code before ellipsis to prevent styling leaking into it
return text.substring(0, truncateAt) + "\x1b[0m" + ellipsis;
return result + "\x1b[0m" + ellipsis;
}

View file

@ -1,9 +1,252 @@
import assert from "node:assert";
import { describe, it } from "node:test";
import { stripVTControlCharacters } from "node:util";
import { Editor } from "../src/components/editor.js";
import { visibleWidth } from "../src/utils.js";
import { defaultEditorTheme } from "./test-themes.js";
describe("Editor component", () => {
describe("Prompt history navigation", () => {
it("does nothing on Up arrow when history is empty", () => {
const editor = new Editor(defaultEditorTheme);
editor.handleInput("\x1b[A"); // Up arrow
assert.strictEqual(editor.getText(), "");
});
it("shows most recent history entry on Up arrow when editor is empty", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("first prompt");
editor.addToHistory("second prompt");
editor.handleInput("\x1b[A"); // Up arrow
assert.strictEqual(editor.getText(), "second prompt");
});
it("cycles through history entries on repeated Up arrow", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("first");
editor.addToHistory("second");
editor.addToHistory("third");
editor.handleInput("\x1b[A"); // Up - shows "third"
assert.strictEqual(editor.getText(), "third");
editor.handleInput("\x1b[A"); // Up - shows "second"
assert.strictEqual(editor.getText(), "second");
editor.handleInput("\x1b[A"); // Up - shows "first"
assert.strictEqual(editor.getText(), "first");
editor.handleInput("\x1b[A"); // Up - stays at "first" (oldest)
assert.strictEqual(editor.getText(), "first");
});
it("returns to empty editor on Down arrow after browsing history", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("prompt");
editor.handleInput("\x1b[A"); // Up - shows "prompt"
assert.strictEqual(editor.getText(), "prompt");
editor.handleInput("\x1b[B"); // Down - clears editor
assert.strictEqual(editor.getText(), "");
});
it("navigates forward through history with Down arrow", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("first");
editor.addToHistory("second");
editor.addToHistory("third");
// Go to oldest
editor.handleInput("\x1b[A"); // third
editor.handleInput("\x1b[A"); // second
editor.handleInput("\x1b[A"); // first
// Navigate back
editor.handleInput("\x1b[B"); // second
assert.strictEqual(editor.getText(), "second");
editor.handleInput("\x1b[B"); // third
assert.strictEqual(editor.getText(), "third");
editor.handleInput("\x1b[B"); // empty
assert.strictEqual(editor.getText(), "");
});
it("exits history mode when typing a character", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("old prompt");
editor.handleInput("\x1b[A"); // Up - shows "old prompt"
editor.handleInput("x"); // Type a character - exits history mode
assert.strictEqual(editor.getText(), "old promptx");
});
it("exits history mode on setText", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("first");
editor.addToHistory("second");
editor.handleInput("\x1b[A"); // Up - shows "second"
editor.setText(""); // External clear
// Up should start fresh from most recent
editor.handleInput("\x1b[A");
assert.strictEqual(editor.getText(), "second");
});
it("does not add empty strings to history", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("");
editor.addToHistory(" ");
editor.addToHistory("valid");
editor.handleInput("\x1b[A");
assert.strictEqual(editor.getText(), "valid");
// Should not have more entries
editor.handleInput("\x1b[A");
assert.strictEqual(editor.getText(), "valid");
});
it("does not add consecutive duplicates to history", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("same");
editor.addToHistory("same");
editor.addToHistory("same");
editor.handleInput("\x1b[A"); // "same"
assert.strictEqual(editor.getText(), "same");
editor.handleInput("\x1b[A"); // stays at "same" (only one entry)
assert.strictEqual(editor.getText(), "same");
});
it("allows non-consecutive duplicates in history", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("first");
editor.addToHistory("second");
editor.addToHistory("first"); // Not consecutive, should be added
editor.handleInput("\x1b[A"); // "first"
assert.strictEqual(editor.getText(), "first");
editor.handleInput("\x1b[A"); // "second"
assert.strictEqual(editor.getText(), "second");
editor.handleInput("\x1b[A"); // "first" (older one)
assert.strictEqual(editor.getText(), "first");
});
it("uses cursor movement instead of history when editor has content", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("history item");
editor.setText("line1\nline2");
// Cursor is at end of line2, Up should move to line1
editor.handleInput("\x1b[A"); // Up - cursor movement
// Insert character to verify cursor position
editor.handleInput("X");
// X should be inserted in line1, not replace with history
assert.strictEqual(editor.getText(), "line1X\nline2");
});
it("limits history to 100 entries", () => {
const editor = new Editor(defaultEditorTheme);
// Add 105 entries
for (let i = 0; i < 105; i++) {
editor.addToHistory(`prompt ${i}`);
}
// Navigate to oldest
for (let i = 0; i < 100; i++) {
editor.handleInput("\x1b[A");
}
// Should be at entry 5 (oldest kept), not entry 0
assert.strictEqual(editor.getText(), "prompt 5");
// One more Up should not change anything
editor.handleInput("\x1b[A");
assert.strictEqual(editor.getText(), "prompt 5");
});
it("exits history on Down when cursor is on last line of multi-line entry", () => {
	// NOTE(review): the previous title ("allows cursor movement within
	// multi-line history entry with Down") contradicted the body, which
	// asserts an immediate exit. Recalling a multi-line entry places the
	// cursor at the end (last line), so Down has nowhere to move within the
	// entry and must leave history mode back to an empty buffer. Within-entry
	// Down movement is covered by the dedicated test below.
	const editor = new Editor(defaultEditorTheme);
	editor.addToHistory("line1\nline2\nline3");
	// Browse to the multi-line entry
	editor.handleInput("\x1b[A"); // Up - shows entry, cursor at end of line3
	assert.strictEqual(editor.getText(), "line1\nline2\nline3");
	// Down should exit history since cursor is on last line
	editor.handleInput("\x1b[B"); // Down
	assert.strictEqual(editor.getText(), ""); // Exited to empty
});
it("allows cursor movement within multi-line history entry with Up", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("older entry");
editor.addToHistory("line1\nline2\nline3");
// Browse to the multi-line entry
editor.handleInput("\x1b[A"); // Up - shows multi-line, cursor at end of line3
// Up should move cursor within the entry (not on first line yet)
editor.handleInput("\x1b[A"); // Up - cursor moves to line2
assert.strictEqual(editor.getText(), "line1\nline2\nline3"); // Still same entry
editor.handleInput("\x1b[A"); // Up - cursor moves to line1 (now on first visual line)
assert.strictEqual(editor.getText(), "line1\nline2\nline3"); // Still same entry
// Now Up should navigate to older history entry
editor.handleInput("\x1b[A"); // Up - navigate to older
assert.strictEqual(editor.getText(), "older entry");
});
it("navigates from multi-line entry back to newer via Down after cursor movement", () => {
const editor = new Editor(defaultEditorTheme);
editor.addToHistory("line1\nline2\nline3");
// Browse to entry and move cursor up
editor.handleInput("\x1b[A"); // Up - shows entry, cursor at end
editor.handleInput("\x1b[A"); // Up - cursor to line2
editor.handleInput("\x1b[A"); // Up - cursor to line1
// Now Down should move cursor down within the entry
editor.handleInput("\x1b[B"); // Down - cursor to line2
assert.strictEqual(editor.getText(), "line1\nline2\nline3");
editor.handleInput("\x1b[B"); // Down - cursor to line3
assert.strictEqual(editor.getText(), "line1\nline2\nline3");
// Now on last line, Down should exit history
editor.handleInput("\x1b[B"); // Down - exit to empty
assert.strictEqual(editor.getText(), "");
});
});
describe("Unicode text editing behavior", () => {
it("inserts mixed ASCII, umlauts, and emojis as literal text", () => {
const editor = new Editor(defaultEditorTheme);
@ -129,4 +372,105 @@ describe("Editor component", () => {
assert.strictEqual(text, "xab");
});
});
describe("Grapheme-aware text wrapping", () => {
it("wraps lines correctly when text contains wide emojis", () => {
const editor = new Editor(defaultEditorTheme);
const width = 20;
// ✅ is 2 columns wide, so "Hello ✅ World" is 14 columns
editor.setText("Hello ✅ World");
const lines = editor.render(width);
// All content lines (between borders) should fit within width
for (let i = 1; i < lines.length - 1; i++) {
const lineWidth = visibleWidth(lines[i]!);
assert.strictEqual(lineWidth, width, `Line ${i} has width ${lineWidth}, expected ${width}`);
}
});
it("wraps long text with emojis at correct positions", () => {
const editor = new Editor(defaultEditorTheme);
const width = 10;
// Each ✅ is 2 columns. "✅✅✅✅✅" = 10 columns, fits exactly
// "✅✅✅✅✅✅" = 12 columns, needs wrap
editor.setText("✅✅✅✅✅✅");
const lines = editor.render(width);
// Should have 2 content lines (plus 2 border lines)
// First line: 5 emojis (10 cols), second line: 1 emoji (2 cols) + padding
for (let i = 1; i < lines.length - 1; i++) {
const lineWidth = visibleWidth(lines[i]!);
assert.strictEqual(lineWidth, width, `Line ${i} has width ${lineWidth}, expected ${width}`);
}
});
it("wraps CJK characters correctly (each is 2 columns wide)", () => {
const editor = new Editor(defaultEditorTheme);
const width = 10;
// Each CJK char is 2 columns. "日本語テスト" = 6 chars = 12 columns
editor.setText("日本語テスト");
const lines = editor.render(width);
for (let i = 1; i < lines.length - 1; i++) {
const lineWidth = visibleWidth(lines[i]!);
assert.strictEqual(lineWidth, width, `Line ${i} has width ${lineWidth}, expected ${width}`);
}
// Verify content split correctly
const contentLines = lines.slice(1, -1).map((l) => stripVTControlCharacters(l).trim());
assert.strictEqual(contentLines.length, 2);
assert.strictEqual(contentLines[0], "日本語テス"); // 5 chars = 10 columns
assert.strictEqual(contentLines[1], "ト"); // 1 char = 2 columns (+ padding)
});
it("handles mixed ASCII and wide characters in wrapping", () => {
const editor = new Editor(defaultEditorTheme);
const width = 15;
// "Test ✅ OK 日本" = 4 + 1 + 2 + 1 + 2 + 1 + 4 = 15 columns (fits exactly)
editor.setText("Test ✅ OK 日本");
const lines = editor.render(width);
// Should fit in one content line
const contentLines = lines.slice(1, -1);
assert.strictEqual(contentLines.length, 1);
const lineWidth = visibleWidth(contentLines[0]!);
assert.strictEqual(lineWidth, width);
});
it("renders cursor correctly on wide characters", () => {
const editor = new Editor(defaultEditorTheme);
const width = 20;
editor.setText("A✅B");
// Cursor should be at end (after B)
const lines = editor.render(width);
// The cursor (reverse video space) should be visible
const contentLine = lines[1]!;
assert.ok(contentLine.includes("\x1b[7m"), "Should have reverse video cursor");
// Line should still be correct width
assert.strictEqual(visibleWidth(contentLine), width);
});
it("does not exceed terminal width with emoji at wrap boundary", () => {
const editor = new Editor(defaultEditorTheme);
const width = 11;
// "0123456789✅" = 10 ASCII + 2-wide emoji = 12 columns
// Should wrap before the emoji since it would exceed width
editor.setText("0123456789✅");
const lines = editor.render(width);
for (let i = 1; i < lines.length - 1; i++) {
const lineWidth = visibleWidth(lines[i]!);
assert.ok(lineWidth <= width, `Line ${i} has width ${lineWidth}, exceeds max ${width}`);
}
});
});
});

View file

@ -1,112 +1,122 @@
import assert from "node:assert";
import { describe, it } from "node:test";
import { Chalk } from "chalk";
// We'll implement these
import { applyBackgroundToLine, visibleWidth, wrapTextWithAnsi } from "../src/utils.js";
const chalk = new Chalk({ level: 3 });
import { visibleWidth, wrapTextWithAnsi } from "../src/utils.js";
describe("wrapTextWithAnsi", () => {
it("wraps plain text at word boundaries", () => {
const text = "hello world this is a test";
const lines = wrapTextWithAnsi(text, 15);
describe("underline styling", () => {
it("should not apply underline style before the styled text", () => {
const underlineOn = "\x1b[4m";
const underlineOff = "\x1b[24m";
const url = "https://example.com/very/long/path/that/will/wrap";
const text = `read this thread ${underlineOn}${url}${underlineOff}`;
assert.strictEqual(lines.length, 2);
assert.strictEqual(lines[0], "hello world");
assert.strictEqual(lines[1], "this is a test");
const wrapped = wrapTextWithAnsi(text, 40);
// First line should NOT contain underline code - it's just "read this thread "
assert.strictEqual(wrapped[0], "read this thread ");
// Second line should start with underline, have URL content
assert.strictEqual(wrapped[1].startsWith(underlineOn), true);
assert.ok(wrapped[1].includes("https://"));
});
it("should not bleed underline to padding - each line should end with reset for underline only", () => {
const underlineOn = "\x1b[4m";
const underlineOff = "\x1b[24m";
const url = "https://example.com/very/long/path/that/will/definitely/wrap";
const text = `prefix ${underlineOn}${url}${underlineOff} suffix`;
const wrapped = wrapTextWithAnsi(text, 30);
// Middle lines (with underlined content) should end with underline-off, not full reset
// Line 1 and 2 contain underlined URL parts
for (let i = 1; i < wrapped.length - 1; i++) {
const line = wrapped[i];
if (line.includes(underlineOn)) {
// Should end with underline off, NOT full reset
assert.strictEqual(line.endsWith(underlineOff), true);
assert.strictEqual(line.endsWith("\x1b[0m"), false);
}
}
});
});
it("preserves ANSI codes across wrapped lines", () => {
const text = chalk.bold("hello world this is bold text");
const lines = wrapTextWithAnsi(text, 20);
describe("background color preservation", () => {
it("should preserve background color across wrapped lines without full reset", () => {
const bgBlue = "\x1b[44m";
const reset = "\x1b[0m";
const text = `${bgBlue}hello world this is blue background text${reset}`;
// Should have bold code at start of each line
assert.ok(lines[0].includes("\x1b[1m"));
assert.ok(lines[1].includes("\x1b[1m"));
const wrapped = wrapTextWithAnsi(text, 15);
// Each line should be <= 20 visible chars
assert.ok(visibleWidth(lines[0]) <= 20);
assert.ok(visibleWidth(lines[1]) <= 20);
// Each line should have background color
for (const line of wrapped) {
assert.ok(line.includes(bgBlue));
}
// Middle lines should NOT end with full reset (kills background for padding)
for (let i = 0; i < wrapped.length - 1; i++) {
assert.strictEqual(wrapped[i].endsWith("\x1b[0m"), false);
}
});
it("should reset underline but preserve background when wrapping underlined text inside background", () => {
const underlineOn = "\x1b[4m";
const underlineOff = "\x1b[24m";
const reset = "\x1b[0m";
const text = `\x1b[41mprefix ${underlineOn}UNDERLINED_CONTENT_THAT_WRAPS${underlineOff} suffix${reset}`;
const wrapped = wrapTextWithAnsi(text, 20);
// All lines should have background color 41 (either as \x1b[41m or combined like \x1b[4;41m)
for (const line of wrapped) {
const hasBgColor = line.includes("[41m") || line.includes(";41m") || line.includes("[41;");
assert.ok(hasBgColor);
}
// Lines with underlined content should use underline-off at end, not full reset
for (let i = 0; i < wrapped.length - 1; i++) {
const line = wrapped[i];
// If this line has underline on, it should end with underline off (not full reset)
if (
(line.includes("[4m") || line.includes("[4;") || line.includes(";4m")) &&
!line.includes(underlineOff)
) {
assert.strictEqual(line.endsWith(underlineOff), true);
assert.strictEqual(line.endsWith("\x1b[0m"), false);
}
}
});
});
it("handles text with resets", () => {
const text = chalk.bold("bold ") + "normal " + chalk.cyan("cyan");
const lines = wrapTextWithAnsi(text, 30);
describe("basic wrapping", () => {
it("should wrap plain text correctly", () => {
const text = "hello world this is a test";
const wrapped = wrapTextWithAnsi(text, 10);
assert.strictEqual(lines.length, 1);
// Should contain the reset code from chalk
assert.ok(lines[0].includes("\x1b["));
});
assert.ok(wrapped.length > 1);
for (const line of wrapped) {
assert.ok(visibleWidth(line) <= 10);
}
});
it("does NOT pad lines", () => {
const text = "hello";
const lines = wrapTextWithAnsi(text, 20);
it("should preserve color codes across wraps", () => {
const red = "\x1b[31m";
const reset = "\x1b[0m";
const text = `${red}hello world this is red${reset}`;
assert.strictEqual(lines.length, 1);
assert.strictEqual(visibleWidth(lines[0]), 5); // NOT 20
});
const wrapped = wrapTextWithAnsi(text, 10);
it("handles empty text", () => {
const lines = wrapTextWithAnsi("", 20);
assert.strictEqual(lines.length, 1);
assert.strictEqual(lines[0], "");
});
// Each continuation line should start with red code
for (let i = 1; i < wrapped.length; i++) {
assert.strictEqual(wrapped[i].startsWith(red), true);
}
it("handles newlines", () => {
const text = "line1\nline2\nline3";
const lines = wrapTextWithAnsi(text, 20);
assert.strictEqual(lines.length, 3);
assert.strictEqual(lines[0], "line1");
assert.strictEqual(lines[1], "line2");
assert.strictEqual(lines[2], "line3");
});
});
describe("applyBackgroundToLine", () => {
const greenBg = (text: string) => chalk.bgGreen(text);
it("applies background to plain text and pads to width", () => {
const line = "hello";
const result = applyBackgroundToLine(line, 20, greenBg);
// Should be exactly 20 visible chars
const stripped = result.replace(/\x1b\[[0-9;]*m/g, "");
assert.strictEqual(stripped.length, 20);
// Should have background codes
assert.ok(result.includes("\x1b[48") || result.includes("\x1b[42m"));
assert.ok(result.includes("\x1b[49m"));
});
it("handles text with ANSI codes and resets", () => {
const line = chalk.bold("hello") + " world";
const result = applyBackgroundToLine(line, 20, greenBg);
// Should be exactly 20 visible chars
const stripped = result.replace(/\x1b\[[0-9;]*m/g, "");
assert.strictEqual(stripped.length, 20);
// Should still have bold
assert.ok(result.includes("\x1b[1m"));
// Should have background throughout (even after resets)
assert.ok(result.includes("\x1b[48") || result.includes("\x1b[42m"));
});
it("handles text with 0m resets by reapplying background", () => {
// Simulate: bold text + reset + normal text
const line = "\x1b[1mhello\x1b[0m world";
const result = applyBackgroundToLine(line, 20, greenBg);
// Should NOT have black cells (spaces without background)
// Pattern we DON'T want: 49m or 0m followed by spaces before bg reapplied
const blackCellPattern = /(\x1b\[49m|\x1b\[0m)\s+\x1b\[48;2/;
assert.ok(!blackCellPattern.test(result), `Found black cells in: ${JSON.stringify(result)}`);
// Should be exactly 20 chars
const stripped = result.replace(/\x1b\[[0-9;]*m/g, "");
assert.strictEqual(stripped.length, 20);
// Middle lines should not end with full reset
for (let i = 0; i < wrapped.length - 1; i++) {
assert.strictEqual(wrapped[i].endsWith("\x1b[0m"), false);
}
});
});
});

View file

@ -0,0 +1,7 @@
import { defineConfig } from "vitest/config";

// Vitest configuration: restrict the run to the ANSI text-wrapping suite.
// Other suites in test/ use node:test and are executed separately.
export default defineConfig({
	test: {
		include: ["test/wrap-ansi.test.ts"],
	},
});

View file

@ -2,7 +2,7 @@
Reusable web UI components for building AI chat interfaces powered by [@mariozechner/pi-ai](../ai).
Built with [mini-lit](https://github.com/mariozechner/mini-lit) web components and Tailwind CSS v4.
Built with [mini-lit](https://github.com/badlogic/mini-lit) web components and Tailwind CSS v4.
## Features

View file

@ -58,4 +58,4 @@ example/
- [Pi Web UI Documentation](../README.md)
- [Pi AI Documentation](../../ai/README.md)
- [Mini Lit Documentation](https://github.com/mariozechner/mini-lit)
- [Mini Lit Documentation](https://github.com/badlogic/mini-lit)

View file

@ -1,6 +1,6 @@
{
"name": "pi-web-ui-example",
"version": "1.0.5",
"version": "1.1.2",
"private": true,
"type": "module",
"scripts": {

View file

@ -131,6 +131,7 @@ const saveSession = async () => {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,

View file

@ -1,6 +1,6 @@
{
"name": "@mariozechner/pi-web-ui",
"version": "0.12.9",
"version": "0.13.2",
"description": "Reusable web UI components for AI chat interfaces powered by @mariozechner/pi-ai",
"type": "module",
"main": "dist/index.js",
@ -18,8 +18,8 @@
},
"dependencies": {
"@lmstudio/sdk": "^1.5.0",
"@mariozechner/pi-ai": "^0.12.9",
"@mariozechner/pi-tui": "^0.12.9",
"@mariozechner/pi-ai": "^0.13.2",
"@mariozechner/pi-tui": "^0.13.2",
"docx-preview": "^0.3.7",
"jszip": "^3.10.1",
"lucide": "^0.544.0",

View file

@ -308,6 +308,7 @@ export class Agent {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: this.abortController?.signal.aborted ? "aborted" : "error",

View file

@ -46,6 +46,7 @@ function streamSimpleProxy(
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
timestamp: Date.now(),

View file

@ -266,6 +266,7 @@ export class AgentInterface extends LitElement {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
} satisfies Usage,
);

View file

@ -101,6 +101,7 @@ export class SessionsStore extends Store {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
thinkingLevel: state.thinkingLevel || "off",

View file

@ -118,6 +118,8 @@ export interface SessionMetadata {
cacheRead: number;
/** Total cache write tokens */
cacheWrite: number;
/** Total tokens processed */
totalTokens: number;
/** Total cost breakdown */
cost: {
input: number;

View file

@ -56,11 +56,13 @@ export const simpleHtml = {
output: 375,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0030632000000000003,
output: 0.0015,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.0045632,
},
},
@ -89,11 +91,13 @@ export const simpleHtml = {
output: 162,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.003376,
output: 0.0006479999999999999,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.004024,
},
},
@ -159,11 +163,13 @@ export const longSession = {
output: 455,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0030632000000000003,
output: 0.00182,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.004883200000000001,
},
},
@ -192,11 +198,13 @@ export const longSession = {
output: 147,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0034384000000000003,
output: 0.000588,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.0040264,
},
},
@ -235,11 +243,13 @@ export const longSession = {
output: 96,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0035656000000000004,
output: 0.000384,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.0039496,
},
},
@ -267,11 +277,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -312,11 +324,13 @@ export const longSession = {
output: 115,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0049456000000000005,
output: 0.00045999999999999996,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.005405600000000001,
},
},
@ -348,11 +362,13 @@ export const longSession = {
output: 86,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0050696000000000005,
output: 0.00034399999999999996,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.0054136,
},
},
@ -391,11 +407,13 @@ export const longSession = {
output: 294,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.005151200000000001,
output: 0.001176,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.006327200000000001,
},
},
@ -428,11 +446,13 @@ export const longSession = {
output: 159,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.0054152,
output: 0.000636,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.0060512000000000005,
},
},
@ -471,11 +491,13 @@ export const longSession = {
output: 379,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.005566400000000001,
output: 0.001516,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.007082400000000001,
},
},
@ -516,11 +538,13 @@ export const longSession = {
output: 537,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.005900000000000001,
output: 0.0021479999999999997,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.008048,
},
},
@ -547,11 +571,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -583,11 +609,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -627,11 +655,13 @@ export const longSession = {
output: 492,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.024597,
output: 0.00738,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.031977,
},
},
@ -672,11 +702,13 @@ export const longSession = {
output: 213,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.026211,
output: 0.003195,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.029406,
},
},
@ -709,11 +741,13 @@ export const longSession = {
output: 134,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.026958,
output: 0.00201,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.028968,
},
},
@ -752,11 +786,13 @@ export const longSession = {
output: 331,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.02739,
output: 0.004965,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.032355,
},
},
@ -788,11 +824,13 @@ export const longSession = {
output: 53,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.028443,
output: 0.000795,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.029238,
},
},
@ -831,11 +869,13 @@ export const longSession = {
output: 329,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.028623,
output: 0.004935,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.033558,
},
},
@ -867,11 +907,13 @@ export const longSession = {
output: 46,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.029670000000000002,
output: 0.00069,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.03036,
},
},
@ -897,11 +939,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -937,11 +981,13 @@ export const longSession = {
output: 285,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.029856,
output: 0.004275,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.034131,
},
},
@ -974,11 +1020,13 @@ export const longSession = {
output: 39,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.030831,
output: 0.000585,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.031416,
},
},
@ -1017,11 +1065,13 @@ export const longSession = {
output: 473,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.030993,
output: 0.007095000000000001,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.038088,
},
},
@ -1048,11 +1098,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -1088,11 +1140,13 @@ export const longSession = {
output: 348,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.032556,
output: 0.00522,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.037776000000000004,
},
},
@ -1133,11 +1187,13 @@ export const longSession = {
output: 310,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.033942,
output: 0.0046500000000000005,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.038592,
},
},
@ -1170,11 +1226,13 @@ export const longSession = {
output: 53,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.034977,
output: 0.000795,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.035772,
},
},
@ -1213,11 +1271,13 @@ export const longSession = {
output: 423,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.035160000000000004,
output: 0.006345,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.041505,
},
},
@ -1258,11 +1318,13 @@ export const longSession = {
output: 193,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.036651,
output: 0.002895,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.039546000000000005,
},
},
@ -1295,11 +1357,13 @@ export const longSession = {
output: 104,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.037557,
output: 0.00156,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.039117,
},
},
@ -1334,11 +1398,13 @@ export const longSession = {
output: 146,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.037911,
output: 0.00219,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.040101,
},
},
@ -1371,11 +1437,13 @@ export const longSession = {
output: 63,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.038535,
output: 0.000945,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.03948,
},
},
@ -1401,11 +1469,13 @@ export const longSession = {
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0,
},
},
@ -1445,11 +1515,13 @@ export const longSession = {
output: 324,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.038823,
output: 0.00486,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.043683,
},
},
@ -1490,11 +1562,13 @@ export const longSession = {
output: 385,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.040605,
output: 0.005775,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.046380000000000005,
},
},
@ -1531,11 +1605,13 @@ export const longSession = {
output: 436,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.043749,
output: 0.00654,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.050289,
},
},
@ -1571,11 +1647,13 @@ export const longSession = {
output: 685,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.045105,
output: 0.010275,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.05538,
},
},
@ -1615,11 +1693,13 @@ export const longSession = {
output: 683,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.047214,
output: 0.010245,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.057458999999999996,
},
},
@ -1664,11 +1744,13 @@ export const longSession = {
output: 3462,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.058758000000000005,
output: 0.051930000000000004,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.11068800000000001,
},
},
@ -1697,11 +1779,13 @@ export const longSession = {
output: 223,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.069195,
output: 0.003345,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.07254000000000001,
},
},
@ -1740,11 +1824,13 @@ export const longSession = {
output: 335,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.06991800000000001,
output: 0.005025,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.07494300000000001,
},
},
@ -1785,11 +1871,13 @@ export const longSession = {
output: 499,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.075036,
output: 0.007485,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.08252100000000001,
},
},
@ -1830,11 +1918,13 @@ export const longSession = {
output: 462,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.078387,
output: 0.00693,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.085317,
},
},
@ -1875,11 +1965,13 @@ export const longSession = {
output: 431,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.079914,
output: 0.006465,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.086379,
},
},
@ -1920,11 +2012,13 @@ export const longSession = {
output: 335,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.083382,
output: 0.005025,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.088407,
},
},
@ -1969,11 +2063,13 @@ export const longSession = {
output: 1209,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.08655600000000001,
output: 0.018135000000000002,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.104691,
},
},
@ -2002,11 +2098,13 @@ export const longSession = {
output: 249,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.09024,
output: 0.003735,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.093975,
},
},
@ -2045,11 +2143,13 @@ export const longSession = {
output: 279,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.091008,
output: 0.004185,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.095193,
},
},
@ -2078,11 +2178,13 @@ export const longSession = {
output: 54,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.091893,
output: 0.0008100000000000001,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.09270300000000001,
},
},
@ -2121,11 +2223,13 @@ export const longSession = {
output: 162,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.092097,
output: 0.00243,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.094527,
},
},
@ -2155,11 +2259,13 @@ export const longSession = {
output: 67,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.09271800000000001,
output: 0.001005,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.09372300000000001,
},
},
@ -2199,11 +2305,13 @@ export const longSession = {
output: 182,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.092937,
output: 0.0027300000000000002,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.095667,
},
},
@ -2233,11 +2341,13 @@ export const longSession = {
output: 33,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: {
input: 0.093642,
output: 0.000495,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
total: 0.094137,
},
},