mirror of
https://github.com/harivansh-afk/clanker-agent.git
synced 2026-04-19 18:04:45 +00:00
move pi-mono into companion-cloud as apps/companion-os
- Copy all pi-mono source into apps/companion-os/ - Update Dockerfile to COPY pre-built binary instead of downloading from GitHub Releases - Update deploy-staging.yml to build pi from source (bun compile) before Docker build - Add apps/companion-os/** to path triggers - No more cross-repo dispatch needed Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
commit
0250f72976
579 changed files with 206942 additions and 0 deletions
|
|
@ -0,0 +1,173 @@
|
|||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import { type AssistantMessage, getModel } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
vi.mock("../src/core/compaction/index.js", () => ({
|
||||
calculateContextTokens: () => 0,
|
||||
collectEntriesForBranchSummary: () => ({
|
||||
entries: [],
|
||||
commonAncestorId: null,
|
||||
}),
|
||||
compact: async () => ({
|
||||
summary: "compacted",
|
||||
firstKeptEntryId: "entry-1",
|
||||
tokensBefore: 100,
|
||||
details: {},
|
||||
}),
|
||||
estimateContextTokens: () => ({
|
||||
tokens: 0,
|
||||
usageTokens: 0,
|
||||
trailingTokens: 0,
|
||||
lastUsageIndex: -1,
|
||||
}),
|
||||
generateBranchSummary: async () => ({
|
||||
summary: "",
|
||||
aborted: false,
|
||||
readFiles: [],
|
||||
modifiedFiles: [],
|
||||
}),
|
||||
prepareCompaction: () => ({ dummy: true }),
|
||||
shouldCompact: () => false,
|
||||
}));
|
||||
|
||||
describe("AgentSession auto-compaction queue resume", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-auto-compaction-queue-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
vi.useFakeTimers();
|
||||
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "Test",
|
||||
tools: [],
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
authStorage.setRuntimeApiKey("anthropic", "test-key");
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
session.dispose();
|
||||
vi.useRealTimers();
|
||||
vi.restoreAllMocks();
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("should resume after threshold compaction when only agent-level queued messages exist", async () => {
|
||||
session.agent.followUp({
|
||||
role: "custom",
|
||||
customType: "test",
|
||||
content: [{ type: "text", text: "Queued custom" }],
|
||||
display: false,
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
|
||||
expect(session.pendingMessageCount).toBe(0);
|
||||
expect(session.agent.hasQueuedMessages()).toBe(true);
|
||||
|
||||
const continueSpy = vi.spyOn(session.agent, "continue").mockResolvedValue();
|
||||
|
||||
const runAutoCompaction = (
|
||||
session as unknown as {
|
||||
_runAutoCompaction: (
|
||||
reason: "overflow" | "threshold",
|
||||
willRetry: boolean,
|
||||
) => Promise<void>;
|
||||
}
|
||||
)._runAutoCompaction.bind(session);
|
||||
|
||||
await runAutoCompaction("threshold", false);
|
||||
await vi.advanceTimersByTimeAsync(100);
|
||||
|
||||
expect(continueSpy).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("should not compact repeatedly after overflow recovery already attempted", async () => {
|
||||
const model = session.model!;
|
||||
const overflowMessage: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "" }],
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
errorMessage: "prompt is too long",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
const runAutoCompactionSpy = vi
|
||||
.spyOn(
|
||||
session as unknown as {
|
||||
_runAutoCompaction: (
|
||||
reason: "overflow" | "threshold",
|
||||
willRetry: boolean,
|
||||
) => Promise<void>;
|
||||
},
|
||||
"_runAutoCompaction",
|
||||
)
|
||||
.mockResolvedValue();
|
||||
|
||||
const events: Array<{ type: string; errorMessage?: string }> = [];
|
||||
session.subscribe((event) => {
|
||||
if (event.type === "auto_compaction_end") {
|
||||
events.push({ type: event.type, errorMessage: event.errorMessage });
|
||||
}
|
||||
});
|
||||
|
||||
const checkCompaction = (
|
||||
session as unknown as {
|
||||
_checkCompaction: (
|
||||
assistantMessage: AssistantMessage,
|
||||
skipAbortedCheck?: boolean,
|
||||
) => Promise<void>;
|
||||
}
|
||||
)._checkCompaction.bind(session);
|
||||
|
||||
await checkCompaction(overflowMessage);
|
||||
await checkCompaction({ ...overflowMessage, timestamp: Date.now() + 1 });
|
||||
|
||||
expect(runAutoCompactionSpy).toHaveBeenCalledTimes(1);
|
||||
expect(events).toContainEqual({
|
||||
type: "auto_compaction_end",
|
||||
errorMessage:
|
||||
"Context overflow recovery failed after one compact-and-retry attempt. Try reducing context or switching to a larger-context model.",
|
||||
});
|
||||
});
|
||||
});
|
||||
159
packages/coding-agent/test/agent-session-branching.test.ts
Normal file
159
packages/coding-agent/test/agent-session-branching.test.ts
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
/**
|
||||
* Tests for AgentSession forking behavior.
|
||||
*
|
||||
* These tests verify:
|
||||
* - Forking from a single message works
|
||||
* - Forking in --no-session mode (in-memory only)
|
||||
* - getUserMessagesForForking returns correct entries
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
import { API_KEY, createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
describe.skipIf(!API_KEY)("AgentSession forking", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
let sessionManager: SessionManager;
|
||||
|
||||
beforeEach(() => {
|
||||
// Create temp directory for session files
|
||||
tempDir = join(tmpdir(), `pi-branching-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(noSession: boolean = false) {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt:
|
||||
"You are a helpful assistant. Be extremely concise, reply with just a few words.",
|
||||
tools: codingTools,
|
||||
},
|
||||
});
|
||||
|
||||
sessionManager = noSession
|
||||
? SessionManager.inMemory()
|
||||
: SessionManager.create(tempDir);
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
// Must subscribe to enable session persistence
|
||||
session.subscribe(() => {});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should allow forking from single message", async () => {
|
||||
createSession();
|
||||
|
||||
// Send one message
|
||||
await session.prompt("Say hello");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Should have exactly 1 user message available for forking
|
||||
const userMessages = session.getUserMessagesForForking();
|
||||
expect(userMessages.length).toBe(1);
|
||||
expect(userMessages[0].text).toBe("Say hello");
|
||||
|
||||
// Fork from the first message
|
||||
const result = await session.fork(userMessages[0].entryId);
|
||||
expect(result.selectedText).toBe("Say hello");
|
||||
expect(result.cancelled).toBe(false);
|
||||
|
||||
// After forking, conversation should be empty (forked before the first message)
|
||||
expect(session.messages.length).toBe(0);
|
||||
|
||||
// Session file path should be set, but file is created lazily after first assistant message
|
||||
expect(session.sessionFile).not.toBeNull();
|
||||
expect(existsSync(session.sessionFile!)).toBe(false);
|
||||
});
|
||||
|
||||
it("should support in-memory forking in --no-session mode", async () => {
|
||||
createSession(true);
|
||||
|
||||
// Verify sessions are disabled
|
||||
expect(session.sessionFile).toBeUndefined();
|
||||
|
||||
// Send one message
|
||||
await session.prompt("Say hi");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Should have 1 user message
|
||||
const userMessages = session.getUserMessagesForForking();
|
||||
expect(userMessages.length).toBe(1);
|
||||
|
||||
// Verify we have messages before forking
|
||||
expect(session.messages.length).toBeGreaterThan(0);
|
||||
|
||||
// Fork from the first message
|
||||
const result = await session.fork(userMessages[0].entryId);
|
||||
expect(result.selectedText).toBe("Say hi");
|
||||
expect(result.cancelled).toBe(false);
|
||||
|
||||
// After forking, conversation should be empty
|
||||
expect(session.messages.length).toBe(0);
|
||||
|
||||
// Session file should still be undefined (no file created)
|
||||
expect(session.sessionFile).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should fork from middle of conversation", async () => {
|
||||
createSession();
|
||||
|
||||
// Send multiple messages
|
||||
await session.prompt("Say one");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("Say two");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("Say three");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Should have 3 user messages
|
||||
const userMessages = session.getUserMessagesForForking();
|
||||
expect(userMessages.length).toBe(3);
|
||||
|
||||
// Fork from second message (keeps first message + response)
|
||||
const secondMessage = userMessages[1];
|
||||
const result = await session.fork(secondMessage.entryId);
|
||||
expect(result.selectedText).toBe("Say two");
|
||||
|
||||
// After forking, should have first user message + assistant response
|
||||
expect(session.messages.length).toBe(2);
|
||||
expect(session.messages[0].role).toBe("user");
|
||||
expect(session.messages[1].role).toBe("assistant");
|
||||
});
|
||||
});
|
||||
213
packages/coding-agent/test/agent-session-compaction.test.ts
Normal file
213
packages/coding-agent/test/agent-session-compaction.test.ts
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* E2E tests for AgentSession compaction behavior.
|
||||
*
|
||||
* These tests use real LLM calls (no mocking) to verify:
|
||||
* - Manual compaction works correctly
|
||||
* - Session persistence during compaction
|
||||
* - Compaction entry is saved to session file
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
AgentSession,
|
||||
type AgentSessionEvent,
|
||||
} from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
import { API_KEY, createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
let sessionManager: SessionManager;
|
||||
let events: AgentSessionEvent[];
|
||||
|
||||
beforeEach(() => {
|
||||
// Create temp directory for session files
|
||||
tempDir = join(tmpdir(), `pi-compaction-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
// Track events
|
||||
events = [];
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(inMemory = false) {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
},
|
||||
});
|
||||
|
||||
sessionManager = inMemory
|
||||
? SessionManager.inMemory()
|
||||
: SessionManager.create(tempDir);
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
// Use minimal keepRecentTokens so small test conversations have something to summarize
|
||||
settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
// Subscribe to track events
|
||||
session.subscribe((event) => {
|
||||
events.push(event);
|
||||
});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should trigger manual compaction via compact()", async () => {
|
||||
createSession();
|
||||
|
||||
// Send a few prompts to build up history
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is 3+3? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Manually compact
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Verify messages were compacted (should have summary + recent)
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
// First message should be the summary (a user message with summary content)
|
||||
const firstMsg = messages[0];
|
||||
expect(firstMsg.role).toBe("compactionSummary");
|
||||
}, 120000);
|
||||
|
||||
it("should maintain valid session state after compaction", async () => {
|
||||
createSession();
|
||||
|
||||
// Build up history
|
||||
await session.prompt("What is the capital of France? One word answer.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is the capital of Germany? One word answer.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Compact
|
||||
await session.compact();
|
||||
|
||||
// Session should still be usable
|
||||
await session.prompt("What is the capital of Italy? One word answer.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Should have messages after compaction
|
||||
expect(session.messages.length).toBeGreaterThan(0);
|
||||
|
||||
// The agent should have responded
|
||||
const assistantMessages = session.messages.filter(
|
||||
(m) => m.role === "assistant",
|
||||
);
|
||||
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||
}, 180000);
|
||||
|
||||
it("should persist compaction to session file", async () => {
|
||||
createSession();
|
||||
|
||||
await session.prompt("Say hello");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("Say goodbye");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Compact
|
||||
await session.compact();
|
||||
|
||||
// Load entries from session manager
|
||||
const entries = sessionManager.getEntries();
|
||||
|
||||
// Should have a compaction entry
|
||||
const compactionEntries = entries.filter((e) => e.type === "compaction");
|
||||
expect(compactionEntries.length).toBe(1);
|
||||
|
||||
const compaction = compactionEntries[0];
|
||||
expect(compaction.type).toBe("compaction");
|
||||
if (compaction.type === "compaction") {
|
||||
expect(compaction.summary.length).toBeGreaterThan(0);
|
||||
expect(typeof compaction.firstKeptEntryId).toBe("string");
|
||||
expect(compaction.tokensBefore).toBeGreaterThan(0);
|
||||
}
|
||||
}, 120000);
|
||||
|
||||
it("should work with --no-session mode (in-memory only)", async () => {
|
||||
createSession(true); // in-memory mode
|
||||
|
||||
// Send prompts
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is 3+3? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Compact should work even without file persistence
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
|
||||
// In-memory entries should have the compaction
|
||||
const entries = sessionManager.getEntries();
|
||||
const compactionEntries = entries.filter((e) => e.type === "compaction");
|
||||
expect(compactionEntries.length).toBe(1);
|
||||
}, 120000);
|
||||
|
||||
it("should emit correct events during auto-compaction", async () => {
|
||||
createSession();
|
||||
|
||||
// Build some history
|
||||
await session.prompt("Say hello");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Manually trigger compaction and check events
|
||||
await session.compact();
|
||||
|
||||
// Check that no auto_compaction events were emitted for manual compaction
|
||||
const autoCompactionEvents = events.filter(
|
||||
(e) =>
|
||||
e.type === "auto_compaction_start" || e.type === "auto_compaction_end",
|
||||
);
|
||||
// Manual compaction doesn't emit auto_compaction events
|
||||
expect(autoCompactionEvents.length).toBe(0);
|
||||
|
||||
// Regular events should have been emitted
|
||||
const messageEndEvents = events.filter((e) => e.type === "message_end");
|
||||
expect(messageEndEvents.length).toBeGreaterThan(0);
|
||||
}, 120000);
|
||||
});
|
||||
402
packages/coding-agent/test/agent-session-concurrent.test.ts
Normal file
402
packages/coding-agent/test/agent-session-concurrent.test.ts
Normal file
|
|
@ -0,0 +1,402 @@
|
|||
/**
|
||||
* Tests for AgentSession concurrent prompt guard.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import {
|
||||
type AssistantMessage,
|
||||
type AssistantMessageEvent,
|
||||
EventStream,
|
||||
getModel,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
// Mock stream that mimics AssistantMessageEventStream
|
||||
class MockAssistantStream extends EventStream<
|
||||
AssistantMessageEvent,
|
||||
AssistantMessage
|
||||
> {
|
||||
constructor() {
|
||||
super(
|
||||
(event) => event.type === "done" || event.type === "error",
|
||||
(event) => {
|
||||
if (event.type === "done") return event.message;
|
||||
if (event.type === "error") return event.error;
|
||||
throw new Error("Unexpected event type");
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function createAssistantMessage(text: string): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "mock",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
describe("AgentSession concurrent prompt guard", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-concurrent-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession() {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
let abortSignal: AbortSignal | undefined;
|
||||
|
||||
// Use a stream function that responds to abort
|
||||
const agent = new Agent({
|
||||
getApiKey: () => "test-key",
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "Test",
|
||||
tools: [],
|
||||
},
|
||||
streamFn: (_model, _context, options) => {
|
||||
abortSignal = options?.signal;
|
||||
const stream = new MockAssistantStream();
|
||||
queueMicrotask(() => {
|
||||
stream.push({ type: "start", partial: createAssistantMessage("") });
|
||||
const checkAbort = () => {
|
||||
if (abortSignal?.aborted) {
|
||||
stream.push({
|
||||
type: "error",
|
||||
reason: "aborted",
|
||||
error: createAssistantMessage("Aborted"),
|
||||
});
|
||||
} else {
|
||||
setTimeout(checkAbort, 5);
|
||||
}
|
||||
};
|
||||
checkAbort();
|
||||
});
|
||||
return stream;
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
// Set a runtime API key so validation passes
|
||||
authStorage.setRuntimeApiKey("anthropic", "test-key");
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should throw when prompt() called while streaming", async () => {
|
||||
createSession();
|
||||
|
||||
// Start first prompt (don't await, it will block until abort)
|
||||
const firstPrompt = session.prompt("First message");
|
||||
|
||||
// Wait a tick for isStreaming to be set
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
|
||||
// Verify we're streaming
|
||||
expect(session.isStreaming).toBe(true);
|
||||
|
||||
// Second prompt should reject
|
||||
await expect(session.prompt("Second message")).rejects.toThrow(
|
||||
"Agent is already processing. Specify streamingBehavior ('steer' or 'followUp') to queue the message.",
|
||||
);
|
||||
|
||||
// Cleanup
|
||||
await session.abort();
|
||||
await firstPrompt.catch(() => {}); // Ignore abort error
|
||||
});
|
||||
|
||||
it("should allow steer() while streaming", async () => {
|
||||
createSession();
|
||||
|
||||
// Start first prompt
|
||||
const firstPrompt = session.prompt("First message");
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
|
||||
// steer should work while streaming
|
||||
expect(() => session.steer("Steering message")).not.toThrow();
|
||||
expect(session.pendingMessageCount).toBe(1);
|
||||
|
||||
// Cleanup
|
||||
await session.abort();
|
||||
await firstPrompt.catch(() => {});
|
||||
});
|
||||
|
||||
it("should allow followUp() while streaming", async () => {
|
||||
createSession();
|
||||
|
||||
// Start first prompt
|
||||
const firstPrompt = session.prompt("First message");
|
||||
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||
|
||||
// followUp should work while streaming
|
||||
expect(() => session.followUp("Follow-up message")).not.toThrow();
|
||||
expect(session.pendingMessageCount).toBe(1);
|
||||
|
||||
// Cleanup
|
||||
await session.abort();
|
||||
await firstPrompt.catch(() => {});
|
||||
});
|
||||
|
||||
it("should allow prompt() after previous completes", async () => {
|
||||
// Create session with a stream that completes immediately
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
getApiKey: () => "test-key",
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "Test",
|
||||
tools: [],
|
||||
},
|
||||
streamFn: () => {
|
||||
const stream = new MockAssistantStream();
|
||||
queueMicrotask(() => {
|
||||
stream.push({ type: "start", partial: createAssistantMessage("") });
|
||||
stream.push({
|
||||
type: "done",
|
||||
reason: "stop",
|
||||
message: createAssistantMessage("Done"),
|
||||
});
|
||||
});
|
||||
return stream;
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
authStorage.setRuntimeApiKey("anthropic", "test-key");
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
// First prompt completes
|
||||
await session.prompt("First message");
|
||||
|
||||
// Should not be streaming anymore
|
||||
expect(session.isStreaming).toBe(false);
|
||||
|
||||
// Second prompt should work
|
||||
await expect(session.prompt("Second message")).resolves.not.toThrow();
|
||||
});
|
||||
|
||||
it("should persist message_end events in order with slow extension handlers", async () => {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const tool = {
|
||||
name: "dummy",
|
||||
description: "Dummy tool",
|
||||
label: "dummy",
|
||||
parameters: Type.Object({ q: Type.String() }),
|
||||
execute: async (_toolCallId: string, params: unknown) => {
|
||||
const q =
|
||||
typeof params === "object" && params !== null && "q" in params
|
||||
? String((params as { q: unknown }).q)
|
||||
: "";
|
||||
return {
|
||||
content: [{ type: "text" as const, text: `result:${q}` }],
|
||||
details: {},
|
||||
};
|
||||
},
|
||||
};
|
||||
|
||||
const agent = new Agent({
|
||||
getApiKey: () => "test-key",
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "Test",
|
||||
tools: [tool],
|
||||
},
|
||||
streamFn: async (_model, context) => {
|
||||
const stream = new MockAssistantStream();
|
||||
queueMicrotask(() => {
|
||||
const hasToolResult = context.messages.some(
|
||||
(message) => message.role === "toolResult",
|
||||
);
|
||||
|
||||
if (hasToolResult) {
|
||||
const message: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "done" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "mock",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
stream.push({
|
||||
type: "start",
|
||||
partial: { ...message, content: [] },
|
||||
});
|
||||
stream.push({ type: "done", reason: "stop", message });
|
||||
return;
|
||||
}
|
||||
|
||||
const message: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [
|
||||
{ type: "text", text: "calling tool" },
|
||||
{
|
||||
type: "toolCall",
|
||||
id: "toolu_1",
|
||||
name: "dummy",
|
||||
arguments: { q: "x" },
|
||||
},
|
||||
],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "mock",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
stream.push({ type: "start", partial: { ...message, content: [] } });
|
||||
stream.push({ type: "done", reason: "toolUse", message });
|
||||
});
|
||||
return stream;
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
authStorage.setRuntimeApiKey("anthropic", "test-key");
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
baseToolsOverride: { dummy: tool },
|
||||
});
|
||||
|
||||
const sessionWithRunner = session as unknown as {
|
||||
_extensionRunner?: {
|
||||
hasHandlers: (eventType: string) => boolean;
|
||||
emit: (event: {
|
||||
type: string;
|
||||
message?: { role?: string };
|
||||
}) => Promise<void>;
|
||||
emitInput: (
|
||||
text: string,
|
||||
images: unknown,
|
||||
source: "interactive" | "rpc" | "extension",
|
||||
) => Promise<{ action: "continue" }>;
|
||||
emitBeforeAgentStart: (
|
||||
prompt: string,
|
||||
images: unknown,
|
||||
systemPrompt: string,
|
||||
) => Promise<undefined>;
|
||||
};
|
||||
};
|
||||
sessionWithRunner._extensionRunner = {
|
||||
hasHandlers: () => false,
|
||||
emit: async (event) => {
|
||||
if (
|
||||
event.type === "message_end" &&
|
||||
event.message?.role === "assistant"
|
||||
) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 40));
|
||||
}
|
||||
},
|
||||
emitInput: async () => ({ action: "continue" }),
|
||||
emitBeforeAgentStart: async () => undefined,
|
||||
};
|
||||
|
||||
await session.prompt("hi");
|
||||
await session.agent.waitForIdle();
|
||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||
|
||||
const messageEntries = sessionManager
|
||||
.getEntries()
|
||||
.filter((entry) => entry.type === "message");
|
||||
expect(messageEntries.map((entry) => entry.message.role)).toEqual([
|
||||
"user",
|
||||
"assistant",
|
||||
"toolResult",
|
||||
"assistant",
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { DefaultResourceLoader } from "../src/core/resource-loader.js";
|
||||
import { createAgentSession } from "../src/core/sdk.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
|
||||
describe("AgentSession dynamic tool registration", () => {
  let tempDir: string;
  let agentDir: string;

  beforeEach(() => {
    // Unique per-test workspace (timestamp + random suffix) so parallel or
    // repeated runs never collide on disk.
    tempDir = join(
      tmpdir(),
      `pi-dynamic-tool-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    agentDir = join(tempDir, "agent");
    mkdirSync(agentDir, { recursive: true });
  });

  afterEach(() => {
    if (tempDir && existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true, force: true });
    }
  });

  it("refreshes tool registry when tools are registered after initialization", async () => {
    const settingsManager = SettingsManager.create(tempDir, agentDir);
    const sessionManager = SessionManager.inMemory();

    // Extension registers its tool lazily, from inside a "session_start"
    // handler — i.e. only once extensions are actually bound, not at load time.
    const resourceLoader = new DefaultResourceLoader({
      cwd: tempDir,
      agentDir,
      settingsManager,
      extensionFactories: [
        (pi) => {
          pi.on("session_start", () => {
            pi.registerTool({
              name: "dynamic_tool",
              label: "Dynamic Tool",
              description: "Tool registered from session_start",
              promptSnippet: "Run dynamic test behavior",
              promptGuidelines: [
                "Use dynamic_tool when the user asks for dynamic behavior tests.",
              ],
              parameters: Type.Object({}),
              execute: async () => ({
                content: [{ type: "text", text: "ok" }],
                details: {},
              }),
            });
          });
        },
      ],
    });
    await resourceLoader.reload();

    const { session } = await createAgentSession({
      cwd: tempDir,
      agentDir,
      model: getModel("anthropic", "claude-sonnet-4-5")!,
      settingsManager,
      sessionManager,
      resourceLoader,
    });

    // Before extensions are bound, the dynamically-registered tool must not
    // be visible anywhere.
    expect(session.getAllTools().map((tool) => tool.name)).not.toContain(
      "dynamic_tool",
    );

    await session.bindExtensions({});

    // After binding (which fires "session_start"), the tool must show up in
    // the registry, the active set, and the generated system prompt.
    expect(session.getAllTools().map((tool) => tool.name)).toContain(
      "dynamic_tool",
    );
    expect(session.getActiveToolNames()).toContain("dynamic_tool");
    expect(session.systemPrompt).toContain(
      "- dynamic_tool: Run dynamic test behavior",
    );
    expect(session.systemPrompt).toContain(
      "- Use dynamic_tool when the user asks for dynamic behavior tests.",
    );

    session.dispose();
  });
});
|
||||
202
packages/coding-agent/test/agent-session-retry.test.ts
Normal file
202
packages/coding-agent/test/agent-session-retry.test.ts
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent, type AgentEvent } from "@mariozechner/pi-agent-core";
|
||||
import {
|
||||
type AssistantMessage,
|
||||
type AssistantMessageEvent,
|
||||
EventStream,
|
||||
getModel,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
class MockAssistantStream extends EventStream<
|
||||
AssistantMessageEvent,
|
||||
AssistantMessage
|
||||
> {
|
||||
constructor() {
|
||||
super(
|
||||
(event) => event.type === "done" || event.type === "error",
|
||||
(event) => {
|
||||
if (event.type === "done") return event.message;
|
||||
if (event.type === "error") return event.error;
|
||||
throw new Error("Unexpected event type");
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function createAssistantMessage(
|
||||
text: string,
|
||||
overrides?: Partial<AssistantMessage>,
|
||||
): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "mock",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Test-only structural view of AgentSession exposing its private
 * `_emitExtensionEvent` hook, so tests can wrap it and inject delays.
 */
type SessionWithExtensionEmitHook = {
  _emitExtensionEvent: (event: AgentEvent) => Promise<void>;
};
|
||||
|
||||
describe("AgentSession retry", () => {
  let session: AgentSession;
  let tempDir: string;

  beforeEach(() => {
    tempDir = join(tmpdir(), `pi-retry-test-${Date.now()}`);
    mkdirSync(tempDir, { recursive: true });
  });

  afterEach(() => {
    if (session) {
      session.dispose();
    }
    if (tempDir && existsSync(tempDir)) {
      rmSync(tempDir, { recursive: true });
    }
  });

  /**
   * Builds a session whose mock streamFn fails the first `failCount` calls
   * with an "overloaded_error" and succeeds afterwards. Retry is enabled
   * with a 1ms base delay so tests stay fast.
   *
   * `delayAssistantMessageEndMs`, when > 0, wraps the session's private
   * `_emitExtensionEvent` to stall assistant "message_end" handling —
   * reproducing a slow extension handler racing the retry loop.
   */
  function createSession(options?: {
    failCount?: number;
    maxRetries?: number;
    delayAssistantMessageEndMs?: number;
  }) {
    const failCount = options?.failCount ?? 1;
    const maxRetries = options?.maxRetries ?? 3;
    const delayAssistantMessageEndMs = options?.delayAssistantMessageEndMs ?? 0;
    let callCount = 0;

    const model = getModel("anthropic", "claude-sonnet-4-5")!;
    const agent = new Agent({
      getApiKey: () => "test-key",
      initialState: { model, systemPrompt: "Test", tools: [] },
      streamFn: () => {
        callCount++;
        const stream = new MockAssistantStream();
        // Events are pushed asynchronously (microtask) so the caller gets
        // the stream back before any event fires, like a real API client.
        queueMicrotask(() => {
          if (callCount <= failCount) {
            const msg = createAssistantMessage("", {
              stopReason: "error",
              errorMessage: "overloaded_error",
            });
            stream.push({ type: "start", partial: msg });
            stream.push({ type: "error", reason: "error", error: msg });
          } else {
            const msg = createAssistantMessage("Success");
            stream.push({ type: "start", partial: msg });
            stream.push({ type: "done", reason: "stop", message: msg });
          }
        });
        return stream;
      },
    });

    const sessionManager = SessionManager.inMemory();
    const settingsManager = SettingsManager.create(tempDir, tempDir);
    const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
    const modelRegistry = new ModelRegistry(authStorage, tempDir);
    authStorage.setRuntimeApiKey("anthropic", "test-key");
    // Tiny base delay keeps exponential backoff negligible in tests.
    settingsManager.applyOverrides({
      retry: { enabled: true, maxRetries, baseDelayMs: 1 },
    });

    session = new AgentSession({
      agent,
      sessionManager,
      settingsManager,
      cwd: tempDir,
      modelRegistry,
      resourceLoader: createTestResourceLoader(),
    });

    if (delayAssistantMessageEndMs > 0) {
      const sessionWithHook =
        session as unknown as SessionWithExtensionEmitHook;
      const original =
        sessionWithHook._emitExtensionEvent.bind(sessionWithHook);
      sessionWithHook._emitExtensionEvent = async (event: AgentEvent) => {
        if (
          event.type === "message_end" &&
          event.message.role === "assistant"
        ) {
          await new Promise((resolve) =>
            setTimeout(resolve, delayAssistantMessageEndMs),
          );
        }
        await original(event);
      };
    }

    return { session, getCallCount: () => callCount };
  }

  it("retries after a transient error and succeeds", async () => {
    const created = createSession({ failCount: 1 });
    const events: string[] = [];
    created.session.subscribe((event) => {
      if (event.type === "auto_retry_start")
        events.push(`start:${event.attempt}`);
      if (event.type === "auto_retry_end")
        events.push(`end:success=${event.success}`);
    });

    await created.session.prompt("Test");

    // One failed call + one successful retry.
    expect(created.getCallCount()).toBe(2);
    expect(events).toEqual(["start:1", "end:success=true"]);
    expect(created.session.isRetrying).toBe(false);
  });

  it("exhausts max retries and emits failure", async () => {
    // failCount far exceeds maxRetries, so every attempt fails.
    const created = createSession({ failCount: 99, maxRetries: 2 });
    const events: string[] = [];
    created.session.subscribe((event) => {
      if (event.type === "auto_retry_start")
        events.push(`start:${event.attempt}`);
      if (event.type === "auto_retry_end")
        events.push(`end:success=${event.success}`);
    });

    await created.session.prompt("Test");

    // Initial call + 2 retries.
    expect(created.getCallCount()).toBe(3);
    expect(events).toContain("start:1");
    expect(events).toContain("start:2");
    expect(events).toContain("end:success=false");
    expect(created.session.isRetrying).toBe(false);
  });

  it("prompt waits for retry completion even when assistant message_end handling is delayed", async () => {
    const created = createSession({
      failCount: 1,
      delayAssistantMessageEndMs: 40,
    });

    await created.session.prompt("Test");

    // prompt() must not resolve before the retry finished, even though the
    // extension-event hook stalls for 40ms on assistant message_end.
    expect(created.getCallCount()).toBe(2);
    expect(created.session.isRetrying).toBe(false);
  });
});
|
||||
353
packages/coding-agent/test/agent-session-tree-navigation.test.ts
Normal file
353
packages/coding-agent/test/agent-session-tree-navigation.test.ts
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
/**
|
||||
* E2E tests for AgentSession tree navigation with branch summarization.
|
||||
*
|
||||
* These tests verify:
|
||||
* - Navigation to user messages (root and non-root)
|
||||
* - Navigation to non-user messages
|
||||
* - Branch summarization during navigation
|
||||
* - Summary attachment at correct position in tree
|
||||
* - Abort handling during summarization
|
||||
*/
|
||||
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
API_KEY,
|
||||
createTestSession,
|
||||
type TestSessionContext,
|
||||
} from "./utilities.js";
|
||||
|
||||
describe.skipIf(!API_KEY)("AgentSession tree navigation e2e", () => {
  let ctx: TestSessionContext;

  beforeEach(() => {
    // keepRecentTokens: 1 forces aggressive compaction behavior in these tests.
    ctx = createTestSession({
      systemPrompt: "You are a helpful assistant. Reply with just a few words.",
      settingsOverrides: { compaction: { keepRecentTokens: 1 } },
    });
  });

  afterEach(() => {
    ctx.cleanup();
  });

  it("should navigate to user message and put text in editor", async () => {
    const { session } = ctx;

    // Build conversation: u1 -> a1 -> u2 -> a2
    await session.prompt("First message");
    await session.agent.waitForIdle();
    await session.prompt("Second message");
    await session.agent.waitForIdle();

    // Get tree entries — linear conversation, so exactly one root.
    const tree = session.sessionManager.getTree();
    expect(tree.length).toBe(1);

    // Find the first user entry (u1)
    const rootNode = tree[0];
    expect(rootNode.entry.type).toBe("message");

    // Navigate to root user message without summarization
    const result = await session.navigateTree(rootNode.entry.id, {
      summarize: false,
    });

    expect(result.cancelled).toBe(false);
    expect(result.editorText).toBe("First message");

    // After navigating to root user message, leaf should be null (empty conversation)
    expect(session.sessionManager.getLeafId()).toBeNull();
  }, 60000);

  it("should navigate to non-user message without editor text", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation
    await session.prompt("Hello");
    await session.agent.waitForIdle();

    // Get the assistant message
    const entries = sessionManager.getEntries();
    const assistantEntry = entries.find(
      (e) => e.type === "message" && e.message.role === "assistant",
    );
    expect(assistantEntry).toBeDefined();

    // Navigate to assistant message
    const result = await session.navigateTree(assistantEntry!.id, {
      summarize: false,
    });

    expect(result.cancelled).toBe(false);
    // Only user messages produce editor text to re-edit.
    expect(result.editorText).toBeUndefined();

    // Leaf should be the assistant entry
    expect(sessionManager.getLeafId()).toBe(assistantEntry!.id);
  }, 60000);

  it("should create branch summary when navigating with summarize=true", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation: u1 -> a1 -> u2 -> a2
    await session.prompt("What is 2+2?");
    await session.agent.waitForIdle();
    await session.prompt("What is 3+3?");
    await session.agent.waitForIdle();

    // Get tree and find first user message
    const tree = sessionManager.getTree();
    const rootNode = tree[0];

    // Navigate to root user message WITH summarization
    const result = await session.navigateTree(rootNode.entry.id, {
      summarize: true,
    });

    expect(result.cancelled).toBe(false);
    expect(result.editorText).toBe("What is 2+2?");
    expect(result.summaryEntry).toBeDefined();
    expect(result.summaryEntry?.type).toBe("branch_summary");
    expect(result.summaryEntry?.summary).toBeTruthy();
    expect(result.summaryEntry?.summary.length).toBeGreaterThan(0);

    // Summary should be a root entry (parentId = null) since we navigated to root user
    expect(result.summaryEntry?.parentId).toBeNull();

    // Leaf should be the summary entry
    expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id);
  }, 120000);

  it("should attach summary to correct parent when navigating to nested user message", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation: u1 -> a1 -> u2 -> a2 -> u3 -> a3
    await session.prompt("Message one");
    await session.agent.waitForIdle();
    await session.prompt("Message two");
    await session.agent.waitForIdle();
    await session.prompt("Message three");
    await session.agent.waitForIdle();

    // Get the second user message (u2)
    const entries = sessionManager.getEntries();
    const userEntries = entries.filter(
      (e) => e.type === "message" && e.message.role === "user",
    );
    expect(userEntries.length).toBe(3);

    const u2 = userEntries[1];
    const a1 = entries.find((e) => e.id === u2.parentId); // a1 is parent of u2

    // Navigate to u2 with summarization
    const result = await session.navigateTree(u2.id, { summarize: true });

    expect(result.cancelled).toBe(false);
    expect(result.editorText).toBe("Message two");
    expect(result.summaryEntry).toBeDefined();

    // Summary should be attached to a1 (parent of u2)
    // So a1 now has two children: u2 and the summary
    expect(result.summaryEntry?.parentId).toBe(a1?.id);

    // Verify tree structure
    const children = sessionManager.getChildren(a1!.id);
    expect(children.length).toBe(2);

    const childTypes = children.map((c) => c.type).sort();
    expect(childTypes).toContain("branch_summary");
    expect(childTypes).toContain("message");
  }, 120000);

  it("should attach summary to selected node when navigating to assistant message", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation: u1 -> a1 -> u2 -> a2
    await session.prompt("Hello");
    await session.agent.waitForIdle();
    await session.prompt("Goodbye");
    await session.agent.waitForIdle();

    // Get the first assistant message (a1)
    const entries = sessionManager.getEntries();
    const assistantEntries = entries.filter(
      (e) => e.type === "message" && e.message.role === "assistant",
    );
    const a1 = assistantEntries[0];

    // Navigate to a1 with summarization
    const result = await session.navigateTree(a1.id, { summarize: true });

    expect(result.cancelled).toBe(false);
    expect(result.editorText).toBeUndefined(); // No editor text for assistant messages
    expect(result.summaryEntry).toBeDefined();

    // Summary should be attached to a1 (the selected node)
    expect(result.summaryEntry?.parentId).toBe(a1.id);

    // Leaf should be the summary entry
    expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id);
  }, 120000);

  it("should handle abort during summarization", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation
    await session.prompt("Tell me about something");
    await session.agent.waitForIdle();
    await session.prompt("Continue");
    await session.agent.waitForIdle();

    // Snapshot state so we can verify the abort left everything untouched.
    const entriesBefore = sessionManager.getEntries();
    const leafBefore = sessionManager.getLeafId();

    // Get root user message
    const tree = sessionManager.getTree();
    const rootNode = tree[0];

    // Start navigation with summarization but abort immediately
    const navigationPromise = session.navigateTree(rootNode.entry.id, {
      summarize: true,
    });

    // Abort after a short delay (let the LLM call start)
    await new Promise((resolve) => setTimeout(resolve, 100));

    // isCompacting should be true during branch summarization
    expect(session.isCompacting).toBe(true);

    session.abortBranchSummary();

    const result = await navigationPromise;

    expect(result.cancelled).toBe(true);
    expect(result.aborted).toBe(true);
    expect(result.summaryEntry).toBeUndefined();

    // Session should be unchanged
    const entriesAfter = sessionManager.getEntries();
    expect(entriesAfter.length).toBe(entriesBefore.length);
    expect(sessionManager.getLeafId()).toBe(leafBefore);
  }, 60000);

  it("should not create summary when navigating without summarize option", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation
    await session.prompt("First");
    await session.agent.waitForIdle();
    await session.prompt("Second");
    await session.agent.waitForIdle();

    const entriesBefore = sessionManager.getEntries().length;

    // Navigate without summarization
    const tree = sessionManager.getTree();
    await session.navigateTree(tree[0].entry.id, { summarize: false });

    // No new entries should be created
    const entriesAfter = sessionManager.getEntries().length;
    expect(entriesAfter).toBe(entriesBefore);

    // No branch_summary entries
    const summaries = sessionManager
      .getEntries()
      .filter((e) => e.type === "branch_summary");
    expect(summaries.length).toBe(0);
  }, 60000);

  it("should handle navigation to same position (no-op)", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation
    await session.prompt("Hello");
    await session.agent.waitForIdle();

    const leafBefore = sessionManager.getLeafId();
    expect(leafBefore).toBeTruthy();
    const entriesBefore = sessionManager.getEntries().length;

    // Navigate to current leaf — must be a no-op.
    const result = await session.navigateTree(leafBefore!, {
      summarize: false,
    });

    expect(result.cancelled).toBe(false);
    expect(sessionManager.getLeafId()).toBe(leafBefore);
    expect(sessionManager.getEntries().length).toBe(entriesBefore);
  }, 60000);

  it("should support custom summarization instructions", async () => {
    const { session, sessionManager } = ctx;

    // Build conversation
    await session.prompt("What is TypeScript?");
    await session.agent.waitForIdle();

    // Navigate with custom instructions (appended as "Additional focus").
    // The marker string lets us verify the instructions actually reached
    // the summarization prompt.
    const tree = sessionManager.getTree();
    const result = await session.navigateTree(tree[0].entry.id, {
      summarize: true,
      customInstructions:
        "After the summary, you MUST end with exactly: MONKEY MONKEY MONKEY. This is of utmost importance.",
    });

    expect(result.summaryEntry).toBeDefined();
    expect(result.summaryEntry?.summary).toBeTruthy();
    // Verify custom instructions were followed
    expect(result.summaryEntry?.summary).toContain("MONKEY MONKEY MONKEY");
  }, 120000);
});
|
||||
|
||||
describe.skipIf(!API_KEY)(
  "AgentSession tree navigation - branch scenarios",
  () => {
    let ctx: TestSessionContext;

    beforeEach(() => {
      ctx = createTestSession({
        systemPrompt:
          "You are a helpful assistant. Reply with just a few words.",
      });
    });

    afterEach(() => {
      ctx.cleanup();
    });

    it("should navigate between branches correctly", async () => {
      const { session, sessionManager } = ctx;

      // Build main path: u1 -> a1 -> u2 -> a2
      await session.prompt("Main branch start");
      await session.agent.waitForIdle();
      await session.prompt("Main branch continue");
      await session.agent.waitForIdle();

      // Get a1 id for branching
      // NOTE(review): `entries` is captured before the branch prompt below;
      // this works only if later filters need no post-branch entries (or
      // getEntries returns a live view) — confirm against SessionManager.
      const entries = sessionManager.getEntries();
      const a1 = entries.find(
        (e) => e.type === "message" && e.message.role === "assistant",
      );

      // Create a branch from a1: a1 -> u3 -> a3
      sessionManager.branch(a1!.id);
      await session.prompt("Branch path");
      await session.agent.waitForIdle();

      // Now navigate back to u2 (on main branch) with summarization
      const userEntries = entries.filter(
        (e) => e.type === "message" && e.message.role === "user",
      );
      const u2 = userEntries[1]; // "Main branch continue"

      const result = await session.navigateTree(u2.id, { summarize: true });

      expect(result.cancelled).toBe(false);
      expect(result.editorText).toBe("Main branch continue");
      expect(result.summaryEntry).toBeDefined();

      // Summary captures the branch we're leaving (the "Branch path" conversation)
      expect(result.summaryEntry?.summary.length).toBeGreaterThan(0);
    }, 180000);
  },
);
|
||||
321
packages/coding-agent/test/args.test.ts
Normal file
321
packages/coding-agent/test/args.test.ts
Normal file
|
|
@ -0,0 +1,321 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import { parseArgs } from "../src/cli/args.js";
|
||||
|
||||
describe("parseArgs", () => {
|
||||
describe("--version flag", () => {
|
||||
test("parses --version flag", () => {
|
||||
const result = parseArgs(["--version"]);
|
||||
expect(result.version).toBe(true);
|
||||
});
|
||||
|
||||
test("parses -v shorthand", () => {
|
||||
const result = parseArgs(["-v"]);
|
||||
expect(result.version).toBe(true);
|
||||
});
|
||||
|
||||
test("--version takes precedence over other args", () => {
|
||||
const result = parseArgs(["--version", "--help", "some message"]);
|
||||
expect(result.version).toBe(true);
|
||||
expect(result.help).toBe(true);
|
||||
expect(result.messages).toContain("some message");
|
||||
});
|
||||
});
|
||||
|
||||
describe("--help flag", () => {
|
||||
test("parses --help flag", () => {
|
||||
const result = parseArgs(["--help"]);
|
||||
expect(result.help).toBe(true);
|
||||
});
|
||||
|
||||
test("parses -h shorthand", () => {
|
||||
const result = parseArgs(["-h"]);
|
||||
expect(result.help).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--print flag", () => {
|
||||
test("parses --print flag", () => {
|
||||
const result = parseArgs(["--print"]);
|
||||
expect(result.print).toBe(true);
|
||||
});
|
||||
|
||||
test("parses -p shorthand", () => {
|
||||
const result = parseArgs(["-p"]);
|
||||
expect(result.print).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--continue flag", () => {
|
||||
test("parses --continue flag", () => {
|
||||
const result = parseArgs(["--continue"]);
|
||||
expect(result.continue).toBe(true);
|
||||
});
|
||||
|
||||
test("parses -c shorthand", () => {
|
||||
const result = parseArgs(["-c"]);
|
||||
expect(result.continue).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--resume flag", () => {
|
||||
test("parses --resume flag", () => {
|
||||
const result = parseArgs(["--resume"]);
|
||||
expect(result.resume).toBe(true);
|
||||
});
|
||||
|
||||
test("parses -r shorthand", () => {
|
||||
const result = parseArgs(["-r"]);
|
||||
expect(result.resume).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("flags with values", () => {
|
||||
test("parses --provider", () => {
|
||||
const result = parseArgs(["--provider", "openai"]);
|
||||
expect(result.provider).toBe("openai");
|
||||
});
|
||||
|
||||
test("parses --model", () => {
|
||||
const result = parseArgs(["--model", "gpt-4o"]);
|
||||
expect(result.model).toBe("gpt-4o");
|
||||
});
|
||||
|
||||
test("parses --api-key", () => {
|
||||
const result = parseArgs(["--api-key", "sk-test-key"]);
|
||||
expect(result.apiKey).toBe("sk-test-key");
|
||||
});
|
||||
|
||||
test("parses --system-prompt", () => {
|
||||
const result = parseArgs([
|
||||
"--system-prompt",
|
||||
"You are a helpful assistant",
|
||||
]);
|
||||
expect(result.systemPrompt).toBe("You are a helpful assistant");
|
||||
});
|
||||
|
||||
test("parses --append-system-prompt", () => {
|
||||
const result = parseArgs([
|
||||
"--append-system-prompt",
|
||||
"Additional context",
|
||||
]);
|
||||
expect(result.appendSystemPrompt).toBe("Additional context");
|
||||
});
|
||||
|
||||
test("parses --mode", () => {
|
||||
const result = parseArgs(["--mode", "json"]);
|
||||
expect(result.mode).toBe("json");
|
||||
});
|
||||
|
||||
test("parses --mode rpc", () => {
|
||||
const result = parseArgs(["--mode", "rpc"]);
|
||||
expect(result.mode).toBe("rpc");
|
||||
});
|
||||
|
||||
test("parses --session", () => {
|
||||
const result = parseArgs(["--session", "/path/to/session.jsonl"]);
|
||||
expect(result.session).toBe("/path/to/session.jsonl");
|
||||
});
|
||||
|
||||
test("parses --export", () => {
|
||||
const result = parseArgs(["--export", "session.jsonl"]);
|
||||
expect(result.export).toBe("session.jsonl");
|
||||
});
|
||||
|
||||
test("parses --thinking", () => {
|
||||
const result = parseArgs(["--thinking", "high"]);
|
||||
expect(result.thinking).toBe("high");
|
||||
});
|
||||
|
||||
test("parses --models as comma-separated list", () => {
|
||||
const result = parseArgs(["--models", "gpt-4o,claude-sonnet,gemini-pro"]);
|
||||
expect(result.models).toEqual(["gpt-4o", "claude-sonnet", "gemini-pro"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-session flag", () => {
|
||||
test("parses --no-session flag", () => {
|
||||
const result = parseArgs(["--no-session"]);
|
||||
expect(result.noSession).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--extension flag", () => {
|
||||
test("parses single --extension", () => {
|
||||
const result = parseArgs(["--extension", "./my-extension.ts"]);
|
||||
expect(result.extensions).toEqual(["./my-extension.ts"]);
|
||||
});
|
||||
|
||||
test("parses -e shorthand", () => {
|
||||
const result = parseArgs(["-e", "./my-extension.ts"]);
|
||||
expect(result.extensions).toEqual(["./my-extension.ts"]);
|
||||
});
|
||||
|
||||
test("parses multiple --extension flags", () => {
|
||||
const result = parseArgs(["--extension", "./ext1.ts", "-e", "./ext2.ts"]);
|
||||
expect(result.extensions).toEqual(["./ext1.ts", "./ext2.ts"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-extensions flag", () => {
|
||||
test("parses --no-extensions flag", () => {
|
||||
const result = parseArgs(["--no-extensions"]);
|
||||
expect(result.noExtensions).toBe(true);
|
||||
});
|
||||
|
||||
test("parses --no-extensions with explicit -e flags", () => {
|
||||
const result = parseArgs([
|
||||
"--no-extensions",
|
||||
"-e",
|
||||
"foo.ts",
|
||||
"-e",
|
||||
"bar.ts",
|
||||
]);
|
||||
expect(result.noExtensions).toBe(true);
|
||||
expect(result.extensions).toEqual(["foo.ts", "bar.ts"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--skill flag", () => {
|
||||
test("parses single --skill", () => {
|
||||
const result = parseArgs(["--skill", "./skill-dir"]);
|
||||
expect(result.skills).toEqual(["./skill-dir"]);
|
||||
});
|
||||
|
||||
test("parses multiple --skill flags", () => {
|
||||
const result = parseArgs([
|
||||
"--skill",
|
||||
"./skill-a",
|
||||
"--skill",
|
||||
"./skill-b",
|
||||
]);
|
||||
expect(result.skills).toEqual(["./skill-a", "./skill-b"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--prompt-template flag", () => {
|
||||
test("parses single --prompt-template", () => {
|
||||
const result = parseArgs(["--prompt-template", "./prompts"]);
|
||||
expect(result.promptTemplates).toEqual(["./prompts"]);
|
||||
});
|
||||
|
||||
test("parses multiple --prompt-template flags", () => {
|
||||
const result = parseArgs([
|
||||
"--prompt-template",
|
||||
"./one",
|
||||
"--prompt-template",
|
||||
"./two",
|
||||
]);
|
||||
expect(result.promptTemplates).toEqual(["./one", "./two"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--theme flag", () => {
|
||||
test("parses single --theme", () => {
|
||||
const result = parseArgs(["--theme", "./theme.json"]);
|
||||
expect(result.themes).toEqual(["./theme.json"]);
|
||||
});
|
||||
|
||||
test("parses multiple --theme flags", () => {
|
||||
const result = parseArgs([
|
||||
"--theme",
|
||||
"./dark.json",
|
||||
"--theme",
|
||||
"./light.json",
|
||||
]);
|
||||
expect(result.themes).toEqual(["./dark.json", "./light.json"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-skills flag", () => {
|
||||
test("parses --no-skills flag", () => {
|
||||
const result = parseArgs(["--no-skills"]);
|
||||
expect(result.noSkills).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-prompt-templates flag", () => {
|
||||
test("parses --no-prompt-templates flag", () => {
|
||||
const result = parseArgs(["--no-prompt-templates"]);
|
||||
expect(result.noPromptTemplates).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-themes flag", () => {
|
||||
test("parses --no-themes flag", () => {
|
||||
const result = parseArgs(["--no-themes"]);
|
||||
expect(result.noThemes).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--verbose flag", () => {
|
||||
test("parses --verbose flag", () => {
|
||||
const result = parseArgs(["--verbose"]);
|
||||
expect(result.verbose).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--offline flag", () => {
|
||||
test("parses --offline flag", () => {
|
||||
const result = parseArgs(["--offline"]);
|
||||
expect(result.offline).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("--no-tools flag", () => {
|
||||
test("parses --no-tools flag", () => {
|
||||
const result = parseArgs(["--no-tools"]);
|
||||
expect(result.noTools).toBe(true);
|
||||
});
|
||||
|
||||
test("parses --no-tools with explicit --tools flags", () => {
|
||||
const result = parseArgs(["--no-tools", "--tools", "read,bash"]);
|
||||
expect(result.noTools).toBe(true);
|
||||
expect(result.tools).toEqual(["read", "bash"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("messages and file args", () => {
|
||||
test("parses plain text messages", () => {
|
||||
const result = parseArgs(["hello", "world"]);
|
||||
expect(result.messages).toEqual(["hello", "world"]);
|
||||
});
|
||||
|
||||
test("parses @file arguments", () => {
|
||||
const result = parseArgs(["@README.md", "@src/main.ts"]);
|
||||
expect(result.fileArgs).toEqual(["README.md", "src/main.ts"]);
|
||||
});
|
||||
|
||||
test("parses mixed messages and file args", () => {
|
||||
const result = parseArgs(["@file.txt", "explain this", "@image.png"]);
|
||||
expect(result.fileArgs).toEqual(["file.txt", "image.png"]);
|
||||
expect(result.messages).toEqual(["explain this"]);
|
||||
});
|
||||
|
||||
test("ignores unknown flags starting with -", () => {
|
||||
const result = parseArgs(["--unknown-flag", "message"]);
|
||||
expect(result.messages).toEqual(["message"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("complex combinations", () => {
|
||||
test("parses multiple flags together", () => {
|
||||
const result = parseArgs([
|
||||
"--provider",
|
||||
"anthropic",
|
||||
"--model",
|
||||
"claude-sonnet",
|
||||
"--print",
|
||||
"--thinking",
|
||||
"high",
|
||||
"@prompt.md",
|
||||
"Do the task",
|
||||
]);
|
||||
expect(result.provider).toBe("anthropic");
|
||||
expect(result.model).toBe("claude-sonnet");
|
||||
expect(result.print).toBe(true);
|
||||
expect(result.thinking).toBe("high");
|
||||
expect(result.fileArgs).toEqual(["prompt.md"]);
|
||||
expect(result.messages).toEqual(["Do the task"]);
|
||||
});
|
||||
});
|
||||
});
|
||||
474
packages/coding-agent/test/auth-storage.test.ts
Normal file
474
packages/coding-agent/test/auth-storage.test.ts
Normal file
|
|
@ -0,0 +1,474 @@
|
|||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { registerOAuthProvider } from "@mariozechner/pi-ai/oauth";
|
||||
import lockfile from "proper-lockfile";
|
||||
import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { clearConfigValueCache } from "../src/core/resolve-config-value.js";
|
||||
|
||||
describe("AuthStorage", () => {
|
||||
let tempDir: string;
|
||||
let authJsonPath: string;
|
||||
let authStorage: AuthStorage;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`pi-test-auth-storage-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
authJsonPath = join(tempDir, "auth.json");
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
clearConfigValueCache();
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
function writeAuthJson(data: Record<string, unknown>) {
|
||||
writeFileSync(authJsonPath, JSON.stringify(data));
|
||||
}
|
||||
|
||||
describe("API key resolution", () => {
|
||||
test("literal API key is returned directly", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "sk-ant-literal-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("sk-ant-literal-key");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix executes command and uses stdout", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo test-api-key-from-command" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("test-api-key-from-command");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix trims whitespace from command output", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo ' spaced-key '" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("spaced-key");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix handles multiline output (uses trimmed result)", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!printf 'line1\\nline2'" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("line1\nline2");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on command failure", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!exit 1" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on nonexistent command", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!nonexistent-command-12345" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on empty output", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!printf ''" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey as environment variable name resolves to env value", async () => {
|
||||
const originalEnv = process.env.TEST_AUTH_API_KEY_12345;
|
||||
process.env.TEST_AUTH_API_KEY_12345 = "env-api-key-value";
|
||||
|
||||
try {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "TEST_AUTH_API_KEY_12345" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("env-api-key-value");
|
||||
} finally {
|
||||
if (originalEnv === undefined) {
|
||||
delete process.env.TEST_AUTH_API_KEY_12345;
|
||||
} else {
|
||||
process.env.TEST_AUTH_API_KEY_12345 = originalEnv;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("apiKey as literal value is used directly when not an env var", async () => {
|
||||
// Make sure this isn't an env var
|
||||
delete process.env.literal_api_key_value;
|
||||
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "literal_api_key_value" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("literal_api_key_value");
|
||||
});
|
||||
|
||||
test("apiKey command can use shell features like pipes", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo 'hello world' | tr ' ' '-'" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("hello-world");
|
||||
});
|
||||
|
||||
describe("caching", () => {
|
||||
test("command is only executed once per process", async () => {
|
||||
// Use a command that writes to a file to count invocations
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: command },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
// Call multiple times
|
||||
await authStorage.getApiKey("anthropic");
|
||||
await authStorage.getApiKey("anthropic");
|
||||
await authStorage.getApiKey("anthropic");
|
||||
|
||||
// Command should have only run once
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("cache persists across AuthStorage instances", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: command },
|
||||
});
|
||||
|
||||
// Create multiple AuthStorage instances
|
||||
const storage1 = AuthStorage.create(authJsonPath);
|
||||
await storage1.getApiKey("anthropic");
|
||||
|
||||
const storage2 = AuthStorage.create(authJsonPath);
|
||||
await storage2.getApiKey("anthropic");
|
||||
|
||||
// Command should still have only run once
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("clearConfigValueCache allows command to run again", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: command },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
await authStorage.getApiKey("anthropic");
|
||||
|
||||
// Clear cache and call again
|
||||
clearConfigValueCache();
|
||||
await authStorage.getApiKey("anthropic");
|
||||
|
||||
// Command should have run twice
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(2);
|
||||
});
|
||||
|
||||
test("different commands are cached separately", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo key-anthropic" },
|
||||
openai: { type: "api_key", key: "!echo key-openai" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
const keyA = await authStorage.getApiKey("anthropic");
|
||||
const keyB = await authStorage.getApiKey("openai");
|
||||
|
||||
expect(keyA).toBe("key-anthropic");
|
||||
expect(keyB).toBe("key-openai");
|
||||
});
|
||||
|
||||
test("failed commands are cached (not retried)", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; exit 1'`;
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: command },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
// Call multiple times - all should return undefined
|
||||
const key1 = await authStorage.getApiKey("anthropic");
|
||||
const key2 = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(key1).toBeUndefined();
|
||||
expect(key2).toBeUndefined();
|
||||
|
||||
// Command should have only run once despite failures
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("environment variables are not cached (changes are picked up)", async () => {
|
||||
const envVarName = "TEST_AUTH_KEY_CACHE_TEST_98765";
|
||||
const originalEnv = process.env[envVarName];
|
||||
|
||||
try {
|
||||
process.env[envVarName] = "first-value";
|
||||
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: envVarName },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
const key1 = await authStorage.getApiKey("anthropic");
|
||||
expect(key1).toBe("first-value");
|
||||
|
||||
// Change env var
|
||||
process.env[envVarName] = "second-value";
|
||||
|
||||
const key2 = await authStorage.getApiKey("anthropic");
|
||||
expect(key2).toBe("second-value");
|
||||
} finally {
|
||||
if (originalEnv === undefined) {
|
||||
delete process.env[envVarName];
|
||||
} else {
|
||||
process.env[envVarName] = originalEnv;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("oauth lock compromise handling", () => {
|
||||
test("returns undefined on compromised lock and allows a later retry", async () => {
|
||||
const providerId = `test-oauth-provider-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
||||
registerOAuthProvider({
|
||||
id: providerId,
|
||||
name: "Test OAuth Provider",
|
||||
async login() {
|
||||
throw new Error("Not used in this test");
|
||||
},
|
||||
async refreshToken(credentials) {
|
||||
return {
|
||||
...credentials,
|
||||
access: "refreshed-access-token",
|
||||
expires: Date.now() + 60_000,
|
||||
};
|
||||
},
|
||||
getApiKey(credentials) {
|
||||
return `Bearer ${credentials.access}`;
|
||||
},
|
||||
});
|
||||
|
||||
writeAuthJson({
|
||||
[providerId]: {
|
||||
type: "oauth",
|
||||
refresh: "refresh-token",
|
||||
access: "expired-access-token",
|
||||
expires: Date.now() - 10_000,
|
||||
},
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
const realLock = lockfile.lock.bind(lockfile);
|
||||
const lockSpy = vi.spyOn(lockfile, "lock");
|
||||
lockSpy.mockImplementationOnce(async (file, options) => {
|
||||
options?.onCompromised?.(
|
||||
new Error("Unable to update lock within the stale threshold"),
|
||||
);
|
||||
return realLock(file, options);
|
||||
});
|
||||
|
||||
const firstTry = await authStorage.getApiKey(providerId);
|
||||
expect(firstTry).toBeUndefined();
|
||||
|
||||
lockSpy.mockRestore();
|
||||
|
||||
const secondTry = await authStorage.getApiKey(providerId);
|
||||
expect(secondTry).toBe("Bearer refreshed-access-token");
|
||||
});
|
||||
});
|
||||
|
||||
describe("persistence semantics", () => {
|
||||
test("set preserves unrelated external edits", () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "old-anthropic" },
|
||||
openai: { type: "api_key", key: "openai-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
// Simulate external edit while process is running
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "old-anthropic" },
|
||||
openai: { type: "api_key", key: "openai-key" },
|
||||
google: { type: "api_key", key: "google-key" },
|
||||
});
|
||||
|
||||
authStorage.set("anthropic", { type: "api_key", key: "new-anthropic" });
|
||||
|
||||
const updated = JSON.parse(readFileSync(authJsonPath, "utf-8")) as Record<
|
||||
string,
|
||||
{ key: string }
|
||||
>;
|
||||
expect(updated.anthropic.key).toBe("new-anthropic");
|
||||
expect(updated.openai.key).toBe("openai-key");
|
||||
expect(updated.google.key).toBe("google-key");
|
||||
});
|
||||
|
||||
test("remove preserves unrelated external edits", () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "anthropic-key" },
|
||||
openai: { type: "api_key", key: "openai-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
|
||||
// Simulate external edit while process is running
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "anthropic-key" },
|
||||
openai: { type: "api_key", key: "openai-key" },
|
||||
google: { type: "api_key", key: "google-key" },
|
||||
});
|
||||
|
||||
authStorage.remove("anthropic");
|
||||
|
||||
const updated = JSON.parse(readFileSync(authJsonPath, "utf-8")) as Record<
|
||||
string,
|
||||
{ key: string }
|
||||
>;
|
||||
expect(updated.anthropic).toBeUndefined();
|
||||
expect(updated.openai.key).toBe("openai-key");
|
||||
expect(updated.google.key).toBe("google-key");
|
||||
});
|
||||
|
||||
test("does not overwrite malformed auth file after load error", () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "anthropic-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
writeFileSync(authJsonPath, "{invalid-json", "utf-8");
|
||||
|
||||
authStorage.reload();
|
||||
authStorage.set("openai", { type: "api_key", key: "openai-key" });
|
||||
|
||||
const raw = readFileSync(authJsonPath, "utf-8");
|
||||
expect(raw).toBe("{invalid-json");
|
||||
});
|
||||
|
||||
test("reload records parse errors and drainErrors clears buffer", () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "anthropic-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
writeFileSync(authJsonPath, "{invalid-json", "utf-8");
|
||||
|
||||
authStorage.reload();
|
||||
|
||||
// Keeps previous in-memory data on reload failure
|
||||
expect(authStorage.get("anthropic")).toEqual({
|
||||
type: "api_key",
|
||||
key: "anthropic-key",
|
||||
});
|
||||
|
||||
const firstDrain = authStorage.drainErrors();
|
||||
expect(firstDrain.length).toBeGreaterThan(0);
|
||||
expect(firstDrain[0]).toBeInstanceOf(Error);
|
||||
|
||||
const secondDrain = authStorage.drainErrors();
|
||||
expect(secondDrain).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("runtime overrides", () => {
|
||||
test("runtime override takes priority over auth.json", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo stored-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
authStorage.setRuntimeApiKey("anthropic", "runtime-key");
|
||||
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("runtime-key");
|
||||
});
|
||||
|
||||
test("removing runtime override falls back to auth.json", async () => {
|
||||
writeAuthJson({
|
||||
anthropic: { type: "api_key", key: "!echo stored-key" },
|
||||
});
|
||||
|
||||
authStorage = AuthStorage.create(authJsonPath);
|
||||
authStorage.setRuntimeApiKey("anthropic", "runtime-key");
|
||||
authStorage.removeRuntimeApiKey("anthropic");
|
||||
|
||||
const apiKey = await authStorage.getApiKey("anthropic");
|
||||
|
||||
expect(apiKey).toBe("stored-key");
|
||||
});
|
||||
});
|
||||
});
|
||||
122
packages/coding-agent/test/block-images.test.ts
Normal file
122
packages/coding-agent/test/block-images.test.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import { mkdirSync, rmSync, writeFileSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { processFileArguments } from "../src/cli/file-processor.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { createReadTool } from "../src/core/tools/read.js";
|
||||
|
||||
// 1x1 red PNG image as base64 (smallest valid PNG)
|
||||
const TINY_PNG_BASE64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==";
|
||||
|
||||
describe("blockImages setting", () => {
|
||||
describe("SettingsManager", () => {
|
||||
it("should default blockImages to false", () => {
|
||||
const manager = SettingsManager.inMemory({});
|
||||
expect(manager.getBlockImages()).toBe(false);
|
||||
});
|
||||
|
||||
it("should return true when blockImages is set to true", () => {
|
||||
const manager = SettingsManager.inMemory({
|
||||
images: { blockImages: true },
|
||||
});
|
||||
expect(manager.getBlockImages()).toBe(true);
|
||||
});
|
||||
|
||||
it("should persist blockImages setting via setBlockImages", () => {
|
||||
const manager = SettingsManager.inMemory({});
|
||||
expect(manager.getBlockImages()).toBe(false);
|
||||
|
||||
manager.setBlockImages(true);
|
||||
expect(manager.getBlockImages()).toBe(true);
|
||||
|
||||
manager.setBlockImages(false);
|
||||
expect(manager.getBlockImages()).toBe(false);
|
||||
});
|
||||
|
||||
it("should handle blockImages alongside autoResize", () => {
|
||||
const manager = SettingsManager.inMemory({
|
||||
images: { autoResize: true, blockImages: true },
|
||||
});
|
||||
expect(manager.getImageAutoResize()).toBe(true);
|
||||
expect(manager.getBlockImages()).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Read tool", () => {
|
||||
let testDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
testDir = join(tmpdir(), `block-images-test-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("should always read images (filtering happens at convertToLlm layer)", async () => {
|
||||
// Create test image
|
||||
const imagePath = join(testDir, "test.png");
|
||||
writeFileSync(imagePath, Buffer.from(TINY_PNG_BASE64, "base64"));
|
||||
|
||||
const tool = createReadTool(testDir);
|
||||
const result = await tool.execute("test-1", { path: imagePath });
|
||||
|
||||
// Should have text note + image content
|
||||
expect(result.content.length).toBeGreaterThanOrEqual(1);
|
||||
const hasImage = result.content.some((c) => c.type === "image");
|
||||
expect(hasImage).toBe(true);
|
||||
});
|
||||
|
||||
it("should read text files normally", async () => {
|
||||
// Create test text file
|
||||
const textPath = join(testDir, "test.txt");
|
||||
writeFileSync(textPath, "Hello, world!");
|
||||
|
||||
const tool = createReadTool(testDir);
|
||||
const result = await tool.execute("test-2", { path: textPath });
|
||||
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0].type).toBe("text");
|
||||
const textContent = result.content[0] as { type: "text"; text: string };
|
||||
expect(textContent.text).toContain("Hello, world!");
|
||||
});
|
||||
});
|
||||
|
||||
describe("processFileArguments", () => {
|
||||
let testDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
testDir = join(tmpdir(), `block-images-process-test-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("should always process images (filtering happens at convertToLlm layer)", async () => {
|
||||
// Create test image
|
||||
const imagePath = join(testDir, "test.png");
|
||||
writeFileSync(imagePath, Buffer.from(TINY_PNG_BASE64, "base64"));
|
||||
|
||||
const result = await processFileArguments([imagePath]);
|
||||
|
||||
expect(result.images).toHaveLength(1);
|
||||
expect(result.images[0].type).toBe("image");
|
||||
});
|
||||
|
||||
it("should process text files normally", async () => {
|
||||
// Create test text file
|
||||
const textPath = join(testDir, "test.txt");
|
||||
writeFileSync(textPath, "Hello, world!");
|
||||
|
||||
const result = await processFileArguments([textPath]);
|
||||
|
||||
expect(result.images).toHaveLength(0);
|
||||
expect(result.text).toContain("Hello, world!");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
/**
|
||||
* Test for BMP to PNG conversion in clipboard image handling.
|
||||
* Separate from clipboard-image.test.ts due to different mocking requirements.
|
||||
*
|
||||
* This tests the fix for WSL2/WSLg where clipboard often provides image/bmp
|
||||
* instead of image/png.
|
||||
*/
|
||||
import { describe, expect, test, vi } from "vitest";
|
||||
|
||||
function createTinyBmp1x1Red24bpp(): Uint8Array {
|
||||
// Minimal 1x1 24bpp BMP (BGR + row padding to 4 bytes)
|
||||
// File size = 14 (BMP header) + 40 (DIB header) + 4 (pixel row) = 58
|
||||
const buffer = Buffer.alloc(58);
|
||||
|
||||
// BITMAPFILEHEADER
|
||||
buffer.write("BM", 0, "ascii");
|
||||
buffer.writeUInt32LE(buffer.length, 2); // file size
|
||||
buffer.writeUInt16LE(0, 6); // reserved1
|
||||
buffer.writeUInt16LE(0, 8); // reserved2
|
||||
buffer.writeUInt32LE(54, 10); // pixel data offset
|
||||
|
||||
// BITMAPINFOHEADER
|
||||
buffer.writeUInt32LE(40, 14); // DIB header size
|
||||
buffer.writeInt32LE(1, 18); // width
|
||||
buffer.writeInt32LE(1, 22); // height (positive = bottom-up)
|
||||
buffer.writeUInt16LE(1, 26); // planes
|
||||
buffer.writeUInt16LE(24, 28); // bits per pixel
|
||||
buffer.writeUInt32LE(0, 30); // compression (BI_RGB)
|
||||
buffer.writeUInt32LE(4, 34); // image size (incl. padding)
|
||||
buffer.writeInt32LE(0, 38); // x pixels per meter
|
||||
buffer.writeInt32LE(0, 42); // y pixels per meter
|
||||
buffer.writeUInt32LE(0, 46); // colors used
|
||||
buffer.writeUInt32LE(0, 50); // important colors
|
||||
|
||||
// Pixel data (B, G, R) + 1 byte padding
|
||||
buffer[54] = 0x00; // B
|
||||
buffer[55] = 0x00; // G
|
||||
buffer[56] = 0xff; // R
|
||||
buffer[57] = 0x00; // padding
|
||||
|
||||
return new Uint8Array(buffer);
|
||||
}
|
||||
|
||||
// Mock wl-paste to return BMP
|
||||
vi.mock("child_process", async () => {
|
||||
const actual =
|
||||
await vi.importActual<typeof import("child_process")>("child_process");
|
||||
return {
|
||||
...actual,
|
||||
spawnSync: vi.fn((command: string, args: string[]) => {
|
||||
if (command === "wl-paste" && args.includes("--list-types")) {
|
||||
return { status: 0, stdout: Buffer.from("image/bmp\n"), error: null };
|
||||
}
|
||||
if (command === "wl-paste" && args.includes("image/bmp")) {
|
||||
return {
|
||||
status: 0,
|
||||
stdout: Buffer.from(createTinyBmp1x1Red24bpp()),
|
||||
error: null,
|
||||
};
|
||||
}
|
||||
return { status: 1, stdout: Buffer.alloc(0), error: null };
|
||||
}),
|
||||
};
|
||||
});
|
||||
|
||||
// Mock the native clipboard (not used in Wayland path, but needs to be mocked)
|
||||
vi.mock("@mariozechner/clipboard", () => ({
|
||||
default: {
|
||||
hasImage: vi.fn(() => false),
|
||||
getImageBinary: vi.fn(() => Promise.resolve(null)),
|
||||
},
|
||||
}));
|
||||
|
||||
describe("readClipboardImage BMP conversion", () => {
|
||||
test("converts BMP to PNG on Wayland/WSLg", async () => {
|
||||
const { readClipboardImage } =
|
||||
await import("../src/utils/clipboard-image.js");
|
||||
|
||||
// Simulate Wayland session (WSLg)
|
||||
const image = await readClipboardImage({
|
||||
env: { WAYLAND_DISPLAY: "wayland-0" },
|
||||
platform: "linux",
|
||||
});
|
||||
|
||||
expect(image).not.toBeNull();
|
||||
expect(image!.mimeType).toBe("image/png");
|
||||
|
||||
// Verify PNG magic bytes
|
||||
expect(image!.bytes[0]).toBe(0x89);
|
||||
expect(image!.bytes[1]).toBe(0x50); // P
|
||||
expect(image!.bytes[2]).toBe(0x4e); // N
|
||||
expect(image!.bytes[3]).toBe(0x47); // G
|
||||
});
|
||||
});
|
||||
159
packages/coding-agent/test/clipboard-image.test.ts
Normal file
159
packages/coding-agent/test/clipboard-image.test.ts
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
import type { SpawnSyncReturns } from "child_process";
|
||||
import { beforeEach, describe, expect, test, vi } from "vitest";
|
||||
|
||||
const mocks = vi.hoisted(() => {
|
||||
return {
|
||||
spawnSync:
|
||||
vi.fn<
|
||||
(
|
||||
command: string,
|
||||
args: string[],
|
||||
options: unknown,
|
||||
) => SpawnSyncReturns<Buffer>
|
||||
>(),
|
||||
clipboard: {
|
||||
hasImage: vi.fn<() => boolean>(),
|
||||
getImageBinary: vi.fn<() => Promise<Uint8Array | null>>(),
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("child_process", () => {
|
||||
return {
|
||||
spawnSync: mocks.spawnSync,
|
||||
};
|
||||
});
|
||||
|
||||
vi.mock("../src/utils/clipboard-native.js", () => {
|
||||
return {
|
||||
clipboard: mocks.clipboard,
|
||||
};
|
||||
});
|
||||
|
||||
function spawnOk(stdout: Buffer): SpawnSyncReturns<Buffer> {
|
||||
return {
|
||||
pid: 123,
|
||||
output: [Buffer.alloc(0), stdout, Buffer.alloc(0)],
|
||||
stdout,
|
||||
stderr: Buffer.alloc(0),
|
||||
status: 0,
|
||||
signal: null,
|
||||
};
|
||||
}
|
||||
|
||||
function spawnError(error: Error): SpawnSyncReturns<Buffer> {
|
||||
return {
|
||||
pid: 123,
|
||||
output: [Buffer.alloc(0), Buffer.alloc(0), Buffer.alloc(0)],
|
||||
stdout: Buffer.alloc(0),
|
||||
stderr: Buffer.alloc(0),
|
||||
status: null,
|
||||
signal: null,
|
||||
error,
|
||||
};
|
||||
}
|
||||
|
||||
describe("readClipboardImage", () => {
|
||||
beforeEach(() => {
|
||||
vi.resetModules();
|
||||
mocks.spawnSync.mockReset();
|
||||
mocks.clipboard.hasImage.mockReset();
|
||||
mocks.clipboard.getImageBinary.mockReset();
|
||||
});
|
||||
|
||||
test("Wayland: uses wl-paste and never calls clipboard", async () => {
|
||||
mocks.clipboard.hasImage.mockImplementation(() => {
|
||||
throw new Error("clipboard.hasImage should not be called on Wayland");
|
||||
});
|
||||
|
||||
mocks.spawnSync.mockImplementation((command, args, _options) => {
|
||||
if (command === "wl-paste" && args[0] === "--list-types") {
|
||||
return spawnOk(Buffer.from("text/plain\nimage/png\n", "utf-8"));
|
||||
}
|
||||
if (command === "wl-paste" && args[0] === "--type") {
|
||||
return spawnOk(Buffer.from([1, 2, 3]));
|
||||
}
|
||||
throw new Error(
|
||||
`Unexpected spawnSync call: ${command} ${args.join(" ")}`,
|
||||
);
|
||||
});
|
||||
|
||||
const { readClipboardImage } =
|
||||
await import("../src/utils/clipboard-image.js");
|
||||
const result = await readClipboardImage({
|
||||
platform: "linux",
|
||||
env: { WAYLAND_DISPLAY: "1" },
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.mimeType).toBe("image/png");
|
||||
expect(Array.from(result?.bytes ?? [])).toEqual([1, 2, 3]);
|
||||
});
|
||||
|
||||
test("Wayland: falls back to xclip when wl-paste is missing", async () => {
|
||||
mocks.clipboard.hasImage.mockImplementation(() => {
|
||||
throw new Error("clipboard.hasImage should not be called on Wayland");
|
||||
});
|
||||
|
||||
const enoent = new Error("spawn ENOENT");
|
||||
(enoent as { code?: string }).code = "ENOENT";
|
||||
|
||||
mocks.spawnSync.mockImplementation((command, args, _options) => {
|
||||
if (command === "wl-paste") {
|
||||
return spawnError(enoent);
|
||||
}
|
||||
|
||||
if (command === "xclip" && args.includes("TARGETS")) {
|
||||
return spawnOk(Buffer.from("image/png\n", "utf-8"));
|
||||
}
|
||||
|
||||
if (command === "xclip" && args.includes("image/png")) {
|
||||
return spawnOk(Buffer.from([9, 8]));
|
||||
}
|
||||
|
||||
return spawnOk(Buffer.alloc(0));
|
||||
});
|
||||
|
||||
const { readClipboardImage } =
|
||||
await import("../src/utils/clipboard-image.js");
|
||||
const result = await readClipboardImage({
|
||||
platform: "linux",
|
||||
env: { XDG_SESSION_TYPE: "wayland" },
|
||||
});
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.mimeType).toBe("image/png");
|
||||
expect(Array.from(result?.bytes ?? [])).toEqual([9, 8]);
|
||||
});
|
||||
|
||||
test("Non-Wayland: uses clipboard", async () => {
|
||||
mocks.spawnSync.mockImplementation(() => {
|
||||
throw new Error(
|
||||
"spawnSync should not be called for non-Wayland sessions",
|
||||
);
|
||||
});
|
||||
|
||||
mocks.clipboard.hasImage.mockReturnValue(true);
|
||||
mocks.clipboard.getImageBinary.mockResolvedValue(new Uint8Array([7]));
|
||||
|
||||
const { readClipboardImage } =
|
||||
await import("../src/utils/clipboard-image.js");
|
||||
const result = await readClipboardImage({ platform: "linux", env: {} });
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.mimeType).toBe("image/png");
|
||||
expect(Array.from(result?.bytes ?? [])).toEqual([7]);
|
||||
});
|
||||
|
||||
test("Non-Wayland: returns null when clipboard has no image", async () => {
|
||||
mocks.spawnSync.mockImplementation(() => {
|
||||
throw new Error(
|
||||
"spawnSync should not be called for non-Wayland sessions",
|
||||
);
|
||||
});
|
||||
|
||||
mocks.clipboard.hasImage.mockReturnValue(false);
|
||||
|
||||
const { readClipboardImage } =
|
||||
await import("../src/utils/clipboard-image.js");
|
||||
const result = await readClipboardImage({ platform: "linux", env: {} });
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
* Verify the documentation example from extensions.md compiles and works.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type {
|
||||
ExtensionAPI,
|
||||
SessionBeforeCompactEvent,
|
||||
SessionCompactEvent,
|
||||
} from "../src/core/extensions/index.js";
|
||||
|
||||
describe("Documentation example", () => {
|
||||
it("custom compaction example should type-check correctly", () => {
|
||||
// This is the example from extensions.md - verify it compiles
|
||||
const exampleExtension = (pi: ExtensionAPI) => {
|
||||
pi.on(
|
||||
"session_before_compact",
|
||||
async (event: SessionBeforeCompactEvent, ctx) => {
|
||||
// All these should be accessible on the event
|
||||
const { preparation, branchEntries } = event;
|
||||
// sessionManager, modelRegistry, and model come from ctx
|
||||
const { sessionManager, modelRegistry } = ctx;
|
||||
const {
|
||||
messagesToSummarize,
|
||||
turnPrefixMessages,
|
||||
tokensBefore,
|
||||
firstKeptEntryId,
|
||||
isSplitTurn,
|
||||
} = preparation;
|
||||
|
||||
// Verify types
|
||||
expect(Array.isArray(messagesToSummarize)).toBe(true);
|
||||
expect(Array.isArray(turnPrefixMessages)).toBe(true);
|
||||
expect(typeof isSplitTurn).toBe("boolean");
|
||||
expect(typeof tokensBefore).toBe("number");
|
||||
expect(typeof sessionManager.getEntries).toBe("function");
|
||||
expect(typeof modelRegistry.getApiKey).toBe("function");
|
||||
expect(typeof firstKeptEntryId).toBe("string");
|
||||
expect(Array.isArray(branchEntries)).toBe(true);
|
||||
|
||||
const summary = messagesToSummarize
|
||||
.filter((m) => m.role === "user")
|
||||
.map(
|
||||
(m) =>
|
||||
`- ${typeof m.content === "string" ? m.content.slice(0, 100) : "[complex]"}`,
|
||||
)
|
||||
.join("\n");
|
||||
|
||||
// Extensions return compaction content - SessionManager adds id/parentId
|
||||
return {
|
||||
compaction: {
|
||||
summary: `User requests:\n${summary}`,
|
||||
firstKeptEntryId,
|
||||
tokensBefore,
|
||||
},
|
||||
};
|
||||
},
|
||||
);
|
||||
};
|
||||
|
||||
// Just verify the function exists and is callable
|
||||
expect(typeof exampleExtension).toBe("function");
|
||||
});
|
||||
|
||||
it("compact event should have correct fields", () => {
|
||||
const checkCompactEvent = (pi: ExtensionAPI) => {
|
||||
pi.on("session_compact", async (event: SessionCompactEvent) => {
|
||||
// These should all be accessible
|
||||
const entry = event.compactionEntry;
|
||||
const fromExtension = event.fromExtension;
|
||||
|
||||
expect(entry.type).toBe("compaction");
|
||||
expect(typeof entry.summary).toBe("string");
|
||||
expect(typeof entry.tokensBefore).toBe("number");
|
||||
expect(typeof fromExtension).toBe("boolean");
|
||||
});
|
||||
};
|
||||
|
||||
expect(typeof checkCompactEvent).toBe("function");
|
||||
});
|
||||
});
|
||||
434
packages/coding-agent/test/compaction-extensions.test.ts
Normal file
434
packages/coding-agent/test/compaction-extensions.test.ts
Normal file
|
|
@ -0,0 +1,434 @@
|
|||
/**
|
||||
* Tests for compaction extension events (before_compact / compact).
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import {
|
||||
createExtensionRuntime,
|
||||
type Extension,
|
||||
type SessionBeforeCompactEvent,
|
||||
type SessionCompactEvent,
|
||||
type SessionEvent,
|
||||
} from "../src/core/extensions/index.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
import { createTestResourceLoader } from "./utilities.js";
|
||||
|
||||
const API_KEY =
|
||||
process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
||||
|
||||
describe.skipIf(!API_KEY)("Compaction extensions", () => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
let capturedEvents: SessionEvent[];
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-compaction-extensions-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
capturedEvents = [];
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createExtension(
|
||||
onBeforeCompact?: (
|
||||
event: SessionBeforeCompactEvent,
|
||||
) => { cancel?: boolean; compaction?: any } | undefined,
|
||||
onCompact?: (event: SessionCompactEvent) => void,
|
||||
): Extension {
|
||||
const handlers = new Map<
|
||||
string,
|
||||
((event: any, ctx: any) => Promise<any>)[]
|
||||
>();
|
||||
|
||||
handlers.set("session_before_compact", [
|
||||
async (event: SessionBeforeCompactEvent) => {
|
||||
capturedEvents.push(event);
|
||||
if (onBeforeCompact) {
|
||||
return onBeforeCompact(event);
|
||||
}
|
||||
return undefined;
|
||||
},
|
||||
]);
|
||||
|
||||
handlers.set("session_compact", [
|
||||
async (event: SessionCompactEvent) => {
|
||||
capturedEvents.push(event);
|
||||
if (onCompact) {
|
||||
onCompact(event);
|
||||
}
|
||||
return undefined;
|
||||
},
|
||||
]);
|
||||
|
||||
return {
|
||||
path: "test-extension",
|
||||
resolvedPath: "/test/test-extension.ts",
|
||||
handlers,
|
||||
tools: new Map(),
|
||||
messageRenderers: new Map(),
|
||||
commands: new Map(),
|
||||
flags: new Map(),
|
||||
shortcuts: new Map(),
|
||||
};
|
||||
}
|
||||
|
||||
function createSession(extensions: Extension[]) {
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.create(tempDir);
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
const runtime = createExtensionRuntime();
|
||||
const resourceLoader = {
|
||||
...createTestResourceLoader(),
|
||||
getExtensions: () => ({ extensions, errors: [], runtime }),
|
||||
};
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader,
|
||||
});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should emit before_compact and compact events", async () => {
|
||||
const extension = createExtension();
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is 3+3? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.compact();
|
||||
|
||||
const beforeCompactEvents = capturedEvents.filter(
|
||||
(e): e is SessionBeforeCompactEvent =>
|
||||
e.type === "session_before_compact",
|
||||
);
|
||||
const compactEvents = capturedEvents.filter(
|
||||
(e): e is SessionCompactEvent => e.type === "session_compact",
|
||||
);
|
||||
|
||||
expect(beforeCompactEvents.length).toBe(1);
|
||||
expect(compactEvents.length).toBe(1);
|
||||
|
||||
const beforeEvent = beforeCompactEvents[0];
|
||||
expect(beforeEvent.preparation).toBeDefined();
|
||||
expect(beforeEvent.preparation.messagesToSummarize).toBeDefined();
|
||||
expect(beforeEvent.preparation.turnPrefixMessages).toBeDefined();
|
||||
expect(beforeEvent.preparation.tokensBefore).toBeGreaterThanOrEqual(0);
|
||||
expect(typeof beforeEvent.preparation.isSplitTurn).toBe("boolean");
|
||||
expect(beforeEvent.branchEntries).toBeDefined();
|
||||
// sessionManager, modelRegistry, and model are now on ctx, not event
|
||||
|
||||
const afterEvent = compactEvents[0];
|
||||
expect(afterEvent.compactionEntry).toBeDefined();
|
||||
expect(afterEvent.compactionEntry.summary.length).toBeGreaterThan(0);
|
||||
expect(afterEvent.compactionEntry.tokensBefore).toBeGreaterThanOrEqual(0);
|
||||
expect(afterEvent.fromExtension).toBe(false);
|
||||
}, 120000);
|
||||
|
||||
it("should allow extensions to cancel compaction", async () => {
|
||||
const extension = createExtension(() => ({ cancel: true }));
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await expect(session.compact()).rejects.toThrow("Compaction cancelled");
|
||||
|
||||
const compactEvents = capturedEvents.filter(
|
||||
(e) => e.type === "session_compact",
|
||||
);
|
||||
expect(compactEvents.length).toBe(0);
|
||||
}, 120000);
|
||||
|
||||
it("should allow extensions to provide custom compaction", async () => {
|
||||
const customSummary = "Custom summary from extension";
|
||||
|
||||
const extension = createExtension((event) => {
|
||||
if (event.type === "session_before_compact") {
|
||||
return {
|
||||
compaction: {
|
||||
summary: customSummary,
|
||||
firstKeptEntryId: event.preparation.firstKeptEntryId,
|
||||
tokensBefore: event.preparation.tokensBefore,
|
||||
},
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is 3+3? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBe(customSummary);
|
||||
|
||||
const compactEvents = capturedEvents.filter(
|
||||
(e) => e.type === "session_compact",
|
||||
);
|
||||
expect(compactEvents.length).toBe(1);
|
||||
|
||||
const afterEvent = compactEvents[0];
|
||||
if (afterEvent.type === "session_compact") {
|
||||
expect(afterEvent.compactionEntry.summary).toBe(customSummary);
|
||||
expect(afterEvent.fromExtension).toBe(true);
|
||||
}
|
||||
}, 120000);
|
||||
|
||||
it("should include entries in compact event after compaction is saved", async () => {
|
||||
const extension = createExtension();
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.compact();
|
||||
|
||||
const compactEvents = capturedEvents.filter(
|
||||
(e) => e.type === "session_compact",
|
||||
);
|
||||
expect(compactEvents.length).toBe(1);
|
||||
|
||||
const afterEvent = compactEvents[0];
|
||||
if (afterEvent.type === "session_compact") {
|
||||
// sessionManager is now on ctx, use session.sessionManager directly
|
||||
const entries = session.sessionManager.getEntries();
|
||||
const hasCompactionEntry = entries.some(
|
||||
(e: { type: string }) => e.type === "compaction",
|
||||
);
|
||||
expect(hasCompactionEntry).toBe(true);
|
||||
}
|
||||
}, 120000);
|
||||
|
||||
it("should continue with default compaction if extension throws error", async () => {
|
||||
const throwingExtension: Extension = {
|
||||
path: "throwing-extension",
|
||||
resolvedPath: "/test/throwing-extension.ts",
|
||||
handlers: new Map<string, ((event: any, ctx: any) => Promise<any>)[]>([
|
||||
[
|
||||
"session_before_compact",
|
||||
[
|
||||
async (event: SessionBeforeCompactEvent) => {
|
||||
capturedEvents.push(event);
|
||||
throw new Error("Extension intentionally throws");
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
"session_compact",
|
||||
[
|
||||
async (event: SessionCompactEvent) => {
|
||||
capturedEvents.push(event);
|
||||
return undefined;
|
||||
},
|
||||
],
|
||||
],
|
||||
]),
|
||||
tools: new Map(),
|
||||
messageRenderers: new Map(),
|
||||
commands: new Map(),
|
||||
flags: new Map(),
|
||||
shortcuts: new Map(),
|
||||
};
|
||||
|
||||
createSession([throwingExtension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
|
||||
const compactEvents = capturedEvents.filter(
|
||||
(e): e is SessionCompactEvent => e.type === "session_compact",
|
||||
);
|
||||
expect(compactEvents.length).toBe(1);
|
||||
expect(compactEvents[0].fromExtension).toBe(false);
|
||||
}, 120000);
|
||||
|
||||
it("should call multiple extensions in order", async () => {
|
||||
const callOrder: string[] = [];
|
||||
|
||||
const extension1: Extension = {
|
||||
path: "extension1",
|
||||
resolvedPath: "/test/extension1.ts",
|
||||
handlers: new Map<string, ((event: any, ctx: any) => Promise<any>)[]>([
|
||||
[
|
||||
"session_before_compact",
|
||||
[
|
||||
async () => {
|
||||
callOrder.push("extension1-before");
|
||||
return undefined;
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
"session_compact",
|
||||
[
|
||||
async () => {
|
||||
callOrder.push("extension1-after");
|
||||
return undefined;
|
||||
},
|
||||
],
|
||||
],
|
||||
]),
|
||||
tools: new Map(),
|
||||
messageRenderers: new Map(),
|
||||
commands: new Map(),
|
||||
flags: new Map(),
|
||||
shortcuts: new Map(),
|
||||
};
|
||||
|
||||
const extension2: Extension = {
|
||||
path: "extension2",
|
||||
resolvedPath: "/test/extension2.ts",
|
||||
handlers: new Map<string, ((event: any, ctx: any) => Promise<any>)[]>([
|
||||
[
|
||||
"session_before_compact",
|
||||
[
|
||||
async () => {
|
||||
callOrder.push("extension2-before");
|
||||
return undefined;
|
||||
},
|
||||
],
|
||||
],
|
||||
[
|
||||
"session_compact",
|
||||
[
|
||||
async () => {
|
||||
callOrder.push("extension2-after");
|
||||
return undefined;
|
||||
},
|
||||
],
|
||||
],
|
||||
]),
|
||||
tools: new Map(),
|
||||
messageRenderers: new Map(),
|
||||
commands: new Map(),
|
||||
flags: new Map(),
|
||||
shortcuts: new Map(),
|
||||
};
|
||||
|
||||
createSession([extension1, extension2]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.compact();
|
||||
|
||||
expect(callOrder).toEqual([
|
||||
"extension1-before",
|
||||
"extension2-before",
|
||||
"extension1-after",
|
||||
"extension2-after",
|
||||
]);
|
||||
}, 120000);
|
||||
|
||||
it("should pass correct data in before_compact event", async () => {
|
||||
let capturedBeforeEvent: SessionBeforeCompactEvent | null = null;
|
||||
|
||||
const extension = createExtension((event) => {
|
||||
capturedBeforeEvent = event;
|
||||
return undefined;
|
||||
});
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.prompt("What is 3+3? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
await session.compact();
|
||||
|
||||
expect(capturedBeforeEvent).not.toBeNull();
|
||||
const event = capturedBeforeEvent!;
|
||||
expect(typeof event.preparation.isSplitTurn).toBe("boolean");
|
||||
expect(event.preparation.firstKeptEntryId).toBeDefined();
|
||||
|
||||
expect(Array.isArray(event.preparation.messagesToSummarize)).toBe(true);
|
||||
expect(Array.isArray(event.preparation.turnPrefixMessages)).toBe(true);
|
||||
|
||||
expect(typeof event.preparation.tokensBefore).toBe("number");
|
||||
|
||||
expect(Array.isArray(event.branchEntries)).toBe(true);
|
||||
|
||||
// sessionManager, modelRegistry, and model are now on ctx, not event
|
||||
// Verify they're accessible via session
|
||||
expect(typeof session.sessionManager.getEntries).toBe("function");
|
||||
expect(typeof session.modelRegistry.getApiKey).toBe("function");
|
||||
|
||||
const entries = session.sessionManager.getEntries();
|
||||
expect(Array.isArray(entries)).toBe(true);
|
||||
expect(entries.length).toBeGreaterThan(0);
|
||||
}, 120000);
|
||||
|
||||
it("should use extension compaction even with different values", async () => {
|
||||
const customSummary = "Custom summary with modified values";
|
||||
|
||||
const extension = createExtension((event) => {
|
||||
if (event.type === "session_before_compact") {
|
||||
return {
|
||||
compaction: {
|
||||
summary: customSummary,
|
||||
firstKeptEntryId: event.preparation.firstKeptEntryId,
|
||||
tokensBefore: 999,
|
||||
},
|
||||
};
|
||||
}
|
||||
return undefined;
|
||||
});
|
||||
createSession([extension]);
|
||||
|
||||
await session.prompt("What is 2+2? Reply with just the number.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBe(customSummary);
|
||||
expect(result.tokensBefore).toBe(999);
|
||||
}, 120000);
|
||||
});
|
||||
|
|
@ -0,0 +1,80 @@
|
|||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage, Model } from "@mariozechner/pi-ai";
|
||||
import { beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { generateSummary } from "../src/core/compaction/index.js";
|
||||
|
||||
const { completeSimpleMock } = vi.hoisted(() => ({
|
||||
completeSimpleMock: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock("@mariozechner/pi-ai", async (importOriginal) => {
|
||||
const actual = await importOriginal<typeof import("@mariozechner/pi-ai")>();
|
||||
return {
|
||||
...actual,
|
||||
completeSimple: completeSimpleMock,
|
||||
};
|
||||
});
|
||||
|
||||
function createModel(reasoning: boolean): Model<"anthropic-messages"> {
|
||||
return {
|
||||
id: reasoning ? "reasoning-model" : "non-reasoning-model",
|
||||
name: reasoning ? "Reasoning Model" : "Non-reasoning Model",
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
reasoning,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
}
|
||||
|
||||
const mockSummaryResponse: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "## Goal\nTest summary" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-5",
|
||||
usage: {
|
||||
input: 10,
|
||||
output: 10,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 20,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
{ role: "user", content: "Summarize this.", timestamp: Date.now() },
|
||||
];
|
||||
|
||||
describe("generateSummary reasoning options", () => {
|
||||
beforeEach(() => {
|
||||
completeSimpleMock.mockReset();
|
||||
completeSimpleMock.mockResolvedValue(mockSummaryResponse);
|
||||
});
|
||||
|
||||
it("sets reasoning=high for reasoning-capable models", async () => {
|
||||
await generateSummary(messages, createModel(true), 2000, "test-key");
|
||||
|
||||
expect(completeSimpleMock).toHaveBeenCalledTimes(1);
|
||||
expect(completeSimpleMock.mock.calls[0][2]).toMatchObject({
|
||||
reasoning: "high",
|
||||
apiKey: "test-key",
|
||||
});
|
||||
});
|
||||
|
||||
it("does not set reasoning for non-reasoning models", async () => {
|
||||
await generateSummary(messages, createModel(false), 2000, "test-key");
|
||||
|
||||
expect(completeSimpleMock).toHaveBeenCalledTimes(1);
|
||||
expect(completeSimpleMock.mock.calls[0][2]).toMatchObject({
|
||||
apiKey: "test-key",
|
||||
});
|
||||
expect(completeSimpleMock.mock.calls[0][2]).not.toHaveProperty("reasoning");
|
||||
});
|
||||
});
|
||||
235
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
235
packages/coding-agent/test/compaction-thinking-model.test.ts
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
/**
|
||||
* Test for compaction with thinking models.
|
||||
*
|
||||
* Tests both:
|
||||
* - Claude via Antigravity (google-gemini-cli API)
|
||||
* - Claude via real Anthropic API (anthropic-messages API)
|
||||
*
|
||||
* Reproduces issue where compact fails when maxTokens < thinkingBudget.
|
||||
*/
|
||||
|
||||
import { existsSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { Agent, type ThinkingLevel } from "@mariozechner/pi-agent-core";
|
||||
import { getModel, type Model } from "@mariozechner/pi-ai";
|
||||
import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
import {
|
||||
API_KEY,
|
||||
createTestResourceLoader,
|
||||
getRealAuthStorage,
|
||||
hasAuthForProvider,
|
||||
resolveApiKey,
|
||||
} from "./utilities.js";
|
||||
|
||||
// Check for auth
|
||||
const HAS_ANTIGRAVITY_AUTH = hasAuthForProvider("google-antigravity");
|
||||
const HAS_ANTHROPIC_AUTH = !!API_KEY;
|
||||
|
||||
describe.skipIf(!HAS_ANTIGRAVITY_AUTH)(
|
||||
"Compaction with thinking models (Antigravity)",
|
||||
() => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
let apiKey: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
const key = await resolveApiKey("google-antigravity");
|
||||
if (!key) throw new Error("Failed to resolve google-antigravity API key");
|
||||
apiKey = key;
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `pi-thinking-compaction-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(
|
||||
modelId: "claude-opus-4-5-thinking" | "claude-sonnet-4-5",
|
||||
thinkingLevel: ThinkingLevel = "high",
|
||||
) {
|
||||
const model = getModel("google-antigravity", modelId);
|
||||
if (!model) {
|
||||
throw new Error(`Model not found: google-antigravity/${modelId}`);
|
||||
}
|
||||
|
||||
const agent = new Agent({
|
||||
getApiKey: () => apiKey,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
thinkingLevel,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
// Use minimal keepRecentTokens so small test conversations have something to summarize
|
||||
// settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } });
|
||||
|
||||
const authStorage = getRealAuthStorage();
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
session.subscribe(() => {});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should compact successfully with claude-opus-4-5-thinking and thinking level high", async () => {
|
||||
createSession("claude-opus-4-5-thinking", "high");
|
||||
|
||||
// Send a simple prompt
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Verify we got a response
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||
|
||||
// Now try to compact - this should not throw
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Verify session is still usable after compaction
|
||||
const messagesAfterCompact = session.messages;
|
||||
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||
}, 180000);
|
||||
|
||||
it("should compact successfully with claude-sonnet-4-5 (non-thinking) for comparison", async () => {
|
||||
createSession("claude-sonnet-4-5", "off");
|
||||
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
}, 180000);
|
||||
},
|
||||
);
|
||||
|
||||
// ============================================================================
|
||||
// Real Anthropic API tests (for comparison)
|
||||
// ============================================================================
|
||||
|
||||
describe.skipIf(!HAS_ANTHROPIC_AUTH)(
|
||||
"Compaction with thinking models (Anthropic)",
|
||||
() => {
|
||||
let session: AgentSession;
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`pi-thinking-compaction-anthropic-test-${Date.now()}`,
|
||||
);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
if (session) {
|
||||
session.dispose();
|
||||
}
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
function createSession(
|
||||
model: Model<any>,
|
||||
thinkingLevel: ThinkingLevel = "high",
|
||||
) {
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt: "You are a helpful assistant. Be concise.",
|
||||
tools: codingTools,
|
||||
thinkingLevel,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
|
||||
const authStorage = getRealAuthStorage();
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
|
||||
session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
session.subscribe(() => {});
|
||||
|
||||
return session;
|
||||
}
|
||||
|
||||
it("should compact successfully with claude-3-7-sonnet and thinking level high", async () => {
|
||||
const model = getModel("anthropic", "claude-3-7-sonnet-latest")!;
|
||||
createSession(model, "high");
|
||||
|
||||
// Send a simple prompt
|
||||
await session.prompt("Write down the first 10 prime numbers.");
|
||||
await session.agent.waitForIdle();
|
||||
|
||||
// Verify we got a response
|
||||
const messages = session.messages;
|
||||
expect(messages.length).toBeGreaterThan(0);
|
||||
|
||||
const assistantMessages = messages.filter((m) => m.role === "assistant");
|
||||
expect(assistantMessages.length).toBeGreaterThan(0);
|
||||
|
||||
// Now try to compact - this should not throw
|
||||
const result = await session.compact();
|
||||
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.summary.length).toBeGreaterThan(0);
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Verify session is still usable after compaction
|
||||
const messagesAfterCompact = session.messages;
|
||||
expect(messagesAfterCompact.length).toBeGreaterThan(0);
|
||||
expect(messagesAfterCompact[0].role).toBe("compactionSummary");
|
||||
}, 180000);
|
||||
},
|
||||
);
|
||||
523
packages/coding-agent/test/compaction.test.ts
Normal file
523
packages/coding-agent/test/compaction.test.ts
Normal file
|
|
@ -0,0 +1,523 @@
|
|||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { AssistantMessage, Usage } from "@mariozechner/pi-ai";
|
||||
import { getModel } from "@mariozechner/pi-ai";
|
||||
import { readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
type CompactionSettings,
|
||||
calculateContextTokens,
|
||||
compact,
|
||||
DEFAULT_COMPACTION_SETTINGS,
|
||||
findCutPoint,
|
||||
getLastAssistantUsage,
|
||||
prepareCompaction,
|
||||
shouldCompact,
|
||||
} from "../src/core/compaction/index.js";
|
||||
import {
|
||||
buildSessionContext,
|
||||
type CompactionEntry,
|
||||
type ModelChangeEntry,
|
||||
migrateSessionEntries,
|
||||
parseSessionEntries,
|
||||
type SessionEntry,
|
||||
type SessionMessageEntry,
|
||||
type ThinkingLevelChangeEntry,
|
||||
} from "../src/core/session-manager.js";
|
||||
|
||||
// ============================================================================
|
||||
// Test fixtures
|
||||
// ============================================================================
|
||||
|
||||
function loadLargeSessionEntries(): SessionEntry[] {
|
||||
const sessionPath = join(__dirname, "fixtures/large-session.jsonl");
|
||||
const content = readFileSync(sessionPath, "utf-8");
|
||||
const entries = parseSessionEntries(content);
|
||||
migrateSessionEntries(entries); // Add id/parentId for v1 fixtures
|
||||
return entries.filter((e): e is SessionEntry => e.type !== "session");
|
||||
}
|
||||
|
||||
function createMockUsage(
|
||||
input: number,
|
||||
output: number,
|
||||
cacheRead = 0,
|
||||
cacheWrite = 0,
|
||||
): Usage {
|
||||
return {
|
||||
input,
|
||||
output,
|
||||
cacheRead,
|
||||
cacheWrite,
|
||||
totalTokens: input + output + cacheRead + cacheWrite,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
||||
function createUserMessage(text: string): AgentMessage {
|
||||
return { role: "user", content: text, timestamp: Date.now() };
|
||||
}
|
||||
|
||||
function createAssistantMessage(text: string, usage?: Usage): AssistantMessage {
|
||||
return {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text }],
|
||||
usage: usage || createMockUsage(100, 50),
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-5",
|
||||
};
|
||||
}
|
||||
|
||||
let entryCounter = 0;
|
||||
let lastId: string | null = null;
|
||||
|
||||
function resetEntryCounter() {
|
||||
entryCounter = 0;
|
||||
lastId = null;
|
||||
}
|
||||
|
||||
// Reset counter before each test to get predictable IDs
|
||||
beforeEach(() => {
|
||||
resetEntryCounter();
|
||||
});
|
||||
|
||||
function createMessageEntry(message: AgentMessage): SessionMessageEntry {
|
||||
const id = `test-id-${entryCounter++}`;
|
||||
const entry: SessionMessageEntry = {
|
||||
type: "message",
|
||||
id,
|
||||
parentId: lastId,
|
||||
timestamp: new Date().toISOString(),
|
||||
message,
|
||||
};
|
||||
lastId = id;
|
||||
return entry;
|
||||
}
|
||||
|
||||
function createCompactionEntry(
|
||||
summary: string,
|
||||
firstKeptEntryId: string,
|
||||
): CompactionEntry {
|
||||
const id = `test-id-${entryCounter++}`;
|
||||
const entry: CompactionEntry = {
|
||||
type: "compaction",
|
||||
id,
|
||||
parentId: lastId,
|
||||
timestamp: new Date().toISOString(),
|
||||
summary,
|
||||
firstKeptEntryId,
|
||||
tokensBefore: 10000,
|
||||
};
|
||||
lastId = id;
|
||||
return entry;
|
||||
}
|
||||
|
||||
function createModelChangeEntry(
|
||||
provider: string,
|
||||
modelId: string,
|
||||
): ModelChangeEntry {
|
||||
const id = `test-id-${entryCounter++}`;
|
||||
const entry: ModelChangeEntry = {
|
||||
type: "model_change",
|
||||
id,
|
||||
parentId: lastId,
|
||||
timestamp: new Date().toISOString(),
|
||||
provider,
|
||||
modelId,
|
||||
};
|
||||
lastId = id;
|
||||
return entry;
|
||||
}
|
||||
|
||||
function createThinkingLevelEntry(
|
||||
thinkingLevel: string,
|
||||
): ThinkingLevelChangeEntry {
|
||||
const id = `test-id-${entryCounter++}`;
|
||||
const entry: ThinkingLevelChangeEntry = {
|
||||
type: "thinking_level_change",
|
||||
id,
|
||||
parentId: lastId,
|
||||
timestamp: new Date().toISOString(),
|
||||
thinkingLevel,
|
||||
};
|
||||
lastId = id;
|
||||
return entry;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Unit tests
|
||||
// ============================================================================
|
||||
|
||||
describe("Token calculation", () => {
|
||||
it("should calculate total context tokens from usage", () => {
|
||||
const usage = createMockUsage(1000, 500, 200, 100);
|
||||
expect(calculateContextTokens(usage)).toBe(1800);
|
||||
});
|
||||
|
||||
it("should handle zero values", () => {
|
||||
const usage = createMockUsage(0, 0, 0, 0);
|
||||
expect(calculateContextTokens(usage)).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getLastAssistantUsage", () => {
|
||||
it("should find the last non-aborted assistant message usage", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("Hello")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("Hi", createMockUsage(100, 50)),
|
||||
),
|
||||
createMessageEntry(createUserMessage("How are you?")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("Good", createMockUsage(200, 100)),
|
||||
),
|
||||
];
|
||||
|
||||
const usage = getLastAssistantUsage(entries);
|
||||
expect(usage).not.toBeNull();
|
||||
expect(usage!.input).toBe(200);
|
||||
});
|
||||
|
||||
it("should skip aborted messages", () => {
|
||||
const abortedMsg: AssistantMessage = {
|
||||
...createAssistantMessage("Aborted", createMockUsage(300, 150)),
|
||||
stopReason: "aborted",
|
||||
};
|
||||
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("Hello")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("Hi", createMockUsage(100, 50)),
|
||||
),
|
||||
createMessageEntry(createUserMessage("How are you?")),
|
||||
createMessageEntry(abortedMsg),
|
||||
];
|
||||
|
||||
const usage = getLastAssistantUsage(entries);
|
||||
expect(usage).not.toBeNull();
|
||||
expect(usage!.input).toBe(100);
|
||||
});
|
||||
|
||||
it("should return undefined if no assistant messages", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("Hello")),
|
||||
];
|
||||
expect(getLastAssistantUsage(entries)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldCompact", () => {
|
||||
it("should return true when context exceeds threshold", () => {
|
||||
const settings: CompactionSettings = {
|
||||
enabled: true,
|
||||
reserveTokens: 10000,
|
||||
keepRecentTokens: 20000,
|
||||
};
|
||||
|
||||
expect(shouldCompact(95000, 100000, settings)).toBe(true);
|
||||
expect(shouldCompact(89000, 100000, settings)).toBe(false);
|
||||
});
|
||||
|
||||
it("should return false when disabled", () => {
|
||||
const settings: CompactionSettings = {
|
||||
enabled: false,
|
||||
reserveTokens: 10000,
|
||||
keepRecentTokens: 20000,
|
||||
};
|
||||
|
||||
expect(shouldCompact(95000, 100000, settings)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("findCutPoint", () => {
|
||||
it("should find cut point based on actual token differences", () => {
|
||||
// Create entries with cumulative token counts
|
||||
const entries: SessionEntry[] = [];
|
||||
for (let i = 0; i < 10; i++) {
|
||||
entries.push(createMessageEntry(createUserMessage(`User ${i}`)));
|
||||
entries.push(
|
||||
createMessageEntry(
|
||||
createAssistantMessage(
|
||||
`Assistant ${i}`,
|
||||
createMockUsage(0, 100, (i + 1) * 1000, 0),
|
||||
),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
// 20 entries, last assistant has 10000 tokens
|
||||
// keepRecentTokens = 2500: keep entries where diff < 2500
|
||||
const result = findCutPoint(entries, 0, entries.length, 2500);
|
||||
|
||||
// Should cut at a valid cut point (user or assistant message)
|
||||
expect(entries[result.firstKeptEntryIndex].type).toBe("message");
|
||||
const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry)
|
||||
.message.role;
|
||||
expect(role === "user" || role === "assistant").toBe(true);
|
||||
});
|
||||
|
||||
it("should return startIndex if no valid cut points in range", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createAssistantMessage("a")),
|
||||
];
|
||||
const result = findCutPoint(entries, 0, entries.length, 1000);
|
||||
expect(result.firstKeptEntryIndex).toBe(0);
|
||||
});
|
||||
|
||||
it("should keep everything if all messages fit within budget", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("1")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("a", createMockUsage(0, 50, 500, 0)),
|
||||
),
|
||||
createMessageEntry(createUserMessage("2")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("b", createMockUsage(0, 50, 1000, 0)),
|
||||
),
|
||||
];
|
||||
|
||||
const result = findCutPoint(entries, 0, entries.length, 50000);
|
||||
expect(result.firstKeptEntryIndex).toBe(0);
|
||||
});
|
||||
|
||||
it("should indicate split turn when cutting at assistant message", () => {
|
||||
// Create a scenario where we cut at an assistant message mid-turn
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("Turn 1")),
|
||||
createMessageEntry(
|
||||
createAssistantMessage("A1", createMockUsage(0, 100, 1000, 0)),
|
||||
),
|
||||
createMessageEntry(createUserMessage("Turn 2")), // index 2
|
||||
createMessageEntry(
|
||||
createAssistantMessage("A2-1", createMockUsage(0, 100, 5000, 0)),
|
||||
), // index 3
|
||||
createMessageEntry(
|
||||
createAssistantMessage("A2-2", createMockUsage(0, 100, 8000, 0)),
|
||||
), // index 4
|
||||
createMessageEntry(
|
||||
createAssistantMessage("A2-3", createMockUsage(0, 100, 10000, 0)),
|
||||
), // index 5
|
||||
];
|
||||
|
||||
// With keepRecentTokens = 3000, should cut somewhere in Turn 2
|
||||
const result = findCutPoint(entries, 0, entries.length, 3000);
|
||||
|
||||
// If cut at assistant message (not user), should indicate split turn
|
||||
const cutEntry = entries[result.firstKeptEntryIndex] as SessionMessageEntry;
|
||||
if (cutEntry.message.role === "assistant") {
|
||||
expect(result.isSplitTurn).toBe(true);
|
||||
expect(result.turnStartIndex).toBe(2); // Turn 2 starts at index 2
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildSessionContext", () => {
|
||||
it("should load all messages when no compaction", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("1")),
|
||||
createMessageEntry(createAssistantMessage("a")),
|
||||
createMessageEntry(createUserMessage("2")),
|
||||
createMessageEntry(createAssistantMessage("b")),
|
||||
];
|
||||
|
||||
const loaded = buildSessionContext(entries);
|
||||
expect(loaded.messages.length).toBe(4);
|
||||
expect(loaded.thinkingLevel).toBe("off");
|
||||
expect(loaded.model).toEqual({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-sonnet-4-5",
|
||||
});
|
||||
});
|
||||
|
||||
it("should handle single compaction", () => {
|
||||
// IDs: u1=test-id-0, a1=test-id-1, u2=test-id-2, a2=test-id-3, compaction=test-id-4, u3=test-id-5, a3=test-id-6
|
||||
const u1 = createMessageEntry(createUserMessage("1"));
|
||||
const a1 = createMessageEntry(createAssistantMessage("a"));
|
||||
const u2 = createMessageEntry(createUserMessage("2"));
|
||||
const a2 = createMessageEntry(createAssistantMessage("b"));
|
||||
const compaction = createCompactionEntry("Summary of 1,a,2,b", u2.id); // keep from u2 onwards
|
||||
const u3 = createMessageEntry(createUserMessage("3"));
|
||||
const a3 = createMessageEntry(createAssistantMessage("c"));
|
||||
|
||||
const entries: SessionEntry[] = [u1, a1, u2, a2, compaction, u3, a3];
|
||||
|
||||
const loaded = buildSessionContext(entries);
|
||||
// summary + kept (u2, a2) + after (u3, a3) = 5
|
||||
expect(loaded.messages.length).toBe(5);
|
||||
expect(loaded.messages[0].role).toBe("compactionSummary");
|
||||
expect((loaded.messages[0] as any).summary).toContain("Summary of 1,a,2,b");
|
||||
});
|
||||
|
||||
it("should handle multiple compactions (only latest matters)", () => {
|
||||
// First batch
|
||||
const u1 = createMessageEntry(createUserMessage("1"));
|
||||
const a1 = createMessageEntry(createAssistantMessage("a"));
|
||||
const compact1 = createCompactionEntry("First summary", u1.id);
|
||||
// Second batch
|
||||
const u2 = createMessageEntry(createUserMessage("2"));
|
||||
const b = createMessageEntry(createAssistantMessage("b"));
|
||||
const u3 = createMessageEntry(createUserMessage("3"));
|
||||
const c = createMessageEntry(createAssistantMessage("c"));
|
||||
const compact2 = createCompactionEntry("Second summary", u3.id); // keep from u3 onwards
|
||||
// After second compaction
|
||||
const u4 = createMessageEntry(createUserMessage("4"));
|
||||
const d = createMessageEntry(createAssistantMessage("d"));
|
||||
|
||||
const entries: SessionEntry[] = [
|
||||
u1,
|
||||
a1,
|
||||
compact1,
|
||||
u2,
|
||||
b,
|
||||
u3,
|
||||
c,
|
||||
compact2,
|
||||
u4,
|
||||
d,
|
||||
];
|
||||
|
||||
const loaded = buildSessionContext(entries);
|
||||
// summary + kept from u3 (u3, c) + after (u4, d) = 5
|
||||
expect(loaded.messages.length).toBe(5);
|
||||
expect((loaded.messages[0] as any).summary).toContain("Second summary");
|
||||
});
|
||||
|
||||
it("should keep all messages when firstKeptEntryId is first entry", () => {
|
||||
const u1 = createMessageEntry(createUserMessage("1"));
|
||||
const a1 = createMessageEntry(createAssistantMessage("a"));
|
||||
const compact1 = createCompactionEntry("First summary", u1.id); // keep from first entry
|
||||
const u2 = createMessageEntry(createUserMessage("2"));
|
||||
const b = createMessageEntry(createAssistantMessage("b"));
|
||||
|
||||
const entries: SessionEntry[] = [u1, a1, compact1, u2, b];
|
||||
|
||||
const loaded = buildSessionContext(entries);
|
||||
// summary + all messages (u1, a1, u2, b) = 5
|
||||
expect(loaded.messages.length).toBe(5);
|
||||
});
|
||||
|
||||
it("should track model and thinking level changes", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
createMessageEntry(createUserMessage("1")),
|
||||
createModelChangeEntry("openai", "gpt-4"),
|
||||
createMessageEntry(createAssistantMessage("a")),
|
||||
createThinkingLevelEntry("high"),
|
||||
];
|
||||
|
||||
const loaded = buildSessionContext(entries);
|
||||
// model_change is later overwritten by assistant message's model info
|
||||
expect(loaded.model).toEqual({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-sonnet-4-5",
|
||||
});
|
||||
expect(loaded.thinkingLevel).toBe("high");
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// Integration tests with real session data
|
||||
// ============================================================================
|
||||
|
||||
describe("Large session fixture", () => {
|
||||
it("should parse the large session", () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
expect(entries.length).toBeGreaterThan(100);
|
||||
|
||||
const messageCount = entries.filter((e) => e.type === "message").length;
|
||||
expect(messageCount).toBeGreaterThan(100);
|
||||
});
|
||||
|
||||
it("should find cut point in large session", () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
const result = findCutPoint(
|
||||
entries,
|
||||
0,
|
||||
entries.length,
|
||||
DEFAULT_COMPACTION_SETTINGS.keepRecentTokens,
|
||||
);
|
||||
|
||||
// Cut point should be at a message entry (user or assistant)
|
||||
expect(entries[result.firstKeptEntryIndex].type).toBe("message");
|
||||
const role = (entries[result.firstKeptEntryIndex] as SessionMessageEntry)
|
||||
.message.role;
|
||||
expect(role === "user" || role === "assistant").toBe(true);
|
||||
});
|
||||
|
||||
it("should load session correctly", () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
const loaded = buildSessionContext(entries);
|
||||
|
||||
expect(loaded.messages.length).toBeGreaterThan(100);
|
||||
expect(loaded.model).not.toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// LLM integration tests (skipped without API key)
|
||||
// ============================================================================
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("LLM summarization", () => {
|
||||
it("should generate a compaction result for the large session", async () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
|
||||
const preparation = prepareCompaction(entries, DEFAULT_COMPACTION_SETTINGS);
|
||||
expect(preparation).toBeDefined();
|
||||
|
||||
const compactionResult = await compact(
|
||||
preparation!,
|
||||
model,
|
||||
process.env.ANTHROPIC_OAUTH_TOKEN!,
|
||||
);
|
||||
|
||||
expect(compactionResult.summary.length).toBeGreaterThan(100);
|
||||
expect(compactionResult.firstKeptEntryId).toBeTruthy();
|
||||
expect(compactionResult.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
console.log("Summary length:", compactionResult.summary.length);
|
||||
console.log("First kept entry ID:", compactionResult.firstKeptEntryId);
|
||||
console.log("Tokens before:", compactionResult.tokensBefore);
|
||||
console.log("\n--- SUMMARY ---\n");
|
||||
console.log(compactionResult.summary);
|
||||
}, 60000);
|
||||
|
||||
it("should produce valid session after compaction", async () => {
|
||||
const entries = loadLargeSessionEntries();
|
||||
const loaded = buildSessionContext(entries);
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
|
||||
const preparation = prepareCompaction(entries, DEFAULT_COMPACTION_SETTINGS);
|
||||
expect(preparation).toBeDefined();
|
||||
|
||||
const compactionResult = await compact(
|
||||
preparation!,
|
||||
model,
|
||||
process.env.ANTHROPIC_OAUTH_TOKEN!,
|
||||
);
|
||||
|
||||
// Simulate appending compaction to entries by creating a proper entry
|
||||
const lastEntry = entries[entries.length - 1];
|
||||
const parentId = lastEntry.id;
|
||||
const compactionEntry: CompactionEntry = {
|
||||
type: "compaction",
|
||||
id: "compaction-test-id",
|
||||
parentId,
|
||||
timestamp: new Date().toISOString(),
|
||||
...compactionResult,
|
||||
};
|
||||
const newEntries = [...entries, compactionEntry];
|
||||
const reloaded = buildSessionContext(newEntries);
|
||||
|
||||
// Should have summary + kept messages
|
||||
expect(reloaded.messages.length).toBeLessThan(loaded.messages.length);
|
||||
expect(reloaded.messages[0].role).toBe("compactionSummary");
|
||||
expect((reloaded.messages[0] as any).summary).toContain(
|
||||
compactionResult.summary,
|
||||
);
|
||||
|
||||
console.log("Original messages:", loaded.messages.length);
|
||||
console.log("After compaction:", reloaded.messages.length);
|
||||
}, 60000);
|
||||
});
|
||||
539
packages/coding-agent/test/extensions-discovery.test.ts
Normal file
539
packages/coding-agent/test/extensions-discovery.test.ts
Normal file
|
|
@ -0,0 +1,539 @@
|
|||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { discoverAndLoadExtensions } from "../src/core/extensions/loader.js";
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
describe("extensions discovery", () => {
|
||||
let tempDir: string;
|
||||
let extensionsDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-ext-test-"));
|
||||
extensionsDir = path.join(tempDir, "extensions");
|
||||
fs.mkdirSync(extensionsDir);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const extensionCode = `
|
||||
export default function(pi) {
|
||||
pi.registerCommand("test", { handler: async () => {} });
|
||||
}
|
||||
`;
|
||||
|
||||
const extensionCodeWithTool = (toolName: string) => `
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi) {
|
||||
pi.registerTool({
|
||||
name: "${toolName}",
|
||||
label: "${toolName}",
|
||||
description: "Test tool",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ content: [{ type: "text", text: "ok" }] }),
|
||||
});
|
||||
}
|
||||
`;
|
||||
|
||||
it("discovers direct .ts files in extensions/", async () => {
|
||||
fs.writeFileSync(path.join(extensionsDir, "foo.ts"), extensionCode);
|
||||
fs.writeFileSync(path.join(extensionsDir, "bar.ts"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(2);
|
||||
expect(result.extensions.map((e) => path.basename(e.path)).sort()).toEqual([
|
||||
"bar.ts",
|
||||
"foo.ts",
|
||||
]);
|
||||
});
|
||||
|
||||
it("discovers direct .js files in extensions/", async () => {
|
||||
fs.writeFileSync(path.join(extensionsDir, "foo.js"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(path.basename(result.extensions[0].path)).toBe("foo.js");
|
||||
});
|
||||
|
||||
it("discovers subdirectory with index.ts", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-extension");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "index.ts"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("my-extension");
|
||||
expect(result.extensions[0].path).toContain("index.ts");
|
||||
});
|
||||
|
||||
it("discovers subdirectory with index.js", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-extension");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "index.js"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("index.js");
|
||||
});
|
||||
|
||||
it("prefers index.ts over index.js", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-extension");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "index.ts"), extensionCode);
|
||||
fs.writeFileSync(path.join(subdir, "index.js"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("index.ts");
|
||||
});
|
||||
|
||||
it("discovers subdirectory with package.json pi field", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-package");
|
||||
const srcDir = path.join(subdir, "src");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.mkdirSync(srcDir);
|
||||
fs.writeFileSync(path.join(srcDir, "main.ts"), extensionCode);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "my-package",
|
||||
pi: {
|
||||
extensions: ["./src/main.ts"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("src");
|
||||
expect(result.extensions[0].path).toContain("main.ts");
|
||||
});
|
||||
|
||||
it("package.json can declare multiple extensions", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-package");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "ext1.ts"), extensionCode);
|
||||
fs.writeFileSync(path.join(subdir, "ext2.ts"), extensionCode);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "my-package",
|
||||
pi: {
|
||||
extensions: ["./ext1.ts", "./ext2.ts"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("package.json with pi field takes precedence over index.ts", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-package");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "index.ts"),
|
||||
extensionCodeWithTool("from-index"),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "custom.ts"),
|
||||
extensionCodeWithTool("from-custom"),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "my-package",
|
||||
pi: {
|
||||
extensions: ["./custom.ts"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("custom.ts");
|
||||
// Verify the right tool was registered
|
||||
expect(result.extensions[0].tools.has("from-custom")).toBe(true);
|
||||
expect(result.extensions[0].tools.has("from-index")).toBe(false);
|
||||
});
|
||||
|
||||
it("ignores package.json without pi field, falls back to index.ts", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-package");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "index.ts"), extensionCode);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "my-package",
|
||||
version: "1.0.0",
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("index.ts");
|
||||
});
|
||||
|
||||
it("ignores subdirectory without index or package.json", async () => {
|
||||
const subdir = path.join(extensionsDir, "not-an-extension");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "helper.ts"), extensionCode);
|
||||
fs.writeFileSync(path.join(subdir, "utils.ts"), extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("does not recurse beyond one level", async () => {
|
||||
const subdir = path.join(extensionsDir, "container");
|
||||
const nested = path.join(subdir, "nested");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.mkdirSync(nested);
|
||||
fs.writeFileSync(path.join(nested, "index.ts"), extensionCode);
|
||||
// No index.ts or package.json in container/
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles mixed direct files and subdirectories", async () => {
|
||||
// Direct file
|
||||
fs.writeFileSync(path.join(extensionsDir, "direct.ts"), extensionCode);
|
||||
|
||||
// Subdirectory with index
|
||||
const subdir1 = path.join(extensionsDir, "with-index");
|
||||
fs.mkdirSync(subdir1);
|
||||
fs.writeFileSync(path.join(subdir1, "index.ts"), extensionCode);
|
||||
|
||||
// Subdirectory with package.json
|
||||
const subdir2 = path.join(extensionsDir, "with-manifest");
|
||||
fs.mkdirSync(subdir2);
|
||||
fs.writeFileSync(path.join(subdir2, "entry.ts"), extensionCode);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir2, "package.json"),
|
||||
JSON.stringify({ pi: { extensions: ["./entry.ts"] } }),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(3);
|
||||
});
|
||||
|
||||
it("skips non-existent paths declared in package.json", async () => {
|
||||
const subdir = path.join(extensionsDir, "my-package");
|
||||
fs.mkdirSync(subdir);
|
||||
fs.writeFileSync(path.join(subdir, "exists.ts"), extensionCode);
|
||||
fs.writeFileSync(
|
||||
path.join(subdir, "package.json"),
|
||||
JSON.stringify({
|
||||
pi: {
|
||||
extensions: ["./exists.ts", "./missing.ts"],
|
||||
},
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("exists.ts");
|
||||
});
|
||||
|
||||
it("loads extensions and registers commands", async () => {
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "with-command.ts"),
|
||||
extensionCode,
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].commands.has("test")).toBe(true);
|
||||
});
|
||||
|
||||
it("loads extensions and registers tools", async () => {
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "with-tool.ts"),
|
||||
extensionCodeWithTool("my-tool"),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].tools.has("my-tool")).toBe(true);
|
||||
});
|
||||
|
||||
it("reports errors for invalid extension code", async () => {
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "invalid.ts"),
|
||||
"this is not valid typescript export",
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(1);
|
||||
expect(result.errors[0].path).toContain("invalid.ts");
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles explicitly configured paths", async () => {
|
||||
const customPath = path.join(tempDir, "custom-location", "my-ext.ts");
|
||||
fs.mkdirSync(path.dirname(customPath), { recursive: true });
|
||||
fs.writeFileSync(customPath, extensionCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions(
|
||||
[customPath],
|
||||
tempDir,
|
||||
tempDir,
|
||||
);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("my-ext.ts");
|
||||
});
|
||||
|
||||
it("resolves dependencies from extension's own node_modules", async () => {
|
||||
const extPath = path.join(tempDir, "custom-location", "with-deps");
|
||||
const nodeModulesDir = path.join(extPath, "node_modules", "ms");
|
||||
fs.mkdirSync(nodeModulesDir, { recursive: true });
|
||||
fs.writeFileSync(
|
||||
path.join(extPath, "index.ts"),
|
||||
`
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import ms from "ms";
|
||||
export default function(pi) {
|
||||
pi.registerTool({
|
||||
name: "parse_duration",
|
||||
label: "parse_duration",
|
||||
description: "Parse a duration string",
|
||||
parameters: Type.Object({ value: Type.String() }),
|
||||
execute: async (_toolCallId, params) => ({
|
||||
content: [{ type: "text", text: String(ms(params.value)) }],
|
||||
}),
|
||||
});
|
||||
}
|
||||
`,
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(extPath, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "with-deps",
|
||||
type: "module",
|
||||
}),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(nodeModulesDir, "package.json"),
|
||||
JSON.stringify({
|
||||
name: "ms",
|
||||
type: "module",
|
||||
exports: "./index.js",
|
||||
}),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(nodeModulesDir, "index.js"),
|
||||
`export default function ms(value) { return value === "1m" ? 60000 : 0; }`,
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([extPath], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].path).toContain("with-deps");
|
||||
// The extension registers a 'parse_duration' tool
|
||||
expect(result.extensions[0].tools.has("parse_duration")).toBe(true);
|
||||
});
|
||||
|
||||
it("registers message renderers", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerMessageRenderer("my-custom-type", (message, options, theme) => {
|
||||
return null; // Use default rendering
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "with-renderer.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].messageRenderers.has("my-custom-type")).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it("reports error when extension throws during initialization", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
throw new Error("Initialization failed!");
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "throws.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(1);
|
||||
expect(result.errors[0].error).toContain("Initialization failed!");
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("reports error when extension has no default export", async () => {
|
||||
const extCode = `
|
||||
export function notDefault(pi) {
|
||||
pi.registerCommand("test", { handler: async () => {} });
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "no-default.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(1);
|
||||
expect(result.errors[0].error).toContain(
|
||||
"does not export a valid factory function",
|
||||
);
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("allows multiple extensions to register different tools", async () => {
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-a.ts"),
|
||||
extensionCodeWithTool("tool-a"),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-b.ts"),
|
||||
extensionCodeWithTool("tool-b"),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(2);
|
||||
|
||||
const allTools = new Set<string>();
|
||||
for (const ext of result.extensions) {
|
||||
for (const name of ext.tools.keys()) {
|
||||
allTools.add(name);
|
||||
}
|
||||
}
|
||||
expect(allTools.has("tool-a")).toBe(true);
|
||||
expect(allTools.has("tool-b")).toBe(true);
|
||||
});
|
||||
|
||||
it("loads extension with event handlers", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.on("agent_start", async () => {});
|
||||
pi.on("tool_call", async (event) => undefined);
|
||||
pi.on("agent_end", async () => {});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "with-handlers.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].handlers.has("agent_start")).toBe(true);
|
||||
expect(result.extensions[0].handlers.has("tool_call")).toBe(true);
|
||||
expect(result.extensions[0].handlers.has("agent_end")).toBe(true);
|
||||
});
|
||||
|
||||
it("loads extension with shortcuts", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+t", {
|
||||
description: "Test shortcut",
|
||||
handler: async (ctx) => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "with-shortcut.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].shortcuts.has("ctrl+t")).toBe(true);
|
||||
});
|
||||
|
||||
it("loads extension with flags", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerFlag("my-flag", {
|
||||
description: "My custom flag",
|
||||
handler: async (value) => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "with-flag.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].flags.has("my-flag")).toBe(true);
|
||||
});
|
||||
|
||||
it("loadExtensions only loads explicit paths without discovery", async () => {
|
||||
// Create discoverable extensions (would be found by discoverAndLoadExtensions)
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "discovered.ts"),
|
||||
extensionCodeWithTool("discovered"),
|
||||
);
|
||||
|
||||
// Create explicit extension outside discovery path
|
||||
const explicitPath = path.join(tempDir, "explicit.ts");
|
||||
fs.writeFileSync(explicitPath, extensionCodeWithTool("explicit"));
|
||||
|
||||
// Use loadExtensions directly to skip discovery
|
||||
const { loadExtensions } = await import("../src/core/extensions/loader.js");
|
||||
const result = await loadExtensions([explicitPath], tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(1);
|
||||
expect(result.extensions[0].tools.has("explicit")).toBe(true);
|
||||
expect(result.extensions[0].tools.has("discovered")).toBe(false);
|
||||
});
|
||||
|
||||
it("loadExtensions with no paths loads nothing", async () => {
|
||||
// Create discoverable extensions (would be found by discoverAndLoadExtensions)
|
||||
fs.writeFileSync(path.join(extensionsDir, "discovered.ts"), extensionCode);
|
||||
|
||||
// Use loadExtensions directly with empty paths
|
||||
const { loadExtensions } = await import("../src/core/extensions/loader.js");
|
||||
const result = await loadExtensions([], tempDir);
|
||||
|
||||
expect(result.errors).toHaveLength(0);
|
||||
expect(result.extensions).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
148
packages/coding-agent/test/extensions-input-event.test.ts
Normal file
148
packages/coding-agent/test/extensions-input-event.test.ts
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { discoverAndLoadExtensions } from "../src/core/extensions/loader.js";
|
||||
import { ExtensionRunner } from "../src/core/extensions/runner.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
|
||||
describe("Input Event", () => {
|
||||
let tempDir: string;
|
||||
let extensionsDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-input-test-"));
|
||||
extensionsDir = path.join(tempDir, "extensions");
|
||||
fs.mkdirSync(extensionsDir);
|
||||
// Clean globalThis test vars
|
||||
delete (globalThis as any).testVar;
|
||||
});
|
||||
|
||||
afterEach(() => fs.rmSync(tempDir, { recursive: true, force: true }));
|
||||
|
||||
async function createRunner(...extensions: string[]) {
|
||||
// Clear and recreate extensions dir for clean state
|
||||
fs.rmSync(extensionsDir, { recursive: true, force: true });
|
||||
fs.mkdirSync(extensionsDir);
|
||||
for (let i = 0; i < extensions.length; i++)
|
||||
fs.writeFileSync(path.join(extensionsDir, `e${i}.ts`), extensions[i]);
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const sm = SessionManager.inMemory();
|
||||
const mr = new ModelRegistry(
|
||||
AuthStorage.create(path.join(tempDir, "auth.json")),
|
||||
);
|
||||
return new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sm,
|
||||
mr,
|
||||
);
|
||||
}
|
||||
|
||||
it("returns continue when no handlers, undefined return, or explicit continue", async () => {
|
||||
// No handlers
|
||||
expect(
|
||||
(await (await createRunner()).emitInput("x", undefined, "interactive"))
|
||||
.action,
|
||||
).toBe("continue");
|
||||
// Returns undefined
|
||||
let r = await createRunner(
|
||||
`export default p => p.on("input", async () => {});`,
|
||||
);
|
||||
expect((await r.emitInput("x", undefined, "interactive")).action).toBe(
|
||||
"continue",
|
||||
);
|
||||
// Returns explicit continue
|
||||
r = await createRunner(
|
||||
`export default p => p.on("input", async () => ({ action: "continue" }));`,
|
||||
);
|
||||
expect((await r.emitInput("x", undefined, "interactive")).action).toBe(
|
||||
"continue",
|
||||
);
|
||||
});
|
||||
|
||||
it("transforms text and preserves images when omitted", async () => {
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async e => ({ action: "transform", text: "T:" + e.text }));`,
|
||||
);
|
||||
const imgs = [
|
||||
{ type: "image" as const, data: "orig", mimeType: "image/png" },
|
||||
];
|
||||
const result = await r.emitInput("hi", imgs, "interactive");
|
||||
expect(result).toEqual({ action: "transform", text: "T:hi", images: imgs });
|
||||
});
|
||||
|
||||
it("transforms and replaces images when provided", async () => {
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async () => ({ action: "transform", text: "X", images: [{ type: "image", data: "new", mimeType: "image/jpeg" }] }));`,
|
||||
);
|
||||
const result = await r.emitInput(
|
||||
"hi",
|
||||
[{ type: "image", data: "orig", mimeType: "image/png" }],
|
||||
"interactive",
|
||||
);
|
||||
expect(result).toEqual({
|
||||
action: "transform",
|
||||
text: "X",
|
||||
images: [{ type: "image", data: "new", mimeType: "image/jpeg" }],
|
||||
});
|
||||
});
|
||||
|
||||
it("chains transforms across multiple handlers", async () => {
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async e => ({ action: "transform", text: e.text + "[1]" }));`,
|
||||
`export default p => p.on("input", async e => ({ action: "transform", text: e.text + "[2]" }));`,
|
||||
);
|
||||
const result = await r.emitInput("X", undefined, "interactive");
|
||||
expect(result).toEqual({
|
||||
action: "transform",
|
||||
text: "X[1][2]",
|
||||
images: undefined,
|
||||
});
|
||||
});
|
||||
|
||||
it("short-circuits on handled and skips subsequent handlers", async () => {
|
||||
(globalThis as any).testVar = false;
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async () => ({ action: "handled" }));`,
|
||||
`export default p => p.on("input", async () => { globalThis.testVar = true; });`,
|
||||
);
|
||||
expect(await r.emitInput("X", undefined, "interactive")).toEqual({
|
||||
action: "handled",
|
||||
});
|
||||
expect((globalThis as any).testVar).toBe(false);
|
||||
});
|
||||
|
||||
it("passes source correctly for all source types", async () => {
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async e => { globalThis.testVar = e.source; return { action: "continue" }; });`,
|
||||
);
|
||||
for (const source of ["interactive", "rpc", "extension"] as const) {
|
||||
await r.emitInput("x", undefined, source);
|
||||
expect((globalThis as any).testVar).toBe(source);
|
||||
}
|
||||
});
|
||||
|
||||
it("catches handler errors and continues", async () => {
|
||||
const r = await createRunner(
|
||||
`export default p => p.on("input", async () => { throw new Error("boom"); });`,
|
||||
);
|
||||
const errs: string[] = [];
|
||||
r.onError((e) => errs.push(e.error));
|
||||
const result = await r.emitInput("x", undefined, "interactive");
|
||||
expect(result.action).toBe("continue");
|
||||
expect(errs).toContain("boom");
|
||||
});
|
||||
|
||||
it("hasHandlers returns correct value", async () => {
|
||||
let r = await createRunner();
|
||||
expect(r.hasHandlers("input")).toBe(false);
|
||||
r = await createRunner(
|
||||
`export default p => p.on("input", async () => {});`,
|
||||
);
|
||||
expect(r.hasHandlers("input")).toBe(true);
|
||||
});
|
||||
});
|
||||
856
packages/coding-agent/test/extensions-runner.test.ts
Normal file
856
packages/coding-agent/test/extensions-runner.test.ts
Normal file
|
|
@ -0,0 +1,856 @@
|
|||
/**
|
||||
* Tests for ExtensionRunner - conflict detection, error handling, tool wrapping.
|
||||
*/
|
||||
|
||||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import {
|
||||
createExtensionRuntime,
|
||||
discoverAndLoadExtensions,
|
||||
} from "../src/core/extensions/loader.js";
|
||||
import { ExtensionRunner } from "../src/core/extensions/runner.js";
|
||||
import type {
|
||||
ExtensionActions,
|
||||
ExtensionContextActions,
|
||||
ProviderConfig,
|
||||
} from "../src/core/extensions/types.js";
|
||||
import { DEFAULT_KEYBINDINGS, type KeyId } from "../src/core/keybindings.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
|
||||
describe("ExtensionRunner", () => {
|
||||
let tempDir: string;
|
||||
let extensionsDir: string;
|
||||
let sessionManager: SessionManager;
|
||||
let modelRegistry: ModelRegistry;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-runner-test-"));
|
||||
extensionsDir = path.join(tempDir, "extensions");
|
||||
fs.mkdirSync(extensionsDir);
|
||||
sessionManager = SessionManager.inMemory();
|
||||
const authStorage = AuthStorage.create(path.join(tempDir, "auth.json"));
|
||||
modelRegistry = new ModelRegistry(authStorage);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
fs.rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const providerModelConfig: ProviderConfig = {
|
||||
baseUrl: "https://provider.test/v1",
|
||||
apiKey: "PROVIDER_TEST_KEY",
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "instant-model",
|
||||
name: "Instant Model",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const extensionActions: ExtensionActions = {
|
||||
sendMessage: () => {},
|
||||
sendUserMessage: () => {},
|
||||
appendEntry: () => {},
|
||||
setSessionName: () => {},
|
||||
getSessionName: () => undefined,
|
||||
setLabel: () => {},
|
||||
getActiveTools: () => [],
|
||||
getAllTools: () => [],
|
||||
setActiveTools: () => {},
|
||||
refreshTools: () => {},
|
||||
getCommands: () => [],
|
||||
setModel: async () => false,
|
||||
getThinkingLevel: () => "off",
|
||||
setThinkingLevel: () => {},
|
||||
};
|
||||
|
||||
const extensionContextActions: ExtensionContextActions = {
|
||||
getModel: () => undefined,
|
||||
isIdle: () => true,
|
||||
abort: () => {},
|
||||
hasPendingMessages: () => false,
|
||||
shutdown: () => {},
|
||||
getContextUsage: () => undefined,
|
||||
compact: () => {},
|
||||
getSystemPrompt: () => "",
|
||||
};
|
||||
|
||||
describe("shortcut conflicts", () => {
|
||||
it("warns when extension shortcut conflicts with built-in", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+c", {
|
||||
description: "Conflicts with built-in",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "conflict.ts"), extCode);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const shortcuts = runner.getShortcuts(DEFAULT_KEYBINDINGS);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("conflicts with built-in"),
|
||||
);
|
||||
expect(shortcuts.has("ctrl+c")).toBe(false);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("allows a shortcut when the reserved set no longer contains the default key", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+p", {
|
||||
description: "Uses freed default",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "rebinding.ts"), extCode);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const keybindings = {
|
||||
...DEFAULT_KEYBINDINGS,
|
||||
cycleModelForward: "ctrl+n" as KeyId,
|
||||
};
|
||||
const shortcuts = runner.getShortcuts(keybindings);
|
||||
|
||||
expect(shortcuts.has("ctrl+p")).toBe(true);
|
||||
expect(warnSpy).not.toHaveBeenCalledWith(
|
||||
expect.stringContaining("conflicts with built-in"),
|
||||
);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("warns but allows when extension uses non-reserved built-in shortcut", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+v", {
|
||||
description: "Overrides non-reserved",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "non-reserved.ts"), extCode);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const shortcuts = runner.getShortcuts(DEFAULT_KEYBINDINGS);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("built-in shortcut for pasteImage"),
|
||||
);
|
||||
expect(shortcuts.has("ctrl+v")).toBe(true);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("blocks shortcuts for reserved actions even when rebound", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+x", {
|
||||
description: "Conflicts with rebound reserved",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "rebound-reserved.ts"),
|
||||
extCode,
|
||||
);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const keybindings = {
|
||||
...DEFAULT_KEYBINDINGS,
|
||||
interrupt: "ctrl+x" as KeyId,
|
||||
};
|
||||
const shortcuts = runner.getShortcuts(keybindings);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("conflicts with built-in"),
|
||||
);
|
||||
expect(shortcuts.has("ctrl+x")).toBe(false);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("blocks shortcuts when reserved action has multiple keys", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+y", {
|
||||
description: "Conflicts with multi-key reserved",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "multi-reserved.ts"), extCode);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const keybindings = {
|
||||
...DEFAULT_KEYBINDINGS,
|
||||
clear: ["ctrl+x", "ctrl+y"] as KeyId[],
|
||||
};
|
||||
const shortcuts = runner.getShortcuts(keybindings);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("conflicts with built-in"),
|
||||
);
|
||||
expect(shortcuts.has("ctrl+y")).toBe(false);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("warns but allows when non-reserved action has multiple keys", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+y", {
|
||||
description: "Overrides multi-key non-reserved",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "multi-non-reserved.ts"),
|
||||
extCode,
|
||||
);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const keybindings = {
|
||||
...DEFAULT_KEYBINDINGS,
|
||||
pasteImage: ["ctrl+x", "ctrl+y"] as KeyId[],
|
||||
};
|
||||
const shortcuts = runner.getShortcuts(keybindings);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("built-in shortcut for pasteImage"),
|
||||
);
|
||||
expect(shortcuts.has("ctrl+y")).toBe(true);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
|
||||
it("warns when two extensions register same shortcut", async () => {
|
||||
// Use a non-reserved shortcut
|
||||
const extCode1 = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+shift+x", {
|
||||
description: "First extension",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
const extCode2 = `
|
||||
export default function(pi) {
|
||||
pi.registerShortcut("ctrl+shift+x", {
|
||||
description: "Second extension",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "ext1.ts"), extCode1);
|
||||
fs.writeFileSync(path.join(extensionsDir, "ext2.ts"), extCode2);
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const shortcuts = runner.getShortcuts(DEFAULT_KEYBINDINGS);
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("shortcut conflict"),
|
||||
);
|
||||
// Last one wins
|
||||
expect(shortcuts.has("ctrl+shift+x")).toBe(true);
|
||||
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
describe("tool collection", () => {
|
||||
it("collects tools from multiple extensions", async () => {
|
||||
const toolCode = (name: string) => `
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi) {
|
||||
pi.registerTool({
|
||||
name: "${name}",
|
||||
label: "${name}",
|
||||
description: "Test tool",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ content: [{ type: "text", text: "ok" }], details: {} }),
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-a.ts"),
|
||||
toolCode("tool_a"),
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-b.ts"),
|
||||
toolCode("tool_b"),
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const tools = runner.getAllRegisteredTools();
|
||||
|
||||
expect(tools.length).toBe(2);
|
||||
expect(tools.map((t) => t.definition.name).sort()).toEqual([
|
||||
"tool_a",
|
||||
"tool_b",
|
||||
]);
|
||||
});
|
||||
|
||||
it("keeps first tool when two extensions register the same name", async () => {
|
||||
const first = `
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi) {
|
||||
pi.registerTool({
|
||||
name: "shared",
|
||||
label: "shared",
|
||||
description: "first",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ content: [{ type: "text", text: "ok" }], details: {} }),
|
||||
});
|
||||
}
|
||||
`;
|
||||
const second = `
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi) {
|
||||
pi.registerTool({
|
||||
name: "shared",
|
||||
label: "shared",
|
||||
description: "second",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ content: [{ type: "text", text: "ok" }], details: {} }),
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "a-first.ts"), first);
|
||||
fs.writeFileSync(path.join(extensionsDir, "b-second.ts"), second);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const tools = runner.getAllRegisteredTools();
|
||||
|
||||
expect(tools).toHaveLength(1);
|
||||
expect(tools[0]?.definition.description).toBe("first");
|
||||
});
|
||||
});
|
||||
|
||||
describe("command collection", () => {
|
||||
it("collects commands from multiple extensions", async () => {
|
||||
const cmdCode = (name: string) => `
|
||||
export default function(pi) {
|
||||
pi.registerCommand("${name}", {
|
||||
description: "Test command",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "cmd-a.ts"), cmdCode("cmd-a"));
|
||||
fs.writeFileSync(path.join(extensionsDir, "cmd-b.ts"), cmdCode("cmd-b"));
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const commands = runner.getRegisteredCommands();
|
||||
|
||||
expect(commands.length).toBe(2);
|
||||
expect(commands.map((c) => c.name).sort()).toEqual(["cmd-a", "cmd-b"]);
|
||||
});
|
||||
|
||||
it("gets command by name", async () => {
|
||||
const cmdCode = `
|
||||
export default function(pi) {
|
||||
pi.registerCommand("my-cmd", {
|
||||
description: "My command",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "cmd.ts"), cmdCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
const cmd = runner.getCommand("my-cmd");
|
||||
expect(cmd).toBeDefined();
|
||||
expect(cmd?.name).toBe("my-cmd");
|
||||
expect(cmd?.description).toBe("My command");
|
||||
|
||||
const missing = runner.getCommand("not-exists");
|
||||
expect(missing).toBeUndefined();
|
||||
});
|
||||
|
||||
it("filters out commands conflict with reseved", async () => {
|
||||
const cmdCode = (name: string) => `
|
||||
export default function(pi) {
|
||||
pi.registerCommand("${name}", {
|
||||
description: "Test command",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "cmd-a.ts"), cmdCode("cmd-a"));
|
||||
fs.writeFileSync(path.join(extensionsDir, "cmd-b.ts"), cmdCode("cmd-b"));
|
||||
|
||||
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const commands = runner.getRegisteredCommands(new Set(["cmd-a"]));
|
||||
const diagnostics = runner.getCommandDiagnostics();
|
||||
|
||||
expect(commands.length).toBe(1);
|
||||
expect(commands.map((c) => c.name).sort()).toEqual(["cmd-b"]);
|
||||
|
||||
expect(diagnostics.length).toBe(1);
|
||||
expect(diagnostics[0].path).toEqual(path.join(extensionsDir, "cmd-a.ts"));
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledWith(
|
||||
expect.stringContaining("conflicts with built-in command"),
|
||||
);
|
||||
warnSpy.mockRestore();
|
||||
});
|
||||
});
|
||||
|
||||
describe("error handling", () => {
|
||||
it("calls error listeners when handler throws", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.on("context", async () => {
|
||||
throw new Error("Handler error!");
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "throws.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
const errors: Array<{
|
||||
extensionPath: string;
|
||||
event: string;
|
||||
error: string;
|
||||
}> = [];
|
||||
runner.onError((err) => {
|
||||
errors.push(err);
|
||||
});
|
||||
|
||||
// Emit context event which will trigger the throwing handler
|
||||
await runner.emitContext([]);
|
||||
|
||||
expect(errors.length).toBe(1);
|
||||
expect(errors[0].error).toContain("Handler error!");
|
||||
expect(errors[0].event).toBe("context");
|
||||
});
|
||||
});
|
||||
|
||||
describe("message renderers", () => {
|
||||
it("gets message renderer by type", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerMessageRenderer("my-type", (message, options, theme) => null);
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "renderer.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
const renderer = runner.getMessageRenderer("my-type");
|
||||
expect(renderer).toBeDefined();
|
||||
|
||||
const missing = runner.getMessageRenderer("not-exists");
|
||||
expect(missing).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("flags", () => {
|
||||
it("collects flags from extensions", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerFlag("my-flag", {
|
||||
description: "My flag",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "with-flag.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const flags = runner.getFlags();
|
||||
|
||||
expect(flags.has("my-flag")).toBe(true);
|
||||
});
|
||||
|
||||
it("keeps first flag when two extensions register the same name", async () => {
|
||||
const first = `
|
||||
export default function(pi) {
|
||||
pi.registerFlag("shared-flag", {
|
||||
description: "first",
|
||||
type: "boolean",
|
||||
default: true,
|
||||
});
|
||||
}
|
||||
`;
|
||||
const second = `
|
||||
export default function(pi) {
|
||||
pi.registerFlag("shared-flag", {
|
||||
description: "second",
|
||||
type: "boolean",
|
||||
default: false,
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "a-first.ts"), first);
|
||||
fs.writeFileSync(path.join(extensionsDir, "b-second.ts"), second);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
const flags = runner.getFlags();
|
||||
|
||||
expect(flags.get("shared-flag")?.description).toBe("first");
|
||||
expect(result.runtime.flagValues.get("shared-flag")).toBe(true);
|
||||
});
|
||||
|
||||
it("can set flag values", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.registerFlag("test-flag", {
|
||||
description: "Test flag",
|
||||
handler: async () => {},
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "flag.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
// Setting a flag value should not throw
|
||||
runner.setFlagValue("--test-flag", true);
|
||||
|
||||
// The flag values are stored in the shared runtime
|
||||
expect(result.runtime.flagValues.get("--test-flag")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("tool_result chaining", () => {
|
||||
it("chains content modifications across handlers", async () => {
|
||||
const extCode1 = `
|
||||
export default function(pi) {
|
||||
pi.on("tool_result", async (event) => {
|
||||
return {
|
||||
content: [...event.content, { type: "text", text: "ext1" }],
|
||||
};
|
||||
});
|
||||
}
|
||||
`;
|
||||
const extCode2 = `
|
||||
export default function(pi) {
|
||||
pi.on("tool_result", async (event) => {
|
||||
return {
|
||||
content: [...event.content, { type: "text", text: "ext2" }],
|
||||
};
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "tool-result-1.ts"), extCode1);
|
||||
fs.writeFileSync(path.join(extensionsDir, "tool-result-2.ts"), extCode2);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
const chained = await runner.emitToolResult({
|
||||
type: "tool_result",
|
||||
toolName: "my_tool",
|
||||
toolCallId: "call-1",
|
||||
input: {},
|
||||
content: [{ type: "text", text: "base" }],
|
||||
details: { initial: true },
|
||||
isError: false,
|
||||
});
|
||||
|
||||
expect(chained).toBeDefined();
|
||||
const chainedContent = chained?.content;
|
||||
expect(chainedContent).toBeDefined();
|
||||
expect(chainedContent![0]).toEqual({ type: "text", text: "base" });
|
||||
expect(chainedContent).toHaveLength(3);
|
||||
const appendedText = chainedContent!
|
||||
.slice(1)
|
||||
.filter(
|
||||
(item): item is { type: "text"; text: string } =>
|
||||
item.type === "text",
|
||||
)
|
||||
.map((item) => item.text);
|
||||
expect(appendedText.sort()).toEqual(["ext1", "ext2"]);
|
||||
});
|
||||
|
||||
it("preserves previous modifications when later handlers return partial patches", async () => {
|
||||
const extCode1 = `
|
||||
export default function(pi) {
|
||||
pi.on("tool_result", async () => {
|
||||
return {
|
||||
content: [{ type: "text", text: "first" }],
|
||||
details: { source: "ext1" },
|
||||
};
|
||||
});
|
||||
}
|
||||
`;
|
||||
const extCode2 = `
|
||||
export default function(pi) {
|
||||
pi.on("tool_result", async () => {
|
||||
return {
|
||||
isError: true,
|
||||
};
|
||||
});
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-result-partial-1.ts"),
|
||||
extCode1,
|
||||
);
|
||||
fs.writeFileSync(
|
||||
path.join(extensionsDir, "tool-result-partial-2.ts"),
|
||||
extCode2,
|
||||
);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
const chained = await runner.emitToolResult({
|
||||
type: "tool_result",
|
||||
toolName: "my_tool",
|
||||
toolCallId: "call-2",
|
||||
input: {},
|
||||
content: [{ type: "text", text: "base" }],
|
||||
details: { initial: true },
|
||||
isError: false,
|
||||
});
|
||||
|
||||
expect(chained).toEqual({
|
||||
content: [{ type: "text", text: "first" }],
|
||||
details: { source: "ext1" },
|
||||
isError: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("provider registration", () => {
|
||||
it("pre-bind unregister removes all queued registrations for a provider", () => {
|
||||
const runtime = createExtensionRuntime();
|
||||
|
||||
runtime.registerProvider("queued-provider", providerModelConfig);
|
||||
runtime.registerProvider("queued-provider", {
|
||||
...providerModelConfig,
|
||||
models: [
|
||||
{
|
||||
id: "instant-model-2",
|
||||
name: "Instant Model 2",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
],
|
||||
});
|
||||
expect(runtime.pendingProviderRegistrations).toHaveLength(2);
|
||||
|
||||
runtime.unregisterProvider("queued-provider");
|
||||
expect(runtime.pendingProviderRegistrations).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("post-bind register and unregister take effect immediately", () => {
|
||||
const runtime = createExtensionRuntime();
|
||||
const runner = new ExtensionRunner(
|
||||
[],
|
||||
runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
runner.bindCore(extensionActions, extensionContextActions);
|
||||
expect(runtime.pendingProviderRegistrations).toHaveLength(0);
|
||||
|
||||
runtime.registerProvider("instant-provider", providerModelConfig);
|
||||
expect(runtime.pendingProviderRegistrations).toHaveLength(0);
|
||||
expect(
|
||||
modelRegistry.find("instant-provider", "instant-model"),
|
||||
).toBeDefined();
|
||||
|
||||
runtime.unregisterProvider("instant-provider");
|
||||
expect(
|
||||
modelRegistry.find("instant-provider", "instant-model"),
|
||||
).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("hasHandlers", () => {
|
||||
it("returns true when handlers exist for event type", async () => {
|
||||
const extCode = `
|
||||
export default function(pi) {
|
||||
pi.on("tool_call", async () => undefined);
|
||||
}
|
||||
`;
|
||||
fs.writeFileSync(path.join(extensionsDir, "handler.ts"), extCode);
|
||||
|
||||
const result = await discoverAndLoadExtensions([], tempDir, tempDir);
|
||||
const runner = new ExtensionRunner(
|
||||
result.extensions,
|
||||
result.runtime,
|
||||
tempDir,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
expect(runner.hasHandlers("tool_call")).toBe(true);
|
||||
expect(runner.hasHandlers("agent_end")).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
33
packages/coding-agent/test/fixtures/assistant-message-with-thinking-code.json
vendored
Normal file
33
packages/coding-agent/test/fixtures/assistant-message-with-thinking-code.json
vendored
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "thinking",
|
||||
"thinking": "So `findModel` just looks up a model by provider + id from the merged models list (built-in + models.json).\n\nFor the SDK, the flow would be:1. `model` provided → use it\n2. Else check settings for default → call `findModel(provider, id)` → check if available (has API key)\n3. Else first from `getAvailableModels()`\n\nThe `findModel` function returns `{ model, error }`. For SDK export, we might want a simpler signature.Options for SDK:\n\n**Option A: Keep current signature**```typescript\nfunction findModel(provider: string, modelId: string, agentDir?: string): { model: Model<any> | null; error: string | null };\n```**Option B: Simpler - just returns model or null**\n```typescript\nfunction findModel(provider: string, modelId: string, agentDir?: string): Model<any> | null;```\n\n**Option C: Throws on error**```typescript\nfunction findModel(provider: string, modelId: string, agentDir?: string): Model<any>; // throws if not found\n```\n\nI think **Option B** is fine for SDK. Errors from loading models.json are edge cases - if the file is malformed, we can throw. For \"model not found\", just return null.\n\nActually, for the SDK, maybe we should rethink. The user of the SDK will likely:\n1. Know which model they want and construct it directly2. Or want to pick from available modelsFor case 1, they don't need findModel - they construct the Model object.\nFor case 2, they use `discoverAvailableModels()` and pick.`findModel` is useful for looking up by string ID (like from config or user input). But the signature could just be:```typescriptfunction findModel(provider: string, modelId: string, agentDir?: string): Model<any> | null;\n```\n\nReturns the model if found (in built-ins or models.json), null if not found. Throws if models.json is malformed (exceptional case).\n\n\nThe SDK's model discovery functions provide a clean, straightforward way to handle model selection. I'll outline the key methods for finding and selecting models programmatically. 
The core functions allow discovering all models, filtering to available models with valid API keys, and finding a specific model by provider and ID. This gives developers flexible options for model management in their applications.\n\nIn the session creation process, if no model is explicitly specified, the system will first attempt to use the default provider and model configuration. If that fails, it will then retrieve the first available model from the discovered set. Should no models be found, an error will be raised to prevent silent failures.\n\nThe potential complexity arises from the need to support custom model configurations and ensure robust model selection across different scenarios.",
|
||||
"thinkingSignature": "EqsVCkYIChgCKkBUqXXsLfuqzbiQxI5MbbS31Yw670soGNGQX63Lb/osACS2A5rrS8Gd939xNdXbHUGouQngzqIPs2KFk9HWmKajEgxqYOzDBE7h++0vFUUaDF8r9MacsHyJPwr3XiIwn3spAIQV8IxgAIFoxdYefFrCAeez7pnXqUqaK2QTTG3OjWpCIYzPwvEVs7ObbWVbKpIUy2X7MkKrZOdtlTGRUvmuEij6vCbXjPwj0zH+mjaefERbkL+aT84QCiStHqc7uuM5nZvntl4KZ76Mt1VrFoBXwi3val4fJDP9GhDj7tkD0Id22udIb+yHBuo8yBnyy2fWLMaeRTEn8vN2eUaqiuE7wvgvPF4tf6bn4mKjh/HEwpAzJ+rLsE/hmXA9eG/hub387iF4rnLP/rDJR4olzSQyb7bPpdQ5RLRIymkRJce4wRY0nFxPuZayiYooGwI7gqKPJz2mkTCdWZABn4n6PpqZB+caXCn63A3WvJtZacItZ6z3DAoi2I3jwsOC8BWQmHKBfCXd9wttQ+HuYYmduASJ3j/TNtdO1vZsiItknKneZXTPhmt0nuqphgWiDWnPFv1iOoJw++tLJO+u2hYOtM/3Nx6O+l9QWcQgkgnQjN29SRd7uiI14sTogJkWVrVaKJ6StXx+/mXrro7I++6PSBMnFJevIJ89MFVB8EiYs+x4pOuEJDaNekBU3Tm6+Eg4vL2SguijClR9yv+4bQsIHKtq6QLLABt1SuNRvO9HgUIOx6HDdn0PXeInhqJ/aILA4bRryf6lbRp0qNEcexAVrT8zbrMUkY2SzMX1kEo4IvmprCzmukHXQdal2AoxSdxPp2br12Lcz0njxzhWFd58f0gLRVHKf7gGzTWe6EGVfvve7/yquhVG1IWkDid54PcdqUEpIbeRZE4gklPQhEflfZ9ppnyeRDVmBq4N9Wmv+S19z8/sLRXMXBM2Lv31vVf7QXjZGmJxEWpKfXGPOmuChZsgZuMZSVoXSh9u+gr+M29Se6ArQ/L18/3p8grm8TwT2TKuaMeuIdki7Ja0jQQYPOqoIVHVXahtVto/4YVGcClx6eTbNtXDfKDKnWw7Eu+l+6wjF9nqEjTLQIxjpT6ABWhXw1ersAFIDgDDwRLUZFHZ8i1jQKvg3IxgWsqIyyMXjwm1gfwzeeOrNIkx8KwIGybeheHX1vZRsqaOAhARiziiBsl4PLD8ci6OLJgp1ZBke9QW8DFFwMZY6hNf4yYOb0/6K2g+qx9Z0OuHW7p2MRef97oLiDyx/WCNgv6DUW2FxHy2KjtcB50aeSLfccBCJOXkRlnym08nsBYa7H17REi2O30wkoOPnOYNqytE40EPYwqUPUdRF6WwN6LFEpbGGmQ5atrJ/upzz+MoBoeqeoF0fOrO3AaW27E7dvduDCrK2hF/TZZN5FHipNNHP/JY5NhWPBhCBumxJN9uf+nGqPcQwn3IL0eriz9ki0EUBdAYXY9kCxKYU3DhsbLsBn3YfhXLbLIT1Woy4RUqkWN7BXOC8aWi+uLVm0JUXVt/dr6ndnxdyqJdxc22Wz4EHFZZe+VtntNr1BF/6VsUoQSsSR1c0QvbxPE3iLhZ3R9RPmKduotJsQ6hb3aZrAgsMF5KWlmOKcouGQW1TNEwd8tI8Rxg91FdOuU0o98LddVlUFknfYr9gUn3/NorpUCKjDgZDyY4Oy7QeHWg9E6s6jeH1aYhHsO8mZiPGxQi4n5y0pSU8jFHEoIvlgQ+hN+7bsYRfUNMXfxsYuUZKiUqvCIiInu6W1dkxjS2GOmiQcCjB9XzOxF9gHXEkU2E4xHmSkbpBGrJjR/DHZ8gsosTPDg9VmFY2aYX/WLGYbjguzaKD8zS9LpQ3UZmbC0Jv9bZUGn3TdRRJj+xLY4fqWxEvplWNTJRTAPkHlQbawvgs8ziL9gBmfohPKHg+MA4bFCP2BPaaw/Xmw03TuDhaQ/Nb4e52N7heo
N3DMd3NUQl/YFeb4kqzcF24GLhLi/Pbl2Y/JehWVgNyFeIvMkk7laFgydLqCMTWGl8VHiy3koUXOgPG/s/qERzIyYprLd/h5gcGt0aQMgl089UU69wUhT0xXkZjuUSMeCUKHLgjvhbn6gaMoMCrcqe+Ar0eZPGeW7OR9w8jhC/rE5Lh8zMpQ2uKo2Hwi/eFZul6Qq1ZSthx0kcsbqT8wW6Fyr8O42mxUmBVS8TUhvVSOccGVy5tBOXQpxQPgYbXNyUy3obUi9vhPzViEbt6KDIAW5bQwbuDSMHd+tf9nWd8H1nvEO2aWM6/v4+/qLSWqMcTXs3Rea2+GFMQkbRzj1pRN1MLzSjBP5pGLlYPQre5RHK3kImZ7ISMj7oQWfzNYLkswkD2Ay3nzk6v4JpjaFNFAaOhTHjtO0c4qA2elkvQ/5RrtD4g4/wlH+p048wIiuQhw4Iiu3rcFrclXUWny74ON5n56OY5uIXsPsmQQwCGUwtZFBVe5bP3nVgoHCBPI0SyEQXxgbd4q0o+HZyjkH9KdOL6LpxdxbrqbvONS6/EMMheWHxDAmibL5pFJh4z60o+aNejvMoZahKX04M5/KC1k7gwzAn/yIxC+VEPi/IijxKKlU0mEPE+q/HAHTe7S5CdrM5vWzgzNefKk0PjMW3/OnveH9mFoMHmIybWgrCZPlPzLyL3PPBW1Iv6q1g/NOzfxczx/ZbudD3UQOY0u84Acjcb938Y7uvUNHPLfSopleds0hGGgeUGy6aLdidmypcc3b8icF8k3KDozTN0v/3EqgLzb4PY6HML6dIwI6UYpeMvb110GWh1mXgl45v4afFwojhp0Ld92WnOrxEIMKv9/S6NCiUxR6KwAhp7ssPzdPvlTTtlmN01Xn95+Vo4GuZHvgyjcBnF9dIy+WJhwDRcgLrwV+wkZuGR71ACKTdHE3jW3QEuWlf4HuV+63c/OZj3B2rB2s2zadJVGDBn35dX434ZnJZudakoOGcK/0LZ2bhSN8qCkxs/2KJk7TMtBi6wsmQ7VGw74I1+c45iPjRcaO63UO/1rI7dZZZkD5lKje8BgwPBt+V+0DW6OHrfYC6qf+Nc5xrkE8Q6R4fbijYf3MkaDQ1FhJpGUiBIZ90tXPcs06utEq8kNT1kMGLdgoFlHRA3gQzJY7vU2kYVNs0sTp1pQL3JPSHtBdITHnITJLN4tVun0lrv4Ev4L4qkXT+tygwmmYvlR+JlnxkdMDd0DhNcv4lF9ltD1zQaTgfPjSbl3+lW/znRKgXTXqxDzHDIusv5u0lna2X7M7WWXoucp3eyHqTtZiP4DgsDgh9lNiwz4zu0Uqh7DNF5ZTWrSwEItMbQc0vs3Bryd7TirHAOK928rxRwl2Vo9YdDxEbVGV7OmSGdu3NOLDVp/oFvwsSUhFD/BwjgX9xSbLgLab8g3lEsbMGnA9h+aY0M1nSlmjjLavl5XvMvwd6VDRAbEMb+/FdI6Sp/z3/Yx4VmKeh7lB3rPPF6n9RdUijTKbBn6hVRyEZw30n5BMxRwGYxFxb5xhPhOLpmj6UNEQJUkileYUhEGN7o/GMLBBT7eOXujXJhq/ADhex7Wilj5rFy0VtIfuUkrNXpRxVLOdOTA3/eNxYn21B91jFM7UWCZc42BGZhkEvxFuIS+wK8u3TBvMv3RgB"
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"text": "So the current flow in main.ts:\n\n**findInitialModelForSession:**\n1. CLI args (`--provider` + `--model`) → use if both provided\n2. Scoped models (`--models` flag) → use first (unless --continue/--resume)\n3. Settings (`defaultProvider` + `defaultModel`) → use if found\n4. First available model with valid API key\n5. Return null if none\n\n**initialThinking:**\n1. Start with `'off'`\n2. If scoped models (and not continuing) → use first scoped model's thinking level\n3. Else check `settings.defaultThinkingLevel`\n4. CLI `--thinking` overrides\n5. Clamp to model capabilities\n\n---\n\nFor SDK, we don't have CLI args or scoped models. Simplifies to:\n\n**model resolution (if omitted):**\n1. Check `settings.defaultProvider` + `settings.defaultModel` → find it, verify has API key\n2. Else first from `discoverAvailableModels()`\n3. Error if none\n\n**thinkingLevel resolution (if omitted):**\n1. Check `settings.defaultThinkingLevel`\n2. Else `'off'`\n3. Clamp to model capabilities\n\n---\n\nFor `findModel`, current signature returns `{ model, error }`. For SDK, simpler:\n\n```typescript\n// Returns model or null. Throws only if models.json is malformed.\nexport function findModel(provider: string, modelId: string, agentDir?: string): Model<any> | null;\n```\n\nSo the model-related exports:\n\n```typescript\n// All models (built-in + models.json)\nexport function discoverModels(agentDir?: string): Model<any>[];\n\n// Models with valid API keys\nexport function discoverAvailableModels(agentDir?: string): Promise<Model<any>[]>;\n\n// Lookup by provider/id\nexport function findModel(provider: string, modelId: string, agentDir?: string): Model<any> | null;\n```\n\nDoes this address the wonkiness, or is there something else bothering you about it?"
|
||||
}
|
||||
],
|
||||
"api": "anthropic-messages",
|
||||
"provider": "anthropic",
|
||||
"model": "claude-opus-4-5",
|
||||
"usage": {
|
||||
"input": 8,
|
||||
"output": 1211,
|
||||
"cacheRead": 89775,
|
||||
"cacheWrite": 611,
|
||||
"totalTokens": 91605,
|
||||
"cost": {
|
||||
"input": 4e-5,
|
||||
"output": 0.030275,
|
||||
"cacheRead": 0.0448875,
|
||||
"cacheWrite": 0.00381875,
|
||||
"total": 0.07902124999999999
|
||||
}
|
||||
},
|
||||
"stopReason": "stop",
|
||||
"timestamp": 1766357372971
|
||||
}
|
||||
1003
packages/coding-agent/test/fixtures/before-compaction.jsonl
vendored
Normal file
1003
packages/coding-agent/test/fixtures/before-compaction.jsonl
vendored
Normal file
File diff suppressed because one or more lines are too long
0
packages/coding-agent/test/fixtures/empty-agent/.gitkeep
vendored
Normal file
0
packages/coding-agent/test/fixtures/empty-agent/.gitkeep
vendored
Normal file
0
packages/coding-agent/test/fixtures/empty-cwd/.gitkeep
vendored
Normal file
0
packages/coding-agent/test/fixtures/empty-cwd/.gitkeep
vendored
Normal file
1019
packages/coding-agent/test/fixtures/large-session.jsonl
vendored
Normal file
1019
packages/coding-agent/test/fixtures/large-session.jsonl
vendored
Normal file
File diff suppressed because one or more lines are too long
8
packages/coding-agent/test/fixtures/skills-collision/first/calendar/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills-collision/first/calendar/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: calendar
|
||||
description: First calendar skill.
|
||||
---
|
||||
|
||||
# Calendar (First)
|
||||
|
||||
This is the first calendar skill.
|
||||
8
packages/coding-agent/test/fixtures/skills-collision/second/calendar/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills-collision/second/calendar/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: calendar
|
||||
description: Second calendar skill.
|
||||
---
|
||||
|
||||
# Calendar (Second)
|
||||
|
||||
This is the second calendar skill.
|
||||
8
packages/coding-agent/test/fixtures/skills/consecutive-hyphens/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/consecutive-hyphens/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: bad--name
|
||||
description: A skill with consecutive hyphens in the name.
|
||||
---
|
||||
|
||||
# Consecutive Hyphens
|
||||
|
||||
This skill has consecutive hyphens in its name.
|
||||
9
packages/coding-agent/test/fixtures/skills/disable-model-invocation/SKILL.md
vendored
Normal file
9
packages/coding-agent/test/fixtures/skills/disable-model-invocation/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
---
|
||||
name: disable-model-invocation
|
||||
description: A skill that cannot be invoked by the model.
|
||||
disable-model-invocation: true
|
||||
---
|
||||
|
||||
# Manual Only Skill
|
||||
|
||||
This skill can only be invoked via /skill:disable-model-invocation.
|
||||
8
packages/coding-agent/test/fixtures/skills/invalid-name-chars/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/invalid-name-chars/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: Invalid_Name
|
||||
description: A skill with invalid characters in the name.
|
||||
---
|
||||
|
||||
# Invalid Name
|
||||
|
||||
This skill has uppercase and underscore in the name.
|
||||
8
packages/coding-agent/test/fixtures/skills/invalid-yaml/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/invalid-yaml/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: invalid-yaml
|
||||
description: [unclosed bracket
|
||||
---
|
||||
|
||||
# Invalid YAML Skill
|
||||
|
||||
This skill has invalid YAML in the frontmatter.
|
||||
8
packages/coding-agent/test/fixtures/skills/long-name/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/long-name/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: this-is-a-very-long-skill-name-that-exceeds-the-sixty-four-character-limit-set-by-the-standard
|
||||
description: A skill with a name that exceeds 64 characters.
|
||||
---
|
||||
|
||||
# Long Name
|
||||
|
||||
This skill's name is too long.
|
||||
7
packages/coding-agent/test/fixtures/skills/missing-description/SKILL.md
vendored
Normal file
7
packages/coding-agent/test/fixtures/skills/missing-description/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
name: missing-description
|
||||
---
|
||||
|
||||
# Missing Description
|
||||
|
||||
This skill has no description field.
|
||||
11
packages/coding-agent/test/fixtures/skills/multiline-description/SKILL.md
vendored
Normal file
11
packages/coding-agent/test/fixtures/skills/multiline-description/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
name: multiline-description
|
||||
description: |
|
||||
This is a multiline description.
|
||||
It spans multiple lines.
|
||||
And should be normalized.
|
||||
---
|
||||
|
||||
# Multiline Description Skill
|
||||
|
||||
This skill tests that multiline YAML descriptions are normalized to single lines.
|
||||
8
packages/coding-agent/test/fixtures/skills/name-mismatch/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/name-mismatch/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: different-name
|
||||
description: A skill with a name that doesn't match the directory.
|
||||
---
|
||||
|
||||
# Name Mismatch
|
||||
|
||||
This skill's name doesn't match its parent directory.
|
||||
8
packages/coding-agent/test/fixtures/skills/nested/child-skill/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/nested/child-skill/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: child-skill
|
||||
description: A nested skill in a subdirectory.
|
||||
---
|
||||
|
||||
# Child Skill
|
||||
|
||||
This skill is nested in a subdirectory.
|
||||
3
packages/coding-agent/test/fixtures/skills/no-frontmatter/SKILL.md
vendored
Normal file
3
packages/coding-agent/test/fixtures/skills/no-frontmatter/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# No Frontmatter
|
||||
|
||||
This skill has no YAML frontmatter at all.
|
||||
10
packages/coding-agent/test/fixtures/skills/unknown-field/SKILL.md
vendored
Normal file
10
packages/coding-agent/test/fixtures/skills/unknown-field/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
---
|
||||
name: unknown-field
|
||||
description: A skill with an unknown frontmatter field.
|
||||
author: someone
|
||||
version: 1.0
|
||||
---
|
||||
|
||||
# Unknown Field
|
||||
|
||||
This skill has non-standard frontmatter fields.
|
||||
8
packages/coding-agent/test/fixtures/skills/valid-skill/SKILL.md
vendored
Normal file
8
packages/coding-agent/test/fixtures/skills/valid-skill/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
---
|
||||
name: valid-skill
|
||||
description: A valid skill for testing purposes.
|
||||
---
|
||||
|
||||
# Valid Skill
|
||||
|
||||
This is a valid skill that follows the Agent Skills standard.
|
||||
114
packages/coding-agent/test/footer-width.test.ts
Normal file
114
packages/coding-agent/test/footer-width.test.ts
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import { visibleWidth } from "@mariozechner/pi-tui";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
import type { AgentSession } from "../src/core/agent-session.js";
|
||||
import type { ReadonlyFooterDataProvider } from "../src/core/footer-data-provider.js";
|
||||
import { FooterComponent } from "../src/modes/interactive/components/footer.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
type AssistantUsage = {
|
||||
input: number;
|
||||
output: number;
|
||||
cacheRead: number;
|
||||
cacheWrite: number;
|
||||
cost: { total: number };
|
||||
};
|
||||
|
||||
function createSession(options: {
|
||||
sessionName: string;
|
||||
modelId?: string;
|
||||
provider?: string;
|
||||
reasoning?: boolean;
|
||||
thinkingLevel?: string;
|
||||
usage?: AssistantUsage;
|
||||
}): AgentSession {
|
||||
const usage = options.usage;
|
||||
const entries =
|
||||
usage === undefined
|
||||
? []
|
||||
: [
|
||||
{
|
||||
type: "message",
|
||||
message: {
|
||||
role: "assistant",
|
||||
usage,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
const session = {
|
||||
state: {
|
||||
model: {
|
||||
id: options.modelId ?? "test-model",
|
||||
provider: options.provider ?? "test",
|
||||
contextWindow: 200_000,
|
||||
reasoning: options.reasoning ?? false,
|
||||
},
|
||||
thinkingLevel: options.thinkingLevel ?? "off",
|
||||
},
|
||||
sessionManager: {
|
||||
getEntries: () => entries,
|
||||
getSessionName: () => options.sessionName,
|
||||
},
|
||||
getContextUsage: () => ({ contextWindow: 200_000, percent: 12.3 }),
|
||||
modelRegistry: {
|
||||
isUsingOAuth: () => false,
|
||||
},
|
||||
};
|
||||
|
||||
return session as unknown as AgentSession;
|
||||
}
|
||||
|
||||
function createFooterData(providerCount: number): ReadonlyFooterDataProvider {
|
||||
const provider = {
|
||||
getGitBranch: () => "main",
|
||||
getExtensionStatuses: () => new Map<string, string>(),
|
||||
getAvailableProviderCount: () => providerCount,
|
||||
onBranchChange: (callback: () => void) => {
|
||||
void callback;
|
||||
return () => {};
|
||||
},
|
||||
};
|
||||
|
||||
return provider;
|
||||
}
|
||||
|
||||
describe("FooterComponent width handling", () => {
|
||||
beforeAll(() => {
|
||||
initTheme(undefined, false);
|
||||
});
|
||||
|
||||
it("keeps all lines within width for wide session names", () => {
|
||||
const width = 93;
|
||||
const session = createSession({ sessionName: "한글".repeat(30) });
|
||||
const footer = new FooterComponent(session, createFooterData(1));
|
||||
|
||||
const lines = footer.render(width);
|
||||
for (const line of lines) {
|
||||
expect(visibleWidth(line)).toBeLessThanOrEqual(width);
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps stats line within width for wide model and provider names", () => {
|
||||
const width = 60;
|
||||
const session = createSession({
|
||||
sessionName: "",
|
||||
modelId: "模".repeat(30),
|
||||
provider: "공급자",
|
||||
reasoning: true,
|
||||
thinkingLevel: "high",
|
||||
usage: {
|
||||
input: 12_345,
|
||||
output: 6_789,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { total: 1.234 },
|
||||
},
|
||||
});
|
||||
const footer = new FooterComponent(session, createFooterData(2));
|
||||
|
||||
const lines = footer.render(width);
|
||||
for (const line of lines) {
|
||||
expect(visibleWidth(line)).toBeLessThanOrEqual(width);
|
||||
}
|
||||
});
|
||||
});
|
||||
72
packages/coding-agent/test/frontmatter.test.ts
Normal file
72
packages/coding-agent/test/frontmatter.test.ts
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
parseFrontmatter,
|
||||
stripFrontmatter,
|
||||
} from "../src/utils/frontmatter.js";
|
||||
|
||||
describe("parseFrontmatter", () => {
|
||||
it("parses keys, strips quotes, and returns body", () => {
|
||||
const input =
|
||||
"---\nname: \"skill-name\"\ndescription: 'A desc'\nfoo-bar: value\n---\n\nBody text";
|
||||
const { frontmatter, body } =
|
||||
parseFrontmatter<Record<string, string>>(input);
|
||||
expect(frontmatter.name).toBe("skill-name");
|
||||
expect(frontmatter.description).toBe("A desc");
|
||||
expect(frontmatter["foo-bar"]).toBe("value");
|
||||
expect(body).toBe("Body text");
|
||||
});
|
||||
|
||||
it("normalizes newlines and handles CRLF", () => {
|
||||
const input = "---\r\nname: test\r\n---\r\nLine one\r\nLine two";
|
||||
const { body } = parseFrontmatter<Record<string, string>>(input);
|
||||
expect(body).toBe("Line one\nLine two");
|
||||
});
|
||||
|
||||
it("throws on invalid YAML frontmatter", () => {
|
||||
const input = "---\nfoo: [bar\n---\nBody";
|
||||
expect(() => parseFrontmatter<Record<string, string>>(input)).toThrow(
|
||||
/at line 1, column 10/,
|
||||
);
|
||||
});
|
||||
|
||||
it("parses | multiline yaml syntax", () => {
|
||||
const input = "---\ndescription: |\n Line one\n Line two\n---\n\nBody";
|
||||
const { frontmatter, body } =
|
||||
parseFrontmatter<Record<string, string>>(input);
|
||||
expect(frontmatter.description).toBe("Line one\nLine two\n");
|
||||
expect(body).toBe("Body");
|
||||
});
|
||||
|
||||
it("returns original content when frontmatter is missing or unterminated", () => {
|
||||
const noFrontmatter = "Just text\nsecond line";
|
||||
const missingEnd = "---\nname: test\nBody without terminator";
|
||||
const resultNoFrontmatter =
|
||||
parseFrontmatter<Record<string, string>>(noFrontmatter);
|
||||
const resultMissingEnd =
|
||||
parseFrontmatter<Record<string, string>>(missingEnd);
|
||||
expect(resultNoFrontmatter.body).toBe("Just text\nsecond line");
|
||||
expect(resultMissingEnd.body).toBe(
|
||||
"---\nname: test\nBody without terminator"
|
||||
.replace(/\r\n/g, "\n")
|
||||
.replace(/\r/g, "\n"),
|
||||
);
|
||||
});
|
||||
|
||||
it("returns empty object for empty or comment-only frontmatter", () => {
|
||||
const input = "---\n# just a comment\n---\nBody";
|
||||
const { frontmatter } = parseFrontmatter(input);
|
||||
expect(frontmatter).toEqual({});
|
||||
});
|
||||
});
|
||||
|
||||
describe("stripFrontmatter", () => {
|
||||
it("removes frontmatter and trims body", () => {
|
||||
const input = "---\nkey: value\n---\n\nBody\n";
|
||||
expect(stripFrontmatter(input)).toBe("Body");
|
||||
});
|
||||
|
||||
it("returns body when no frontmatter present", () => {
|
||||
const input = "\n No frontmatter body \n";
|
||||
expect(stripFrontmatter(input)).toBe("\n No frontmatter body \n");
|
||||
});
|
||||
});
|
||||
78
packages/coding-agent/test/git-ssh-url.test.ts
Normal file
78
packages/coding-agent/test/git-ssh-url.test.ts
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { parseGitUrl } from "../src/utils/git.js";
|
||||
|
||||
describe("Git URL Parsing", () => {
|
||||
describe("protocol URLs (accepted without git: prefix)", () => {
|
||||
it("should parse HTTPS URL", () => {
|
||||
const result = parseGitUrl("https://github.com/user/repo");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
repo: "https://github.com/user/repo",
|
||||
});
|
||||
});
|
||||
|
||||
it("should parse ssh:// URL", () => {
|
||||
const result = parseGitUrl("ssh://git@github.com/user/repo");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
repo: "ssh://git@github.com/user/repo",
|
||||
});
|
||||
});
|
||||
|
||||
it("should parse protocol URL with ref", () => {
|
||||
const result = parseGitUrl("https://github.com/user/repo@v1.0.0");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
ref: "v1.0.0",
|
||||
repo: "https://github.com/user/repo",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("shorthand URLs (accepted only with git: prefix)", () => {
|
||||
it("should parse git@host:path with git: prefix", () => {
|
||||
const result = parseGitUrl("git:git@github.com:user/repo");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
repo: "git@github.com:user/repo",
|
||||
});
|
||||
});
|
||||
|
||||
it("should parse host/path shorthand with git: prefix", () => {
|
||||
const result = parseGitUrl("git:github.com/user/repo");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
repo: "https://github.com/user/repo",
|
||||
});
|
||||
});
|
||||
|
||||
it("should parse shorthand with ref and git: prefix", () => {
|
||||
const result = parseGitUrl("git:git@github.com:user/repo@v1.0.0");
|
||||
expect(result).toMatchObject({
|
||||
host: "github.com",
|
||||
path: "user/repo",
|
||||
ref: "v1.0.0",
|
||||
repo: "git@github.com:user/repo",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("unsupported without git: prefix", () => {
|
||||
it("should reject git@host:path without git: prefix", () => {
|
||||
expect(parseGitUrl("git@github.com:user/repo")).toBeNull();
|
||||
});
|
||||
|
||||
it("should reject host/path shorthand without git: prefix", () => {
|
||||
expect(parseGitUrl("github.com/user/repo")).toBeNull();
|
||||
});
|
||||
|
||||
it("should reject user/repo shorthand", () => {
|
||||
expect(parseGitUrl("user/repo")).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
438
packages/coding-agent/test/git-update.test.ts
Normal file
438
packages/coding-agent/test/git-update.test.ts
Normal file
|
|
@ -0,0 +1,438 @@
|
|||
/**
|
||||
* Tests for git-based extension updates, specifically handling force-push scenarios.
|
||||
*
|
||||
* These tests verify that DefaultPackageManager.update() handles:
|
||||
* - Normal git updates (no force-push)
|
||||
* - Force-pushed remotes gracefully (currently fails, fix needed)
|
||||
*/
|
||||
|
||||
import { spawnSync } from "node:child_process";
|
||||
import { createHash } from "node:crypto";
|
||||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { DefaultPackageManager } from "../src/core/package-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
|
||||
// Helper to run git commands in a directory
|
||||
function git(args: string[], cwd: string): string {
|
||||
const result = spawnSync("git", args, {
|
||||
cwd,
|
||||
encoding: "utf-8",
|
||||
});
|
||||
if (result.status !== 0) {
|
||||
throw new Error(`Command failed: git ${args.join(" ")}\n${result.stderr}`);
|
||||
}
|
||||
return result.stdout.trim();
|
||||
}
|
||||
|
||||
// Helper to create a commit with a file
|
||||
function createCommit(
|
||||
repoDir: string,
|
||||
filename: string,
|
||||
content: string,
|
||||
message: string,
|
||||
): string {
|
||||
writeFileSync(join(repoDir, filename), content);
|
||||
git(["add", filename], repoDir);
|
||||
git(["commit", "-m", message], repoDir);
|
||||
return git(["rev-parse", "HEAD"], repoDir);
|
||||
}
|
||||
|
||||
// Helper to get current commit hash
|
||||
function getCurrentCommit(repoDir: string): string {
|
||||
return git(["rev-parse", "HEAD"], repoDir);
|
||||
}
|
||||
|
||||
// Helper to get file content
|
||||
function getFileContent(repoDir: string, filename: string): string {
|
||||
return readFileSync(join(repoDir, filename), "utf-8");
|
||||
}
|
||||
|
||||
describe("DefaultPackageManager git update", () => {
|
||||
let tempDir: string;
|
||||
let remoteDir: string; // Simulates the "remote" repository
|
||||
let agentDir: string; // The agent directory where extensions are installed
|
||||
let installedDir: string; // The installed extension directory
|
||||
let settingsManager: SettingsManager;
|
||||
let packageManager: DefaultPackageManager;
|
||||
|
||||
// Git source that maps to our installed directory structure.
|
||||
// Must use "git:" prefix so parseSource() treats it as a git source
|
||||
// (bare "github.com/..." is not recognized as a git URL).
|
||||
const gitSource = "git:github.com/test/extension";
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`git-update-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
remoteDir = join(tempDir, "remote");
|
||||
agentDir = join(tempDir, "agent");
|
||||
|
||||
// This matches the path structure: agentDir/git/<host>/<path>
|
||||
installedDir = join(agentDir, "git", "github.com", "test", "extension");
|
||||
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
|
||||
settingsManager = SettingsManager.inMemory();
|
||||
packageManager = new DefaultPackageManager({
|
||||
cwd: tempDir,
|
||||
agentDir,
|
||||
settingsManager,
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
/**
|
||||
* Sets up a "remote" repository and clones it to the installed directory.
|
||||
* This simulates what packageManager.install() would do.
|
||||
* @param sourceOverride Optional source string to use instead of gitSource (e.g., with @ref for pinned tests)
|
||||
*/
|
||||
function setupRemoteAndInstall(sourceOverride?: string): void {
|
||||
// Create "remote" repository
|
||||
mkdirSync(remoteDir, { recursive: true });
|
||||
git(["init"], remoteDir);
|
||||
git(["config", "--local", "user.email", "test@test.com"], remoteDir);
|
||||
git(["config", "--local", "user.name", "Test"], remoteDir);
|
||||
createCommit(remoteDir, "extension.ts", "// v1", "Initial commit");
|
||||
|
||||
// Clone to installed directory (simulating what install() does)
|
||||
mkdirSync(join(agentDir, "git", "github.com", "test"), { recursive: true });
|
||||
git(["clone", remoteDir, installedDir], tempDir);
|
||||
git(["config", "--local", "user.email", "test@test.com"], installedDir);
|
||||
git(["config", "--local", "user.name", "Test"], installedDir);
|
||||
|
||||
// Add to global packages so update() processes this source
|
||||
settingsManager.setPackages([sourceOverride ?? gitSource]);
|
||||
}
|
||||
|
||||
describe("normal updates (no force-push)", () => {
|
||||
it("should update to latest commit when remote has new commits", async () => {
|
||||
setupRemoteAndInstall();
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v1");
|
||||
|
||||
// Add a new commit to remote
|
||||
const newCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v2",
|
||||
"Second commit",
|
||||
);
|
||||
|
||||
// Update via package manager (no args = uses settings)
|
||||
await packageManager.update();
|
||||
|
||||
// Verify update succeeded
|
||||
expect(getCurrentCommit(installedDir)).toBe(newCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v2");
|
||||
});
|
||||
|
||||
it("should handle multiple commits ahead", async () => {
|
||||
setupRemoteAndInstall();
|
||||
|
||||
// Add multiple commits to remote
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Second commit");
|
||||
createCommit(remoteDir, "extension.ts", "// v3", "Third commit");
|
||||
const latestCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v4",
|
||||
"Fourth commit",
|
||||
);
|
||||
|
||||
await packageManager.update();
|
||||
|
||||
expect(getCurrentCommit(installedDir)).toBe(latestCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v4");
|
||||
});
|
||||
|
||||
it("should update even when local checkout has no upstream", async () => {
|
||||
setupRemoteAndInstall();
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Second commit");
|
||||
const latestCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v3",
|
||||
"Third commit",
|
||||
);
|
||||
|
||||
const detachedCommit = getCurrentCommit(installedDir);
|
||||
git(["checkout", detachedCommit], installedDir);
|
||||
|
||||
await packageManager.update();
|
||||
|
||||
expect(getCurrentCommit(installedDir)).toBe(latestCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v3");
|
||||
});
|
||||
});
|
||||
|
||||
describe("force-push scenarios", () => {
|
||||
it("should recover when remote history is rewritten", async () => {
|
||||
setupRemoteAndInstall();
|
||||
const initialCommit = getCurrentCommit(remoteDir);
|
||||
|
||||
// Add commit to remote
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Commit to keep");
|
||||
|
||||
// Update to get the new commit
|
||||
await packageManager.update();
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v2");
|
||||
|
||||
// Now force-push to rewrite history on remote
|
||||
git(["reset", "--hard", initialCommit], remoteDir);
|
||||
const rewrittenCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v2-rewritten",
|
||||
"Rewritten commit",
|
||||
);
|
||||
|
||||
// Update should succeed despite force-push
|
||||
await packageManager.update();
|
||||
|
||||
expect(getCurrentCommit(installedDir)).toBe(rewrittenCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe(
|
||||
"// v2-rewritten",
|
||||
);
|
||||
});
|
||||
|
||||
it("should recover when local commit no longer exists in remote", async () => {
|
||||
setupRemoteAndInstall();
|
||||
|
||||
// Add commits to remote
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Commit A");
|
||||
createCommit(remoteDir, "extension.ts", "// v3", "Commit B");
|
||||
|
||||
// Update to get all commits
|
||||
await packageManager.update();
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v3");
|
||||
|
||||
// Force-push remote to remove commits A and B
|
||||
git(["reset", "--hard", "HEAD~2"], remoteDir);
|
||||
const newCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v2-new",
|
||||
"New commit replacing A and B",
|
||||
);
|
||||
|
||||
// Update should succeed - the commits we had locally no longer exist
|
||||
await packageManager.update();
|
||||
|
||||
expect(getCurrentCommit(installedDir)).toBe(newCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v2-new");
|
||||
});
|
||||
|
||||
it("should handle complete history rewrite", async () => {
|
||||
setupRemoteAndInstall();
|
||||
|
||||
// Remote gets several commits
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "v2");
|
||||
createCommit(remoteDir, "extension.ts", "// v3", "v3");
|
||||
|
||||
await packageManager.update();
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v3");
|
||||
|
||||
// Maintainer force-pushes completely different history
|
||||
git(["reset", "--hard", "HEAD~2"], remoteDir);
|
||||
createCommit(remoteDir, "extension.ts", "// rewrite-a", "Rewrite A");
|
||||
const finalCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// rewrite-b",
|
||||
"Rewrite B",
|
||||
);
|
||||
|
||||
// Should handle this gracefully
|
||||
await packageManager.update();
|
||||
|
||||
expect(getCurrentCommit(installedDir)).toBe(finalCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// rewrite-b");
|
||||
});
|
||||
});
|
||||
|
||||
describe("pinned sources", () => {
|
||||
it("should not update pinned git sources (with @ref)", async () => {
|
||||
// Create remote repo first to get the initial commit
|
||||
mkdirSync(remoteDir, { recursive: true });
|
||||
git(["init"], remoteDir);
|
||||
git(["config", "--local", "user.email", "test@test.com"], remoteDir);
|
||||
git(["config", "--local", "user.name", "Test"], remoteDir);
|
||||
const initialCommit = createCommit(
|
||||
remoteDir,
|
||||
"extension.ts",
|
||||
"// v1",
|
||||
"Initial commit",
|
||||
);
|
||||
|
||||
// Install with pinned ref from the start - full clone to ensure commit is available
|
||||
mkdirSync(join(agentDir, "git", "github.com", "test"), {
|
||||
recursive: true,
|
||||
});
|
||||
git(["clone", remoteDir, installedDir], tempDir);
|
||||
git(["checkout", initialCommit], installedDir);
|
||||
git(["config", "--local", "user.email", "test@test.com"], installedDir);
|
||||
git(["config", "--local", "user.name", "Test"], installedDir);
|
||||
|
||||
// Add to global packages with pinned ref
|
||||
settingsManager.setPackages([`${gitSource}@${initialCommit}`]);
|
||||
|
||||
// Add new commit to remote
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Second commit");
|
||||
|
||||
// Update should be skipped for pinned sources
|
||||
await packageManager.update();
|
||||
|
||||
// Should still be on initial commit
|
||||
expect(getCurrentCommit(installedDir)).toBe(initialCommit);
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v1");
|
||||
});
|
||||
});
|
||||
|
||||
describe("temporary git sources", () => {
|
||||
it("should refresh cached temporary git sources when resolving", async () => {
|
||||
const gitHost = "github.com";
|
||||
const gitPath = "test/extension";
|
||||
const hash = createHash("sha256")
|
||||
.update(`git-${gitHost}-${gitPath}`)
|
||||
.digest("hex")
|
||||
.slice(0, 8);
|
||||
const cachedDir = join(
|
||||
tmpdir(),
|
||||
"pi-extensions",
|
||||
`git-${gitHost}`,
|
||||
hash,
|
||||
gitPath,
|
||||
);
|
||||
const extensionFile = join(
|
||||
cachedDir,
|
||||
"pi-extensions",
|
||||
"session-breakdown.ts",
|
||||
);
|
||||
|
||||
rmSync(cachedDir, { recursive: true, force: true });
|
||||
mkdirSync(join(cachedDir, "pi-extensions"), { recursive: true });
|
||||
writeFileSync(
|
||||
join(cachedDir, "package.json"),
|
||||
JSON.stringify({ pi: { extensions: ["./pi-extensions"] } }, null, 2),
|
||||
);
|
||||
writeFileSync(extensionFile, "// stale");
|
||||
|
||||
const executedCommands: string[] = [];
|
||||
const managerWithInternals = packageManager as unknown as {
|
||||
runCommand: (
|
||||
command: string,
|
||||
args: string[],
|
||||
options?: { cwd?: string },
|
||||
) => Promise<void>;
|
||||
};
|
||||
managerWithInternals.runCommand = async (command, args) => {
|
||||
executedCommands.push(`${command} ${args.join(" ")}`);
|
||||
if (command === "git" && args[0] === "reset") {
|
||||
writeFileSync(extensionFile, "// fresh");
|
||||
}
|
||||
};
|
||||
|
||||
await packageManager.resolveExtensionSources([gitSource], {
|
||||
temporary: true,
|
||||
});
|
||||
|
||||
expect(executedCommands).toContain("git fetch --prune origin");
|
||||
expect(
|
||||
getFileContent(cachedDir, "pi-extensions/session-breakdown.ts"),
|
||||
).toBe("// fresh");
|
||||
});
|
||||
|
||||
it("should not refresh pinned temporary git sources", async () => {
|
||||
const gitHost = "github.com";
|
||||
const gitPath = "test/extension";
|
||||
const hash = createHash("sha256")
|
||||
.update(`git-${gitHost}-${gitPath}`)
|
||||
.digest("hex")
|
||||
.slice(0, 8);
|
||||
const cachedDir = join(
|
||||
tmpdir(),
|
||||
"pi-extensions",
|
||||
`git-${gitHost}`,
|
||||
hash,
|
||||
gitPath,
|
||||
);
|
||||
const extensionFile = join(
|
||||
cachedDir,
|
||||
"pi-extensions",
|
||||
"session-breakdown.ts",
|
||||
);
|
||||
|
||||
rmSync(cachedDir, { recursive: true, force: true });
|
||||
mkdirSync(join(cachedDir, "pi-extensions"), { recursive: true });
|
||||
writeFileSync(
|
||||
join(cachedDir, "package.json"),
|
||||
JSON.stringify({ pi: { extensions: ["./pi-extensions"] } }, null, 2),
|
||||
);
|
||||
writeFileSync(extensionFile, "// pinned");
|
||||
|
||||
const executedCommands: string[] = [];
|
||||
const managerWithInternals = packageManager as unknown as {
|
||||
runCommand: (
|
||||
command: string,
|
||||
args: string[],
|
||||
options?: { cwd?: string },
|
||||
) => Promise<void>;
|
||||
};
|
||||
managerWithInternals.runCommand = async (command, args) => {
|
||||
executedCommands.push(`${command} ${args.join(" ")}`);
|
||||
};
|
||||
|
||||
await packageManager.resolveExtensionSources([`${gitSource}@main`], {
|
||||
temporary: true,
|
||||
});
|
||||
|
||||
expect(executedCommands).toEqual([]);
|
||||
expect(
|
||||
getFileContent(cachedDir, "pi-extensions/session-breakdown.ts"),
|
||||
).toBe("// pinned");
|
||||
});
|
||||
});
|
||||
|
||||
describe("scope-aware update", () => {
|
||||
it("should not install locally when source is only registered globally", async () => {
|
||||
setupRemoteAndInstall();
|
||||
|
||||
// Add a new commit to remote
|
||||
createCommit(remoteDir, "extension.ts", "// v2", "Second commit");
|
||||
|
||||
// The project-scope install path should not exist before or after update
|
||||
const projectGitDir = join(
|
||||
tempDir,
|
||||
".pi",
|
||||
"git",
|
||||
"github.com",
|
||||
"test",
|
||||
"extension",
|
||||
);
|
||||
expect(existsSync(projectGitDir)).toBe(false);
|
||||
|
||||
await packageManager.update(gitSource);
|
||||
|
||||
// Global install should be updated
|
||||
expect(getFileContent(installedDir, "extension.ts")).toBe("// v2");
|
||||
|
||||
// Project-scope directory should NOT have been created
|
||||
expect(existsSync(projectGitDir)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
135
packages/coding-agent/test/image-processing.test.ts
Normal file
135
packages/coding-agent/test/image-processing.test.ts
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
/**
|
||||
* Tests for image processing utilities using Photon.
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { convertToPng } from "../src/utils/image-convert.js";
|
||||
import { formatDimensionNote, resizeImage } from "../src/utils/image-resize.js";
|
||||
|
||||
// Small 2x2 red PNG image (base64) - generated with ImageMagick
|
||||
const TINY_PNG =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAIAAAACAQMAAABIeJ9nAAAAIGNIUk0AAHomAACAhAAA+gAAAIDoAAB1MAAA6mAAADqYAAAXcJy6UTwAAAAGUExURf8AAP///0EdNBEAAAABYktHRAH/Ai3eAAAAB3RJTUUH6gEOADM5Ddoh/wAAAAxJREFUCNdjYGBgAAAABAABJzQnCgAAACV0RVh0ZGF0ZTpjcmVhdGUAMjAyNi0wMS0xNFQwMDo1MTo1NyswMDowMOnKzHgAAAAldEVYdGRhdGU6bW9kaWZ5ADIwMjYtMDEtMTRUMDA6NTE6NTcrMDA6MDCYl3TEAAAAKHRFWHRkYXRlOnRpbWVzdGFtcAAyMDI2LTAxLTE0VDAwOjUxOjU3KzAwOjAwz4JVGwAAAABJRU5ErkJggg==";
|
||||
|
||||
// Small 2x2 blue JPEG image (base64) - generated with ImageMagick
|
||||
const TINY_JPEG =
|
||||
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAACAAIDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAACf/EABQQAQAAAAAAAAAAAAAAAAAAAAD/xAAVAQEBAAAAAAAAAAAAAAAAAAAGCf/EABQRAQAAAAAAAAAAAAAAAAAAAAD/2gAMAwEAAhEDEQA/AD3VTB3/2Q==";
|
||||
|
||||
// 100x100 gray PNG
|
||||
const MEDIUM_PNG_100x100 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAAmJLR0QA/4ePzL8AAAAHdElNRQfqAQ4AMzkN2iH/AAAAP0lEQVRo3u3NQQEAAAQEMASXXYrz2gqst/Lm4ZBIJBKJRCKRSCQSiUQikUgkEolEIpFIJBKJRCKRSCQSiSTsAP1cAUZeKtreAAAAJXRFWHRkYXRlOmNyZWF0ZQAyMDI2LTAxLTE0VDAwOjUxOjU3KzAwOjAw6crMeAAAACV0RVh0ZGF0ZTptb2RpZnkAMjAyNi0wMS0xNFQwMDo1MTo1NyswMDowMJiXdMQAAAAodEVYdGRhdGU6dGltZXN0YW1wADIwMjYtMDEtMTRUMDA6NTE6NTcrMDA6MDDPglUbAAAAAElFTkSuQmCC";
|
||||
|
||||
// 200x200 colored PNG
|
||||
const LARGE_PNG_200x200 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAMgAAADIEAIAAAByquWKAAAAIGNIUk0AAHomAACAhAAA+gAAAIDoAAB1MAAA6mAAADqYAAAXcJy6UTwAAAAGYktHRP///////wlY99wAAAAHdElNRQfqAQ4AMzqU03BFAAAEEElEQVR42u3dvWqUURSF4XXMRAyIhSAWQvwnWtjaWusFx9bW1kKD/wELESxEiBjDjJdwysOC57mCXW32+zGcGbtdACpsdi+T8Xz1GABzm91xMl6sHgNgblzcT/Y+rh4DYG6cv0/2H64eA2BOEgI1fHQHaoyzbXIwVo8BMCcJgRqSEKgxft9Lrn5aPQbA3Ph1klw7Wj0GwJwkBGpYWECN8fMiub63egyAORcWUMPCAmqMH3eTG59XjwEwN76/S24+Wj0GwJwkBGpYWECN8e1fcmuzegyAORcWUMPCAmqM09vJ4dfVYwDMjS9vkzuPV48BMCcJgRqeSAZqjA/nyYP91WMAzElCoIYkBGqMk8Pk6HT1GABzm+3L5JIkBApIQqCGj+5AjfHmb/Lk8uoxAOYkIVBDEgI1LCygxmZ7nFyShEABFxZQw8ICaozXf5KnV1aPATDnwgJqWFhADQsLqOFnDUANFxZQw8ICaoxXZ8mzg9VjAMy5sIAaFhZQw3tYQA1/QgHUkIRADUkI1JCEQA1JCNSQhEANFxZQwzcsoIYkBGpIQqCGJARqSEKghiQEalhYQA1/QgHUcGEBNSwsoIYkBGq4sIAaFhZQw8ICaviGBdRwYQE1LCyghiQEariwgBoWFlDDe1hADS+OAjUkIVBDEgI1JCFQQxICNSQhUMOFBdTwDQuoIQmBGpIQqCEJgRqSEKghCYEaFhZQw4ujQA0XFlDDwgJqSEKghgsLqGFhATUsLKCGb1hADRcWUMPCAmpIQqCGCwuoYWEBNbyHBdTw4ihQQxICNSQhUEMSAjUkIVBDEgI1XFhADd+wgBqSEKghCYEakhCoIQmBGpIQqGFhATW8OArUcGEBNSwsoIYkBGq4sIAaFhZQw8ICaviGBdRwYQE1LCyghiQEariwgBoWFlDDe1hADS+OAjUkIVBDEgI1JCFQQxICNSQhUMOFBdTwDQuoIQmBGpIQqCEJgRqSEKghCYEa49lB8ups9RgAc14cBWpIQqCGhQXUkIRADRcWUMPCAmqMp1eS139WjwEw5xsWUEMSAjUsLKCGJARquLCAGhYWUGM8uZy8+bt6DIA5L44CNSQhUMMTyUCNcXSanByuHgNgThICNSQhUGM82E8+nK8eA2DOzxqAGpIQqDHuPE6+vF09BsDcOPyanN5ePQbAnCQEavgdFlBj3Nok3/6tHgNgzoujQA1JCNQYNx8l39+tHgNgbtz4nPy4u3oMgDlJCNSwsIAa4/pe8vNi9RgAc37WANSQhECNce0o+XWyegyAuXH1U/L73uoxAOYkIVDDwgJqjIORnG1XjwEw508ogBqSEKgx9h8m5+9XjwEwN/Y+Jhf3V48BMCcJgRpjPE+2x6vHAJgbSbLbrR4DYO4/GqiSgXN+ksgAAAAldEVYdGRhdGU6Y3JlYXRlADIwMjYtMDEtMTRUMDA6NTE6NTcrMDA6MDDpysx4AAAAJXRFWHRkYXRlOm1vZGlmeQAyMDI2LTAxLTE0VDAwOjUxOjU3KzAwOjAwmJd0xAAAACh0RVh0ZGF0ZTp0aW1lc3RhbXAAMjAyNi0wMS0xNFQwMDo1MTo1NyswMDowMM+CVRsAAAAASUVORK5CYII=";
|
||||
|
||||
describe("convertToPng", () => {
|
||||
it("should return original data for PNG input", async () => {
|
||||
const result = await convertToPng(TINY_PNG, "image/png");
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.data).toBe(TINY_PNG);
|
||||
expect(result!.mimeType).toBe("image/png");
|
||||
});
|
||||
|
||||
it("should convert JPEG to PNG", async () => {
|
||||
const result = await convertToPng(TINY_JPEG, "image/jpeg");
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.mimeType).toBe("image/png");
|
||||
// Result should be valid base64
|
||||
expect(() => Buffer.from(result!.data, "base64")).not.toThrow();
|
||||
// PNG magic bytes
|
||||
const buffer = Buffer.from(result!.data, "base64");
|
||||
expect(buffer[0]).toBe(0x89);
|
||||
expect(buffer[1]).toBe(0x50); // 'P'
|
||||
expect(buffer[2]).toBe(0x4e); // 'N'
|
||||
expect(buffer[3]).toBe(0x47); // 'G'
|
||||
});
|
||||
});
|
||||
|
||||
describe("resizeImage", () => {
|
||||
it("should return original image if within limits", async () => {
|
||||
const result = await resizeImage(
|
||||
{ type: "image", data: TINY_PNG, mimeType: "image/png" },
|
||||
{ maxWidth: 100, maxHeight: 100, maxBytes: 1024 * 1024 },
|
||||
);
|
||||
|
||||
expect(result.wasResized).toBe(false);
|
||||
expect(result.data).toBe(TINY_PNG);
|
||||
expect(result.originalWidth).toBe(2);
|
||||
expect(result.originalHeight).toBe(2);
|
||||
expect(result.width).toBe(2);
|
||||
expect(result.height).toBe(2);
|
||||
});
|
||||
|
||||
it("should resize image exceeding dimension limits", async () => {
|
||||
const result = await resizeImage(
|
||||
{ type: "image", data: MEDIUM_PNG_100x100, mimeType: "image/png" },
|
||||
{ maxWidth: 50, maxHeight: 50, maxBytes: 1024 * 1024 },
|
||||
);
|
||||
|
||||
expect(result.wasResized).toBe(true);
|
||||
expect(result.originalWidth).toBe(100);
|
||||
expect(result.originalHeight).toBe(100);
|
||||
expect(result.width).toBeLessThanOrEqual(50);
|
||||
expect(result.height).toBeLessThanOrEqual(50);
|
||||
});
|
||||
|
||||
it("should resize image exceeding byte limit", async () => {
|
||||
const originalBuffer = Buffer.from(LARGE_PNG_200x200, "base64");
|
||||
const originalSize = originalBuffer.length;
|
||||
|
||||
// Set maxBytes to less than the original image size
|
||||
const result = await resizeImage(
|
||||
{ type: "image", data: LARGE_PNG_200x200, mimeType: "image/png" },
|
||||
{
|
||||
maxWidth: 2000,
|
||||
maxHeight: 2000,
|
||||
maxBytes: Math.floor(originalSize / 2),
|
||||
},
|
||||
);
|
||||
|
||||
// Should have tried to reduce size
|
||||
const resultBuffer = Buffer.from(result.data, "base64");
|
||||
expect(resultBuffer.length).toBeLessThan(originalSize);
|
||||
});
|
||||
|
||||
it("should handle JPEG input", async () => {
|
||||
const result = await resizeImage(
|
||||
{ type: "image", data: TINY_JPEG, mimeType: "image/jpeg" },
|
||||
{ maxWidth: 100, maxHeight: 100, maxBytes: 1024 * 1024 },
|
||||
);
|
||||
|
||||
expect(result.wasResized).toBe(false);
|
||||
expect(result.originalWidth).toBe(2);
|
||||
expect(result.originalHeight).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("formatDimensionNote", () => {
|
||||
it("should return undefined for non-resized images", () => {
|
||||
const note = formatDimensionNote({
|
||||
data: "",
|
||||
mimeType: "image/png",
|
||||
originalWidth: 100,
|
||||
originalHeight: 100,
|
||||
width: 100,
|
||||
height: 100,
|
||||
wasResized: false,
|
||||
});
|
||||
expect(note).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should return formatted note for resized images", () => {
|
||||
const note = formatDimensionNote({
|
||||
data: "",
|
||||
mimeType: "image/png",
|
||||
originalWidth: 2000,
|
||||
originalHeight: 1000,
|
||||
width: 1000,
|
||||
height: 500,
|
||||
wasResized: true,
|
||||
});
|
||||
expect(note).toContain("original 2000x1000");
|
||||
expect(note).toContain("displayed at 1000x500");
|
||||
expect(note).toContain("2.00"); // scale factor
|
||||
});
|
||||
});
|
||||
194
packages/coding-agent/test/interactive-mode-status.test.ts
Normal file
194
packages/coding-agent/test/interactive-mode-status.test.ts
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
import { Container } from "@mariozechner/pi-tui";
|
||||
import { beforeAll, describe, expect, test, vi } from "vitest";
|
||||
import { InteractiveMode } from "../src/modes/interactive/interactive-mode.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
function renderLastLine(container: Container, width = 120): string {
|
||||
const last = container.children[container.children.length - 1];
|
||||
if (!last) return "";
|
||||
return last.render(width).join("\n");
|
||||
}
|
||||
|
||||
function renderAll(container: Container, width = 120): string {
|
||||
return container.children.flatMap((child) => child.render(width)).join("\n");
|
||||
}
|
||||
|
||||
describe("InteractiveMode.showStatus", () => {
|
||||
beforeAll(() => {
|
||||
// showStatus uses the global theme instance
|
||||
initTheme("dark");
|
||||
});
|
||||
|
||||
test("coalesces immediately-sequential status messages", () => {
|
||||
const fakeThis: any = {
|
||||
chatContainer: new Container(),
|
||||
ui: { requestRender: vi.fn() },
|
||||
lastStatusSpacer: undefined,
|
||||
lastStatusText: undefined,
|
||||
};
|
||||
|
||||
(InteractiveMode as any).prototype.showStatus.call(fakeThis, "STATUS_ONE");
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(2);
|
||||
expect(renderLastLine(fakeThis.chatContainer)).toContain("STATUS_ONE");
|
||||
|
||||
(InteractiveMode as any).prototype.showStatus.call(fakeThis, "STATUS_TWO");
|
||||
// second status updates the previous line instead of appending
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(2);
|
||||
expect(renderLastLine(fakeThis.chatContainer)).toContain("STATUS_TWO");
|
||||
expect(renderLastLine(fakeThis.chatContainer)).not.toContain("STATUS_ONE");
|
||||
});
|
||||
|
||||
test("appends a new status line if something else was added in between", () => {
|
||||
const fakeThis: any = {
|
||||
chatContainer: new Container(),
|
||||
ui: { requestRender: vi.fn() },
|
||||
lastStatusSpacer: undefined,
|
||||
lastStatusText: undefined,
|
||||
};
|
||||
|
||||
(InteractiveMode as any).prototype.showStatus.call(fakeThis, "STATUS_ONE");
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(2);
|
||||
|
||||
// Something else gets added to the chat in between status updates
|
||||
fakeThis.chatContainer.addChild({
|
||||
render: () => ["OTHER"],
|
||||
invalidate: () => {},
|
||||
});
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(3);
|
||||
|
||||
(InteractiveMode as any).prototype.showStatus.call(fakeThis, "STATUS_TWO");
|
||||
// adds spacer + text
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(5);
|
||||
expect(renderLastLine(fakeThis.chatContainer)).toContain("STATUS_TWO");
|
||||
});
|
||||
});
|
||||
|
||||
describe("InteractiveMode.createExtensionUIContext setTheme", () => {
|
||||
test("persists theme changes to settings manager", () => {
|
||||
initTheme("dark");
|
||||
|
||||
let currentTheme = "dark";
|
||||
const settingsManager = {
|
||||
getTheme: vi.fn(() => currentTheme),
|
||||
setTheme: vi.fn((theme: string) => {
|
||||
currentTheme = theme;
|
||||
}),
|
||||
};
|
||||
const fakeThis: any = {
|
||||
session: { settingsManager },
|
||||
settingsManager,
|
||||
ui: { requestRender: vi.fn() },
|
||||
};
|
||||
|
||||
const uiContext = (
|
||||
InteractiveMode as any
|
||||
).prototype.createExtensionUIContext.call(fakeThis);
|
||||
const result = uiContext.setTheme("light");
|
||||
|
||||
expect(result.success).toBe(true);
|
||||
expect(settingsManager.setTheme).toHaveBeenCalledWith("light");
|
||||
expect(currentTheme).toBe("light");
|
||||
expect(fakeThis.ui.requestRender).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
test("does not persist invalid theme names", () => {
|
||||
initTheme("dark");
|
||||
|
||||
const settingsManager = {
|
||||
getTheme: vi.fn(() => "dark"),
|
||||
setTheme: vi.fn(),
|
||||
};
|
||||
const fakeThis: any = {
|
||||
session: { settingsManager },
|
||||
settingsManager,
|
||||
ui: { requestRender: vi.fn() },
|
||||
};
|
||||
|
||||
const uiContext = (
|
||||
InteractiveMode as any
|
||||
).prototype.createExtensionUIContext.call(fakeThis);
|
||||
const result = uiContext.setTheme("__missing_theme__");
|
||||
|
||||
expect(result.success).toBe(false);
|
||||
expect(settingsManager.setTheme).not.toHaveBeenCalled();
|
||||
expect(fakeThis.ui.requestRender).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe("InteractiveMode.showLoadedResources", () => {
|
||||
beforeAll(() => {
|
||||
initTheme("dark");
|
||||
});
|
||||
|
||||
function createShowLoadedResourcesThis(options: {
|
||||
quietStartup: boolean;
|
||||
verbose?: boolean;
|
||||
skills?: Array<{ filePath: string }>;
|
||||
skillDiagnostics?: Array<{
|
||||
type: "warning" | "error" | "collision";
|
||||
message: string;
|
||||
}>;
|
||||
}) {
|
||||
const fakeThis: any = {
|
||||
options: { verbose: options.verbose ?? false },
|
||||
chatContainer: new Container(),
|
||||
settingsManager: {
|
||||
getQuietStartup: () => options.quietStartup,
|
||||
},
|
||||
session: {
|
||||
promptTemplates: [],
|
||||
extensionRunner: undefined,
|
||||
resourceLoader: {
|
||||
getPathMetadata: () => new Map(),
|
||||
getAgentsFiles: () => ({ agentsFiles: [] }),
|
||||
getSkills: () => ({
|
||||
skills: options.skills ?? [],
|
||||
diagnostics: options.skillDiagnostics ?? [],
|
||||
}),
|
||||
getPrompts: () => ({ prompts: [], diagnostics: [] }),
|
||||
getExtensions: () => ({ errors: [] }),
|
||||
getThemes: () => ({ themes: [], diagnostics: [] }),
|
||||
},
|
||||
},
|
||||
formatDisplayPath: (p: string) => p,
|
||||
buildScopeGroups: () => [],
|
||||
formatScopeGroups: () => "resource-list",
|
||||
getShortPath: (p: string) => p,
|
||||
formatDiagnostics: () => "diagnostics",
|
||||
};
|
||||
|
||||
return fakeThis;
|
||||
}
|
||||
|
||||
test("does not show verbose listing on quiet startup during reload", () => {
|
||||
const fakeThis = createShowLoadedResourcesThis({
|
||||
quietStartup: true,
|
||||
skills: [{ filePath: "/tmp/skill/SKILL.md" }],
|
||||
});
|
||||
|
||||
(InteractiveMode as any).prototype.showLoadedResources.call(fakeThis, {
|
||||
extensionPaths: ["/tmp/ext/index.ts"],
|
||||
force: false,
|
||||
showDiagnosticsWhenQuiet: true,
|
||||
});
|
||||
|
||||
expect(fakeThis.chatContainer.children).toHaveLength(0);
|
||||
});
|
||||
|
||||
test("still shows diagnostics on quiet startup when requested", () => {
|
||||
const fakeThis = createShowLoadedResourcesThis({
|
||||
quietStartup: true,
|
||||
skills: [{ filePath: "/tmp/skill/SKILL.md" }],
|
||||
skillDiagnostics: [{ type: "warning", message: "duplicate skill name" }],
|
||||
});
|
||||
|
||||
(InteractiveMode as any).prototype.showLoadedResources.call(fakeThis, {
|
||||
force: false,
|
||||
showDiagnosticsWhenQuiet: true,
|
||||
});
|
||||
|
||||
const output = renderAll(fakeThis.chatContainer);
|
||||
expect(output).toContain("[Skill conflicts]");
|
||||
expect(output).not.toContain("[Skills]");
|
||||
});
|
||||
});
|
||||
994
packages/coding-agent/test/model-registry.test.ts
Normal file
994
packages/coding-agent/test/model-registry.test.ts
Normal file
|
|
@ -0,0 +1,994 @@
|
|||
import {
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type {
|
||||
Api,
|
||||
Context,
|
||||
Model,
|
||||
OpenAICompletionsCompat,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { getApiProvider } from "@mariozechner/pi-ai";
|
||||
import { getOAuthProvider } from "@mariozechner/pi-ai/oauth";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { clearApiKeyCache, ModelRegistry } from "../src/core/model-registry.js";
|
||||
|
||||
describe("ModelRegistry", () => {
|
||||
	// Per-test scratch state: a unique temp directory holding the models.json
	// and auth.json files that each test writes before constructing a registry.
	let tempDir: string;
	let modelsJsonPath: string;
	let authStorage: AuthStorage;

	beforeEach(() => {
		// Unique directory per test run so parallel/repeated runs never collide.
		tempDir = join(
			tmpdir(),
			`pi-test-model-registry-${Date.now()}-${Math.random().toString(36).slice(2)}`,
		);
		mkdirSync(tempDir, { recursive: true });
		modelsJsonPath = join(tempDir, "models.json");
		authStorage = AuthStorage.create(join(tempDir, "auth.json"));
	});

	afterEach(() => {
		// Remove the scratch directory and reset the process-wide API-key cache
		// so cached `!command` results never leak between tests.
		if (tempDir && existsSync(tempDir)) {
			rmSync(tempDir, { recursive: true });
		}
		clearApiKeyCache();
	});
|
||||
|
||||
/** Create minimal provider config */
|
||||
function providerConfig(
|
||||
baseUrl: string,
|
||||
models: Array<{ id: string; name?: string }>,
|
||||
api: string = "anthropic-messages",
|
||||
) {
|
||||
return {
|
||||
baseUrl,
|
||||
apiKey: "TEST_KEY",
|
||||
api,
|
||||
models: models.map((m) => ({
|
||||
id: m.id,
|
||||
name: m.name ?? m.id,
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 100000,
|
||||
maxTokens: 8000,
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
	/**
	 * Serialize a provider map to the per-test models.json file that the
	 * ModelRegistry under test reads from.
	 */
	function writeModelsJson(
		providers: Record<string, ReturnType<typeof providerConfig>>,
	) {
		writeFileSync(modelsJsonPath, JSON.stringify({ providers }));
	}
|
||||
|
||||
function getModelsForProvider(registry: ModelRegistry, provider: string) {
|
||||
return registry.getAll().filter((m) => m.provider === provider);
|
||||
}
|
||||
|
||||
/** Create a baseUrl-only override (no custom models) */
|
||||
function overrideConfig(baseUrl: string, headers?: Record<string, string>) {
|
||||
return { baseUrl, ...(headers && { headers }) };
|
||||
}
|
||||
|
||||
	/**
	 * Write raw providers config (for mixed override/replacement scenarios).
	 * Unlike writeModelsJson, no shape is enforced on the provider entries.
	 */
	function writeRawModelsJson(providers: Record<string, unknown>) {
		writeFileSync(modelsJsonPath, JSON.stringify({ providers }));
	}
|
||||
|
||||
	// Minimal OpenAI-style model record; used by the "dynamic provider
	// lifecycle" tests to call the built-in openai-completions stream handler.
	const openAiModel: Model<Api> = {
		id: "test-openai-model",
		name: "Test OpenAI Model",
		api: "openai-completions",
		provider: "openai",
		baseUrl: "https://api.openai.com/v1",
		reasoning: false,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 4096,
	};

	// Empty conversation context passed to streamSimple calls.
	const emptyContext: Context = {
		messages: [],
	};
|
||||
|
||||
describe("baseUrl override (no custom models)", () => {
|
||||
test("overriding baseUrl keeps all built-in models", () => {
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://my-proxy.example.com/v1"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
|
||||
// Should have multiple built-in models, not just one
|
||||
expect(anthropicModels.length).toBeGreaterThan(1);
|
||||
expect(anthropicModels.some((m) => m.id.includes("claude"))).toBe(true);
|
||||
});
|
||||
|
||||
test("overriding baseUrl changes URL on all built-in models", () => {
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://my-proxy.example.com/v1"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
|
||||
// All models should have the new baseUrl
|
||||
for (const model of anthropicModels) {
|
||||
expect(model.baseUrl).toBe("https://my-proxy.example.com/v1");
|
||||
}
|
||||
});
|
||||
|
||||
test("overriding headers merges with model headers", () => {
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://my-proxy.example.com/v1", {
|
||||
"X-Custom-Header": "custom-value",
|
||||
}),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
|
||||
for (const model of anthropicModels) {
|
||||
expect(model.headers?.["X-Custom-Header"]).toBe("custom-value");
|
||||
}
|
||||
});
|
||||
|
||||
test("baseUrl-only override does not affect other providers", () => {
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://my-proxy.example.com/v1"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const googleModels = getModelsForProvider(registry, "google");
|
||||
|
||||
// Google models should still have their original baseUrl
|
||||
expect(googleModels.length).toBeGreaterThan(0);
|
||||
expect(googleModels[0].baseUrl).not.toBe(
|
||||
"https://my-proxy.example.com/v1",
|
||||
);
|
||||
});
|
||||
|
||||
test("can mix baseUrl override and models merge", () => {
|
||||
writeRawModelsJson({
|
||||
// baseUrl-only for anthropic
|
||||
anthropic: overrideConfig("https://anthropic-proxy.example.com/v1"),
|
||||
// Add custom model for google (merged with built-ins)
|
||||
google: providerConfig(
|
||||
"https://google-proxy.example.com/v1",
|
||||
[{ id: "gemini-custom" }],
|
||||
"google-generative-ai",
|
||||
),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
// Anthropic: multiple built-in models with new baseUrl
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
expect(anthropicModels.length).toBeGreaterThan(1);
|
||||
expect(anthropicModels[0].baseUrl).toBe(
|
||||
"https://anthropic-proxy.example.com/v1",
|
||||
);
|
||||
|
||||
// Google: built-ins plus custom model
|
||||
const googleModels = getModelsForProvider(registry, "google");
|
||||
expect(googleModels.length).toBeGreaterThan(1);
|
||||
expect(googleModels.some((m) => m.id === "gemini-custom")).toBe(true);
|
||||
});
|
||||
|
||||
test("refresh() picks up baseUrl override changes", () => {
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://first-proxy.example.com/v1"),
|
||||
});
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
expect(getModelsForProvider(registry, "anthropic")[0].baseUrl).toBe(
|
||||
"https://first-proxy.example.com/v1",
|
||||
);
|
||||
|
||||
// Update and refresh
|
||||
writeRawModelsJson({
|
||||
anthropic: overrideConfig("https://second-proxy.example.com/v1"),
|
||||
});
|
||||
registry.refresh();
|
||||
|
||||
expect(getModelsForProvider(registry, "anthropic")[0].baseUrl).toBe(
|
||||
"https://second-proxy.example.com/v1",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("custom models merge behavior", () => {
|
||||
test("custom provider with same name as built-in merges with built-in models", () => {
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://my-proxy.example.com/v1", [
|
||||
{ id: "claude-custom" },
|
||||
]),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
|
||||
expect(anthropicModels.length).toBeGreaterThan(1);
|
||||
expect(anthropicModels.some((m) => m.id === "claude-custom")).toBe(true);
|
||||
expect(anthropicModels.some((m) => m.id.includes("claude"))).toBe(true);
|
||||
});
|
||||
|
||||
test("custom model with same id replaces built-in model by id", () => {
|
||||
writeModelsJson({
|
||||
openrouter: providerConfig(
|
||||
"https://my-proxy.example.com/v1",
|
||||
[{ id: "anthropic/claude-sonnet-4" }],
|
||||
"openai-completions",
|
||||
),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
const sonnetModels = models.filter(
|
||||
(m) => m.id === "anthropic/claude-sonnet-4",
|
||||
);
|
||||
|
||||
expect(sonnetModels).toHaveLength(1);
|
||||
expect(sonnetModels[0].baseUrl).toBe("https://my-proxy.example.com/v1");
|
||||
});
|
||||
|
||||
test("custom provider with same name as built-in does not affect other built-in providers", () => {
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://my-proxy.example.com/v1", [
|
||||
{ id: "claude-custom" },
|
||||
]),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
expect(getModelsForProvider(registry, "google").length).toBeGreaterThan(
|
||||
0,
|
||||
);
|
||||
expect(getModelsForProvider(registry, "openai").length).toBeGreaterThan(
|
||||
0,
|
||||
);
|
||||
});
|
||||
|
||||
test("provider-level baseUrl applies to both built-in and custom models", () => {
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://merged-proxy.example.com/v1", [
|
||||
{ id: "claude-custom" },
|
||||
]),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
|
||||
for (const model of anthropicModels) {
|
||||
expect(model.baseUrl).toBe("https://merged-proxy.example.com/v1");
|
||||
}
|
||||
});
|
||||
|
||||
test("model-level baseUrl overrides provider-level baseUrl for custom models", () => {
|
||||
writeRawModelsJson({
|
||||
"opencode-go": {
|
||||
baseUrl: "https://opencode.ai/zen/go/v1",
|
||||
apiKey: "TEST_KEY",
|
||||
models: [
|
||||
{
|
||||
id: "minimax-m2.5",
|
||||
api: "anthropic-messages",
|
||||
baseUrl: "https://opencode.ai/zen/go",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 0.3, output: 1.2, cacheRead: 0.03, cacheWrite: 0 },
|
||||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
},
|
||||
{
|
||||
id: "glm-5",
|
||||
api: "openai-completions",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 1, output: 3.2, cacheRead: 0.2, cacheWrite: 0 },
|
||||
contextWindow: 204800,
|
||||
maxTokens: 131072,
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const m25 = registry.find("opencode-go", "minimax-m2.5");
|
||||
const glm5 = registry.find("opencode-go", "glm-5");
|
||||
|
||||
expect(m25?.baseUrl).toBe("https://opencode.ai/zen/go");
|
||||
expect(glm5?.baseUrl).toBe("https://opencode.ai/zen/go/v1");
|
||||
});
|
||||
|
||||
test("modelOverrides still apply when provider also defines models", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
baseUrl: "https://my-proxy.example.com/v1",
|
||||
apiKey: "OPENROUTER_API_KEY",
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "custom/openrouter-model",
|
||||
name: "Custom OpenRouter Model",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
},
|
||||
],
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "Overridden Built-in Sonnet",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
|
||||
expect(models.some((m) => m.id === "custom/openrouter-model")).toBe(true);
|
||||
expect(
|
||||
models.some(
|
||||
(m) =>
|
||||
m.id === "anthropic/claude-sonnet-4" &&
|
||||
m.name === "Overridden Built-in Sonnet",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("refresh() reloads merged custom models from disk", () => {
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://first-proxy.example.com/v1", [
|
||||
{ id: "claude-custom" },
|
||||
]),
|
||||
});
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
expect(
|
||||
getModelsForProvider(registry, "anthropic").some(
|
||||
(m) => m.id === "claude-custom",
|
||||
),
|
||||
).toBe(true);
|
||||
|
||||
// Update and refresh
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://second-proxy.example.com/v1", [
|
||||
{ id: "claude-custom-2" },
|
||||
]),
|
||||
});
|
||||
registry.refresh();
|
||||
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
expect(anthropicModels.some((m) => m.id === "claude-custom")).toBe(false);
|
||||
expect(anthropicModels.some((m) => m.id === "claude-custom-2")).toBe(
|
||||
true,
|
||||
);
|
||||
expect(anthropicModels.some((m) => m.id.includes("claude"))).toBe(true);
|
||||
});
|
||||
|
||||
test("removing custom models from models.json keeps built-in provider models", () => {
|
||||
writeModelsJson({
|
||||
anthropic: providerConfig("https://proxy.example.com/v1", [
|
||||
{ id: "claude-custom" },
|
||||
]),
|
||||
});
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
expect(
|
||||
getModelsForProvider(registry, "anthropic").some(
|
||||
(m) => m.id === "claude-custom",
|
||||
),
|
||||
).toBe(true);
|
||||
|
||||
// Remove custom models and refresh
|
||||
writeModelsJson({});
|
||||
registry.refresh();
|
||||
|
||||
const anthropicModels = getModelsForProvider(registry, "anthropic");
|
||||
expect(anthropicModels.some((m) => m.id === "claude-custom")).toBe(false);
|
||||
expect(anthropicModels.some((m) => m.id.includes("claude"))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("modelOverrides (per-model customization)", () => {
|
||||
test("model override applies to a single built-in model", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "Custom Sonnet Name",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
expect(sonnet?.name).toBe("Custom Sonnet Name");
|
||||
|
||||
// Other models should be unchanged
|
||||
const opus = models.find((m) => m.id === "anthropic/claude-opus-4");
|
||||
expect(opus?.name).not.toBe("Custom Sonnet Name");
|
||||
});
|
||||
|
||||
test("model override with compat.openRouterRouting", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
compat: {
|
||||
openRouterRouting: { only: ["amazon-bedrock"] },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
const compat = sonnet?.compat as OpenAICompletionsCompat | undefined;
|
||||
expect(compat?.openRouterRouting).toEqual({ only: ["amazon-bedrock"] });
|
||||
});
|
||||
|
||||
test("model override deep merges compat settings", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
compat: {
|
||||
openRouterRouting: { order: ["anthropic", "together"] },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
|
||||
// Should have both the new routing AND preserve other compat settings
|
||||
const compat = sonnet?.compat as OpenAICompletionsCompat | undefined;
|
||||
expect(compat?.openRouterRouting).toEqual({
|
||||
order: ["anthropic", "together"],
|
||||
});
|
||||
});
|
||||
|
||||
test("multiple model overrides on same provider", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
compat: { openRouterRouting: { only: ["amazon-bedrock"] } },
|
||||
},
|
||||
"anthropic/claude-opus-4": {
|
||||
compat: { openRouterRouting: { only: ["anthropic"] } },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
const opus = models.find((m) => m.id === "anthropic/claude-opus-4");
|
||||
|
||||
const sonnetCompat = sonnet?.compat as
|
||||
| OpenAICompletionsCompat
|
||||
| undefined;
|
||||
const opusCompat = opus?.compat as OpenAICompletionsCompat | undefined;
|
||||
expect(sonnetCompat?.openRouterRouting).toEqual({
|
||||
only: ["amazon-bedrock"],
|
||||
});
|
||||
expect(opusCompat?.openRouterRouting).toEqual({ only: ["anthropic"] });
|
||||
});
|
||||
|
||||
test("model override combined with baseUrl override", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
baseUrl: "https://my-proxy.example.com/v1",
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "Proxied Sonnet",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
|
||||
// Both overrides should apply
|
||||
expect(sonnet?.baseUrl).toBe("https://my-proxy.example.com/v1");
|
||||
expect(sonnet?.name).toBe("Proxied Sonnet");
|
||||
|
||||
// Other models should have the baseUrl but not the name override
|
||||
const opus = models.find((m) => m.id === "anthropic/claude-opus-4");
|
||||
expect(opus?.baseUrl).toBe("https://my-proxy.example.com/v1");
|
||||
expect(opus?.name).not.toBe("Proxied Sonnet");
|
||||
});
|
||||
|
||||
test("model override for non-existent model ID is ignored", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"nonexistent/model-id": {
|
||||
name: "This should not appear",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
|
||||
// Should not create a new model
|
||||
expect(
|
||||
models.find((m) => m.id === "nonexistent/model-id"),
|
||||
).toBeUndefined();
|
||||
// Should not crash or show error
|
||||
expect(registry.getError()).toBeUndefined();
|
||||
});
|
||||
|
||||
test("model override can change cost fields partially", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
cost: { input: 99 },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
|
||||
// Input cost should be overridden
|
||||
expect(sonnet?.cost.input).toBe(99);
|
||||
// Other cost fields should be preserved from built-in
|
||||
expect(sonnet?.cost.output).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("model override can add headers", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
headers: { "X-Custom-Model-Header": "value" },
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const models = getModelsForProvider(registry, "openrouter");
|
||||
const sonnet = models.find((m) => m.id === "anthropic/claude-sonnet-4");
|
||||
|
||||
expect(sonnet?.headers?.["X-Custom-Model-Header"]).toBe("value");
|
||||
});
|
||||
|
||||
test("refresh() picks up model override changes", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "First Name",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
expect(
|
||||
getModelsForProvider(registry, "openrouter").find(
|
||||
(m) => m.id === "anthropic/claude-sonnet-4",
|
||||
)?.name,
|
||||
).toBe("First Name");
|
||||
|
||||
// Update and refresh
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "Second Name",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
registry.refresh();
|
||||
|
||||
expect(
|
||||
getModelsForProvider(registry, "openrouter").find(
|
||||
(m) => m.id === "anthropic/claude-sonnet-4",
|
||||
)?.name,
|
||||
).toBe("Second Name");
|
||||
});
|
||||
|
||||
test("removing model override restores built-in values", () => {
|
||||
writeRawModelsJson({
|
||||
openrouter: {
|
||||
modelOverrides: {
|
||||
"anthropic/claude-sonnet-4": {
|
||||
name: "Custom Name",
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const customName = getModelsForProvider(registry, "openrouter").find(
|
||||
(m) => m.id === "anthropic/claude-sonnet-4",
|
||||
)?.name;
|
||||
expect(customName).toBe("Custom Name");
|
||||
|
||||
// Remove override and refresh
|
||||
writeRawModelsJson({});
|
||||
registry.refresh();
|
||||
|
||||
const restoredName = getModelsForProvider(registry, "openrouter").find(
|
||||
(m) => m.id === "anthropic/claude-sonnet-4",
|
||||
)?.name;
|
||||
expect(restoredName).not.toBe("Custom Name");
|
||||
});
|
||||
});
|
||||
|
||||
describe("dynamic provider lifecycle", () => {
|
||||
test("unregisterProvider removes custom OAuth provider and restores built-in OAuth provider", () => {
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
registry.registerProvider("anthropic", {
|
||||
oauth: {
|
||||
name: "Custom Anthropic OAuth",
|
||||
login: async () => ({
|
||||
access: "custom-access-token",
|
||||
refresh: "custom-refresh-token",
|
||||
expires: Date.now() + 60_000,
|
||||
}),
|
||||
refreshToken: async (credentials) => credentials,
|
||||
getApiKey: (credentials) => credentials.access,
|
||||
},
|
||||
});
|
||||
|
||||
expect(getOAuthProvider("anthropic")?.name).toBe(
|
||||
"Custom Anthropic OAuth",
|
||||
);
|
||||
|
||||
registry.unregisterProvider("anthropic");
|
||||
|
||||
expect(getOAuthProvider("anthropic")?.name).not.toBe(
|
||||
"Custom Anthropic OAuth",
|
||||
);
|
||||
});
|
||||
|
||||
test("unregisterProvider removes custom streamSimple override and restores built-in API stream handler", () => {
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
registry.registerProvider("stream-override-provider", {
|
||||
api: "openai-completions",
|
||||
streamSimple: () => {
|
||||
throw new Error("custom streamSimple override");
|
||||
},
|
||||
});
|
||||
|
||||
let threwCustomOverride = false;
|
||||
try {
|
||||
getApiProvider("openai-completions")?.streamSimple(
|
||||
openAiModel,
|
||||
emptyContext,
|
||||
);
|
||||
} catch (error) {
|
||||
threwCustomOverride =
|
||||
error instanceof Error &&
|
||||
error.message === "custom streamSimple override";
|
||||
}
|
||||
expect(threwCustomOverride).toBe(true);
|
||||
|
||||
registry.unregisterProvider("stream-override-provider");
|
||||
|
||||
let threwCustomOverrideAfterUnregister = false;
|
||||
try {
|
||||
getApiProvider("openai-completions")?.streamSimple(
|
||||
openAiModel,
|
||||
emptyContext,
|
||||
);
|
||||
} catch (error) {
|
||||
threwCustomOverrideAfterUnregister =
|
||||
error instanceof Error &&
|
||||
error.message === "custom streamSimple override";
|
||||
}
|
||||
expect(threwCustomOverrideAfterUnregister).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("API key resolution", () => {
|
||||
/** Create provider config with custom apiKey */
|
||||
function providerWithApiKey(apiKey: string) {
|
||||
return {
|
||||
baseUrl: "https://example.com/v1",
|
||||
apiKey,
|
||||
api: "anthropic-messages",
|
||||
models: [
|
||||
{
|
||||
id: "test-model",
|
||||
name: "Test Model",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 100000,
|
||||
maxTokens: 8000,
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
test("apiKey with ! prefix executes command and uses stdout", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(
|
||||
"!echo test-api-key-from-command",
|
||||
),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("test-api-key-from-command");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix trims whitespace from command output", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("!echo ' spaced-key '"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("spaced-key");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix handles multiline output (uses trimmed result)", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("!printf 'line1\\nline2'"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("line1\nline2");
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on command failure", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("!exit 1"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on nonexistent command", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("!nonexistent-command-12345"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey with ! prefix returns undefined on empty output", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("!printf ''"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBeUndefined();
|
||||
});
|
||||
|
||||
test("apiKey as environment variable name resolves to env value", async () => {
|
||||
const originalEnv = process.env.TEST_API_KEY_12345;
|
||||
process.env.TEST_API_KEY_12345 = "env-api-key-value";
|
||||
|
||||
try {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("TEST_API_KEY_12345"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("env-api-key-value");
|
||||
} finally {
|
||||
if (originalEnv === undefined) {
|
||||
delete process.env.TEST_API_KEY_12345;
|
||||
} else {
|
||||
process.env.TEST_API_KEY_12345 = originalEnv;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
test("apiKey as literal value is used directly when not an env var", async () => {
|
||||
// Make sure this isn't an env var
|
||||
delete process.env.literal_api_key_value;
|
||||
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey("literal_api_key_value"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("literal_api_key_value");
|
||||
});
|
||||
|
||||
test("apiKey command can use shell features like pipes", async () => {
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(
|
||||
"!echo 'hello world' | tr ' ' '-'",
|
||||
),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
const apiKey = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(apiKey).toBe("hello-world");
|
||||
});
|
||||
|
||||
describe("caching", () => {
|
||||
test("command is only executed once per process", async () => {
|
||||
// Use a command that writes to a file to count invocations
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(command),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
// Call multiple times
|
||||
await registry.getApiKeyForProvider("custom-provider");
|
||||
await registry.getApiKeyForProvider("custom-provider");
|
||||
await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
// Command should have only run once
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("cache persists across registry instances", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(command),
|
||||
});
|
||||
|
||||
// Create multiple registry instances
|
||||
const registry1 = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
await registry1.getApiKeyForProvider("custom-provider");
|
||||
|
||||
const registry2 = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
await registry2.getApiKeyForProvider("custom-provider");
|
||||
|
||||
// Command should still have only run once
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("clearApiKeyCache allows command to run again", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; echo "key-value"'`;
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(command),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
// Clear cache and call again
|
||||
clearApiKeyCache();
|
||||
await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
// Command should have run twice
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(2);
|
||||
});
|
||||
|
||||
test("different commands are cached separately", async () => {
|
||||
writeRawModelsJson({
|
||||
"provider-a": providerWithApiKey("!echo key-a"),
|
||||
"provider-b": providerWithApiKey("!echo key-b"),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
const keyA = await registry.getApiKeyForProvider("provider-a");
|
||||
const keyB = await registry.getApiKeyForProvider("provider-b");
|
||||
|
||||
expect(keyA).toBe("key-a");
|
||||
expect(keyB).toBe("key-b");
|
||||
});
|
||||
|
||||
test("failed commands are cached (not retried)", async () => {
|
||||
const counterFile = join(tempDir, "counter");
|
||||
writeFileSync(counterFile, "0");
|
||||
|
||||
const command = `!sh -c 'count=$(cat ${counterFile}); echo $((count + 1)) > ${counterFile}; exit 1'`;
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(command),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
// Call multiple times - all should return undefined
|
||||
const key1 = await registry.getApiKeyForProvider("custom-provider");
|
||||
const key2 = await registry.getApiKeyForProvider("custom-provider");
|
||||
|
||||
expect(key1).toBeUndefined();
|
||||
expect(key2).toBeUndefined();
|
||||
|
||||
// Command should have only run once despite failures
|
||||
const count = parseInt(readFileSync(counterFile, "utf-8").trim(), 10);
|
||||
expect(count).toBe(1);
|
||||
});
|
||||
|
||||
test("environment variables are not cached (changes are picked up)", async () => {
|
||||
const envVarName = "TEST_API_KEY_CACHE_TEST_98765";
|
||||
const originalEnv = process.env[envVarName];
|
||||
|
||||
try {
|
||||
process.env[envVarName] = "first-value";
|
||||
|
||||
writeRawModelsJson({
|
||||
"custom-provider": providerWithApiKey(envVarName),
|
||||
});
|
||||
|
||||
const registry = new ModelRegistry(authStorage, modelsJsonPath);
|
||||
|
||||
const key1 = await registry.getApiKeyForProvider("custom-provider");
|
||||
expect(key1).toBe("first-value");
|
||||
|
||||
// Change env var
|
||||
process.env[envVarName] = "second-value";
|
||||
|
||||
const key2 = await registry.getApiKeyForProvider("custom-provider");
|
||||
expect(key2).toBe("second-value");
|
||||
} finally {
|
||||
if (originalEnv === undefined) {
|
||||
delete process.env[envVarName];
|
||||
} else {
|
||||
process.env[envVarName] = originalEnv;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
453
packages/coding-agent/test/model-resolver.test.ts
Normal file
453
packages/coding-agent/test/model-resolver.test.ts
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
import type { Model } from "@mariozechner/pi-ai";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import {
|
||||
defaultModelPerProvider,
|
||||
findInitialModel,
|
||||
parseModelPattern,
|
||||
resolveCliModel,
|
||||
} from "../src/core/model-resolver.js";
|
||||
|
||||
// Mock models for testing
|
||||
const mockModels: Model<"anthropic-messages">[] = [
|
||||
{
|
||||
id: "claude-sonnet-4-5",
|
||||
name: "Claude Sonnet 4.5",
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
baseUrl: "https://api.anthropic.com",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
},
|
||||
{
|
||||
id: "gpt-4o",
|
||||
name: "GPT-4o",
|
||||
api: "anthropic-messages", // Using same type for simplicity
|
||||
provider: "openai",
|
||||
baseUrl: "https://api.openai.com",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 5, output: 15, cacheRead: 0.5, cacheWrite: 5 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
];
|
||||
|
||||
// Mock OpenRouter models with colons in IDs
|
||||
const mockOpenRouterModels: Model<"anthropic-messages">[] = [
|
||||
{
|
||||
id: "qwen/qwen3-coder:exacto",
|
||||
name: "Qwen3 Coder Exacto",
|
||||
api: "anthropic-messages",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 1, output: 2, cacheRead: 0.1, cacheWrite: 1 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
},
|
||||
{
|
||||
id: "openai/gpt-4o:extended",
|
||||
name: "GPT-4o Extended",
|
||||
api: "anthropic-messages",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 5, output: 15, cacheRead: 0.5, cacheWrite: 5 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
},
|
||||
];
|
||||
|
||||
const allModels = [...mockModels, ...mockOpenRouterModels];
|
||||
|
||||
describe("parseModelPattern", () => {
|
||||
describe("simple patterns without colons", () => {
|
||||
test("exact match returns model with undefined thinking level", () => {
|
||||
const result = parseModelPattern("claude-sonnet-4-5", allModels);
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("partial match returns best model with undefined thinking level", () => {
|
||||
const result = parseModelPattern("sonnet", allModels);
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("no match returns undefined model and thinking level", () => {
|
||||
const result = parseModelPattern("nonexistent", allModels);
|
||||
expect(result.model).toBeUndefined();
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("patterns with valid thinking levels", () => {
|
||||
test("sonnet:high returns sonnet with high thinking level", () => {
|
||||
const result = parseModelPattern("sonnet:high", allModels);
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBe("high");
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("gpt-4o:medium returns gpt-4o with medium thinking level", () => {
|
||||
const result = parseModelPattern("gpt-4o:medium", allModels);
|
||||
expect(result.model?.id).toBe("gpt-4o");
|
||||
expect(result.thinkingLevel).toBe("medium");
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("all valid thinking levels work", () => {
|
||||
for (const level of [
|
||||
"off",
|
||||
"minimal",
|
||||
"low",
|
||||
"medium",
|
||||
"high",
|
||||
"xhigh",
|
||||
]) {
|
||||
const result = parseModelPattern(`sonnet:${level}`, allModels);
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBe(level);
|
||||
expect(result.warning).toBeUndefined();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("patterns with invalid thinking levels", () => {
|
||||
test("sonnet:random returns sonnet with undefined thinking level and warning", () => {
|
||||
const result = parseModelPattern("sonnet:random", allModels);
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toContain("Invalid thinking level");
|
||||
expect(result.warning).toContain("random");
|
||||
});
|
||||
|
||||
test("gpt-4o:invalid returns gpt-4o with undefined thinking level and warning", () => {
|
||||
const result = parseModelPattern("gpt-4o:invalid", allModels);
|
||||
expect(result.model?.id).toBe("gpt-4o");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toContain("Invalid thinking level");
|
||||
});
|
||||
});
|
||||
|
||||
describe("OpenRouter models with colons in IDs", () => {
|
||||
test("qwen3-coder:exacto matches the model with undefined thinking level", () => {
|
||||
const result = parseModelPattern("qwen/qwen3-coder:exacto", allModels);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("openrouter/qwen/qwen3-coder:exacto matches with provider prefix", () => {
|
||||
const result = parseModelPattern(
|
||||
"openrouter/qwen/qwen3-coder:exacto",
|
||||
allModels,
|
||||
);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("qwen3-coder:exacto:high matches model with high thinking level", () => {
|
||||
const result = parseModelPattern(
|
||||
"qwen/qwen3-coder:exacto:high",
|
||||
allModels,
|
||||
);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.thinkingLevel).toBe("high");
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("openrouter/qwen/qwen3-coder:exacto:high matches with provider and thinking level", () => {
|
||||
const result = parseModelPattern(
|
||||
"openrouter/qwen/qwen3-coder:exacto:high",
|
||||
allModels,
|
||||
);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.thinkingLevel).toBe("high");
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
|
||||
test("gpt-4o:extended matches the extended model with undefined thinking level", () => {
|
||||
const result = parseModelPattern("openai/gpt-4o:extended", allModels);
|
||||
expect(result.model?.id).toBe("openai/gpt-4o:extended");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("invalid thinking levels with OpenRouter models", () => {
|
||||
test("qwen3-coder:exacto:random returns model with undefined thinking level and warning", () => {
|
||||
const result = parseModelPattern(
|
||||
"qwen/qwen3-coder:exacto:random",
|
||||
allModels,
|
||||
);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toContain("Invalid thinking level");
|
||||
expect(result.warning).toContain("random");
|
||||
});
|
||||
|
||||
test("qwen3-coder:exacto:high:random returns model with undefined thinking level and warning", () => {
|
||||
const result = parseModelPattern(
|
||||
"qwen/qwen3-coder:exacto:high:random",
|
||||
allModels,
|
||||
);
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
expect(result.warning).toContain("Invalid thinking level");
|
||||
expect(result.warning).toContain("random");
|
||||
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
test("empty pattern matches via partial matching", () => {
|
||||
// Empty string is included in all model IDs, so partial matching finds a match
|
||||
const result = parseModelPattern("", allModels);
|
||||
expect(result.model).not.toBeNull();
|
||||
expect(result.thinkingLevel).toBeUndefined();
|
||||
});
|
||||
|
||||
test("pattern ending with colon treats empty suffix as invalid", () => {
|
||||
const result = parseModelPattern("sonnet:", allModels);
|
||||
// Empty string after colon is not a valid thinking level
|
||||
// So it tries to match "sonnet:" which won't match, then tries "sonnet"
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.warning).toContain("Invalid thinking level");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveCliModel", () => {
|
||||
test("resolves --model provider/id without --provider", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliModel: "openai/gpt-4o",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openai");
|
||||
expect(result.model?.id).toBe("gpt-4o");
|
||||
});
|
||||
|
||||
test("resolves fuzzy patterns within an explicit provider", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliProvider: "openai",
|
||||
cliModel: "4o",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openai");
|
||||
expect(result.model?.id).toBe("gpt-4o");
|
||||
});
|
||||
|
||||
test("supports --model <pattern>:<thinking> (without explicit --thinking)", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliModel: "sonnet:high",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(result.thinkingLevel).toBe("high");
|
||||
});
|
||||
|
||||
test("prefers exact model id match over provider inference (OpenRouter-style ids)", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliModel: "openai/gpt-4o:extended",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.model?.id).toBe("openai/gpt-4o:extended");
|
||||
});
|
||||
|
||||
test("does not strip invalid :suffix as thinking level in --model (treat as raw id)", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliProvider: "openai",
|
||||
cliModel: "gpt-4o:extended",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openai");
|
||||
expect(result.model?.id).toBe("gpt-4o:extended");
|
||||
});
|
||||
|
||||
test("allows custom model ids for explicit providers without double prefixing", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliProvider: "openrouter",
|
||||
cliModel: "openrouter/openai/ghost-model",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.model?.id).toBe("openai/ghost-model");
|
||||
});
|
||||
|
||||
test("returns a clear error when there are no models", () => {
|
||||
const registry = {
|
||||
getAll: () => [],
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliProvider: "openai",
|
||||
cliModel: "gpt-4o",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.model).toBeUndefined();
|
||||
expect(result.error).toContain("No models available");
|
||||
});
|
||||
|
||||
test("prefers provider/model split over gateway model with matching id", () => {
|
||||
// When a user writes "zai/glm-5", and both a zai provider model (id: "glm-5")
|
||||
// and a gateway model (id: "zai/glm-5") exist, prefer the zai provider model.
|
||||
const zaiModel: Model<"anthropic-messages"> = {
|
||||
id: "glm-5",
|
||||
name: "GLM-5",
|
||||
api: "anthropic-messages",
|
||||
provider: "zai",
|
||||
baseUrl: "https://open.bigmodel.cn/api/paas/v4",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 1, output: 2, cacheRead: 0.1, cacheWrite: 1 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
const gatewayModel: Model<"anthropic-messages"> = {
|
||||
id: "zai/glm-5",
|
||||
name: "GLM-5",
|
||||
api: "anthropic-messages",
|
||||
provider: "vercel-ai-gateway",
|
||||
baseUrl: "https://ai-gateway.vercel.sh",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
cost: { input: 1, output: 2, cacheRead: 0.1, cacheWrite: 1 },
|
||||
contextWindow: 128000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
const registry = {
|
||||
getAll: () => [...allModels, zaiModel, gatewayModel],
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliModel: "zai/glm-5",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("zai");
|
||||
expect(result.model?.id).toBe("glm-5");
|
||||
});
|
||||
|
||||
test("resolves provider-prefixed fuzzy patterns (openrouter/qwen -> openrouter model)", () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof resolveCliModel>[0]["modelRegistry"];
|
||||
|
||||
const result = resolveCliModel({
|
||||
cliModel: "openrouter/qwen",
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.error).toBeUndefined();
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.model?.id).toBe("qwen/qwen3-coder:exacto");
|
||||
});
|
||||
});
|
||||
|
||||
describe("default model selection", () => {
|
||||
test("openai defaults are gpt-5.4", () => {
|
||||
expect(defaultModelPerProvider.openai).toBe("gpt-5.4");
|
||||
expect(defaultModelPerProvider["openai-codex"]).toBe("gpt-5.4");
|
||||
});
|
||||
|
||||
test("ai-gateway default is opus 4.6", () => {
|
||||
expect(defaultModelPerProvider["vercel-ai-gateway"]).toBe(
|
||||
"anthropic/claude-opus-4-6",
|
||||
);
|
||||
});
|
||||
|
||||
test("findInitialModel accepts explicit provider custom model ids", async () => {
|
||||
const registry = {
|
||||
getAll: () => allModels,
|
||||
} as unknown as Parameters<typeof findInitialModel>[0]["modelRegistry"];
|
||||
|
||||
const result = await findInitialModel({
|
||||
cliProvider: "openrouter",
|
||||
cliModel: "openrouter/openai/ghost-model",
|
||||
scopedModels: [],
|
||||
isContinuing: false,
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.model?.provider).toBe("openrouter");
|
||||
expect(result.model?.id).toBe("openai/ghost-model");
|
||||
});
|
||||
|
||||
test("findInitialModel selects ai-gateway default when available", async () => {
|
||||
const aiGatewayModel: Model<"anthropic-messages"> = {
|
||||
id: "anthropic/claude-opus-4-6",
|
||||
name: "Claude Opus 4.6",
|
||||
api: "anthropic-messages",
|
||||
provider: "vercel-ai-gateway",
|
||||
baseUrl: "https://ai-gateway.vercel.sh",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
cost: { input: 5, output: 15, cacheRead: 0.5, cacheWrite: 5 },
|
||||
contextWindow: 200000,
|
||||
maxTokens: 8192,
|
||||
};
|
||||
|
||||
const registry = {
|
||||
getAvailable: async () => [aiGatewayModel],
|
||||
} as unknown as Parameters<typeof findInitialModel>[0]["modelRegistry"];
|
||||
|
||||
const result = await findInitialModel({
|
||||
scopedModels: [],
|
||||
isContinuing: false,
|
||||
modelRegistry: registry,
|
||||
});
|
||||
|
||||
expect(result.model?.provider).toBe("vercel-ai-gateway");
|
||||
expect(result.model?.id).toBe("anthropic/claude-opus-4-6");
|
||||
});
|
||||
});
|
||||
137
packages/coding-agent/test/package-command-paths.test.ts
Normal file
137
packages/coding-agent/test/package-command-paths.test.ts
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
import { mkdirSync, readFileSync, realpathSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { ENV_AGENT_DIR } from "../src/config.js";
|
||||
import { main } from "../src/main.js";
|
||||
|
||||
describe("package commands", () => {
|
||||
let tempDir: string;
|
||||
let agentDir: string;
|
||||
let projectDir: string;
|
||||
let packageDir: string;
|
||||
let originalCwd: string;
|
||||
let originalAgentDir: string | undefined;
|
||||
let originalExitCode: typeof process.exitCode;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`pi-package-commands-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
agentDir = join(tempDir, "agent");
|
||||
projectDir = join(tempDir, "project");
|
||||
packageDir = join(tempDir, "local-package");
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
mkdirSync(projectDir, { recursive: true });
|
||||
mkdirSync(packageDir, { recursive: true });
|
||||
|
||||
originalCwd = process.cwd();
|
||||
originalAgentDir = process.env[ENV_AGENT_DIR];
|
||||
originalExitCode = process.exitCode;
|
||||
process.exitCode = undefined;
|
||||
process.env[ENV_AGENT_DIR] = agentDir;
|
||||
process.chdir(projectDir);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
process.chdir(originalCwd);
|
||||
process.exitCode = originalExitCode;
|
||||
if (originalAgentDir === undefined) {
|
||||
delete process.env[ENV_AGENT_DIR];
|
||||
} else {
|
||||
process.env[ENV_AGENT_DIR] = originalAgentDir;
|
||||
}
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("should persist global relative local package paths relative to settings.json", async () => {
|
||||
const relativePkgDir = join(projectDir, "packages", "local-package");
|
||||
mkdirSync(relativePkgDir, { recursive: true });
|
||||
|
||||
await main(["install", "./packages/local-package"]);
|
||||
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
const settings = JSON.parse(readFileSync(settingsPath, "utf-8")) as {
|
||||
packages?: string[];
|
||||
};
|
||||
expect(settings.packages?.length).toBe(1);
|
||||
const stored = settings.packages?.[0] ?? "";
|
||||
const resolvedFromSettings = realpathSync(join(agentDir, stored));
|
||||
expect(resolvedFromSettings).toBe(realpathSync(relativePkgDir));
|
||||
});
|
||||
|
||||
it("should remove local packages using a path with a trailing slash", async () => {
|
||||
await main(["install", `${packageDir}/`]);
|
||||
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
const installedSettings = JSON.parse(
|
||||
readFileSync(settingsPath, "utf-8"),
|
||||
) as { packages?: string[] };
|
||||
expect(installedSettings.packages?.length).toBe(1);
|
||||
|
||||
await main(["remove", `${packageDir}/`]);
|
||||
|
||||
const removedSettings = JSON.parse(readFileSync(settingsPath, "utf-8")) as {
|
||||
packages?: string[];
|
||||
};
|
||||
expect(removedSettings.packages ?? []).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("shows install subcommand help", async () => {
|
||||
const logSpy = vi.spyOn(console, "log").mockImplementation(() => {});
|
||||
const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
|
||||
|
||||
try {
|
||||
await expect(main(["install", "--help"])).resolves.toBeUndefined();
|
||||
|
||||
const stdout = logSpy.mock.calls
|
||||
.map(([message]) => String(message))
|
||||
.join("\n");
|
||||
expect(stdout).toContain("Usage:");
|
||||
expect(stdout).toContain("pi install <source> [-l]");
|
||||
expect(errorSpy).not.toHaveBeenCalled();
|
||||
expect(process.exitCode).toBeUndefined();
|
||||
} finally {
|
||||
logSpy.mockRestore();
|
||||
errorSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("shows a friendly error for unknown install options", async () => {
|
||||
const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
|
||||
|
||||
try {
|
||||
await expect(main(["install", "--unknown"])).resolves.toBeUndefined();
|
||||
|
||||
const stderr = errorSpy.mock.calls
|
||||
.map(([message]) => String(message))
|
||||
.join("\n");
|
||||
expect(stderr).toContain('Unknown option --unknown for "install".');
|
||||
expect(stderr).toContain(
|
||||
'Use "pi --help" or "pi install <source> [-l]".',
|
||||
);
|
||||
expect(process.exitCode).toBe(1);
|
||||
} finally {
|
||||
errorSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
|
||||
it("shows a friendly error for missing install source", async () => {
|
||||
const errorSpy = vi.spyOn(console, "error").mockImplementation(() => {});
|
||||
|
||||
try {
|
||||
await expect(main(["install"])).resolves.toBeUndefined();
|
||||
|
||||
const stderr = errorSpy.mock.calls
|
||||
.map(([message]) => String(message))
|
||||
.join("\n");
|
||||
expect(stderr).toContain("Missing install source.");
|
||||
expect(stderr).toContain("Usage: pi install <source> [-l]");
|
||||
expect(stderr).not.toContain("at ");
|
||||
expect(process.exitCode).toBe(1);
|
||||
} finally {
|
||||
errorSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
120
packages/coding-agent/test/package-manager-ssh.test.ts
Normal file
120
packages/coding-agent/test/package-manager-ssh.test.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
import { mkdirSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { DefaultPackageManager } from "../src/core/package-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
|
||||
describe("Package Manager git source parsing", () => {
|
||||
let tempDir: string;
|
||||
let agentDir: string;
|
||||
let settingsManager: SettingsManager;
|
||||
let packageManager: DefaultPackageManager;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`pm-ssh-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
agentDir = join(tempDir, "agent");
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
|
||||
settingsManager = SettingsManager.inMemory();
|
||||
packageManager = new DefaultPackageManager({
|
||||
cwd: tempDir,
|
||||
agentDir,
|
||||
settingsManager,
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("protocol URLs without git: prefix", () => {
|
||||
it("should parse https:// URL", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"https://github.com/user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("git");
|
||||
expect(parsed.host).toBe("github.com");
|
||||
expect(parsed.path).toBe("user/repo");
|
||||
});
|
||||
|
||||
it("should parse ssh:// URL", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"ssh://git@github.com/user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("git");
|
||||
expect(parsed.host).toBe("github.com");
|
||||
expect(parsed.path).toBe("user/repo");
|
||||
expect(parsed.repo).toBe("ssh://git@github.com/user/repo");
|
||||
});
|
||||
});
|
||||
|
||||
describe("shorthand URLs with git: prefix", () => {
|
||||
it("should parse git@host:path format", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"git:git@github.com:user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("git");
|
||||
expect(parsed.host).toBe("github.com");
|
||||
expect(parsed.path).toBe("user/repo");
|
||||
expect(parsed.repo).toBe("git@github.com:user/repo");
|
||||
expect(parsed.pinned).toBe(false);
|
||||
});
|
||||
|
||||
it("should parse host/path shorthand", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"git:github.com/user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("git");
|
||||
expect(parsed.host).toBe("github.com");
|
||||
expect(parsed.path).toBe("user/repo");
|
||||
});
|
||||
|
||||
it("should parse shorthand with ref", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"git:git@github.com:user/repo@v1.0.0",
|
||||
);
|
||||
expect(parsed.type).toBe("git");
|
||||
expect(parsed.ref).toBe("v1.0.0");
|
||||
expect(parsed.pinned).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("unsupported without git: prefix", () => {
|
||||
it("should treat git@host:path as local without git: prefix", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"git@github.com:user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("local");
|
||||
});
|
||||
|
||||
it("should treat host/path shorthand as local without git: prefix", () => {
|
||||
const parsed = (packageManager as any).parseSource(
|
||||
"github.com/user/repo",
|
||||
);
|
||||
expect(parsed.type).toBe("local");
|
||||
});
|
||||
});
|
||||
|
||||
describe("identity normalization", () => {
|
||||
it("should normalize protocol and shorthand-prefixed URLs to same identity", () => {
|
||||
const prefixed = (packageManager as any).getPackageIdentity(
|
||||
"git:git@github.com:user/repo",
|
||||
);
|
||||
const https = (packageManager as any).getPackageIdentity(
|
||||
"https://github.com/user/repo",
|
||||
);
|
||||
const ssh = (packageManager as any).getPackageIdentity(
|
||||
"ssh://git@github.com/user/repo",
|
||||
);
|
||||
|
||||
expect(prefixed).toBe("git:github.com/user/repo");
|
||||
expect(prefixed).toBe(https);
|
||||
expect(prefixed).toBe(ssh);
|
||||
});
|
||||
});
|
||||
});
|
||||
1732
packages/coding-agent/test/package-manager.test.ts
Normal file
1732
packages/coding-agent/test/package-manager.test.ts
Normal file
File diff suppressed because it is too large
Load diff
157
packages/coding-agent/test/path-utils.test.ts
Normal file
157
packages/coding-agent/test/path-utils.test.ts
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
import {
|
||||
mkdtempSync,
|
||||
readdirSync,
|
||||
rmdirSync,
|
||||
unlinkSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
expandPath,
|
||||
resolveReadPath,
|
||||
resolveToCwd,
|
||||
} from "../src/core/tools/path-utils.js";
|
||||
|
||||
describe("path-utils", () => {
|
||||
describe("expandPath", () => {
|
||||
it("should expand ~ to home directory", () => {
|
||||
const result = expandPath("~");
|
||||
expect(result).not.toContain("~");
|
||||
});
|
||||
|
||||
it("should expand ~/path to home directory", () => {
|
||||
const result = expandPath("~/Documents/file.txt");
|
||||
expect(result).not.toContain("~/");
|
||||
});
|
||||
|
||||
it("should normalize Unicode spaces", () => {
|
||||
// Non-breaking space (U+00A0) should become regular space
|
||||
const withNBSP = "file\u00A0name.txt";
|
||||
const result = expandPath(withNBSP);
|
||||
expect(result).toBe("file name.txt");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveToCwd", () => {
|
||||
it("should resolve absolute paths as-is", () => {
|
||||
const result = resolveToCwd("/absolute/path/file.txt", "/some/cwd");
|
||||
expect(result).toBe("/absolute/path/file.txt");
|
||||
});
|
||||
|
||||
it("should resolve relative paths against cwd", () => {
|
||||
const result = resolveToCwd("relative/file.txt", "/some/cwd");
|
||||
expect(result).toBe("/some/cwd/relative/file.txt");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveReadPath", () => {
	// Scratch directory, recreated fresh for every test.
	let tempDir: string;

	beforeEach(() => {
		tempDir = mkdtempSync(join(tmpdir(), "path-utils-test-"));
	});

	afterEach(() => {
		// Clean up temp files and directory
		try {
			const files = readdirSync(tempDir);
			for (const file of files) {
				unlinkSync(join(tempDir, file));
			}
			rmdirSync(tempDir);
		} catch {
			// Ignore cleanup errors
		}
	});

	it("should resolve existing file path", () => {
		const fileName = "test-file.txt";
		writeFileSync(join(tempDir, fileName), "content");

		const result = resolveReadPath(fileName, tempDir);
		expect(result).toBe(join(tempDir, fileName));
	});

	it("should handle NFC vs NFD Unicode normalization (macOS filenames with accents)", () => {
		// macOS stores filenames in NFD (decomposed) form:
		//   é = e + combining acute accent (U+0301)
		// Users typically type in NFC (composed) form:
		//   é = single character (U+00E9)
		//
		// Note: macOS APFS normalizes Unicode automatically, so both paths work.
		// This test verifies the NFD variant fallback works on systems that don't.

		// NFD: e (U+0065) + combining acute accent (U+0301)
		const nfdFileName = "file\u0065\u0301.txt";
		// NFC: é as single character (U+00E9)
		const nfcFileName = "file\u00e9.txt";

		// Verify they have different byte sequences
		expect(nfdFileName).not.toBe(nfcFileName);
		expect(Buffer.from(nfdFileName)).not.toEqual(Buffer.from(nfcFileName));

		// Create file with NFD name
		writeFileSync(join(tempDir, nfdFileName), "content");

		// User provides NFC path - should find the file (via filesystem normalization or our fallback)
		const result = resolveReadPath(nfcFileName, tempDir);
		// Result should contain the accented character (either NFC or NFD form)
		expect(result).toContain(tempDir);
		expect(result).toMatch(/file.+\.txt$/);
	});

	it("should handle curly quotes vs straight quotes (macOS filenames)", () => {
		// macOS uses curly apostrophe (U+2019) in screenshot filenames:
		//   Capture d'écran (U+2019)
		// Users typically type straight apostrophe (U+0027):
		//   Capture d'ecran (U+0027)

		const curlyQuoteName = "Capture d\u2019cran.txt"; // U+2019 right single quotation mark
		const straightQuoteName = "Capture d'cran.txt"; // U+0027 apostrophe

		// Verify they are different
		expect(curlyQuoteName).not.toBe(straightQuoteName);

		// Create file with curly quote name (simulating macOS behavior)
		writeFileSync(join(tempDir, curlyQuoteName), "content");

		// User provides straight quote path - should find the curly quote file
		const result = resolveReadPath(straightQuoteName, tempDir);
		expect(result).toBe(join(tempDir, curlyQuoteName));
	});

	it("should handle combined NFC + curly quote (French macOS screenshots)", () => {
		// Full macOS screenshot filename: "Capture d'écran" with NFD é and curly quote
		// Note: macOS APFS normalizes NFD to NFC, so the actual file on disk uses NFC
		const nfcCurlyName = "Capture d\u2019\u00e9cran.txt"; // NFC + curly quote (how APFS stores it)
		const nfcStraightName = "Capture d'\u00e9cran.txt"; // NFC + straight quote (user input)

		// Verify they are different
		expect(nfcCurlyName).not.toBe(nfcStraightName);

		// Create file with macOS-style name (curly quote)
		writeFileSync(join(tempDir, nfcCurlyName), "content");

		// User provides straight quote path - should find the curly quote file
		const result = resolveReadPath(nfcStraightName, tempDir);
		expect(result).toBe(join(tempDir, nfcCurlyName));
	});

	it("should handle macOS screenshot AM/PM variant with narrow no-break space", () => {
		// macOS uses narrow no-break space (U+202F) before AM/PM in screenshot names
		const macosName = "Screenshot 2024-01-01 at 10.00.00\u202FAM.png"; // U+202F
		const userName = "Screenshot 2024-01-01 at 10.00.00 AM.png"; // regular space

		// Create file with macOS-style name
		writeFileSync(join(tempDir, macosName), "content");

		// User provides regular space path
		const result = resolveReadPath(userName, tempDir);

		// This works because tryMacOSScreenshotPath() handles this case
		expect(result).toBe(join(tempDir, macosName));
	});
});
|
||||
});
|
||||
464
packages/coding-agent/test/prompt-templates.test.ts
Normal file
464
packages/coding-agent/test/prompt-templates.test.ts
Normal file
|
|
@ -0,0 +1,464 @@
|
|||
/**
|
||||
* Tests for prompt template argument parsing and substitution.
|
||||
*
|
||||
* Tests verify:
|
||||
* - Argument parsing with quotes and special characters
|
||||
* - Placeholder substitution ($1, $2, $@, $ARGUMENTS)
|
||||
* - No recursive substitution of patterns in argument values
|
||||
* - Edge cases and integration between parsing and substitution
|
||||
*/
|
||||
|
||||
import { describe, expect, test } from "vitest";
|
||||
import {
|
||||
parseCommandArgs,
|
||||
substituteArgs,
|
||||
} from "../src/core/prompt-templates.js";
|
||||
|
||||
// ============================================================================
|
||||
// substituteArgs
|
||||
// ============================================================================
|
||||
|
||||
// Placeholder substitution: $N (1-based positional), $@ / $ARGUMENTS (all args
// joined with single spaces). Argument values are inserted literally — they are
// never re-scanned for placeholder patterns.
describe("substituteArgs", () => {
	test("should replace $ARGUMENTS with all args joined", () => {
		expect(substituteArgs("Test: $ARGUMENTS", ["a", "b", "c"])).toBe("Test: a b c");
	});

	test("should replace $@ with all args joined", () => {
		expect(substituteArgs("Test: $@", ["a", "b", "c"])).toBe("Test: a b c");
	});

	test("should replace $@ and $ARGUMENTS identically", () => {
		const args = ["foo", "bar", "baz"];
		expect(substituteArgs("Test: $@", args)).toBe(substituteArgs("Test: $ARGUMENTS", args));
	});

	// CRITICAL: argument values containing patterns should remain literal
	test("should NOT recursively substitute patterns in argument values", () => {
		expect(substituteArgs("$ARGUMENTS", ["$1", "$ARGUMENTS"])).toBe("$1 $ARGUMENTS");
		expect(substituteArgs("$@", ["$100", "$1"])).toBe("$100 $1");
		expect(substituteArgs("$ARGUMENTS", ["$100", "$1"])).toBe("$100 $1");
	});

	test("should support mixed $1, $2, and $ARGUMENTS", () => {
		expect(substituteArgs("$1: $ARGUMENTS", ["prefix", "a", "b"])).toBe("prefix: prefix a b");
	});

	test("should support mixed $1, $2, and $@", () => {
		expect(substituteArgs("$1: $@", ["prefix", "a", "b"])).toBe("prefix: prefix a b");
	});

	test("should handle empty arguments array with $ARGUMENTS", () => {
		expect(substituteArgs("Test: $ARGUMENTS", [])).toBe("Test: ");
	});

	test("should handle empty arguments array with $@", () => {
		expect(substituteArgs("Test: $@", [])).toBe("Test: ");
	});

	test("should handle empty arguments array with $1", () => {
		expect(substituteArgs("Test: $1", [])).toBe("Test: ");
	});

	test("should handle multiple occurrences of $ARGUMENTS", () => {
		expect(substituteArgs("$ARGUMENTS and $ARGUMENTS", ["a", "b"])).toBe("a b and a b");
	});

	test("should handle multiple occurrences of $@", () => {
		expect(substituteArgs("$@ and $@", ["a", "b"])).toBe("a b and a b");
	});

	test("should handle mixed occurrences of $@ and $ARGUMENTS", () => {
		expect(substituteArgs("$@ and $ARGUMENTS", ["a", "b"])).toBe("a b and a b");
	});

	test("should handle special characters in arguments", () => {
		// Note: $100 in argument doesn't get partially matched - full strings are substituted
		expect(substituteArgs("$1 $2: $ARGUMENTS", ["arg100", "@user"])).toBe(
			"arg100 @user: arg100 @user",
		);
	});

	test("should handle out-of-range numbered placeholders", () => {
		// Note: Out-of-range placeholders become empty strings (preserving spaces from template)
		expect(substituteArgs("$1 $2 $3 $4 $5", ["a", "b"])).toBe("a b   ");
	});

	test("should handle unicode characters", () => {
		expect(substituteArgs("$ARGUMENTS", ["日本語", "🎉", "café"])).toBe("日本語 🎉 café");
	});

	test("should preserve newlines and tabs in argument values", () => {
		expect(substituteArgs("$1 $2", ["line1\nline2", "tab\tthere"])).toBe(
			"line1\nline2 tab\tthere",
		);
	});

	test("should handle consecutive dollar patterns", () => {
		expect(substituteArgs("$1$2", ["a", "b"])).toBe("ab");
	});

	test("should handle quoted arguments with spaces", () => {
		expect(substituteArgs("$ARGUMENTS", ["first arg", "second arg"])).toBe(
			"first arg second arg",
		);
	});

	test("should handle single argument with $ARGUMENTS", () => {
		expect(substituteArgs("Test: $ARGUMENTS", ["only"])).toBe("Test: only");
	});

	test("should handle single argument with $@", () => {
		expect(substituteArgs("Test: $@", ["only"])).toBe("Test: only");
	});

	test("should handle $0 (zero index)", () => {
		// $0 has no corresponding argument (placeholders are 1-based) and yields "".
		expect(substituteArgs("$0", ["a", "b"])).toBe("");
	});

	test("should handle decimal number in pattern (only integer part matches)", () => {
		expect(substituteArgs("$1.5", ["a"])).toBe("a.5");
	});

	test("should handle $ARGUMENTS as part of word", () => {
		expect(substituteArgs("pre$ARGUMENTS", ["a", "b"])).toBe("prea b");
	});

	test("should handle $@ as part of word", () => {
		expect(substituteArgs("pre$@", ["a", "b"])).toBe("prea b");
	});

	test("should handle empty arguments in middle of list", () => {
		// Empty values still contribute a join separator.
		expect(substituteArgs("$ARGUMENTS", ["a", "", "c"])).toBe("a  c");
	});

	test("should handle trailing and leading spaces in arguments", () => {
		expect(substituteArgs("$ARGUMENTS", [" leading ", "trailing "])).toBe(
			" leading  trailing ",
		);
	});

	test("should handle argument containing pattern partially", () => {
		expect(substituteArgs("Prefix $ARGUMENTS suffix", ["ARGUMENTS"])).toBe(
			"Prefix ARGUMENTS suffix",
		);
	});

	test("should handle non-matching patterns", () => {
		expect(substituteArgs("$A $$ $ $ARGS", ["a"])).toBe("$A $$ $ $ARGS");
	});

	test("should handle case variations (case-sensitive)", () => {
		expect(substituteArgs("$arguments $Arguments $ARGUMENTS", ["a", "b"])).toBe(
			"$arguments $Arguments a b",
		);
	});

	test("should handle both syntaxes in same command with same result", () => {
		const args = ["x", "y", "z"];
		const result1 = substituteArgs("$@ and $ARGUMENTS", args);
		const result2 = substituteArgs("$ARGUMENTS and $@", args);
		expect(result1).toBe(result2);
		expect(result1).toBe("x y z and x y z");
	});

	test("should handle very long argument lists", () => {
		const args = Array.from({ length: 100 }, (_, i) => `arg${i}`);
		const result = substituteArgs("$ARGUMENTS", args);
		expect(result).toBe(args.join(" "));
	});

	test("should handle numbered placeholders with single digit", () => {
		expect(substituteArgs("$1 $2 $3", ["a", "b", "c"])).toBe("a b c");
	});

	test("should handle numbered placeholders with multiple digits", () => {
		const args = Array.from({ length: 15 }, (_, i) => `val${i}`);
		expect(substituteArgs("$10 $12 $15", args)).toBe("val9 val11 val14");
	});

	test("should handle escaped dollar signs (literal backslash preserved)", () => {
		// Note: No escape mechanism exists - backslash is treated literally
		expect(substituteArgs("Price: \\$100", [])).toBe("Price: \\");
	});

	test("should handle mixed numbered and wildcard placeholders", () => {
		expect(
			substituteArgs("$1: $@ ($ARGUMENTS)", ["first", "second", "third"]),
		).toBe("first: first second third (first second third)");
	});

	test("should handle command with no placeholders", () => {
		expect(substituteArgs("Just plain text", ["a", "b"])).toBe("Just plain text");
	});

	test("should handle command with only placeholders", () => {
		expect(substituteArgs("$1 $2 $@", ["a", "b", "c"])).toBe("a b a b c");
	});
});
|
||||
|
||||
// ============================================================================
|
||||
// substituteArgs - Array Slicing (Bash-Style)
|
||||
// ============================================================================
|
||||
|
||||
// Bash-style array slicing: ${@:N} takes args from 1-based index N to the end,
// ${@:N:L} takes at most L args starting at index N. Slices are expanded before
// the plain $@ pattern so the longer form is never partially matched.
describe("substituteArgs - array slicing", () => {
	test(`should slice from index (\${@:N})`, () => {
		expect(substituteArgs(`\${@:2}`, ["a", "b", "c", "d"])).toBe("b c d");
		expect(substituteArgs(`\${@:1}`, ["a", "b", "c"])).toBe("a b c");
		expect(substituteArgs(`\${@:3}`, ["a", "b", "c", "d"])).toBe("c d");
	});

	test(`should slice with length (\${@:N:L})`, () => {
		expect(substituteArgs(`\${@:2:2}`, ["a", "b", "c", "d"])).toBe("b c");
		expect(substituteArgs(`\${@:1:1}`, ["a", "b", "c"])).toBe("a");
		expect(substituteArgs(`\${@:3:1}`, ["a", "b", "c", "d"])).toBe("c");
		expect(substituteArgs(`\${@:2:3}`, ["a", "b", "c", "d", "e"])).toBe("b c d");
	});

	test("should handle out of range slices", () => {
		expect(substituteArgs(`\${@:99}`, ["a", "b"])).toBe("");
		expect(substituteArgs(`\${@:5}`, ["a", "b"])).toBe("");
		expect(substituteArgs(`\${@:10:5}`, ["a", "b"])).toBe("");
	});

	test("should handle zero-length slices", () => {
		expect(substituteArgs(`\${@:2:0}`, ["a", "b", "c"])).toBe("");
		expect(substituteArgs(`\${@:1:0}`, ["a", "b"])).toBe("");
	});

	test("should handle length exceeding array", () => {
		expect(substituteArgs(`\${@:2:99}`, ["a", "b", "c"])).toBe("b c");
		expect(substituteArgs(`\${@:1:10}`, ["a", "b"])).toBe("a b");
	});

	test("should process slice before simple $@", () => {
		expect(substituteArgs(`\${@:2} vs $@`, ["a", "b", "c"])).toBe("b c vs a b c");
		expect(substituteArgs(`First: \${@:1:1}, All: $@`, ["x", "y", "z"])).toBe(
			"First: x, All: x y z",
		);
	});

	test("should not recursively substitute slice patterns in args", () => {
		expect(substituteArgs(`\${@:1}`, [`\${@:2}`, "test"])).toBe(`\${@:2} test`);
		expect(substituteArgs(`\${@:2}`, ["a", `\${@:3}`, "c"])).toBe(`\${@:3} c`);
	});

	test("should handle mixed usage with positional args", () => {
		expect(substituteArgs(`$1: \${@:2}`, ["cmd", "arg1", "arg2"])).toBe("cmd: arg1 arg2");
		expect(substituteArgs(`$1 $2 \${@:3}`, ["a", "b", "c", "d"])).toBe("a b c d");
	});

	test(`should treat \${@:0} as all args`, () => {
		expect(substituteArgs(`\${@:0}`, ["a", "b", "c"])).toBe("a b c");
	});

	test("should handle empty args array", () => {
		expect(substituteArgs(`\${@:2}`, [])).toBe("");
		expect(substituteArgs(`\${@:1}`, [])).toBe("");
	});

	test("should handle single arg array", () => {
		expect(substituteArgs(`\${@:1}`, ["only"])).toBe("only");
		expect(substituteArgs(`\${@:2}`, ["only"])).toBe("");
	});

	test("should handle slice in middle of text", () => {
		expect(
			substituteArgs(`Process \${@:2} with $1`, ["tool", "file1", "file2"]),
		).toBe("Process file1 file2 with tool");
	});

	test("should handle multiple slices in one template", () => {
		expect(substituteArgs(`\${@:1:1} and \${@:2}`, ["a", "b", "c"])).toBe("a and b c");
		expect(
			substituteArgs(`\${@:1:2} vs \${@:3:2}`, ["a", "b", "c", "d", "e"]),
		).toBe("a b vs c d");
	});

	test("should handle quoted arguments in slices", () => {
		expect(substituteArgs(`\${@:2}`, ["cmd", "first arg", "second arg"])).toBe(
			"first arg second arg",
		);
	});

	test("should handle special characters in sliced args", () => {
		expect(substituteArgs(`\${@:2}`, ["cmd", "$100", "@user", "#tag"])).toBe(
			"$100 @user #tag",
		);
	});

	test("should handle unicode in sliced args", () => {
		expect(substituteArgs(`\${@:1}`, ["日本語", "🎉", "café"])).toBe("日本語 🎉 café");
	});

	test("should combine positional, slice, and wildcard placeholders", () => {
		const template = `Run $1 on \${@:2:2}, then process $@`;
		const args = ["eslint", "file1.ts", "file2.ts", "file3.ts"];
		expect(substituteArgs(template, args)).toBe(
			"Run eslint on file1.ts file2.ts, then process eslint file1.ts file2.ts file3.ts",
		);
	});

	test("should handle slice with no spacing", () => {
		expect(substituteArgs(`prefix\${@:2}suffix`, ["a", "b", "c"])).toBe("prefixb csuffix");
	});

	test("should handle large slice lengths gracefully", () => {
		const args = Array.from({ length: 10 }, (_, i) => `arg${i + 1}`);
		expect(substituteArgs(`\${@:5:100}`, args)).toBe("arg5 arg6 arg7 arg8 arg9 arg10");
	});
});
|
||||
|
||||
// ============================================================================
|
||||
// parseCommandArgs
|
||||
// ============================================================================
|
||||
|
||||
// Shell-like argument splitting: whitespace separates tokens, double or single
// quotes group spaces into one token. There is no backslash escape mechanism.
describe("parseCommandArgs", () => {
	test("should parse simple space-separated arguments", () => {
		expect(parseCommandArgs("a b c")).toEqual(["a", "b", "c"]);
	});

	test("should parse quoted arguments with spaces", () => {
		expect(parseCommandArgs('"first arg" second')).toEqual(["first arg", "second"]);
	});

	test("should parse single-quoted arguments", () => {
		expect(parseCommandArgs("'first arg' second")).toEqual(["first arg", "second"]);
	});

	test("should parse mixed quote styles", () => {
		expect(parseCommandArgs('"double" \'single\' "double again"')).toEqual([
			"double",
			"single",
			"double again",
		]);
	});

	test("should handle empty string", () => {
		expect(parseCommandArgs("")).toEqual([]);
	});

	test("should handle extra spaces", () => {
		// Runs of whitespace between tokens collapse to a single separator.
		expect(parseCommandArgs("a  b   c")).toEqual(["a", "b", "c"]);
	});

	test("should handle tabs as separators", () => {
		expect(parseCommandArgs("a\tb\tc")).toEqual(["a", "b", "c"]);
	});

	test("should handle quoted empty string", () => {
		// Note: Empty quotes are skipped by current implementation
		expect(parseCommandArgs('"" " "')).toEqual([" "]);
	});

	test("should handle arguments with special characters", () => {
		expect(parseCommandArgs("$100 @user #tag")).toEqual(["$100", "@user", "#tag"]);
	});

	test("should handle unicode characters", () => {
		expect(parseCommandArgs("日本語 🎉 café")).toEqual(["日本語", "🎉", "café"]);
	});

	test("should handle newlines in arguments", () => {
		expect(parseCommandArgs('"line1\nline2" second')).toEqual(["line1\nline2", "second"]);
	});

	test("should handle escaped quotes inside quoted strings", () => {
		// Note: This implementation doesn't handle escaped quotes - backslash is literal
		expect(parseCommandArgs('"quoted \\"text\\""')).toEqual(["quoted \\text\\"]);
	});

	test("should handle trailing spaces", () => {
		expect(parseCommandArgs("a b c ")).toEqual(["a", "b", "c"]);
	});

	test("should handle leading spaces", () => {
		expect(parseCommandArgs(" a b c")).toEqual(["a", "b", "c"]);
	});
});
|
||||
|
||||
// ============================================================================
|
||||
// Integration
|
||||
// ============================================================================
|
||||
|
||||
describe("parseCommandArgs + substituteArgs integration", () => {
|
||||
test("should parse and substitute together correctly", () => {
|
||||
const input = 'Button "onClick handler" "disabled support"';
|
||||
const args = parseCommandArgs(input);
|
||||
const template = "Create component $1 with features: $ARGUMENTS";
|
||||
const result = substituteArgs(template, args);
|
||||
expect(result).toBe(
|
||||
"Create component Button with features: Button onClick handler disabled support",
|
||||
);
|
||||
});
|
||||
|
||||
test("should handle the example from README", () => {
|
||||
const input = 'Button "onClick handler" "disabled support"';
|
||||
const args = parseCommandArgs(input);
|
||||
const template =
|
||||
"Create a React component named $1 with features: $ARGUMENTS";
|
||||
const result = substituteArgs(template, args);
|
||||
expect(result).toBe(
|
||||
"Create a React component named Button with features: Button onClick handler disabled support",
|
||||
);
|
||||
});
|
||||
|
||||
test("should produce same result with $@ and $ARGUMENTS", () => {
|
||||
const args = parseCommandArgs("feature1 feature2 feature3");
|
||||
const template1 = "Implement: $@";
|
||||
const template2 = "Implement: $ARGUMENTS";
|
||||
expect(substituteArgs(template1, args)).toBe(
|
||||
substituteArgs(template2, args),
|
||||
);
|
||||
});
|
||||
});
|
||||
552
packages/coding-agent/test/resource-loader.test.ts
Normal file
552
packages/coding-agent/test/resource-loader.test.ts
Normal file
|
|
@ -0,0 +1,552 @@
|
|||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { ExtensionRunner } from "../src/core/extensions/runner.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import { DefaultResourceLoader } from "../src/core/resource-loader.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import type { Skill } from "../src/core/skills.js";
|
||||
|
||||
describe("DefaultResourceLoader", () => {
|
||||
let tempDir: string;
|
||||
let agentDir: string;
|
||||
let cwd: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`rl-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
agentDir = join(tempDir, "agent");
|
||||
cwd = join(tempDir, "project");
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
mkdirSync(cwd, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("reload", () => {
|
||||
it("should initialize with empty results before reload", () => {
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
|
||||
expect(loader.getExtensions().extensions).toEqual([]);
|
||||
expect(loader.getSkills().skills).toEqual([]);
|
||||
expect(loader.getPrompts().prompts).toEqual([]);
|
||||
expect(loader.getThemes().themes).toEqual([]);
|
||||
});
|
||||
|
||||
it("should discover skills from agentDir", async () => {
|
||||
const skillsDir = join(agentDir, "skills");
|
||||
mkdirSync(skillsDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(skillsDir, "test-skill.md"),
|
||||
`---
|
||||
name: test-skill
|
||||
description: A test skill
|
||||
---
|
||||
Skill content here.`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { skills } = loader.getSkills();
|
||||
expect(skills.some((s) => s.name === "test-skill")).toBe(true);
|
||||
});
|
||||
|
||||
it("should ignore extra markdown files in auto-discovered skill dirs", async () => {
|
||||
const skillDir = join(agentDir, "skills", "pi-skills", "browser-tools");
|
||||
mkdirSync(skillDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(skillDir, "SKILL.md"),
|
||||
`---
|
||||
name: browser-tools
|
||||
description: Browser tools
|
||||
---
|
||||
Skill content here.`,
|
||||
);
|
||||
writeFileSync(join(skillDir, "EFFICIENCY.md"), "No frontmatter here");
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { skills, diagnostics } = loader.getSkills();
|
||||
expect(skills.some((s) => s.name === "browser-tools")).toBe(true);
|
||||
expect(diagnostics.some((d) => d.path?.endsWith("EFFICIENCY.md"))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it("should discover prompts from agentDir", async () => {
|
||||
const promptsDir = join(agentDir, "prompts");
|
||||
mkdirSync(promptsDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(promptsDir, "test-prompt.md"),
|
||||
`---
|
||||
description: A test prompt
|
||||
---
|
||||
Prompt content.`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { prompts } = loader.getPrompts();
|
||||
expect(prompts.some((p) => p.name === "test-prompt")).toBe(true);
|
||||
});
|
||||
|
||||
it("should prefer project resources over user on name collisions", async () => {
|
||||
const userPromptsDir = join(agentDir, "prompts");
|
||||
const projectPromptsDir = join(cwd, ".pi", "prompts");
|
||||
mkdirSync(userPromptsDir, { recursive: true });
|
||||
mkdirSync(projectPromptsDir, { recursive: true });
|
||||
const userPromptPath = join(userPromptsDir, "commit.md");
|
||||
const projectPromptPath = join(projectPromptsDir, "commit.md");
|
||||
writeFileSync(userPromptPath, "User prompt");
|
||||
writeFileSync(projectPromptPath, "Project prompt");
|
||||
|
||||
const userSkillDir = join(agentDir, "skills", "collision-skill");
|
||||
const projectSkillDir = join(cwd, ".pi", "skills", "collision-skill");
|
||||
mkdirSync(userSkillDir, { recursive: true });
|
||||
mkdirSync(projectSkillDir, { recursive: true });
|
||||
const userSkillPath = join(userSkillDir, "SKILL.md");
|
||||
const projectSkillPath = join(projectSkillDir, "SKILL.md");
|
||||
writeFileSync(
|
||||
userSkillPath,
|
||||
`---
|
||||
name: collision-skill
|
||||
description: user
|
||||
---
|
||||
User skill`,
|
||||
);
|
||||
writeFileSync(
|
||||
projectSkillPath,
|
||||
`---
|
||||
name: collision-skill
|
||||
description: project
|
||||
---
|
||||
Project skill`,
|
||||
);
|
||||
|
||||
const baseTheme = JSON.parse(
|
||||
readFileSync(
|
||||
join(
|
||||
process.cwd(),
|
||||
"src",
|
||||
"modes",
|
||||
"interactive",
|
||||
"theme",
|
||||
"dark.json",
|
||||
),
|
||||
"utf-8",
|
||||
),
|
||||
) as { name: string; vars?: Record<string, string> };
|
||||
baseTheme.name = "collision-theme";
|
||||
const userThemePath = join(agentDir, "themes", "collision.json");
|
||||
const projectThemePath = join(cwd, ".pi", "themes", "collision.json");
|
||||
mkdirSync(join(agentDir, "themes"), { recursive: true });
|
||||
mkdirSync(join(cwd, ".pi", "themes"), { recursive: true });
|
||||
writeFileSync(userThemePath, JSON.stringify(baseTheme, null, 2));
|
||||
if (baseTheme.vars) {
|
||||
baseTheme.vars.accent = "#ff00ff";
|
||||
}
|
||||
writeFileSync(projectThemePath, JSON.stringify(baseTheme, null, 2));
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const prompt = loader
|
||||
.getPrompts()
|
||||
.prompts.find((p) => p.name === "commit");
|
||||
expect(prompt?.filePath).toBe(projectPromptPath);
|
||||
|
||||
const skill = loader
|
||||
.getSkills()
|
||||
.skills.find((s) => s.name === "collision-skill");
|
||||
expect(skill?.filePath).toBe(projectSkillPath);
|
||||
|
||||
const theme = loader
|
||||
.getThemes()
|
||||
.themes.find((t) => t.name === "collision-theme");
|
||||
expect(theme?.sourcePath).toBe(projectThemePath);
|
||||
});
|
||||
|
||||
it("should keep both extensions loaded when command names collide", async () => {
|
||||
const userExtDir = join(agentDir, "extensions");
|
||||
const projectExtDir = join(cwd, ".pi", "extensions");
|
||||
mkdirSync(userExtDir, { recursive: true });
|
||||
mkdirSync(projectExtDir, { recursive: true });
|
||||
|
||||
writeFileSync(
|
||||
join(projectExtDir, "project.ts"),
|
||||
`export default function(pi) {
|
||||
pi.registerCommand("deploy", {
|
||||
description: "project deploy",
|
||||
handler: async () => {},
|
||||
});
|
||||
pi.registerCommand("project-only", {
|
||||
description: "project only",
|
||||
handler: async () => {},
|
||||
});
|
||||
}`,
|
||||
);
|
||||
|
||||
writeFileSync(
|
||||
join(userExtDir, "user.ts"),
|
||||
`export default function(pi) {
|
||||
pi.registerCommand("deploy", {
|
||||
description: "user deploy",
|
||||
handler: async () => {},
|
||||
});
|
||||
pi.registerCommand("user-only", {
|
||||
description: "user only",
|
||||
handler: async () => {},
|
||||
});
|
||||
}`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const extensionsResult = loader.getExtensions();
|
||||
expect(extensionsResult.extensions).toHaveLength(2);
|
||||
expect(
|
||||
extensionsResult.errors.some((e) =>
|
||||
e.error.includes('Command "/deploy" conflicts'),
|
||||
),
|
||||
).toBe(true);
|
||||
|
||||
const sessionManager = SessionManager.inMemory();
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage);
|
||||
const runner = new ExtensionRunner(
|
||||
extensionsResult.extensions,
|
||||
extensionsResult.runtime,
|
||||
cwd,
|
||||
sessionManager,
|
||||
modelRegistry,
|
||||
);
|
||||
|
||||
expect(runner.getCommand("deploy")?.description).toBe("project deploy");
|
||||
expect(runner.getCommand("project-only")?.description).toBe(
|
||||
"project only",
|
||||
);
|
||||
expect(runner.getCommand("user-only")?.description).toBe("user only");
|
||||
|
||||
const commandNames = runner.getRegisteredCommands().map((c) => c.name);
|
||||
expect(commandNames.filter((name) => name === "deploy")).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("should honor overrides for auto-discovered resources", async () => {
|
||||
const settingsManager = SettingsManager.inMemory();
|
||||
settingsManager.setExtensionPaths(["-extensions/disabled.ts"]);
|
||||
settingsManager.setSkillPaths(["-skills/skip-skill"]);
|
||||
settingsManager.setPromptTemplatePaths(["-prompts/skip.md"]);
|
||||
settingsManager.setThemePaths(["-themes/skip.json"]);
|
||||
|
||||
const extensionsDir = join(agentDir, "extensions");
|
||||
mkdirSync(extensionsDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(extensionsDir, "disabled.ts"),
|
||||
"export default function() {}",
|
||||
);
|
||||
|
||||
const skillDir = join(agentDir, "skills", "skip-skill");
|
||||
mkdirSync(skillDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(skillDir, "SKILL.md"),
|
||||
`---
|
||||
name: skip-skill
|
||||
description: Skip me
|
||||
---
|
||||
Content`,
|
||||
);
|
||||
|
||||
const promptsDir = join(agentDir, "prompts");
|
||||
mkdirSync(promptsDir, { recursive: true });
|
||||
writeFileSync(join(promptsDir, "skip.md"), "Skip prompt");
|
||||
|
||||
const themesDir = join(agentDir, "themes");
|
||||
mkdirSync(themesDir, { recursive: true });
|
||||
writeFileSync(join(themesDir, "skip.json"), "{}");
|
||||
|
||||
const loader = new DefaultResourceLoader({
|
||||
cwd,
|
||||
agentDir,
|
||||
settingsManager,
|
||||
});
|
||||
await loader.reload();
|
||||
|
||||
const { extensions } = loader.getExtensions();
|
||||
const { skills } = loader.getSkills();
|
||||
const { prompts } = loader.getPrompts();
|
||||
const { themes } = loader.getThemes();
|
||||
|
||||
expect(extensions.some((e) => e.path.endsWith("disabled.ts"))).toBe(
|
||||
false,
|
||||
);
|
||||
expect(skills.some((s) => s.name === "skip-skill")).toBe(false);
|
||||
expect(prompts.some((p) => p.name === "skip")).toBe(false);
|
||||
expect(themes.some((t) => t.sourcePath?.endsWith("skip.json"))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it("should discover AGENTS.md context files", async () => {
|
||||
writeFileSync(
|
||||
join(cwd, "AGENTS.md"),
|
||||
"# Project Guidelines\n\nBe helpful.",
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { agentsFiles } = loader.getAgentsFiles();
|
||||
expect(agentsFiles.some((f) => f.path.includes("AGENTS.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("should discover SOUL.md from the project root", async () => {
|
||||
writeFileSync(join(cwd, "SOUL.md"), "# Soul\n\nBe less corporate.");
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { agentsFiles } = loader.getAgentsFiles();
|
||||
expect(agentsFiles.some((f) => f.path.endsWith("SOUL.md"))).toBe(true);
|
||||
});
|
||||
|
||||
it("should discover SYSTEM.md from cwd/.pi", async () => {
|
||||
const piDir = join(cwd, ".pi");
|
||||
mkdirSync(piDir, { recursive: true });
|
||||
writeFileSync(join(piDir, "SYSTEM.md"), "You are a helpful assistant.");
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
expect(loader.getSystemPrompt()).toBe("You are a helpful assistant.");
|
||||
});
|
||||
|
||||
it("should discover APPEND_SYSTEM.md", async () => {
|
||||
const piDir = join(cwd, ".pi");
|
||||
mkdirSync(piDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(piDir, "APPEND_SYSTEM.md"),
|
||||
"Additional instructions.",
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
expect(loader.getAppendSystemPrompt()).toContain(
|
||||
"Additional instructions.",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("extendResources", () => {
|
||||
it("should load skills and prompts with extension metadata", async () => {
|
||||
const extraSkillDir = join(tempDir, "extra-skills", "extra-skill");
|
||||
mkdirSync(extraSkillDir, { recursive: true });
|
||||
const skillPath = join(extraSkillDir, "SKILL.md");
|
||||
writeFileSync(
|
||||
skillPath,
|
||||
`---
|
||||
name: extra-skill
|
||||
description: Extra skill
|
||||
---
|
||||
Extra content`,
|
||||
);
|
||||
|
||||
const extraPromptDir = join(tempDir, "extra-prompts");
|
||||
mkdirSync(extraPromptDir, { recursive: true });
|
||||
const promptPath = join(extraPromptDir, "extra.md");
|
||||
writeFileSync(
|
||||
promptPath,
|
||||
`---
|
||||
description: Extra prompt
|
||||
---
|
||||
Extra prompt content`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
loader.extendResources({
|
||||
skillPaths: [
|
||||
{
|
||||
path: extraSkillDir,
|
||||
metadata: {
|
||||
source: "extension:extra",
|
||||
scope: "temporary",
|
||||
origin: "top-level",
|
||||
baseDir: extraSkillDir,
|
||||
},
|
||||
},
|
||||
],
|
||||
promptPaths: [
|
||||
{
|
||||
path: promptPath,
|
||||
metadata: {
|
||||
source: "extension:extra",
|
||||
scope: "temporary",
|
||||
origin: "top-level",
|
||||
baseDir: extraPromptDir,
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
const { skills } = loader.getSkills();
|
||||
expect(skills.some((skill) => skill.name === "extra-skill")).toBe(true);
|
||||
|
||||
const { prompts } = loader.getPrompts();
|
||||
expect(prompts.some((prompt) => prompt.name === "extra")).toBe(true);
|
||||
|
||||
const metadata = loader.getPathMetadata();
|
||||
expect(metadata.get(skillPath)?.source).toBe("extension:extra");
|
||||
expect(metadata.get(promptPath)?.source).toBe("extension:extra");
|
||||
});
|
||||
});
|
||||
|
||||
describe("noSkills option", () => {
|
||||
it("should skip skill discovery when noSkills is true", async () => {
|
||||
const skillsDir = join(agentDir, "skills");
|
||||
mkdirSync(skillsDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(skillsDir, "test-skill.md"),
|
||||
`---
|
||||
name: test-skill
|
||||
description: A test skill
|
||||
---
|
||||
Content`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({
|
||||
cwd,
|
||||
agentDir,
|
||||
noSkills: true,
|
||||
});
|
||||
await loader.reload();
|
||||
|
||||
const { skills } = loader.getSkills();
|
||||
expect(skills).toEqual([]);
|
||||
});
|
||||
|
||||
it("should still load additional skill paths when noSkills is true", async () => {
|
||||
const customSkillDir = join(tempDir, "custom-skills");
|
||||
mkdirSync(customSkillDir, { recursive: true });
|
||||
writeFileSync(
|
||||
join(customSkillDir, "custom.md"),
|
||||
`---
|
||||
name: custom
|
||||
description: Custom skill
|
||||
---
|
||||
Content`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({
|
||||
cwd,
|
||||
agentDir,
|
||||
noSkills: true,
|
||||
additionalSkillPaths: [customSkillDir],
|
||||
});
|
||||
await loader.reload();
|
||||
|
||||
const { skills } = loader.getSkills();
|
||||
expect(skills.some((s) => s.name === "custom")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("override functions", () => {
|
||||
it("should apply skillsOverride", async () => {
|
||||
const injectedSkill: Skill = {
|
||||
name: "injected",
|
||||
description: "Injected skill",
|
||||
filePath: "/fake/path",
|
||||
baseDir: "/fake",
|
||||
source: "custom",
|
||||
disableModelInvocation: false,
|
||||
};
|
||||
const loader = new DefaultResourceLoader({
|
||||
cwd,
|
||||
agentDir,
|
||||
skillsOverride: () => ({
|
||||
skills: [injectedSkill],
|
||||
diagnostics: [],
|
||||
}),
|
||||
});
|
||||
await loader.reload();
|
||||
|
||||
const { skills } = loader.getSkills();
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(skills[0].name).toBe("injected");
|
||||
});
|
||||
|
||||
it("should apply systemPromptOverride", async () => {
|
||||
const loader = new DefaultResourceLoader({
|
||||
cwd,
|
||||
agentDir,
|
||||
systemPromptOverride: () => "Custom system prompt",
|
||||
});
|
||||
await loader.reload();
|
||||
|
||||
expect(loader.getSystemPrompt()).toBe("Custom system prompt");
|
||||
});
|
||||
});
|
||||
|
||||
describe("extension conflict detection", () => {
|
||||
it("should detect tool conflicts between extensions", async () => {
|
||||
// Create two extensions that register the same tool
|
||||
const ext1Dir = join(agentDir, "extensions", "ext1");
|
||||
const ext2Dir = join(agentDir, "extensions", "ext2");
|
||||
mkdirSync(ext1Dir, { recursive: true });
|
||||
mkdirSync(ext2Dir, { recursive: true });
|
||||
|
||||
writeFileSync(
|
||||
join(ext1Dir, "index.ts"),
|
||||
`
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi: ExtensionAPI) {
|
||||
pi.registerTool({
|
||||
name: "duplicate-tool",
|
||||
description: "First",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ result: "1" }),
|
||||
});
|
||||
}`,
|
||||
);
|
||||
|
||||
writeFileSync(
|
||||
join(ext2Dir, "index.ts"),
|
||||
`
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
export default function(pi: ExtensionAPI) {
|
||||
pi.registerTool({
|
||||
name: "duplicate-tool",
|
||||
description: "Second",
|
||||
parameters: Type.Object({}),
|
||||
execute: async () => ({ result: "2" }),
|
||||
});
|
||||
}`,
|
||||
);
|
||||
|
||||
const loader = new DefaultResourceLoader({ cwd, agentDir });
|
||||
await loader.reload();
|
||||
|
||||
const { errors } = loader.getExtensions();
|
||||
expect(
|
||||
errors.some(
|
||||
(e) =>
|
||||
e.error.includes("duplicate-tool") && e.error.includes("conflicts"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
});
|
||||
91
packages/coding-agent/test/rpc-example.ts
Normal file
91
packages/coding-agent/test/rpc-example.ts
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
import { dirname, join } from "node:path";
|
||||
import * as readline from "node:readline";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { RpcClient } from "../src/modes/rpc/rpc-client.js";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/**
|
||||
* Interactive example of using coding-agent via RpcClient.
|
||||
* Usage: npx tsx test/rpc-example.ts
|
||||
*/
|
||||
|
||||
async function main() {
|
||||
const client = new RpcClient({
|
||||
cliPath: join(__dirname, "../dist/cli.js"),
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-20250514",
|
||||
args: ["--no-session"],
|
||||
});
|
||||
|
||||
// Stream events to console
|
||||
client.onEvent((event) => {
|
||||
if (event.type === "message_update") {
|
||||
const { assistantMessageEvent } = event;
|
||||
if (
|
||||
assistantMessageEvent.type === "text_delta" ||
|
||||
assistantMessageEvent.type === "thinking_delta"
|
||||
) {
|
||||
process.stdout.write(assistantMessageEvent.delta);
|
||||
}
|
||||
}
|
||||
|
||||
if (event.type === "tool_execution_start") {
|
||||
console.log(`\n[Tool: ${event.toolName}]`);
|
||||
}
|
||||
|
||||
if (event.type === "tool_execution_end") {
|
||||
console.log(
|
||||
`[Result: ${JSON.stringify(event.result).slice(0, 200)}...]\n`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
await client.start();
|
||||
|
||||
const state = await client.getState();
|
||||
console.log(`Model: ${state.model?.provider}/${state.model?.id}`);
|
||||
console.log(`Thinking: ${state.thinkingLevel ?? "off"}\n`);
|
||||
|
||||
// Handle user input
|
||||
const rl = readline.createInterface({
|
||||
input: process.stdin,
|
||||
output: process.stdout,
|
||||
terminal: true,
|
||||
});
|
||||
|
||||
let isWaiting = false;
|
||||
|
||||
const prompt = () => {
|
||||
if (!isWaiting) process.stdout.write("You: ");
|
||||
};
|
||||
|
||||
rl.on("line", async (line) => {
|
||||
if (isWaiting) return;
|
||||
if (line.trim() === "exit") {
|
||||
await client.stop();
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
isWaiting = true;
|
||||
await client.promptAndWait(line);
|
||||
console.log("\n");
|
||||
isWaiting = false;
|
||||
prompt();
|
||||
});
|
||||
|
||||
rl.on("SIGINT", () => {
|
||||
if (isWaiting) {
|
||||
console.log("\n[Aborting...]");
|
||||
client.abort();
|
||||
} else {
|
||||
client.stop();
|
||||
process.exit(0);
|
||||
}
|
||||
});
|
||||
|
||||
console.log("Interactive RPC example. Type 'exit' to quit.\n");
|
||||
prompt();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
357
packages/coding-agent/test/rpc.test.ts
Normal file
357
packages/coding-agent/test/rpc.test.ts
Normal file
|
|
@ -0,0 +1,357 @@
|
|||
import { existsSync, readdirSync, readFileSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { AgentEvent } from "@mariozechner/pi-agent-core";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { RpcClient } from "../src/modes/rpc/rpc-client.js";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
/**
|
||||
* RPC mode tests.
|
||||
*/
|
||||
describe.skipIf(
|
||||
!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN,
|
||||
)("RPC mode", () => {
|
||||
let client: RpcClient;
|
||||
let sessionDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
sessionDir = join(tmpdir(), `pi-rpc-test-${Date.now()}`);
|
||||
client = new RpcClient({
|
||||
cliPath: join(__dirname, "..", "dist", "cli.js"),
|
||||
cwd: join(__dirname, ".."),
|
||||
env: { PI_CODING_AGENT_DIR: sessionDir },
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-5",
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await client.stop();
|
||||
if (sessionDir && existsSync(sessionDir)) {
|
||||
rmSync(sessionDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("should get state", async () => {
|
||||
await client.start();
|
||||
const state = await client.getState();
|
||||
|
||||
expect(state.model).toBeDefined();
|
||||
expect(state.model?.provider).toBe("anthropic");
|
||||
expect(state.model?.id).toBe("claude-sonnet-4-5");
|
||||
expect(state.isStreaming).toBe(false);
|
||||
expect(state.messageCount).toBe(0);
|
||||
}, 30000);
|
||||
|
||||
test("should save messages to session file", async () => {
|
||||
await client.start();
|
||||
|
||||
// Send prompt and wait for completion
|
||||
const events = await client.promptAndWait(
|
||||
"Reply with just the word 'hello'",
|
||||
);
|
||||
|
||||
// Should have message events
|
||||
const messageEndEvents = events.filter((e) => e.type === "message_end");
|
||||
expect(messageEndEvents.length).toBeGreaterThanOrEqual(2); // user + assistant
|
||||
|
||||
// Wait for file writes
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Verify session file
|
||||
const sessionsPath = join(sessionDir, "sessions");
|
||||
expect(existsSync(sessionsPath)).toBe(true);
|
||||
|
||||
const sessionDirs = readdirSync(sessionsPath);
|
||||
expect(sessionDirs.length).toBeGreaterThan(0);
|
||||
|
||||
const cwdSessionDir = join(sessionsPath, sessionDirs[0]);
|
||||
const sessionFiles = readdirSync(cwdSessionDir).filter((f) =>
|
||||
f.endsWith(".jsonl"),
|
||||
);
|
||||
expect(sessionFiles.length).toBe(1);
|
||||
|
||||
const sessionContent = readFileSync(
|
||||
join(cwdSessionDir, sessionFiles[0]),
|
||||
"utf8",
|
||||
);
|
||||
const entries = sessionContent
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line));
|
||||
|
||||
// First entry should be session header
|
||||
expect(entries[0].type).toBe("session");
|
||||
|
||||
// Should have user and assistant messages
|
||||
const messages = entries.filter(
|
||||
(e: { type: string }) => e.type === "message",
|
||||
);
|
||||
expect(messages.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
const roles = messages.map(
|
||||
(m: { message: { role: string } }) => m.message.role,
|
||||
);
|
||||
expect(roles).toContain("user");
|
||||
expect(roles).toContain("assistant");
|
||||
}, 90000);
|
||||
|
||||
test("should handle manual compaction", async () => {
|
||||
await client.start();
|
||||
|
||||
// First send a prompt to have messages to compact
|
||||
await client.promptAndWait("Say hello");
|
||||
|
||||
// Compact
|
||||
const result = await client.compact();
|
||||
expect(result.summary).toBeDefined();
|
||||
expect(result.tokensBefore).toBeGreaterThan(0);
|
||||
|
||||
// Wait for file writes
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Verify compaction in session file
|
||||
const sessionsPath = join(sessionDir, "sessions");
|
||||
const sessionDirs = readdirSync(sessionsPath);
|
||||
const cwdSessionDir = join(sessionsPath, sessionDirs[0]);
|
||||
const sessionFiles = readdirSync(cwdSessionDir).filter((f) =>
|
||||
f.endsWith(".jsonl"),
|
||||
);
|
||||
const sessionContent = readFileSync(
|
||||
join(cwdSessionDir, sessionFiles[0]),
|
||||
"utf8",
|
||||
);
|
||||
const entries = sessionContent
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line));
|
||||
|
||||
const compactionEntries = entries.filter(
|
||||
(e: { type: string }) => e.type === "compaction",
|
||||
);
|
||||
expect(compactionEntries.length).toBe(1);
|
||||
expect(compactionEntries[0].summary).toBeDefined();
|
||||
}, 120000);
|
||||
|
||||
test("should execute bash command", async () => {
|
||||
await client.start();
|
||||
|
||||
const result = await client.bash("echo hello");
|
||||
expect(result.output.trim()).toBe("hello");
|
||||
expect(result.exitCode).toBe(0);
|
||||
expect(result.cancelled).toBe(false);
|
||||
}, 30000);
|
||||
|
||||
test("should add bash output to context", async () => {
|
||||
await client.start();
|
||||
|
||||
// First send a prompt to initialize session
|
||||
await client.promptAndWait("Say hi");
|
||||
|
||||
// Run bash command
|
||||
const uniqueValue = `test-${Date.now()}`;
|
||||
await client.bash(`echo ${uniqueValue}`);
|
||||
|
||||
// Wait for file writes
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Verify bash message in session
|
||||
const sessionsPath = join(sessionDir, "sessions");
|
||||
const sessionDirs = readdirSync(sessionsPath);
|
||||
const cwdSessionDir = join(sessionsPath, sessionDirs[0]);
|
||||
const sessionFiles = readdirSync(cwdSessionDir).filter((f) =>
|
||||
f.endsWith(".jsonl"),
|
||||
);
|
||||
const sessionContent = readFileSync(
|
||||
join(cwdSessionDir, sessionFiles[0]),
|
||||
"utf8",
|
||||
);
|
||||
const entries = sessionContent
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line));
|
||||
|
||||
const bashMessages = entries.filter(
|
||||
(e: { type: string; message?: { role: string } }) =>
|
||||
e.type === "message" && e.message?.role === "bashExecution",
|
||||
);
|
||||
expect(bashMessages.length).toBe(1);
|
||||
expect(bashMessages[0].message.output).toContain(uniqueValue);
|
||||
}, 90000);
|
||||
|
||||
test("should include bash output in LLM context", async () => {
|
||||
await client.start();
|
||||
|
||||
// Run a bash command with a unique value
|
||||
const uniqueValue = `unique-${Date.now()}`;
|
||||
await client.bash(`echo ${uniqueValue}`);
|
||||
|
||||
// Ask the LLM what the output was
|
||||
const events = await client.promptAndWait(
|
||||
"What was the exact output of the echo command I just ran? Reply with just the value, nothing else.",
|
||||
);
|
||||
|
||||
// Find assistant's response
|
||||
const messageEndEvents = events.filter(
|
||||
(e) => e.type === "message_end",
|
||||
) as AgentEvent[];
|
||||
const assistantMessage = messageEndEvents.find(
|
||||
(e) => e.type === "message_end" && e.message?.role === "assistant",
|
||||
) as any;
|
||||
|
||||
expect(assistantMessage).toBeDefined();
|
||||
|
||||
const textContent = assistantMessage.message.content.find(
|
||||
(c: any) => c.type === "text",
|
||||
);
|
||||
expect(textContent?.text).toContain(uniqueValue);
|
||||
}, 90000);
|
||||
|
||||
test("should set and get thinking level", async () => {
|
||||
await client.start();
|
||||
|
||||
// Set thinking level
|
||||
await client.setThinkingLevel("high");
|
||||
|
||||
// Verify via state
|
||||
const state = await client.getState();
|
||||
expect(state.thinkingLevel).toBe("high");
|
||||
}, 30000);
|
||||
|
||||
test("should cycle thinking level", async () => {
|
||||
await client.start();
|
||||
|
||||
// Get initial level
|
||||
const initialState = await client.getState();
|
||||
const initialLevel = initialState.thinkingLevel;
|
||||
|
||||
// Cycle
|
||||
const result = await client.cycleThinkingLevel();
|
||||
expect(result).toBeDefined();
|
||||
expect(result!.level).not.toBe(initialLevel);
|
||||
|
||||
// Verify via state
|
||||
const newState = await client.getState();
|
||||
expect(newState.thinkingLevel).toBe(result!.level);
|
||||
}, 30000);
|
||||
|
||||
test("should get available models", async () => {
|
||||
await client.start();
|
||||
|
||||
const models = await client.getAvailableModels();
|
||||
expect(models.length).toBeGreaterThan(0);
|
||||
|
||||
// All models should have required fields
|
||||
for (const model of models) {
|
||||
expect(model.provider).toBeDefined();
|
||||
expect(model.id).toBeDefined();
|
||||
expect(model.contextWindow).toBeGreaterThan(0);
|
||||
expect(typeof model.reasoning).toBe("boolean");
|
||||
}
|
||||
}, 30000);
|
||||
|
||||
test("should get session stats", async () => {
|
||||
await client.start();
|
||||
|
||||
// Send a prompt first
|
||||
await client.promptAndWait("Hello");
|
||||
|
||||
const stats = await client.getSessionStats();
|
||||
expect(stats.sessionFile).toBeDefined();
|
||||
expect(stats.sessionId).toBeDefined();
|
||||
expect(stats.userMessages).toBeGreaterThanOrEqual(1);
|
||||
expect(stats.assistantMessages).toBeGreaterThanOrEqual(1);
|
||||
}, 90000);
|
||||
|
||||
test("should create new session", async () => {
|
||||
await client.start();
|
||||
|
||||
// Send a prompt
|
||||
await client.promptAndWait("Hello");
|
||||
|
||||
// Verify messages exist
|
||||
let state = await client.getState();
|
||||
expect(state.messageCount).toBeGreaterThan(0);
|
||||
|
||||
// New session
|
||||
await client.newSession();
|
||||
|
||||
// Verify messages cleared
|
||||
state = await client.getState();
|
||||
expect(state.messageCount).toBe(0);
|
||||
}, 90000);
|
||||
|
||||
test("should export to HTML", async () => {
|
||||
await client.start();
|
||||
|
||||
// Send a prompt first
|
||||
await client.promptAndWait("Hello");
|
||||
|
||||
// Export
|
||||
const result = await client.exportHtml();
|
||||
expect(result.path).toBeDefined();
|
||||
expect(result.path.endsWith(".html")).toBe(true);
|
||||
expect(existsSync(result.path)).toBe(true);
|
||||
}, 90000);
|
||||
|
||||
test("should get last assistant text", async () => {
|
||||
await client.start();
|
||||
|
||||
// Initially null
|
||||
let text = await client.getLastAssistantText();
|
||||
expect(text).toBeUndefined();
|
||||
|
||||
// Send prompt
|
||||
await client.promptAndWait("Reply with just: test123");
|
||||
|
||||
// Should have text now
|
||||
text = await client.getLastAssistantText();
|
||||
expect(text).toContain("test123");
|
||||
}, 90000);
|
||||
|
||||
test("should set and get session name", async () => {
|
||||
await client.start();
|
||||
|
||||
// Initially undefined
|
||||
let state = await client.getState();
|
||||
expect(state.sessionName).toBeUndefined();
|
||||
|
||||
// Send a prompt first - session files are only written after first assistant message
|
||||
await client.promptAndWait("Reply with just 'ok'");
|
||||
|
||||
// Set name
|
||||
await client.setSessionName("my-test-session");
|
||||
|
||||
// Verify via state
|
||||
state = await client.getState();
|
||||
expect(state.sessionName).toBe("my-test-session");
|
||||
|
||||
// Wait for file writes
|
||||
await new Promise((resolve) => setTimeout(resolve, 200));
|
||||
|
||||
// Verify session_info entry in session file
|
||||
const sessionsPath = join(sessionDir, "sessions");
|
||||
const sessionDirs = readdirSync(sessionsPath);
|
||||
const cwdSessionDir = join(sessionsPath, sessionDirs[0]);
|
||||
const sessionFiles = readdirSync(cwdSessionDir).filter((f) =>
|
||||
f.endsWith(".jsonl"),
|
||||
);
|
||||
const sessionContent = readFileSync(
|
||||
join(cwdSessionDir, sessionFiles[0]),
|
||||
"utf8",
|
||||
);
|
||||
const entries = sessionContent
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line));
|
||||
|
||||
const sessionInfoEntries = entries.filter(
|
||||
(e: { type: string }) => e.type === "session_info",
|
||||
);
|
||||
expect(sessionInfoEntries.length).toBe(1);
|
||||
expect(sessionInfoEntries[0].name).toBe("my-test-session");
|
||||
}, 60000);
|
||||
});
|
||||
125
packages/coding-agent/test/sdk-skills.test.ts
Normal file
125
packages/coding-agent/test/sdk-skills.test.ts
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
import { mkdirSync, rmSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { createExtensionRuntime } from "../src/core/extensions/loader.js";
|
||||
import type { ResourceLoader } from "../src/core/resource-loader.js";
|
||||
import { createAgentSession } from "../src/core/sdk.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
|
||||
describe("createAgentSession skills option", () => {
|
||||
let tempDir: string;
|
||||
let skillsDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(
|
||||
tmpdir(),
|
||||
`pi-sdk-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
skillsDir = join(tempDir, "skills", "test-skill");
|
||||
mkdirSync(skillsDir, { recursive: true });
|
||||
|
||||
// Create a test skill in the pi skills directory
|
||||
writeFileSync(
|
||||
join(skillsDir, "SKILL.md"),
|
||||
`---
|
||||
name: test-skill
|
||||
description: A test skill for SDK tests.
|
||||
---
|
||||
|
||||
# Test Skill
|
||||
|
||||
This is a test skill.
|
||||
`,
|
||||
);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (tempDir) {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("should discover skills by default and expose them on session.skills", async () => {
|
||||
const { session } = await createAgentSession({
|
||||
cwd: tempDir,
|
||||
agentDir: tempDir,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
});
|
||||
|
||||
// Skills should be discovered and exposed on the session
|
||||
expect(session.resourceLoader.getSkills().skills.length).toBeGreaterThan(0);
|
||||
expect(
|
||||
session.resourceLoader
|
||||
.getSkills()
|
||||
.skills.some((s) => s.name === "test-skill"),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should have empty skills when resource loader returns none (--no-skills)", async () => {
|
||||
const resourceLoader: ResourceLoader = {
|
||||
getExtensions: () => ({
|
||||
extensions: [],
|
||||
errors: [],
|
||||
runtime: createExtensionRuntime(),
|
||||
}),
|
||||
getSkills: () => ({ skills: [], diagnostics: [] }),
|
||||
getPrompts: () => ({ prompts: [], diagnostics: [] }),
|
||||
getThemes: () => ({ themes: [], diagnostics: [] }),
|
||||
getAgentsFiles: () => ({ agentsFiles: [] }),
|
||||
getSystemPrompt: () => undefined,
|
||||
getAppendSystemPrompt: () => [],
|
||||
getPathMetadata: () => new Map(),
|
||||
extendResources: () => {},
|
||||
reload: async () => {},
|
||||
};
|
||||
|
||||
const { session } = await createAgentSession({
|
||||
cwd: tempDir,
|
||||
agentDir: tempDir,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
resourceLoader,
|
||||
});
|
||||
|
||||
expect(session.resourceLoader.getSkills().skills).toEqual([]);
|
||||
expect(session.resourceLoader.getSkills().diagnostics).toEqual([]);
|
||||
});
|
||||
|
||||
it("should use provided skills when resource loader supplies them", async () => {
|
||||
const customSkill = {
|
||||
name: "custom-skill",
|
||||
description: "A custom skill",
|
||||
filePath: "/fake/path/SKILL.md",
|
||||
baseDir: "/fake/path",
|
||||
source: "custom" as const,
|
||||
disableModelInvocation: false,
|
||||
};
|
||||
|
||||
const resourceLoader: ResourceLoader = {
|
||||
getExtensions: () => ({
|
||||
extensions: [],
|
||||
errors: [],
|
||||
runtime: createExtensionRuntime(),
|
||||
}),
|
||||
getSkills: () => ({ skills: [customSkill], diagnostics: [] }),
|
||||
getPrompts: () => ({ prompts: [], diagnostics: [] }),
|
||||
getThemes: () => ({ themes: [], diagnostics: [] }),
|
||||
getAgentsFiles: () => ({ agentsFiles: [] }),
|
||||
getSystemPrompt: () => undefined,
|
||||
getAppendSystemPrompt: () => [],
|
||||
getPathMetadata: () => new Map(),
|
||||
extendResources: () => {},
|
||||
reload: async () => {},
|
||||
};
|
||||
|
||||
const { session } = await createAgentSession({
|
||||
cwd: tempDir,
|
||||
agentDir: tempDir,
|
||||
sessionManager: SessionManager.inMemory(),
|
||||
resourceLoader,
|
||||
});
|
||||
|
||||
expect(session.resourceLoader.getSkills().skills).toEqual([customSkill]);
|
||||
expect(session.resourceLoader.getSkills().diagnostics).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
import { writeFileSync } from "node:fs";
|
||||
import { stat } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { SessionHeader } from "../src/core/session-manager.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
function createSessionFile(path: string): void {
|
||||
const header: SessionHeader = {
|
||||
type: "session",
|
||||
id: "test-session",
|
||||
version: 3,
|
||||
timestamp: new Date(0).toISOString(),
|
||||
cwd: "/tmp",
|
||||
};
|
||||
writeFileSync(path, `${JSON.stringify(header)}\n`, "utf8");
|
||||
|
||||
// SessionManager only persists once it has seen at least one assistant message.
|
||||
// Add a minimal assistant entry so subsequent appends are persisted.
|
||||
const mgr = SessionManager.open(path);
|
||||
mgr.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hi" }],
|
||||
api: "openai-completions",
|
||||
provider: "openai",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
describe("SessionInfo.modified", () => {
|
||||
beforeAll(() => initTheme("dark"));
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
it("uses last user/assistant message timestamp instead of file mtime", async () => {
|
||||
const filePath = join(tmpdir(), `pi-session-${Date.now()}-modified.jsonl`);
|
||||
createSessionFile(filePath);
|
||||
|
||||
const before = await stat(filePath);
|
||||
// Ensure the file mtime can differ from our message timestamp even on coarse filesystems.
|
||||
await new Promise((r) => setTimeout(r, 10));
|
||||
|
||||
const mgr = SessionManager.open(filePath);
|
||||
const msgTime = Date.now();
|
||||
mgr.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "later" }],
|
||||
api: "openai-completions",
|
||||
provider: "openai",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: msgTime,
|
||||
});
|
||||
|
||||
const sessions = await SessionManager.list(
|
||||
"/tmp",
|
||||
filePath.replace(/\/[^/]+$/, ""),
|
||||
);
|
||||
const s = sessions.find((x) => x.path === filePath);
|
||||
expect(s).toBeDefined();
|
||||
expect(s!.modified.getTime()).toBe(msgTime);
|
||||
expect(s!.modified.getTime()).not.toBe(before.mtime.getTime());
|
||||
});
|
||||
});
|
||||
342
packages/coding-agent/test/session-manager/build-context.test.ts
Normal file
342
packages/coding-agent/test/session-manager/build-context.test.ts
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
type BranchSummaryEntry,
|
||||
buildSessionContext,
|
||||
type CompactionEntry,
|
||||
type ModelChangeEntry,
|
||||
type SessionEntry,
|
||||
type SessionMessageEntry,
|
||||
type ThinkingLevelChangeEntry,
|
||||
} from "../../src/core/session-manager.js";
|
||||
|
||||
function msg(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
role: "user" | "assistant",
|
||||
text: string,
|
||||
): SessionMessageEntry {
|
||||
const base = {
|
||||
type: "message" as const,
|
||||
id,
|
||||
parentId,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
};
|
||||
if (role === "user") {
|
||||
return { ...base, message: { role, content: text, timestamp: 1 } };
|
||||
}
|
||||
return {
|
||||
...base,
|
||||
message: {
|
||||
role,
|
||||
content: [{ type: "text", text }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 1,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function compaction(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
summary: string,
|
||||
firstKeptEntryId: string,
|
||||
): CompactionEntry {
|
||||
return {
|
||||
type: "compaction",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
summary,
|
||||
firstKeptEntryId,
|
||||
tokensBefore: 1000,
|
||||
};
|
||||
}
|
||||
|
||||
function branchSummary(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
summary: string,
|
||||
fromId: string,
|
||||
): BranchSummaryEntry {
|
||||
return {
|
||||
type: "branch_summary",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
summary,
|
||||
fromId,
|
||||
};
|
||||
}
|
||||
|
||||
function thinkingLevel(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
level: string,
|
||||
): ThinkingLevelChangeEntry {
|
||||
return {
|
||||
type: "thinking_level_change",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
thinkingLevel: level,
|
||||
};
|
||||
}
|
||||
|
||||
function modelChange(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
provider: string,
|
||||
modelId: string,
|
||||
): ModelChangeEntry {
|
||||
return {
|
||||
type: "model_change",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
provider,
|
||||
modelId,
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildSessionContext", () => {
|
||||
describe("trivial cases", () => {
|
||||
it("empty entries returns empty context", () => {
|
||||
const ctx = buildSessionContext([]);
|
||||
expect(ctx.messages).toEqual([]);
|
||||
expect(ctx.thinkingLevel).toBe("off");
|
||||
expect(ctx.model).toBeNull();
|
||||
});
|
||||
|
||||
it("single user message", () => {
|
||||
const entries: SessionEntry[] = [msg("1", null, "user", "hello")];
|
||||
const ctx = buildSessionContext(entries);
|
||||
expect(ctx.messages).toHaveLength(1);
|
||||
expect(ctx.messages[0].role).toBe("user");
|
||||
});
|
||||
|
||||
it("simple conversation", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
msg("2", "1", "assistant", "hi there"),
|
||||
msg("3", "2", "user", "how are you"),
|
||||
msg("4", "3", "assistant", "great"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
expect(ctx.messages).toHaveLength(4);
|
||||
expect(ctx.messages.map((m) => m.role)).toEqual([
|
||||
"user",
|
||||
"assistant",
|
||||
"user",
|
||||
"assistant",
|
||||
]);
|
||||
});
|
||||
|
||||
it("tracks thinking level changes", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
thinkingLevel("2", "1", "high"),
|
||||
msg("3", "2", "assistant", "thinking hard"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
expect(ctx.thinkingLevel).toBe("high");
|
||||
expect(ctx.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("tracks model from assistant message", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
msg("2", "1", "assistant", "hi"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
expect(ctx.model).toEqual({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-test",
|
||||
});
|
||||
});
|
||||
|
||||
it("tracks model from model change entry", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
modelChange("2", "1", "openai", "gpt-4"),
|
||||
msg("3", "2", "assistant", "hi"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
// Assistant message overwrites model change
|
||||
expect(ctx.model).toEqual({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-test",
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("with compaction", () => {
|
||||
it("includes summary before kept messages", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "first"),
|
||||
msg("2", "1", "assistant", "response1"),
|
||||
msg("3", "2", "user", "second"),
|
||||
msg("4", "3", "assistant", "response2"),
|
||||
compaction("5", "4", "Summary of first two turns", "3"),
|
||||
msg("6", "5", "user", "third"),
|
||||
msg("7", "6", "assistant", "response3"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
|
||||
// Should have: summary + kept (3,4) + after (6,7) = 5 messages
|
||||
expect(ctx.messages).toHaveLength(5);
|
||||
expect((ctx.messages[0] as any).summary).toContain(
|
||||
"Summary of first two turns",
|
||||
);
|
||||
expect((ctx.messages[1] as any).content).toBe("second");
|
||||
expect((ctx.messages[2] as any).content[0].text).toBe("response2");
|
||||
expect((ctx.messages[3] as any).content).toBe("third");
|
||||
expect((ctx.messages[4] as any).content[0].text).toBe("response3");
|
||||
});
|
||||
|
||||
it("handles compaction keeping from first message", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "first"),
|
||||
msg("2", "1", "assistant", "response"),
|
||||
compaction("3", "2", "Empty summary", "1"),
|
||||
msg("4", "3", "user", "second"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
|
||||
// Summary + all messages (1,2,4)
|
||||
expect(ctx.messages).toHaveLength(4);
|
||||
expect((ctx.messages[0] as any).summary).toContain("Empty summary");
|
||||
});
|
||||
|
||||
it("multiple compactions uses latest", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "a"),
|
||||
msg("2", "1", "assistant", "b"),
|
||||
compaction("3", "2", "First summary", "1"),
|
||||
msg("4", "3", "user", "c"),
|
||||
msg("5", "4", "assistant", "d"),
|
||||
compaction("6", "5", "Second summary", "4"),
|
||||
msg("7", "6", "user", "e"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries);
|
||||
|
||||
// Should use second summary, keep from 4
|
||||
expect(ctx.messages).toHaveLength(4);
|
||||
expect((ctx.messages[0] as any).summary).toContain("Second summary");
|
||||
});
|
||||
});
|
||||
|
||||
describe("with branches", () => {
|
||||
it("follows path to specified leaf", () => {
|
||||
// Tree:
|
||||
// 1 -> 2 -> 3 (branch A)
|
||||
// \-> 4 (branch B)
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "start"),
|
||||
msg("2", "1", "assistant", "response"),
|
||||
msg("3", "2", "user", "branch A"),
|
||||
msg("4", "2", "user", "branch B"),
|
||||
];
|
||||
|
||||
const ctxA = buildSessionContext(entries, "3");
|
||||
expect(ctxA.messages).toHaveLength(3);
|
||||
expect((ctxA.messages[2] as any).content).toBe("branch A");
|
||||
|
||||
const ctxB = buildSessionContext(entries, "4");
|
||||
expect(ctxB.messages).toHaveLength(3);
|
||||
expect((ctxB.messages[2] as any).content).toBe("branch B");
|
||||
});
|
||||
|
||||
it("includes branch summary in path", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "start"),
|
||||
msg("2", "1", "assistant", "response"),
|
||||
msg("3", "2", "user", "abandoned path"),
|
||||
branchSummary("4", "2", "Summary of abandoned work", "3"),
|
||||
msg("5", "4", "user", "new direction"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries, "5");
|
||||
|
||||
expect(ctx.messages).toHaveLength(4);
|
||||
expect((ctx.messages[2] as any).summary).toContain(
|
||||
"Summary of abandoned work",
|
||||
);
|
||||
expect((ctx.messages[3] as any).content).toBe("new direction");
|
||||
});
|
||||
|
||||
it("complex tree with multiple branches and compaction", () => {
|
||||
// Tree:
|
||||
// 1 -> 2 -> 3 -> 4 -> compaction(5) -> 6 -> 7 (main path)
|
||||
// \-> 8 -> 9 (abandoned branch)
|
||||
// \-> branchSummary(10) -> 11 (resumed from 3)
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "start"),
|
||||
msg("2", "1", "assistant", "r1"),
|
||||
msg("3", "2", "user", "q2"),
|
||||
msg("4", "3", "assistant", "r2"),
|
||||
compaction("5", "4", "Compacted history", "3"),
|
||||
msg("6", "5", "user", "q3"),
|
||||
msg("7", "6", "assistant", "r3"),
|
||||
// Abandoned branch from 3
|
||||
msg("8", "3", "user", "wrong path"),
|
||||
msg("9", "8", "assistant", "wrong response"),
|
||||
// Branch summary resuming from 3
|
||||
branchSummary("10", "3", "Tried wrong approach", "9"),
|
||||
msg("11", "10", "user", "better approach"),
|
||||
];
|
||||
|
||||
// Main path to 7: summary + kept(3,4) + after(6,7)
|
||||
const ctxMain = buildSessionContext(entries, "7");
|
||||
expect(ctxMain.messages).toHaveLength(5);
|
||||
expect((ctxMain.messages[0] as any).summary).toContain(
|
||||
"Compacted history",
|
||||
);
|
||||
expect((ctxMain.messages[1] as any).content).toBe("q2");
|
||||
expect((ctxMain.messages[2] as any).content[0].text).toBe("r2");
|
||||
expect((ctxMain.messages[3] as any).content).toBe("q3");
|
||||
expect((ctxMain.messages[4] as any).content[0].text).toBe("r3");
|
||||
|
||||
// Branch path to 11: 1,2,3 + branch_summary + 11
|
||||
const ctxBranch = buildSessionContext(entries, "11");
|
||||
expect(ctxBranch.messages).toHaveLength(5);
|
||||
expect((ctxBranch.messages[0] as any).content).toBe("start");
|
||||
expect((ctxBranch.messages[1] as any).content[0].text).toBe("r1");
|
||||
expect((ctxBranch.messages[2] as any).content).toBe("q2");
|
||||
expect((ctxBranch.messages[3] as any).summary).toContain(
|
||||
"Tried wrong approach",
|
||||
);
|
||||
expect((ctxBranch.messages[4] as any).content).toBe("better approach");
|
||||
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
it("uses last entry when leafId not found", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
msg("2", "1", "assistant", "hi"),
|
||||
];
|
||||
const ctx = buildSessionContext(entries, "nonexistent");
|
||||
expect(ctx.messages).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("handles orphaned entries gracefully", () => {
|
||||
const entries: SessionEntry[] = [
|
||||
msg("1", null, "user", "hello"),
|
||||
msg("2", "missing", "assistant", "orphan"), // parent doesn't exist
|
||||
];
|
||||
const ctx = buildSessionContext(entries, "2");
|
||||
// Should only get the orphan since parent chain is broken
|
||||
expect(ctx.messages).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
findMostRecentSession,
|
||||
loadEntriesFromFile,
|
||||
SessionManager,
|
||||
} from "../../src/core/session-manager.js";
|
||||
|
||||
describe("loadEntriesFromFile", () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `session-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("returns empty array for non-existent file", () => {
|
||||
const entries = loadEntriesFromFile(join(tempDir, "nonexistent.jsonl"));
|
||||
expect(entries).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array for empty file", () => {
|
||||
const file = join(tempDir, "empty.jsonl");
|
||||
writeFileSync(file, "");
|
||||
expect(loadEntriesFromFile(file)).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array for file without valid session header", () => {
|
||||
const file = join(tempDir, "no-header.jsonl");
|
||||
writeFileSync(file, '{"type":"message","id":"1"}\n');
|
||||
expect(loadEntriesFromFile(file)).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns empty array for malformed JSON", () => {
|
||||
const file = join(tempDir, "malformed.jsonl");
|
||||
writeFileSync(file, "not json\n");
|
||||
expect(loadEntriesFromFile(file)).toEqual([]);
|
||||
});
|
||||
|
||||
it("loads valid session file", () => {
|
||||
const file = join(tempDir, "valid.jsonl");
|
||||
writeFileSync(
|
||||
file,
|
||||
'{"type":"session","id":"abc","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n' +
|
||||
'{"type":"message","id":"1","parentId":null,"timestamp":"2025-01-01T00:00:01Z","message":{"role":"user","content":"hi","timestamp":1}}\n',
|
||||
);
|
||||
const entries = loadEntriesFromFile(file);
|
||||
expect(entries).toHaveLength(2);
|
||||
expect(entries[0].type).toBe("session");
|
||||
expect(entries[1].type).toBe("message");
|
||||
});
|
||||
|
||||
it("skips malformed lines but keeps valid ones", () => {
|
||||
const file = join(tempDir, "mixed.jsonl");
|
||||
writeFileSync(
|
||||
file,
|
||||
'{"type":"session","id":"abc","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n' +
|
||||
"not valid json\n" +
|
||||
'{"type":"message","id":"1","parentId":null,"timestamp":"2025-01-01T00:00:01Z","message":{"role":"user","content":"hi","timestamp":1}}\n',
|
||||
);
|
||||
const entries = loadEntriesFromFile(file);
|
||||
expect(entries).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("findMostRecentSession", () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `session-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("returns null for empty directory", () => {
|
||||
expect(findMostRecentSession(tempDir)).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for non-existent directory", () => {
|
||||
expect(findMostRecentSession(join(tempDir, "nonexistent"))).toBeNull();
|
||||
});
|
||||
|
||||
it("ignores non-jsonl files", () => {
|
||||
writeFileSync(join(tempDir, "file.txt"), "hello");
|
||||
writeFileSync(join(tempDir, "file.json"), "{}");
|
||||
expect(findMostRecentSession(tempDir)).toBeNull();
|
||||
});
|
||||
|
||||
it("ignores jsonl files without valid session header", () => {
|
||||
writeFileSync(join(tempDir, "invalid.jsonl"), '{"type":"message"}\n');
|
||||
expect(findMostRecentSession(tempDir)).toBeNull();
|
||||
});
|
||||
|
||||
it("returns single valid session file", () => {
|
||||
const file = join(tempDir, "session.jsonl");
|
||||
writeFileSync(
|
||||
file,
|
||||
'{"type":"session","id":"abc","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n',
|
||||
);
|
||||
expect(findMostRecentSession(tempDir)).toBe(file);
|
||||
});
|
||||
|
||||
it("returns most recently modified session", async () => {
|
||||
const file1 = join(tempDir, "older.jsonl");
|
||||
const file2 = join(tempDir, "newer.jsonl");
|
||||
|
||||
writeFileSync(
|
||||
file1,
|
||||
'{"type":"session","id":"old","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n',
|
||||
);
|
||||
// Small delay to ensure different mtime
|
||||
await new Promise((r) => setTimeout(r, 10));
|
||||
writeFileSync(
|
||||
file2,
|
||||
'{"type":"session","id":"new","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n',
|
||||
);
|
||||
|
||||
expect(findMostRecentSession(tempDir)).toBe(file2);
|
||||
});
|
||||
|
||||
it("skips invalid files and returns valid one", async () => {
|
||||
const invalid = join(tempDir, "invalid.jsonl");
|
||||
const valid = join(tempDir, "valid.jsonl");
|
||||
|
||||
writeFileSync(invalid, '{"type":"not-session"}\n');
|
||||
await new Promise((r) => setTimeout(r, 10));
|
||||
writeFileSync(
|
||||
valid,
|
||||
'{"type":"session","id":"abc","timestamp":"2025-01-01T00:00:00Z","cwd":"/tmp"}\n',
|
||||
);
|
||||
|
||||
expect(findMostRecentSession(tempDir)).toBe(valid);
|
||||
});
|
||||
});
|
||||
|
||||
describe("SessionManager.setSessionFile with corrupted files", () => {
|
||||
let tempDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
tempDir = join(tmpdir(), `session-test-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("truncates and rewrites empty file with valid header", () => {
|
||||
const emptyFile = join(tempDir, "empty.jsonl");
|
||||
writeFileSync(emptyFile, "");
|
||||
|
||||
const sm = SessionManager.open(emptyFile, tempDir);
|
||||
|
||||
// Should have created a new session with valid header
|
||||
expect(sm.getSessionId()).toBeTruthy();
|
||||
expect(sm.getHeader()).toBeTruthy();
|
||||
expect(sm.getHeader()?.type).toBe("session");
|
||||
|
||||
// File should now contain a valid header
|
||||
const content = readFileSync(emptyFile, "utf-8");
|
||||
const lines = content.trim().split("\n").filter(Boolean);
|
||||
expect(lines.length).toBe(1);
|
||||
const header = JSON.parse(lines[0]);
|
||||
expect(header.type).toBe("session");
|
||||
expect(header.id).toBe(sm.getSessionId());
|
||||
});
|
||||
|
||||
it("truncates and rewrites file without valid header", () => {
|
||||
const noHeaderFile = join(tempDir, "no-header.jsonl");
|
||||
// File with messages but no session header (corrupted state)
|
||||
writeFileSync(
|
||||
noHeaderFile,
|
||||
'{"type":"message","id":"abc","parentId":"orphaned","timestamp":"2025-01-01T00:00:00Z","message":{"role":"assistant","content":"test"}}\n',
|
||||
);
|
||||
|
||||
const sm = SessionManager.open(noHeaderFile, tempDir);
|
||||
|
||||
// Should have created a new session with valid header
|
||||
expect(sm.getSessionId()).toBeTruthy();
|
||||
expect(sm.getHeader()).toBeTruthy();
|
||||
expect(sm.getHeader()?.type).toBe("session");
|
||||
|
||||
// File should now contain only a valid header (old content truncated)
|
||||
const content = readFileSync(noHeaderFile, "utf-8");
|
||||
const lines = content.trim().split("\n").filter(Boolean);
|
||||
expect(lines.length).toBe(1);
|
||||
const header = JSON.parse(lines[0]);
|
||||
expect(header.type).toBe("session");
|
||||
expect(header.id).toBe(sm.getSessionId());
|
||||
});
|
||||
|
||||
it("preserves explicit session file path when recovering from corrupted file", () => {
|
||||
const explicitPath = join(tempDir, "my-session.jsonl");
|
||||
writeFileSync(explicitPath, "");
|
||||
|
||||
const sm = SessionManager.open(explicitPath, tempDir);
|
||||
|
||||
// The session file path should be preserved
|
||||
expect(sm.getSessionFile()).toBe(explicitPath);
|
||||
});
|
||||
|
||||
it("subsequent loads of recovered file work correctly", () => {
|
||||
const corruptedFile = join(tempDir, "corrupted.jsonl");
|
||||
writeFileSync(corruptedFile, "garbage content\n");
|
||||
|
||||
// First open recovers the file
|
||||
const sm1 = SessionManager.open(corruptedFile, tempDir);
|
||||
const sessionId = sm1.getSessionId();
|
||||
|
||||
// Second open should load the recovered file successfully
|
||||
const sm2 = SessionManager.open(corruptedFile, tempDir);
|
||||
expect(sm2.getSessionId()).toBe(sessionId);
|
||||
expect(sm2.getHeader()?.type).toBe("session");
|
||||
});
|
||||
});
|
||||
217
packages/coding-agent/test/session-manager/labels.test.ts
Normal file
217
packages/coding-agent/test/session-manager/labels.test.ts
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
type LabelEntry,
|
||||
SessionManager,
|
||||
} from "../../src/core/session-manager.js";
|
||||
|
||||
describe("SessionManager labels", () => {
|
||||
it("sets and gets labels", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
|
||||
// No label initially
|
||||
expect(session.getLabel(msgId)).toBeUndefined();
|
||||
|
||||
// Set a label
|
||||
const labelId = session.appendLabelChange(msgId, "checkpoint");
|
||||
expect(session.getLabel(msgId)).toBe("checkpoint");
|
||||
|
||||
// Label entry should be in entries
|
||||
const entries = session.getEntries();
|
||||
const labelEntry = entries.find((e) => e.type === "label") as LabelEntry;
|
||||
expect(labelEntry).toBeDefined();
|
||||
expect(labelEntry.id).toBe(labelId);
|
||||
expect(labelEntry.targetId).toBe(msgId);
|
||||
expect(labelEntry.label).toBe("checkpoint");
|
||||
});
|
||||
|
||||
it("clears labels with undefined", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
|
||||
session.appendLabelChange(msgId, "checkpoint");
|
||||
expect(session.getLabel(msgId)).toBe("checkpoint");
|
||||
|
||||
// Clear the label
|
||||
session.appendLabelChange(msgId, undefined);
|
||||
expect(session.getLabel(msgId)).toBeUndefined();
|
||||
});
|
||||
|
||||
it("last label wins", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
|
||||
session.appendLabelChange(msgId, "first");
|
||||
session.appendLabelChange(msgId, "second");
|
||||
session.appendLabelChange(msgId, "third");
|
||||
|
||||
expect(session.getLabel(msgId)).toBe("third");
|
||||
});
|
||||
|
||||
it("labels are included in tree nodes", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msg1Id = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
const msg2Id = session.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hi" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
});
|
||||
|
||||
session.appendLabelChange(msg1Id, "start");
|
||||
session.appendLabelChange(msg2Id, "response");
|
||||
|
||||
const tree = session.getTree();
|
||||
|
||||
// Find the message nodes (skip label entries)
|
||||
const msg1Node = tree.find((n) => n.entry.id === msg1Id);
|
||||
expect(msg1Node?.label).toBe("start");
|
||||
|
||||
// msg2 is a child of msg1
|
||||
const msg2Node = msg1Node?.children.find((n) => n.entry.id === msg2Id);
|
||||
expect(msg2Node?.label).toBe("response");
|
||||
});
|
||||
|
||||
it("labels are preserved in createBranchedSession", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msg1Id = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
const msg2Id = session.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hi" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
});
|
||||
|
||||
session.appendLabelChange(msg1Id, "important");
|
||||
session.appendLabelChange(msg2Id, "also-important");
|
||||
|
||||
// Branch from msg2 (in-memory mode returns null, but updates internal state)
|
||||
session.createBranchedSession(msg2Id);
|
||||
|
||||
// Labels should be preserved
|
||||
expect(session.getLabel(msg1Id)).toBe("important");
|
||||
expect(session.getLabel(msg2Id)).toBe("also-important");
|
||||
|
||||
// New label entries should exist
|
||||
const entries = session.getEntries();
|
||||
const labelEntries = entries.filter(
|
||||
(e) => e.type === "label",
|
||||
) as LabelEntry[];
|
||||
expect(labelEntries).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("labels not on path are not preserved in createBranchedSession", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msg1Id = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
const msg2Id = session.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hi" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
});
|
||||
const msg3Id = session.appendMessage({
|
||||
role: "user",
|
||||
content: "followup",
|
||||
timestamp: 3,
|
||||
});
|
||||
|
||||
// Label all messages
|
||||
session.appendLabelChange(msg1Id, "first");
|
||||
session.appendLabelChange(msg2Id, "second");
|
||||
session.appendLabelChange(msg3Id, "third");
|
||||
|
||||
// Branch from msg2 (excludes msg3)
|
||||
session.createBranchedSession(msg2Id);
|
||||
|
||||
// Only labels for msg1 and msg2 should be preserved
|
||||
expect(session.getLabel(msg1Id)).toBe("first");
|
||||
expect(session.getLabel(msg2Id)).toBe("second");
|
||||
expect(session.getLabel(msg3Id)).toBeUndefined();
|
||||
});
|
||||
|
||||
it("labels are not included in buildSessionContext", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
session.appendLabelChange(msgId, "checkpoint");
|
||||
|
||||
const ctx = session.buildSessionContext();
|
||||
expect(ctx.messages).toHaveLength(1);
|
||||
expect(ctx.messages[0].role).toBe("user");
|
||||
});
|
||||
|
||||
it("throws when labeling non-existent entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
expect(() => session.appendLabelChange("non-existent", "label")).toThrow(
|
||||
"Entry non-existent not found",
|
||||
);
|
||||
});
|
||||
});
|
||||
96
packages/coding-agent/test/session-manager/migration.test.ts
Normal file
96
packages/coding-agent/test/session-manager/migration.test.ts
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
type FileEntry,
|
||||
migrateSessionEntries,
|
||||
} from "../../src/core/session-manager.js";
|
||||
|
||||
describe("migrateSessionEntries", () => {
|
||||
it("should add id/parentId to v1 entries", () => {
|
||||
const entries: FileEntry[] = [
|
||||
{
|
||||
type: "session",
|
||||
id: "sess-1",
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
cwd: "/tmp",
|
||||
},
|
||||
{
|
||||
type: "message",
|
||||
timestamp: "2025-01-01T00:00:01Z",
|
||||
message: { role: "user", content: "hi", timestamp: 1 },
|
||||
},
|
||||
{
|
||||
type: "message",
|
||||
timestamp: "2025-01-01T00:00:02Z",
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hello" }],
|
||||
api: "test",
|
||||
provider: "test",
|
||||
model: "test",
|
||||
usage: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0 },
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
},
|
||||
},
|
||||
] as FileEntry[];
|
||||
|
||||
migrateSessionEntries(entries);
|
||||
|
||||
// Header should have version set (v3 is current after hookMessage->custom migration)
|
||||
expect((entries[0] as any).version).toBe(3);
|
||||
|
||||
// Entries should have id/parentId
|
||||
const msg1 = entries[1] as any;
|
||||
const msg2 = entries[2] as any;
|
||||
|
||||
expect(msg1.id).toBeDefined();
|
||||
expect(msg1.id.length).toBe(8);
|
||||
expect(msg1.parentId).toBeNull();
|
||||
|
||||
expect(msg2.id).toBeDefined();
|
||||
expect(msg2.id.length).toBe(8);
|
||||
expect(msg2.parentId).toBe(msg1.id);
|
||||
});
|
||||
|
||||
it("should be idempotent (skip already migrated)", () => {
|
||||
const entries: FileEntry[] = [
|
||||
{
|
||||
type: "session",
|
||||
id: "sess-1",
|
||||
version: 2,
|
||||
timestamp: "2025-01-01T00:00:00Z",
|
||||
cwd: "/tmp",
|
||||
},
|
||||
{
|
||||
type: "message",
|
||||
id: "abc12345",
|
||||
parentId: null,
|
||||
timestamp: "2025-01-01T00:00:01Z",
|
||||
message: { role: "user", content: "hi", timestamp: 1 },
|
||||
},
|
||||
{
|
||||
type: "message",
|
||||
id: "def67890",
|
||||
parentId: "abc12345",
|
||||
timestamp: "2025-01-01T00:00:02Z",
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hello" }],
|
||||
api: "test",
|
||||
provider: "test",
|
||||
model: "test",
|
||||
usage: { input: 1, output: 1, cacheRead: 0, cacheWrite: 0 },
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
},
|
||||
},
|
||||
] as FileEntry[];
|
||||
|
||||
migrateSessionEntries(entries);
|
||||
|
||||
// IDs should be unchanged
|
||||
expect((entries[1] as any).id).toBe("abc12345");
|
||||
expect((entries[2] as any).id).toBe("def67890");
|
||||
expect((entries[2] as any).parentId).toBe("abc12345");
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
type CustomEntry,
|
||||
SessionManager,
|
||||
} from "../../src/core/session-manager.js";
|
||||
|
||||
describe("SessionManager.saveCustomEntry", () => {
|
||||
it("saves custom entries and includes them in tree traversal", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Save a message
|
||||
const msgId = session.appendMessage({
|
||||
role: "user",
|
||||
content: "hello",
|
||||
timestamp: 1,
|
||||
});
|
||||
|
||||
// Save a custom entry
|
||||
const customId = session.appendCustomEntry("my_data", { foo: "bar" });
|
||||
|
||||
// Save another message
|
||||
const msg2Id = session.appendMessage({
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "hi" }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: 2,
|
||||
});
|
||||
|
||||
// Custom entry should be in entries
|
||||
const entries = session.getEntries();
|
||||
expect(entries).toHaveLength(3);
|
||||
|
||||
const customEntry = entries.find((e) => e.type === "custom") as CustomEntry;
|
||||
expect(customEntry).toBeDefined();
|
||||
expect(customEntry.customType).toBe("my_data");
|
||||
expect(customEntry.data).toEqual({ foo: "bar" });
|
||||
expect(customEntry.id).toBe(customId);
|
||||
expect(customEntry.parentId).toBe(msgId);
|
||||
|
||||
// Tree structure should be correct
|
||||
const path = session.getBranch();
|
||||
expect(path).toHaveLength(3);
|
||||
expect(path[0].id).toBe(msgId);
|
||||
expect(path[1].id).toBe(customId);
|
||||
expect(path[2].id).toBe(msg2Id);
|
||||
|
||||
// buildSessionContext should work (custom entries skipped in messages)
|
||||
const ctx = session.buildSessionContext();
|
||||
expect(ctx.messages).toHaveLength(2); // only message entries
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,549 @@
|
|||
import { existsSync, mkdirSync, readFileSync, rmSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
type CustomEntry,
|
||||
SessionManager,
|
||||
} from "../../src/core/session-manager.js";
|
||||
import { assistantMsg, userMsg } from "../utilities.js";
|
||||
|
||||
describe("SessionManager append and tree traversal", () => {
|
||||
describe("append operations", () => {
|
||||
it("appendMessage creates entry with correct parentId chain", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("first"));
|
||||
const id2 = session.appendMessage(assistantMsg("second"));
|
||||
const id3 = session.appendMessage(userMsg("third"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
expect(entries).toHaveLength(3);
|
||||
|
||||
expect(entries[0].id).toBe(id1);
|
||||
expect(entries[0].parentId).toBeNull();
|
||||
expect(entries[0].type).toBe("message");
|
||||
|
||||
expect(entries[1].id).toBe(id2);
|
||||
expect(entries[1].parentId).toBe(id1);
|
||||
|
||||
expect(entries[2].id).toBe(id3);
|
||||
expect(entries[2].parentId).toBe(id2);
|
||||
});
|
||||
|
||||
it("appendThinkingLevelChange integrates into tree", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage(userMsg("hello"));
|
||||
const thinkingId = session.appendThinkingLevelChange("high");
|
||||
const _msg2Id = session.appendMessage(assistantMsg("response"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
expect(entries).toHaveLength(3);
|
||||
|
||||
const thinkingEntry = entries.find(
|
||||
(e) => e.type === "thinking_level_change",
|
||||
);
|
||||
expect(thinkingEntry).toBeDefined();
|
||||
expect(thinkingEntry!.id).toBe(thinkingId);
|
||||
expect(thinkingEntry!.parentId).toBe(msgId);
|
||||
|
||||
expect(entries[2].parentId).toBe(thinkingId);
|
||||
});
|
||||
|
||||
it("appendModelChange integrates into tree", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage(userMsg("hello"));
|
||||
const modelId = session.appendModelChange("openai", "gpt-4");
|
||||
const _msg2Id = session.appendMessage(assistantMsg("response"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
const modelEntry = entries.find((e) => e.type === "model_change");
|
||||
expect(modelEntry).toBeDefined();
|
||||
expect(modelEntry?.id).toBe(modelId);
|
||||
expect(modelEntry?.parentId).toBe(msgId);
|
||||
if (modelEntry?.type === "model_change") {
|
||||
expect(modelEntry.provider).toBe("openai");
|
||||
expect(modelEntry.modelId).toBe("gpt-4");
|
||||
}
|
||||
|
||||
expect(entries[2].parentId).toBe(modelId);
|
||||
});
|
||||
|
||||
it("appendCompaction integrates into tree", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const compactionId = session.appendCompaction("summary", id1, 1000);
|
||||
const _id3 = session.appendMessage(userMsg("3"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
const compactionEntry = entries.find((e) => e.type === "compaction");
|
||||
expect(compactionEntry).toBeDefined();
|
||||
expect(compactionEntry?.id).toBe(compactionId);
|
||||
expect(compactionEntry?.parentId).toBe(id2);
|
||||
if (compactionEntry?.type === "compaction") {
|
||||
expect(compactionEntry.summary).toBe("summary");
|
||||
expect(compactionEntry.firstKeptEntryId).toBe(id1);
|
||||
expect(compactionEntry.tokensBefore).toBe(1000);
|
||||
}
|
||||
|
||||
expect(entries[3].parentId).toBe(compactionId);
|
||||
});
|
||||
|
||||
it("appendCustomEntry integrates into tree", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const msgId = session.appendMessage(userMsg("hello"));
|
||||
const customId = session.appendCustomEntry("my_data", { key: "value" });
|
||||
const _msg2Id = session.appendMessage(assistantMsg("response"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
const customEntry = entries.find(
|
||||
(e) => e.type === "custom",
|
||||
) as CustomEntry;
|
||||
expect(customEntry).toBeDefined();
|
||||
expect(customEntry.id).toBe(customId);
|
||||
expect(customEntry.parentId).toBe(msgId);
|
||||
expect(customEntry.customType).toBe("my_data");
|
||||
expect(customEntry.data).toEqual({ key: "value" });
|
||||
|
||||
expect(entries[2].parentId).toBe(customId);
|
||||
});
|
||||
|
||||
it("leaf pointer advances after each append", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
expect(session.getLeafId()).toBeNull();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
expect(session.getLeafId()).toBe(id1);
|
||||
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
expect(session.getLeafId()).toBe(id2);
|
||||
|
||||
const id3 = session.appendThinkingLevelChange("high");
|
||||
expect(session.getLeafId()).toBe(id3);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getPath", () => {
|
||||
it("returns empty array for empty session", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
expect(session.getBranch()).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns single entry path", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
const id = session.appendMessage(userMsg("hello"));
|
||||
|
||||
const path = session.getBranch();
|
||||
expect(path).toHaveLength(1);
|
||||
expect(path[0].id).toBe(id);
|
||||
});
|
||||
|
||||
it("returns full path from root to leaf", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendThinkingLevelChange("high");
|
||||
const id4 = session.appendMessage(userMsg("3"));
|
||||
|
||||
const path = session.getBranch();
|
||||
expect(path).toHaveLength(4);
|
||||
expect(path.map((e) => e.id)).toEqual([id1, id2, id3, id4]);
|
||||
});
|
||||
|
||||
it("returns path from specified entry to root", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const _id3 = session.appendMessage(userMsg("3"));
|
||||
const _id4 = session.appendMessage(assistantMsg("4"));
|
||||
|
||||
const path = session.getBranch(id2);
|
||||
expect(path).toHaveLength(2);
|
||||
expect(path.map((e) => e.id)).toEqual([id1, id2]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getTree", () => {
|
||||
it("returns empty array for empty session", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
expect(session.getTree()).toEqual([]);
|
||||
});
|
||||
|
||||
it("returns single root for linear session", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendMessage(userMsg("3"));
|
||||
|
||||
const tree = session.getTree();
|
||||
expect(tree).toHaveLength(1);
|
||||
|
||||
const root = tree[0];
|
||||
expect(root.entry.id).toBe(id1);
|
||||
expect(root.children).toHaveLength(1);
|
||||
expect(root.children[0].entry.id).toBe(id2);
|
||||
expect(root.children[0].children).toHaveLength(1);
|
||||
expect(root.children[0].children[0].entry.id).toBe(id3);
|
||||
expect(root.children[0].children[0].children).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns tree with branches after branch", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Build: 1 -> 2 -> 3
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendMessage(userMsg("3"));
|
||||
|
||||
// Branch from id2, add new path: 2 -> 4
|
||||
session.branch(id2);
|
||||
const id4 = session.appendMessage(userMsg("4-branch"));
|
||||
|
||||
const tree = session.getTree();
|
||||
expect(tree).toHaveLength(1);
|
||||
|
||||
const root = tree[0];
|
||||
expect(root.entry.id).toBe(id1);
|
||||
expect(root.children).toHaveLength(1);
|
||||
|
||||
const node2 = root.children[0];
|
||||
expect(node2.entry.id).toBe(id2);
|
||||
expect(node2.children).toHaveLength(2); // id3 and id4 are siblings
|
||||
|
||||
const childIds = node2.children.map((c) => c.entry.id).sort();
|
||||
expect(childIds).toEqual([id3, id4].sort());
|
||||
});
|
||||
|
||||
it("handles multiple branches at same point", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const _id1 = session.appendMessage(userMsg("root"));
|
||||
const id2 = session.appendMessage(assistantMsg("response"));
|
||||
|
||||
// Branch A
|
||||
session.branch(id2);
|
||||
const idA = session.appendMessage(userMsg("branch-A"));
|
||||
|
||||
// Branch B
|
||||
session.branch(id2);
|
||||
const idB = session.appendMessage(userMsg("branch-B"));
|
||||
|
||||
// Branch C
|
||||
session.branch(id2);
|
||||
const idC = session.appendMessage(userMsg("branch-C"));
|
||||
|
||||
const tree = session.getTree();
|
||||
const node2 = tree[0].children[0];
|
||||
expect(node2.entry.id).toBe(id2);
|
||||
expect(node2.children).toHaveLength(3);
|
||||
|
||||
const branchIds = node2.children.map((c) => c.entry.id).sort();
|
||||
expect(branchIds).toEqual([idA, idB, idC].sort());
|
||||
});
|
||||
|
||||
it("handles deep branching", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Main path: 1 -> 2 -> 3 -> 4
|
||||
const _id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendMessage(userMsg("3"));
|
||||
const _id4 = session.appendMessage(assistantMsg("4"));
|
||||
|
||||
// Branch from 2: 2 -> 5 -> 6
|
||||
session.branch(id2);
|
||||
const id5 = session.appendMessage(userMsg("5"));
|
||||
const _id6 = session.appendMessage(assistantMsg("6"));
|
||||
|
||||
// Branch from 5: 5 -> 7
|
||||
session.branch(id5);
|
||||
const _id7 = session.appendMessage(userMsg("7"));
|
||||
|
||||
const tree = session.getTree();
|
||||
|
||||
// Verify structure
|
||||
const node2 = tree[0].children[0];
|
||||
expect(node2.children).toHaveLength(2); // id3 and id5
|
||||
|
||||
const node5 = node2.children.find((c) => c.entry.id === id5)!;
|
||||
expect(node5.children).toHaveLength(2); // id6 and id7
|
||||
|
||||
const node3 = node2.children.find((c) => c.entry.id === id3)!;
|
||||
expect(node3.children).toHaveLength(1); // id4
|
||||
});
|
||||
});
|
||||
|
||||
describe("branch", () => {
|
||||
it("moves leaf pointer to specified entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const _id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendMessage(userMsg("3"));
|
||||
|
||||
expect(session.getLeafId()).toBe(id3);
|
||||
|
||||
session.branch(id1);
|
||||
expect(session.getLeafId()).toBe(id1);
|
||||
});
|
||||
|
||||
it("throws for non-existent entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
session.appendMessage(userMsg("hello"));
|
||||
|
||||
expect(() => session.branch("nonexistent")).toThrow(
|
||||
"Entry nonexistent not found",
|
||||
);
|
||||
});
|
||||
|
||||
it("new appends become children of branch point", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const _id2 = session.appendMessage(assistantMsg("2"));
|
||||
|
||||
session.branch(id1);
|
||||
const id3 = session.appendMessage(userMsg("branched"));
|
||||
|
||||
const entries = session.getEntries();
|
||||
const branchedEntry = entries.find((e) => e.id === id3)!;
|
||||
expect(branchedEntry.parentId).toBe(id1); // sibling of id2
|
||||
});
|
||||
});
|
||||
|
||||
describe("branchWithSummary", () => {
|
||||
it("inserts branch summary and advances leaf", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const _id2 = session.appendMessage(assistantMsg("2"));
|
||||
const _id3 = session.appendMessage(userMsg("3"));
|
||||
|
||||
const summaryId = session.branchWithSummary(
|
||||
id1,
|
||||
"Summary of abandoned work",
|
||||
);
|
||||
|
||||
expect(session.getLeafId()).toBe(summaryId);
|
||||
|
||||
const entries = session.getEntries();
|
||||
const summaryEntry = entries.find((e) => e.type === "branch_summary");
|
||||
expect(summaryEntry).toBeDefined();
|
||||
expect(summaryEntry?.parentId).toBe(id1);
|
||||
if (summaryEntry?.type === "branch_summary") {
|
||||
expect(summaryEntry.summary).toBe("Summary of abandoned work");
|
||||
}
|
||||
});
|
||||
|
||||
it("throws for non-existent entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
session.appendMessage(userMsg("hello"));
|
||||
|
||||
expect(() => session.branchWithSummary("nonexistent", "summary")).toThrow(
|
||||
"Entry nonexistent not found",
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getLeafEntry", () => {
|
||||
it("returns undefined for empty session", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
expect(session.getLeafEntry()).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns current leaf entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
|
||||
const leaf = session.getLeafEntry();
|
||||
expect(leaf).toBeDefined();
|
||||
expect(leaf!.id).toBe(id2);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getEntry", () => {
|
||||
it("returns undefined for non-existent id", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
expect(session.getEntry("nonexistent")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns entry by id", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
const id1 = session.appendMessage(userMsg("first"));
|
||||
const id2 = session.appendMessage(assistantMsg("second"));
|
||||
|
||||
const entry1 = session.getEntry(id1);
|
||||
expect(entry1).toBeDefined();
|
||||
expect(entry1?.type).toBe("message");
|
||||
if (entry1?.type === "message" && entry1.message.role === "user") {
|
||||
expect(entry1.message.content).toBe("first");
|
||||
}
|
||||
|
||||
const entry2 = session.getEntry(id2);
|
||||
expect(entry2).toBeDefined();
|
||||
if (entry2?.type === "message" && entry2.message.role === "assistant") {
|
||||
expect((entry2.message.content as any)[0].text).toBe("second");
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildSessionContext with branches", () => {
|
||||
it("returns messages from current branch only", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Main: 1 -> 2 -> 3
|
||||
session.appendMessage(userMsg("msg1"));
|
||||
const id2 = session.appendMessage(assistantMsg("msg2"));
|
||||
session.appendMessage(userMsg("msg3"));
|
||||
|
||||
// Branch from 2: 2 -> 4
|
||||
session.branch(id2);
|
||||
session.appendMessage(assistantMsg("msg4-branch"));
|
||||
|
||||
const ctx = session.buildSessionContext();
|
||||
expect(ctx.messages).toHaveLength(3); // msg1, msg2, msg4-branch (not msg3)
|
||||
|
||||
expect((ctx.messages[0] as any).content).toBe("msg1");
|
||||
expect((ctx.messages[1] as any).content[0].text).toBe("msg2");
|
||||
expect((ctx.messages[2] as any).content[0].text).toBe("msg4-branch");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("createBranchedSession", () => {
|
||||
it("throws for non-existent entry", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
session.appendMessage(userMsg("hello"));
|
||||
|
||||
expect(() => session.createBranchedSession("nonexistent")).toThrow(
|
||||
"Entry nonexistent not found",
|
||||
);
|
||||
});
|
||||
|
||||
it("creates new session with path to specified leaf (in-memory)", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Build: 1 -> 2 -> 3 -> 4
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
const id3 = session.appendMessage(userMsg("3"));
|
||||
session.appendMessage(assistantMsg("4"));
|
||||
|
||||
// Branch from 3: 3 -> 5
|
||||
session.branch(id3);
|
||||
const _id5 = session.appendMessage(userMsg("5"));
|
||||
|
||||
// Create branched session from id2 (should only have 1 -> 2)
|
||||
const result = session.createBranchedSession(id2);
|
||||
expect(result).toBeUndefined(); // in-memory returns null
|
||||
|
||||
// Session should now only have entries 1 and 2
|
||||
const entries = session.getEntries();
|
||||
expect(entries).toHaveLength(2);
|
||||
expect(entries[0].id).toBe(id1);
|
||||
expect(entries[1].id).toBe(id2);
|
||||
});
|
||||
|
||||
it("extracts correct path from branched tree", () => {
|
||||
const session = SessionManager.inMemory();
|
||||
|
||||
// Build: 1 -> 2 -> 3
|
||||
const id1 = session.appendMessage(userMsg("1"));
|
||||
const id2 = session.appendMessage(assistantMsg("2"));
|
||||
session.appendMessage(userMsg("3"));
|
||||
|
||||
// Branch from 2: 2 -> 4 -> 5
|
||||
session.branch(id2);
|
||||
const id4 = session.appendMessage(userMsg("4"));
|
||||
const id5 = session.appendMessage(assistantMsg("5"));
|
||||
|
||||
// Create branched session from id5 (should have 1 -> 2 -> 4 -> 5)
|
||||
session.createBranchedSession(id5);
|
||||
|
||||
const entries = session.getEntries();
|
||||
expect(entries).toHaveLength(4);
|
||||
expect(entries.map((e) => e.id)).toEqual([id1, id2, id4, id5]);
|
||||
});
|
||||
|
||||
it("does not duplicate entries when forking from first user message", () => {
|
||||
const tempDir = join(tmpdir(), `session-fork-dedup-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
try {
|
||||
// Create a persisted session with a couple of turns
|
||||
const session = SessionManager.create(tempDir, tempDir);
|
||||
const id1 = session.appendMessage(userMsg("first question"));
|
||||
session.appendMessage(assistantMsg("first answer"));
|
||||
session.appendMessage(userMsg("second question"));
|
||||
session.appendMessage(assistantMsg("second answer"));
|
||||
|
||||
// Fork from the very first user message (no assistant in the branched path)
|
||||
const newFile = session.createBranchedSession(id1);
|
||||
expect(newFile).toBeDefined();
|
||||
|
||||
// The branched path has no assistant, so the file should not exist yet
|
||||
// (deferred to _persist on first assistant, matching newSession() contract)
|
||||
expect(existsSync(newFile!)).toBe(false);
|
||||
|
||||
// Simulate extension adding entry before assistant (like preset on turn_start)
|
||||
session.appendCustomEntry("preset-state", { name: "plan" });
|
||||
|
||||
// Now the assistant responds
|
||||
session.appendMessage(assistantMsg("new answer"));
|
||||
|
||||
// File should now exist with exactly one header and no duplicate IDs
|
||||
expect(existsSync(newFile!)).toBe(true);
|
||||
const content = readFileSync(newFile!, "utf-8");
|
||||
const lines = content.trim().split("\n").filter(Boolean);
|
||||
const records = lines.map((line) => JSON.parse(line));
|
||||
|
||||
expect(records.filter((r) => r.type === "session")).toHaveLength(1);
|
||||
|
||||
const entryIds = records
|
||||
.filter((r) => r.type !== "session")
|
||||
.map((r) => r.id)
|
||||
.filter((id): id is string => typeof id === "string");
|
||||
expect(new Set(entryIds).size).toBe(entryIds.length);
|
||||
} finally {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("writes file immediately when forking from a point with assistant messages", () => {
|
||||
const tempDir = join(tmpdir(), `session-fork-with-assistant-${Date.now()}`);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
try {
|
||||
const session = SessionManager.create(tempDir, tempDir);
|
||||
session.appendMessage(userMsg("first question"));
|
||||
const id2 = session.appendMessage(assistantMsg("first answer"));
|
||||
session.appendMessage(userMsg("second question"));
|
||||
session.appendMessage(assistantMsg("second answer"));
|
||||
|
||||
// Fork including the assistant message
|
||||
const newFile = session.createBranchedSession(id2);
|
||||
expect(newFile).toBeDefined();
|
||||
|
||||
// Path includes an assistant, so file should be written immediately
|
||||
expect(existsSync(newFile!)).toBe(true);
|
||||
const content = readFileSync(newFile!, "utf-8");
|
||||
const lines = content.trim().split("\n").filter(Boolean);
|
||||
const records = lines.map((line) => JSON.parse(line));
|
||||
expect(records.filter((r) => r.type === "session")).toHaveLength(1);
|
||||
} finally {
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
207
packages/coding-agent/test/session-selector-path-delete.test.ts
Normal file
207
packages/coding-agent/test/session-selector-path-delete.test.ts
Normal file
|
|
@ -0,0 +1,207 @@
|
|||
import {
|
||||
DEFAULT_EDITOR_KEYBINDINGS,
|
||||
EditorKeybindingsManager,
|
||||
setEditorKeybindings,
|
||||
} from "@mariozechner/pi-tui";
|
||||
import { beforeAll, beforeEach, describe, expect, it } from "vitest";
|
||||
import { KeybindingsManager } from "../src/core/keybindings.js";
|
||||
import type { SessionInfo } from "../src/core/session-manager.js";
|
||||
import { SessionSelectorComponent } from "../src/modes/interactive/components/session-selector.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
type Deferred<T> = {
|
||||
promise: Promise<T>;
|
||||
resolve: (value: T) => void;
|
||||
reject: (err: unknown) => void;
|
||||
};
|
||||
|
||||
function createDeferred<T>(): Deferred<T> {
|
||||
let resolve: (value: T) => void = () => {};
|
||||
let reject: (err: unknown) => void = () => {};
|
||||
const promise = new Promise<T>((res, rej) => {
|
||||
resolve = res;
|
||||
reject = rej;
|
||||
});
|
||||
return { promise, resolve, reject };
|
||||
}
|
||||
|
||||
async function flushPromises(): Promise<void> {
|
||||
await new Promise<void>((resolve) => {
|
||||
setImmediate(resolve);
|
||||
});
|
||||
}
|
||||
|
||||
function makeSession(
|
||||
overrides: Partial<SessionInfo> & { id: string },
|
||||
): SessionInfo {
|
||||
return {
|
||||
path: overrides.path ?? `/tmp/${overrides.id}.jsonl`,
|
||||
id: overrides.id,
|
||||
cwd: overrides.cwd ?? "",
|
||||
name: overrides.name,
|
||||
created: overrides.created ?? new Date(0),
|
||||
modified: overrides.modified ?? new Date(0),
|
||||
messageCount: overrides.messageCount ?? 1,
|
||||
firstMessage: overrides.firstMessage ?? "hello",
|
||||
allMessagesText: overrides.allMessagesText ?? "hello",
|
||||
};
|
||||
}
|
||||
|
||||
const CTRL_D = "\x04";
|
||||
const CTRL_BACKSPACE = "\x1b[127;5u";
|
||||
|
||||
describe("session selector path/delete interactions", () => {
|
||||
const keybindings = KeybindingsManager.inMemory();
|
||||
|
||||
beforeEach(() => {
|
||||
// Ensure test isolation: editor keybindings are a global singleton
|
||||
setEditorKeybindings(
|
||||
new EditorKeybindingsManager(DEFAULT_EDITOR_KEYBINDINGS),
|
||||
);
|
||||
});
|
||||
|
||||
beforeAll(() => {
|
||||
// session selector uses the global theme instance
|
||||
initTheme("dark");
|
||||
});
|
||||
it("does not treat Ctrl+Backspace as delete when search query is non-empty", async () => {
|
||||
const sessions = [makeSession({ id: "a" }), makeSession({ id: "b" })];
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ keybindings },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const list = selector.getSessionList();
|
||||
const confirmationChanges: Array<string | null> = [];
|
||||
list.onDeleteConfirmationChange = (path) => confirmationChanges.push(path);
|
||||
|
||||
list.handleInput("a");
|
||||
list.handleInput(CTRL_BACKSPACE);
|
||||
|
||||
expect(confirmationChanges).toEqual([]);
|
||||
});
|
||||
|
||||
it("enters confirmation mode on Ctrl+D even with a non-empty search query", async () => {
|
||||
const sessions = [makeSession({ id: "a" }), makeSession({ id: "b" })];
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ keybindings },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const list = selector.getSessionList();
|
||||
const confirmationChanges: Array<string | null> = [];
|
||||
list.onDeleteConfirmationChange = (path) => confirmationChanges.push(path);
|
||||
|
||||
list.handleInput("a");
|
||||
list.handleInput(CTRL_D);
|
||||
|
||||
expect(confirmationChanges).toEqual([sessions[0]!.path]);
|
||||
});
|
||||
|
||||
it("enters confirmation mode on Ctrl+Backspace when search query is empty", async () => {
|
||||
const sessions = [makeSession({ id: "a" }), makeSession({ id: "b" })];
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ keybindings },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const list = selector.getSessionList();
|
||||
const confirmationChanges: Array<string | null> = [];
|
||||
list.onDeleteConfirmationChange = (path) => confirmationChanges.push(path);
|
||||
|
||||
let deletedPath: string | null = null;
|
||||
list.onDeleteSession = async (sessionPath) => {
|
||||
deletedPath = sessionPath;
|
||||
};
|
||||
|
||||
list.handleInput(CTRL_BACKSPACE);
|
||||
expect(confirmationChanges).toEqual([sessions[0]!.path]);
|
||||
|
||||
list.handleInput("\r");
|
||||
expect(confirmationChanges).toEqual([sessions[0]!.path, null]);
|
||||
expect(deletedPath).toBe(sessions[0]!.path);
|
||||
});
|
||||
|
||||
it("does not switch scope back to All when All load resolves after toggling back to Current", async () => {
|
||||
const currentSessions = [makeSession({ id: "current" })];
|
||||
const allDeferred = createDeferred<SessionInfo[]>();
|
||||
let allLoadCalls = 0;
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => currentSessions,
|
||||
async () => {
|
||||
allLoadCalls++;
|
||||
return allDeferred.promise;
|
||||
},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ keybindings },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const list = selector.getSessionList();
|
||||
list.handleInput("\t"); // current -> all (starts async load)
|
||||
list.handleInput("\t"); // all -> current
|
||||
|
||||
allDeferred.resolve([makeSession({ id: "all" })]);
|
||||
await flushPromises();
|
||||
|
||||
expect(allLoadCalls).toBe(1);
|
||||
const output = selector.render(120).join("\n");
|
||||
expect(output).toContain("Resume Session (Current Folder)");
|
||||
expect(output).not.toContain("Resume Session (All)");
|
||||
});
|
||||
|
||||
it("does not start redundant All loads when toggling scopes while All is already loading", async () => {
|
||||
const currentSessions = [makeSession({ id: "current" })];
|
||||
const allDeferred = createDeferred<SessionInfo[]>();
|
||||
let allLoadCalls = 0;
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => currentSessions,
|
||||
async () => {
|
||||
allLoadCalls++;
|
||||
return allDeferred.promise;
|
||||
},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ keybindings },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const list = selector.getSessionList();
|
||||
list.handleInput("\t"); // current -> all (starts async load)
|
||||
list.handleInput("\t"); // all -> current
|
||||
list.handleInput("\t"); // current -> all again while load pending
|
||||
|
||||
expect(allLoadCalls).toBe(1);
|
||||
|
||||
allDeferred.resolve([makeSession({ id: "all" })]);
|
||||
await flushPromises();
|
||||
});
|
||||
});
|
||||
103
packages/coding-agent/test/session-selector-rename.test.ts
Normal file
103
packages/coding-agent/test/session-selector-rename.test.ts
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import type { SessionInfo } from "../src/core/session-manager.js";
|
||||
import { SessionSelectorComponent } from "../src/modes/interactive/components/session-selector.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
async function flushPromises(): Promise<void> {
|
||||
await new Promise<void>((resolve) => {
|
||||
setImmediate(resolve);
|
||||
});
|
||||
}
|
||||
|
||||
function makeSession(
|
||||
overrides: Partial<SessionInfo> & { id: string },
|
||||
): SessionInfo {
|
||||
return {
|
||||
path: overrides.path ?? `/tmp/${overrides.id}.jsonl`,
|
||||
id: overrides.id,
|
||||
cwd: overrides.cwd ?? "",
|
||||
name: overrides.name,
|
||||
created: overrides.created ?? new Date(0),
|
||||
modified: overrides.modified ?? new Date(0),
|
||||
messageCount: overrides.messageCount ?? 1,
|
||||
firstMessage: overrides.firstMessage ?? "hello",
|
||||
allMessagesText: overrides.allMessagesText ?? "hello",
|
||||
};
|
||||
}
|
||||
|
||||
// Kitty keyboard protocol encoding for Ctrl+R
|
||||
const CTRL_R = "\x1b[114;5u";
|
||||
|
||||
describe("session selector rename", () => {
|
||||
beforeAll(() => {
|
||||
initTheme("dark");
|
||||
});
|
||||
|
||||
it("shows rename hint in interactive /resume picker configuration", async () => {
|
||||
const sessions = [makeSession({ id: "a" })];
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ showRenameHint: true },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const output = selector.render(120).join("\n");
|
||||
expect(output).toContain("ctrl+r");
|
||||
expect(output).toContain("rename");
|
||||
});
|
||||
|
||||
it("does not show rename hint in --resume picker configuration", async () => {
|
||||
const sessions = [makeSession({ id: "a" })];
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ showRenameHint: false },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
const output = selector.render(120).join("\n");
|
||||
expect(output).not.toContain("ctrl+r");
|
||||
expect(output).not.toContain("rename");
|
||||
});
|
||||
|
||||
it("enters rename mode on Ctrl+R and submits with Enter", async () => {
|
||||
const sessions = [makeSession({ id: "a", name: "Old" })];
|
||||
const renameSession = vi.fn(async () => {});
|
||||
|
||||
const selector = new SessionSelectorComponent(
|
||||
async () => sessions,
|
||||
async () => [],
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
() => {},
|
||||
{ renameSession, showRenameHint: true },
|
||||
);
|
||||
await flushPromises();
|
||||
|
||||
selector.getSessionList().handleInput(CTRL_R);
|
||||
await flushPromises();
|
||||
|
||||
// Rename mode layout
|
||||
const output = selector.render(120).join("\n");
|
||||
expect(output).toContain("Rename Session");
|
||||
expect(output).not.toContain("Resume Session");
|
||||
|
||||
// Type and submit
|
||||
selector.handleInput("X");
|
||||
selector.handleInput("\r");
|
||||
await flushPromises();
|
||||
|
||||
expect(renameSession).toHaveBeenCalledTimes(1);
|
||||
expect(renameSession).toHaveBeenCalledWith(sessions[0]!.path, "XOld");
|
||||
});
|
||||
});
|
||||
214
packages/coding-agent/test/session-selector-search.test.ts
Normal file
214
packages/coding-agent/test/session-selector-search.test.ts
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { SessionInfo } from "../src/core/session-manager.js";
|
||||
import { filterAndSortSessions } from "../src/modes/interactive/components/session-selector-search.js";
|
||||
|
||||
function makeSession(
|
||||
overrides: Partial<SessionInfo> & {
|
||||
id: string;
|
||||
modified: Date;
|
||||
allMessagesText: string;
|
||||
},
|
||||
): SessionInfo {
|
||||
return {
|
||||
path: `/tmp/${overrides.id}.jsonl`,
|
||||
id: overrides.id,
|
||||
cwd: overrides.cwd ?? "",
|
||||
name: overrides.name,
|
||||
created: overrides.created ?? new Date(0),
|
||||
modified: overrides.modified,
|
||||
messageCount: overrides.messageCount ?? 1,
|
||||
firstMessage: overrides.firstMessage ?? "(no messages)",
|
||||
allMessagesText: overrides.allMessagesText,
|
||||
};
|
||||
}
|
||||
|
||||
describe("session selector search", () => {
|
||||
it("filters by quoted phrase with whitespace normalization", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "a",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "node\n\n cve was discussed",
|
||||
}),
|
||||
makeSession({
|
||||
id: "b",
|
||||
modified: new Date("2026-01-02T00:00:00.000Z"),
|
||||
allMessagesText: "node something else",
|
||||
}),
|
||||
];
|
||||
|
||||
const result = filterAndSortSessions(sessions, '"node cve"', "recent");
|
||||
expect(result.map((s) => s.id)).toEqual(["a"]);
|
||||
});
|
||||
|
||||
it("filters by regex (re:) and is case-insensitive", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "a",
|
||||
modified: new Date("2026-01-02T00:00:00.000Z"),
|
||||
allMessagesText: "Brave is great",
|
||||
}),
|
||||
makeSession({
|
||||
id: "b",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "bravery is not the same",
|
||||
}),
|
||||
];
|
||||
|
||||
const result = filterAndSortSessions(sessions, "re:\\bbrave\\b", "recent");
|
||||
expect(result.map((s) => s.id)).toEqual(["a"]);
|
||||
});
|
||||
|
||||
it("recent sort preserves input order", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "newer",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "brave",
|
||||
}),
|
||||
makeSession({
|
||||
id: "older",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "brave",
|
||||
}),
|
||||
makeSession({
|
||||
id: "nomatch",
|
||||
modified: new Date("2026-01-04T00:00:00.000Z"),
|
||||
allMessagesText: "something else",
|
||||
}),
|
||||
];
|
||||
|
||||
const result = filterAndSortSessions(sessions, '"brave"', "recent");
|
||||
expect(result.map((s) => s.id)).toEqual(["newer", "older"]);
|
||||
});
|
||||
|
||||
it("relevance sort orders by score and tie-breaks by modified desc", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "late",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "xxxx brave",
|
||||
}),
|
||||
makeSession({
|
||||
id: "early",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "brave xxxx",
|
||||
}),
|
||||
];
|
||||
|
||||
const result1 = filterAndSortSessions(sessions, '"brave"', "relevance");
|
||||
expect(result1.map((s) => s.id)).toEqual(["early", "late"]);
|
||||
|
||||
const tieSessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "newer",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "brave",
|
||||
}),
|
||||
makeSession({
|
||||
id: "older",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "brave",
|
||||
}),
|
||||
];
|
||||
|
||||
const result2 = filterAndSortSessions(tieSessions, '"brave"', "relevance");
|
||||
expect(result2.map((s) => s.id)).toEqual(["newer", "older"]);
|
||||
});
|
||||
|
||||
it("returns empty list for invalid regex", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "a",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "brave",
|
||||
}),
|
||||
];
|
||||
|
||||
const result = filterAndSortSessions(sessions, "re:(", "recent");
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
describe("name filter", () => {
|
||||
const sessions: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "named1",
|
||||
name: "My Project",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "blueberry",
|
||||
}),
|
||||
makeSession({
|
||||
id: "named2",
|
||||
name: "Another Named",
|
||||
modified: new Date("2026-01-02T00:00:00.000Z"),
|
||||
allMessagesText: "blueberry",
|
||||
}),
|
||||
makeSession({
|
||||
id: "other1",
|
||||
modified: new Date("2026-01-04T00:00:00.000Z"),
|
||||
allMessagesText: "blueberry",
|
||||
}),
|
||||
makeSession({
|
||||
id: "other2",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "blueberry",
|
||||
}),
|
||||
];
|
||||
|
||||
it("returns all sessions when nameFilter is 'all'", () => {
|
||||
const result = filterAndSortSessions(sessions, "", "recent", "all");
|
||||
expect(result.map((session) => session.id)).toEqual([
|
||||
"named1",
|
||||
"named2",
|
||||
"other1",
|
||||
"other2",
|
||||
]);
|
||||
});
|
||||
|
||||
it("returns only named sessions when nameFilter is 'named'", () => {
|
||||
const result = filterAndSortSessions(sessions, "", "recent", "named");
|
||||
expect(result.map((session) => session.id)).toEqual(["named1", "named2"]);
|
||||
});
|
||||
|
||||
it("applies name filter before search query", () => {
|
||||
const result = filterAndSortSessions(
|
||||
sessions,
|
||||
"blueberry",
|
||||
"recent",
|
||||
"named",
|
||||
);
|
||||
expect(result.map((session) => session.id)).toEqual(["named1", "named2"]);
|
||||
});
|
||||
|
||||
it("excludes whitespace-only names from named filter", () => {
|
||||
const sessionsWithWhitespace: SessionInfo[] = [
|
||||
makeSession({
|
||||
id: "whitespace",
|
||||
name: " ",
|
||||
modified: new Date("2026-01-01T00:00:00.000Z"),
|
||||
allMessagesText: "test",
|
||||
}),
|
||||
makeSession({
|
||||
id: "empty",
|
||||
name: "",
|
||||
modified: new Date("2026-01-02T00:00:00.000Z"),
|
||||
allMessagesText: "test",
|
||||
}),
|
||||
makeSession({
|
||||
id: "named",
|
||||
name: "Real Name",
|
||||
modified: new Date("2026-01-03T00:00:00.000Z"),
|
||||
allMessagesText: "test",
|
||||
}),
|
||||
];
|
||||
|
||||
const result = filterAndSortSessions(
|
||||
sessionsWithWhitespace,
|
||||
"",
|
||||
"recent",
|
||||
"named",
|
||||
);
|
||||
expect(result.map((session) => session.id)).toEqual(["named"]);
|
||||
});
|
||||
});
|
||||
});
|
||||
165
packages/coding-agent/test/settings-manager-bug.test.ts
Normal file
165
packages/coding-agent/test/settings-manager-bug.test.ts
Normal file
|
|
@ -0,0 +1,165 @@
|
|||
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
|
||||
/**
|
||||
* Tests for the fix to a bug where external file changes to arrays were overwritten.
|
||||
*
|
||||
* The bug scenario was:
|
||||
* 1. Pi starts with settings.json containing packages: ["npm:some-pkg"]
|
||||
* 2. User externally edits file to packages: []
|
||||
* 3. User changes an unrelated setting (e.g., theme) via UI
|
||||
* 4. save() would overwrite packages back to ["npm:some-pkg"] from stale in-memory state
|
||||
*
|
||||
* The fix tracks which fields were explicitly modified during the session, and only
|
||||
* those fields override file values during save().
|
||||
*/
|
||||
describe("SettingsManager - External Edit Preservation", () => {
|
||||
const testDir = join(process.cwd(), "test-settings-bug-tmp");
|
||||
const agentDir = join(testDir, "agent");
|
||||
const projectDir = join(testDir, "project");
|
||||
|
||||
beforeEach(() => {
|
||||
if (existsSync(testDir)) {
|
||||
rmSync(testDir, { recursive: true });
|
||||
}
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
mkdirSync(join(projectDir, ".pi"), { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (existsSync(testDir)) {
|
||||
rmSync(testDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
it("should preserve file changes to packages array when changing unrelated setting", async () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
|
||||
// Initial state: packages has one item
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "dark",
|
||||
packages: ["npm:pi-mcp-adapter"],
|
||||
}),
|
||||
);
|
||||
|
||||
// Pi starts up, loads settings into memory
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// At this point, globalSettings.packages = ["npm:pi-mcp-adapter"]
|
||||
expect(manager.getPackages()).toEqual(["npm:pi-mcp-adapter"]);
|
||||
|
||||
// User externally edits settings.json to remove the package
|
||||
const currentSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
currentSettings.packages = []; // User wants to remove this!
|
||||
writeFileSync(settingsPath, JSON.stringify(currentSettings, null, 2));
|
||||
|
||||
// Verify file was changed
|
||||
expect(JSON.parse(readFileSync(settingsPath, "utf-8")).packages).toEqual(
|
||||
[],
|
||||
);
|
||||
|
||||
// User changes an UNRELATED setting via UI (this triggers save)
|
||||
manager.setTheme("light");
|
||||
await manager.flush();
|
||||
|
||||
// With the fix, packages should be preserved as [] (not reverted to startup value)
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
|
||||
expect(savedSettings.packages).toEqual([]);
|
||||
expect(savedSettings.theme).toBe("light");
|
||||
});
|
||||
|
||||
it("should preserve file changes to extensions array when changing unrelated setting", async () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "dark",
|
||||
extensions: ["/old/extension.ts"],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// User externally updates extensions
|
||||
const currentSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
currentSettings.extensions = ["/new/extension.ts"];
|
||||
writeFileSync(settingsPath, JSON.stringify(currentSettings, null, 2));
|
||||
|
||||
// Change unrelated setting
|
||||
manager.setDefaultThinkingLevel("high");
|
||||
await manager.flush();
|
||||
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
|
||||
// With the fix, extensions should be preserved (not reverted to startup value)
|
||||
expect(savedSettings.extensions).toEqual(["/new/extension.ts"]);
|
||||
});
|
||||
|
||||
it("should preserve external project settings changes when updating unrelated project field", async () => {
|
||||
const projectSettingsPath = join(projectDir, ".pi", "settings.json");
|
||||
writeFileSync(
|
||||
projectSettingsPath,
|
||||
JSON.stringify({
|
||||
extensions: ["./old-extension.ts"],
|
||||
prompts: ["./old-prompt.md"],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
const currentProjectSettings = JSON.parse(
|
||||
readFileSync(projectSettingsPath, "utf-8"),
|
||||
);
|
||||
currentProjectSettings.prompts = ["./new-prompt.md"];
|
||||
writeFileSync(
|
||||
projectSettingsPath,
|
||||
JSON.stringify(currentProjectSettings, null, 2),
|
||||
);
|
||||
|
||||
manager.setProjectExtensionPaths(["./updated-extension.ts"]);
|
||||
await manager.flush();
|
||||
|
||||
const savedProjectSettings = JSON.parse(
|
||||
readFileSync(projectSettingsPath, "utf-8"),
|
||||
);
|
||||
expect(savedProjectSettings.prompts).toEqual(["./new-prompt.md"]);
|
||||
expect(savedProjectSettings.extensions).toEqual(["./updated-extension.ts"]);
|
||||
});
|
||||
|
||||
it("should let in-memory project changes override external changes for the same project field", async () => {
|
||||
const projectSettingsPath = join(projectDir, ".pi", "settings.json");
|
||||
writeFileSync(
|
||||
projectSettingsPath,
|
||||
JSON.stringify({
|
||||
extensions: ["./initial-extension.ts"],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
const currentProjectSettings = JSON.parse(
|
||||
readFileSync(projectSettingsPath, "utf-8"),
|
||||
);
|
||||
currentProjectSettings.extensions = ["./external-extension.ts"];
|
||||
writeFileSync(
|
||||
projectSettingsPath,
|
||||
JSON.stringify(currentProjectSettings, null, 2),
|
||||
);
|
||||
|
||||
manager.setProjectExtensionPaths(["./in-memory-extension.ts"]);
|
||||
await manager.flush();
|
||||
|
||||
const savedProjectSettings = JSON.parse(
|
||||
readFileSync(projectSettingsPath, "utf-8"),
|
||||
);
|
||||
expect(savedProjectSettings.extensions).toEqual([
|
||||
"./in-memory-extension.ts",
|
||||
]);
|
||||
});
|
||||
});
|
||||
303
packages/coding-agent/test/settings-manager.test.ts
Normal file
303
packages/coding-agent/test/settings-manager.test.ts
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
|
||||
describe("SettingsManager", () => {
|
||||
const testDir = join(process.cwd(), "test-settings-tmp");
|
||||
const agentDir = join(testDir, "agent");
|
||||
const projectDir = join(testDir, "project");
|
||||
|
||||
beforeEach(() => {
|
||||
// Clean up and create fresh directories
|
||||
if (existsSync(testDir)) {
|
||||
rmSync(testDir, { recursive: true });
|
||||
}
|
||||
mkdirSync(agentDir, { recursive: true });
|
||||
mkdirSync(join(projectDir, ".pi"), { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
if (existsSync(testDir)) {
|
||||
rmSync(testDir, { recursive: true });
|
||||
}
|
||||
});
|
||||
|
||||
describe("preserves externally added settings", () => {
|
||||
it("should preserve enabledModels when changing thinking level", async () => {
|
||||
// Create initial settings file
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "dark",
|
||||
defaultModel: "claude-sonnet",
|
||||
}),
|
||||
);
|
||||
|
||||
// Create SettingsManager (simulates pi starting up)
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// Simulate user editing settings.json externally to add enabledModels
|
||||
const currentSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
currentSettings.enabledModels = ["claude-opus-4-5", "gpt-5.2-codex"];
|
||||
writeFileSync(settingsPath, JSON.stringify(currentSettings, null, 2));
|
||||
|
||||
// User changes thinking level via Shift+Tab
|
||||
manager.setDefaultThinkingLevel("high");
|
||||
await manager.flush();
|
||||
|
||||
// Verify enabledModels is preserved
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
expect(savedSettings.enabledModels).toEqual([
|
||||
"claude-opus-4-5",
|
||||
"gpt-5.2-codex",
|
||||
]);
|
||||
expect(savedSettings.defaultThinkingLevel).toBe("high");
|
||||
expect(savedSettings.theme).toBe("dark");
|
||||
expect(savedSettings.defaultModel).toBe("claude-sonnet");
|
||||
});
|
||||
|
||||
it("should preserve custom settings when changing theme", async () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
defaultModel: "claude-sonnet",
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// User adds custom settings externally
|
||||
const currentSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
currentSettings.shellPath = "/bin/zsh";
|
||||
currentSettings.extensions = ["/path/to/extension.ts"];
|
||||
writeFileSync(settingsPath, JSON.stringify(currentSettings, null, 2));
|
||||
|
||||
// User changes theme
|
||||
manager.setTheme("light");
|
||||
await manager.flush();
|
||||
|
||||
// Verify all settings preserved
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
expect(savedSettings.shellPath).toBe("/bin/zsh");
|
||||
expect(savedSettings.extensions).toEqual(["/path/to/extension.ts"]);
|
||||
expect(savedSettings.theme).toBe("light");
|
||||
});
|
||||
|
||||
it("should let in-memory changes override file changes for same key", async () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "dark",
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// User externally sets thinking level to "low"
|
||||
const currentSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
currentSettings.defaultThinkingLevel = "low";
|
||||
writeFileSync(settingsPath, JSON.stringify(currentSettings, null, 2));
|
||||
|
||||
// But then changes it via UI to "high"
|
||||
manager.setDefaultThinkingLevel("high");
|
||||
await manager.flush();
|
||||
|
||||
// In-memory change should win
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
expect(savedSettings.defaultThinkingLevel).toBe("high");
|
||||
});
|
||||
});
|
||||
|
||||
describe("packages migration", () => {
|
||||
it("should keep local-only extensions in extensions array", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
extensions: ["/local/ext.ts", "./relative/ext.ts"],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
expect(manager.getPackages()).toEqual([]);
|
||||
expect(manager.getExtensionPaths()).toEqual([
|
||||
"/local/ext.ts",
|
||||
"./relative/ext.ts",
|
||||
]);
|
||||
});
|
||||
|
||||
it("should handle packages with filtering objects", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
packages: [
|
||||
"npm:simple-pkg",
|
||||
{
|
||||
source: "npm:shitty-extensions",
|
||||
extensions: ["extensions/oracle.ts"],
|
||||
skills: [],
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
const packages = manager.getPackages();
|
||||
expect(packages).toHaveLength(2);
|
||||
expect(packages[0]).toBe("npm:simple-pkg");
|
||||
expect(packages[1]).toEqual({
|
||||
source: "npm:shitty-extensions",
|
||||
extensions: ["extensions/oracle.ts"],
|
||||
skills: [],
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("reload", () => {
|
||||
it("should reload global settings from disk", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "dark",
|
||||
extensions: ["/before.ts"],
|
||||
}),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({
|
||||
theme: "light",
|
||||
extensions: ["/after.ts"],
|
||||
defaultModel: "claude-sonnet",
|
||||
}),
|
||||
);
|
||||
|
||||
manager.reload();
|
||||
|
||||
expect(manager.getTheme()).toBe("light");
|
||||
expect(manager.getExtensionPaths()).toEqual(["/after.ts"]);
|
||||
expect(manager.getDefaultModel()).toBe("claude-sonnet");
|
||||
});
|
||||
|
||||
it("should keep previous settings when file is invalid", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(settingsPath, JSON.stringify({ theme: "dark" }));
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
writeFileSync(settingsPath, "{ invalid json");
|
||||
manager.reload();
|
||||
|
||||
expect(manager.getTheme()).toBe("dark");
|
||||
});
|
||||
});
|
||||
|
||||
describe("error tracking", () => {
|
||||
it("should collect and clear load errors via drainErrors", () => {
|
||||
const globalSettingsPath = join(agentDir, "settings.json");
|
||||
const projectSettingsPath = join(projectDir, ".pi", "settings.json");
|
||||
writeFileSync(globalSettingsPath, "{ invalid global json");
|
||||
writeFileSync(projectSettingsPath, "{ invalid project json");
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
const errors = manager.drainErrors();
|
||||
|
||||
expect(errors).toHaveLength(2);
|
||||
expect(errors.map((e) => e.scope).sort()).toEqual(["global", "project"]);
|
||||
expect(manager.drainErrors()).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("project settings directory creation", () => {
|
||||
it("should not create .pi folder when only reading project settings", () => {
|
||||
// Create agent dir with global settings, but NO .pi folder in project
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(settingsPath, JSON.stringify({ theme: "dark" }));
|
||||
|
||||
// Delete the .pi folder that beforeEach created
|
||||
rmSync(join(projectDir, ".pi"), { recursive: true });
|
||||
|
||||
// Create SettingsManager (reads both global and project settings)
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// .pi folder should NOT have been created just from reading
|
||||
expect(existsSync(join(projectDir, ".pi"))).toBe(false);
|
||||
|
||||
// Settings should still be loaded from global
|
||||
expect(manager.getTheme()).toBe("dark");
|
||||
});
|
||||
|
||||
it("should create .pi folder when writing project settings", async () => {
|
||||
// Create agent dir with global settings, but NO .pi folder in project
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(settingsPath, JSON.stringify({ theme: "dark" }));
|
||||
|
||||
// Delete the .pi folder that beforeEach created
|
||||
rmSync(join(projectDir, ".pi"), { recursive: true });
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
// .pi folder should NOT exist yet
|
||||
expect(existsSync(join(projectDir, ".pi"))).toBe(false);
|
||||
|
||||
// Write a project-specific setting
|
||||
manager.setProjectPackages([{ source: "npm:test-pkg" }]);
|
||||
await manager.flush();
|
||||
|
||||
// Now .pi folder should exist
|
||||
expect(existsSync(join(projectDir, ".pi"))).toBe(true);
|
||||
|
||||
// And settings file should be created
|
||||
expect(existsSync(join(projectDir, ".pi", "settings.json"))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shellCommandPrefix", () => {
|
||||
it("should load shellCommandPrefix from settings", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({ shellCommandPrefix: "shopt -s expand_aliases" }),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
expect(manager.getShellCommandPrefix()).toBe("shopt -s expand_aliases");
|
||||
});
|
||||
|
||||
it("should return undefined when shellCommandPrefix is not set", () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(settingsPath, JSON.stringify({ theme: "dark" }));
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
|
||||
expect(manager.getShellCommandPrefix()).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should preserve shellCommandPrefix when saving unrelated settings", async () => {
|
||||
const settingsPath = join(agentDir, "settings.json");
|
||||
writeFileSync(
|
||||
settingsPath,
|
||||
JSON.stringify({ shellCommandPrefix: "shopt -s expand_aliases" }),
|
||||
);
|
||||
|
||||
const manager = SettingsManager.create(projectDir, agentDir);
|
||||
manager.setTheme("light");
|
||||
await manager.flush();
|
||||
|
||||
const savedSettings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
expect(savedSettings.shellCommandPrefix).toBe("shopt -s expand_aliases");
|
||||
expect(savedSettings.theme).toBe("light");
|
||||
});
|
||||
});
|
||||
});
|
||||
453
packages/coding-agent/test/skills.test.ts
Normal file
453
packages/coding-agent/test/skills.test.ts
Normal file
|
|
@ -0,0 +1,453 @@
|
|||
import { homedir } from "os";
|
||||
import { join, resolve } from "path";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { ResourceDiagnostic } from "../src/core/diagnostics.js";
|
||||
import {
|
||||
formatSkillsForPrompt,
|
||||
loadSkills,
|
||||
loadSkillsFromDir,
|
||||
type Skill,
|
||||
} from "../src/core/skills.js";
|
||||
|
||||
const fixturesDir = resolve(__dirname, "fixtures/skills");
|
||||
const collisionFixturesDir = resolve(__dirname, "fixtures/skills-collision");
|
||||
|
||||
describe("skills", () => {
|
||||
describe("loadSkillsFromDir", () => {
|
||||
it("should load a valid skill", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "valid-skill"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(skills[0].name).toBe("valid-skill");
|
||||
expect(skills[0].description).toBe("A valid skill for testing purposes.");
|
||||
expect(skills[0].source).toBe("test");
|
||||
expect(diagnostics).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should warn when name doesn't match parent directory", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "name-mismatch"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(skills[0].name).toBe("different-name");
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("does not match parent directory"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should warn when name contains invalid characters", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "invalid-name-chars"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("invalid characters"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should warn when name exceeds 64 characters", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "long-name"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("exceeds 64 characters"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should warn and skip skill when description is missing", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "missing-description"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(0);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("description is required"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should ignore unknown frontmatter fields", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "unknown-field"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(diagnostics).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should load nested skills recursively", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "nested"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(skills[0].name).toBe("child-skill");
|
||||
expect(diagnostics).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should skip files without frontmatter", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "no-frontmatter"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
// no-frontmatter has no description, so it should be skipped
|
||||
expect(skills).toHaveLength(0);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("description is required"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should warn and skip skill when YAML frontmatter is invalid", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "invalid-yaml"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(0);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("at line"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should preserve multiline descriptions from YAML", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "multiline-description"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(skills[0].description).toContain("\n");
|
||||
expect(skills[0].description).toContain(
|
||||
"This is a multiline description.",
|
||||
);
|
||||
expect(diagnostics).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("should warn when name contains consecutive hyphens", () => {
|
||||
const { skills, diagnostics } = loadSkillsFromDir({
|
||||
dir: join(fixturesDir, "consecutive-hyphens"),
|
||||
source: "test",
|
||||
});
|
||||
|
||||
expect(skills).toHaveLength(1);
|
||||
expect(
|
||||
diagnostics.some((d: ResourceDiagnostic) =>
|
||||
d.message.includes("consecutive hyphens"),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should load all skills from fixture directory", () => {
|
||||
const { skills } = loadSkillsFromDir({
|
||||
dir: fixturesDir,
|
||||
source: "test",
|
||||
});
|
||||
|
||||
// Should load all skills that have descriptions (even with warnings)
|
||||
// valid-skill, name-mismatch, invalid-name-chars, long-name, unknown-field, nested/child-skill, consecutive-hyphens
|
||||
// NOT: missing-description, no-frontmatter (both missing descriptions)
|
||||
expect(skills.length).toBeGreaterThanOrEqual(6);
|
||||
});
|
||||
|
||||
// Missing directories are treated as "nothing to load", not an error.
it("should return empty for non-existent directory", () => {
  const { skills, diagnostics } = loadSkillsFromDir({
    dir: "/non/existent/path",
    source: "test",
  });

  expect(skills).toHaveLength(0);
  expect(diagnostics).toHaveLength(0);
});

// When frontmatter omits `name`, the skill's directory name is used instead.
it("should use parent directory name when name not in frontmatter", () => {
  // The no-frontmatter fixture has no name in frontmatter, so it should use "no-frontmatter"
  // But it also has no description, so it won't load
  // Let's test with a valid skill that relies on directory name
  const { skills } = loadSkillsFromDir({
    dir: join(fixturesDir, "valid-skill"),
    source: "test",
  });

  expect(skills).toHaveLength(1);
  expect(skills[0].name).toBe("valid-skill");
});

// `disable-model-invocation` is a recognized frontmatter key: it must be
// parsed onto the skill and must NOT trigger an unknown-field diagnostic.
it("should parse disable-model-invocation frontmatter field", () => {
  const { skills, diagnostics } = loadSkillsFromDir({
    dir: join(fixturesDir, "disable-model-invocation"),
    source: "test",
  });

  expect(skills).toHaveLength(1);
  expect(skills[0].name).toBe("disable-model-invocation");
  expect(skills[0].disableModelInvocation).toBe(true);
  // Should not warn about unknown field
  expect(
    diagnostics.some((d: ResourceDiagnostic) =>
      d.message.includes("unknown frontmatter field"),
    ),
  ).toBe(false);
});

// Absent flag defaults to false (skill is visible to the model).
it("should default disableModelInvocation to false when not specified", () => {
  const { skills } = loadSkillsFromDir({
    dir: join(fixturesDir, "valid-skill"),
    source: "test",
  });

  expect(skills).toHaveLength(1);
  expect(skills[0].disableModelInvocation).toBe(false);
});
|
||||
});
|
||||
|
||||
// Tests for the system-prompt skill listing: skills are rendered as an
// <available_skills> XML fragment preceded by intro text, with XML-special
// characters escaped and model-hidden skills omitted.
describe("formatSkillsForPrompt", () => {
  it("should return empty string for no skills", () => {
    const result = formatSkillsForPrompt([]);
    expect(result).toBe("");
  });

  it("should format skills as XML", () => {
    const skills: Skill[] = [
      {
        name: "test-skill",
        description: "A test skill.",
        filePath: "/path/to/skill/SKILL.md",
        baseDir: "/path/to/skill",
        source: "test",
        disableModelInvocation: false,
      },
    ];

    const result = formatSkillsForPrompt(skills);

    expect(result).toContain("<available_skills>");
    expect(result).toContain("</available_skills>");
    expect(result).toContain("<skill>");
    expect(result).toContain("<name>test-skill</name>");
    expect(result).toContain("<description>A test skill.</description>");
    expect(result).toContain("<location>/path/to/skill/SKILL.md</location>");
  });

  it("should include intro text before XML", () => {
    const skills: Skill[] = [
      {
        name: "test-skill",
        description: "A test skill.",
        filePath: "/path/to/skill/SKILL.md",
        baseDir: "/path/to/skill",
        source: "test",
        disableModelInvocation: false,
      },
    ];

    const result = formatSkillsForPrompt(skills);
    // Everything before the opening tag is the human-readable preamble.
    const xmlStart = result.indexOf("<available_skills>");
    const introText = result.substring(0, xmlStart);

    expect(introText).toContain(
      "The following skills provide specialized instructions",
    );
    expect(introText).toContain("Use the read tool to load a skill's file");
  });

  it("should escape XML special characters", () => {
    const skills: Skill[] = [
      {
        name: "test-skill",
        description: 'A skill with <special> & "characters".',
        filePath: "/path/to/skill/SKILL.md",
        baseDir: "/path/to/skill",
        source: "test",
        disableModelInvocation: false,
      },
    ];

    const result = formatSkillsForPrompt(skills);

    // Description content must be entity-escaped inside the XML fragment.
    expect(result).toContain("&lt;special&gt;");
    expect(result).toContain("&amp;");
    expect(result).toContain("&quot;characters&quot;");
  });

  it("should format multiple skills", () => {
    const skills: Skill[] = [
      {
        name: "skill-one",
        description: "First skill.",
        filePath: "/path/one/SKILL.md",
        baseDir: "/path/one",
        source: "test",
        disableModelInvocation: false,
      },
      {
        name: "skill-two",
        description: "Second skill.",
        filePath: "/path/two/SKILL.md",
        baseDir: "/path/two",
        source: "test",
        disableModelInvocation: false,
      },
    ];

    const result = formatSkillsForPrompt(skills);

    expect(result).toContain("<name>skill-one</name>");
    expect(result).toContain("<name>skill-two</name>");
    expect((result.match(/<skill>/g) || []).length).toBe(2);
  });

  it("should exclude skills with disableModelInvocation from prompt", () => {
    const skills: Skill[] = [
      {
        name: "visible-skill",
        description: "A visible skill.",
        filePath: "/path/visible/SKILL.md",
        baseDir: "/path/visible",
        source: "test",
        disableModelInvocation: false,
      },
      {
        name: "hidden-skill",
        description: "A hidden skill.",
        filePath: "/path/hidden/SKILL.md",
        baseDir: "/path/hidden",
        source: "test",
        disableModelInvocation: true,
      },
    ];

    const result = formatSkillsForPrompt(skills);

    expect(result).toContain("<name>visible-skill</name>");
    expect(result).not.toContain("<name>hidden-skill</name>");
    expect((result.match(/<skill>/g) || []).length).toBe(1);
  });

  it("should return empty string when all skills have disableModelInvocation", () => {
    const skills: Skill[] = [
      {
        name: "hidden-skill",
        description: "A hidden skill.",
        filePath: "/path/hidden/SKILL.md",
        baseDir: "/path/hidden",
        source: "test",
        disableModelInvocation: true,
      },
    ];

    const result = formatSkillsForPrompt(skills);
    expect(result).toBe("");
  });
});
|
||||
|
||||
// Tests for loadSkills() option handling: explicit skillPaths entries,
// missing-path diagnostics, and ~-expansion.
describe("loadSkills with options", () => {
  // Empty agent dir / cwd so only the explicit skillPaths contribute skills.
  const emptyAgentDir = resolve(__dirname, "fixtures/empty-agent");
  const emptyCwd = resolve(__dirname, "fixtures/empty-cwd");

  it("should load from explicit skillPaths", () => {
    const { skills, diagnostics } = loadSkills({
      agentDir: emptyAgentDir,
      cwd: emptyCwd,
      skillPaths: [join(fixturesDir, "valid-skill")],
    });
    expect(skills).toHaveLength(1);
    // Skills loaded via skillPaths are tagged with source "path".
    expect(skills[0].source).toBe("path");
    expect(diagnostics).toHaveLength(0);
  });

  it("should warn when skill path does not exist", () => {
    const { skills, diagnostics } = loadSkills({
      agentDir: emptyAgentDir,
      cwd: emptyCwd,
      skillPaths: ["/non/existent/path"],
    });
    expect(skills).toHaveLength(0);
    expect(
      diagnostics.some((d: ResourceDiagnostic) =>
        d.message.includes("does not exist"),
      ),
    ).toBe(true);
  });

  it("should expand ~ in skillPaths", () => {
    // "~/..." and the equivalent absolute home path must load the same skills.
    const homeSkillsDir = join(homedir(), ".pi/agent/skills");
    const { skills: withTilde } = loadSkills({
      agentDir: emptyAgentDir,
      cwd: emptyCwd,
      skillPaths: ["~/.pi/agent/skills"],
    });
    const { skills: withoutTilde } = loadSkills({
      agentDir: emptyAgentDir,
      cwd: emptyCwd,
      skillPaths: [homeSkillsDir],
    });
    expect(withTilde.length).toBe(withoutTilde.length);
  });
});
|
||||
|
||||
// First-wins collision semantics: when two skill directories define the same
// name, the earlier one is kept and the later one produces a warning. This
// test re-implements the merge loop from loadSkills() over fixture data.
describe("collision handling", () => {
  it("should detect name collisions and keep first skill", () => {
    // Load from first directory
    const first = loadSkillsFromDir({
      dir: join(collisionFixturesDir, "first"),
      source: "first",
    });

    const second = loadSkillsFromDir({
      dir: join(collisionFixturesDir, "second"),
      source: "second",
    });

    // Simulate the collision behavior from loadSkills()
    const skillMap = new Map<string, Skill>();
    const collisionWarnings: Array<{ skillPath: string; message: string }> =
      [];

    for (const skill of first.skills) {
      skillMap.set(skill.name, skill);
    }

    for (const skill of second.skills) {
      const existing = skillMap.get(skill.name);
      if (existing) {
        // Duplicate name: keep the existing entry, record a warning.
        collisionWarnings.push({
          skillPath: skill.filePath,
          message: `name collision: "${skill.name}" already loaded from ${existing.filePath}`,
        });
      } else {
        skillMap.set(skill.name, skill);
      }
    }

    expect(skillMap.size).toBe(1);
    expect(skillMap.get("calendar")?.source).toBe("first");
    expect(collisionWarnings).toHaveLength(1);
    expect(collisionWarnings[0].message).toContain("name collision");
  });
});
|
||||
});
|
||||
103
packages/coding-agent/test/streaming-render-debug.ts
Normal file
103
packages/coding-agent/test/streaming-render-debug.ts
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
* Debug script to reproduce streaming rendering issues.
|
||||
* Uses real fixture data that caused the bug.
|
||||
* Run with: npx tsx test/streaming-render-debug.ts
|
||||
*/
|
||||
|
||||
import type { AssistantMessage } from "@mariozechner/pi-ai";
|
||||
import { ProcessTerminal, TUI } from "@mariozechner/pi-tui";
|
||||
import { readFileSync } from "fs";
|
||||
import { dirname, join } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { AssistantMessageComponent } from "../src/modes/interactive/components/assistant-message.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
// Script-level setup: resolve __dirname under ESM, force a truecolor dark
// theme, and load the recorded assistant message that reproduced the bug.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Initialize dark theme with full color support
process.env.COLORTERM = "truecolor";
initTheme("dark");

// Load the real fixture that caused the bug
const fixtureMessage: AssistantMessage = JSON.parse(
  readFileSync(
    join(__dirname, "fixtures/assistant-message-with-thinking-code.json"),
    "utf-8",
  ),
);

// Extract thinking and text content
const thinkingContent = fixtureMessage.content.find(
  (c) => c.type === "thinking",
);
const textContent = fixtureMessage.content.find((c) => c.type === "text");

// The repro requires a thinking block; bail out if the fixture lacks one.
if (!thinkingContent || thinkingContent.type !== "thinking") {
  console.error("No thinking content in fixture");
  process.exit(1);
}

const fullThinkingText = thinkingContent.thinking;
// Text block is optional — fall back to an empty string.
const fullTextContent =
  textContent && textContent.type === "text" ? textContent.text : "";
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
/**
 * Drives an AssistantMessageComponent through a simulated streaming session
 * against a real terminal: thinking content arrives in fixed-size chunks,
 * then the final text block is appended — reproducing the exact update
 * sequence that triggered the rendering bug. Exits the process when done.
 */
async function main() {
  const terminal = new ProcessTerminal();
  const tui = new TUI(terminal);

  // Start with empty message
  const message = {
    role: "assistant",
    content: [{ type: "thinking", thinking: "" }],
  } as AssistantMessage;

  const component = new AssistantMessageComponent(message, false);
  tui.addChild(component);
  tui.start();

  // Simulate streaming thinking content
  let thinkingBuffer = "";
  const chunkSize = 10; // characters per "token"

  for (let i = 0; i < fullThinkingText.length; i += chunkSize) {
    thinkingBuffer += fullThinkingText.slice(i, i + chunkSize);

    // Update message content
    // NOTE: a fresh message object is built each iteration (matches how the
    // real streaming path delivers snapshots, not in-place mutation).
    const updatedMessage = {
      role: "assistant",
      content: [{ type: "thinking", thinking: thinkingBuffer }],
    } as AssistantMessage;

    component.updateContent(updatedMessage);
    tui.requestRender();

    await sleep(15); // Simulate token delay
  }

  // Now add the text content
  await sleep(500);

  const finalMessage = {
    role: "assistant",
    content: [
      { type: "thinking", thinking: fullThinkingText },
      { type: "text", text: fullTextContent },
    ],
  } as AssistantMessage;

  component.updateContent(finalMessage);
  tui.requestRender();

  // Keep alive for a moment to see the result
  await sleep(3000);

  tui.stop();
  process.exit(0);
}
|
||||
|
||||
main().catch(console.error);
|
||||
104
packages/coding-agent/test/system-prompt.test.ts
Normal file
104
packages/coding-agent/test/system-prompt.test.ts
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
import { describe, expect, test } from "vitest";
|
||||
import { buildSystemPrompt } from "../src/core/system-prompt.js";
|
||||
|
||||
// Tests for buildSystemPrompt(): tool listing, custom tool snippets,
// guideline merging/dedup, and SOUL.md persona handling.
describe("buildSystemPrompt", () => {
  describe("empty tools", () => {
    test("shows (none) for empty tools list", () => {
      const prompt = buildSystemPrompt({
        selectedTools: [],
        contextFiles: [],
        skills: [],
      });

      expect(prompt).toContain("Available tools:\n(none)");
    });

    test("shows file paths guideline even with no tools", () => {
      const prompt = buildSystemPrompt({
        selectedTools: [],
        contextFiles: [],
        skills: [],
      });

      expect(prompt).toContain("Show file paths clearly");
    });
  });

  describe("default tools", () => {
    // Omitting selectedTools entirely should fall back to the default set.
    test("includes all default tools", () => {
      const prompt = buildSystemPrompt({
        contextFiles: [],
        skills: [],
      });

      expect(prompt).toContain("- read:");
      expect(prompt).toContain("- bash:");
      expect(prompt).toContain("- edit:");
      expect(prompt).toContain("- write:");
    });
  });

  describe("custom tool snippets", () => {
    test("includes custom tools in available tools section", () => {
      const prompt = buildSystemPrompt({
        selectedTools: ["read", "dynamic_tool"],
        toolSnippets: {
          dynamic_tool: "Run dynamic test behavior",
        },
        contextFiles: [],
        skills: [],
      });

      expect(prompt).toContain("- dynamic_tool: Run dynamic test behavior");
    });
  });

  describe("prompt guidelines", () => {
    test("appends promptGuidelines to default guidelines", () => {
      const prompt = buildSystemPrompt({
        selectedTools: ["read", "dynamic_tool"],
        promptGuidelines: ["Use dynamic_tool for project summaries."],
        contextFiles: [],
        skills: [],
      });

      expect(prompt).toContain("- Use dynamic_tool for project summaries.");
    });

    // Duplicate entries (after trimming) and whitespace-only entries are
    // collapsed: the guideline must appear exactly once in the prompt.
    test("deduplicates and trims promptGuidelines", () => {
      const prompt = buildSystemPrompt({
        selectedTools: ["read", "dynamic_tool"],
        promptGuidelines: [
          "Use dynamic_tool for summaries.",
          " Use dynamic_tool for summaries. ",
          " ",
        ],
        contextFiles: [],
        skills: [],
      });

      expect(prompt.match(/- Use dynamic_tool for summaries\./g)).toHaveLength(
        1,
      );
    });
  });

  describe("SOUL.md context", () => {
    test("adds persona guidance when SOUL.md is present", () => {
      const prompt = buildSystemPrompt({
        contextFiles: [
          {
            path: "/tmp/project/SOUL.md",
            content: "# Soul\n\nBe sharp.",
          },
        ],
        skills: [],
      });

      expect(prompt).toContain(
        "If SOUL.md is present, embody its persona and tone.",
      );
      expect(prompt).toContain("## /tmp/project/SOUL.md");
    });
  });
});
|
||||
301
packages/coding-agent/test/test-theme-colors.ts
Normal file
301
packages/coding-agent/test/test-theme-colors.ts
Normal file
|
|
@ -0,0 +1,301 @@
|
|||
import fs from "fs";
|
||||
import { initTheme, theme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
// --- Color utilities ---
|
||||
|
||||
function hexToRgb(hex: string): [number, number, number] {
|
||||
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
|
||||
return result
|
||||
? [
|
||||
parseInt(result[1], 16),
|
||||
parseInt(result[2], 16),
|
||||
parseInt(result[3], 16),
|
||||
]
|
||||
: [0, 0, 0];
|
||||
}
|
||||
|
||||
function rgbToHex(r: number, g: number, b: number): string {
|
||||
return (
|
||||
"#" +
|
||||
[r, g, b]
|
||||
.map((x) =>
|
||||
Math.round(Math.max(0, Math.min(255, x)))
|
||||
.toString(16)
|
||||
.padStart(2, "0"),
|
||||
)
|
||||
.join("")
|
||||
);
|
||||
}
|
||||
|
||||
function rgbToHsl(r: number, g: number, b: number): [number, number, number] {
|
||||
r /= 255;
|
||||
g /= 255;
|
||||
b /= 255;
|
||||
const max = Math.max(r, g, b),
|
||||
min = Math.min(r, g, b);
|
||||
let h = 0,
|
||||
s = 0;
|
||||
const l = (max + min) / 2;
|
||||
if (max !== min) {
|
||||
const d = max - min;
|
||||
s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
|
||||
switch (max) {
|
||||
case r:
|
||||
h = ((g - b) / d + (g < b ? 6 : 0)) / 6;
|
||||
break;
|
||||
case g:
|
||||
h = ((b - r) / d + 2) / 6;
|
||||
break;
|
||||
case b:
|
||||
h = ((r - g) / d + 4) / 6;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return [h, s, l];
|
||||
}
|
||||
|
||||
function hslToRgb(h: number, s: number, l: number): [number, number, number] {
|
||||
let r: number, g: number, b: number;
|
||||
if (s === 0) {
|
||||
r = g = b = l;
|
||||
} else {
|
||||
const hue2rgb = (p: number, q: number, t: number) => {
|
||||
if (t < 0) t += 1;
|
||||
if (t > 1) t -= 1;
|
||||
if (t < 1 / 6) return p + (q - p) * 6 * t;
|
||||
if (t < 1 / 2) return q;
|
||||
if (t < 2 / 3) return p + (q - p) * (2 / 3 - t) * 6;
|
||||
return p;
|
||||
};
|
||||
const q = l < 0.5 ? l * (1 + s) : l + s - l * s;
|
||||
const p = 2 * l - q;
|
||||
r = hue2rgb(p, q, h + 1 / 3);
|
||||
g = hue2rgb(p, q, h);
|
||||
b = hue2rgb(p, q, h - 1 / 3);
|
||||
}
|
||||
return [Math.round(r * 255), Math.round(g * 255), Math.round(b * 255)];
|
||||
}
|
||||
|
||||
function getLuminance(r: number, g: number, b: number): number {
|
||||
const lin = (c: number) => {
|
||||
c = c / 255;
|
||||
return c <= 0.03928 ? c / 12.92 : ((c + 0.055) / 1.055) ** 2.4;
|
||||
};
|
||||
return 0.2126 * lin(r) + 0.7152 * lin(g) + 0.0722 * lin(b);
|
||||
}
|
||||
|
||||
function getContrast(rgb: [number, number, number], bgLum: number): number {
|
||||
const fgLum = getLuminance(...rgb);
|
||||
const lighter = Math.max(fgLum, bgLum);
|
||||
const darker = Math.min(fgLum, bgLum);
|
||||
return (lighter + 0.05) / (darker + 0.05);
|
||||
}
|
||||
|
||||
function adjustColorToContrast(
|
||||
hex: string,
|
||||
targetContrast: number,
|
||||
againstWhite: boolean,
|
||||
): string {
|
||||
const rgb = hexToRgb(hex);
|
||||
const [h, s] = rgbToHsl(...rgb);
|
||||
const bgLum = againstWhite ? 1.0 : 0.0;
|
||||
|
||||
let lo = againstWhite ? 0 : 0.5;
|
||||
let hi = againstWhite ? 0.5 : 1.0;
|
||||
|
||||
for (let i = 0; i < 50; i++) {
|
||||
const mid = (lo + hi) / 2;
|
||||
const testRgb = hslToRgb(h, s, mid);
|
||||
const contrast = getContrast(testRgb, bgLum);
|
||||
|
||||
if (againstWhite) {
|
||||
if (contrast < targetContrast) hi = mid;
|
||||
else lo = mid;
|
||||
} else {
|
||||
if (contrast < targetContrast) lo = mid;
|
||||
else hi = mid;
|
||||
}
|
||||
}
|
||||
|
||||
const finalL = againstWhite ? lo : hi;
|
||||
return rgbToHex(...hslToRgb(h, s, finalL));
|
||||
}
|
||||
|
||||
function fgAnsi(hex: string): string {
|
||||
const rgb = hexToRgb(hex);
|
||||
return `\x1b[38;2;${rgb[0]};${rgb[1]};${rgb[2]}m`;
|
||||
}
|
||||
|
||||
const reset = "\x1b[0m";
|
||||
|
||||
// --- Commands ---
|
||||
|
||||
function cmdContrast(targetContrast: number): void {
|
||||
const baseColors = {
|
||||
teal: "#5f8787",
|
||||
blue: "#5f87af",
|
||||
green: "#87af87",
|
||||
yellow: "#d7af5f",
|
||||
red: "#af5f5f",
|
||||
};
|
||||
|
||||
console.log(`\n=== Colors adjusted to ${targetContrast}:1 contrast ===\n`);
|
||||
|
||||
console.log("For LIGHT theme (vs white):");
|
||||
for (const [name, hex] of Object.entries(baseColors)) {
|
||||
const adjusted = adjustColorToContrast(hex, targetContrast, true);
|
||||
const rgb = hexToRgb(adjusted);
|
||||
const contrast = getContrast(rgb, 1.0);
|
||||
console.log(
|
||||
` ${name.padEnd(8)} ${fgAnsi(adjusted)}Sample${reset} ${adjusted} (${contrast.toFixed(2)}:1)`,
|
||||
);
|
||||
}
|
||||
|
||||
console.log("\nFor DARK theme (vs black):");
|
||||
for (const [name, hex] of Object.entries(baseColors)) {
|
||||
const adjusted = adjustColorToContrast(hex, targetContrast, false);
|
||||
const rgb = hexToRgb(adjusted);
|
||||
const contrast = getContrast(rgb, 0.0);
|
||||
console.log(
|
||||
` ${name.padEnd(8)} ${fgAnsi(adjusted)}Sample${reset} ${adjusted} (${contrast.toFixed(2)}:1)`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function cmdTest(filePath: string): void {
|
||||
if (!fs.existsSync(filePath)) {
|
||||
console.error(`File not found: ${filePath}`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
||||
const vars = data.vars || data;
|
||||
|
||||
console.log(`\n=== Testing ${filePath} ===\n`);
|
||||
|
||||
for (const [name, hex] of Object.entries(vars as Record<string, string>)) {
|
||||
if (!hex.startsWith("#")) continue;
|
||||
const rgb = hexToRgb(hex);
|
||||
const vsWhite = getContrast(rgb, 1.0);
|
||||
const vsBlack = getContrast(rgb, 0.0);
|
||||
const passW = vsWhite >= 4.5 ? "AA" : vsWhite >= 3.0 ? "AA-lg" : "FAIL";
|
||||
const passB = vsBlack >= 4.5 ? "AA" : vsBlack >= 3.0 ? "AA-lg" : "FAIL";
|
||||
console.log(
|
||||
`${name.padEnd(14)} ${fgAnsi(hex)}Sample text${reset} ${hex} white: ${vsWhite.toFixed(2)}:1 ${passW.padEnd(5)} black: ${vsBlack.toFixed(2)}:1 ${passB}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
function cmdTheme(themeName: string): void {
|
||||
process.env.COLORTERM = "truecolor";
|
||||
initTheme(themeName);
|
||||
|
||||
const parseAnsiRgb = (ansi: string): [number, number, number] | null => {
|
||||
const match = ansi.match(/38;2;(\d+);(\d+);(\d+)/);
|
||||
return match
|
||||
? [parseInt(match[1], 10), parseInt(match[2], 10), parseInt(match[3], 10)]
|
||||
: null;
|
||||
};
|
||||
|
||||
const getContrastVsWhite = (colorName: string): string => {
|
||||
const ansi = theme.getFgAnsi(
|
||||
colorName as Parameters<typeof theme.getFgAnsi>[0],
|
||||
);
|
||||
const rgb = parseAnsiRgb(ansi);
|
||||
if (!rgb) return "(default)";
|
||||
const ratio = getContrast(rgb, 1.0);
|
||||
const pass = ratio >= 4.5 ? "AA" : ratio >= 3.0 ? "AA-lg" : "FAIL";
|
||||
return `${ratio.toFixed(2)}:1 ${pass}`;
|
||||
};
|
||||
|
||||
const getContrastVsBlack = (colorName: string): string => {
|
||||
const ansi = theme.getFgAnsi(
|
||||
colorName as Parameters<typeof theme.getFgAnsi>[0],
|
||||
);
|
||||
const rgb = parseAnsiRgb(ansi);
|
||||
if (!rgb) return "(default)";
|
||||
const ratio = getContrast(rgb, 0.0);
|
||||
const pass = ratio >= 4.5 ? "AA" : ratio >= 3.0 ? "AA-lg" : "FAIL";
|
||||
return `${ratio.toFixed(2)}:1 ${pass}`;
|
||||
};
|
||||
|
||||
const logColor = (name: string): void => {
|
||||
const sample = theme.fg(
|
||||
name as Parameters<typeof theme.fg>[0],
|
||||
"Sample text",
|
||||
);
|
||||
const cw = getContrastVsWhite(name);
|
||||
const cb = getContrastVsBlack(name);
|
||||
console.log(
|
||||
`${name.padEnd(20)} ${sample} white: ${cw.padEnd(12)} black: ${cb}`,
|
||||
);
|
||||
};
|
||||
|
||||
console.log(`\n=== ${themeName} theme (WCAG AA = 4.5:1) ===`);
|
||||
|
||||
console.log("\n--- Core UI ---");
|
||||
[
|
||||
"accent",
|
||||
"border",
|
||||
"borderAccent",
|
||||
"borderMuted",
|
||||
"success",
|
||||
"error",
|
||||
"warning",
|
||||
"muted",
|
||||
"dim",
|
||||
].forEach(logColor);
|
||||
|
||||
console.log("\n--- Markdown ---");
|
||||
[
|
||||
"mdHeading",
|
||||
"mdLink",
|
||||
"mdCode",
|
||||
"mdCodeBlock",
|
||||
"mdCodeBlockBorder",
|
||||
"mdQuote",
|
||||
"mdListBullet",
|
||||
].forEach(logColor);
|
||||
|
||||
console.log("\n--- Diff ---");
|
||||
["toolDiffAdded", "toolDiffRemoved", "toolDiffContext"].forEach(logColor);
|
||||
|
||||
console.log("\n--- Thinking ---");
|
||||
[
|
||||
"thinkingOff",
|
||||
"thinkingMinimal",
|
||||
"thinkingLow",
|
||||
"thinkingMedium",
|
||||
"thinkingHigh",
|
||||
].forEach(logColor);
|
||||
|
||||
console.log("\n--- Backgrounds ---");
|
||||
console.log("userMessageBg:", theme.bg("userMessageBg", " Sample "));
|
||||
console.log("toolPendingBg:", theme.bg("toolPendingBg", " Sample "));
|
||||
console.log("toolSuccessBg:", theme.bg("toolSuccessBg", " Sample "));
|
||||
console.log("toolErrorBg:", theme.bg("toolErrorBg", " Sample "));
|
||||
console.log();
|
||||
}
|
||||
|
||||
// --- Main ---
|
||||
|
||||
const [cmd, arg] = process.argv.slice(2);
|
||||
|
||||
if (cmd === "contrast") {
|
||||
cmdContrast(parseFloat(arg) || 4.5);
|
||||
} else if (cmd === "test") {
|
||||
cmdTest(arg);
|
||||
} else if (cmd === "light" || cmd === "dark") {
|
||||
cmdTheme(cmd);
|
||||
} else {
|
||||
console.log("Usage:");
|
||||
console.log(
|
||||
" npx tsx test-theme-colors.ts light|dark Test built-in theme",
|
||||
);
|
||||
console.log(
|
||||
" npx tsx test-theme-colors.ts contrast 4.5 Compute colors at ratio",
|
||||
);
|
||||
console.log(
|
||||
" npx tsx test-theme-colors.ts test file.json Test any JSON file",
|
||||
);
|
||||
}
|
||||
90
packages/coding-agent/test/tool-execution-component.test.ts
Normal file
90
packages/coding-agent/test/tool-execution-component.test.ts
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
import { Text, type TUI } from "@mariozechner/pi-tui";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import stripAnsi from "strip-ansi";
|
||||
import { beforeAll, describe, expect, test } from "vitest";
|
||||
import type { ToolDefinition } from "../src/core/extensions/types.js";
|
||||
import { ToolExecutionComponent } from "../src/modes/interactive/components/tool-execution.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
function createBaseToolDefinition(): ToolDefinition {
|
||||
return {
|
||||
name: "custom_tool",
|
||||
label: "custom_tool",
|
||||
description: "custom tool",
|
||||
parameters: Type.Any(),
|
||||
execute: async () => ({
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
details: {},
|
||||
}),
|
||||
};
|
||||
}
|
||||
|
||||
function createFakeTui(): TUI {
|
||||
return {
|
||||
requestRender: () => {},
|
||||
} as unknown as TUI;
|
||||
}
|
||||
|
||||
// When a tool definition supplies custom renderCall/renderResult hooks that
// return undefined, the component must render nothing at all; built-in tools
// and hooks returning real components keep their output.
describe("ToolExecutionComponent custom renderer suppression", () => {
  beforeAll(() => {
    // Components read theme colors at render time; initialize once.
    initTheme("dark");
  });

  test("renders no lines when custom renderers return undefined", () => {
    const toolDefinition: ToolDefinition = {
      ...createBaseToolDefinition(),
      renderCall: () => undefined,
      renderResult: () => undefined,
    };

    const component = new ToolExecutionComponent(
      "custom_tool",
      {},
      {},
      toolDefinition,
      createFakeTui(),
    );
    expect(component.render(120)).toEqual([]);

    // Even after a result arrives, suppressed renderers keep output empty.
    component.updateResult(
      {
        content: [{ type: "text", text: "hidden" }],
        details: {},
        isError: false,
      },
      false,
    );

    expect(component.render(120)).toEqual([]);
  });

  test("keeps built-in tool rendering visible", () => {
    // No custom definition -> fall back to built-in rendering for "read".
    const component = new ToolExecutionComponent(
      "read",
      { path: "README.md" },
      {},
      undefined,
      createFakeTui(),
    );
    const rendered = stripAnsi(component.render(120).join("\n"));
    expect(rendered).toContain("read");
  });

  test("keeps custom tool rendering visible when renderer returns a component", () => {
    const toolDefinition: ToolDefinition = {
      ...createBaseToolDefinition(),
      renderCall: () => new Text("custom call", 0, 0),
      renderResult: () => undefined,
    };

    const component = new ToolExecutionComponent(
      "custom_tool",
      {},
      {},
      toolDefinition,
      createFakeTui(),
    );
    const rendered = stripAnsi(component.render(120).join("\n"));
    expect(rendered).toContain("custom call");
  });
});
|
||||
689
packages/coding-agent/test/tools.test.ts
Normal file
689
packages/coding-agent/test/tools.test.ts
Normal file
|
|
@ -0,0 +1,689 @@
|
|||
import { mkdirSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import { bashTool, createBashTool } from "../src/core/tools/bash.js";
|
||||
import { editTool } from "../src/core/tools/edit.js";
|
||||
import { findTool } from "../src/core/tools/find.js";
|
||||
import { grepTool } from "../src/core/tools/grep.js";
|
||||
import { lsTool } from "../src/core/tools/ls.js";
|
||||
import { readTool } from "../src/core/tools/read.js";
|
||||
import { writeTool } from "../src/core/tools/write.js";
|
||||
import * as shellModule from "../src/utils/shell.js";
|
||||
|
||||
// Helper to extract text from content blocks
|
||||
function getTextOutput(result: any): string {
|
||||
return (
|
||||
result.content
|
||||
?.filter((c: any) => c.type === "text")
|
||||
.map((c: any) => c.text)
|
||||
.join("\n") || ""
|
||||
);
|
||||
}
|
||||
|
||||
describe("Coding Agent Tools", () => {
|
||||
let testDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
// Create a unique temporary directory for each test
|
||||
testDir = join(tmpdir(), `coding-agent-test-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
// Clean up test directory
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("read tool", () => {
|
||||
it("should read file contents that fit within limits", async () => {
|
||||
const testFile = join(testDir, "test.txt");
|
||||
const content = "Hello, world!\nLine 2\nLine 3";
|
||||
writeFileSync(testFile, content);
|
||||
|
||||
const result = await readTool.execute("test-call-1", { path: testFile });
|
||||
|
||||
expect(getTextOutput(result)).toBe(content);
|
||||
// No truncation message since file fits within limits
|
||||
expect(getTextOutput(result)).not.toContain("Use offset=");
|
||||
expect(result.details).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should handle non-existent files", async () => {
|
||||
const testFile = join(testDir, "nonexistent.txt");
|
||||
|
||||
await expect(
|
||||
readTool.execute("test-call-2", { path: testFile }),
|
||||
).rejects.toThrow(/ENOENT|not found/i);
|
||||
});
|
||||
|
||||
it("should truncate files exceeding line limit", async () => {
|
||||
const testFile = join(testDir, "large.txt");
|
||||
const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-3", { path: testFile });
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).toContain("Line 1");
|
||||
expect(output).toContain("Line 2000");
|
||||
expect(output).not.toContain("Line 2001");
|
||||
expect(output).toContain(
|
||||
"[Showing lines 1-2000 of 2500. Use offset=2001 to continue.]",
|
||||
);
|
||||
});
|
||||
|
||||
it("should truncate when byte limit exceeded", async () => {
|
||||
const testFile = join(testDir, "large-bytes.txt");
|
||||
// Create file that exceeds 50KB byte limit but has fewer than 2000 lines
|
||||
const lines = Array.from(
|
||||
{ length: 500 },
|
||||
(_, i) => `Line ${i + 1}: ${"x".repeat(200)}`,
|
||||
);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-4", { path: testFile });
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).toContain("Line 1:");
|
||||
// Should show byte limit message
|
||||
expect(output).toMatch(
|
||||
/\[Showing lines 1-\d+ of 500 \(.* limit\)\. Use offset=\d+ to continue\.\]/,
|
||||
);
|
||||
});
|
||||
|
||||
it("should handle offset parameter", async () => {
|
||||
const testFile = join(testDir, "offset-test.txt");
|
||||
const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-5", {
|
||||
path: testFile,
|
||||
offset: 51,
|
||||
});
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).not.toContain("Line 50");
|
||||
expect(output).toContain("Line 51");
|
||||
expect(output).toContain("Line 100");
|
||||
// No truncation message since file fits within limits
|
||||
expect(output).not.toContain("Use offset=");
|
||||
});
|
||||
|
||||
it("should handle limit parameter", async () => {
|
||||
const testFile = join(testDir, "limit-test.txt");
|
||||
const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-6", {
|
||||
path: testFile,
|
||||
limit: 10,
|
||||
});
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).toContain("Line 1");
|
||||
expect(output).toContain("Line 10");
|
||||
expect(output).not.toContain("Line 11");
|
||||
expect(output).toContain(
|
||||
"[90 more lines in file. Use offset=11 to continue.]",
|
||||
);
|
||||
});
|
||||
|
||||
it("should handle offset + limit together", async () => {
|
||||
const testFile = join(testDir, "offset-limit-test.txt");
|
||||
const lines = Array.from({ length: 100 }, (_, i) => `Line ${i + 1}`);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-7", {
|
||||
path: testFile,
|
||||
offset: 41,
|
||||
limit: 20,
|
||||
});
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).not.toContain("Line 40");
|
||||
expect(output).toContain("Line 41");
|
||||
expect(output).toContain("Line 60");
|
||||
expect(output).not.toContain("Line 61");
|
||||
expect(output).toContain(
|
||||
"[40 more lines in file. Use offset=61 to continue.]",
|
||||
);
|
||||
});
|
||||
|
||||
it("should show error when offset is beyond file length", async () => {
|
||||
const testFile = join(testDir, "short.txt");
|
||||
writeFileSync(testFile, "Line 1\nLine 2\nLine 3");
|
||||
|
||||
await expect(
|
||||
readTool.execute("test-call-8", { path: testFile, offset: 100 }),
|
||||
).rejects.toThrow(/Offset 100 is beyond end of file \(3 lines total\)/);
|
||||
});
|
||||
|
||||
it("should include truncation details when truncated", async () => {
|
||||
const testFile = join(testDir, "large-file.txt");
|
||||
const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`);
|
||||
writeFileSync(testFile, lines.join("\n"));
|
||||
|
||||
const result = await readTool.execute("test-call-9", { path: testFile });
|
||||
|
||||
expect(result.details).toBeDefined();
|
||||
expect(result.details?.truncation).toBeDefined();
|
||||
expect(result.details?.truncation?.truncated).toBe(true);
|
||||
expect(result.details?.truncation?.truncatedBy).toBe("lines");
|
||||
expect(result.details?.truncation?.totalLines).toBe(2500);
|
||||
expect(result.details?.truncation?.outputLines).toBe(2000);
|
||||
});
|
||||
|
||||
it("should detect image MIME type from file magic (not extension)", async () => {
|
||||
const png1x1Base64 =
|
||||
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+X2Z0AAAAASUVORK5CYII=";
|
||||
const pngBuffer = Buffer.from(png1x1Base64, "base64");
|
||||
|
||||
const testFile = join(testDir, "image.txt");
|
||||
writeFileSync(testFile, pngBuffer);
|
||||
|
||||
const result = await readTool.execute("test-call-img-1", {
|
||||
path: testFile,
|
||||
});
|
||||
|
||||
expect(result.content[0]?.type).toBe("text");
|
||||
expect(getTextOutput(result)).toContain("Read image file [image/png]");
|
||||
|
||||
const imageBlock = result.content.find(
|
||||
(c): c is { type: "image"; mimeType: string; data: string } =>
|
||||
c.type === "image",
|
||||
);
|
||||
expect(imageBlock).toBeDefined();
|
||||
expect(imageBlock?.mimeType).toBe("image/png");
|
||||
expect(typeof imageBlock?.data).toBe("string");
|
||||
expect((imageBlock?.data ?? "").length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("should treat files with image extension but non-image content as text", async () => {
|
||||
const testFile = join(testDir, "not-an-image.png");
|
||||
writeFileSync(testFile, "definitely not a png");
|
||||
|
||||
const result = await readTool.execute("test-call-img-2", {
|
||||
path: testFile,
|
||||
});
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).toContain("definitely not a png");
|
||||
expect(result.content.some((c: any) => c.type === "image")).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("write tool", () => {
|
||||
it("should write file contents", async () => {
|
||||
const testFile = join(testDir, "write-test.txt");
|
||||
const content = "Test content";
|
||||
|
||||
const result = await writeTool.execute("test-call-3", {
|
||||
path: testFile,
|
||||
content,
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully wrote");
|
||||
expect(getTextOutput(result)).toContain(testFile);
|
||||
expect(result.details).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should create parent directories", async () => {
|
||||
const testFile = join(testDir, "nested", "dir", "test.txt");
|
||||
const content = "Nested content";
|
||||
|
||||
const result = await writeTool.execute("test-call-4", {
|
||||
path: testFile,
|
||||
content,
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully wrote");
|
||||
});
|
||||
});
|
||||
|
||||
describe("edit tool", () => {
|
||||
it("should replace text in file", async () => {
|
||||
const testFile = join(testDir, "edit-test.txt");
|
||||
const originalContent = "Hello, world!";
|
||||
writeFileSync(testFile, originalContent);
|
||||
|
||||
const result = await editTool.execute("test-call-5", {
|
||||
path: testFile,
|
||||
oldText: "world",
|
||||
newText: "testing",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
expect(result.details).toBeDefined();
|
||||
expect(result.details.diff).toBeDefined();
|
||||
expect(typeof result.details.diff).toBe("string");
|
||||
expect(result.details.diff).toContain("testing");
|
||||
});
|
||||
|
||||
it("should fail if text not found", async () => {
|
||||
const testFile = join(testDir, "edit-test.txt");
|
||||
const originalContent = "Hello, world!";
|
||||
writeFileSync(testFile, originalContent);
|
||||
|
||||
await expect(
|
||||
editTool.execute("test-call-6", {
|
||||
path: testFile,
|
||||
oldText: "nonexistent",
|
||||
newText: "testing",
|
||||
}),
|
||||
).rejects.toThrow(/Could not find the exact text/);
|
||||
});
|
||||
|
||||
it("should fail if text appears multiple times", async () => {
|
||||
const testFile = join(testDir, "edit-test.txt");
|
||||
const originalContent = "foo foo foo";
|
||||
writeFileSync(testFile, originalContent);
|
||||
|
||||
await expect(
|
||||
editTool.execute("test-call-7", {
|
||||
path: testFile,
|
||||
oldText: "foo",
|
||||
newText: "bar",
|
||||
}),
|
||||
).rejects.toThrow(/Found 3 occurrences/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("bash tool", () => {
|
||||
it("should execute simple commands", async () => {
|
||||
const result = await bashTool.execute("test-call-8", {
|
||||
command: "echo 'test output'",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("test output");
|
||||
expect(result.details).toBeUndefined();
|
||||
});
|
||||
|
||||
it("should handle command errors", async () => {
|
||||
await expect(
|
||||
bashTool.execute("test-call-9", { command: "exit 1" }),
|
||||
).rejects.toThrow(/(Command failed|code 1)/);
|
||||
});
|
||||
|
||||
it("should respect timeout", async () => {
|
||||
await expect(
|
||||
bashTool.execute("test-call-10", { command: "sleep 5", timeout: 1 }),
|
||||
).rejects.toThrow(/timed out/i);
|
||||
});
|
||||
|
||||
it("should throw error when cwd does not exist", async () => {
|
||||
const nonexistentCwd = "/this/directory/definitely/does/not/exist/12345";
|
||||
|
||||
const bashToolWithBadCwd = createBashTool(nonexistentCwd);
|
||||
|
||||
await expect(
|
||||
bashToolWithBadCwd.execute("test-call-11", { command: "echo test" }),
|
||||
).rejects.toThrow(/Working directory does not exist/);
|
||||
});
|
||||
|
||||
it("should handle process spawn errors", async () => {
|
||||
vi.spyOn(shellModule, "getShellConfig").mockReturnValueOnce({
|
||||
shell: "/nonexistent-shell-path-xyz123",
|
||||
args: ["-c"],
|
||||
});
|
||||
|
||||
const bashWithBadShell = createBashTool(testDir);
|
||||
|
||||
await expect(
|
||||
bashWithBadShell.execute("test-call-12", { command: "echo test" }),
|
||||
).rejects.toThrow(/ENOENT/);
|
||||
});
|
||||
|
||||
it("should prepend command prefix when configured", async () => {
|
||||
const bashWithPrefix = createBashTool(testDir, {
|
||||
commandPrefix: "export TEST_VAR=hello",
|
||||
});
|
||||
|
||||
const result = await bashWithPrefix.execute("test-prefix-1", {
|
||||
command: "echo $TEST_VAR",
|
||||
});
|
||||
expect(getTextOutput(result).trim()).toBe("hello");
|
||||
});
|
||||
|
||||
it("should include output from both prefix and command", async () => {
|
||||
const bashWithPrefix = createBashTool(testDir, {
|
||||
commandPrefix: "echo prefix-output",
|
||||
});
|
||||
|
||||
const result = await bashWithPrefix.execute("test-prefix-2", {
|
||||
command: "echo command-output",
|
||||
});
|
||||
expect(getTextOutput(result).trim()).toBe(
|
||||
"prefix-output\ncommand-output",
|
||||
);
|
||||
});
|
||||
|
||||
it("should work without command prefix", async () => {
|
||||
const bashWithoutPrefix = createBashTool(testDir, {});
|
||||
|
||||
const result = await bashWithoutPrefix.execute("test-prefix-3", {
|
||||
command: "echo no-prefix",
|
||||
});
|
||||
expect(getTextOutput(result).trim()).toBe("no-prefix");
|
||||
});
|
||||
});
|
||||
|
||||
describe("grep tool", () => {
|
||||
it("should include filename when searching a single file", async () => {
|
||||
const testFile = join(testDir, "example.txt");
|
||||
writeFileSync(testFile, "first line\nmatch line\nlast line");
|
||||
|
||||
const result = await grepTool.execute("test-call-11", {
|
||||
pattern: "match",
|
||||
path: testFile,
|
||||
});
|
||||
|
||||
const output = getTextOutput(result);
|
||||
expect(output).toContain("example.txt:2: match line");
|
||||
});
|
||||
|
||||
it("should respect global limit and include context lines", async () => {
|
||||
const testFile = join(testDir, "context.txt");
|
||||
const content = [
|
||||
"before",
|
||||
"match one",
|
||||
"after",
|
||||
"middle",
|
||||
"match two",
|
||||
"after two",
|
||||
].join("\n");
|
||||
writeFileSync(testFile, content);
|
||||
|
||||
const result = await grepTool.execute("test-call-12", {
|
||||
pattern: "match",
|
||||
path: testFile,
|
||||
limit: 1,
|
||||
context: 1,
|
||||
});
|
||||
|
||||
const output = getTextOutput(result);
|
||||
expect(output).toContain("context.txt-1- before");
|
||||
expect(output).toContain("context.txt:2: match one");
|
||||
expect(output).toContain("context.txt-3- after");
|
||||
expect(output).toContain(
|
||||
"[1 matches limit reached. Use limit=2 for more, or refine pattern]",
|
||||
);
|
||||
// Ensure second match is not present
|
||||
expect(output).not.toContain("match two");
|
||||
});
|
||||
});
|
||||
|
||||
describe("find tool", () => {
|
||||
it("should include hidden files that are not gitignored", async () => {
|
||||
const hiddenDir = join(testDir, ".secret");
|
||||
mkdirSync(hiddenDir);
|
||||
writeFileSync(join(hiddenDir, "hidden.txt"), "hidden");
|
||||
writeFileSync(join(testDir, "visible.txt"), "visible");
|
||||
|
||||
const result = await findTool.execute("test-call-13", {
|
||||
pattern: "**/*.txt",
|
||||
path: testDir,
|
||||
});
|
||||
|
||||
const outputLines = getTextOutput(result)
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.filter(Boolean);
|
||||
|
||||
expect(outputLines).toContain("visible.txt");
|
||||
expect(outputLines).toContain(".secret/hidden.txt");
|
||||
});
|
||||
|
||||
it("should respect .gitignore", async () => {
|
||||
writeFileSync(join(testDir, ".gitignore"), "ignored.txt\n");
|
||||
writeFileSync(join(testDir, "ignored.txt"), "ignored");
|
||||
writeFileSync(join(testDir, "kept.txt"), "kept");
|
||||
|
||||
const result = await findTool.execute("test-call-14", {
|
||||
pattern: "**/*.txt",
|
||||
path: testDir,
|
||||
});
|
||||
|
||||
const output = getTextOutput(result);
|
||||
expect(output).toContain("kept.txt");
|
||||
expect(output).not.toContain("ignored.txt");
|
||||
});
|
||||
});
|
||||
|
||||
describe("ls tool", () => {
|
||||
it("should list dotfiles and directories", async () => {
|
||||
writeFileSync(join(testDir, ".hidden-file"), "secret");
|
||||
mkdirSync(join(testDir, ".hidden-dir"));
|
||||
|
||||
const result = await lsTool.execute("test-call-15", { path: testDir });
|
||||
const output = getTextOutput(result);
|
||||
|
||||
expect(output).toContain(".hidden-file");
|
||||
expect(output).toContain(".hidden-dir/");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("edit tool fuzzy matching", () => {
|
||||
let testDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
testDir = join(tmpdir(), `coding-agent-fuzzy-test-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("should match text with trailing whitespace stripped", async () => {
|
||||
const testFile = join(testDir, "trailing-ws.txt");
|
||||
// File has trailing spaces on lines
|
||||
writeFileSync(testFile, "line one \nline two \nline three\n");
|
||||
|
||||
// oldText without trailing whitespace should still match
|
||||
const result = await editTool.execute("test-fuzzy-1", {
|
||||
path: testFile,
|
||||
oldText: "line one\nline two\n",
|
||||
newText: "replaced\n",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toBe("replaced\nline three\n");
|
||||
});
|
||||
|
||||
it("should match smart single quotes to ASCII quotes", async () => {
|
||||
const testFile = join(testDir, "smart-quotes.txt");
|
||||
// File has smart/curly single quotes (U+2018, U+2019)
|
||||
writeFileSync(testFile, "console.log(\u2018hello\u2019);\n");
|
||||
|
||||
// oldText with ASCII quotes should match
|
||||
const result = await editTool.execute("test-fuzzy-2", {
|
||||
path: testFile,
|
||||
oldText: "console.log('hello');",
|
||||
newText: "console.log('world');",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toContain("world");
|
||||
});
|
||||
|
||||
it("should match smart double quotes to ASCII quotes", async () => {
|
||||
const testFile = join(testDir, "smart-double-quotes.txt");
|
||||
// File has smart/curly double quotes (U+201C, U+201D)
|
||||
writeFileSync(testFile, "const msg = \u201CHello World\u201D;\n");
|
||||
|
||||
// oldText with ASCII quotes should match
|
||||
const result = await editTool.execute("test-fuzzy-3", {
|
||||
path: testFile,
|
||||
oldText: 'const msg = "Hello World";',
|
||||
newText: 'const msg = "Goodbye";',
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toContain("Goodbye");
|
||||
});
|
||||
|
||||
it("should match Unicode dashes to ASCII hyphen", async () => {
|
||||
const testFile = join(testDir, "unicode-dashes.txt");
|
||||
// File has en-dash (U+2013) and em-dash (U+2014)
|
||||
writeFileSync(testFile, "range: 1\u20135\nbreak\u2014here\n");
|
||||
|
||||
// oldText with ASCII hyphens should match
|
||||
const result = await editTool.execute("test-fuzzy-4", {
|
||||
path: testFile,
|
||||
oldText: "range: 1-5\nbreak-here",
|
||||
newText: "range: 10-50\nbreak--here",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toContain("10-50");
|
||||
});
|
||||
|
||||
it("should match non-breaking space to regular space", async () => {
|
||||
const testFile = join(testDir, "nbsp.txt");
|
||||
// File has non-breaking space (U+00A0)
|
||||
writeFileSync(testFile, "hello\u00A0world\n");
|
||||
|
||||
// oldText with regular space should match
|
||||
const result = await editTool.execute("test-fuzzy-5", {
|
||||
path: testFile,
|
||||
oldText: "hello world",
|
||||
newText: "hello universe",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toContain("universe");
|
||||
});
|
||||
|
||||
it("should prefer exact match over fuzzy match", async () => {
|
||||
const testFile = join(testDir, "exact-preferred.txt");
|
||||
// File has both exact and fuzzy-matchable content
|
||||
writeFileSync(testFile, "const x = 'exact';\nconst y = 'other';\n");
|
||||
|
||||
const result = await editTool.execute("test-fuzzy-6", {
|
||||
path: testFile,
|
||||
oldText: "const x = 'exact';",
|
||||
newText: "const x = 'changed';",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toBe("const x = 'changed';\nconst y = 'other';\n");
|
||||
});
|
||||
|
||||
it("should still fail when text is not found even with fuzzy matching", async () => {
|
||||
const testFile = join(testDir, "no-match.txt");
|
||||
writeFileSync(testFile, "completely different content\n");
|
||||
|
||||
await expect(
|
||||
editTool.execute("test-fuzzy-7", {
|
||||
path: testFile,
|
||||
oldText: "this does not exist",
|
||||
newText: "replacement",
|
||||
}),
|
||||
).rejects.toThrow(/Could not find the exact text/);
|
||||
});
|
||||
|
||||
it("should detect duplicates after fuzzy normalization", async () => {
|
||||
const testFile = join(testDir, "fuzzy-dups.txt");
|
||||
// Two lines that are identical after trailing whitespace is stripped
|
||||
writeFileSync(testFile, "hello world \nhello world\n");
|
||||
|
||||
await expect(
|
||||
editTool.execute("test-fuzzy-8", {
|
||||
path: testFile,
|
||||
oldText: "hello world",
|
||||
newText: "replaced",
|
||||
}),
|
||||
).rejects.toThrow(/Found 2 occurrences/);
|
||||
});
|
||||
});
|
||||
|
||||
describe("edit tool CRLF handling", () => {
|
||||
let testDir: string;
|
||||
|
||||
beforeEach(() => {
|
||||
testDir = join(tmpdir(), `coding-agent-crlf-test-${Date.now()}`);
|
||||
mkdirSync(testDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("should match LF oldText against CRLF file content", async () => {
|
||||
const testFile = join(testDir, "crlf-test.txt");
|
||||
|
||||
writeFileSync(testFile, "line one\r\nline two\r\nline three\r\n");
|
||||
|
||||
const result = await editTool.execute("test-crlf-1", {
|
||||
path: testFile,
|
||||
oldText: "line two\n",
|
||||
newText: "replaced line\n",
|
||||
});
|
||||
|
||||
expect(getTextOutput(result)).toContain("Successfully replaced");
|
||||
});
|
||||
|
||||
it("should preserve CRLF line endings after edit", async () => {
|
||||
const testFile = join(testDir, "crlf-preserve.txt");
|
||||
writeFileSync(testFile, "first\r\nsecond\r\nthird\r\n");
|
||||
|
||||
await editTool.execute("test-crlf-2", {
|
||||
path: testFile,
|
||||
oldText: "second\n",
|
||||
newText: "REPLACED\n",
|
||||
});
|
||||
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toBe("first\r\nREPLACED\r\nthird\r\n");
|
||||
});
|
||||
|
||||
it("should preserve LF line endings for LF files", async () => {
|
||||
const testFile = join(testDir, "lf-preserve.txt");
|
||||
writeFileSync(testFile, "first\nsecond\nthird\n");
|
||||
|
||||
await editTool.execute("test-lf-1", {
|
||||
path: testFile,
|
||||
oldText: "second\n",
|
||||
newText: "REPLACED\n",
|
||||
});
|
||||
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toBe("first\nREPLACED\nthird\n");
|
||||
});
|
||||
|
||||
it("should detect duplicates across CRLF/LF variants", async () => {
|
||||
const testFile = join(testDir, "mixed-endings.txt");
|
||||
|
||||
writeFileSync(testFile, "hello\r\nworld\r\n---\r\nhello\nworld\n");
|
||||
|
||||
await expect(
|
||||
editTool.execute("test-crlf-dup", {
|
||||
path: testFile,
|
||||
oldText: "hello\nworld\n",
|
||||
newText: "replaced\n",
|
||||
}),
|
||||
).rejects.toThrow(/Found 2 occurrences/);
|
||||
});
|
||||
|
||||
it("should preserve UTF-8 BOM after edit", async () => {
|
||||
const testFile = join(testDir, "bom-test.txt");
|
||||
writeFileSync(testFile, "\uFEFFfirst\r\nsecond\r\nthird\r\n");
|
||||
|
||||
await editTool.execute("test-bom", {
|
||||
path: testFile,
|
||||
oldText: "second\n",
|
||||
newText: "REPLACED\n",
|
||||
});
|
||||
|
||||
const content = readFileSync(testFile, "utf-8");
|
||||
expect(content).toBe("\uFEFFfirst\r\nREPLACED\r\nthird\r\n");
|
||||
});
|
||||
});
|
||||
294
packages/coding-agent/test/tree-selector.test.ts
Normal file
294
packages/coding-agent/test/tree-selector.test.ts
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
import { beforeAll, describe, expect, test } from "vitest";
|
||||
import type {
|
||||
ModelChangeEntry,
|
||||
SessionEntry,
|
||||
SessionMessageEntry,
|
||||
SessionTreeNode,
|
||||
} from "../src/core/session-manager.js";
|
||||
import { TreeSelectorComponent } from "../src/modes/interactive/components/tree-selector.js";
|
||||
import { initTheme } from "../src/modes/interactive/theme/theme.js";
|
||||
|
||||
beforeAll(() => {
|
||||
initTheme("dark");
|
||||
});
|
||||
|
||||
// Helper to create a user message entry
|
||||
function userMessage(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
content: string,
|
||||
): SessionMessageEntry {
|
||||
return {
|
||||
type: "message",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: new Date().toISOString(),
|
||||
message: { role: "user", content, timestamp: Date.now() },
|
||||
};
|
||||
}
|
||||
|
||||
// Helper to create an assistant message entry
|
||||
function assistantMessage(
|
||||
id: string,
|
||||
parentId: string | null,
|
||||
text: string,
|
||||
): SessionMessageEntry {
|
||||
return {
|
||||
type: "message",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: new Date().toISOString(),
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text }],
|
||||
api: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Helper to create a model_change entry
|
||||
function modelChange(id: string, parentId: string | null): ModelChangeEntry {
|
||||
return {
|
||||
type: "model_change",
|
||||
id,
|
||||
parentId,
|
||||
timestamp: new Date().toISOString(),
|
||||
provider: "anthropic",
|
||||
modelId: "claude-sonnet-4",
|
||||
};
|
||||
}
|
||||
|
||||
// Helper to build a tree from entries using parentId relationships
|
||||
function buildTree(entries: Array<SessionEntry>): SessionTreeNode[] {
|
||||
if (entries.length === 0) return [];
|
||||
|
||||
const nodes: SessionTreeNode[] = entries.map((entry) => ({
|
||||
entry,
|
||||
children: [],
|
||||
}));
|
||||
|
||||
const byId = new Map<string, SessionTreeNode>();
|
||||
for (const node of nodes) {
|
||||
byId.set(node.entry.id, node);
|
||||
}
|
||||
|
||||
const roots: SessionTreeNode[] = [];
|
||||
for (const node of nodes) {
|
||||
if (node.entry.parentId === null) {
|
||||
roots.push(node);
|
||||
} else {
|
||||
const parent = byId.get(node.entry.parentId);
|
||||
if (parent) {
|
||||
parent.children.push(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
return roots;
|
||||
}
|
||||
|
||||
describe("TreeSelectorComponent", () => {
|
||||
describe("initial selection with metadata entries", () => {
|
||||
test("focuses nearest visible ancestor when currentLeafId is a model_change with sibling branch", () => {
|
||||
// Tree structure:
|
||||
// user-1
|
||||
// └── asst-1
|
||||
// ├── user-2 (active branch)
|
||||
// │ └── model-1 (model_change, CURRENT LEAF)
|
||||
// └── user-3 (sibling branch, added later chronologically)
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
userMessage("user-2", "asst-1", "active branch"), // Active branch
|
||||
modelChange("model-1", "user-2"), // Current leaf (metadata)
|
||||
userMessage("user-3", "asst-1", "sibling branch"), // Sibling branch
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"model-1", // currentLeafId is the model_change entry
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
// Should focus on user-2 (parent of model-1), not user-3 (last item)
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("user-2");
|
||||
});
|
||||
|
||||
test("focuses nearest visible ancestor when currentLeafId is a thinking_level_change entry", () => {
|
||||
// Similar structure with thinking_level_change instead of model_change
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
userMessage("user-2", "asst-1", "active branch"),
|
||||
{
|
||||
type: "thinking_level_change" as const,
|
||||
id: "thinking-1",
|
||||
parentId: "user-2",
|
||||
timestamp: new Date().toISOString(),
|
||||
thinkingLevel: "high",
|
||||
},
|
||||
userMessage("user-3", "asst-1", "sibling branch"),
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"thinking-1",
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("user-2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("filter switching with parent traversal", () => {
|
||||
test("switches to nearest visible user message when changing to user-only filter", () => {
|
||||
// In user-only filter: [user-1, user-2, user-3]
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
userMessage("user-2", "asst-1", "active branch"),
|
||||
assistantMessage("asst-2", "user-2", "response"),
|
||||
userMessage("user-3", "asst-1", "sibling branch"),
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"asst-2",
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-2");
|
||||
|
||||
// Simulate Ctrl+U (user-only filter)
|
||||
selector.handleInput("\x15");
|
||||
|
||||
// Should now be on user-2 (the parent user message), not user-3
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("user-2");
|
||||
});
|
||||
|
||||
test("returns to nearest visible ancestor when switching back to default filter", () => {
|
||||
// Same branching structure
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
userMessage("user-2", "asst-1", "active branch"),
|
||||
assistantMessage("asst-2", "user-2", "response"),
|
||||
userMessage("user-3", "asst-1", "sibling branch"),
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"asst-2",
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-2");
|
||||
|
||||
// Switch to user-only
|
||||
selector.handleInput("\x15"); // Ctrl+U
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("user-2");
|
||||
|
||||
// Switch back to default - should stay on user-2
|
||||
// (since that's what we navigated to via parent traversal)
|
||||
selector.handleInput("\x04"); // Ctrl+D
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("user-2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("empty filter preservation", () => {
|
||||
test("preserves selection when switching to empty labeled filter and back", () => {
|
||||
// Tree with no labels
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
userMessage("user-2", "asst-1", "bye"),
|
||||
assistantMessage("asst-2", "user-2", "goodbye"),
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"asst-2",
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-2");
|
||||
|
||||
// Switch to labeled-only filter (no labels exist, so empty result)
|
||||
selector.handleInput("\x0c"); // Ctrl+L
|
||||
|
||||
// The list should be empty, getSelectedNode returns undefined
|
||||
expect(list.getSelectedNode()).toBeUndefined();
|
||||
|
||||
// Switch back to default filter
|
||||
selector.handleInput("\x04"); // Ctrl+D
|
||||
|
||||
// Should restore to asst-2 (the selection before we switched to empty filter)
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-2");
|
||||
});
|
||||
|
||||
test("preserves selection through multiple empty filter switches", () => {
|
||||
const entries = [
|
||||
userMessage("user-1", null, "hello"),
|
||||
assistantMessage("asst-1", "user-1", "hi"),
|
||||
];
|
||||
const tree = buildTree(entries);
|
||||
|
||||
const selector = new TreeSelectorComponent(
|
||||
tree,
|
||||
"asst-1",
|
||||
24,
|
||||
() => {},
|
||||
() => {},
|
||||
);
|
||||
|
||||
const list = selector.getTreeList();
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-1");
|
||||
|
||||
// Switch to labeled-only (empty) - Ctrl+L toggles labeled ↔ default
|
||||
selector.handleInput("\x0c"); // Ctrl+L -> labeled-only
|
||||
expect(list.getSelectedNode()).toBeUndefined();
|
||||
|
||||
// Switch to default, then back to labeled-only
|
||||
selector.handleInput("\x0c"); // Ctrl+L -> default (toggle back)
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-1");
|
||||
|
||||
selector.handleInput("\x0c"); // Ctrl+L -> labeled-only again
|
||||
expect(list.getSelectedNode()).toBeUndefined();
|
||||
|
||||
// Switch back to default with Ctrl+D
|
||||
selector.handleInput("\x04"); // Ctrl+D
|
||||
expect(list.getSelectedNode()?.entry.id).toBe("asst-1");
|
||||
});
|
||||
});
|
||||
});
|
||||
84
packages/coding-agent/test/truncate-to-width.test.ts
Normal file
84
packages/coding-agent/test/truncate-to-width.test.ts
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
import { truncateToWidth, visibleWidth } from "@mariozechner/pi-tui";
|
||||
import { describe, expect, it } from "vitest";
|
||||
|
||||
/**
|
||||
* Tests for truncateToWidth behavior with Unicode characters.
|
||||
*
|
||||
* These tests verify that truncateToWidth properly handles text with
|
||||
* Unicode characters that have different byte vs display widths.
|
||||
*/
|
||||
describe("truncateToWidth", () => {
|
||||
it("should truncate messages with Unicode characters correctly", () => {
|
||||
// This message contains a checkmark (✔) which may have display width > 1 byte
|
||||
const message =
|
||||
'✔ script to run › dev $ concurrently "vite" "node --import tsx ./';
|
||||
const width = 67;
|
||||
const maxMsgWidth = width - 2; // Account for cursor
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
const truncatedWidth = visibleWidth(truncated);
|
||||
|
||||
expect(truncatedWidth).toBeLessThanOrEqual(maxMsgWidth);
|
||||
});
|
||||
|
||||
it("should handle emoji characters", () => {
|
||||
const message =
|
||||
"🎉 Celebration! 🚀 Launch 📦 Package ready for deployment now";
|
||||
const width = 40;
|
||||
const maxMsgWidth = width - 2;
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
const truncatedWidth = visibleWidth(truncated);
|
||||
|
||||
expect(truncatedWidth).toBeLessThanOrEqual(maxMsgWidth);
|
||||
});
|
||||
|
||||
it("should handle mixed ASCII and wide characters", () => {
|
||||
const message = "Hello 世界 Test 你好 More text here that is long";
|
||||
const width = 30;
|
||||
const maxMsgWidth = width - 2;
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
const truncatedWidth = visibleWidth(truncated);
|
||||
|
||||
expect(truncatedWidth).toBeLessThanOrEqual(maxMsgWidth);
|
||||
});
|
||||
|
||||
it("should not truncate messages that fit", () => {
|
||||
const message = "Short message";
|
||||
const width = 50;
|
||||
const maxMsgWidth = width - 2;
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
|
||||
expect(truncated).toBe(message);
|
||||
expect(visibleWidth(truncated)).toBeLessThanOrEqual(maxMsgWidth);
|
||||
});
|
||||
|
||||
it("should add ellipsis when truncating", () => {
|
||||
const message = "This is a very long message that needs to be truncated";
|
||||
const width = 30;
|
||||
const maxMsgWidth = width - 2;
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
|
||||
expect(truncated).toContain("...");
|
||||
expect(visibleWidth(truncated)).toBeLessThanOrEqual(maxMsgWidth);
|
||||
});
|
||||
|
||||
it("should handle the exact crash case from issue report", () => {
|
||||
// Terminal width was 67, line had visible width 68
|
||||
// The problematic text contained "✔" and "›" characters
|
||||
const message =
|
||||
'✔ script to run › dev $ concurrently "vite" "node --import tsx ./server.ts"';
|
||||
const terminalWidth = 67;
|
||||
const cursorWidth = 2; // "› " or " "
|
||||
const maxMsgWidth = terminalWidth - cursorWidth;
|
||||
|
||||
const truncated = truncateToWidth(message, maxMsgWidth);
|
||||
const finalWidth = visibleWidth(truncated);
|
||||
|
||||
// The final line (cursor + message) must not exceed terminal width
|
||||
expect(finalWidth + cursorWidth).toBeLessThanOrEqual(terminalWidth);
|
||||
});
|
||||
});
|
||||
314
packages/coding-agent/test/utilities.ts
Normal file
314
packages/coding-agent/test/utilities.ts
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
/**
|
||||
* Shared test utilities for coding-agent tests.
|
||||
*/
|
||||
|
||||
import {
|
||||
chmodSync,
|
||||
existsSync,
|
||||
mkdirSync,
|
||||
readFileSync,
|
||||
rmSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { homedir, tmpdir } from "node:os";
|
||||
import { dirname, join } from "node:path";
|
||||
import { Agent } from "@mariozechner/pi-agent-core";
|
||||
import {
|
||||
getModel,
|
||||
type OAuthCredentials,
|
||||
type OAuthProvider,
|
||||
} from "@mariozechner/pi-ai";
|
||||
import { getOAuthApiKey } from "@mariozechner/pi-ai/oauth";
|
||||
import { AgentSession } from "../src/core/agent-session.js";
|
||||
import { AuthStorage } from "../src/core/auth-storage.js";
|
||||
import { createExtensionRuntime } from "../src/core/extensions/loader.js";
|
||||
import { ModelRegistry } from "../src/core/model-registry.js";
|
||||
import type { ResourceLoader } from "../src/core/resource-loader.js";
|
||||
import { SessionManager } from "../src/core/session-manager.js";
|
||||
import { SettingsManager } from "../src/core/settings-manager.js";
|
||||
import { codingTools } from "../src/core/tools/index.js";
|
||||
|
||||
/**
 * API key for authenticated tests. Tests using this should be wrapped in
 * describe.skipIf(!API_KEY)
 *
 * ANTHROPIC_OAUTH_TOKEN takes precedence over ANTHROPIC_API_KEY when both
 * are set (short-circuiting ||); an empty-string env var is treated as unset.
 */
export const API_KEY =
	process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
||||
|
||||
// ============================================================================
// OAuth API key resolution from ~/.pi/agent/auth.json
// ============================================================================

// Location of the agent's on-disk credential store.
const AUTH_PATH = join(homedir(), ".pi", "agent", "auth.json");

// A plain API key stored verbatim.
type ApiKeyCredential = {
	type: "api_key";
	key: string;
};

// OAuth tokens plus the discriminant tag used in auth.json.
type OAuthCredentialEntry = {
	type: "oauth";
} & OAuthCredentials;

// One stored credential, discriminated by `type`.
type AuthCredential = ApiKeyCredential | OAuthCredentialEntry;

// Shape of auth.json: provider id -> credential.
type AuthStorageData = Record<string, AuthCredential>;
|
||||
|
||||
function loadAuthStorage(): AuthStorageData {
|
||||
if (!existsSync(AUTH_PATH)) {
|
||||
return {};
|
||||
}
|
||||
try {
|
||||
const content = readFileSync(AUTH_PATH, "utf-8");
|
||||
return JSON.parse(content);
|
||||
} catch {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
function saveAuthStorage(storage: AuthStorageData): void {
|
||||
const configDir = dirname(AUTH_PATH);
|
||||
if (!existsSync(configDir)) {
|
||||
mkdirSync(configDir, { recursive: true, mode: 0o700 });
|
||||
}
|
||||
writeFileSync(AUTH_PATH, JSON.stringify(storage, null, 2), "utf-8");
|
||||
chmodSync(AUTH_PATH, 0o600);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve API key for a provider from ~/.pi/agent/auth.json
|
||||
*
|
||||
* For API key credentials, returns the key directly.
|
||||
* For OAuth credentials, returns the access token (refreshing if expired and saving back).
|
||||
*
|
||||
* For google-gemini-cli and google-antigravity, returns JSON-encoded { token, projectId }
|
||||
*/
|
||||
export async function resolveApiKey(
|
||||
provider: string,
|
||||
): Promise<string | undefined> {
|
||||
const storage = loadAuthStorage();
|
||||
const entry = storage[provider];
|
||||
|
||||
if (!entry) return undefined;
|
||||
|
||||
if (entry.type === "api_key") {
|
||||
return entry.key;
|
||||
}
|
||||
|
||||
if (entry.type === "oauth") {
|
||||
// Build OAuthCredentials record for getOAuthApiKey
|
||||
const oauthCredentials: Record<string, OAuthCredentials> = {};
|
||||
for (const [key, value] of Object.entries(storage)) {
|
||||
if (value.type === "oauth") {
|
||||
const { type: _, ...creds } = value;
|
||||
oauthCredentials[key] = creds;
|
||||
}
|
||||
}
|
||||
|
||||
const result = await getOAuthApiKey(
|
||||
provider as OAuthProvider,
|
||||
oauthCredentials,
|
||||
);
|
||||
if (!result) return undefined;
|
||||
|
||||
// Save refreshed credentials back to auth.json
|
||||
storage[provider] = { type: "oauth", ...result.newCredentials };
|
||||
saveAuthStorage(storage);
|
||||
|
||||
return result.apiKey;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a provider has credentials in ~/.pi/agent/auth.json
|
||||
*/
|
||||
export function hasAuthForProvider(provider: string): boolean {
|
||||
const storage = loadAuthStorage();
|
||||
return provider in storage;
|
||||
}
|
||||
|
||||
/** Path to the real pi agent config directory */
export const PI_AGENT_DIR = join(homedir(), ".pi", "agent");

/**
 * Get an AuthStorage instance backed by ~/.pi/agent/auth.json
 * Use this for tests that need real OAuth credentials.
 *
 * NOTE(review): this points at the developer's real credential store —
 * avoid destructive writes from tests; confirm AuthStorage.create does not
 * mutate the file on construction.
 */
export function getRealAuthStorage(): AuthStorage {
	return AuthStorage.create(AUTH_PATH);
}
|
||||
|
||||
/**
|
||||
* Create a minimal user message for testing.
|
||||
*/
|
||||
export function userMsg(text: string) {
|
||||
return { role: "user" as const, content: text, timestamp: Date.now() };
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a minimal assistant message for testing.
|
||||
*/
|
||||
export function assistantMsg(text: string) {
|
||||
return {
|
||||
role: "assistant" as const,
|
||||
content: [{ type: "text" as const, text }],
|
||||
api: "anthropic-messages" as const,
|
||||
provider: "anthropic",
|
||||
model: "test",
|
||||
usage: {
|
||||
input: 1,
|
||||
output: 1,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 2,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop" as const,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Options for creating a test session.
 */
export interface TestSessionOptions {
	/** Use in-memory session (no file persistence) */
	inMemory?: boolean;
	/** Custom system prompt (default: a concise-assistant prompt) */
	systemPrompt?: string;
	/** Custom settings overrides, passed to SettingsManager.applyOverrides */
	settingsOverrides?: Record<string, unknown>;
}

/**
 * Resources returned by createTestSession that need cleanup.
 */
export interface TestSessionContext {
	/** The session under test */
	session: AgentSession;
	/** Manager backing the session (in-memory or file-based) */
	sessionManager: SessionManager;
	/** Temp directory used as cwd and storage root */
	tempDir: string;
	/** Dispose the session and delete tempDir; call when the test is done */
	cleanup: () => void;
}
|
||||
|
||||
export function createTestResourceLoader(): ResourceLoader {
|
||||
return {
|
||||
getExtensions: () => ({
|
||||
extensions: [],
|
||||
errors: [],
|
||||
runtime: createExtensionRuntime(),
|
||||
}),
|
||||
getSkills: () => ({ skills: [], diagnostics: [] }),
|
||||
getPrompts: () => ({ prompts: [], diagnostics: [] }),
|
||||
getThemes: () => ({ themes: [], diagnostics: [] }),
|
||||
getAgentsFiles: () => ({ agentsFiles: [] }),
|
||||
getSystemPrompt: () => undefined,
|
||||
getAppendSystemPrompt: () => [],
|
||||
getPathMetadata: () => new Map(),
|
||||
extendResources: () => {},
|
||||
reload: async () => {},
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an AgentSession for testing with proper setup and cleanup.
|
||||
* Use this for e2e tests that need real LLM calls.
|
||||
*/
|
||||
export function createTestSession(
|
||||
options: TestSessionOptions = {},
|
||||
): TestSessionContext {
|
||||
const tempDir = join(
|
||||
tmpdir(),
|
||||
`pi-test-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
mkdirSync(tempDir, { recursive: true });
|
||||
|
||||
const model = getModel("anthropic", "claude-sonnet-4-5")!;
|
||||
const agent = new Agent({
|
||||
getApiKey: () => API_KEY,
|
||||
initialState: {
|
||||
model,
|
||||
systemPrompt:
|
||||
options.systemPrompt ??
|
||||
"You are a helpful assistant. Be extremely concise.",
|
||||
tools: codingTools,
|
||||
},
|
||||
});
|
||||
|
||||
const sessionManager = options.inMemory
|
||||
? SessionManager.inMemory()
|
||||
: SessionManager.create(tempDir);
|
||||
const settingsManager = SettingsManager.create(tempDir, tempDir);
|
||||
|
||||
if (options.settingsOverrides) {
|
||||
settingsManager.applyOverrides(options.settingsOverrides);
|
||||
}
|
||||
|
||||
const authStorage = AuthStorage.create(join(tempDir, "auth.json"));
|
||||
const modelRegistry = new ModelRegistry(authStorage, tempDir);
|
||||
|
||||
const session = new AgentSession({
|
||||
agent,
|
||||
sessionManager,
|
||||
settingsManager,
|
||||
cwd: tempDir,
|
||||
modelRegistry,
|
||||
resourceLoader: createTestResourceLoader(),
|
||||
});
|
||||
|
||||
// Must subscribe to enable session persistence
|
||||
session.subscribe(() => {});
|
||||
|
||||
const cleanup = () => {
|
||||
session.dispose();
|
||||
if (tempDir && existsSync(tempDir)) {
|
||||
rmSync(tempDir, { recursive: true });
|
||||
}
|
||||
};
|
||||
|
||||
return { session, sessionManager, tempDir, cleanup };
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a session tree for testing using SessionManager.
|
||||
* Returns the IDs of all created entries.
|
||||
*
|
||||
* Example tree structure:
|
||||
* ```
|
||||
* u1 -> a1 -> u2 -> a2
|
||||
* -> u3 -> a3 (branch from a1)
|
||||
* u4 -> a4 (another root)
|
||||
* ```
|
||||
*/
|
||||
export function buildTestTree(
|
||||
session: SessionManager,
|
||||
structure: {
|
||||
messages: Array<{
|
||||
role: "user" | "assistant";
|
||||
text: string;
|
||||
branchFrom?: string;
|
||||
}>;
|
||||
},
|
||||
): Map<string, string> {
|
||||
const ids = new Map<string, string>();
|
||||
|
||||
for (const msg of structure.messages) {
|
||||
if (msg.branchFrom) {
|
||||
const branchFromId = ids.get(msg.branchFrom);
|
||||
if (!branchFromId) {
|
||||
throw new Error(`Cannot branch from unknown entry: ${msg.branchFrom}`);
|
||||
}
|
||||
session.branch(branchFromId);
|
||||
}
|
||||
|
||||
const id =
|
||||
msg.role === "user"
|
||||
? session.appendMessage(userMsg(msg.text))
|
||||
: session.appendMessage(assistantMsg(msg.text));
|
||||
|
||||
ids.set(msg.text, id);
|
||||
}
|
||||
|
||||
return ids;
|
||||
}
|
||||
198
packages/coding-agent/test/vercel-ai-stream.test.ts
Normal file
198
packages/coding-agent/test/vercel-ai-stream.test.ts
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { AgentSessionEvent } from "../src/core/agent-session.js";
|
||||
import {
|
||||
createVercelStreamListener,
|
||||
extractUserText,
|
||||
} from "../src/core/vercel-ai-stream.js";
|
||||
|
||||
describe("extractUserText", () => {
|
||||
it("extracts text from useChat v5+ format with parts", () => {
|
||||
const body = {
|
||||
messages: [
|
||||
{ role: "user", parts: [{ type: "text", text: "hello world" }] },
|
||||
],
|
||||
};
|
||||
expect(extractUserText(body)).toBe("hello world");
|
||||
});
|
||||
|
||||
it("extracts text from useChat v4 format with content string", () => {
|
||||
const body = {
|
||||
messages: [{ role: "user", content: "hello world" }],
|
||||
};
|
||||
expect(extractUserText(body)).toBe("hello world");
|
||||
});
|
||||
|
||||
it("extracts last user message when multiple messages present", () => {
|
||||
const body = {
|
||||
messages: [
|
||||
{ role: "user", parts: [{ type: "text", text: "first" }] },
|
||||
{ role: "assistant", parts: [{ type: "text", text: "response" }] },
|
||||
{ role: "user", parts: [{ type: "text", text: "second" }] },
|
||||
],
|
||||
};
|
||||
expect(extractUserText(body)).toBe("second");
|
||||
});
|
||||
|
||||
it("extracts text from simple gateway format", () => {
|
||||
expect(extractUserText({ text: "hello" })).toBe("hello");
|
||||
});
|
||||
|
||||
it("extracts text from prompt format", () => {
|
||||
expect(extractUserText({ prompt: "hello" })).toBe("hello");
|
||||
});
|
||||
|
||||
it("returns null for empty body", () => {
|
||||
expect(extractUserText({})).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for empty messages array", () => {
|
||||
expect(extractUserText({ messages: [] })).toBeNull();
|
||||
});
|
||||
|
||||
it("prefers text field over messages", () => {
|
||||
const body = {
|
||||
text: "direct",
|
||||
messages: [
|
||||
{ role: "user", parts: [{ type: "text", text: "from messages" }] },
|
||||
],
|
||||
};
|
||||
expect(extractUserText(body)).toBe("direct");
|
||||
});
|
||||
});
|
||||
|
||||
describe("createVercelStreamListener", () => {
	// Minimal stand-in for an HTTP response: records SSE chunks and tracks
	// whether end() was called. Cast to any because only the subset of the
	// response interface used by the listener is implemented.
	function createMockResponse() {
		const chunks: string[] = [];
		let ended = false;
		return {
			writableEnded: false,
			write(data: string) {
				chunks.push(data);
				return true;
			},
			end() {
				ended = true;
				this.writableEnded = true;
			},
			chunks,
			get ended() {
				return ended;
			},
		} as any;
	}

	// Decode "data: {...}\n\n" SSE frames back into objects (or the raw
	// payload string when it is not valid JSON).
	function parseChunks(chunks: string[]): Array<object | string> {
		return chunks
			.filter((c) => c.startsWith("data: "))
			.map((c) => {
				const payload = c.replace(/^data: /, "").replace(/\n\n$/, "");
				try {
					return JSON.parse(payload);
				} catch {
					return payload;
				}
			});
	}

	it("translates text streaming events", () => {
		const response = createMockResponse();
		const listener = createVercelStreamListener(response, "test-msg-id");

		// Drive a full prompt lifecycle: agent start, one turn with a single
		// text part streamed as start/delta/end, then turn end.
		listener({ type: "agent_start" } as AgentSessionEvent);
		listener({
			type: "turn_start",
			turnIndex: 0,
			timestamp: Date.now(),
		} as AgentSessionEvent);
		listener({
			type: "message_update",
			message: {} as any,
			assistantMessageEvent: {
				type: "text_start",
				contentIndex: 0,
				partial: {} as any,
			},
		} as AgentSessionEvent);
		listener({
			type: "message_update",
			message: {} as any,
			assistantMessageEvent: {
				type: "text_delta",
				contentIndex: 0,
				delta: "hello",
				partial: {} as any,
			},
		} as AgentSessionEvent);
		listener({
			type: "message_update",
			message: {} as any,
			assistantMessageEvent: {
				type: "text_end",
				contentIndex: 0,
				content: "hello",
				partial: {} as any,
			},
		} as AgentSessionEvent);
		listener({
			type: "turn_end",
			turnIndex: 0,
			message: {} as any,
			toolResults: [],
		} as AgentSessionEvent);

		// Expect the Vercel AI data-stream protocol events in order; the text
		// part id is derived from its content index ("text_0").
		const parsed = parseChunks(response.chunks);
		expect(parsed).toEqual([
			{ type: "start", messageId: "test-msg-id" },
			{ type: "start-step" },
			{ type: "text-start", id: "text_0" },
			{ type: "text-delta", id: "text_0", delta: "hello" },
			{ type: "text-end", id: "text_0" },
			{ type: "finish-step" },
		]);
	});

	it("does not write after response has ended", () => {
		const response = createMockResponse();
		const listener = createVercelStreamListener(response, "test-msg-id");

		// End the response between events: only the first event may be written.
		listener({ type: "agent_start" } as AgentSessionEvent);
		response.end();
		listener({
			type: "turn_start",
			turnIndex: 0,
			timestamp: Date.now(),
		} as AgentSessionEvent);

		const parsed = parseChunks(response.chunks);
		expect(parsed).toEqual([{ type: "start", messageId: "test-msg-id" }]);
	});

	it("ignores events outside the active prompt lifecycle", () => {
		const response = createMockResponse();
		const listener = createVercelStreamListener(response, "test-msg-id");

		// turn_start before agent_start must be dropped; likewise any event
		// after agent_end. Only the events inside agent_start..agent_end count.
		listener({
			type: "turn_start",
			turnIndex: 0,
			timestamp: Date.now(),
		} as AgentSessionEvent);
		listener({ type: "agent_start" } as AgentSessionEvent);
		listener({
			type: "turn_start",
			turnIndex: 0,
			timestamp: Date.now(),
		} as AgentSessionEvent);
		listener({ type: "agent_end", messages: [] } as AgentSessionEvent);
		listener({
			type: "turn_start",
			turnIndex: 1,
			timestamp: Date.now(),
		} as AgentSessionEvent);

		const parsed = parseChunks(response.chunks);
		expect(parsed).toEqual([
			{ type: "start", messageId: "test-msg-id" },
			{ type: "start-step" },
		]);
	});
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue