feat(plan-mode): enhanced plan mode with explicit step tracking (#694)

Changes from the original:
- Explicit [DONE:n] tag tracking (more accurate than auto-marking on tool_result)
- Plan: header requirement - only extracts todos from 'Plan:' sections
- Utils extracted to separate file for testability
- Better session resume - only scans messages after plan-mode-execute marker
- Context filtering - properly filters plan-mode-context custom type messages
- Refactored to directory structure (index.ts + utils.ts + README.md)

The original auto-completed steps on every tool_result, which was inaccurate for multi-tool steps. This version uses explicit [DONE:n] markers that the agent outputs after completing each step.
2026-04-15 06:04:40 +00:00 · 2026-01-13 17:53:11 +01:00 · 2026-01-13 17:53:11 +01:00 · e8f1322eee
commit e8f1322eee
parent a7a863c792
5 changed files with 834 additions and 548 deletions
--- a/packages/coding-agent/examples/extensions/plan-mode.ts
+++ b/packages/coding-agent/examples/extensions/plan-mode.ts
@ -1,548 +0,0 @@
-/**
- * Plan Mode Extension
- *
- * Provides a Claude Code-style "plan mode" for safe code exploration.
- * When enabled, the agent can only use read-only tools and cannot modify files.
- *
- * Features:
- * - /plan command to toggle plan mode
- * - In plan mode: only read, bash (read-only), grep, find, ls are available
- * - Injects system context telling the agent about the restrictions
- * - After each agent response, prompts to execute the plan or continue planning
- * - Shows "plan" indicator in footer when active
- * - Extracts todo list from plan and tracks progress during execution
- * - Uses ID-based tracking: agent outputs [DONE:id] to mark steps complete
- *
- * Usage:
- * 1. Copy this file to ~/.pi/agent/extensions/ or your project's .pi/extensions/
- * 2. Use /plan to toggle plan mode on/off
- * 3. Or start in plan mode with --plan flag
- */
-
-import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
-import { Key } from "@mariozechner/pi-tui";
-
-// Read-only tools for plan mode
-const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls"];
-
-// Full set of tools for normal mode
-const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"];
-
-// Patterns for destructive bash commands that should be blocked in plan mode
-const DESTRUCTIVE_PATTERNS = [
-	/\brm\b/i,
-	/\brmdir\b/i,
-	/\bmv\b/i,
-	/\bcp\b/i,
-	/\bmkdir\b/i,
-	/\btouch\b/i,
-	/\bchmod\b/i,
-	/\bchown\b/i,
-	/\bchgrp\b/i,
-	/\bln\b/i,
-	/\btee\b/i,
-	/\btruncate\b/i,
-	/\bdd\b/i,
-	/\bshred\b/i,
-	/[^<]>(?!>)/,
-	/>>/,
-	/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
-	/\byarn\s+(add|remove|install|publish)/i,
-	/\bpnpm\s+(add|remove|install|publish)/i,
-	/\bpip\s+(install|uninstall)/i,
-	/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
-	/\bbrew\s+(install|uninstall|upgrade)/i,
-	/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
-	/\bsudo\b/i,
-	/\bsu\b/i,
-	/\bkill\b/i,
-	/\bpkill\b/i,
-	/\bkillall\b/i,
-	/\breboot\b/i,
-	/\bshutdown\b/i,
-	/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
-	/\bservice\s+\S+\s+(start|stop|restart)/i,
-	/\b(vim?|nano|emacs|code|subl)\b/i,
-];
-
-// Read-only commands that are always safe
-const SAFE_COMMANDS = [
-	/^\s*cat\b/,
-	/^\s*head\b/,
-	/^\s*tail\b/,
-	/^\s*less\b/,
-	/^\s*more\b/,
-	/^\s*grep\b/,
-	/^\s*find\b/,
-	/^\s*ls\b/,
-	/^\s*pwd\b/,
-	/^\s*echo\b/,
-	/^\s*printf\b/,
-	/^\s*wc\b/,
-	/^\s*sort\b/,
-	/^\s*uniq\b/,
-	/^\s*diff\b/,
-	/^\s*file\b/,
-	/^\s*stat\b/,
-	/^\s*du\b/,
-	/^\s*df\b/,
-	/^\s*tree\b/,
-	/^\s*which\b/,
-	/^\s*whereis\b/,
-	/^\s*type\b/,
-	/^\s*env\b/,
-	/^\s*printenv\b/,
-	/^\s*uname\b/,
-	/^\s*whoami\b/,
-	/^\s*id\b/,
-	/^\s*date\b/,
-	/^\s*cal\b/,
-	/^\s*uptime\b/,
-	/^\s*ps\b/,
-	/^\s*top\b/,
-	/^\s*htop\b/,
-	/^\s*free\b/,
-	/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
-	/^\s*git\s+ls-/i,
-	/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
-	/^\s*yarn\s+(list|info|why|audit)/i,
-	/^\s*node\s+--version/i,
-	/^\s*python\s+--version/i,
-	/^\s*curl\s/i,
-	/^\s*wget\s+-O\s*-/i,
-	/^\s*jq\b/,
-	/^\s*sed\s+-n/i,
-	/^\s*awk\b/,
-	/^\s*rg\b/,
-	/^\s*fd\b/,
-	/^\s*bat\b/,
-	/^\s*exa\b/,
-];
-
-function isSafeCommand(command: string): boolean {
-	if (SAFE_COMMANDS.some((pattern) => pattern.test(command))) {
-		if (!DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
-			return true;
-		}
-	}
-	if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
-		return false;
-	}
-	return true;
-}
-
-// Todo item with step number
-interface TodoItem {
-	step: number;
-	text: string;
-	completed: boolean;
-}
-
-/**
- * Clean up extracted step text for display.
- */
-function cleanStepText(text: string): string {
-	let cleaned = text
-		// Remove markdown bold/italic
-		.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1")
-		// Remove markdown code
-		.replace(/`([^`]+)`/g, "$1")
-		// Remove leading action words that are redundant
-		.replace(
-			/^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
-			"",
-		)
-		// Clean up extra whitespace
-		.replace(/\s+/g, " ")
-		.trim();
-
-	// Capitalize first letter
-	if (cleaned.length > 0) {
-		cleaned = cleaned.charAt(0).toUpperCase() + cleaned.slice(1);
-	}
-
-	// Truncate if too long
-	if (cleaned.length > 50) {
-		cleaned = `${cleaned.slice(0, 47)}...`;
-	}
-
-	return cleaned;
-}
-
-/**
- * Extract todo items from assistant message.
- */
-function extractTodoItems(message: string): TodoItem[] {
-	const items: TodoItem[] = [];
-
-	// Match numbered lists: "1. Task" or "1) Task" - also handle **bold** prefixes
-	const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm;
-	for (const match of message.matchAll(numberedPattern)) {
-		let text = match[2].trim();
-		text = text.replace(/\*{1,2}$/, "").trim();
-		// Skip if too short or looks like code/command
-		if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) {
-			const cleaned = cleanStepText(text);
-			if (cleaned.length > 3) {
-				items.push({ step: items.length + 1, text: cleaned, completed: false });
-			}
-		}
-	}
-
-	// If no numbered items, try bullet points
-	if (items.length === 0) {
-		const stepPattern = /^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim;
-		for (const match of message.matchAll(stepPattern)) {
-			let text = match[1].trim();
-			text = text.replace(/\*{1,2}$/, "").trim();
-			if (text.length > 10 && !text.startsWith("`")) {
-				const cleaned = cleanStepText(text);
-				if (cleaned.length > 3) {
-					items.push({ step: items.length + 1, text: cleaned, completed: false });
-				}
-			}
-		}
-	}
-
-	return items;
-}
-
-export default function planModeExtension(pi: ExtensionAPI) {
-	let planModeEnabled = false;
-	let toolsCalledThisTurn = false;
-	let executionMode = false;
-	let todoItems: TodoItem[] = [];
-
-	// Register --plan CLI flag
-	pi.registerFlag("plan", {
-		description: "Start in plan mode (read-only exploration)",
-		type: "boolean",
-		default: false,
-	});
-
-	// Helper to update status displays
-	function updateStatus(ctx: ExtensionContext) {
-		if (executionMode && todoItems.length > 0) {
-			const completed = todoItems.filter((t) => t.completed).length;
-			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
-		} else if (planModeEnabled) {
-			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
-		} else {
-			ctx.ui.setStatus("plan-mode", undefined);
-		}
-
-		// Show widget during execution (no IDs shown to user)
-		if (executionMode && todoItems.length > 0) {
-			const lines: string[] = [];
-			for (const item of todoItems) {
-				if (item.completed) {
-					lines.push(
-						ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text)),
-					);
-				} else {
-					lines.push(ctx.ui.theme.fg("muted", "☐ ") + item.text);
-				}
-			}
-			ctx.ui.setWidget("plan-todos", lines);
-		} else {
-			ctx.ui.setWidget("plan-todos", undefined);
-		}
-	}
-
-	function togglePlanMode(ctx: ExtensionContext) {
-		planModeEnabled = !planModeEnabled;
-		executionMode = false;
-		todoItems = [];
-
-		if (planModeEnabled) {
-			pi.setActiveTools(PLAN_MODE_TOOLS);
-			ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
-		} else {
-			pi.setActiveTools(NORMAL_MODE_TOOLS);
-			ctx.ui.notify("Plan mode disabled. Full access restored.");
-		}
-		updateStatus(ctx);
-	}
-
-	// Register /plan command
-	pi.registerCommand("plan", {
-		description: "Toggle plan mode (read-only exploration)",
-		handler: async (_args, ctx) => {
-			togglePlanMode(ctx);
-		},
-	});
-
-	// Register /todos command
-	pi.registerCommand("todos", {
-		description: "Show current plan todo list",
-		handler: async (_args, ctx) => {
-			if (todoItems.length === 0) {
-				ctx.ui.notify("No todos. Create a plan first with /plan", "info");
-				return;
-			}
-
-			const todoList = todoItems
-				.map((item, i) => {
-					const checkbox = item.completed ? "✓" : "○";
-					return `${i + 1}. ${checkbox} ${item.text}`;
-				})
-				.join("\n");
-
-			ctx.ui.notify(`Plan Progress:\n${todoList}`, "info");
-		},
-	});
-
-	// Register Shift+P shortcut
-	pi.registerShortcut(Key.shift("p"), {
-		description: "Toggle plan mode",
-		handler: async (ctx) => {
-			togglePlanMode(ctx);
-		},
-	});
-
-	// Block destructive bash in plan mode
-	pi.on("tool_call", async (event) => {
-		if (!planModeEnabled) return;
-		if (event.toolName !== "bash") return;
-
-		const command = event.input.command as string;
-		if (!isSafeCommand(command)) {
-			return {
-				block: true,
-				reason: `Plan mode: destructive command blocked. Use /plan to disable plan mode first.\nCommand: ${command}`,
-			};
-		}
-	});
-
-	// Track step completion based on tool results
-	pi.on("tool_result", async (_event, ctx) => {
-		toolsCalledThisTurn = true;
-
-		if (!executionMode || todoItems.length === 0) return;
-
-		// Mark the first uncompleted step as done when any tool succeeds
-		const nextStep = todoItems.find((t) => !t.completed);
-		if (nextStep) {
-			nextStep.completed = true;
-			updateStatus(ctx);
-		}
-	});
-
-	// Filter out stale plan mode context messages from LLM context
-	// This ensures the agent only sees the CURRENT state (plan mode on/off)
-	pi.on("context", async (event) => {
-		// Only filter when NOT in plan mode (i.e., when executing)
-		if (planModeEnabled) {
-			return;
-		}
-
-		// Remove any previous plan-mode-context messages
-		const _beforeCount = event.messages.length;
-		const filtered = event.messages.filter((m) => {
-			if (m.role === "user" && Array.isArray(m.content)) {
-				const hasOldContext = m.content.some((c) => c.type === "text" && c.text.includes("[PLAN MODE ACTIVE]"));
-				if (hasOldContext) {
-					return false;
-				}
-			}
-			return true;
-		});
-		return { messages: filtered };
-	});
-
-	// Inject plan mode context
-	pi.on("before_agent_start", async () => {
-		if (!planModeEnabled && !executionMode) {
-			return;
-		}
-
-		if (planModeEnabled) {
-			return {
-				message: {
-					customType: "plan-mode-context",
-					content: `[PLAN MODE ACTIVE]
-You are in plan mode - a read-only exploration mode for safe code analysis.
-
-Restrictions:
- You can only use: read, bash, grep, find, ls
- You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to READ-ONLY commands
- Focus on analysis, planning, and understanding the codebase
-
-Create a detailed numbered plan:
-1. First step description
-2. Second step description
-...
-
-Do NOT attempt to make changes - just describe what you would do.`,
-					display: false,
-				},
-			};
-		}
-
-		if (executionMode && todoItems.length > 0) {
-			const remaining = todoItems.filter((t) => !t.completed);
-			const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
-			return {
-				message: {
-					customType: "plan-execution-context",
-					content: `[EXECUTING PLAN - Full tool access enabled]
-
-Remaining steps:
-${todoList}
-
-Execute each step in order.`,
-					display: false,
-				},
-			};
-		}
-	});
-
-	// After agent finishes
-	pi.on("agent_end", async (event, ctx) => {
-		// In execution mode, check if all steps complete
-		if (executionMode && todoItems.length > 0) {
-			const allComplete = todoItems.every((t) => t.completed);
-			if (allComplete) {
-				// Show final completed list in chat
-				const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
-				pi.sendMessage(
-					{
-						customType: "plan-complete",
-						content: `**Plan Complete!** ✓\n\n${completedList}`,
-						display: true,
-					},
-					{ triggerTurn: false },
-				);
-
-				executionMode = false;
-				todoItems = [];
-				pi.setActiveTools(NORMAL_MODE_TOOLS);
-				updateStatus(ctx);
-			}
-			return;
-		}
-
-		if (!planModeEnabled) return;
-		if (!ctx.hasUI) return;
-
-		// Extract todos from last message
-		const messages = event.messages;
-		const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
-		if (lastAssistant && Array.isArray(lastAssistant.content)) {
-			const textContent = lastAssistant.content
-				.filter((block): block is { type: "text"; text: string } => block.type === "text")
-				.map((block) => block.text)
-				.join("\n");
-
-			if (textContent) {
-				const extracted = extractTodoItems(textContent);
-				if (extracted.length > 0) {
-					todoItems = extracted;
-				}
-			}
-		}
-
-		const hasTodos = todoItems.length > 0;
-
-		// Show todo list in chat (no IDs shown to user, just numbered)
-		if (hasTodos) {
-			const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
-			pi.sendMessage(
-				{
-					customType: "plan-todo-list",
-					content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
-					display: true,
-				},
-				{ triggerTurn: false },
-			);
-		}
-
-		const choice = await ctx.ui.select("Plan mode - what next?", [
-			hasTodos ? "Execute the plan (track progress)" : "Execute the plan",
-			"Stay in plan mode",
-			"Refine the plan",
-		]);
-
-		if (choice?.startsWith("Execute")) {
-			planModeEnabled = false;
-			executionMode = hasTodos;
-			pi.setActiveTools(NORMAL_MODE_TOOLS);
-			updateStatus(ctx);
-
-			// Simple execution message - context event filters old plan mode messages
-			// and before_agent_start injects fresh execution context with IDs
-			const execMessage = hasTodos
-				? `Execute the plan. Start with: ${todoItems[0].text}`
-				: "Execute the plan you just created.";
-
-			pi.sendMessage(
-				{
-					customType: "plan-mode-execute",
-					content: execMessage,
-					display: true,
-				},
-				{ triggerTurn: true },
-			);
-		} else if (choice === "Refine the plan") {
-			const refinement = await ctx.ui.input("What should be refined?");
-			if (refinement) {
-				ctx.ui.setEditorText(refinement);
-			}
-		}
-	});
-
-	// Initialize state on session start
-	pi.on("session_start", async (_event, ctx) => {
-		if (pi.getFlag("plan") === true) {
-			planModeEnabled = true;
-		}
-
-		const entries = ctx.sessionManager.getEntries();
-		const planModeEntry = entries
-			.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
-			.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
-
-		if (planModeEntry?.data) {
-			if (planModeEntry.data.enabled !== undefined) {
-				planModeEnabled = planModeEntry.data.enabled;
-			}
-			if (planModeEntry.data.todos) {
-				todoItems = planModeEntry.data.todos;
-			}
-			if (planModeEntry.data.executing) {
-				executionMode = planModeEntry.data.executing;
-			}
-		}
-
-		if (planModeEnabled) {
-			pi.setActiveTools(PLAN_MODE_TOOLS);
-		}
-		updateStatus(ctx);
-	});
-
-	// Reset tool tracking at start of each turn and persist state
-	pi.on("turn_start", async () => {
-		toolsCalledThisTurn = false;
-		pi.appendEntry("plan-mode", {
-			enabled: planModeEnabled,
-			todos: todoItems,
-			executing: executionMode,
-		});
-	});
-
-	// Handle non-tool turns (e.g., analysis, explanation steps)
-	pi.on("turn_end", async (_event, ctx) => {
-		if (!executionMode || todoItems.length === 0) return;
-
-		// If no tools were called this turn, the agent was doing analysis/explanation
-		// Mark the next uncompleted step as done
-		if (!toolsCalledThisTurn) {
-			const nextStep = todoItems.find((t) => !t.completed);
-			if (nextStep) {
-				nextStep.completed = true;
-				updateStatus(ctx);
-			}
-		}
-	});
-}
--- a/packages/coding-agent/examples/extensions/plan-mode/README.md
+++ b/packages/coding-agent/examples/extensions/plan-mode/README.md
@ -0,0 +1,65 @@
+# Plan Mode Extension
+
+Read-only exploration mode for safe code analysis.
+
+## Features
+
+- **Read-only tools**: Restricts available tools to read, bash, grep, find, ls, questionnaire
+- **Bash allowlist**: Only read-only bash commands are allowed
+- **Plan extraction**: Extracts numbered steps from `Plan:` sections
+- **Progress tracking**: Widget shows completion status during execution
+- **[DONE:n] markers**: Explicit step completion tracking
+- **Session persistence**: State survives session resume
+
+## Commands
+
+- `/plan` - Toggle plan mode
+- `/todos` - Show current plan progress
+- `Shift+P` - Toggle plan mode (shortcut)
+
+## Usage
+
+1. Enable plan mode with `/plan` or `--plan` flag
+2. Ask the agent to analyze code and create a plan
+3. The agent should output a numbered plan under a `Plan:` header:
+
+```
+Plan:
+1. First step description
+2. Second step description
+3. Third step description
+```
+
+4. Choose "Execute the plan" when prompted
+5. During execution, the agent marks steps complete with `[DONE:n]` tags
+6. Progress widget shows completion status
+
+## How It Works
+
+### Plan Mode (Read-Only)
+- Only read-only tools available
+- Bash commands filtered through allowlist
+- Agent creates a plan without making changes
+
+### Execution Mode
+- Full tool access restored
+- Agent executes steps in order
+- `[DONE:n]` markers track completion
+- Widget shows progress
+
+### Command Allowlist
+
+Safe commands (allowed), for example:
+- File inspection: `cat`, `head`, `tail`, `less`, `more`
+- Search: `grep`, `find`, `rg`, `fd`
+- Directory: `ls`, `pwd`, `tree`
+- Git read: `git status`, `git log`, `git diff`, `git branch`
+- Package info: `npm list`, `npm outdated`, `yarn info`
+- System info: `uname`, `whoami`, `date`, `uptime`
+
+Blocked commands, for example:
+- File modification: `rm`, `mv`, `cp`, `mkdir`, `touch`
+- Git write: `git add`, `git commit`, `git push`
+- Package install: `npm install`, `yarn add`, `pip install`
+- System: `sudo`, `kill`, `reboot`
+- Editors: `vim`, `nano`, `code`
--- a/packages/coding-agent/examples/extensions/plan-mode/index.ts
+++ b/packages/coding-agent/examples/extensions/plan-mode/index.ts
@ -0,0 +1,340 @@
+/**
+ * Plan Mode Extension
+ *
+ * Read-only exploration mode for safe code analysis.
+ * When enabled, only read-only tools are available.
+ *
+ * Features:
+ * - /plan command or Shift+P to toggle
+ * - Bash restricted to allowlisted read-only commands
+ * - Extracts numbered plan steps from "Plan:" sections
+ * - [DONE:n] markers to complete steps during execution
+ * - Progress tracking widget during execution
+ */
+
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { AssistantMessage, TextContent } from "@mariozechner/pi-ai";
+import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
+import { Key } from "@mariozechner/pi-tui";
+import { extractTodoItems, isSafeCommand, markCompletedSteps, type TodoItem } from "./utils.js";
+
+// Tool sets: the tools made active while plan mode is on, and the set activated when plan mode is left
+const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls", "questionnaire"];
+const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"];
+
+/**
+ * Type guard that narrows an AgentMessage to an AssistantMessage.
+ * A message qualifies when its role is "assistant" and its content is an array.
+ */
+function isAssistantMessage(m: AgentMessage): m is AssistantMessage {
+	return m.role === "assistant" && Array.isArray(m.content);
+}
+
+/**
+ * Collects the text of an assistant message.
+ * Keeps only the content blocks whose type is "text" and joins their text with
+ * newlines; a message without text blocks yields an empty string.
+ */
+function getTextContent(message: AssistantMessage): string {
+	return message.content
+		.filter((block): block is TextContent => block.type === "text")
+		.map((block) => block.text)
+		.join("\n");
+}
+/**
+ * Registers the plan mode extension on the given ExtensionAPI.
+ *
+ * Sets up the "plan" flag, the "plan" and "todos" commands, the Shift+P shortcut and
+ * handlers for the tool_call, context, before_agent_start, turn_end, agent_end and
+ * session_start events. All state (planModeEnabled, executionMode, todoItems) is kept
+ * in this closure and shared by the handlers below.
+ */
+export default function planModeExtension(pi: ExtensionAPI): void {
+	let planModeEnabled = false;
+	let executionMode = false;
+	let todoItems: TodoItem[] = [];
+
+	pi.registerFlag("plan", {
+		description: "Start in plan mode (read-only exploration)",
+		type: "boolean",
+		default: false,
+	});
+	// Refresh the "plan-mode" footer status and the "plan-todos" widget from the current state
+	function updateStatus(ctx: ExtensionContext): void {
+		// Footer status: completed/total counter while executing, plan marker while planning, cleared otherwise
+		if (executionMode && todoItems.length > 0) {
+			const completed = todoItems.filter((t) => t.completed).length;
+			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
+		} else if (planModeEnabled) {
+			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
+		} else {
+			ctx.ui.setStatus("plan-mode", undefined);
+		}
+
+		// Widget showing todo list: only while executing a plan that has steps; removed in every other state
+		if (executionMode && todoItems.length > 0) {
+			const lines = todoItems.map((item) => {
+				if (item.completed) {
+					return (
+						ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text))
+					);
+				}
+				return `${ctx.ui.theme.fg("muted", "☐ ")}${item.text}`;
+			});
+			ctx.ui.setWidget("plan-todos", lines);
+		} else {
+			ctx.ui.setWidget("plan-todos", undefined);
+		}
+	}
+	// Flip plan mode, discard any execution state and todos, and switch the active tool set to match
+	function togglePlanMode(ctx: ExtensionContext): void {
+		planModeEnabled = !planModeEnabled;
+		executionMode = false;
+		todoItems = [];
+
+		if (planModeEnabled) {
+			pi.setActiveTools(PLAN_MODE_TOOLS);
+			ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
+		} else {
+			pi.setActiveTools(NORMAL_MODE_TOOLS);
+			ctx.ui.notify("Plan mode disabled. Full access restored.");
+		}
+		updateStatus(ctx);
+	}
+	/*
+	 * Append a "plan-mode" entry holding the current flags and todos; session_start reads
+	 * the latest such entry back.
+	 * NOTE(review): this is only called from turn_end (during execution) and when a plan
+	 * completes, so toggling plan mode is not persisted by this function's callers.
+	 */
+	function persistState(): void {
+		pi.appendEntry("plan-mode", {
+			enabled: planModeEnabled,
+			todos: todoItems,
+			executing: executionMode,
+		});
+	}
+
+	pi.registerCommand("plan", {
+		description: "Toggle plan mode (read-only exploration)",
+		handler: async (_args, ctx) => togglePlanMode(ctx),
+	});
+
+	pi.registerCommand("todos", {
+		description: "Show current plan todo list",
+		handler: async (_args, ctx) => {
+			if (todoItems.length === 0) {
+				ctx.ui.notify("No todos. Create a plan first with /plan", "info");
+				return;
+			}
+			const list = todoItems.map((item, i) => `${i + 1}. ${item.completed ? "✓" : "○"} ${item.text}`).join("\n");
+			ctx.ui.notify(`Plan Progress:\n${list}`, "info");
+		},
+	});
+
+	pi.registerShortcut(Key.shift("p"), {
+		description: "Toggle plan mode",
+		handler: async (ctx) => togglePlanMode(ctx),
+	});
+
+	// In plan mode, block any bash command that isSafeCommand() rejects; other tools are not inspected here
+	pi.on("tool_call", async (event) => {
+		if (!planModeEnabled || event.toolName !== "bash") return;
+
+		const command = event.input.command as string;
+		if (!isSafeCommand(command)) {
+			return {
+				block: true,
+				reason: `Plan mode: command blocked (not allowlisted). Use /plan to disable plan mode first.\nCommand: ${command}`,
+			};
+		}
+	});
+
+	// Outside plan mode, drop stale plan mode context: "plan-mode-context" custom messages and user messages carrying the [PLAN MODE ACTIVE] marker
+	pi.on("context", async (event) => {
+		if (planModeEnabled) return;
+
+		return {
+			messages: event.messages.filter((m) => {
+				const msg = m as AgentMessage & { customType?: string };
+				if (msg.customType === "plan-mode-context") return false;
+				if (msg.role !== "user") return true;
+
+				const content = msg.content;
+				if (typeof content === "string") {
+					return !content.includes("[PLAN MODE ACTIVE]");
+				}
+				if (Array.isArray(content)) {
+					return !content.some(
+						(c) => c.type === "text" && (c as TextContent).text?.includes("[PLAN MODE ACTIVE]"),
+					);
+				}
+				return true;
+			}),
+		};
+	});
+
+	// Inject context before the agent starts: plan mode instructions while planning, or the remaining steps plus the [DONE:n] convention while executing
+	pi.on("before_agent_start", async () => {
+		if (planModeEnabled) {
+			return {
+				message: {
+					customType: "plan-mode-context",
+					content: `[PLAN MODE ACTIVE]
+You are in plan mode - a read-only exploration mode for safe code analysis.
+
+Restrictions:
+- You can only use: read, bash, grep, find, ls, questionnaire
+- You CANNOT use: edit, write (file modifications are disabled)
+- Bash is restricted to an allowlist of read-only commands
+
+Ask clarifying questions using the questionnaire tool.
+Use brave-search skill via bash for web research.
+
+Create a detailed numbered plan under a "Plan:" header:
+
+Plan:
+1. First step description
+2. Second step description
+...
+
+Do NOT attempt to make changes - just describe what you would do.`,
+					display: false,
+				},
+			};
+		}
+
+		if (executionMode && todoItems.length > 0) {
+			const remaining = todoItems.filter((t) => !t.completed);
+			const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
+			return {
+				message: {
+					customType: "plan-execution-context",
+					content: `[EXECUTING PLAN - Full tool access enabled]
+
+Remaining steps:
+${todoList}
+
+Execute each step in order.
+After completing a step, include a [DONE:n] tag in your response.`,
+					display: false,
+				},
+			};
+		}
+	});
+
+	// Track progress after each turn of an execution: pass the assistant text to markCompletedSteps (utils.ts; presumably applies [DONE:n] tags and returns how many steps changed - confirm there), then persist
+	pi.on("turn_end", async (event, ctx) => {
+		if (!executionMode || todoItems.length === 0) return;
+		if (!isAssistantMessage(event.message)) return;
+
+		const text = getTextContent(event.message);
+		if (markCompletedSteps(text, todoItems) > 0) {
+			updateStatus(ctx);
+		}
+		persistState();
+	});
+
+	// Handle plan completion (execution mode) and the "what next?" prompt (plan mode with a UI)
+	pi.on("agent_end", async (event, ctx) => {
+		// Check if execution is complete; while executing, this handler never falls through to the plan mode prompt
+		if (executionMode && todoItems.length > 0) {
+			if (todoItems.every((t) => t.completed)) {
+				const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
+				pi.sendMessage(
+					{ customType: "plan-complete", content: `**Plan Complete!** ✓\n\n${completedList}`, display: true },
+					{ triggerTurn: false },
+				);
+				executionMode = false;
+				todoItems = [];
+				pi.setActiveTools(NORMAL_MODE_TOOLS);
+				updateStatus(ctx);
+				persistState(); // Save cleared state so resume doesn't restore old execution mode
+			}
+			return;
+		}
+
+		if (!planModeEnabled || !ctx.hasUI) return;
+
+		// Extract todos from last assistant message; an empty extraction keeps the previously extracted list
+		const lastAssistant = [...event.messages].reverse().find(isAssistantMessage);
+		if (lastAssistant) {
+			const extracted = extractTodoItems(getTextContent(lastAssistant));
+			if (extracted.length > 0) {
+				todoItems = extracted;
+			}
+		}
+
+		// Show plan steps (when there are any) and prompt for next action
+		if (todoItems.length > 0) {
+			const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
+			pi.sendMessage(
+				{
+					customType: "plan-todo-list",
+					content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
+					display: true,
+				},
+				{ triggerTurn: false },
+			);
+		}
+
+		const choice = await ctx.ui.select("Plan mode - what next?", [
+			todoItems.length > 0 ? "Execute the plan (track progress)" : "Execute the plan",
+			"Stay in plan mode",
+			"Refine the plan",
+		]);
+
+		if (choice?.startsWith("Execute")) {
+			planModeEnabled = false;
+			executionMode = todoItems.length > 0;
+			pi.setActiveTools(NORMAL_MODE_TOOLS);
+			updateStatus(ctx);
+
+			const execMessage =
+				todoItems.length > 0
+					? `Execute the plan. Start with: ${todoItems[0].text}`
+					: "Execute the plan you just created.";
+			pi.sendMessage(
+				{ customType: "plan-mode-execute", content: execMessage, display: true },
+				{ triggerTurn: true },
+			);
+		} else if (choice === "Refine the plan") {
+			const refinement = await ctx.ui.editor("Refine the plan:", "");
+			if (refinement?.trim()) {
+				pi.sendUserMessage(refinement.trim());
+			}
+		}
+	});
+
+	// Restore state on session start/resume: the --plan flag first, then the latest persisted "plan-mode" entry
+	pi.on("session_start", async (_event, ctx) => {
+		if (pi.getFlag("plan") === true) {
+			planModeEnabled = true;
+		}
+
+		const entries = ctx.sessionManager.getEntries();
+
+		// Restore persisted state from the last "plan-mode" custom entry; its values take precedence over the flag set above
+		const planModeEntry = entries
+			.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
+			.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
+
+		if (planModeEntry?.data) {
+			planModeEnabled = planModeEntry.data.enabled ?? planModeEnabled;
+			todoItems = planModeEntry.data.todos ?? todoItems;
+			executionMode = planModeEntry.data.executing ?? executionMode;
+		}
+
+		// On resume: re-scan messages to rebuild completion state
+		// Only scan messages AFTER the last "plan-mode-execute" to avoid picking up [DONE:n] from previous plans
+		const isResume = planModeEntry !== undefined;
+		if (isResume && executionMode && todoItems.length > 0) {
+			// Find the index of the last plan-mode-execute entry (marks when current execution started); stays -1 when none exists, so the scan below covers every entry
+			let executeIndex = -1;
+			for (let i = entries.length - 1; i >= 0; i--) {
+				const entry = entries[i] as { type: string; customType?: string };
+				if (entry.customType === "plan-mode-execute") {
+					executeIndex = i;
+					break;
+				}
+			}
+
+			// Only scan messages after the execute marker, keeping assistant messages only
+			const messages: AssistantMessage[] = [];
+			for (let i = executeIndex + 1; i < entries.length; i++) {
+				const entry = entries[i];
+				if (entry.type === "message" && "message" in entry && isAssistantMessage(entry.message as AgentMessage)) {
+					messages.push(entry.message as AssistantMessage);
+				}
+			}
+			const allText = messages.map(getTextContent).join("\n");
+			markCompletedSteps(allText, todoItems);
+		}
+
+		if (planModeEnabled) {
+			pi.setActiveTools(PLAN_MODE_TOOLS);
+		}
+		updateStatus(ctx);
+	});
+}
--- a/packages/coding-agent/examples/extensions/plan-mode/utils.ts
+++ b/packages/coding-agent/examples/extensions/plan-mode/utils.ts
@ -0,0 +1,168 @@
+/**
+ * Pure utility functions for plan mode.
+ * Extracted for testability.
+ */
+
+/**
+ * Destructive commands blocked in plan mode.
+ *
+ * A command is rejected by `isSafeCommand` if ANY of these patterns matches.
+ * None of the patterns is anchored to the start of the command, so a match
+ * anywhere in the string (including inside an argument or a quoted string)
+ * blocks the command.
+ */
+const DESTRUCTIVE_PATTERNS = [
+	// File and directory creation, removal, moving and permission changes
+	/\brm\b/i,
+	/\brmdir\b/i,
+	/\bmv\b/i,
+	/\bcp\b/i,
+	/\bmkdir\b/i,
+	/\btouch\b/i,
+	/\bchmod\b/i,
+	/\bchown\b/i,
+	/\bchgrp\b/i,
+	/\bln\b/i,
+	/\btee\b/i,
+	/\btruncate\b/i,
+	/\bdd\b/i,
+	/\bshred\b/i,
+	// Output redirection: a `>` that is not preceded by `<` and not followed by `>`, or any `>>`
+	/(^|[^<])>(?!>)/,
+	/>>/,
+	// Package manager subcommands that install, remove or publish
+	/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
+	/\byarn\s+(add|remove|install|publish)/i,
+	/\bpnpm\s+(add|remove|install|publish)/i,
+	/\bpip\s+(install|uninstall)/i,
+	/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
+	/\bbrew\s+(install|uninstall|upgrade)/i,
+	// Git subcommands that change the working tree, history or remotes
+	/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
+	// Privilege escalation, process control and system/service management
+	/\bsudo\b/i,
+	/\bsu\b/i,
+	/\bkill\b/i,
+	/\bpkill\b/i,
+	/\bkillall\b/i,
+	/\breboot\b/i,
+	/\bshutdown\b/i,
+	/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
+	/\bservice\s+\S+\s+(start|stop|restart)/i,
+	// Editors (`vim?` matches both `vi` and `vim`)
+	/\b(vim?|nano|emacs|code|subl)\b/i,
+];
+
+/**
+ * Safe read-only commands allowed in plan mode.
+ *
+ * Every pattern is anchored with `^\s*`, so it is tested against the
+ * beginning of the string only: it identifies the leading command word (and,
+ * for some tools, the subcommand or first option) after optional whitespace.
+ */
+const SAFE_PATTERNS = [
+	// File viewing, searching and listing
+	/^\s*cat\b/,
+	/^\s*head\b/,
+	/^\s*tail\b/,
+	/^\s*less\b/,
+	/^\s*more\b/,
+	/^\s*grep\b/,
+	/^\s*find\b/,
+	/^\s*ls\b/,
+	/^\s*pwd\b/,
+	/^\s*echo\b/,
+	/^\s*printf\b/,
+	/^\s*wc\b/,
+	/^\s*sort\b/,
+	/^\s*uniq\b/,
+	/^\s*diff\b/,
+	/^\s*file\b/,
+	/^\s*stat\b/,
+	/^\s*du\b/,
+	/^\s*df\b/,
+	/^\s*tree\b/,
+	// Environment and system information
+	/^\s*which\b/,
+	/^\s*whereis\b/,
+	/^\s*type\b/,
+	/^\s*env\b/,
+	/^\s*printenv\b/,
+	/^\s*uname\b/,
+	/^\s*whoami\b/,
+	/^\s*id\b/,
+	/^\s*date\b/,
+	/^\s*cal\b/,
+	/^\s*uptime\b/,
+	/^\s*ps\b/,
+	/^\s*top\b/,
+	/^\s*htop\b/,
+	/^\s*free\b/,
+	// Git, npm and yarn: only the listed inspection subcommands
+	/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
+	/^\s*git\s+ls-/i,
+	/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
+	/^\s*yarn\s+(list|info|why|audit)/i,
+	// Version queries only
+	/^\s*node\s+--version/i,
+	/^\s*python\s+--version/i,
+	// Any curl invocation with arguments; wget only when it starts with `-O` followed by `-`
+	/^\s*curl\s/i,
+	/^\s*wget\s+-O\s*-/i,
+	// Text processing and modern CLI replacements (sed only when invoked with `-n`)
+	/^\s*jq\b/,
+	/^\s*sed\s+-n/i,
+	/^\s*awk\b/,
+	/^\s*rg\b/,
+	/^\s*fd\b/,
+	/^\s*bat\b/,
+	/^\s*exa\b/,
+];
+
+/**
+ * Splits a shell command line into its individual commands at unquoted
+ * control operators (`;`, `|`, `||`, `&`, `&&`) and newlines.
+ *
+ * Single- and double-quoted spans and backslash-escaped characters are kept
+ * intact, so `grep "a|b" file` stays one segment. Segments are trimmed and
+ * empty ones are dropped.
+ */
+function splitCommandSegments(command: string): string[] {
+	const segments: string[] = [];
+	let current = "";
+	let quote: string | null = null;
+
+	for (let i = 0; i < command.length; i++) {
+		const ch = command.charAt(i);
+
+		if (quote !== null) {
+			current += ch;
+			if (ch === "\\" && quote === '"' && i + 1 < command.length) {
+				// Inside double quotes a backslash escapes the next character
+				i++;
+				current += command.charAt(i);
+			} else if (ch === quote) {
+				quote = null;
+			}
+			continue;
+		}
+
+		if (ch === "\\" && i + 1 < command.length) {
+			// Escaped character outside quotes (e.g. `\;` in `find -exec`) is not an operator
+			i++;
+			current += ch + command.charAt(i);
+			continue;
+		}
+		if (ch === '"' || ch === "'") {
+			quote = ch;
+			current += ch;
+			continue;
+		}
+		if (ch === ";" || ch === "|" || ch === "&" || ch === "\n") {
+			segments.push(current);
+			current = "";
+			continue;
+		}
+		current += ch;
+	}
+	segments.push(current);
+
+	return segments.map((segment) => segment.trim()).filter((segment) => segment.length > 0);
+}
+
+/**
+ * Decides whether a bash command may run while plan mode is active.
+ *
+ * A command is accepted only when
+ * 1. no entry of `DESTRUCTIVE_PATTERNS` matches anywhere in the full string, and
+ * 2. every command in the line (split at unquoted `;`, `|`, `&` and newlines)
+ *    starts with an entry of `SAFE_PATTERNS`.
+ *
+ * Checking every segment matters because the safe patterns are anchored to
+ * the start of the string: testing only the whole line would let
+ * `ls; some-script` or `cat file | sh` through on the strength of the first
+ * command alone.
+ *
+ * This is a heuristic filter, not a shell parser: command substitution
+ * (`$(...)`, backticks) is not inspected.
+ *
+ * @param command - Raw command line as it would be passed to the shell.
+ * @returns `true` when the command is considered read-only, `false` otherwise
+ *          (including for empty or whitespace-only input).
+ */
+export function isSafeCommand(command: string): boolean {
+	if (DESTRUCTIVE_PATTERNS.some((p) => p.test(command))) {
+		return false;
+	}
+	const segments = splitCommandSegments(command);
+	return segments.length > 0 && segments.every((segment) => SAFE_PATTERNS.some((p) => p.test(segment)));
+}
+
+/** One step of an extracted plan, tracked during execution. */
+export interface TodoItem {
+	/** Step number; `[DONE:n]` markers are matched against this value. */
+	step: number;
+	/** Short display text for the step (as produced by `cleanStepText`). */
+	text: string;
+	/** Whether the step has been marked as done. */
+	completed: boolean;
+}
+
+/**
+ * Turns a raw plan step into a short label for display.
+ *
+ * Processing order: strip `*`/`**` emphasis, strip inline-code backticks,
+ * drop one leading action verb (plus an optional "the"), collapse whitespace,
+ * capitalize the first character, then cap the result at 50 characters
+ * (47 characters followed by "...").
+ *
+ * @param text - Raw step text, possibly containing markdown.
+ * @returns The cleaned label; an empty string if nothing is left.
+ */
+export function cleanStepText(text: string): string {
+	const withoutEmphasis = text.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1");
+	const withoutCode = withoutEmphasis.replace(/`([^`]+)`/g, "$1");
+	const withoutVerb = withoutCode.replace(
+		/^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
+		"",
+	);
+	const normalized = withoutVerb.replace(/\s+/g, " ").trim();
+
+	if (normalized.length === 0) {
+		return normalized;
+	}
+
+	const capitalized = normalized.charAt(0).toUpperCase() + normalized.slice(1);
+	return capitalized.length > 50 ? `${capitalized.slice(0, 47)}...` : capitalized;
+}
+
+/**
+ * Builds the todo list from the numbered steps that follow a "Plan:" header.
+ *
+ * The header may be wrapped in `**` and must be followed by a newline; without
+ * it no items are returned. Everything after the first header is scanned for
+ * lines of the form `1. text` or `1) text`. The text of a step is taken up to
+ * the first `*` or the end of the line. Steps that are too short or that start
+ * with a backtick, `/` or `-` are skipped. Kept steps are numbered
+ * consecutively from 1 in order of appearance, regardless of the number
+ * written in the message.
+ *
+ * @param message - Full assistant message text.
+ * @returns The extracted steps, all initially not completed.
+ */
+export function extractTodoItems(message: string): TodoItem[] {
+	const header = /\*{0,2}Plan:\*{0,2}\s*\n/i.exec(message);
+	if (header === null) {
+		return [];
+	}
+
+	const body = message.slice(header.index + header[0].length);
+	const stepLine = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm;
+	const todos: TodoItem[] = [];
+
+	let found = stepLine.exec(body);
+	while (found !== null) {
+		const raw = found[2]
+			.trim()
+			.replace(/\*{1,2}$/, "")
+			.trim();
+		const startsLikeCode = ["`", "/", "-"].some((prefix) => raw.startsWith(prefix));
+		if (raw.length > 5 && !startsLikeCode) {
+			const label = cleanStepText(raw);
+			if (label.length > 3) {
+				todos.push({ step: todos.length + 1, text: label, completed: false });
+			}
+		}
+		found = stepLine.exec(body);
+	}
+
+	return todos;
+}
+
+/**
+ * Collects the step numbers of all `[DONE:n]` markers in a message.
+ *
+ * Matching is case-insensitive and requires at least one digit, so
+ * `[DONE:abc]` and `[DONE:]` are ignored. Numbers are returned in order of
+ * appearance, duplicates included; values that do not convert to a finite
+ * number are dropped.
+ *
+ * @param message - Text to scan.
+ * @returns The step numbers found, possibly empty.
+ */
+export function extractDoneSteps(message: string): number[] {
+	const numbers = Array.from(message.matchAll(/\[DONE:(\d+)\]/gi), (marker) => Number(marker[1]));
+	return numbers.filter((value) => Number.isFinite(value));
+}
+
+/**
+ * Marks todo items as completed for every `[DONE:n]` marker found in `text`.
+ *
+ * `items` is mutated in place: the first item whose `step` equals a marker's
+ * number gets `completed = true`. Markers without a matching item are ignored
+ * for marking purposes but still counted.
+ *
+ * @param text - Text to scan for `[DONE:n]` markers.
+ * @param items - Todo list to update.
+ * @returns The number of markers found in `text` (not the number of items changed).
+ */
+export function markCompletedSteps(text: string, items: TodoItem[]): number {
+	const markers = extractDoneSteps(text);
+	markers.forEach((stepNumber) => {
+		const target = items.find((candidate) => candidate.step === stepNumber);
+		if (target) {
+			target.completed = true;
+		}
+	});
+	return markers.length;
+}
--- a/packages/coding-agent/test/plan-mode-utils.test.ts
+++ b/packages/coding-agent/test/plan-mode-utils.test.ts
@ -0,0 +1,261 @@
+import { describe, expect, it } from "vitest";
+import {
+	cleanStepText,
+	extractDoneSteps,
+	extractTodoItems,
+	isSafeCommand,
+	markCompletedSteps,
+	type TodoItem,
+} from "../examples/extensions/plan-mode/utils.js";
+
+// isSafeCommand: a command must start with a whitelisted read-only command
+// AND must not match any destructive pattern.
+describe("isSafeCommand", () => {
+	// Commands that start with a whitelisted tool and contain nothing destructive
+	describe("safe commands", () => {
+		it("allows basic read commands", () => {
+			expect(isSafeCommand("ls -la")).toBe(true);
+			expect(isSafeCommand("cat file.txt")).toBe(true);
+			expect(isSafeCommand("head -n 10 file.txt")).toBe(true);
+			expect(isSafeCommand("tail -f log.txt")).toBe(true);
+			expect(isSafeCommand("grep pattern file")).toBe(true);
+			expect(isSafeCommand("find . -name '*.ts'")).toBe(true);
+		});
+
+		it("allows git read commands", () => {
+			expect(isSafeCommand("git status")).toBe(true);
+			expect(isSafeCommand("git log --oneline")).toBe(true);
+			expect(isSafeCommand("git diff")).toBe(true);
+			expect(isSafeCommand("git branch")).toBe(true);
+		});
+
+		it("allows npm/yarn read commands", () => {
+			expect(isSafeCommand("npm list")).toBe(true);
+			expect(isSafeCommand("npm outdated")).toBe(true);
+			expect(isSafeCommand("yarn info react")).toBe(true);
+		});
+
+		it("allows other safe commands", () => {
+			expect(isSafeCommand("pwd")).toBe(true);
+			expect(isSafeCommand("echo hello")).toBe(true);
+			expect(isSafeCommand("wc -l file.txt")).toBe(true);
+			expect(isSafeCommand("du -sh .")).toBe(true);
+			expect(isSafeCommand("df -h")).toBe(true);
+		});
+	});
+
+	// Commands rejected because a destructive pattern matches (or, for some,
+	// because they are also absent from the whitelist)
+	describe("destructive commands", () => {
+		it("blocks file modification commands", () => {
+			expect(isSafeCommand("rm file.txt")).toBe(false);
+			expect(isSafeCommand("rm -rf dir")).toBe(false);
+			expect(isSafeCommand("mv old new")).toBe(false);
+			expect(isSafeCommand("cp src dst")).toBe(false);
+			expect(isSafeCommand("mkdir newdir")).toBe(false);
+			expect(isSafeCommand("touch newfile")).toBe(false);
+		});
+
+		it("blocks git write commands", () => {
+			expect(isSafeCommand("git add .")).toBe(false);
+			expect(isSafeCommand("git commit -m 'msg'")).toBe(false);
+			expect(isSafeCommand("git push")).toBe(false);
+			expect(isSafeCommand("git checkout main")).toBe(false);
+			expect(isSafeCommand("git reset --hard")).toBe(false);
+		});
+
+		it("blocks package manager installs", () => {
+			expect(isSafeCommand("npm install lodash")).toBe(false);
+			expect(isSafeCommand("yarn add react")).toBe(false);
+			expect(isSafeCommand("pip install requests")).toBe(false);
+			expect(isSafeCommand("brew install node")).toBe(false);
+		});
+
+		// The first two start with whitelisted commands (echo, cat), so only
+		// the redirect patterns can be responsible for rejecting them.
+		it("blocks redirects", () => {
+			expect(isSafeCommand("echo hello > file.txt")).toBe(false);
+			expect(isSafeCommand("cat foo >> bar")).toBe(false);
+			expect(isSafeCommand(">file.txt")).toBe(false);
+		});
+
+		it("blocks dangerous commands", () => {
+			expect(isSafeCommand("sudo rm -rf /")).toBe(false);
+			expect(isSafeCommand("kill -9 1234")).toBe(false);
+			expect(isSafeCommand("reboot")).toBe(false);
+		});
+
+		it("blocks editors", () => {
+			expect(isSafeCommand("vim file.txt")).toBe(false);
+			expect(isSafeCommand("nano file.txt")).toBe(false);
+			expect(isSafeCommand("code .")).toBe(false);
+		});
+	});
+
+	describe("edge cases", () => {
+		// Not matching any destructive pattern is not enough: the whitelist is mandatory
+		it("requires command to be in safe list (not just non-destructive)", () => {
+			expect(isSafeCommand("unknown-command")).toBe(false);
+			expect(isSafeCommand("my-script.sh")).toBe(false);
+		});
+
+		// Safe patterns tolerate leading whitespace; destructive patterns match anywhere
+		it("handles commands with leading whitespace", () => {
+			expect(isSafeCommand("  ls -la")).toBe(true);
+			expect(isSafeCommand("  rm file")).toBe(false);
+		});
+	});
+});
+
+// cleanStepText: markdown stripping, action-verb removal, capitalization and truncation
+describe("cleanStepText", () => {
+	it("removes markdown bold/italic", () => {
+		expect(cleanStepText("**bold text**")).toBe("Bold text");
+		expect(cleanStepText("*italic text*")).toBe("Italic text");
+	});
+
+	// Backticks are removed before the leading action verb is stripped
+	it("removes markdown code", () => {
+		expect(cleanStepText("run `npm install`")).toBe("Npm install"); // "run" is stripped as action word
+		expect(cleanStepText("check the `config.json` file")).toBe("Config.json file");
+	});
+
+	// The verb and an optional following "the" are dropped together
+	it("removes leading action words", () => {
+		expect(cleanStepText("Create the new file")).toBe("New file");
+		expect(cleanStepText("Run the tests")).toBe("Tests");
+		expect(cleanStepText("Check the status")).toBe("Status");
+	});
+
+	// "update" is itself an action word, so only "config" is left to capitalize
+	it("capitalizes first letter", () => {
+		expect(cleanStepText("update config")).toBe("Config");
+	});
+
+	// Results longer than 50 characters are cut to 47 characters plus "..."
+	it("truncates long text", () => {
+		const longText = "This is a very long step description that exceeds the maximum allowed length for display";
+		const result = cleanStepText(longText);
+		expect(result.length).toBe(50);
+		expect(result.endsWith("...")).toBe(true);
+	});
+
+	it("normalizes whitespace", () => {
+		expect(cleanStepText("multiple   spaces   here")).toBe("Multiple spaces here");
+	});
+});
+
+// extractTodoItems: only numbered lines after a "Plan:" header become todo items
+describe("extractTodoItems", () => {
+	it("extracts numbered items after Plan: header", () => {
+		const message = `Here's what we'll do:
+
+Plan:
+1. First step here
+2. Second step here
+3. Third step here`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(3);
+		expect(items[0].step).toBe(1);
+		expect(items[0].text).toBe("First step here");
+		expect(items[0].completed).toBe(false);
+	});
+
+	// The header may be wrapped in markdown bold markers
+	it("handles bold Plan header", () => {
+		const message = `**Plan:**
+1. Do something`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(1);
+	});
+
+	// Both "1." and "1)" numbering styles are accepted
+	it("handles parenthesis-style numbering", () => {
+		const message = `Plan:
+1) First item
+2) Second item`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(2);
+	});
+
+	// A numbered list alone is not a plan; the header is mandatory
+	it("returns empty array without Plan header", () => {
+		const message = `Here are some steps:
+1. First step
+2. Second step`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(0);
+	});
+
+	// "OK" is below the minimum step length and is dropped
+	it("filters out short items", () => {
+		const message = `Plan:
+1. OK
+2. This is a proper step`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(1);
+		expect(items[0].text).toContain("proper");
+	});
+
+	// Steps whose text starts with a backtick are treated as code and skipped
+	it("filters out code-like items", () => {
+		const message = `Plan:
+1. \`npm install\`
+2. Run the build process`;
+
+		const items = extractTodoItems(message);
+		expect(items).toHaveLength(1);
+	});
+});
+
+// extractDoneSteps: parses [DONE:n] markers into step numbers, in order of appearance
+describe("extractDoneSteps", () => {
+	it("extracts single DONE marker", () => {
+		const message = "I've completed the first step [DONE:1]";
+		expect(extractDoneSteps(message)).toEqual([1]);
+	});
+
+	it("extracts multiple DONE markers", () => {
+		const message = "Did steps [DONE:1] and [DONE:2] and [DONE:3]";
+		expect(extractDoneSteps(message)).toEqual([1, 2, 3]);
+	});
+
+	it("handles case insensitivity", () => {
+		const message = "[done:1] [DONE:2] [Done:3]";
+		expect(extractDoneSteps(message)).toEqual([1, 2, 3]);
+	});
+
+	it("returns empty array with no markers", () => {
+		const message = "No markers here";
+		expect(extractDoneSteps(message)).toEqual([]);
+	});
+
+	// Markers need at least one digit after the colon; anything else is skipped
+	it("ignores malformed markers", () => {
+		const message = "[DONE:abc] [DONE:] [DONE:1]";
+		expect(extractDoneSteps(message)).toEqual([1]);
+	});
+});
+
+// markCompletedSteps: mutates the todo list in place and returns the number of markers found
+describe("markCompletedSteps", () => {
+	it("marks matching items as completed", () => {
+		const items: TodoItem[] = [
+			{ step: 1, text: "First", completed: false },
+			{ step: 2, text: "Second", completed: false },
+			{ step: 3, text: "Third", completed: false },
+		];
+
+		const count = markCompletedSteps("[DONE:1] [DONE:3]", items);
+
+		expect(count).toBe(2);
+		expect(items[0].completed).toBe(true);
+		expect(items[1].completed).toBe(false);
+		expect(items[2].completed).toBe(true);
+	});
+
+	// NOTE: the returned value is the number of [DONE:n] markers in the text,
+	// which equals the number of completed items here only because every
+	// marker has a matching step (see the non-existent step case below).
+	it("returns count of completed items", () => {
+		const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];
+
+		expect(markCompletedSteps("[DONE:1]", items)).toBe(1);
+		expect(markCompletedSteps("no markers", items)).toBe(0);
+	});
+
+	it("ignores markers for non-existent steps", () => {
+		const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];
+
+		const count = markCompletedSteps("[DONE:99]", items);
+
+		expect(count).toBe(1); // Still counts the marker found
+		expect(items[0].completed).toBe(false); // But doesn't mark anything
+	});
+
+	// Re-marking an already completed item leaves it completed
+	it("doesn't double-complete already completed items", () => {
+		const items: TodoItem[] = [{ step: 1, text: "First", completed: true }];
+
+		markCompletedSteps("[DONE:1]", items);
+		expect(items[0].completed).toBe(true);
+	});
+});