From e8f1322eeee866c4fb8e8d5f04cb81078595e431 Mon Sep 17 00:00:00 2001 From: Fero Date: Tue, 13 Jan 2026 17:53:11 +0100 Subject: [PATCH] feat(plan-mode): enhanced plan mode with explicit step tracking (#694) Changes from the original: - Explicit [DONE:n] tag tracking (more accurate than auto-marking on tool_result) - Plan: header requirement - only extracts todos from 'Plan:' sections - Utils extracted to separate file for testability - Better session resume - only scans messages after plan-mode-execute marker - Context filtering - properly filters plan-mode-context custom type messages - Refactored to directory structure (index.ts + utils.ts + README.md) The original auto-completed steps on every tool_result, which was inaccurate for multi-tool steps. This version uses explicit [DONE:n] markers that the agent outputs after completing each step. --- .../examples/extensions/plan-mode.ts | 548 ------------------ .../examples/extensions/plan-mode/README.md | 65 +++ .../examples/extensions/plan-mode/index.ts | 340 +++++++++++ .../examples/extensions/plan-mode/utils.ts | 168 ++++++ .../coding-agent/test/plan-mode-utils.test.ts | 261 +++++++++ 5 files changed, 834 insertions(+), 548 deletions(-) delete mode 100644 packages/coding-agent/examples/extensions/plan-mode.ts create mode 100644 packages/coding-agent/examples/extensions/plan-mode/README.md create mode 100644 packages/coding-agent/examples/extensions/plan-mode/index.ts create mode 100644 packages/coding-agent/examples/extensions/plan-mode/utils.ts create mode 100644 packages/coding-agent/test/plan-mode-utils.test.ts diff --git a/packages/coding-agent/examples/extensions/plan-mode.ts b/packages/coding-agent/examples/extensions/plan-mode.ts deleted file mode 100644 index 8f3efdf6..00000000 --- a/packages/coding-agent/examples/extensions/plan-mode.ts +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Plan Mode Extension - * - * Provides a Claude Code-style "plan mode" for safe code exploration. 
- * When enabled, the agent can only use read-only tools and cannot modify files. - * - * Features: - * - /plan command to toggle plan mode - * - In plan mode: only read, bash (read-only), grep, find, ls are available - * - Injects system context telling the agent about the restrictions - * - After each agent response, prompts to execute the plan or continue planning - * - Shows "plan" indicator in footer when active - * - Extracts todo list from plan and tracks progress during execution - * - Uses ID-based tracking: agent outputs [DONE:id] to mark steps complete - * - * Usage: - * 1. Copy this file to ~/.pi/agent/extensions/ or your project's .pi/extensions/ - * 2. Use /plan to toggle plan mode on/off - * 3. Or start in plan mode with --plan flag - */ - -import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; -import { Key } from "@mariozechner/pi-tui"; - -// Read-only tools for plan mode -const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls"]; - -// Full set of tools for normal mode -const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"]; - -// Patterns for destructive bash commands that should be blocked in plan mode -const DESTRUCTIVE_PATTERNS = [ - /\brm\b/i, - /\brmdir\b/i, - /\bmv\b/i, - /\bcp\b/i, - /\bmkdir\b/i, - /\btouch\b/i, - /\bchmod\b/i, - /\bchown\b/i, - /\bchgrp\b/i, - /\bln\b/i, - /\btee\b/i, - /\btruncate\b/i, - /\bdd\b/i, - /\bshred\b/i, - /[^<]>(?!>)/, - />>/, - /\bnpm\s+(install|uninstall|update|ci|link|publish)/i, - /\byarn\s+(add|remove|install|publish)/i, - /\bpnpm\s+(add|remove|install|publish)/i, - /\bpip\s+(install|uninstall)/i, - /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i, - /\bbrew\s+(install|uninstall|upgrade)/i, - /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i, - /\bsudo\b/i, - /\bsu\b/i, - /\bkill\b/i, - /\bpkill\b/i, - /\bkillall\b/i, - /\breboot\b/i, - /\bshutdown\b/i, - 
/\bsystemctl\s+(start|stop|restart|enable|disable)/i, - /\bservice\s+\S+\s+(start|stop|restart)/i, - /\b(vim?|nano|emacs|code|subl)\b/i, -]; - -// Read-only commands that are always safe -const SAFE_COMMANDS = [ - /^\s*cat\b/, - /^\s*head\b/, - /^\s*tail\b/, - /^\s*less\b/, - /^\s*more\b/, - /^\s*grep\b/, - /^\s*find\b/, - /^\s*ls\b/, - /^\s*pwd\b/, - /^\s*echo\b/, - /^\s*printf\b/, - /^\s*wc\b/, - /^\s*sort\b/, - /^\s*uniq\b/, - /^\s*diff\b/, - /^\s*file\b/, - /^\s*stat\b/, - /^\s*du\b/, - /^\s*df\b/, - /^\s*tree\b/, - /^\s*which\b/, - /^\s*whereis\b/, - /^\s*type\b/, - /^\s*env\b/, - /^\s*printenv\b/, - /^\s*uname\b/, - /^\s*whoami\b/, - /^\s*id\b/, - /^\s*date\b/, - /^\s*cal\b/, - /^\s*uptime\b/, - /^\s*ps\b/, - /^\s*top\b/, - /^\s*htop\b/, - /^\s*free\b/, - /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i, - /^\s*git\s+ls-/i, - /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i, - /^\s*yarn\s+(list|info|why|audit)/i, - /^\s*node\s+--version/i, - /^\s*python\s+--version/i, - /^\s*curl\s/i, - /^\s*wget\s+-O\s*-/i, - /^\s*jq\b/, - /^\s*sed\s+-n/i, - /^\s*awk\b/, - /^\s*rg\b/, - /^\s*fd\b/, - /^\s*bat\b/, - /^\s*exa\b/, -]; - -function isSafeCommand(command: string): boolean { - if (SAFE_COMMANDS.some((pattern) => pattern.test(command))) { - if (!DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) { - return true; - } - } - if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) { - return false; - } - return true; -} - -// Todo item with step number -interface TodoItem { - step: number; - text: string; - completed: boolean; -} - -/** - * Clean up extracted step text for display. 
- */ -function cleanStepText(text: string): string { - let cleaned = text - // Remove markdown bold/italic - .replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1") - // Remove markdown code - .replace(/`([^`]+)`/g, "$1") - // Remove leading action words that are redundant - .replace( - /^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i, - "", - ) - // Clean up extra whitespace - .replace(/\s+/g, " ") - .trim(); - - // Capitalize first letter - if (cleaned.length > 0) { - cleaned = cleaned.charAt(0).toUpperCase() + cleaned.slice(1); - } - - // Truncate if too long - if (cleaned.length > 50) { - cleaned = `${cleaned.slice(0, 47)}...`; - } - - return cleaned; -} - -/** - * Extract todo items from assistant message. - */ -function extractTodoItems(message: string): TodoItem[] { - const items: TodoItem[] = []; - - // Match numbered lists: "1. Task" or "1) Task" - also handle **bold** prefixes - const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm; - for (const match of message.matchAll(numberedPattern)) { - let text = match[2].trim(); - text = text.replace(/\*{1,2}$/, "").trim(); - // Skip if too short or looks like code/command - if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) { - const cleaned = cleanStepText(text); - if (cleaned.length > 3) { - items.push({ step: items.length + 1, text: cleaned, completed: false }); - } - } - } - - // If no numbered items, try bullet points - if (items.length === 0) { - const stepPattern = /^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim; - for (const match of message.matchAll(stepPattern)) { - let text = match[1].trim(); - text = text.replace(/\*{1,2}$/, "").trim(); - if (text.length > 10 && !text.startsWith("`")) { - const cleaned = cleanStepText(text); - if (cleaned.length > 3) { - items.push({ step: items.length + 1, text: cleaned, completed: false }); - } - } - } - } - - return items; -} - -export default function 
planModeExtension(pi: ExtensionAPI) { - let planModeEnabled = false; - let toolsCalledThisTurn = false; - let executionMode = false; - let todoItems: TodoItem[] = []; - - // Register --plan CLI flag - pi.registerFlag("plan", { - description: "Start in plan mode (read-only exploration)", - type: "boolean", - default: false, - }); - - // Helper to update status displays - function updateStatus(ctx: ExtensionContext) { - if (executionMode && todoItems.length > 0) { - const completed = todoItems.filter((t) => t.completed).length; - ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`)); - } else if (planModeEnabled) { - ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan")); - } else { - ctx.ui.setStatus("plan-mode", undefined); - } - - // Show widget during execution (no IDs shown to user) - if (executionMode && todoItems.length > 0) { - const lines: string[] = []; - for (const item of todoItems) { - if (item.completed) { - lines.push( - ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text)), - ); - } else { - lines.push(ctx.ui.theme.fg("muted", "☐ ") + item.text); - } - } - ctx.ui.setWidget("plan-todos", lines); - } else { - ctx.ui.setWidget("plan-todos", undefined); - } - } - - function togglePlanMode(ctx: ExtensionContext) { - planModeEnabled = !planModeEnabled; - executionMode = false; - todoItems = []; - - if (planModeEnabled) { - pi.setActiveTools(PLAN_MODE_TOOLS); - ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`); - } else { - pi.setActiveTools(NORMAL_MODE_TOOLS); - ctx.ui.notify("Plan mode disabled. 
Full access restored."); - } - updateStatus(ctx); - } - - // Register /plan command - pi.registerCommand("plan", { - description: "Toggle plan mode (read-only exploration)", - handler: async (_args, ctx) => { - togglePlanMode(ctx); - }, - }); - - // Register /todos command - pi.registerCommand("todos", { - description: "Show current plan todo list", - handler: async (_args, ctx) => { - if (todoItems.length === 0) { - ctx.ui.notify("No todos. Create a plan first with /plan", "info"); - return; - } - - const todoList = todoItems - .map((item, i) => { - const checkbox = item.completed ? "✓" : "○"; - return `${i + 1}. ${checkbox} ${item.text}`; - }) - .join("\n"); - - ctx.ui.notify(`Plan Progress:\n${todoList}`, "info"); - }, - }); - - // Register Shift+P shortcut - pi.registerShortcut(Key.shift("p"), { - description: "Toggle plan mode", - handler: async (ctx) => { - togglePlanMode(ctx); - }, - }); - - // Block destructive bash in plan mode - pi.on("tool_call", async (event) => { - if (!planModeEnabled) return; - if (event.toolName !== "bash") return; - - const command = event.input.command as string; - if (!isSafeCommand(command)) { - return { - block: true, - reason: `Plan mode: destructive command blocked. 
Use /plan to disable plan mode first.\nCommand: ${command}`, - }; - } - }); - - // Track step completion based on tool results - pi.on("tool_result", async (_event, ctx) => { - toolsCalledThisTurn = true; - - if (!executionMode || todoItems.length === 0) return; - - // Mark the first uncompleted step as done when any tool succeeds - const nextStep = todoItems.find((t) => !t.completed); - if (nextStep) { - nextStep.completed = true; - updateStatus(ctx); - } - }); - - // Filter out stale plan mode context messages from LLM context - // This ensures the agent only sees the CURRENT state (plan mode on/off) - pi.on("context", async (event) => { - // Only filter when NOT in plan mode (i.e., when executing) - if (planModeEnabled) { - return; - } - - // Remove any previous plan-mode-context messages - const _beforeCount = event.messages.length; - const filtered = event.messages.filter((m) => { - if (m.role === "user" && Array.isArray(m.content)) { - const hasOldContext = m.content.some((c) => c.type === "text" && c.text.includes("[PLAN MODE ACTIVE]")); - if (hasOldContext) { - return false; - } - } - return true; - }); - return { messages: filtered }; - }); - - // Inject plan mode context - pi.on("before_agent_start", async () => { - if (!planModeEnabled && !executionMode) { - return; - } - - if (planModeEnabled) { - return { - message: { - customType: "plan-mode-context", - content: `[PLAN MODE ACTIVE] -You are in plan mode - a read-only exploration mode for safe code analysis. - -Restrictions: -- You can only use: read, bash, grep, find, ls -- You CANNOT use: edit, write (file modifications are disabled) -- Bash is restricted to READ-ONLY commands -- Focus on analysis, planning, and understanding the codebase - -Create a detailed numbered plan: -1. First step description -2. Second step description -... 
- -Do NOT attempt to make changes - just describe what you would do.`, - display: false, - }, - }; - } - - if (executionMode && todoItems.length > 0) { - const remaining = todoItems.filter((t) => !t.completed); - const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n"); - return { - message: { - customType: "plan-execution-context", - content: `[EXECUTING PLAN - Full tool access enabled] - -Remaining steps: -${todoList} - -Execute each step in order.`, - display: false, - }, - }; - } - }); - - // After agent finishes - pi.on("agent_end", async (event, ctx) => { - // In execution mode, check if all steps complete - if (executionMode && todoItems.length > 0) { - const allComplete = todoItems.every((t) => t.completed); - if (allComplete) { - // Show final completed list in chat - const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n"); - pi.sendMessage( - { - customType: "plan-complete", - content: `**Plan Complete!** ✓\n\n${completedList}`, - display: true, - }, - { triggerTurn: false }, - ); - - executionMode = false; - todoItems = []; - pi.setActiveTools(NORMAL_MODE_TOOLS); - updateStatus(ctx); - } - return; - } - - if (!planModeEnabled) return; - if (!ctx.hasUI) return; - - // Extract todos from last message - const messages = event.messages; - const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant"); - if (lastAssistant && Array.isArray(lastAssistant.content)) { - const textContent = lastAssistant.content - .filter((block): block is { type: "text"; text: string } => block.type === "text") - .map((block) => block.text) - .join("\n"); - - if (textContent) { - const extracted = extractTodoItems(textContent); - if (extracted.length > 0) { - todoItems = extracted; - } - } - } - - const hasTodos = todoItems.length > 0; - - // Show todo list in chat (no IDs shown to user, just numbered) - if (hasTodos) { - const todoListText = todoItems.map((t, i) => `${i + 1}. 
☐ ${t.text}`).join("\n"); - pi.sendMessage( - { - customType: "plan-todo-list", - content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`, - display: true, - }, - { triggerTurn: false }, - ); - } - - const choice = await ctx.ui.select("Plan mode - what next?", [ - hasTodos ? "Execute the plan (track progress)" : "Execute the plan", - "Stay in plan mode", - "Refine the plan", - ]); - - if (choice?.startsWith("Execute")) { - planModeEnabled = false; - executionMode = hasTodos; - pi.setActiveTools(NORMAL_MODE_TOOLS); - updateStatus(ctx); - - // Simple execution message - context event filters old plan mode messages - // and before_agent_start injects fresh execution context with IDs - const execMessage = hasTodos - ? `Execute the plan. Start with: ${todoItems[0].text}` - : "Execute the plan you just created."; - - pi.sendMessage( - { - customType: "plan-mode-execute", - content: execMessage, - display: true, - }, - { triggerTurn: true }, - ); - } else if (choice === "Refine the plan") { - const refinement = await ctx.ui.input("What should be refined?"); - if (refinement) { - ctx.ui.setEditorText(refinement); - } - } - }); - - // Initialize state on session start - pi.on("session_start", async (_event, ctx) => { - if (pi.getFlag("plan") === true) { - planModeEnabled = true; - } - - const entries = ctx.sessionManager.getEntries(); - const planModeEntry = entries - .filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode") - .pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined; - - if (planModeEntry?.data) { - if (planModeEntry.data.enabled !== undefined) { - planModeEnabled = planModeEntry.data.enabled; - } - if (planModeEntry.data.todos) { - todoItems = planModeEntry.data.todos; - } - if (planModeEntry.data.executing) { - executionMode = planModeEntry.data.executing; - } - } - - if (planModeEnabled) { - pi.setActiveTools(PLAN_MODE_TOOLS); - } - updateStatus(ctx); - }); 
- - // Reset tool tracking at start of each turn and persist state - pi.on("turn_start", async () => { - toolsCalledThisTurn = false; - pi.appendEntry("plan-mode", { - enabled: planModeEnabled, - todos: todoItems, - executing: executionMode, - }); - }); - - // Handle non-tool turns (e.g., analysis, explanation steps) - pi.on("turn_end", async (_event, ctx) => { - if (!executionMode || todoItems.length === 0) return; - - // If no tools were called this turn, the agent was doing analysis/explanation - // Mark the next uncompleted step as done - if (!toolsCalledThisTurn) { - const nextStep = todoItems.find((t) => !t.completed); - if (nextStep) { - nextStep.completed = true; - updateStatus(ctx); - } - } - }); -} diff --git a/packages/coding-agent/examples/extensions/plan-mode/README.md b/packages/coding-agent/examples/extensions/plan-mode/README.md new file mode 100644 index 00000000..bd430718 --- /dev/null +++ b/packages/coding-agent/examples/extensions/plan-mode/README.md @@ -0,0 +1,65 @@ +# Plan Mode Extension + +Read-only exploration mode for safe code analysis. + +## Features + +- **Read-only tools**: Restricts available tools to read, bash, grep, find, ls, questionnaire +- **Bash allowlist**: Only read-only bash commands are allowed +- **Plan extraction**: Extracts numbered steps from `Plan:` sections +- **Progress tracking**: Widget shows completion status during execution +- **[DONE:n] markers**: Explicit step completion tracking +- **Session persistence**: State survives session resume + +## Commands + +- `/plan` - Toggle plan mode +- `/todos` - Show current plan progress +- `Shift+P` - Toggle plan mode (shortcut) + +## Usage + +1. Enable plan mode with `/plan` or `--plan` flag +2. Ask the agent to analyze code and create a plan +3. The agent should output a numbered plan under a `Plan:` header: + +``` +Plan: +1. First step description +2. Second step description +3. Third step description +``` + +4. Choose "Execute the plan" when prompted +5. 
During execution, the agent marks steps complete with `[DONE:n]` tags +6. Progress widget shows completion status + +## How It Works + +### Plan Mode (Read-Only) +- Only read-only tools available +- Bash commands filtered through allowlist +- Agent creates a plan without making changes + +### Execution Mode +- Full tool access restored +- Agent executes steps in order +- `[DONE:n]` markers track completion +- Widget shows progress + +### Command Allowlist + +Safe commands (allowed): +- File inspection: `cat`, `head`, `tail`, `less`, `more` +- Search: `grep`, `find`, `rg`, `fd` +- Directory: `ls`, `pwd`, `tree` +- Git read: `git status`, `git log`, `git diff`, `git branch` +- Package info: `npm list`, `npm outdated`, `yarn info` +- System info: `uname`, `whoami`, `date`, `uptime` + +Blocked commands: +- File modification: `rm`, `mv`, `cp`, `mkdir`, `touch` +- Git write: `git add`, `git commit`, `git push` +- Package install: `npm install`, `yarn add`, `pip install` +- System: `sudo`, `kill`, `reboot` +- Editors: `vim`, `nano`, `code` diff --git a/packages/coding-agent/examples/extensions/plan-mode/index.ts b/packages/coding-agent/examples/extensions/plan-mode/index.ts new file mode 100644 index 00000000..cd35c3d3 --- /dev/null +++ b/packages/coding-agent/examples/extensions/plan-mode/index.ts @@ -0,0 +1,340 @@ +/** + * Plan Mode Extension + * + * Read-only exploration mode for safe code analysis. + * When enabled, only read-only tools are available. 
+ * + * Features: + * - /plan command or Shift+P to toggle + * - Bash restricted to allowlisted read-only commands + * - Extracts numbered plan steps from "Plan:" sections + * - [DONE:n] markers to complete steps during execution + * - Progress tracking widget during execution + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { AssistantMessage, TextContent } from "@mariozechner/pi-ai"; +import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent"; +import { Key } from "@mariozechner/pi-tui"; +import { extractTodoItems, isSafeCommand, markCompletedSteps, type TodoItem } from "./utils.js"; + +// Tools +const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls", "questionnaire"]; +const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"]; + +// Type guard for assistant messages +function isAssistantMessage(m: AgentMessage): m is AssistantMessage { + return m.role === "assistant" && Array.isArray(m.content); +} + +// Extract text content from an assistant message +function getTextContent(message: AssistantMessage): string { + return message.content + .filter((block): block is TextContent => block.type === "text") + .map((block) => block.text) + .join("\n"); +} + +export default function planModeExtension(pi: ExtensionAPI): void { + let planModeEnabled = false; + let executionMode = false; + let todoItems: TodoItem[] = []; + + pi.registerFlag("plan", { + description: "Start in plan mode (read-only exploration)", + type: "boolean", + default: false, + }); + + function updateStatus(ctx: ExtensionContext): void { + // Footer status + if (executionMode && todoItems.length > 0) { + const completed = todoItems.filter((t) => t.completed).length; + ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`)); + } else if (planModeEnabled) { + ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan")); + } else { + ctx.ui.setStatus("plan-mode", undefined); + } + + // 
Widget showing todo list + if (executionMode && todoItems.length > 0) { + const lines = todoItems.map((item) => { + if (item.completed) { + return ( + ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text)) + ); + } + return `${ctx.ui.theme.fg("muted", "☐ ")}${item.text}`; + }); + ctx.ui.setWidget("plan-todos", lines); + } else { + ctx.ui.setWidget("plan-todos", undefined); + } + } + + function togglePlanMode(ctx: ExtensionContext): void { + planModeEnabled = !planModeEnabled; + executionMode = false; + todoItems = []; + + if (planModeEnabled) { + pi.setActiveTools(PLAN_MODE_TOOLS); + ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`); + } else { + pi.setActiveTools(NORMAL_MODE_TOOLS); + ctx.ui.notify("Plan mode disabled. Full access restored."); + } + updateStatus(ctx); + } + + function persistState(): void { + pi.appendEntry("plan-mode", { + enabled: planModeEnabled, + todos: todoItems, + executing: executionMode, + }); + } + + pi.registerCommand("plan", { + description: "Toggle plan mode (read-only exploration)", + handler: async (_args, ctx) => togglePlanMode(ctx), + }); + + pi.registerCommand("todos", { + description: "Show current plan todo list", + handler: async (_args, ctx) => { + if (todoItems.length === 0) { + ctx.ui.notify("No todos. Create a plan first with /plan", "info"); + return; + } + const list = todoItems.map((item, i) => `${i + 1}. ${item.completed ? 
"✓" : "○"} ${item.text}`).join("\n"); + ctx.ui.notify(`Plan Progress:\n${list}`, "info"); + }, + }); + + pi.registerShortcut(Key.shift("p"), { + description: "Toggle plan mode", + handler: async (ctx) => togglePlanMode(ctx), + }); + + // Block destructive bash commands in plan mode + pi.on("tool_call", async (event) => { + if (!planModeEnabled || event.toolName !== "bash") return; + + const command = event.input.command as string; + if (!isSafeCommand(command)) { + return { + block: true, + reason: `Plan mode: command blocked (not allowlisted). Use /plan to disable plan mode first.\nCommand: ${command}`, + }; + } + }); + + // Filter out stale plan mode context when not in plan mode + pi.on("context", async (event) => { + if (planModeEnabled) return; + + return { + messages: event.messages.filter((m) => { + const msg = m as AgentMessage & { customType?: string }; + if (msg.customType === "plan-mode-context") return false; + if (msg.role !== "user") return true; + + const content = msg.content; + if (typeof content === "string") { + return !content.includes("[PLAN MODE ACTIVE]"); + } + if (Array.isArray(content)) { + return !content.some( + (c) => c.type === "text" && (c as TextContent).text?.includes("[PLAN MODE ACTIVE]"), + ); + } + return true; + }), + }; + }); + + // Inject plan/execution context before agent starts + pi.on("before_agent_start", async () => { + if (planModeEnabled) { + return { + message: { + customType: "plan-mode-context", + content: `[PLAN MODE ACTIVE] +You are in plan mode - a read-only exploration mode for safe code analysis. + +Restrictions: +- You can only use: read, bash, grep, find, ls, questionnaire +- You CANNOT use: edit, write (file modifications are disabled) +- Bash is restricted to an allowlist of read-only commands + +Ask clarifying questions using the questionnaire tool. +Use brave-search skill via bash for web research. + +Create a detailed numbered plan under a "Plan:" header: + +Plan: +1. First step description +2. 
Second step description +... + +Do NOT attempt to make changes - just describe what you would do.`, + display: false, + }, + }; + } + + if (executionMode && todoItems.length > 0) { + const remaining = todoItems.filter((t) => !t.completed); + const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n"); + return { + message: { + customType: "plan-execution-context", + content: `[EXECUTING PLAN - Full tool access enabled] + +Remaining steps: +${todoList} + +Execute each step in order. +After completing a step, include a [DONE:n] tag in your response.`, + display: false, + }, + }; + } + }); + + // Track progress after each turn + pi.on("turn_end", async (event, ctx) => { + if (!executionMode || todoItems.length === 0) return; + if (!isAssistantMessage(event.message)) return; + + const text = getTextContent(event.message); + if (markCompletedSteps(text, todoItems) > 0) { + updateStatus(ctx); + } + persistState(); + }); + + // Handle plan completion and plan mode UI + pi.on("agent_end", async (event, ctx) => { + // Check if execution is complete + if (executionMode && todoItems.length > 0) { + if (todoItems.every((t) => t.completed)) { + const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n"); + pi.sendMessage( + { customType: "plan-complete", content: `**Plan Complete!** ✓\n\n${completedList}`, display: true }, + { triggerTurn: false }, + ); + executionMode = false; + todoItems = []; + pi.setActiveTools(NORMAL_MODE_TOOLS); + updateStatus(ctx); + persistState(); // Save cleared state so resume doesn't restore old execution mode + } + return; + } + + if (!planModeEnabled || !ctx.hasUI) return; + + // Extract todos from last assistant message + const lastAssistant = [...event.messages].reverse().find(isAssistantMessage); + if (lastAssistant) { + const extracted = extractTodoItems(getTextContent(lastAssistant)); + if (extracted.length > 0) { + todoItems = extracted; + } + } + + // Show plan steps and prompt for next action + if (todoItems.length 
> 0) { + const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n"); + pi.sendMessage( + { + customType: "plan-todo-list", + content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`, + display: true, + }, + { triggerTurn: false }, + ); + } + + const choice = await ctx.ui.select("Plan mode - what next?", [ + todoItems.length > 0 ? "Execute the plan (track progress)" : "Execute the plan", + "Stay in plan mode", + "Refine the plan", + ]); + + if (choice?.startsWith("Execute")) { + planModeEnabled = false; + executionMode = todoItems.length > 0; + pi.setActiveTools(NORMAL_MODE_TOOLS); + updateStatus(ctx); + + const execMessage = + todoItems.length > 0 + ? `Execute the plan. Start with: ${todoItems[0].text}` + : "Execute the plan you just created."; + pi.sendMessage( + { customType: "plan-mode-execute", content: execMessage, display: true }, + { triggerTurn: true }, + ); + } else if (choice === "Refine the plan") { + const refinement = await ctx.ui.editor("Refine the plan:", ""); + if (refinement?.trim()) { + pi.sendUserMessage(refinement.trim()); + } + } + }); + + // Restore state on session start/resume + pi.on("session_start", async (_event, ctx) => { + if (pi.getFlag("plan") === true) { + planModeEnabled = true; + } + + const entries = ctx.sessionManager.getEntries(); + + // Restore persisted state + const planModeEntry = entries + .filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode") + .pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined; + + if (planModeEntry?.data) { + planModeEnabled = planModeEntry.data.enabled ?? planModeEnabled; + todoItems = planModeEntry.data.todos ?? todoItems; + executionMode = planModeEntry.data.executing ?? 
executionMode; + } + + // On resume: re-scan messages to rebuild completion state + // Only scan messages AFTER the last "plan-mode-execute" to avoid picking up [DONE:n] from previous plans + const isResume = planModeEntry !== undefined; + if (isResume && executionMode && todoItems.length > 0) { + // Find the index of the last plan-mode-execute entry (marks when current execution started) + let executeIndex = -1; + for (let i = entries.length - 1; i >= 0; i--) { + const entry = entries[i] as { type: string; customType?: string }; + if (entry.customType === "plan-mode-execute") { + executeIndex = i; + break; + } + } + + // Only scan messages after the execute marker + const messages: AssistantMessage[] = []; + for (let i = executeIndex + 1; i < entries.length; i++) { + const entry = entries[i]; + if (entry.type === "message" && "message" in entry && isAssistantMessage(entry.message as AgentMessage)) { + messages.push(entry.message as AssistantMessage); + } + } + const allText = messages.map(getTextContent).join("\n"); + markCompletedSteps(allText, todoItems); + } + + if (planModeEnabled) { + pi.setActiveTools(PLAN_MODE_TOOLS); + } + updateStatus(ctx); + }); +} diff --git a/packages/coding-agent/examples/extensions/plan-mode/utils.ts b/packages/coding-agent/examples/extensions/plan-mode/utils.ts new file mode 100644 index 00000000..7c49bdb6 --- /dev/null +++ b/packages/coding-agent/examples/extensions/plan-mode/utils.ts @@ -0,0 +1,168 @@ +/** + * Pure utility functions for plan mode. + * Extracted for testability. 
+ */ + +// Destructive commands blocked in plan mode +const DESTRUCTIVE_PATTERNS = [ + /\brm\b/i, + /\brmdir\b/i, + /\bmv\b/i, + /\bcp\b/i, + /\bmkdir\b/i, + /\btouch\b/i, + /\bchmod\b/i, + /\bchown\b/i, + /\bchgrp\b/i, + /\bln\b/i, + /\btee\b/i, + /\btruncate\b/i, + /\bdd\b/i, + /\bshred\b/i, + /(^|[^<])>(?!>)/, + />>/, + /\bnpm\s+(install|uninstall|update|ci|link|publish)/i, + /\byarn\s+(add|remove|install|publish)/i, + /\bpnpm\s+(add|remove|install|publish)/i, + /\bpip\s+(install|uninstall)/i, + /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i, + /\bbrew\s+(install|uninstall|upgrade)/i, + /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i, + /\bsudo\b/i, + /\bsu\b/i, + /\bkill\b/i, + /\bpkill\b/i, + /\bkillall\b/i, + /\breboot\b/i, + /\bshutdown\b/i, + /\bsystemctl\s+(start|stop|restart|enable|disable)/i, + /\bservice\s+\S+\s+(start|stop|restart)/i, + /\b(vim?|nano|emacs|code|subl)\b/i, +]; + +// Safe read-only commands allowed in plan mode +const SAFE_PATTERNS = [ + /^\s*cat\b/, + /^\s*head\b/, + /^\s*tail\b/, + /^\s*less\b/, + /^\s*more\b/, + /^\s*grep\b/, + /^\s*find\b/, + /^\s*ls\b/, + /^\s*pwd\b/, + /^\s*echo\b/, + /^\s*printf\b/, + /^\s*wc\b/, + /^\s*sort\b/, + /^\s*uniq\b/, + /^\s*diff\b/, + /^\s*file\b/, + /^\s*stat\b/, + /^\s*du\b/, + /^\s*df\b/, + /^\s*tree\b/, + /^\s*which\b/, + /^\s*whereis\b/, + /^\s*type\b/, + /^\s*env\b/, + /^\s*printenv\b/, + /^\s*uname\b/, + /^\s*whoami\b/, + /^\s*id\b/, + /^\s*date\b/, + /^\s*cal\b/, + /^\s*uptime\b/, + /^\s*ps\b/, + /^\s*top\b/, + /^\s*htop\b/, + /^\s*free\b/, + /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i, + /^\s*git\s+ls-/i, + /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i, + /^\s*yarn\s+(list|info|why|audit)/i, + /^\s*node\s+--version/i, + /^\s*python\s+--version/i, + /^\s*curl\s/i, + /^\s*wget\s+-O\s*-/i, + /^\s*jq\b/, + /^\s*sed\s+-n/i, + /^\s*awk\b/, + /^\s*rg\b/, + /^\s*fd\b/, + /^\s*bat\b/, + 
/^\s*exa\b/, +]; + +export function isSafeCommand(command: string): boolean { + const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(command)); + const isSafe = SAFE_PATTERNS.some((p) => p.test(command)); + return !isDestructive && isSafe; +} + +export interface TodoItem { + step: number; + text: string; + completed: boolean; +} + +export function cleanStepText(text: string): string { + let cleaned = text + .replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1") // Remove bold/italic + .replace(/`([^`]+)`/g, "$1") // Remove code + .replace( + /^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i, + "", + ) + .replace(/\s+/g, " ") + .trim(); + + if (cleaned.length > 0) { + cleaned = cleaned.charAt(0).toUpperCase() + cleaned.slice(1); + } + if (cleaned.length > 50) { + cleaned = `${cleaned.slice(0, 47)}...`; + } + return cleaned; +} + +export function extractTodoItems(message: string): TodoItem[] { + const items: TodoItem[] = []; + const headerMatch = message.match(/\*{0,2}Plan:\*{0,2}\s*\n/i); + if (!headerMatch) return items; + + const planSection = message.slice(message.indexOf(headerMatch[0]) + headerMatch[0].length); + const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm; + + for (const match of planSection.matchAll(numberedPattern)) { + const text = match[2] + .trim() + .replace(/\*{1,2}$/, "") + .trim(); + if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) { + const cleaned = cleanStepText(text); + if (cleaned.length > 3) { + items.push({ step: items.length + 1, text: cleaned, completed: false }); + } + } + } + return items; +} + +export function extractDoneSteps(message: string): number[] { + const steps: number[] = []; + for (const match of message.matchAll(/\[DONE:(\d+)\]/gi)) { + const step = Number(match[1]); + if (Number.isFinite(step)) steps.push(step); + } + return steps; +} + +export function markCompletedSteps(text: string, items: TodoItem[]): number { + 
// packages/coding-agent/test/plan-mode-utils.test.ts
import { describe, expect, it } from "vitest";
import {
	cleanStepText,
	extractDoneSteps,
	extractTodoItems,
	isSafeCommand,
	markCompletedSteps,
	type TodoItem,
} from "../examples/extensions/plan-mode/utils.js";

/** Expects isSafeCommand to return `expected` for every command, in order. */
function expectSafety(commands: string[], expected: boolean): void {
	for (const command of commands) {
		expect(isSafeCommand(command)).toBe(expected);
	}
}

/** Expects cleanStepText to map each `[input, output]` pair, in order. */
function expectCleaned(pairs: Array<[string, string]>): void {
	for (const [input, output] of pairs) {
		expect(cleanStepText(input)).toBe(output);
	}
}

/** Builds a multi-line message from individual lines. */
function lines(...parts: string[]): string {
	return parts.join("\n");
}

describe("isSafeCommand", () => {
	describe("safe commands", () => {
		it("allows basic read commands", () => {
			expectSafety(
				[
					"ls -la",
					"cat file.txt",
					"head -n 10 file.txt",
					"tail -f log.txt",
					"grep pattern file",
					"find . -name '*.ts'",
				],
				true,
			);
		});

		it("allows git read commands", () => {
			expectSafety(["git status", "git log --oneline", "git diff", "git branch"], true);
		});

		it("allows npm/yarn read commands", () => {
			expectSafety(["npm list", "npm outdated", "yarn info react"], true);
		});

		it("allows other safe commands", () => {
			expectSafety(["pwd", "echo hello", "wc -l file.txt", "du -sh .", "df -h"], true);
		});
	});

	describe("destructive commands", () => {
		it("blocks file modification commands", () => {
			expectSafety(
				["rm file.txt", "rm -rf dir", "mv old new", "cp src dst", "mkdir newdir", "touch newfile"],
				false,
			);
		});

		it("blocks git write commands", () => {
			expectSafety(
				["git add .", "git commit -m 'msg'", "git push", "git checkout main", "git reset --hard"],
				false,
			);
		});

		it("blocks package manager installs", () => {
			expectSafety(
				["npm install lodash", "yarn add react", "pip install requests", "brew install node"],
				false,
			);
		});

		it("blocks redirects", () => {
			expectSafety(["echo hello > file.txt", "cat foo >> bar", ">file.txt"], false);
		});

		it("blocks dangerous commands", () => {
			expectSafety(["sudo rm -rf /", "kill -9 1234", "reboot"], false);
		});

		it("blocks editors", () => {
			expectSafety(["vim file.txt", "nano file.txt", "code ."], false);
		});
	});

	describe("edge cases", () => {
		it("requires command to be in safe list (not just non-destructive)", () => {
			expectSafety(["unknown-command", "my-script.sh"], false);
		});

		it("handles commands with leading whitespace", () => {
			expectSafety([" ls -la"], true);
			expectSafety([" rm file"], false);
		});
	});
});

describe("cleanStepText", () => {
	it("removes markdown bold/italic", () => {
		expectCleaned([
			["**bold text**", "Bold text"],
			["*italic text*", "Italic text"],
		]);
	});

	it("removes markdown code", () => {
		expectCleaned([
			["run `npm install`", "Npm install"], // "run" is stripped as action word
			["check the `config.json` file", "Config.json file"],
		]);
	});

	it("removes leading action words", () => {
		expectCleaned([
			["Create the new file", "New file"],
			["Run the tests", "Tests"],
			["Check the status", "Status"],
		]);
	});

	it("capitalizes first letter", () => {
		expectCleaned([["update config", "Config"]]);
	});

	it("truncates long text", () => {
		const truncated = cleanStepText(
			"This is a very long step description that exceeds the maximum allowed length for display",
		);
		expect(truncated.length).toBe(50);
		expect(truncated.endsWith("...")).toBe(true);
	});

	it("normalizes whitespace", () => {
		expectCleaned([["multiple spaces  here", "Multiple spaces here"]]);
	});
});

describe("extractTodoItems", () => {
	it("extracts numbered items after Plan: header", () => {
		const [first, ...rest] = extractTodoItems(
			lines(
				"Here's what we'll do:",
				"",
				"Plan:",
				"1. First step here",
				"2. Second step here",
				"3. Third step here",
			),
		);

		expect([first, ...rest]).toHaveLength(3);
		expect(first.step).toBe(1);
		expect(first.text).toBe("First step here");
		expect(first.completed).toBe(false);
	});

	it("handles bold Plan header", () => {
		expect(extractTodoItems(lines("**Plan:**", "1. Do something"))).toHaveLength(1);
	});

	it("handles parenthesis-style numbering", () => {
		expect(extractTodoItems(lines("Plan:", "1) First item", "2) Second item"))).toHaveLength(2);
	});

	it("returns empty array without Plan header", () => {
		expect(extractTodoItems(lines("Here are some steps:", "1. First step", "2. Second step"))).toHaveLength(0);
	});

	it("filters out short items", () => {
		const kept = extractTodoItems(lines("Plan:", "1. OK", "2. This is a proper step"));

		expect(kept).toHaveLength(1);
		expect(kept[0].text).toContain("proper");
	});

	it("filters out code-like items", () => {
		expect(extractTodoItems(lines("Plan:", "1. `npm install`", "2. Run the build process"))).toHaveLength(1);
	});
});

describe("extractDoneSteps", () => {
	it("extracts single DONE marker", () => {
		expect(extractDoneSteps("I've completed the first step [DONE:1]")).toEqual([1]);
	});

	it("extracts multiple DONE markers", () => {
		expect(extractDoneSteps("Did steps [DONE:1] and [DONE:2] and [DONE:3]")).toEqual([1, 2, 3]);
	});

	it("handles case insensitivity", () => {
		expect(extractDoneSteps("[done:1] [DONE:2] [Done:3]")).toEqual([1, 2, 3]);
	});

	it("returns empty array with no markers", () => {
		expect(extractDoneSteps("No markers here")).toEqual([]);
	});

	it("ignores malformed markers", () => {
		expect(extractDoneSteps("[DONE:abc] [DONE:] [DONE:1]")).toEqual([1]);
	});
});

describe("markCompletedSteps", () => {
	/** Creates a single not-yet-completed (or completed) first step. */
	const singleStep = (completed: boolean): TodoItem[] => [{ step: 1, text: "First", completed }];

	it("marks matching items as completed", () => {
		const todo: TodoItem[] = ["First", "Second", "Third"].map((text, index) => ({
			step: index + 1,
			text,
			completed: false,
		}));

		const found = markCompletedSteps("[DONE:1] [DONE:3]", todo);

		expect(found).toBe(2);
		expect(todo[0].completed).toBe(true);
		expect(todo[1].completed).toBe(false);
		expect(todo[2].completed).toBe(true);
	});

	it("returns count of completed items", () => {
		const todo = singleStep(false);

		expect(markCompletedSteps("[DONE:1]", todo)).toBe(1);
		expect(markCompletedSteps("no markers", todo)).toBe(0);
	});

	it("ignores markers for non-existent steps", () => {
		const todo = singleStep(false);

		const found = markCompletedSteps("[DONE:99]", todo);

		expect(found).toBe(1); // Still counts the marker found
		expect(todo[0].completed).toBe(false); // But doesn't mark anything
	});

	it("doesn't double-complete already completed items", () => {
		const todo = singleStep(true);

		markCompletedSteps("[DONE:1]", todo);
		expect(todo[0].completed).toBe(true);
	});
});