feat(plan-mode): enhanced plan mode with explicit step tracking (#694)

Changes from the original:
- Explicit [DONE:n] tag tracking (more accurate than auto-marking on tool_result)
- Plan: header requirement - only extracts todos from 'Plan:' sections
- Utils extracted to separate file for testability
- Better session resume - only scans messages after plan-mode-execute marker
- Context filtering - properly filters plan-mode-context custom type messages
- Refactored to directory structure (index.ts + utils.ts + README.md)

The original auto-completed steps on every tool_result, which was inaccurate
for multi-tool steps. This version uses explicit [DONE:n] markers that the
agent outputs after completing each step.
This commit is contained in:
Fero 2026-01-13 17:53:11 +01:00 committed by GitHub
parent a7a863c792
commit e8f1322eee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 834 additions and 548 deletions

View file

@ -0,0 +1,65 @@
# Plan Mode Extension
Read-only exploration mode for safe code analysis.
## Features
- **Read-only tools**: Restricts available tools to read, bash, grep, find, ls, question
- **Bash allowlist**: Only read-only bash commands are allowed
- **Plan extraction**: Extracts numbered steps from `Plan:` sections
- **Progress tracking**: Widget shows completion status during execution
- **[DONE:n] markers**: Explicit step completion tracking
- **Session persistence**: State survives session resume
## Commands
- `/plan` - Toggle plan mode
- `/todos` - Show current plan progress
- `Shift+P` - Toggle plan mode (shortcut)
## Usage
1. Enable plan mode with `/plan` or `--plan` flag
2. Ask the agent to analyze code and create a plan
3. The agent should output a numbered plan under a `Plan:` header:
```
Plan:
1. First step description
2. Second step description
3. Third step description
```
4. Choose "Execute the plan" when prompted
5. During execution, the agent marks steps complete with `[DONE:n]` tags
6. Progress widget shows completion status
## How It Works
### Plan Mode (Read-Only)
- Only read-only tools available
- Bash commands filtered through allowlist
- Agent creates a plan without making changes
### Execution Mode
- Full tool access restored
- Agent executes steps in order
- `[DONE:n]` markers track completion
- Widget shows progress
### Command Allowlist
Safe commands (allowed):
- File inspection: `cat`, `head`, `tail`, `less`, `more`
- Search: `grep`, `find`, `rg`, `fd`
- Directory: `ls`, `pwd`, `tree`
- Git read: `git status`, `git log`, `git diff`, `git branch`
- Package info: `npm list`, `npm outdated`, `yarn info`
- System info: `uname`, `whoami`, `date`, `uptime`
Blocked commands:
- File modification: `rm`, `mv`, `cp`, `mkdir`, `touch`
- Git write: `git add`, `git commit`, `git push`
- Package install: `npm install`, `yarn add`, `pip install`
- System: `sudo`, `kill`, `reboot`
- Editors: `vim`, `nano`, `code`

View file

@ -0,0 +1,340 @@
/**
* Plan Mode Extension
*
* Read-only exploration mode for safe code analysis.
* When enabled, only read-only tools are available.
*
* Features:
* - /plan command or Shift+P to toggle
* - Bash restricted to allowlisted read-only commands
* - Extracts numbered plan steps from "Plan:" sections
* - [DONE:n] markers to complete steps during execution
* - Progress tracking widget during execution
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, TextContent } from "@mariozechner/pi-ai";
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
import { Key } from "@mariozechner/pi-tui";
import { extractTodoItems, isSafeCommand, markCompletedSteps, type TodoItem } from "./utils.js";
// Tools
const PLAN_MODE_TOOLS = ["read", "bash", "grep", "find", "ls", "questionnaire"];
const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"];
// Type guard for assistant messages
function isAssistantMessage(m: AgentMessage): m is AssistantMessage {
return m.role === "assistant" && Array.isArray(m.content);
}
// Extract text content from an assistant message
function getTextContent(message: AssistantMessage): string {
return message.content
.filter((block): block is TextContent => block.type === "text")
.map((block) => block.text)
.join("\n");
}
export default function planModeExtension(pi: ExtensionAPI): void {
let planModeEnabled = false;
let executionMode = false;
let todoItems: TodoItem[] = [];
pi.registerFlag("plan", {
description: "Start in plan mode (read-only exploration)",
type: "boolean",
default: false,
});
function updateStatus(ctx: ExtensionContext): void {
// Footer status
if (executionMode && todoItems.length > 0) {
const completed = todoItems.filter((t) => t.completed).length;
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
} else if (planModeEnabled) {
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
} else {
ctx.ui.setStatus("plan-mode", undefined);
}
// Widget showing todo list
if (executionMode && todoItems.length > 0) {
const lines = todoItems.map((item) => {
if (item.completed) {
return (
ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text))
);
}
return `${ctx.ui.theme.fg("muted", "☐ ")}${item.text}`;
});
ctx.ui.setWidget("plan-todos", lines);
} else {
ctx.ui.setWidget("plan-todos", undefined);
}
}
function togglePlanMode(ctx: ExtensionContext): void {
planModeEnabled = !planModeEnabled;
executionMode = false;
todoItems = [];
if (planModeEnabled) {
pi.setActiveTools(PLAN_MODE_TOOLS);
ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
} else {
pi.setActiveTools(NORMAL_MODE_TOOLS);
ctx.ui.notify("Plan mode disabled. Full access restored.");
}
updateStatus(ctx);
}
function persistState(): void {
pi.appendEntry("plan-mode", {
enabled: planModeEnabled,
todos: todoItems,
executing: executionMode,
});
}
pi.registerCommand("plan", {
description: "Toggle plan mode (read-only exploration)",
handler: async (_args, ctx) => togglePlanMode(ctx),
});
pi.registerCommand("todos", {
description: "Show current plan todo list",
handler: async (_args, ctx) => {
if (todoItems.length === 0) {
ctx.ui.notify("No todos. Create a plan first with /plan", "info");
return;
}
const list = todoItems.map((item, i) => `${i + 1}. ${item.completed ? "✓" : "○"} ${item.text}`).join("\n");
ctx.ui.notify(`Plan Progress:\n${list}`, "info");
},
});
pi.registerShortcut(Key.shift("p"), {
description: "Toggle plan mode",
handler: async (ctx) => togglePlanMode(ctx),
});
// Block destructive bash commands in plan mode
pi.on("tool_call", async (event) => {
if (!planModeEnabled || event.toolName !== "bash") return;
const command = event.input.command as string;
if (!isSafeCommand(command)) {
return {
block: true,
reason: `Plan mode: command blocked (not allowlisted). Use /plan to disable plan mode first.\nCommand: ${command}`,
};
}
});
// Filter out stale plan mode context when not in plan mode
pi.on("context", async (event) => {
if (planModeEnabled) return;
return {
messages: event.messages.filter((m) => {
const msg = m as AgentMessage & { customType?: string };
if (msg.customType === "plan-mode-context") return false;
if (msg.role !== "user") return true;
const content = msg.content;
if (typeof content === "string") {
return !content.includes("[PLAN MODE ACTIVE]");
}
if (Array.isArray(content)) {
return !content.some(
(c) => c.type === "text" && (c as TextContent).text?.includes("[PLAN MODE ACTIVE]"),
);
}
return true;
}),
};
});
// Inject plan/execution context before agent starts
pi.on("before_agent_start", async () => {
if (planModeEnabled) {
return {
message: {
customType: "plan-mode-context",
content: `[PLAN MODE ACTIVE]
You are in plan mode - a read-only exploration mode for safe code analysis.
Restrictions:
- You can only use: read, bash, grep, find, ls, questionnaire
- You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to an allowlist of read-only commands
Ask clarifying questions using the questionnaire tool.
Use brave-search skill via bash for web research.
Create a detailed numbered plan under a "Plan:" header:
Plan:
1. First step description
2. Second step description
...
Do NOT attempt to make changes - just describe what you would do.`,
display: false,
},
};
}
if (executionMode && todoItems.length > 0) {
const remaining = todoItems.filter((t) => !t.completed);
const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
return {
message: {
customType: "plan-execution-context",
content: `[EXECUTING PLAN - Full tool access enabled]
Remaining steps:
${todoList}
Execute each step in order.
After completing a step, include a [DONE:n] tag in your response.`,
display: false,
},
};
}
});
// Track progress after each turn
pi.on("turn_end", async (event, ctx) => {
if (!executionMode || todoItems.length === 0) return;
if (!isAssistantMessage(event.message)) return;
const text = getTextContent(event.message);
if (markCompletedSteps(text, todoItems) > 0) {
updateStatus(ctx);
}
persistState();
});
// Handle plan completion and plan mode UI
pi.on("agent_end", async (event, ctx) => {
// Check if execution is complete
if (executionMode && todoItems.length > 0) {
if (todoItems.every((t) => t.completed)) {
const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
pi.sendMessage(
{ customType: "plan-complete", content: `**Plan Complete!** ✓\n\n${completedList}`, display: true },
{ triggerTurn: false },
);
executionMode = false;
todoItems = [];
pi.setActiveTools(NORMAL_MODE_TOOLS);
updateStatus(ctx);
persistState(); // Save cleared state so resume doesn't restore old execution mode
}
return;
}
if (!planModeEnabled || !ctx.hasUI) return;
// Extract todos from last assistant message
const lastAssistant = [...event.messages].reverse().find(isAssistantMessage);
if (lastAssistant) {
const extracted = extractTodoItems(getTextContent(lastAssistant));
if (extracted.length > 0) {
todoItems = extracted;
}
}
// Show plan steps and prompt for next action
if (todoItems.length > 0) {
const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
pi.sendMessage(
{
customType: "plan-todo-list",
content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
display: true,
},
{ triggerTurn: false },
);
}
const choice = await ctx.ui.select("Plan mode - what next?", [
todoItems.length > 0 ? "Execute the plan (track progress)" : "Execute the plan",
"Stay in plan mode",
"Refine the plan",
]);
if (choice?.startsWith("Execute")) {
planModeEnabled = false;
executionMode = todoItems.length > 0;
pi.setActiveTools(NORMAL_MODE_TOOLS);
updateStatus(ctx);
const execMessage =
todoItems.length > 0
? `Execute the plan. Start with: ${todoItems[0].text}`
: "Execute the plan you just created.";
pi.sendMessage(
{ customType: "plan-mode-execute", content: execMessage, display: true },
{ triggerTurn: true },
);
} else if (choice === "Refine the plan") {
const refinement = await ctx.ui.editor("Refine the plan:", "");
if (refinement?.trim()) {
pi.sendUserMessage(refinement.trim());
}
}
});
// Restore state on session start/resume
pi.on("session_start", async (_event, ctx) => {
if (pi.getFlag("plan") === true) {
planModeEnabled = true;
}
const entries = ctx.sessionManager.getEntries();
// Restore persisted state
const planModeEntry = entries
.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
if (planModeEntry?.data) {
planModeEnabled = planModeEntry.data.enabled ?? planModeEnabled;
todoItems = planModeEntry.data.todos ?? todoItems;
executionMode = planModeEntry.data.executing ?? executionMode;
}
// On resume: re-scan messages to rebuild completion state
// Only scan messages AFTER the last "plan-mode-execute" to avoid picking up [DONE:n] from previous plans
const isResume = planModeEntry !== undefined;
if (isResume && executionMode && todoItems.length > 0) {
// Find the index of the last plan-mode-execute entry (marks when current execution started)
let executeIndex = -1;
for (let i = entries.length - 1; i >= 0; i--) {
const entry = entries[i] as { type: string; customType?: string };
if (entry.customType === "plan-mode-execute") {
executeIndex = i;
break;
}
}
// Only scan messages after the execute marker
const messages: AssistantMessage[] = [];
for (let i = executeIndex + 1; i < entries.length; i++) {
const entry = entries[i];
if (entry.type === "message" && "message" in entry && isAssistantMessage(entry.message as AgentMessage)) {
messages.push(entry.message as AssistantMessage);
}
}
const allText = messages.map(getTextContent).join("\n");
markCompletedSteps(allText, todoItems);
}
if (planModeEnabled) {
pi.setActiveTools(PLAN_MODE_TOOLS);
}
updateStatus(ctx);
});
}

View file

@ -0,0 +1,168 @@
/**
* Pure utility functions for plan mode.
* Extracted for testability.
*/
// Destructive commands blocked in plan mode
const DESTRUCTIVE_PATTERNS = [
/\brm\b/i,
/\brmdir\b/i,
/\bmv\b/i,
/\bcp\b/i,
/\bmkdir\b/i,
/\btouch\b/i,
/\bchmod\b/i,
/\bchown\b/i,
/\bchgrp\b/i,
/\bln\b/i,
/\btee\b/i,
/\btruncate\b/i,
/\bdd\b/i,
/\bshred\b/i,
/(^|[^<])>(?!>)/,
/>>/,
/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
/\byarn\s+(add|remove|install|publish)/i,
/\bpnpm\s+(add|remove|install|publish)/i,
/\bpip\s+(install|uninstall)/i,
/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
/\bbrew\s+(install|uninstall|upgrade)/i,
/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
/\bsudo\b/i,
/\bsu\b/i,
/\bkill\b/i,
/\bpkill\b/i,
/\bkillall\b/i,
/\breboot\b/i,
/\bshutdown\b/i,
/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
/\bservice\s+\S+\s+(start|stop|restart)/i,
/\b(vim?|nano|emacs|code|subl)\b/i,
];
// Safe read-only commands allowed in plan mode
const SAFE_PATTERNS = [
/^\s*cat\b/,
/^\s*head\b/,
/^\s*tail\b/,
/^\s*less\b/,
/^\s*more\b/,
/^\s*grep\b/,
/^\s*find\b/,
/^\s*ls\b/,
/^\s*pwd\b/,
/^\s*echo\b/,
/^\s*printf\b/,
/^\s*wc\b/,
/^\s*sort\b/,
/^\s*uniq\b/,
/^\s*diff\b/,
/^\s*file\b/,
/^\s*stat\b/,
/^\s*du\b/,
/^\s*df\b/,
/^\s*tree\b/,
/^\s*which\b/,
/^\s*whereis\b/,
/^\s*type\b/,
/^\s*env\b/,
/^\s*printenv\b/,
/^\s*uname\b/,
/^\s*whoami\b/,
/^\s*id\b/,
/^\s*date\b/,
/^\s*cal\b/,
/^\s*uptime\b/,
/^\s*ps\b/,
/^\s*top\b/,
/^\s*htop\b/,
/^\s*free\b/,
/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
/^\s*git\s+ls-/i,
/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
/^\s*yarn\s+(list|info|why|audit)/i,
/^\s*node\s+--version/i,
/^\s*python\s+--version/i,
/^\s*curl\s/i,
/^\s*wget\s+-O\s*-/i,
/^\s*jq\b/,
/^\s*sed\s+-n/i,
/^\s*awk\b/,
/^\s*rg\b/,
/^\s*fd\b/,
/^\s*bat\b/,
/^\s*exa\b/,
];
export function isSafeCommand(command: string): boolean {
const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(command));
const isSafe = SAFE_PATTERNS.some((p) => p.test(command));
return !isDestructive && isSafe;
}
export interface TodoItem {
step: number;
text: string;
completed: boolean;
}
export function cleanStepText(text: string): string {
let cleaned = text
.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1") // Remove bold/italic
.replace(/`([^`]+)`/g, "$1") // Remove code
.replace(
/^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
"",
)
.replace(/\s+/g, " ")
.trim();
if (cleaned.length > 0) {
cleaned = cleaned.charAt(0).toUpperCase() + cleaned.slice(1);
}
if (cleaned.length > 50) {
cleaned = `${cleaned.slice(0, 47)}...`;
}
return cleaned;
}
export function extractTodoItems(message: string): TodoItem[] {
const items: TodoItem[] = [];
const headerMatch = message.match(/\*{0,2}Plan:\*{0,2}\s*\n/i);
if (!headerMatch) return items;
const planSection = message.slice(message.indexOf(headerMatch[0]) + headerMatch[0].length);
const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm;
for (const match of planSection.matchAll(numberedPattern)) {
const text = match[2]
.trim()
.replace(/\*{1,2}$/, "")
.trim();
if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) {
const cleaned = cleanStepText(text);
if (cleaned.length > 3) {
items.push({ step: items.length + 1, text: cleaned, completed: false });
}
}
}
return items;
}
export function extractDoneSteps(message: string): number[] {
const steps: number[] = [];
for (const match of message.matchAll(/\[DONE:(\d+)\]/gi)) {
const step = Number(match[1]);
if (Number.isFinite(step)) steps.push(step);
}
return steps;
}
export function markCompletedSteps(text: string, items: TodoItem[]): number {
const doneSteps = extractDoneSteps(text);
for (const step of doneSteps) {
const item = items.find((t) => t.step === step);
if (item) item.completed = true;
}
return doneSteps.length;
}