feat(plan-mode): use ID-based todo tracking with [DONE:id] tags

- Each todo item gets a unique ID (e.g., abc123)
- Agent marks items complete by outputting [DONE:id]
- IDs shown in chat and in execution context
- Agent instructed to output [DONE:id] after each step
- Removed unreliable tool-counting heuristics
This commit is contained in:
Helmut Januschka 2026-01-03 16:20:26 +01:00 committed by Mario Zechner
parent e781c9a466
commit 7a03f57fbe

View file

@ -11,6 +11,7 @@
* - After each agent response, prompts to execute the plan or continue planning * - After each agent response, prompts to execute the plan or continue planning
* - Shows "plan" indicator in footer when active * - Shows "plan" indicator in footer when active
* - Extracts todo list from plan and tracks progress during execution * - Extracts todo list from plan and tracks progress during execution
* - Agent marks steps complete by outputting [DONE:id] tags
* *
* Usage: * Usage:
* 1. Copy this file to ~/.pi/agent/hooks/ or your project's .pi/hooks/ * 1. Copy this file to ~/.pi/agent/hooks/ or your project's .pi/hooks/
@ -28,35 +29,29 @@ const NORMAL_MODE_TOOLS = ["read", "bash", "edit", "write"];
// Patterns for destructive bash commands that should be blocked in plan mode // Patterns for destructive bash commands that should be blocked in plan mode
const DESTRUCTIVE_PATTERNS = [ const DESTRUCTIVE_PATTERNS = [
// File/directory modification
/\brm\b/i, /\brm\b/i,
/\brmdir\b/i, /\brmdir\b/i,
/\bmv\b/i, /\bmv\b/i,
/\bcp\b/i, // cp can overwrite files /\bcp\b/i,
/\bmkdir\b/i, /\bmkdir\b/i,
/\btouch\b/i, /\btouch\b/i,
/\bchmod\b/i, /\bchmod\b/i,
/\bchown\b/i, /\bchown\b/i,
/\bchgrp\b/i, /\bchgrp\b/i,
/\bln\b/i, // symlinks /\bln\b/i,
// File content modification
/\btee\b/i, /\btee\b/i,
/\btruncate\b/i, /\btruncate\b/i,
/\bdd\b/i, /\bdd\b/i,
/\bshred\b/i, /\bshred\b/i,
// Redirects that write to files /[^<]>(?!>)/,
/[^<]>(?!>)/, // > but not >> or <> />>/,
/>>/, // append
// Package managers / installers
/\bnpm\s+(install|uninstall|update|ci|link|publish)/i, /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
/\byarn\s+(add|remove|install|publish)/i, /\byarn\s+(add|remove|install|publish)/i,
/\bpnpm\s+(add|remove|install|publish)/i, /\bpnpm\s+(add|remove|install|publish)/i,
/\bpip\s+(install|uninstall)/i, /\bpip\s+(install|uninstall)/i,
/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i, /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
/\bbrew\s+(install|uninstall|upgrade)/i, /\bbrew\s+(install|uninstall|upgrade)/i,
// Git write operations
/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i, /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
// Other dangerous commands
/\bsudo\b/i, /\bsudo\b/i,
/\bsu\b/i, /\bsu\b/i,
/\bkill\b/i, /\bkill\b/i,
@ -66,7 +61,6 @@ const DESTRUCTIVE_PATTERNS = [
/\bshutdown\b/i, /\bshutdown\b/i,
/\bsystemctl\s+(start|stop|restart|enable|disable)/i, /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
/\bservice\s+\S+\s+(start|stop|restart)/i, /\bservice\s+\S+\s+(start|stop|restart)/i,
// Editors (interactive, could modify files)
/\b(vim?|nano|emacs|code|subl)\b/i, /\b(vim?|nano|emacs|code|subl)\b/i,
]; ];
@ -113,77 +107,65 @@ const SAFE_COMMANDS = [
/^\s*yarn\s+(list|info|why|audit)/i, /^\s*yarn\s+(list|info|why|audit)/i,
/^\s*node\s+--version/i, /^\s*node\s+--version/i,
/^\s*python\s+--version/i, /^\s*python\s+--version/i,
/^\s*curl\s/i, // curl without -o is usually safe (reading) /^\s*curl\s/i,
/^\s*wget\s+-O\s*-/i, // wget to stdout only /^\s*wget\s+-O\s*-/i,
/^\s*jq\b/, /^\s*jq\b/,
/^\s*sed\s+-n/i, // sed with -n (no auto-print) for reading only /^\s*sed\s+-n/i,
/^\s*awk\b/, /^\s*awk\b/,
/^\s*rg\b/, // ripgrep /^\s*rg\b/,
/^\s*fd\b/, // fd-find /^\s*fd\b/,
/^\s*bat\b/, // bat (cat clone) /^\s*bat\b/,
/^\s*exa\b/, // exa (ls clone) /^\s*exa\b/,
]; ];
/**
* Check if a bash command is safe (read-only) for plan mode.
*/
function isSafeCommand(command: string): boolean { function isSafeCommand(command: string): boolean {
// Check if it's an explicitly safe command
if (SAFE_COMMANDS.some((pattern) => pattern.test(command))) { if (SAFE_COMMANDS.some((pattern) => pattern.test(command))) {
// But still check for destructive patterns (e.g., cat > file)
if (!DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) { if (!DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
return true; return true;
} }
} }
// Check for destructive patterns
if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) { if (DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command))) {
return false; return false;
} }
// Allow commands that don't match any destructive pattern
// This is permissive - unknown commands are allowed
return true; return true;
} }
// Todo item for plan execution tracking // Todo item with unique ID
interface TodoItem { interface TodoItem {
id: string;
text: string; text: string;
completed: boolean; completed: boolean;
} }
/**
 * Generate a short random ID for a todo item.
 *
 * Always returns exactly six characters drawn from [a-z0-9], so the result
 * can be embedded in a `[DONE:id]` tag and matched case-insensitively.
 *
 * The previous `Math.random().toString(36).substring(2, 8)` form could yield
 * fewer than six characters (even an empty string) whenever the random value
 * had a short base-36 expansion, e.g. 0.5 -> "0.i" -> "i".
 *
 * @returns A six-character lowercase alphanumeric ID. IDs are random, not
 *          guaranteed unique.
 */
function generateId(): string {
  const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
  const length = 6;
  let id = "";
  for (let i = 0; i < length; i++) {
    // Math.random() is in [0, 1), so the index is always within the alphabet.
    id += alphabet.charAt(Math.floor(Math.random() * alphabet.length));
  }
  return id;
}
/** /**
* Extract todo items from assistant message. * Extract todo items from assistant message and assign IDs.
* Looks for numbered lists like:
* 1. First task
* 2. Second task
* Or bullet points with step indicators:
* - Step 1: Do something
* - Step 2: Do another thing
*/ */
function extractTodoItems(message: string): TodoItem[] { function extractTodoItems(message: string): TodoItem[] {
const items: TodoItem[] = []; const items: TodoItem[] = [];
// Match numbered lists: "1. Task" or "1) Task" (handles markdown bold like "1. **Task**") // Match numbered lists: "1. Task" or "1) Task"
const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm; const numberedPattern = /^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm;
for (const match of message.matchAll(numberedPattern)) { for (const match of message.matchAll(numberedPattern)) {
let text = match[2].trim(); let text = match[2].trim();
// Remove trailing ** if present
text = text.replace(/\*{1,2}$/, "").trim(); text = text.replace(/\*{1,2}$/, "").trim();
// Skip if it's just a file path, code reference, or sub-item
if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) { if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) {
items.push({ text, completed: false }); items.push({ id: generateId(), text, completed: false });
} }
} }
// If no numbered items found, try bullet points with "Step" prefix // If no numbered items, try bullet points
if (items.length === 0) { if (items.length === 0) {
const stepPattern = /^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim; const stepPattern = /^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim;
for (const match of message.matchAll(stepPattern)) { for (const match of message.matchAll(stepPattern)) {
let text = match[1].trim(); let text = match[1].trim();
text = text.replace(/\*{1,2}$/, "").trim(); text = text.replace(/\*{1,2}$/, "").trim();
if (text.length > 10 && !text.startsWith("`")) { if (text.length > 10 && !text.startsWith("`")) {
items.push({ text, completed: false }); items.push({ id: generateId(), text, completed: false });
} }
} }
} }
@ -192,37 +174,20 @@ function extractTodoItems(message: string): TodoItem[] {
} }
/** /**
* Try to match a tool call or message to a todo item. * Find [DONE:id] tags in text and return the IDs.
* Returns the index of the matching item, or -1 if no match.
*/ */
function matchTodoItem(todos: TodoItem[], action: string): number { function findDoneTags(text: string): string[] {
const actionLower = action.toLowerCase(); const pattern = /\[DONE:([a-z0-9]+)\]/gi;
const ids: string[] = [];
for (let i = 0; i < todos.length; i++) { for (const match of text.matchAll(pattern)) {
if (todos[i].completed) continue; ids.push(match[1].toLowerCase());
const todoLower = todos[i].text.toLowerCase();
// Check for keyword overlap
const todoWords = todoLower.split(/\s+/).filter((w) => w.length > 3);
const matchCount = todoWords.filter((w) => actionLower.includes(w)).length;
// If more than 30% of significant words match, consider it a match
if (todoWords.length > 0 && matchCount / todoWords.length > 0.3) {
return i;
}
} }
return ids;
return -1;
} }
export default function planModeHook(pi: HookAPI) { export default function planModeHook(pi: HookAPI) {
// Track plan mode state
let planModeEnabled = false; let planModeEnabled = false;
// Track execution mode (after plan confirmed)
let executionMode = false; let executionMode = false;
// Todo list extracted from plan
let todoItems: TodoItem[] = []; let todoItems: TodoItem[] = [];
// Register --plan CLI flag // Register --plan CLI flag
@ -234,7 +199,6 @@ export default function planModeHook(pi: HookAPI) {
// Helper to update status displays // Helper to update status displays
function updateStatus(ctx: HookContext) { function updateStatus(ctx: HookContext) {
// Update footer status
if (executionMode && todoItems.length > 0) { if (executionMode && todoItems.length > 0) {
const completed = todoItems.filter((t) => t.completed).length; const completed = todoItems.filter((t) => t.completed).length;
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`)); ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
@ -244,7 +208,7 @@ export default function planModeHook(pi: HookAPI) {
ctx.ui.setStatus("plan-mode", undefined); ctx.ui.setStatus("plan-mode", undefined);
} }
// Update widget with todo list (only during execution mode) // Show widget during execution
if (executionMode && todoItems.length > 0) { if (executionMode && todoItems.length > 0) {
const lines: string[] = []; const lines: string[] = [];
for (const item of todoItems) { for (const item of todoItems) {
@ -262,7 +226,6 @@ export default function planModeHook(pi: HookAPI) {
} }
} }
// Helper to toggle plan mode
function togglePlanMode(ctx: HookContext) { function togglePlanMode(ctx: HookContext) {
planModeEnabled = !planModeEnabled; planModeEnabled = !planModeEnabled;
executionMode = false; executionMode = false;
@ -286,7 +249,7 @@ export default function planModeHook(pi: HookAPI) {
}, },
}); });
// Register /todos command to show current todo list // Register /todos command
pi.registerCommand("todos", { pi.registerCommand("todos", {
description: "Show current plan todo list", description: "Show current plan todo list",
handler: async (_args, ctx) => { handler: async (_args, ctx) => {
@ -296,12 +259,9 @@ export default function planModeHook(pi: HookAPI) {
} }
const todoList = todoItems const todoList = todoItems
.map((item, i) => { .map((item) => {
const checkbox = item.completed ? "✓" : "○"; const checkbox = item.completed ? "✓" : "○";
const style = item.completed return `[${item.id}] ${checkbox} ${item.text}`;
? ctx.ui.theme.fg("muted", `${checkbox} ${item.text}`)
: `${checkbox} ${item.text}`;
return `${i + 1}. ${style}`;
}) })
.join("\n"); .join("\n");
@ -317,18 +277,8 @@ export default function planModeHook(pi: HookAPI) {
}, },
}); });
// Block destructive bash commands in plan mode // Block destructive bash in plan mode
pi.on("tool_call", async (event, ctx) => { pi.on("tool_call", async (event) => {
// Track progress in execution mode
if (executionMode && todoItems.length > 0) {
const action = `${event.toolName}: ${JSON.stringify(event.input).slice(0, 200)}`;
const matchIdx = matchTodoItem(todoItems, action);
if (matchIdx >= 0) {
todoItems[matchIdx].completed = true;
updateStatus(ctx);
}
}
if (!planModeEnabled) return; if (!planModeEnabled) return;
if (event.toolName !== "bash") return; if (event.toolName !== "bash") return;
@ -341,48 +291,89 @@ export default function planModeHook(pi: HookAPI) {
} }
}); });
// Inject plan mode context at the start of each turn via before_agent_start // Check for [DONE:id] tags after each tool result (agent may output them in tool-related text)
pi.on("before_agent_start", async () => { pi.on("tool_result", async (_event, ctx) => {
if (!planModeEnabled) return; if (!executionMode || todoItems.length === 0) return;
// The actual checking happens in agent_end when we have the full message
// But we update status here to keep UI responsive
updateStatus(ctx);
});
// Return a message to inject into context // Inject plan mode context
return { pi.on("before_agent_start", async () => {
message: { if (!planModeEnabled && !executionMode) return;
customType: "plan-mode-context",
content: `[PLAN MODE ACTIVE] if (planModeEnabled) {
return {
message: {
customType: "plan-mode-context",
content: `[PLAN MODE ACTIVE]
You are in plan mode - a read-only exploration mode for safe code analysis. You are in plan mode - a read-only exploration mode for safe code analysis.
Restrictions: Restrictions:
- You can only use: read, bash, grep, find, ls - You can only use: read, bash, grep, find, ls
- You CANNOT use: edit, write (file modifications are disabled) - You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to READ-ONLY commands (cat, ls, grep, git status, etc.) - Bash is restricted to READ-ONLY commands
- Destructive bash commands are BLOCKED (rm, mv, cp, git commit, npm install, etc.)
- Focus on analysis, planning, and understanding the codebase - Focus on analysis, planning, and understanding the codebase
Your task is to explore, analyze, and create a detailed plan. Create a detailed numbered plan:
IMPORTANT: When you have a complete plan, format it as a numbered list:
1. First step description 1. First step description
2. Second step description 2. Second step description
3. Third step description
... ...
This format allows tracking progress during execution.
Do NOT attempt to make changes - just describe what you would do.`, Do NOT attempt to make changes - just describe what you would do.`,
display: false, // Don't show in TUI, just inject into context display: false,
}, },
}; };
}
if (executionMode && todoItems.length > 0) {
const todoList = todoItems.map((t) => `- [${t.id}] ${t.completed ? "☑" : "☐"} ${t.text}`).join("\n");
return {
message: {
customType: "plan-execution-context",
content: `[EXECUTING PLAN]
You have a plan with ${todoItems.length} steps. After completing each step, output [DONE:id] to mark it complete.
Current plan status:
${todoList}
IMPORTANT: After completing each step, output [DONE:id] where id is the step's ID (e.g., [DONE:${todoItems.find((t) => !t.completed)?.id || todoItems[0].id}]).`,
display: false,
},
};
}
}); });
// After agent finishes, offer to execute the plan // After agent finishes in plan mode
pi.on("agent_end", async (event, ctx) => { pi.on("agent_end", async (event, ctx) => {
// In execution mode, check if all todos are complete // Check for done tags in the final message too
if (executionMode && todoItems.length > 0) { if (executionMode && todoItems.length > 0) {
const messages = event.messages;
const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
if (lastAssistant && Array.isArray(lastAssistant.content)) {
const textContent = lastAssistant.content
.filter((block): block is { type: "text"; text: string } => block.type === "text")
.map((block) => block.text)
.join("\n");
const doneIds = findDoneTags(textContent);
for (const id of doneIds) {
const item = todoItems.find((t) => t.id === id);
if (item && !item.completed) {
item.completed = true;
}
}
updateStatus(ctx);
}
// Check if all complete
const allComplete = todoItems.every((t) => t.completed); const allComplete = todoItems.every((t) => t.completed);
if (allComplete) { if (allComplete) {
ctx.ui.notify("Plan execution complete!", "info"); ctx.ui.notify("Plan execution complete!", "info");
executionMode = false; executionMode = false;
todoItems = []; todoItems = [];
pi.setTools(NORMAL_MODE_TOOLS);
updateStatus(ctx); updateStatus(ctx);
} }
return; return;
@ -391,11 +382,10 @@ Do NOT attempt to make changes - just describe what you would do.`,
if (!planModeEnabled) return; if (!planModeEnabled) return;
if (!ctx.hasUI) return; if (!ctx.hasUI) return;
// Try to extract todo items from the last message // Extract todos from last message
const messages = event.messages; const messages = event.messages;
const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant"); const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
if (lastAssistant && Array.isArray(lastAssistant.content)) { if (lastAssistant && Array.isArray(lastAssistant.content)) {
// Extract text from content blocks
const textContent = lastAssistant.content const textContent = lastAssistant.content
.filter((block): block is { type: "text"; text: string } => block.type === "text") .filter((block): block is { type: "text"; text: string } => block.type === "text")
.map((block) => block.text) .map((block) => block.text)
@ -411,9 +401,9 @@ Do NOT attempt to make changes - just describe what you would do.`,
const hasTodos = todoItems.length > 0; const hasTodos = todoItems.length > 0;
// Show todo list in chat if we extracted items // Show todo list in chat with IDs
if (hasTodos) { if (hasTodos) {
const todoListText = todoItems.map((t, i) => `${i + 1}. ${t.text}`).join("\n"); const todoListText = todoItems.map((t) => `☐ [${t.id}] ${t.text}`).join("\n");
pi.sendMessage( pi.sendMessage(
{ {
customType: "plan-todo-list", customType: "plan-todo-list",
@ -431,15 +421,13 @@ Do NOT attempt to make changes - just describe what you would do.`,
]); ]);
if (choice?.startsWith("Execute")) { if (choice?.startsWith("Execute")) {
// Switch to normal mode
planModeEnabled = false; planModeEnabled = false;
executionMode = hasTodos; executionMode = hasTodos;
pi.setTools(NORMAL_MODE_TOOLS); pi.setTools(NORMAL_MODE_TOOLS);
updateStatus(ctx); // This will now show the widget during execution updateStatus(ctx);
// Send message to trigger execution immediately
const execMessage = hasTodos const execMessage = hasTodos
? `Execute the plan you just created. There are ${todoItems.length} steps to complete. Proceed step by step, announcing each step as you work on it.` ? `Execute the plan. After completing each step, output [DONE:id] where id is the step's ID. Start with step [${todoItems[0].id}]: ${todoItems[0].text}`
: "Execute the plan you just created. Proceed step by step."; : "Execute the plan you just created. Proceed step by step.";
pi.sendMessage( pi.sendMessage(
@ -456,23 +444,19 @@ Do NOT attempt to make changes - just describe what you would do.`,
ctx.ui.setEditorText(refinement); ctx.ui.setEditorText(refinement);
} }
} }
// "Stay in plan mode" - do nothing, just continue
}); });
// Initialize plan mode state on session start // Initialize state on session start
pi.on("session_start", async (_event, ctx) => { pi.on("session_start", async (_event, ctx) => {
// Check --plan flag first
if (pi.getFlag("plan") === true) { if (pi.getFlag("plan") === true) {
planModeEnabled = true; planModeEnabled = true;
} }
// Check if there's persisted plan mode state (from previous session)
const entries = ctx.sessionManager.getEntries(); const entries = ctx.sessionManager.getEntries();
const planModeEntry = entries const planModeEntry = entries
.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode") .filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined; .pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;
// Restore from session (overrides flag if session has state)
if (planModeEntry?.data) { if (planModeEntry?.data) {
if (planModeEntry.data.enabled !== undefined) { if (planModeEntry.data.enabled !== undefined) {
planModeEnabled = planModeEntry.data.enabled; planModeEnabled = planModeEntry.data.enabled;
@ -485,16 +469,14 @@ Do NOT attempt to make changes - just describe what you would do.`,
} }
} }
// Apply initial state if plan mode is enabled
if (planModeEnabled) { if (planModeEnabled) {
pi.setTools(PLAN_MODE_TOOLS); pi.setTools(PLAN_MODE_TOOLS);
} }
updateStatus(ctx); updateStatus(ctx);
}); });
// Save state when plan mode changes (via tool_call or other events) // Persist state
pi.on("turn_start", async () => { pi.on("turn_start", async () => {
// Persist current state including todos
pi.appendEntry("plan-mode", { pi.appendEntry("plan-mode", {
enabled: planModeEnabled, enabled: planModeEnabled,
todos: todoItems, todos: todoItems,