mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-17 06:04:51 +00:00
feat(plan-mode): enhanced plan mode with explicit step tracking (#694)
Changes from the original: - Explicit [DONE:n] tag tracking (more accurate than auto-marking on tool_result) - Plan: header requirement - only extracts todos from 'Plan:' sections - Utils extracted to separate file for testability - Better session resume - only scans messages after plan-mode-execute marker - Context filtering - properly filters plan-mode-context custom type messages - Refactored to directory structure (index.ts + utils.ts + README.md) The original auto-completed steps on every tool_result, which was inaccurate for multi-tool steps. This version uses explicit [DONE:n] markers that the agent outputs after completing each step.
This commit is contained in:
parent
a7a863c792
commit
e8f1322eee
5 changed files with 834 additions and 548 deletions
|
|
@ -1,548 +0,0 @@
|
||||||
/**
|
|
||||||
* Plan Mode Extension
|
|
||||||
*
|
|
||||||
* Provides a Claude Code-style "plan mode" for safe code exploration.
|
|
||||||
* When enabled, the agent can only use read-only tools and cannot modify files.
|
|
||||||
*
|
|
||||||
* Features:
|
|
||||||
* - /plan command to toggle plan mode
|
|
||||||
* - In plan mode: only read, bash (read-only), grep, find, ls are available
|
|
||||||
* - Injects system context telling the agent about the restrictions
|
|
||||||
* - After each agent response, prompts to execute the plan or continue planning
|
|
||||||
* - Shows "plan" indicator in footer when active
|
|
||||||
* - Extracts todo list from plan and tracks progress during execution
|
|
||||||
* - Uses ID-based tracking: agent outputs [DONE:id] to mark steps complete
|
|
||||||
*
|
|
||||||
* Usage:
|
|
||||||
* 1. Copy this file to ~/.pi/agent/extensions/ or your project's .pi/extensions/
|
|
||||||
* 2. Use /plan to toggle plan mode on/off
|
|
||||||
* 3. Or start in plan mode with --plan flag
|
|
||||||
*/
|
|
||||||
|
|
||||||
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
||||||
import { Key } from "@mariozechner/pi-tui";
|
|
||||||
|
|
||||||
/** Tool names left active while plan mode is on (read-only exploration). */
const PLAN_MODE_TOOLS: string[] = ["read", "bash", "grep", "find", "ls"];

/** Tool names activated again when plan mode is switched off or a plan is executed. */
const NORMAL_MODE_TOOLS: string[] = ["read", "bash", "edit", "write"];
|
|
||||||
|
|
||||||
/**
 * Patterns for bash commands that must never run in plan mode.
 * A command matching any of these is rejected even when it starts with an
 * allowlisted command (e.g. `echo hi > file`).
 */
const DESTRUCTIVE_PATTERNS = [
	/\brm\b/i,
	/\brmdir\b/i,
	/\bmv\b/i,
	/\bcp\b/i,
	/\bmkdir\b/i,
	/\btouch\b/i,
	/\bchmod\b/i,
	/\bchown\b/i,
	/\bchgrp\b/i,
	/\bln\b/i,
	/\btee\b/i,
	/\btruncate\b/i,
	/\bdd\b/i,
	/\bshred\b/i,
	// Output redirection. `(^|[^<])` also catches a redirect that is the very
	// first character of the command (`> file`), which `[^<]>` alone missed.
	/(^|[^<])>(?!>)/,
	/>>/,
	/\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
	/\byarn\s+(add|remove|install|publish)/i,
	/\bpnpm\s+(add|remove|install|publish)/i,
	/\bpip\s+(install|uninstall)/i,
	/\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
	/\bbrew\s+(install|uninstall|upgrade)/i,
	/\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout\s+-b|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
	/\bsudo\b/i,
	/\bsu\b/i,
	/\bkill\b/i,
	/\bpkill\b/i,
	/\bkillall\b/i,
	/\breboot\b/i,
	/\bshutdown\b/i,
	/\bsystemctl\s+(start|stop|restart|enable|disable)/i,
	/\bservice\s+\S+\s+(start|stop|restart)/i,
	/\b(vim?|nano|emacs|code|subl)\b/i,
];

/**
 * Allowlist of read-only commands. Each pattern is anchored to the start of
 * the command, so only the leading command word (plus a sub-command for
 * git/npm/yarn and a few flags) is checked here.
 */
const SAFE_COMMANDS = [
	/^\s*cat\b/,
	/^\s*head\b/,
	/^\s*tail\b/,
	/^\s*less\b/,
	/^\s*more\b/,
	/^\s*grep\b/,
	/^\s*find\b/,
	/^\s*ls\b/,
	/^\s*pwd\b/,
	/^\s*echo\b/,
	/^\s*printf\b/,
	/^\s*wc\b/,
	/^\s*sort\b/,
	/^\s*uniq\b/,
	/^\s*diff\b/,
	/^\s*file\b/,
	/^\s*stat\b/,
	/^\s*du\b/,
	/^\s*df\b/,
	/^\s*tree\b/,
	/^\s*which\b/,
	/^\s*whereis\b/,
	/^\s*type\b/,
	/^\s*env\b/,
	/^\s*printenv\b/,
	/^\s*uname\b/,
	/^\s*whoami\b/,
	/^\s*id\b/,
	/^\s*date\b/,
	/^\s*cal\b/,
	/^\s*uptime\b/,
	/^\s*ps\b/,
	/^\s*top\b/,
	/^\s*htop\b/,
	/^\s*free\b/,
	/^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
	/^\s*git\s+ls-/i,
	/^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
	/^\s*yarn\s+(list|info|why|audit)/i,
	/^\s*node\s+--version/i,
	/^\s*python\s+--version/i,
	/^\s*curl\s/i,
	/^\s*wget\s+-O\s*-/i,
	/^\s*jq\b/,
	/^\s*sed\s+-n/i,
	/^\s*awk\b/,
	/^\s*rg\b/,
	/^\s*fd\b/,
	/^\s*bat\b/,
	/^\s*exa\b/,
];

/**
 * Decide whether a bash command may run while plan mode is active.
 *
 * A command is accepted only when it matches the allowlist AND matches no
 * destructive pattern. Previously the allowlist had no effect: anything that
 * was merely "not destructive" (for example `python script.py`) was accepted,
 * which defeats a read-only mode.
 *
 * @param command - The raw bash command line.
 * @returns `true` when the command is allowlisted and not destructive.
 */
function isSafeCommand(command: string): boolean {
	// None of the patterns use the `g`/`y` flags, so `test` is stateless here.
	const isDestructive = DESTRUCTIVE_PATTERNS.some((pattern) => pattern.test(command));
	const isAllowlisted = SAFE_COMMANDS.some((pattern) => pattern.test(command));
	return isAllowlisted && !isDestructive;
}
|
|
||||||
|
|
||||||
/** A single plan step tracked while a plan is being executed. */
interface TodoItem {
	/** 1-based position of the step within the extracted plan. */
	step: number;
	/** Cleaned-up step description shown to the user. */
	text: string;
	/** Whether the step has been marked as done. */
	completed: boolean;
}
|
|
||||||
|
|
||||||
/**
 * Turn raw plan-step text into a short display label.
 *
 * Strips markdown emphasis and inline-code markers, drops one leading action
 * verb (optionally followed by "the"), collapses whitespace, capitalises the
 * first character and truncates to 50 characters with a trailing "...".
 *
 * @param text - Step text as captured from the assistant message.
 * @returns The cleaned label; empty when nothing is left after cleaning.
 */
function cleanStepText(text: string): string {
	const withoutEmphasis = text.replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1");
	const withoutCode = withoutEmphasis.replace(/`([^`]+)`/g, "$1");
	// The verb is only stripped when it is the very first thing in the text
	// (this runs before whitespace is trimmed).
	const withoutVerb = withoutCode.replace(
		/^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
		"",
	);
	const compact = withoutVerb.replace(/\s+/g, " ").trim();

	const label = compact.length > 0 ? compact.charAt(0).toUpperCase() + compact.slice(1) : compact;

	// 47 characters + "..." keeps the label at exactly 50 characters.
	return label.length > 50 ? `${label.slice(0, 47)}...` : label;
}
|
|
||||||
|
|
||||||
/**
 * Build a todo list from the text of an assistant message.
 *
 * Numbered list entries ("1. Task" / "1) Task") are preferred. Bullet entries
 * ("- Task", "* Step 2: Task") are only considered when no numbered entry
 * produced a todo. Steps are numbered by the order in which they are accepted,
 * not by the number written in the message.
 *
 * @param message - Plain text of the assistant message.
 * @returns Extracted todo items, all initially not completed.
 */
function extractTodoItems(message: string): TodoItem[] {
	const todos: TodoItem[] = [];

	// Drop a trailing bold/italic marker left over from "**Title**"-style entries.
	const stripTrailingStars = (captured: string): string => captured.trim().replace(/\*{1,2}$/, "").trim();

	// Clean the candidate and keep it only if something meaningful remains.
	const record = (candidate: string): void => {
		const label = cleanStepText(candidate);
		if (label.length > 3) {
			todos.push({ step: todos.length + 1, text: label, completed: false });
		}
	};

	for (const hit of message.matchAll(/^\s*(\d+)[.)]\s+\*{0,2}([^*\n]+)/gm)) {
		const candidate = stripTrailingStars(hit[2]);
		// Entries that start like code, a path/command or a flag are not plan steps.
		const looksLikeCode = candidate.startsWith("`") || candidate.startsWith("/") || candidate.startsWith("-");
		if (candidate.length > 5 && !looksLikeCode) {
			record(candidate);
		}
	}

	if (todos.length > 0) {
		return todos;
	}

	// Fallback: bullet points, optionally prefixed with "Step N:" / "Step N."
	for (const hit of message.matchAll(/^\s*[-*]\s*(?:Step\s*\d+[:.])?\s*\*{0,2}([^*\n]+)/gim)) {
		const candidate = stripTrailingStars(hit[1]);
		if (candidate.length > 10 && !candidate.startsWith("`")) {
			record(candidate);
		}
	}

	return todos;
}
|
|
||||||
|
|
||||||
/**
 * Plan mode extension entry point.
 *
 * Registers the `--plan` flag, the `/plan` and `/todos` commands, the Shift+P
 * shortcut and the event handlers that implement plan mode (restricted tool
 * set, bash filtering) and plan execution (todo tracking with a progress
 * widget). All state lives in the closure variables below and is snapshotted
 * into the session via `pi.appendEntry("plan-mode", ...)`.
 *
 * @param pi - Extension API used to register flags, commands, shortcuts and handlers.
 */
export default function planModeExtension(pi: ExtensionAPI) {
	let planModeEnabled = false;
	let toolsCalledThisTurn = false;
	let executionMode = false;
	let todoItems: TodoItem[] = [];

	// Register --plan CLI flag
	pi.registerFlag("plan", {
		description: "Start in plan mode (read-only exploration)",
		type: "boolean",
		default: false,
	});

	// Snapshot the current state; session_start restores the latest such entry.
	function persistState(): void {
		pi.appendEntry("plan-mode", {
			enabled: planModeEnabled,
			todos: todoItems,
			executing: executionMode,
		});
	}

	// Refresh the footer status and the todo widget from the current state.
	function updateStatus(ctx: ExtensionContext) {
		const tracking = executionMode && todoItems.length > 0;

		if (tracking) {
			const completed = todoItems.filter((t) => t.completed).length;
			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
		} else if (planModeEnabled) {
			ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
		} else {
			ctx.ui.setStatus("plan-mode", undefined);
		}

		// Show widget during execution (no IDs shown to user)
		if (tracking) {
			const lines = todoItems.map((item) =>
				item.completed
					? ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text))
					: ctx.ui.theme.fg("muted", "☐ ") + item.text,
			);
			ctx.ui.setWidget("plan-todos", lines);
		} else {
			ctx.ui.setWidget("plan-todos", undefined);
		}
	}

	// Mark the first not-yet-completed step as done and refresh the UI.
	// NOTE(review): this is a heuristic - a step that needs several tool calls
	// is marked done after the first one.
	function markNextStepDone(ctx: ExtensionContext): void {
		const nextStep = todoItems.find((t) => !t.completed);
		if (nextStep) {
			nextStep.completed = true;
			updateStatus(ctx);
		}
	}

	// Flip plan mode; toggling always discards any plan that is being executed.
	function togglePlanMode(ctx: ExtensionContext) {
		planModeEnabled = !planModeEnabled;
		executionMode = false;
		todoItems = [];

		if (planModeEnabled) {
			pi.setActiveTools(PLAN_MODE_TOOLS);
			ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
		} else {
			pi.setActiveTools(NORMAL_MODE_TOOLS);
			ctx.ui.notify("Plan mode disabled. Full access restored.");
		}
		updateStatus(ctx);
	}

	// Register /plan command
	pi.registerCommand("plan", {
		description: "Toggle plan mode (read-only exploration)",
		handler: async (_args, ctx) => {
			togglePlanMode(ctx);
		},
	});

	// Register /todos command
	pi.registerCommand("todos", {
		description: "Show current plan todo list",
		handler: async (_args, ctx) => {
			if (todoItems.length === 0) {
				ctx.ui.notify("No todos. Create a plan first with /plan", "info");
				return;
			}

			const todoList = todoItems
				.map((item, i) => {
					const checkbox = item.completed ? "✓" : "○";
					return `${i + 1}. ${checkbox} ${item.text}`;
				})
				.join("\n");

			ctx.ui.notify(`Plan Progress:\n${todoList}`, "info");
		},
	});

	// Register Shift+P shortcut
	pi.registerShortcut(Key.shift("p"), {
		description: "Toggle plan mode",
		handler: async (ctx) => {
			togglePlanMode(ctx);
		},
	});

	// Block destructive bash in plan mode
	pi.on("tool_call", async (event) => {
		if (!planModeEnabled) return;
		if (event.toolName !== "bash") return;

		const command = event.input.command as string;
		if (!isSafeCommand(command)) {
			return {
				block: true,
				reason: `Plan mode: destructive command blocked. Use /plan to disable plan mode first.\nCommand: ${command}`,
			};
		}
	});

	// Track step completion based on tool results
	pi.on("tool_result", async (_event, ctx) => {
		toolsCalledThisTurn = true;

		if (!executionMode || todoItems.length === 0) return;

		// Mark the first uncompleted step as done whenever a tool result arrives
		markNextStepDone(ctx);
	});

	// Filter out stale plan mode context messages from LLM context
	// This ensures the agent only sees the CURRENT state (plan mode on/off)
	pi.on("context", async (event) => {
		// Only filter when NOT in plan mode (i.e., when executing)
		if (planModeEnabled) {
			return;
		}

		const filtered = event.messages.filter((m) => {
			// The context injected by before_agent_start is tagged with this
			// customType, so drop it by tag as well as by its marker text.
			const { customType } = m as typeof m & { customType?: string };
			if (customType === "plan-mode-context") return false;
			if (m.role !== "user") return true;

			const content = m.content;
			if (typeof content === "string") {
				return !content.includes("[PLAN MODE ACTIVE]");
			}
			if (Array.isArray(content)) {
				return !content.some((c) => c.type === "text" && c.text.includes("[PLAN MODE ACTIVE]"));
			}
			return true;
		});
		return { messages: filtered };
	});

	// Inject plan mode context
	pi.on("before_agent_start", async () => {
		if (planModeEnabled) {
			return {
				message: {
					customType: "plan-mode-context",
					content: `[PLAN MODE ACTIVE]
You are in plan mode - a read-only exploration mode for safe code analysis.

Restrictions:
- You can only use: read, bash, grep, find, ls
- You CANNOT use: edit, write (file modifications are disabled)
- Bash is restricted to READ-ONLY commands
- Focus on analysis, planning, and understanding the codebase

Create a detailed numbered plan:
1. First step description
2. Second step description
...

Do NOT attempt to make changes - just describe what you would do.`,
					display: false,
				},
			};
		}

		if (executionMode && todoItems.length > 0) {
			const remaining = todoItems.filter((t) => !t.completed);
			const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
			return {
				message: {
					customType: "plan-execution-context",
					content: `[EXECUTING PLAN - Full tool access enabled]

Remaining steps:
${todoList}

Execute each step in order.`,
					display: false,
				},
			};
		}
	});

	// After agent finishes
	pi.on("agent_end", async (event, ctx) => {
		// In execution mode, check if all steps complete
		if (executionMode && todoItems.length > 0) {
			const allComplete = todoItems.every((t) => t.completed);
			if (allComplete) {
				// Show final completed list in chat
				const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
				pi.sendMessage(
					{
						customType: "plan-complete",
						content: `**Plan Complete!** ✓\n\n${completedList}`,
						display: true,
					},
					{ triggerTurn: false },
				);

				executionMode = false;
				todoItems = [];
				pi.setActiveTools(NORMAL_MODE_TOOLS);
				updateStatus(ctx);
				// Save the cleared state; otherwise the last persisted entry still
				// says "executing" and a resumed session would restore it.
				persistState();
			}
			return;
		}

		if (!planModeEnabled) return;
		if (!ctx.hasUI) return;

		// Extract todos from last message
		const messages = event.messages;
		const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant");
		if (lastAssistant && Array.isArray(lastAssistant.content)) {
			const textContent = lastAssistant.content
				.filter((block): block is { type: "text"; text: string } => block.type === "text")
				.map((block) => block.text)
				.join("\n");

			if (textContent) {
				const extracted = extractTodoItems(textContent);
				if (extracted.length > 0) {
					todoItems = extracted;
				}
			}
		}

		const hasTodos = todoItems.length > 0;

		// Show todo list in chat (no IDs shown to user, just numbered)
		if (hasTodos) {
			const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
			pi.sendMessage(
				{
					customType: "plan-todo-list",
					content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
					display: true,
				},
				{ triggerTurn: false },
			);
		}

		const choice = await ctx.ui.select("Plan mode - what next?", [
			hasTodos ? "Execute the plan (track progress)" : "Execute the plan",
			"Stay in plan mode",
			"Refine the plan",
		]);

		if (choice?.startsWith("Execute")) {
			planModeEnabled = false;
			// Progress tracking only makes sense when steps were extracted.
			executionMode = hasTodos;
			pi.setActiveTools(NORMAL_MODE_TOOLS);
			updateStatus(ctx);

			// Simple execution message - the context handler filters old plan mode
			// messages and before_agent_start injects fresh execution context
			const execMessage = hasTodos
				? `Execute the plan. Start with: ${todoItems[0].text}`
				: "Execute the plan you just created.";

			pi.sendMessage(
				{
					customType: "plan-mode-execute",
					content: execMessage,
					display: true,
				},
				{ triggerTurn: true },
			);
		} else if (choice === "Refine the plan") {
			const refinement = await ctx.ui.input("What should be refined?");
			if (refinement) {
				ctx.ui.setEditorText(refinement);
			}
		}
	});

	// Initialize state on session start
	pi.on("session_start", async (_event, ctx) => {
		if (pi.getFlag("plan") === true) {
			planModeEnabled = true;
		}

		// The most recent persisted entry takes precedence over the --plan flag.
		const entries = ctx.sessionManager.getEntries();
		const planModeEntry = entries
			.filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
			.pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;

		if (planModeEntry?.data) {
			if (planModeEntry.data.enabled !== undefined) {
				planModeEnabled = planModeEntry.data.enabled;
			}
			if (planModeEntry.data.todos) {
				todoItems = planModeEntry.data.todos;
			}
			if (planModeEntry.data.executing) {
				executionMode = planModeEntry.data.executing;
			}
		}

		if (planModeEnabled) {
			pi.setActiveTools(PLAN_MODE_TOOLS);
		}
		updateStatus(ctx);
	});

	// Reset tool tracking at start of each turn and persist state
	pi.on("turn_start", async () => {
		toolsCalledThisTurn = false;
		persistState();
	});

	// Handle non-tool turns (e.g., analysis, explanation steps)
	pi.on("turn_end", async (_event, ctx) => {
		if (!executionMode || todoItems.length === 0) return;

		// If no tools were called this turn, the agent was doing analysis/explanation
		// Mark the next uncompleted step as done
		if (!toolsCalledThisTurn) {
			markNextStepDone(ctx);
		}
	});
}
|
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
# Plan Mode Extension
|
||||||
|
|
||||||
|
Read-only exploration mode for safe code analysis.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- **Read-only tools**: Restricts available tools to read, bash, grep, find, ls, questionnaire
|
||||||
|
- **Bash allowlist**: Only read-only bash commands are allowed
|
||||||
|
- **Plan extraction**: Extracts numbered steps from `Plan:` sections
|
||||||
|
- **Progress tracking**: Widget shows completion status during execution
|
||||||
|
- **[DONE:n] markers**: Explicit step completion tracking
|
||||||
|
- **Session persistence**: State survives session resume
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
- `/plan` - Toggle plan mode
|
||||||
|
- `/todos` - Show current plan progress
|
||||||
|
- `Shift+P` - Toggle plan mode (shortcut)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
1. Enable plan mode with `/plan` or `--plan` flag
|
||||||
|
2. Ask the agent to analyze code and create a plan
|
||||||
|
3. The agent should output a numbered plan under a `Plan:` header:
|
||||||
|
|
||||||
|
```
|
||||||
|
Plan:
|
||||||
|
1. First step description
|
||||||
|
2. Second step description
|
||||||
|
3. Third step description
|
||||||
|
```
|
||||||
|
|
||||||
|
4. Choose "Execute the plan" when prompted
|
||||||
|
5. During execution, the agent marks steps complete with `[DONE:n]` tags
|
||||||
|
6. Progress widget shows completion status
|
||||||
|
|
||||||
|
## How It Works
|
||||||
|
|
||||||
|
### Plan Mode (Read-Only)
|
||||||
|
- Only read-only tools available
|
||||||
|
- Bash commands filtered through allowlist
|
||||||
|
- Agent creates a plan without making changes
|
||||||
|
|
||||||
|
### Execution Mode
|
||||||
|
- Full tool access restored
|
||||||
|
- Agent executes steps in order
|
||||||
|
- `[DONE:n]` markers track completion
|
||||||
|
- Widget shows progress
|
||||||
|
|
||||||
|
### Command Allowlist
|
||||||
|
|
||||||
|
Safe commands (allowed):
|
||||||
|
- File inspection: `cat`, `head`, `tail`, `less`, `more`
|
||||||
|
- Search: `grep`, `find`, `rg`, `fd`
|
||||||
|
- Directory: `ls`, `pwd`, `tree`
|
||||||
|
- Git read: `git status`, `git log`, `git diff`, `git branch`
|
||||||
|
- Package info: `npm list`, `npm outdated`, `yarn info`
|
||||||
|
- System info: `uname`, `whoami`, `date`, `uptime`
|
||||||
|
|
||||||
|
Blocked commands:
|
||||||
|
- File modification: `rm`, `mv`, `cp`, `mkdir`, `touch`
|
||||||
|
- Git write: `git add`, `git commit`, `git push`
|
||||||
|
- Package install: `npm install`, `yarn add`, `pip install`
|
||||||
|
- System: `sudo`, `kill`, `reboot`
|
||||||
|
- Editors: `vim`, `nano`, `code`
|
||||||
340
packages/coding-agent/examples/extensions/plan-mode/index.ts
Normal file
340
packages/coding-agent/examples/extensions/plan-mode/index.ts
Normal file
|
|
@ -0,0 +1,340 @@
|
||||||
|
/**
|
||||||
|
* Plan Mode Extension
|
||||||
|
*
|
||||||
|
* Read-only exploration mode for safe code analysis.
|
||||||
|
* When enabled, only read-only tools are available.
|
||||||
|
*
|
||||||
|
* Features:
|
||||||
|
* - /plan command or Shift+P to toggle
|
||||||
|
* - Bash restricted to allowlisted read-only commands
|
||||||
|
* - Extracts numbered plan steps from "Plan:" sections
|
||||||
|
* - [DONE:n] markers to complete steps during execution
|
||||||
|
* - Progress tracking widget during execution
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import type { AssistantMessage, TextContent } from "@mariozechner/pi-ai";
|
||||||
|
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||||
|
import { Key } from "@mariozechner/pi-tui";
|
||||||
|
import { extractTodoItems, isSafeCommand, markCompletedSteps, type TodoItem } from "./utils.js";
|
||||||
|
|
||||||
|
/** Tool names left active while plan mode is on. */
const PLAN_MODE_TOOLS: string[] = ["read", "bash", "grep", "find", "ls", "questionnaire"];

/** Tool names activated again when plan mode is switched off or a plan finishes. */
const NORMAL_MODE_TOOLS: string[] = ["read", "bash", "edit", "write"];
|
||||||
|
|
||||||
|
/**
 * Type guard: true when the message has role "assistant" and its content is
 * an array of content blocks.
 */
function isAssistantMessage(m: AgentMessage): m is AssistantMessage {
	if (m.role !== "assistant") {
		return false;
	}
	return Array.isArray(m.content);
}
|
||||||
|
|
||||||
|
/**
 * Concatenate the text blocks of an assistant message.
 *
 * Blocks whose `type` is not "text" are ignored; the remaining texts are
 * joined with a newline, in message order.
 */
function getTextContent(message: AssistantMessage): string {
	const parts: string[] = [];
	for (const block of message.content) {
		if (block.type === "text") {
			parts.push((block as TextContent).text);
		}
	}
	return parts.join("\n");
}
|
||||||
|
|
||||||
|
export default function planModeExtension(pi: ExtensionAPI): void {
|
||||||
|
let planModeEnabled = false;
|
||||||
|
let executionMode = false;
|
||||||
|
let todoItems: TodoItem[] = [];
|
||||||
|
|
||||||
|
pi.registerFlag("plan", {
|
||||||
|
description: "Start in plan mode (read-only exploration)",
|
||||||
|
type: "boolean",
|
||||||
|
default: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
function updateStatus(ctx: ExtensionContext): void {
|
||||||
|
// Footer status
|
||||||
|
if (executionMode && todoItems.length > 0) {
|
||||||
|
const completed = todoItems.filter((t) => t.completed).length;
|
||||||
|
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("accent", `📋 ${completed}/${todoItems.length}`));
|
||||||
|
} else if (planModeEnabled) {
|
||||||
|
ctx.ui.setStatus("plan-mode", ctx.ui.theme.fg("warning", "⏸ plan"));
|
||||||
|
} else {
|
||||||
|
ctx.ui.setStatus("plan-mode", undefined);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Widget showing todo list
|
||||||
|
if (executionMode && todoItems.length > 0) {
|
||||||
|
const lines = todoItems.map((item) => {
|
||||||
|
if (item.completed) {
|
||||||
|
return (
|
||||||
|
ctx.ui.theme.fg("success", "☑ ") + ctx.ui.theme.fg("muted", ctx.ui.theme.strikethrough(item.text))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return `${ctx.ui.theme.fg("muted", "☐ ")}${item.text}`;
|
||||||
|
});
|
||||||
|
ctx.ui.setWidget("plan-todos", lines);
|
||||||
|
} else {
|
||||||
|
ctx.ui.setWidget("plan-todos", undefined);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function togglePlanMode(ctx: ExtensionContext): void {
|
||||||
|
planModeEnabled = !planModeEnabled;
|
||||||
|
executionMode = false;
|
||||||
|
todoItems = [];
|
||||||
|
|
||||||
|
if (planModeEnabled) {
|
||||||
|
pi.setActiveTools(PLAN_MODE_TOOLS);
|
||||||
|
ctx.ui.notify(`Plan mode enabled. Tools: ${PLAN_MODE_TOOLS.join(", ")}`);
|
||||||
|
} else {
|
||||||
|
pi.setActiveTools(NORMAL_MODE_TOOLS);
|
||||||
|
ctx.ui.notify("Plan mode disabled. Full access restored.");
|
||||||
|
}
|
||||||
|
updateStatus(ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
function persistState(): void {
|
||||||
|
pi.appendEntry("plan-mode", {
|
||||||
|
enabled: planModeEnabled,
|
||||||
|
todos: todoItems,
|
||||||
|
executing: executionMode,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
pi.registerCommand("plan", {
|
||||||
|
description: "Toggle plan mode (read-only exploration)",
|
||||||
|
handler: async (_args, ctx) => togglePlanMode(ctx),
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerCommand("todos", {
|
||||||
|
description: "Show current plan todo list",
|
||||||
|
handler: async (_args, ctx) => {
|
||||||
|
if (todoItems.length === 0) {
|
||||||
|
ctx.ui.notify("No todos. Create a plan first with /plan", "info");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const list = todoItems.map((item, i) => `${i + 1}. ${item.completed ? "✓" : "○"} ${item.text}`).join("\n");
|
||||||
|
ctx.ui.notify(`Plan Progress:\n${list}`, "info");
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
pi.registerShortcut(Key.shift("p"), {
|
||||||
|
description: "Toggle plan mode",
|
||||||
|
handler: async (ctx) => togglePlanMode(ctx),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Block destructive bash commands in plan mode
|
||||||
|
pi.on("tool_call", async (event) => {
|
||||||
|
if (!planModeEnabled || event.toolName !== "bash") return;
|
||||||
|
|
||||||
|
const command = event.input.command as string;
|
||||||
|
if (!isSafeCommand(command)) {
|
||||||
|
return {
|
||||||
|
block: true,
|
||||||
|
reason: `Plan mode: command blocked (not allowlisted). Use /plan to disable plan mode first.\nCommand: ${command}`,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Filter out stale plan mode context when not in plan mode
|
||||||
|
pi.on("context", async (event) => {
|
||||||
|
if (planModeEnabled) return;
|
||||||
|
|
||||||
|
return {
|
||||||
|
messages: event.messages.filter((m) => {
|
||||||
|
const msg = m as AgentMessage & { customType?: string };
|
||||||
|
if (msg.customType === "plan-mode-context") return false;
|
||||||
|
if (msg.role !== "user") return true;
|
||||||
|
|
||||||
|
const content = msg.content;
|
||||||
|
if (typeof content === "string") {
|
||||||
|
return !content.includes("[PLAN MODE ACTIVE]");
|
||||||
|
}
|
||||||
|
if (Array.isArray(content)) {
|
||||||
|
return !content.some(
|
||||||
|
(c) => c.type === "text" && (c as TextContent).text?.includes("[PLAN MODE ACTIVE]"),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}),
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
// Inject plan/execution context before agent starts
|
||||||
|
pi.on("before_agent_start", async () => {
|
||||||
|
if (planModeEnabled) {
|
||||||
|
return {
|
||||||
|
message: {
|
||||||
|
customType: "plan-mode-context",
|
||||||
|
content: `[PLAN MODE ACTIVE]
|
||||||
|
You are in plan mode - a read-only exploration mode for safe code analysis.
|
||||||
|
|
||||||
|
Restrictions:
|
||||||
|
- You can only use: read, bash, grep, find, ls, questionnaire
|
||||||
|
- You CANNOT use: edit, write (file modifications are disabled)
|
||||||
|
- Bash is restricted to an allowlist of read-only commands
|
||||||
|
|
||||||
|
Ask clarifying questions using the questionnaire tool.
|
||||||
|
Use brave-search skill via bash for web research.
|
||||||
|
|
||||||
|
Create a detailed numbered plan under a "Plan:" header:
|
||||||
|
|
||||||
|
Plan:
|
||||||
|
1. First step description
|
||||||
|
2. Second step description
|
||||||
|
...
|
||||||
|
|
||||||
|
Do NOT attempt to make changes - just describe what you would do.`,
|
||||||
|
display: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (executionMode && todoItems.length > 0) {
|
||||||
|
const remaining = todoItems.filter((t) => !t.completed);
|
||||||
|
const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n");
|
||||||
|
return {
|
||||||
|
message: {
|
||||||
|
customType: "plan-execution-context",
|
||||||
|
content: `[EXECUTING PLAN - Full tool access enabled]
|
||||||
|
|
||||||
|
Remaining steps:
|
||||||
|
${todoList}
|
||||||
|
|
||||||
|
Execute each step in order.
|
||||||
|
After completing a step, include a [DONE:n] tag in your response.`,
|
||||||
|
display: false,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Track progress after each turn.
//
// Only active while executing a plan that has tracked todos. Reads the text of the
// assistant message for this turn, marks any steps referenced by [DONE:n] tags, and
// refreshes the status display when at least one tag was found. State is persisted
// after every assistant turn in execution mode, whether or not a tag was found.
pi.on("turn_end", async (event, ctx) => {
  if (!executionMode || todoItems.length === 0) return;
  if (!isAssistantMessage(event.message)) return;

  const text = getTextContent(event.message);
  if (markCompletedSteps(text, todoItems) > 0) {
    updateStatus(ctx);
  }
  persistState();
});
|
||||||
|
|
||||||
|
// Handle plan completion and the plan-mode prompt once the agent has finished.
//
// Executing a tracked plan: when every todo is completed, post a "plan-complete"
// message, clear execution state, restore the normal tool set and persist. If steps
// are still open, nothing happens here.
//
// Planning (only when a UI is available): extract todos from the last assistant
// message, show them, then ask whether to execute, stay in plan mode, or refine.
pi.on("agent_end", async (event, ctx) => {
  // Check if execution is complete
  if (executionMode && todoItems.length > 0) {
    if (todoItems.every((t) => t.completed)) {
      const completedList = todoItems.map((t) => `~~${t.text}~~`).join("\n");
      pi.sendMessage(
        { customType: "plan-complete", content: `**Plan Complete!** ✓\n\n${completedList}`, display: true },
        { triggerTurn: false },
      );
      executionMode = false;
      todoItems = [];
      pi.setActiveTools(NORMAL_MODE_TOOLS);
      updateStatus(ctx);
      persistState(); // Save cleared state so resume doesn't restore old execution mode
    }
    return;
  }

  if (!planModeEnabled || !ctx.hasUI) return;

  // Extract todos from last assistant message. A message without a "Plan:" section
  // yields no items, in which case the previously extracted todos are kept.
  const lastAssistant = [...event.messages].reverse().find(isAssistantMessage);
  if (lastAssistant) {
    const extracted = extractTodoItems(getTextContent(lastAssistant));
    if (extracted.length > 0) {
      todoItems = extracted;
    }
  }

  // Show plan steps and prompt for next action
  if (todoItems.length > 0) {
    const todoListText = todoItems.map((t, i) => `${i + 1}. ☐ ${t.text}`).join("\n");
    pi.sendMessage(
      {
        customType: "plan-todo-list",
        content: `**Plan Steps (${todoItems.length}):**\n\n${todoListText}`,
        display: true,
      },
      { triggerTurn: false },
    );
  }

  const choice = await ctx.ui.select("Plan mode - what next?", [
    todoItems.length > 0 ? "Execute the plan (track progress)" : "Execute the plan",
    "Stay in plan mode",
    "Refine the plan",
  ]);

  if (choice?.startsWith("Execute")) {
    planModeEnabled = false;
    // Progress tracking only makes sense when steps were extracted.
    executionMode = todoItems.length > 0;
    pi.setActiveTools(NORMAL_MODE_TOOLS);
    updateStatus(ctx);
    // Persist the plan -> execute transition right away, mirroring the completion
    // branch above. Without this, a resume before the first turn_end (or any resume
    // when no todos were extracted) would read an older persisted entry.
    // NOTE(review): assumes persistState() writes the "plan-mode" entry that
    // session_start restores from - confirm against its definition.
    persistState();

    const execMessage =
      todoItems.length > 0
        ? `Execute the plan. Start with: ${todoItems[0].text}`
        : "Execute the plan you just created.";
    pi.sendMessage(
      { customType: "plan-mode-execute", content: execMessage, display: true },
      { triggerTurn: true },
    );
  } else if (choice === "Refine the plan") {
    const refinement = await ctx.ui.editor("Refine the plan:", "");
    if (refinement?.trim()) {
      pi.sendUserMessage(refinement.trim());
    }
  }
  // "Stay in plan mode" (or a dismissed prompt) intentionally changes nothing.
});
|
||||||
|
|
||||||
|
// Restore state on session start/resume.
//
// Order of precedence as implemented: the --plan flag turns plan mode on first, then
// the most recent persisted "plan-mode" custom entry (if any) overrides the enabled
// flag, the todo list and the executing flag. When resuming mid-execution, completion
// state is rebuilt by re-reading [DONE:n] tags from assistant messages.
pi.on("session_start", async (_event, ctx) => {
  if (pi.getFlag("plan") === true) {
    planModeEnabled = true;
  }

  const entries = ctx.sessionManager.getEntries();

  // Restore persisted state (last "plan-mode" custom entry wins)
  const planModeEntry = entries
    .filter((e: { type: string; customType?: string }) => e.type === "custom" && e.customType === "plan-mode")
    .pop() as { data?: { enabled: boolean; todos?: TodoItem[]; executing?: boolean } } | undefined;

  if (planModeEntry?.data) {
    planModeEnabled = planModeEntry.data.enabled ?? planModeEnabled;
    todoItems = planModeEntry.data.todos ?? todoItems;
    executionMode = planModeEntry.data.executing ?? executionMode;
  }

  // On resume: re-scan messages to rebuild completion state
  // Only scan messages AFTER the last "plan-mode-execute" to avoid picking up [DONE:n] from previous plans
  const isResume = planModeEntry !== undefined;
  if (isResume && executionMode && todoItems.length > 0) {
    // Find the index of the last plan-mode-execute entry (marks when current execution started)
    let executeIndex = -1;
    for (let i = entries.length - 1; i >= 0; i--) {
      const entry = entries[i] as { type: string; customType?: string };
      if (entry.customType === "plan-mode-execute") {
        executeIndex = i;
        break;
      }
    }

    // Only scan messages after the execute marker. If no marker was found,
    // executeIndex stays -1 and the scan covers every entry.
    const messages: AssistantMessage[] = [];
    for (let i = executeIndex + 1; i < entries.length; i++) {
      const entry = entries[i];
      if (entry.type === "message" && "message" in entry && isAssistantMessage(entry.message as AgentMessage)) {
        messages.push(entry.message as AssistantMessage);
      }
    }
    const allText = messages.map(getTextContent).join("\n");
    markCompletedSteps(allText, todoItems);
  }

  if (planModeEnabled) {
    pi.setActiveTools(PLAN_MODE_TOOLS);
  }
  updateStatus(ctx);
});
|
||||||
|
}
|
||||||
packages/coding-agent/examples/extensions/plan-mode/utils.ts
Normal file (168 lines)
@@ -0,0 +1,168 @@
|
||||||
|
/**
 * Pure utility functions for plan mode.
 * Extracted for testability.
 */
|
||||||
|
|
||||||
|
/**
 * Commands (or command fragments) that are blocked in plan mode.
 *
 * Patterns are matched anywhere in the command string, so a blocked word inside a
 * pipeline or argument also rejects the command. This is deliberately conservative.
 */
const DESTRUCTIVE_PATTERNS = [
  /\brm\b/i,
  /\brmdir\b/i,
  /\bmv\b/i,
  /\bcp\b/i,
  /\bmkdir\b/i,
  /\btouch\b/i,
  /\bchmod\b/i,
  /\bchown\b/i,
  /\bchgrp\b/i,
  /\bln\b/i,
  /\btee\b/i,
  /\btruncate\b/i,
  /\bdd\b/i,
  /\bshred\b/i,
  // Output redirection: a ">" not preceded by "<", and ">>" appends.
  /(^|[^<])>(?!>)/,
  />>/,
  /\bnpm\s+(install|uninstall|update|ci|link|publish)/i,
  /\byarn\s+(add|remove|install|publish)/i,
  /\bpnpm\s+(add|remove|install|publish)/i,
  /\bpip\s+(install|uninstall)/i,
  /\bapt(-get)?\s+(install|remove|purge|update|upgrade)/i,
  /\bbrew\s+(install|uninstall|upgrade)/i,
  /\bgit\s+(add|commit|push|pull|merge|rebase|reset|checkout|branch\s+-[dD]|stash|cherry-pick|revert|tag|init|clone)/i,
  /\bsudo\b/i,
  /\bsu\b/i,
  /\bkill\b/i,
  /\bpkill\b/i,
  /\bkillall\b/i,
  /\breboot\b/i,
  /\bshutdown\b/i,
  /\bsystemctl\s+(start|stop|restart|enable|disable)/i,
  /\bservice\s+\S+\s+(start|stop|restart)/i,
  /\b(vim?|nano|emacs|code|subl)\b/i,
  // Write-capable flags of commands that are otherwise on the safe list.
  // "[^|;&]*" keeps the match inside one pipeline segment, so an unrelated
  // "grep -i" / "grep -o" later in a pipeline is not mistaken for these flags.
  /\bfind\b[^|;&]*\s-delete\b/i, // find ... -delete removes the matched files
  /\bsed\b[^|;&]*\s(-i|--in-place)\b/, // sed -i edits files in place
  /\bcurl\b[^|;&]*\s(-o|-O|--output|--remote-name|-T|--upload-file)\b/, // curl writing or uploading files
];

/**
 * Read-only commands allowed in plan mode.
 *
 * Each pattern is anchored at the start of the command (after optional whitespace),
 * so only the first command word is checked against this list.
 */
const SAFE_PATTERNS = [
  /^\s*cat\b/,
  /^\s*head\b/,
  /^\s*tail\b/,
  /^\s*less\b/,
  /^\s*more\b/,
  /^\s*grep\b/,
  /^\s*find\b/,
  /^\s*ls\b/,
  /^\s*pwd\b/,
  /^\s*echo\b/,
  /^\s*printf\b/,
  /^\s*wc\b/,
  /^\s*sort\b/,
  /^\s*uniq\b/,
  /^\s*diff\b/,
  /^\s*file\b/,
  /^\s*stat\b/,
  /^\s*du\b/,
  /^\s*df\b/,
  /^\s*tree\b/,
  /^\s*which\b/,
  /^\s*whereis\b/,
  /^\s*type\b/,
  /^\s*env\b/,
  /^\s*printenv\b/,
  /^\s*uname\b/,
  /^\s*whoami\b/,
  /^\s*id\b/,
  /^\s*date\b/,
  /^\s*cal\b/,
  /^\s*uptime\b/,
  /^\s*ps\b/,
  /^\s*top\b/,
  /^\s*htop\b/,
  /^\s*free\b/,
  /^\s*git\s+(status|log|diff|show|branch|remote|config\s+--get)/i,
  /^\s*git\s+ls-/i,
  /^\s*npm\s+(list|ls|view|info|search|outdated|audit)/i,
  /^\s*yarn\s+(list|info|why|audit)/i,
  /^\s*node\s+--version/i,
  /^\s*python\s+--version/i,
  /^\s*curl\s/i,
  /^\s*wget\s+-O\s*-/i,
  /^\s*jq\b/,
  /^\s*sed\s+-n/i,
  /^\s*awk\b/,
  /^\s*rg\b/,
  /^\s*fd\b/,
  /^\s*bat\b/,
  /^\s*exa\b/,
];

/**
 * Decides whether a bash command may run in plan mode.
 *
 * A command is accepted only when it starts with an allowlisted read-only command
 * AND contains none of the destructive patterns. Unknown commands are rejected.
 *
 * This is a best-effort textual filter, not a shell parser: only the leading command
 * is checked against the allowlist, so it should not be treated as a sandbox.
 *
 * @param command - Raw command line as it would be passed to bash.
 * @returns `true` when the command is considered read-only, otherwise `false`.
 */
export function isSafeCommand(command: string): boolean {
  const isDestructive = DESTRUCTIVE_PATTERNS.some((p) => p.test(command));
  const isSafe = SAFE_PATTERNS.some((p) => p.test(command));
  return !isDestructive && isSafe;
}
|
||||||
|
|
||||||
|
/** One step of an extracted plan, tracked during execution. */
export interface TodoItem {
  /** 1-based step id; matched against the number in `[DONE:n]` tags. */
  step: number;
  /** Cleaned, display-ready description of the step. */
  text: string;
  /** Whether a `[DONE:n]` tag for this step has been seen. */
  completed: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Normalizes a plan step into a short display label.
 *
 * Strips markdown emphasis and inline-code markers, drops one leading action verb
 * (optionally followed by "the"), collapses whitespace, capitalizes the first
 * character and truncates to 50 characters (47 plus "...").
 *
 * @param text - Raw step text.
 * @returns The cleaned label; may be empty.
 */
export function cleanStepText(text: string): string {
  let cleaned = text
    .replace(/\*{1,2}([^*]+)\*{1,2}/g, "$1") // Remove bold/italic
    .replace(/`([^`]+)`/g, "$1") // Remove code
    .replace(
      /^(Use|Run|Execute|Create|Write|Read|Check|Verify|Update|Modify|Add|Remove|Delete|Install)\s+(the\s+)?/i,
      "",
    )
    .replace(/\s+/g, " ")
    .trim();

  if (cleaned.length > 0) {
    cleaned = cleaned.charAt(0).toUpperCase() + cleaned.slice(1);
  }
  if (cleaned.length > 50) {
    cleaned = `${cleaned.slice(0, 47)}...`;
  }
  return cleaned;
}

/**
 * Extracts numbered plan steps from an assistant message.
 *
 * Only text after the first "Plan:" header (optionally bold, case-insensitive) is
 * scanned. Items may be numbered "1." or "1)". Items that are very short, or that
 * start with a backtick, "/" or "-", are skipped. Steps are renumbered 1..n in the
 * order they are kept, independent of the numbers written in the message.
 *
 * An item that starts with markdown emphasis ("1. **Title**: details") contributes
 * only the emphasized title. Any other item contributes its whole line, so emphasis
 * in the middle of a step ("Update the **config** file") no longer cuts the step
 * off at the first asterisk.
 *
 * @param message - Full text of the assistant message.
 * @returns The extracted todo items, all with `completed: false`; empty when there
 *   is no "Plan:" header.
 */
export function extractTodoItems(message: string): TodoItem[] {
  const items: TodoItem[] = [];
  const headerMatch = /\*{0,2}Plan:\*{0,2}\s*\n/i.exec(message);
  if (!headerMatch) return items;

  const planSection = message.slice(headerMatch.index + headerMatch[0].length);
  const numberedPattern = /^\s*\d+[.)]\s+(.+)/gm;

  for (const match of planSection.matchAll(numberedPattern)) {
    const raw = match[1].trim();
    let text = raw;
    if (raw.startsWith("*")) {
      // Emphasis-led item: keep just the title between the asterisks.
      const title = /^\*{0,2}([^*\n]+)/.exec(raw);
      if (!title) continue;
      text = title[1].trim();
    }

    if (text.length > 5 && !text.startsWith("`") && !text.startsWith("/") && !text.startsWith("-")) {
      const cleaned = cleanStepText(text);
      if (cleaned.length > 3) {
        items.push({ step: items.length + 1, text: cleaned, completed: false });
      }
    }
  }
  return items;
}
|
||||||
|
|
||||||
|
/**
 * Collects the step numbers of all `[DONE:n]` tags in a message.
 *
 * Matching is case-insensitive and requires at least one digit, so malformed tags
 * such as `[DONE:abc]` or `[DONE:]` are ignored. Numbers are returned in order of
 * appearance; duplicates are not removed.
 *
 * @param message - Text to scan.
 * @returns Step numbers found, possibly empty.
 */
export function extractDoneSteps(message: string): number[] {
  const steps: number[] = [];
  for (const match of message.matchAll(/\[DONE:(\d+)\]/gi)) {
    const step = Number(match[1]);
    // Guards against absurdly long digit runs that overflow to Infinity.
    if (Number.isFinite(step)) steps.push(step);
  }
  return steps;
}
|
||||||
|
|
||||||
|
/**
 * Marks todo items as completed based on `[DONE:n]` tags found in `text`.
 *
 * Items are mutated in place. Tags that reference a step number not present in
 * `items` are ignored for marking purposes.
 *
 * @param text - Text to scan for `[DONE:n]` tags.
 * @param items - Todo list to update.
 * @returns The number of tags found in `text`. This counts tags, not newly completed
 *   items: unknown step numbers and repeated tags are included in the count.
 */
export function markCompletedSteps(text: string, items: TodoItem[]): number {
  const doneSteps = extractDoneSteps(text);
  for (const step of doneSteps) {
    const item = items.find((t) => t.step === step);
    if (item) item.completed = true;
  }
  return doneSteps.length;
}
|
||||||
packages/coding-agent/test/plan-mode-utils.test.ts
Normal file (261 lines)
@@ -0,0 +1,261 @@
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import {
|
||||||
|
cleanStepText,
|
||||||
|
extractDoneSteps,
|
||||||
|
extractTodoItems,
|
||||||
|
isSafeCommand,
|
||||||
|
markCompletedSteps,
|
||||||
|
type TodoItem,
|
||||||
|
} from "../examples/extensions/plan-mode/utils.js";
|
||||||
|
|
||||||
|
// isSafeCommand: a command passes only if it is allowlisted and not destructive.
describe("isSafeCommand", () => {
  describe("safe commands", () => {
    it("allows basic read commands", () => {
      expect(isSafeCommand("ls -la")).toBe(true);
      expect(isSafeCommand("cat file.txt")).toBe(true);
      expect(isSafeCommand("head -n 10 file.txt")).toBe(true);
      expect(isSafeCommand("tail -f log.txt")).toBe(true);
      expect(isSafeCommand("grep pattern file")).toBe(true);
      expect(isSafeCommand("find . -name '*.ts'")).toBe(true);
    });

    it("allows git read commands", () => {
      expect(isSafeCommand("git status")).toBe(true);
      expect(isSafeCommand("git log --oneline")).toBe(true);
      expect(isSafeCommand("git diff")).toBe(true);
      expect(isSafeCommand("git branch")).toBe(true);
    });

    it("allows npm/yarn read commands", () => {
      expect(isSafeCommand("npm list")).toBe(true);
      expect(isSafeCommand("npm outdated")).toBe(true);
      expect(isSafeCommand("yarn info react")).toBe(true);
    });

    it("allows other safe commands", () => {
      expect(isSafeCommand("pwd")).toBe(true);
      expect(isSafeCommand("echo hello")).toBe(true);
      expect(isSafeCommand("wc -l file.txt")).toBe(true);
      expect(isSafeCommand("du -sh .")).toBe(true);
      expect(isSafeCommand("df -h")).toBe(true);
    });
  });

  describe("destructive commands", () => {
    it("blocks file modification commands", () => {
      expect(isSafeCommand("rm file.txt")).toBe(false);
      expect(isSafeCommand("rm -rf dir")).toBe(false);
      expect(isSafeCommand("mv old new")).toBe(false);
      expect(isSafeCommand("cp src dst")).toBe(false);
      expect(isSafeCommand("mkdir newdir")).toBe(false);
      expect(isSafeCommand("touch newfile")).toBe(false);
    });

    it("blocks git write commands", () => {
      expect(isSafeCommand("git add .")).toBe(false);
      expect(isSafeCommand("git commit -m 'msg'")).toBe(false);
      expect(isSafeCommand("git push")).toBe(false);
      expect(isSafeCommand("git checkout main")).toBe(false);
      expect(isSafeCommand("git reset --hard")).toBe(false);
    });

    it("blocks package manager installs", () => {
      expect(isSafeCommand("npm install lodash")).toBe(false);
      expect(isSafeCommand("yarn add react")).toBe(false);
      expect(isSafeCommand("pip install requests")).toBe(false);
      expect(isSafeCommand("brew install node")).toBe(false);
    });

    it("blocks redirects", () => {
      expect(isSafeCommand("echo hello > file.txt")).toBe(false);
      expect(isSafeCommand("cat foo >> bar")).toBe(false);
      expect(isSafeCommand(">file.txt")).toBe(false);
    });

    it("blocks dangerous commands", () => {
      expect(isSafeCommand("sudo rm -rf /")).toBe(false);
      expect(isSafeCommand("kill -9 1234")).toBe(false);
      expect(isSafeCommand("reboot")).toBe(false);
    });

    it("blocks editors", () => {
      expect(isSafeCommand("vim file.txt")).toBe(false);
      expect(isSafeCommand("nano file.txt")).toBe(false);
      expect(isSafeCommand("code .")).toBe(false);
    });
  });

  describe("edge cases", () => {
    it("requires command to be in safe list (not just non-destructive)", () => {
      expect(isSafeCommand("unknown-command")).toBe(false);
      expect(isSafeCommand("my-script.sh")).toBe(false);
    });

    it("handles commands with leading whitespace", () => {
      expect(isSafeCommand(" ls -la")).toBe(true);
      expect(isSafeCommand(" rm file")).toBe(false);
    });
  });
});
|
||||||
|
|
||||||
|
// cleanStepText: markdown stripping, action-verb removal, capitalization, truncation.
describe("cleanStepText", () => {
  it("removes markdown bold/italic", () => {
    expect(cleanStepText("**bold text**")).toBe("Bold text");
    expect(cleanStepText("*italic text*")).toBe("Italic text");
  });

  it("removes markdown code", () => {
    expect(cleanStepText("run `npm install`")).toBe("Npm install"); // "run" is stripped as action word
    expect(cleanStepText("check the `config.json` file")).toBe("Config.json file");
  });

  it("removes leading action words", () => {
    expect(cleanStepText("Create the new file")).toBe("New file");
    expect(cleanStepText("Run the tests")).toBe("Tests");
    expect(cleanStepText("Check the status")).toBe("Status");
  });

  it("capitalizes first letter", () => {
    expect(cleanStepText("update config")).toBe("Config");
  });

  it("truncates long text", () => {
    const longText = "This is a very long step description that exceeds the maximum allowed length for display";
    const result = cleanStepText(longText);
    expect(result.length).toBe(50);
    expect(result.endsWith("...")).toBe(true);
  });

  it("normalizes whitespace", () => {
    // The input needs runs of several spaces, otherwise nothing is being normalized.
    expect(cleanStepText("multiple    spaces   here")).toBe("Multiple spaces here");
  });
});
|
||||||
|
|
||||||
|
// extractTodoItems: only numbered items after a "Plan:" header become todos.
describe("extractTodoItems", () => {
  it("extracts numbered items after Plan: header", () => {
    const message = `Here's what we'll do:

Plan:
1. First step here
2. Second step here
3. Third step here`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(3);
    expect(items[0].step).toBe(1);
    expect(items[0].text).toBe("First step here");
    expect(items[0].completed).toBe(false);
  });

  it("handles bold Plan header", () => {
    const message = `**Plan:**
1. Do something`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(1);
  });

  it("handles parenthesis-style numbering", () => {
    const message = `Plan:
1) First item
2) Second item`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(2);
  });

  it("returns empty array without Plan header", () => {
    const message = `Here are some steps:
1. First step
2. Second step`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(0);
  });

  it("filters out short items", () => {
    const message = `Plan:
1. OK
2. This is a proper step`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(1);
    expect(items[0].text).toContain("proper");
  });

  it("filters out code-like items", () => {
    const message = `Plan:
1. \`npm install\`
2. Run the build process`;

    const items = extractTodoItems(message);
    expect(items).toHaveLength(1);
  });
});
|
||||||
|
|
||||||
|
// extractDoneSteps: parses [DONE:n] tags, case-insensitively, ignoring malformed ones.
describe("extractDoneSteps", () => {
  it("extracts single DONE marker", () => {
    const message = "I've completed the first step [DONE:1]";
    expect(extractDoneSteps(message)).toEqual([1]);
  });

  it("extracts multiple DONE markers", () => {
    const message = "Did steps [DONE:1] and [DONE:2] and [DONE:3]";
    expect(extractDoneSteps(message)).toEqual([1, 2, 3]);
  });

  it("handles case insensitivity", () => {
    const message = "[done:1] [DONE:2] [Done:3]";
    expect(extractDoneSteps(message)).toEqual([1, 2, 3]);
  });

  it("returns empty array with no markers", () => {
    const message = "No markers here";
    expect(extractDoneSteps(message)).toEqual([]);
  });

  it("ignores malformed markers", () => {
    const message = "[DONE:abc] [DONE:] [DONE:1]";
    expect(extractDoneSteps(message)).toEqual([1]);
  });
});
|
||||||
|
|
||||||
|
// markCompletedSteps: mutates matching items and returns the number of tags found.
describe("markCompletedSteps", () => {
  it("marks matching items as completed", () => {
    const items: TodoItem[] = [
      { step: 1, text: "First", completed: false },
      { step: 2, text: "Second", completed: false },
      { step: 3, text: "Third", completed: false },
    ];

    const count = markCompletedSteps("[DONE:1] [DONE:3]", items);

    expect(count).toBe(2);
    expect(items[0].completed).toBe(true);
    expect(items[1].completed).toBe(false);
    expect(items[2].completed).toBe(true);
  });

  it("returns count of completed items", () => {
    const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];

    expect(markCompletedSteps("[DONE:1]", items)).toBe(1);
    expect(markCompletedSteps("no markers", items)).toBe(0);
  });

  it("ignores markers for non-existent steps", () => {
    const items: TodoItem[] = [{ step: 1, text: "First", completed: false }];

    const count = markCompletedSteps("[DONE:99]", items);

    expect(count).toBe(1); // Still counts the marker found
    expect(items[0].completed).toBe(false); // But doesn't mark anything
  });

  it("doesn't double-complete already completed items", () => {
    const items: TodoItem[] = [{ step: 1, text: "First", completed: true }];

    markCompletedSteps("[DONE:1]", items);
    expect(items[0].completed).toBe(true);
  });
});
|
||||||
Loading…
Add table
Add a link
Reference in a new issue