From fdcc044491da200cf315882ef344e23573cec141 Mon Sep 17 00:00:00 2001 From: Helmut Januschka Date: Sat, 3 Jan 2026 22:20:07 +0100 Subject: [PATCH] fix(plan-mode): track step completion via tool_result events - No longer relies on agent outputting [STEP N DONE] tags - Each successful tool_result marks the next uncompleted step done - Much more reliable than expecting LLM to follow tag format - Simplified execution context (no special instructions needed) --- .../coding-agent/examples/hooks/plan-mode.ts | 59 ++++++------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/packages/coding-agent/examples/hooks/plan-mode.ts b/packages/coding-agent/examples/hooks/plan-mode.ts index c8e8ea67..ad5da318 100644 --- a/packages/coding-agent/examples/hooks/plan-mode.ts +++ b/packages/coding-agent/examples/hooks/plan-mode.ts @@ -168,21 +168,7 @@ function extractTodoItems(message: string): TodoItem[] { return items; } -/** - * Find [STEP N DONE] or [DONE N] tags in text and return step numbers. - */ -function findDoneSteps(text: string): number[] { - const steps: number[] = []; - // Match [STEP 1 DONE], [STEP 2 DONE], etc. - for (const match of text.matchAll(/\[STEP\s+(\d+)\s+DONE\]/gi)) { - steps.push(parseInt(match[1], 10)); - } - // Also match [DONE 1], [DONE 2], etc. - for (const match of text.matchAll(/\[DONE\s+(\d+)\]/gi)) { - steps.push(parseInt(match[1], 10)); - } - return steps; -} + export default function planModeHook(pi: HookAPI) { let planModeEnabled = false; @@ -290,6 +276,19 @@ export default function planModeHook(pi: HookAPI) { } }); + // Track step completion based on tool results + pi.on("tool_result", async (_event, ctx) => { + if (!executionMode || todoItems.length === 0) return; + + // Mark the first uncompleted step as done when any tool succeeds + const nextStep = todoItems.find((t) => !t.completed); + if (nextStep) { + nextStep.completed = true; + console.error(`[plan-mode] Marked step ${nextStep.step} complete: ${nextStep.text}`); + updateStatus(ctx); + } + }); + // Filter out stale plan mode context messages from LLM context // This ensures the agent only sees the CURRENT state (plan mode on/off) pi.on("context", async (event) => { @@ -355,16 +354,16 @@ Do NOT attempt to make changes - just describe what you would do.`, if (executionMode && todoItems.length > 0) { console.error("[plan-mode] before_agent_start: injecting EXECUTING PLAN context"); const remaining = todoItems.filter((t) => !t.completed); - const todoList = remaining.map((t) => `Step ${t.step}: ${t.text}`).join("\n"); + const todoList = remaining.map((t) => `${t.step}. ${t.text}`).join("\n"); return { message: { customType: "plan-execution-context", - content: `[EXECUTING PLAN - You have FULL tool access] + content: `[EXECUTING PLAN - Full tool access enabled] +Remaining steps: ${todoList} -IMPORTANT: After completing each step, output [STEP N DONE] where N is the step number. -Example: After completing step ${remaining[0]?.step || 1}, write [STEP ${remaining[0]?.step || 1} DONE]`, +Execute each step in order.`, display: false, }, }; @@ -374,28 +373,8 @@ Example: After completing step ${remaining[0]?.step || 1}, write [STEP ${remaini // After agent finishes pi.on("agent_end", async (event, ctx) => { - // Check for done tags in execution mode + // In execution mode, check if all steps complete if (executionMode && todoItems.length > 0) { - const messages = event.messages; - const lastAssistant = [...messages].reverse().find((m) => m.role === "assistant"); - if (lastAssistant && Array.isArray(lastAssistant.content)) { - const textContent = lastAssistant.content - .filter((block): block is { type: "text"; text: string } => block.type === "text") - .map((block) => block.text) - .join("\n"); - - // Find and mark completed items - const doneSteps = findDoneSteps(textContent); - for (const stepNum of doneSteps) { - const item = todoItems.find((t) => t.step === stepNum); - if (item && !item.completed) { - item.completed = true; - } - } - updateStatus(ctx); - } - - // Check if all complete const allComplete = todoItems.every((t) => t.completed); if (allComplete) { // Show final completed list in chat