From 81f4cdf3e3d2ac6cfb2e4480a14bf1f92bf8766a Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Tue, 30 Dec 2025 00:13:11 +0100 Subject: [PATCH] Extract shared compaction/branch-summarization utils - New utils.ts with shared functions: - FileOperations type and createFileOps() - extractFileOpsFromMessage() - computeFileLists() - formatFileOperations() - serializeConversation() - SUMMARIZATION_SYSTEM_PROMPT - branch-summarization.ts now uses: - Serialization approach (conversation as text, not LLM messages) - completeSimple with system prompt - Shared utility functions --- .../core/compaction/branch-summarization.ts | 93 +++-------- .../src/core/compaction/compaction.ts | 144 ++-------------- .../coding-agent/src/core/compaction/index.ts | 1 + .../coding-agent/src/core/compaction/utils.ts | 154 ++++++++++++++++++ 4 files changed, 193 insertions(+), 199 deletions(-) create mode 100644 packages/coding-agent/src/core/compaction/utils.ts diff --git a/packages/coding-agent/src/core/compaction/branch-summarization.ts b/packages/coding-agent/src/core/compaction/branch-summarization.ts index 3647afb8..8bca45ff 100644 --- a/packages/coding-agent/src/core/compaction/branch-summarization.ts +++ b/packages/coding-agent/src/core/compaction/branch-summarization.ts @@ -7,7 +7,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { Model } from "@mariozechner/pi-ai"; -import { complete } from "@mariozechner/pi-ai"; +import { completeSimple } from "@mariozechner/pi-ai"; import { convertToLlm, createBranchSummaryMessage, @@ -16,6 +16,15 @@ import { } from "../messages.js"; import type { ReadonlySessionManager, SessionEntry } from "../session-manager.js"; import { estimateTokens } from "./compaction.js"; +import { + computeFileLists, + createFileOps, + extractFileOpsFromMessage, + type FileOperations, + formatFileOperations, + SUMMARIZATION_SYSTEM_PROMPT, + serializeConversation, +} from "./utils.js"; // ============================================================================ // Types @@ -35,11 +44,7 @@ export interface BranchSummaryDetails { modifiedFiles: string[]; } -export interface FileOperations { - read: Set; - written: Set; - edited: Set; -} +export type { FileOperations } from "./utils.js"; export interface BranchPreparation { /** Messages extracted for summarization, in chronological order */ @@ -159,38 +164,6 @@ function getMessageFromEntry(entry: SessionEntry): AgentMessage | undefined { } } -/** - * Extract file operations from tool calls in an assistant message. - */ -function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void { - if (message.role !== "assistant") return; - if (!("content" in message) || !Array.isArray(message.content)) return; - - for (const block of message.content) { - if (typeof block !== "object" || block === null) continue; - if (!("type" in block) || block.type !== "toolCall") continue; - if (!("arguments" in block) || !("name" in block)) continue; - - const args = block.arguments as Record | undefined; - if (!args) continue; - - const path = typeof args.path === "string" ? args.path : undefined; - if (!path) continue; - - switch (block.name) { - case "read": - fileOps.read.add(path); - break; - case "write": - fileOps.written.add(path); - break; - case "edit": - fileOps.edited.add(path); - break; - } - } -} - /** * Prepare entries for summarization with token budget. * @@ -206,11 +179,7 @@ function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperation */ export function prepareBranchEntries(entries: SessionEntry[], tokenBudget: number = 0): BranchPreparation { const messages: AgentMessage[] = []; - const fileOps: FileOperations = { - read: new Set(), - written: new Set(), - edited: new Set(), - }; + const fileOps = createFileOps(); let totalTokens = 0; // First pass: collect file ops from ALL entries (even if they don't fit in token budget) @@ -322,24 +291,29 @@ export async function generateBranchSummary( return { summary: "No content to summarize" }; } - // Transform to LLM-compatible messages (preserves tool calls, etc.) - const transformedMessages = convertToLlm(messages); + // Transform to LLM-compatible messages, then serialize to text + // Serialization prevents the model from treating it as a conversation to continue + const llmMessages = convertToLlm(messages); + const conversationText = serializeConversation(llmMessages); // Build prompt const instructions = customInstructions || BRANCH_SUMMARY_PROMPT; + const promptText = `\n${conversationText}\n\n\n${instructions}`; - // Append summarization prompt as final user message const summarizationMessages = [ - ...transformedMessages, { role: "user" as const, - content: [{ type: "text" as const, text: instructions }], + content: [{ type: "text" as const, text: promptText }], timestamp: Date.now(), }, ]; // Call LLM for summarization - const response = await complete(model, { messages: summarizationMessages }, { apiKey, signal, maxTokens: 2048 }); + const response = await completeSimple( + model, + { systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages }, + { apiKey, signal, maxTokens: 2048 }, + ); // Check if aborted or errored if (response.stopReason === "aborted") { @@ -357,26 +331,13 @@ export async function generateBranchSummary( // Prepend preamble to provide context about the branch summary summary = BRANCH_SUMMARY_PREAMBLE + summary; - // Compute file lists - const modified = new Set([...fileOps.edited, ...fileOps.written]); - const readOnly = [...fileOps.read].filter((f) => !modified.has(f)).sort(); - const modifiedFiles = [...modified].sort(); - - // Append file lists to summary text (for LLM context and TUI display) - const fileSections: string[] = []; - if (readOnly.length > 0) { - fileSections.push(`\n${readOnly.join("\n")}\n`); - } - if (modifiedFiles.length > 0) { - fileSections.push(`\n${modifiedFiles.join("\n")}\n`); - } - if (fileSections.length > 0) { - summary += `\n\n${fileSections.join("\n\n")}`; - } + // Compute file lists and append to summary + const { readFiles, modifiedFiles } = computeFileLists(fileOps); + summary += formatFileOperations(readFiles, modifiedFiles); return { summary: summary || "No summary generated", - readFiles: readOnly, + readFiles, modifiedFiles, }; } diff --git a/packages/coding-agent/src/core/compaction/compaction.ts b/packages/coding-agent/src/core/compaction/compaction.ts index 2883483d..afa99152 100644 --- a/packages/coding-agent/src/core/compaction/compaction.ts +++ b/packages/coding-agent/src/core/compaction/compaction.ts @@ -6,10 +6,19 @@ */ import type { AgentMessage } from "@mariozechner/pi-agent-core"; -import type { AssistantMessage, Message, Model, Usage } from "@mariozechner/pi-ai"; +import type { AssistantMessage, Model, Usage } from "@mariozechner/pi-ai"; import { complete, completeSimple } from "@mariozechner/pi-ai"; import { convertToLlm, createBranchSummaryMessage, createHookMessage } from "../messages.js"; import type { CompactionEntry, SessionEntry } from "../session-manager.js"; +import { + computeFileLists, + createFileOps, + extractFileOpsFromMessage, + type FileOperations, + formatFileOperations, + SUMMARIZATION_SYSTEM_PROMPT, + serializeConversation, +} from "./utils.js"; // ============================================================================ // File Operation Tracking @@ -21,44 +30,6 @@ export interface CompactionDetails { modifiedFiles: string[]; } -interface FileOperations { - read: Set; - written: Set; - edited: Set; -} - -/** - * Extract file operations from tool calls in an assistant message. - */ -function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void { - if (message.role !== "assistant") return; - if (!("content" in message) || !Array.isArray(message.content)) return; - - for (const block of message.content) { - if (typeof block !== "object" || block === null) continue; - if (!("type" in block) || block.type !== "toolCall") continue; - if (!("arguments" in block) || !("name" in block)) continue; - - const args = block.arguments as Record | undefined; - if (!args) continue; - - const path = typeof args.path === "string" ? args.path : undefined; - if (!path) continue; - - switch (block.name) { - case "read": - fileOps.read.add(path); - break; - case "write": - fileOps.written.add(path); - break; - case "edit": - fileOps.edited.add(path); - break; - } - } -} - /** * Extract file operations from messages and previous compaction entries. */ @@ -67,11 +38,7 @@ function extractFileOperations( entries: SessionEntry[], prevCompactionIndex: number, ): FileOperations { - const fileOps: FileOperations = { - read: new Set(), - written: new Set(), - edited: new Set(), - }; + const fileOps = createFileOps(); // Collect from previous compaction's details (if pi-generated) if (prevCompactionIndex >= 0) { @@ -95,91 +62,6 @@ function extractFileOperations( return fileOps; } -/** - * Compute final file lists from file operations. - */ -function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } { - const modified = new Set([...fileOps.edited, ...fileOps.written]); - const readOnly = [...fileOps.read].filter((f) => !modified.has(f)).sort(); - const modifiedFiles = [...modified].sort(); - return { readFiles: readOnly, modifiedFiles }; -} - -/** - * Format file operations as XML tags for summary. - */ -function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string { - const sections: string[] = []; - if (readFiles.length > 0) { - sections.push(`\n${readFiles.join("\n")}\n`); - } - if (modifiedFiles.length > 0) { - sections.push(`\n${modifiedFiles.join("\n")}\n`); - } - if (sections.length === 0) return ""; - return `\n\n${sections.join("\n\n")}`; -} - -/** - * Serialize LLM messages to text for summarization. - * This prevents the model from treating it as a conversation to continue. - * Call convertToLlm() first to handle custom message types. - */ -function serializeConversation(messages: Message[]): string { - const parts: string[] = []; - - for (const msg of messages) { - if (msg.role === "user") { - const content = - typeof msg.content === "string" - ? msg.content - : msg.content - .filter((c): c is { type: "text"; text: string } => c.type === "text") - .map((c) => c.text) - .join(""); - if (content) parts.push(`[User]: ${content}`); - } else if (msg.role === "assistant") { - const textParts: string[] = []; - const thinkingParts: string[] = []; - const toolCalls: string[] = []; - - for (const block of msg.content) { - if (block.type === "text") { - textParts.push(block.text); - } else if (block.type === "thinking") { - thinkingParts.push(block.thinking); - } else if (block.type === "toolCall") { - const args = block.arguments as Record; - const argsStr = Object.entries(args) - .map(([k, v]) => `${k}=${JSON.stringify(v)}`) - .join(", "); - toolCalls.push(`${block.name}(${argsStr})`); - } - } - - if (thinkingParts.length > 0) { - parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`); - } - if (textParts.length > 0) { - parts.push(`[Assistant]: ${textParts.join("\n")}`); - } - if (toolCalls.length > 0) { - parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`); - } - } else if (msg.role === "toolResult") { - const content = msg.content - .filter((c): c is { type: "text"; text: string } => c.type === "text") - .map((c) => c.text) - .join(""); - if (content) { - parts.push(`[Tool result]: ${content}`); - } - } - } - - return parts.join("\n\n"); -} - // ============================================================================ // Message Extraction // ============================================================================ @@ -501,10 +383,6 @@ export function findCutPoint( // Summarization // ============================================================================ -const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified. - -Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`; - const SUMMARIZATION_PROMPT = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work. Use this EXACT format: diff --git a/packages/coding-agent/src/core/compaction/index.ts b/packages/coding-agent/src/core/compaction/index.ts index 4f8ad306..d8c92a67 100644 --- a/packages/coding-agent/src/core/compaction/index.ts +++ b/packages/coding-agent/src/core/compaction/index.ts @@ -4,3 +4,4 @@ export * from "./branch-summarization.js"; export * from "./compaction.js"; +export * from "./utils.js"; diff --git a/packages/coding-agent/src/core/compaction/utils.ts b/packages/coding-agent/src/core/compaction/utils.ts new file mode 100644 index 00000000..9c8f46bc --- /dev/null +++ b/packages/coding-agent/src/core/compaction/utils.ts @@ -0,0 +1,154 @@ +/** + * Shared utilities for compaction and branch summarization. + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import type { Message } from "@mariozechner/pi-ai"; + +// ============================================================================ +// File Operation Tracking +// ============================================================================ + +export interface FileOperations { + read: Set; + written: Set; + edited: Set; +} + +export function createFileOps(): FileOperations { + return { + read: new Set(), + written: new Set(), + edited: new Set(), + }; +} + +/** + * Extract file operations from tool calls in an assistant message. + */ +export function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void { + if (message.role !== "assistant") return; + if (!("content" in message) || !Array.isArray(message.content)) return; + + for (const block of message.content) { + if (typeof block !== "object" || block === null) continue; + if (!("type" in block) || block.type !== "toolCall") continue; + if (!("arguments" in block) || !("name" in block)) continue; + + const args = block.arguments as Record | undefined; + if (!args) continue; + + const path = typeof args.path === "string" ? args.path : undefined; + if (!path) continue; + + switch (block.name) { + case "read": + fileOps.read.add(path); + break; + case "write": + fileOps.written.add(path); + break; + case "edit": + fileOps.edited.add(path); + break; + } + } +} + +/** + * Compute final file lists from file operations. + * Returns readFiles (files only read, not modified) and modifiedFiles. + */ +export function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } { + const modified = new Set([...fileOps.edited, ...fileOps.written]); + const readOnly = [...fileOps.read].filter((f) => !modified.has(f)).sort(); + const modifiedFiles = [...modified].sort(); + return { readFiles: readOnly, modifiedFiles }; +} + +/** + * Format file operations as XML tags for summary. + */ +export function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string { + const sections: string[] = []; + if (readFiles.length > 0) { + sections.push(`\n${readFiles.join("\n")}\n`); + } + if (modifiedFiles.length > 0) { + sections.push(`\n${modifiedFiles.join("\n")}\n`); + } + if (sections.length === 0) return ""; + return `\n\n${sections.join("\n\n")}`; +} + +// ============================================================================ +// Message Serialization +// ============================================================================ + +/** + * Serialize LLM messages to text for summarization. + * This prevents the model from treating it as a conversation to continue. + * Call convertToLlm() first to handle custom message types. + */ +export function serializeConversation(messages: Message[]): string { + const parts: string[] = []; + + for (const msg of messages) { + if (msg.role === "user") { + const content = + typeof msg.content === "string" + ? msg.content + : msg.content + .filter((c): c is { type: "text"; text: string } => c.type === "text") + .map((c) => c.text) + .join(""); + if (content) parts.push(`[User]: ${content}`); + } else if (msg.role === "assistant") { + const textParts: string[] = []; + const thinkingParts: string[] = []; + const toolCalls: string[] = []; + + for (const block of msg.content) { + if (block.type === "text") { + textParts.push(block.text); + } else if (block.type === "thinking") { + thinkingParts.push(block.thinking); + } else if (block.type === "toolCall") { + const args = block.arguments as Record; + const argsStr = Object.entries(args) + .map(([k, v]) => `${k}=${JSON.stringify(v)}`) + .join(", "); + toolCalls.push(`${block.name}(${argsStr})`); + } + } + + if (thinkingParts.length > 0) { + parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`); + } + if (textParts.length > 0) { + parts.push(`[Assistant]: ${textParts.join("\n")}`); + } + if (toolCalls.length > 0) { + parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`); + } + } else if (msg.role === "toolResult") { + const content = msg.content + .filter((c): c is { type: "text"; text: string } => c.type === "text") + .map((c) => c.text) + .join(""); + if (content) { + parts.push(`[Tool result]: ${content}`); + } + } + } + + return parts.join("\n\n"); +} + +// ============================================================================ +// Summarization System Prompt +// ============================================================================ + +export const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified. + +Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`;