Extract shared compaction/branch-summarization utils

- New utils.ts with shared functions:
  - FileOperations type and createFileOps()
  - extractFileOpsFromMessage()
  - computeFileLists()
  - formatFileOperations()
  - serializeConversation()
  - SUMMARIZATION_SYSTEM_PROMPT

- branch-summarization.ts now uses:
  - Serialization approach (conversation as text, not LLM messages)
  - completeSimple with system prompt
  - Shared utility functions
This commit is contained in:
Mario Zechner 2025-12-30 00:13:11 +01:00
parent 17ce3814a8
commit 81f4cdf3e3
4 changed files with 193 additions and 199 deletions

View file

@ -7,7 +7,7 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import { complete } from "@mariozechner/pi-ai";
import { completeSimple } from "@mariozechner/pi-ai";
import {
convertToLlm,
createBranchSummaryMessage,
@ -16,6 +16,15 @@ import {
} from "../messages.js";
import type { ReadonlySessionManager, SessionEntry } from "../session-manager.js";
import { estimateTokens } from "./compaction.js";
import {
computeFileLists,
createFileOps,
extractFileOpsFromMessage,
type FileOperations,
formatFileOperations,
SUMMARIZATION_SYSTEM_PROMPT,
serializeConversation,
} from "./utils.js";
// ============================================================================
// Types
@ -35,11 +44,7 @@ export interface BranchSummaryDetails {
modifiedFiles: string[];
}
export interface FileOperations {
read: Set<string>;
written: Set<string>;
edited: Set<string>;
}
export type { FileOperations } from "./utils.js";
export interface BranchPreparation {
/** Messages extracted for summarization, in chronological order */
@ -159,38 +164,6 @@ function getMessageFromEntry(entry: SessionEntry): AgentMessage | undefined {
}
}
/**
 * Extract file operations from tool calls in an assistant message.
 *
 * Scans the message's content blocks for `toolCall` blocks named
 * "read", "write" or "edit" carrying a string `path` argument, and adds
 * each path to the matching set in `fileOps` (mutated in place).
 * Non-assistant messages and malformed blocks are silently ignored.
 */
function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void {
if (message.role !== "assistant") return;
if (!("content" in message) || !Array.isArray(message.content)) return;
for (const block of message.content) {
// Skip anything that is not a well-formed tool-call block.
if (typeof block !== "object" || block === null) continue;
if (!("type" in block) || block.type !== "toolCall") continue;
if (!("arguments" in block) || !("name" in block)) continue;
const args = block.arguments as Record<string, unknown> | undefined;
if (!args) continue;
// Only tool calls with a string `path` argument are tracked.
const path = typeof args.path === "string" ? args.path : undefined;
if (!path) continue;
// Route the path into the bucket matching the tool name; other tools are ignored.
switch (block.name) {
case "read":
fileOps.read.add(path);
break;
case "write":
fileOps.written.add(path);
break;
case "edit":
fileOps.edited.add(path);
break;
}
}
}
/**
* Prepare entries for summarization with token budget.
*
@ -206,11 +179,7 @@ function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperation
*/
export function prepareBranchEntries(entries: SessionEntry[], tokenBudget: number = 0): BranchPreparation {
const messages: AgentMessage[] = [];
const fileOps: FileOperations = {
read: new Set(),
written: new Set(),
edited: new Set(),
};
const fileOps = createFileOps();
let totalTokens = 0;
// First pass: collect file ops from ALL entries (even if they don't fit in token budget)
@ -322,24 +291,29 @@ export async function generateBranchSummary(
return { summary: "No content to summarize" };
}
// Transform to LLM-compatible messages (preserves tool calls, etc.)
const transformedMessages = convertToLlm(messages);
// Transform to LLM-compatible messages, then serialize to text
// Serialization prevents the model from treating it as a conversation to continue
const llmMessages = convertToLlm(messages);
const conversationText = serializeConversation(llmMessages);
// Build prompt
const instructions = customInstructions || BRANCH_SUMMARY_PROMPT;
const promptText = `<conversation>\n${conversationText}\n</conversation>\n\n${instructions}`;
// Append summarization prompt as final user message
const summarizationMessages = [
...transformedMessages,
{
role: "user" as const,
content: [{ type: "text" as const, text: instructions }],
content: [{ type: "text" as const, text: promptText }],
timestamp: Date.now(),
},
];
// Call LLM for summarization
const response = await complete(model, { messages: summarizationMessages }, { apiKey, signal, maxTokens: 2048 });
const response = await completeSimple(
model,
{ systemPrompt: SUMMARIZATION_SYSTEM_PROMPT, messages: summarizationMessages },
{ apiKey, signal, maxTokens: 2048 },
);
// Check if aborted or errored
if (response.stopReason === "aborted") {
@ -357,26 +331,13 @@ export async function generateBranchSummary(
// Prepend preamble to provide context about the branch summary
summary = BRANCH_SUMMARY_PREAMBLE + summary;
// Compute file lists
const modified = new Set([...fileOps.edited, ...fileOps.written]);
const readOnly = [...fileOps.read].filter((f) => !modified.has(f)).sort();
const modifiedFiles = [...modified].sort();
// Append file lists to summary text (for LLM context and TUI display)
const fileSections: string[] = [];
if (readOnly.length > 0) {
fileSections.push(`<read-files>\n${readOnly.join("\n")}\n</read-files>`);
}
if (modifiedFiles.length > 0) {
fileSections.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
}
if (fileSections.length > 0) {
summary += `\n\n${fileSections.join("\n\n")}`;
}
// Compute file lists and append to summary
const { readFiles, modifiedFiles } = computeFileLists(fileOps);
summary += formatFileOperations(readFiles, modifiedFiles);
return {
summary: summary || "No summary generated",
readFiles: readOnly,
readFiles,
modifiedFiles,
};
}

View file

@ -6,10 +6,19 @@
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { AssistantMessage, Message, Model, Usage } from "@mariozechner/pi-ai";
import type { AssistantMessage, Model, Usage } from "@mariozechner/pi-ai";
import { complete, completeSimple } from "@mariozechner/pi-ai";
import { convertToLlm, createBranchSummaryMessage, createHookMessage } from "../messages.js";
import type { CompactionEntry, SessionEntry } from "../session-manager.js";
import {
computeFileLists,
createFileOps,
extractFileOpsFromMessage,
type FileOperations,
formatFileOperations,
SUMMARIZATION_SYSTEM_PROMPT,
serializeConversation,
} from "./utils.js";
// ============================================================================
// File Operation Tracking
@ -21,44 +30,6 @@ export interface CompactionDetails {
modifiedFiles: string[];
}
interface FileOperations {
read: Set<string>;
written: Set<string>;
edited: Set<string>;
}
/**
 * Extract file operations from tool calls in an assistant message.
 *
 * Adds every string `path` argument of a "read"/"write"/"edit" tool call
 * to the corresponding set in `fileOps` (mutated in place). Messages that
 * are not assistant messages, and blocks that are not well-formed tool
 * calls, are ignored.
 */
function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void {
if (message.role !== "assistant") return;
if (!("content" in message) || !Array.isArray(message.content)) return;
for (const block of message.content) {
// Guard against non-object entries and non-tool-call blocks.
if (typeof block !== "object" || block === null) continue;
if (!("type" in block) || block.type !== "toolCall") continue;
if (!("arguments" in block) || !("name" in block)) continue;
const args = block.arguments as Record<string, unknown> | undefined;
if (!args) continue;
// Only a string `path` argument is tracked; anything else is skipped.
const path = typeof args.path === "string" ? args.path : undefined;
if (!path) continue;
// Dispatch on the tool name; unrecognized tools fall through untracked.
switch (block.name) {
case "read":
fileOps.read.add(path);
break;
case "write":
fileOps.written.add(path);
break;
case "edit":
fileOps.edited.add(path);
break;
}
}
}
/**
* Extract file operations from messages and previous compaction entries.
*/
@ -67,11 +38,7 @@ function extractFileOperations(
entries: SessionEntry[],
prevCompactionIndex: number,
): FileOperations {
const fileOps: FileOperations = {
read: new Set(),
written: new Set(),
edited: new Set(),
};
const fileOps = createFileOps();
// Collect from previous compaction's details (if pi-generated)
if (prevCompactionIndex >= 0) {
@ -95,91 +62,6 @@ function extractFileOperations(
return fileOps;
}
/**
 * Compute final file lists from file operations.
 *
 * A file counts as modified when it appears in either `edited` or `written`.
 * `readFiles` contains only files that were read and never modified.
 * Both returned arrays are sorted lexicographically.
 */
function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } {
const modified = new Set([...fileOps.edited, ...fileOps.written]);
// Exclude modified files from the read-only list to avoid double reporting.
const readOnly = [...fileOps.read].filter((f) => !modified.has(f)).sort();
const modifiedFiles = [...modified].sort();
return { readFiles: readOnly, modifiedFiles };
}
/**
 * Format file operations as XML tags for summary.
 *
 * Emits `<read-files>` and/or `<modified-files>` sections, one path per
 * line, prefixed with a blank line so the result can be appended directly
 * to the summary text. Returns the empty string when both lists are empty.
 */
function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
const sections: string[] = [];
if (readFiles.length > 0) {
sections.push(`<read-files>\n${readFiles.join("\n")}\n</read-files>`);
}
if (modifiedFiles.length > 0) {
sections.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
}
// Nothing to report: append nothing to the summary.
if (sections.length === 0) return "";
return `\n\n${sections.join("\n\n")}`;
}
/**
 * Serialize LLM messages to text for summarization.
 * This prevents the model from treating it as a conversation to continue.
 * Call convertToLlm() first to handle custom message types.
 *
 * Each message becomes one or more labeled paragraphs ("[User]: …",
 * "[Assistant]: …", "[Assistant thinking]: …", "[Assistant tool calls]: …",
 * "[Tool result]: …") joined with blank lines. Empty content is dropped;
 * roles other than user/assistant/toolResult are ignored.
 */
function serializeConversation(messages: Message[]): string {
const parts: string[] = [];
for (const msg of messages) {
if (msg.role === "user") {
// User content may be a plain string or an array of blocks; only text blocks are kept.
const content =
typeof msg.content === "string"
? msg.content
: msg.content
.filter((c): c is { type: "text"; text: string } => c.type === "text")
.map((c) => c.text)
.join("");
if (content) parts.push(`[User]: ${content}`);
} else if (msg.role === "assistant") {
// Bucket assistant blocks by kind so each kind gets its own labeled paragraph.
const textParts: string[] = [];
const thinkingParts: string[] = [];
const toolCalls: string[] = [];
for (const block of msg.content) {
if (block.type === "text") {
textParts.push(block.text);
} else if (block.type === "thinking") {
thinkingParts.push(block.thinking);
} else if (block.type === "toolCall") {
// Render a tool call as name(k=v, …) with JSON-encoded argument values.
const args = block.arguments as Record<string, unknown>;
const argsStr = Object.entries(args)
.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
.join(", ");
toolCalls.push(`${block.name}(${argsStr})`);
}
}
if (thinkingParts.length > 0) {
parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`);
}
if (textParts.length > 0) {
parts.push(`[Assistant]: ${textParts.join("\n")}`);
}
if (toolCalls.length > 0) {
parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`);
}
} else if (msg.role === "toolResult") {
// Tool results keep only their text blocks, concatenated.
const content = msg.content
.filter((c): c is { type: "text"; text: string } => c.type === "text")
.map((c) => c.text)
.join("");
if (content) {
parts.push(`[Tool result]: ${content}`);
}
}
}
return parts.join("\n\n");
}
// ============================================================================
// Message Extraction
// ============================================================================
@ -501,10 +383,6 @@ export function findCutPoint(
// Summarization
// ============================================================================
// System prompt used when asking the model to summarize a serialized
// conversation; instructs it to emit only the structured summary rather
// than continuing the conversation.
const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.
Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`;
const SUMMARIZATION_PROMPT = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work.
Use this EXACT format:

View file

@ -4,3 +4,4 @@
export * from "./branch-summarization.js";
export * from "./compaction.js";
export * from "./utils.js";

View file

@ -0,0 +1,154 @@
/**
* Shared utilities for compaction and branch summarization.
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Message } from "@mariozechner/pi-ai";
// ============================================================================
// File Operation Tracking
// ============================================================================
/** Accumulator for file paths touched by tool calls in a conversation. */
export interface FileOperations {
// Paths passed to the "read" tool.
read: Set<string>;
// Paths passed to the "write" tool.
written: Set<string>;
// Paths passed to the "edit" tool.
edited: Set<string>;
}
/** Create an empty `FileOperations` accumulator with fresh, independent sets. */
export function createFileOps(): FileOperations {
  const read = new Set<string>();
  const written = new Set<string>();
  const edited = new Set<string>();
  return { read, written, edited };
}
/**
 * Extract file operations from tool calls in an assistant message.
 *
 * Every "read"/"write"/"edit" tool call carrying a string `path` argument
 * has that path recorded into the matching set of `fileOps` (mutated in
 * place). Non-assistant messages and malformed blocks are ignored.
 */
export function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void {
  if (message.role !== "assistant") return;
  if (!("content" in message) || !Array.isArray(message.content)) return;

  // Tool name -> destination bucket; tools not listed here are not tracked.
  const buckets = new Map<string, Set<string>>([
    ["read", fileOps.read],
    ["write", fileOps.written],
    ["edit", fileOps.edited],
  ]);

  for (const block of message.content) {
    // Only well-formed tool-call blocks with name + arguments qualify.
    if (typeof block !== "object" || block === null) continue;
    if (!("type" in block) || block.type !== "toolCall") continue;
    if (!("arguments" in block) || !("name" in block)) continue;

    const args = block.arguments as Record<string, unknown> | undefined;
    const path = args && typeof args.path === "string" ? args.path : undefined;
    if (path === undefined) continue;

    buckets.get(block.name as string)?.add(path);
  }
}
/**
 * Compute final file lists from file operations.
 *
 * A file counts as modified when it was written or edited; `readFiles`
 * contains only files that were read and never modified. Both arrays
 * are returned sorted lexicographically.
 */
export function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } {
  const modifiedSet = new Set<string>();
  for (const file of fileOps.edited) modifiedSet.add(file);
  for (const file of fileOps.written) modifiedSet.add(file);

  // Read-only = read but never modified.
  const readFiles: string[] = [];
  for (const file of fileOps.read) {
    if (!modifiedSet.has(file)) readFiles.push(file);
  }
  readFiles.sort();

  const modifiedFiles = Array.from(modifiedSet).sort();
  return { readFiles, modifiedFiles };
}
/**
 * Format file operations as XML tags for summary.
 *
 * Produces `<read-files>` / `<modified-files>` sections (one path per
 * line), preceded by a blank line so the result can be appended directly
 * to summary text. Returns "" when both lists are empty.
 */
export function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
  const parts: string[] = [];
  if (readFiles.length > 0) {
    parts.push(`<read-files>\n${readFiles.join("\n")}\n</read-files>`);
  }
  if (modifiedFiles.length > 0) {
    parts.push(`<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`);
  }
  return parts.length > 0 ? `\n\n${parts.join("\n\n")}` : "";
}
// ============================================================================
// Message Serialization
// ============================================================================
/**
 * Serialize LLM messages to text for summarization.
 * This prevents the model from treating it as a conversation to continue.
 * Call convertToLlm() first to handle custom message types.
 *
 * Each message yields labeled paragraphs ("[User]: …", "[Assistant]: …",
 * "[Assistant thinking]: …", "[Assistant tool calls]: …", "[Tool result]: …")
 * joined with blank lines. Empty content is dropped; other roles are ignored.
 */
export function serializeConversation(messages: Message[]): string {
  const sections: string[] = [];

  for (const msg of messages) {
    switch (msg.role) {
      case "user": {
        // User content may be a raw string or an array of blocks; keep text blocks only.
        const text =
          typeof msg.content === "string"
            ? msg.content
            : msg.content
                .filter((c): c is { type: "text"; text: string } => c.type === "text")
                .map((c) => c.text)
                .join("");
        if (text) sections.push(`[User]: ${text}`);
        break;
      }
      case "assistant": {
        // Bucket blocks by kind so each kind gets its own labeled paragraph.
        const texts: string[] = [];
        const thoughts: string[] = [];
        const calls: string[] = [];
        for (const block of msg.content) {
          if (block.type === "text") {
            texts.push(block.text);
          } else if (block.type === "thinking") {
            thoughts.push(block.thinking);
          } else if (block.type === "toolCall") {
            // Render as name(k=v, …) with JSON-encoded argument values.
            const args = block.arguments as Record<string, unknown>;
            const rendered = Object.entries(args)
              .map(([key, value]) => `${key}=${JSON.stringify(value)}`)
              .join(", ");
            calls.push(`${block.name}(${rendered})`);
          }
        }
        if (thoughts.length > 0) sections.push(`[Assistant thinking]: ${thoughts.join("\n")}`);
        if (texts.length > 0) sections.push(`[Assistant]: ${texts.join("\n")}`);
        if (calls.length > 0) sections.push(`[Assistant tool calls]: ${calls.join("; ")}`);
        break;
      }
      case "toolResult": {
        // Tool results keep only their text blocks, concatenated.
        const text = msg.content
          .filter((c): c is { type: "text"; text: string } => c.type === "text")
          .map((c) => c.text)
          .join("");
        if (text) sections.push(`[Tool result]: ${text}`);
        break;
      }
    }
  }

  return sections.join("\n\n");
}
// ============================================================================
// Summarization System Prompt
// ============================================================================
/**
 * System prompt for summarization calls: instructs the model to output
 * only the structured summary and never to continue the (serialized)
 * conversation it is given.
 */
export const SUMMARIZATION_SYSTEM_PROMPT = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.
Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`;