Port truncation logic from coding-agent to mom

- Add truncate.ts with 2000 lines / 50KB limits
- Update bash tool with tail truncation and temp file output
- Update read tool with head truncation and offset hints
- Remove redundant context history truncation (tools already provide actionable hints)

fixes #155
2026-04-17 08:00:59 +00:00 · 2025-12-09 16:05:08 +01:00 · 2025-12-09 16:05:08 +01:00 · 02c7f9ea51
commit 02c7f9ea51
parent de3fd172a9
5 changed files with 380 additions and 92 deletions
--- a/packages/mom/src/agent.ts
+++ b/packages/mom/src/agent.ts
@ -127,11 +127,7 @@ function getRecentMessages(channelDir: string, turnCount: number): string {
 		for (const msg of turn) {
 			const date = (msg.date || "").substring(0, 19);
 			const user = msg.userName || msg.user || "";
-			let text = msg.text || "";
-			// Truncate bot messages (tool results can be huge)
-			if (msg.isBot) {
-				text = truncateForContext(text, 50000, 2000, msg.ts);
-			}
+			const text = msg.text || "";
 			const attachments = (msg.attachments || []).map((a) => a.local).join(",");
 			formatted.push(`${date}\t${user}\t${text}\t${attachments}`);
 		}
@ -140,43 +136,6 @@ function getRecentMessages(channelDir: string, turnCount: number): string {
 	return formatted.join("\n");
 }

-/**
- * Truncate text to maxChars or maxLines, whichever comes first.
- * Adds a note with stats and instructions if truncation occurred.
- */
-function truncateForContext(text: string, maxChars: number, maxLines: number, ts?: string): string {
-	const lines = text.split("\n");
-	const originalLines = lines.length;
-	const originalChars = text.length;
-	let truncated = false;
-	let result = text;
-
-	// Check line limit first
-	if (lines.length > maxLines) {
-		result = lines.slice(0, maxLines).join("\n");
-		truncated = true;
-	}
-
-	// Check char limit
-	if (result.length > maxChars) {
-		result = result.substring(0, maxChars);
-		truncated = true;
-	}
-
-	if (truncated) {
-		const remainingLines = originalLines - result.split("\n").length;
-		const remainingChars = originalChars - result.length;
-		result += `\n[... truncated ${remainingLines} more lines, ${remainingChars} more chars. `;
-		if (ts) {
-			result += `To get full content: jq -r 'select(.ts=="${ts}") | .text' log.jsonl > /tmp/msg.txt, then read /tmp/msg.txt in segments]`;
-		} else {
-			result += `Search log.jsonl for full content]`;
-		}
-	}
-
-	return result;
-}
-
 function getMemory(channelDir: string): string {
 	const parts: string[] = [];

--- a/packages/mom/src/tools/bash.ts
+++ b/packages/mom/src/tools/bash.ts
@ -1,6 +1,19 @@
+import { randomBytes } from "node:crypto";
+import { createWriteStream } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
 import type { AgentTool } from "@mariozechner/pi-ai";
 import { Type } from "@sinclair/typebox";
 import type { Executor } from "../sandbox.js";
+import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateTail } from "./truncate.js";
+
+/**
+ * Build a path in the OS temp directory named `mom-bash-<16 hex chars>.log`; only the path is computed, no file is created here.
+ */
+function getTempFilePath(): string {
+	const id = randomBytes(8).toString("hex"); // 8 random bytes rendered as 16 hex characters
+	return join(tmpdir(), `mom-bash-${id}.log`);
+}

 const bashSchema = Type.Object({
 	label: Type.String({ description: "Brief description of what this command does (shown to user)" }),
@ -8,18 +21,26 @@ const bashSchema = Type.Object({
 	timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional, no default timeout)" })),
 });

+interface BashToolDetails { // Extra metadata the bash tool returns next to its text content
+	truncation?: TruncationResult; // result of truncateTail() on the combined command output
+	fullOutputPath?: string; // temp file holding the untruncated output; unset when no temp file was written
+}
+
 export function createBashTool(executor: Executor): AgentTool<typeof bashSchema> {
 	return {
 		name: "bash",
 		label: "bash",
-		description:
-			"Execute a bash command in the current working directory. Returns stdout and stderr. Optionally provide a timeout in seconds.",
+		description: `Execute a bash command in the current working directory. Returns stdout and stderr. Output is truncated to last ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). If truncated, full output is saved to a temp file. Optionally provide a timeout in seconds.`,
 		parameters: bashSchema,
 		execute: async (
 			_toolCallId: string,
 			{ command, timeout }: { label: string; command: string; timeout?: number },
 			signal?: AbortSignal,
 		) => {
+			// Track output for potential temp file writing
+			let tempFilePath: string | undefined;
+			let tempFileStream: ReturnType<typeof createWriteStream> | undefined;
+
 			const result = await executor.exec(command, { timeout, signal });
 			let output = "";
 			if (result.stdout) output += result.stdout;
@ -28,11 +49,49 @@ export function createBashTool(executor: Executor): AgentTool<typeof bashSchema>
 				output += result.stderr;
 			}

-			if (result.code !== 0) {
-				throw new Error(`${output}\n\nCommand exited with code ${result.code}`.trim());
+			const totalBytes = Buffer.byteLength(output, "utf-8");
+
+			// Write to temp file if output exceeds limit
+			if (totalBytes > DEFAULT_MAX_BYTES) {
+				tempFilePath = getTempFilePath();
+				tempFileStream = createWriteStream(tempFilePath);
+				tempFileStream.write(output);
+				tempFileStream.end();
 			}

-			return { content: [{ type: "text", text: output || "(no output)" }], details: undefined };
+			// Apply tail truncation
+			const truncation = truncateTail(output);
+			let outputText = truncation.content || "(no output)";
+
+			// Build details with truncation info
+			let details: BashToolDetails | undefined;
+
+			if (truncation.truncated) {
+				details = {
+					truncation,
+					fullOutputPath: tempFilePath,
+				};
+
+				// Build actionable notice
+				const startLine = truncation.totalLines - truncation.outputLines + 1;
+				const endLine = truncation.totalLines;
+
+				if (truncation.lastLinePartial) {
+					// Edge case: last line alone > 50KB
+					const lastLineSize = formatSize(Buffer.byteLength(output.split("\n").pop() || "", "utf-8"));
+					outputText += `\n\n[Showing last ${formatSize(truncation.outputBytes)} of line ${endLine} (line is ${lastLineSize}). Full output: ${tempFilePath}]`;
+				} else if (truncation.truncatedBy === "lines") {
+					outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines}. Full output: ${tempFilePath}]`;
+				} else {
+					outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Full output: ${tempFilePath}]`;
+				}
+			}
+
+			if (result.code !== 0) {
+				throw new Error(`${outputText}\n\nCommand exited with code ${result.code}`.trim());
+			}
+
+			return { content: [{ type: "text", text: outputText }], details };
 		},
 	};
 }
--- a/packages/mom/src/tools/read.ts
+++ b/packages/mom/src/tools/read.ts
@ -2,6 +2,7 @@ import type { AgentTool, ImageContent, TextContent } from "@mariozechner/pi-ai";
 import { Type } from "@sinclair/typebox";
 import { extname } from "path";
 import type { Executor } from "../sandbox.js";
+import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js";

 /**
 * Map of file extensions to MIME types for common image formats
@ -29,21 +30,21 @@ const readSchema = Type.Object({
 	limit: Type.Optional(Type.Number({ description: "Maximum number of lines to read" })),
 });

-const MAX_LINES = 2000;
-const MAX_LINE_LENGTH = 2000;
+interface ReadToolDetails { // Extra metadata the read tool returns next to its text content
+	truncation?: TruncationResult; // result of truncateHead(), attached when the file text was cut short
+}

 export function createReadTool(executor: Executor): AgentTool<typeof readSchema> {
 	return {
 		name: "read",
 		label: "read",
-		description:
-			"Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, defaults to first 2000 lines. Use offset/limit for large files.",
+		description: `Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). Use offset/limit for large files.`,
 		parameters: readSchema,
 		execute: async (
 			_toolCallId: string,
 			{ path, offset, limit }: { label: string; path: string; offset?: number; limit?: number },
 			signal?: AbortSignal,
-		) => {
+		): Promise<{ content: (TextContent | ImageContent)[]; details: ReadToolDetails | undefined }> => {
 			const mimeType = isImageFile(path);

 			if (mimeType) {
@ -58,65 +59,95 @@ export function createReadTool(executor: Executor): AgentTool<typeof readSchema>
 					content: [
 						{ type: "text", text: `Read image file [${mimeType}]` },
 						{ type: "image", data: base64, mimeType },
-					] as (TextContent | ImageContent)[],
+					],
 					details: undefined,
 				};
 			}

-			// Read as text using cat with offset/limit via sed/head/tail
-			let cmd: string;
-			const startLine = offset ? Math.max(1, offset) : 1;
-			const maxLines = limit || MAX_LINES;
+			// Get total line count first
+			const countResult = await executor.exec(`wc -l < ${shellEscape(path)}`, { signal });
+			if (countResult.code !== 0) {
+				throw new Error(countResult.stderr || `Failed to read file: ${path}`);
+			}
+			const totalFileLines = Number.parseInt(countResult.stdout.trim(), 10) + 1; // wc -l counts newlines, not lines

-			if (startLine === 1) {
-				cmd = `head -n ${maxLines} ${shellEscape(path)}`;
-			} else {
-				cmd = `sed -n '${startLine},${startLine + maxLines - 1}p' ${shellEscape(path)}`;
+			// Apply offset if specified (1-indexed)
+			const startLine = offset ? Math.max(1, offset) : 1;
+			const startLineDisplay = startLine;
+
+			// Check if offset is out of bounds
+			if (startLine > totalFileLines) {
+				throw new Error(`Offset ${offset} is beyond end of file (${totalFileLines} lines total)`);
 			}

-			// Also get total line count
-			const countResult = await executor.exec(`wc -l < ${shellEscape(path)}`, { signal });
-			const totalLines = Number.parseInt(countResult.stdout.trim(), 10) || 0;
+			// Read content with offset
+			let cmd: string;
+			if (startLine === 1) {
+				cmd = `cat ${shellEscape(path)}`;
+			} else {
+				cmd = `tail -n +${startLine} ${shellEscape(path)}`;
+			}

 			const result = await executor.exec(cmd, { signal });
 			if (result.code !== 0) {
 				throw new Error(result.stderr || `Failed to read file: ${path}`);
 			}

-			const lines = result.stdout.split("\n");
+			let selectedContent = result.stdout;
+			let userLimitedLines: number | undefined;

-			// Truncate long lines
-			let hadTruncatedLines = false;
-			const formattedLines = lines.map((line) => {
-				if (line.length > MAX_LINE_LENGTH) {
-					hadTruncatedLines = true;
-					return line.slice(0, MAX_LINE_LENGTH);
+			// Apply user limit if specified
+			if (limit !== undefined) {
+				const lines = selectedContent.split("\n");
+				const endLine = Math.min(limit, lines.length);
+				selectedContent = lines.slice(0, endLine).join("\n");
+				userLimitedLines = endLine;
+			}
+
+			// Apply truncation (respects both line and byte limits)
+			const truncation = truncateHead(selectedContent);
+
+			let outputText: string;
+			let details: ReadToolDetails | undefined;
+
+			if (truncation.firstLineExceedsLimit) {
+				// First line at offset exceeds 50KB - tell model to use bash
+				const firstLineSize = formatSize(Buffer.byteLength(selectedContent.split("\n")[0], "utf-8"));
+				outputText = `[Line ${startLineDisplay} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${startLineDisplay}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`;
+				details = { truncation };
+			} else if (truncation.truncated) {
+				// Truncation occurred - build actionable notice
+				const endLineDisplay = startLineDisplay + truncation.outputLines - 1;
+				const nextOffset = endLineDisplay + 1;
+
+				outputText = truncation.content;
+
+				if (truncation.truncatedBy === "lines") {
+					outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue]`;
+				} else {
+					outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue]`;
 				}
-				return line;
-			});
+				details = { truncation };
+			} else if (userLimitedLines !== undefined) {
+				// User specified limit, check if there's more content
+				const linesFromStart = startLine - 1 + userLimitedLines;
+				if (linesFromStart < totalFileLines) {
+					const remaining = totalFileLines - linesFromStart;
+					const nextOffset = startLine + userLimitedLines;

-			let outputText = formattedLines.join("\n");
-
-			// Add notices
-			const notices: string[] = [];
-			const endLine = startLine + lines.length - 1;
-
-			if (hadTruncatedLines) {
-				notices.push(`Some lines were truncated to ${MAX_LINE_LENGTH} characters for display`);
-			}
-
-			if (endLine < totalLines) {
-				const remaining = totalLines - endLine;
-				notices.push(`${remaining} more lines not shown. Use offset=${endLine + 1} to continue reading`);
-			}
-
-			if (notices.length > 0) {
-				outputText += `\n\n... (${notices.join(". ")})`;
+					outputText = truncation.content;
+					outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue]`;
+				} else {
+					outputText = truncation.content;
+				}
+			} else {
+				// No truncation, no user limit exceeded
+				outputText = truncation.content;
 			}

 			return {
-				content: [{ type: "text", text: outputText }] as (TextContent | ImageContent)[],
-				details: undefined,
+				content: [{ type: "text", text: outputText }],
+				details,
 			};
 		},
 	};
--- a/packages/mom/src/tools/truncate.ts
+++ b/packages/mom/src/tools/truncate.ts
@ -0,0 +1,236 @@
+/**
+ * Shared truncation utilities for tool outputs.
+ *
+ * Truncation is based on two independent limits - whichever is hit first wins:
+ * - Line limit (default: 2000 lines)
+ * - Byte limit (default: 50KB)
+ *
+ * Never returns partial lines (except bash tail truncation edge case).
+ */
+
+export const DEFAULT_MAX_LINES = 2000; // default line cap used by truncateHead/truncateTail when options.maxLines is omitted
+export const DEFAULT_MAX_BYTES = 50 * 1024; // 50KB default cap, measured in UTF-8 bytes rather than characters
+
+export interface TruncationResult { // Outcome of truncateHead/truncateTail: the kept text plus statistics about the original
+	/** The kept content (the unchanged input when truncated is false; empty when firstLineExceedsLimit is true) */
+	content: string;
+	/** Whether anything was dropped because a limit was exceeded */
+	truncated: boolean;
+	/** Which limit was hit: "lines", "bytes", or null if not truncated */
+	truncatedBy: "lines" | "bytes" | null;
+	/** Number of "\n"-separated lines in the original content (a trailing newline adds one empty line) */
+	totalLines: number;
+	/** Size of the original content in UTF-8 bytes */
+	totalBytes: number;
+	/** Number of lines in `content`; all are whole lines unless lastLinePartial is true */
+	outputLines: number;
+	/** Size of `content` in UTF-8 bytes */
+	outputBytes: number;
+	/** Whether `content` is only the tail end of the original's last line (tail truncation edge case) */
+	lastLinePartial: boolean;
+	/** Whether the first line alone exceeded the byte limit (head truncation only) */
+	firstLineExceedsLimit: boolean;
+}
+
+export interface TruncationOptions { // Optional per-call overrides for the default limits
+	/** Maximum number of lines to keep; DEFAULT_MAX_LINES (2000) is used when omitted */
+	maxLines?: number;
+	/** Maximum size of the kept content in UTF-8 bytes; DEFAULT_MAX_BYTES (50KB) is used when omitted */
+	maxBytes?: number;
+}
+
+/**
+ * Format a byte count as a short human-readable string: "<n>B" below 1024, otherwise KB or MB with one decimal (1024-based).
+ */
+export function formatSize(bytes: number): string {
+	if (bytes < 1024) {
+		return `${bytes}B`;
+	} else if (bytes < 1024 * 1024) {
+		return `${(bytes / 1024).toFixed(1)}KB`;
+	} else {
+		return `${(bytes / (1024 * 1024)).toFixed(1)}MB`; // no larger unit: sizes of 1GB and above are still reported in MB
+	}
+}
+
+/**
+ * Keep the leading whole lines of `content` that fit within both limits (line count and UTF-8 byte size).
+ * Suitable for file reads where you want to see the beginning. Input within both limits is returned unchanged.
+ *
+ * Never returns partial lines. If the first line alone exceeds the byte limit,
+ * returns empty content with truncated=true, truncatedBy="bytes" and firstLineExceedsLimit=true.
+ */
+export function truncateHead(content: string, options: TruncationOptions = {}): TruncationResult {
+	const maxLines = options.maxLines ?? DEFAULT_MAX_LINES;
+	const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES;
+
+	const totalBytes = Buffer.byteLength(content, "utf-8");
+	const lines = content.split("\n"); // a trailing newline produces a final empty string, which is counted as a line
+	const totalLines = lines.length;
+
+	// Fast path: both limits are satisfied, so hand the input back untouched
+	if (totalLines <= maxLines && totalBytes <= maxBytes) {
+		return {
+			content,
+			truncated: false,
+			truncatedBy: null,
+			totalLines,
+			totalBytes,
+			outputLines: totalLines,
+			outputBytes: totalBytes,
+			lastLinePartial: false,
+			firstLineExceedsLimit: false,
+		};
+	}
+
+	// First line alone is over the byte limit: return nothing rather than a partial line
+	const firstLineBytes = Buffer.byteLength(lines[0], "utf-8");
+	if (firstLineBytes > maxBytes) {
+		return {
+			content: "",
+			truncated: true,
+			truncatedBy: "bytes",
+			totalLines,
+			totalBytes,
+			outputLines: 0,
+			outputBytes: 0,
+			lastLinePartial: false,
+			firstLineExceedsLimit: true,
+		};
+	}
+
+	// Walk forward from the first line, keeping whole lines until either limit would be exceeded
+	const outputLinesArr: string[] = [];
+	let outputBytesCount = 0;
+	let truncatedBy: "lines" | "bytes" = "lines";
+
+	for (let i = 0; i < lines.length && i < maxLines; i++) {
+		const line = lines[i];
+		const lineBytes = Buffer.byteLength(line, "utf-8") + (i > 0 ? 1 : 0); // +1 for the "\n" that join() puts before every line but the first
+
+		if (outputBytesCount + lineBytes > maxBytes) {
+			truncatedBy = "bytes";
+			break;
+		}
+
+		outputLinesArr.push(line);
+		outputBytesCount += lineBytes;
+	}
+
+	// Loop ended on the line limit. NOTE(review): appears redundant, since truncatedBy only leaves "lines" via the break above
+	if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) {
+		truncatedBy = "lines";
+	}
+
+	const outputContent = outputLinesArr.join("\n");
+	const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8");
+
+	return {
+		content: outputContent,
+		truncated: true,
+		truncatedBy,
+		totalLines,
+		totalBytes,
+		outputLines: outputLinesArr.length,
+		outputBytes: finalOutputBytes,
+		lastLinePartial: false,
+		firstLineExceedsLimit: false,
+	};
+}
+
+/**
+ * Keep the trailing lines of `content` that fit within both limits (line count and UTF-8 byte size).
+ * Suitable for bash output where you want to see the end (errors, final results). Input within both limits is returned unchanged.
+ *
+ * May return a single partial line (its tail end, lastLinePartial=true) if the last line of the original content exceeds the byte limit.
+ */
+export function truncateTail(content: string, options: TruncationOptions = {}): TruncationResult {
+	const maxLines = options.maxLines ?? DEFAULT_MAX_LINES;
+	const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES;
+
+	const totalBytes = Buffer.byteLength(content, "utf-8");
+	const lines = content.split("\n"); // a trailing newline produces a final empty string, which is counted as a line
+	const totalLines = lines.length;
+
+	// Fast path: both limits are satisfied, so hand the input back untouched
+	if (totalLines <= maxLines && totalBytes <= maxBytes) {
+		return {
+			content,
+			truncated: false,
+			truncatedBy: null,
+			totalLines,
+			totalBytes,
+			outputLines: totalLines,
+			outputBytes: totalBytes,
+			lastLinePartial: false,
+			firstLineExceedsLimit: false,
+		};
+	}
+
+	// Walk backwards from the last line, prepending whole lines until either limit would be exceeded
+	const outputLinesArr: string[] = [];
+	let outputBytesCount = 0;
+	let truncatedBy: "lines" | "bytes" = "lines";
+	let lastLinePartial = false;
+
+	for (let i = lines.length - 1; i >= 0 && outputLinesArr.length < maxLines; i--) {
+		const line = lines[i];
+		const lineBytes = Buffer.byteLength(line, "utf-8") + (outputLinesArr.length > 0 ? 1 : 0); // +1 for the "\n" joining this line to the ones already kept
+
+		if (outputBytesCount + lineBytes > maxBytes) {
+			truncatedBy = "bytes";
+			// Edge case: if we haven't added ANY lines yet and this line exceeds maxBytes, take the end of the line (partial).
+			// NOTE(review): not reached when content ends with "\n" (the empty final line is kept first), leaving empty content - confirm intended
+			if (outputLinesArr.length === 0) {
+				const truncatedLine = truncateStringToBytesFromEnd(line, maxBytes);
+				outputLinesArr.unshift(truncatedLine);
+				outputBytesCount = Buffer.byteLength(truncatedLine, "utf-8");
+				lastLinePartial = true;
+			}
+			break;
+		}
+
+		outputLinesArr.unshift(line);
+		outputBytesCount += lineBytes;
+	}
+
+	// Kept maxLines lines while staying within the byte limit: report the line limit as the cause
+	if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) {
+		truncatedBy = "lines";
+	}
+
+	const outputContent = outputLinesArr.join("\n");
+	const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8");
+
+	return {
+		content: outputContent,
+		truncated: true,
+		truncatedBy,
+		totalLines,
+		totalBytes,
+		outputLines: outputLinesArr.length,
+		outputBytes: finalOutputBytes,
+		lastLinePartial,
+		firstLineExceedsLimit: false,
+	};
+}
+
+/**
+ * Return the longest suffix of `str` that fits in `maxBytes` UTF-8 bytes without starting in the middle of a character.
+ * The whole string is returned when it already fits; otherwise the result may be a few bytes under maxBytes.
+ */
+function truncateStringToBytesFromEnd(str: string, maxBytes: number): string {
+	const buf = Buffer.from(str, "utf-8");
+	if (buf.length <= maxBytes) {
+		return str;
+	}
+
+	// Candidate cut point: exactly maxBytes before the end of the buffer
+	let start = buf.length - maxBytes;
+
+	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; advance until the cut lands on the start of a character
+	while (start < buf.length && (buf[start] & 0xc0) === 0x80) {
+		start++;
+	}
+
+	return buf.slice(start).toString("utf-8"); // NOTE(review): Buffer#slice is deprecated in newer Node releases in favour of subarray - confirm target runtime
+}