diff --git a/packages/mom/CHANGELOG.md b/packages/mom/CHANGELOG.md index e71f99aa..0b7ad116 100644 --- a/packages/mom/CHANGELOG.md +++ b/packages/mom/CHANGELOG.md @@ -18,6 +18,9 @@ ### Added +- Port truncation logic from coding-agent: bash and read tools now use consistent 2000 lines OR 50KB limits with actionable notices +- Remove redundant context history truncation (tools already provide truncation with actionable hints) + - Message backfill on startup (#103) - Fetches missed messages from Slack using `conversations.history` API when mom restarts - Backfills up to 3 pages (3000 messages) per channel since last logged timestamp diff --git a/packages/mom/src/agent.ts b/packages/mom/src/agent.ts index fa68c991..38475970 100644 --- a/packages/mom/src/agent.ts +++ b/packages/mom/src/agent.ts @@ -127,11 +127,7 @@ function getRecentMessages(channelDir: string, turnCount: number): string { for (const msg of turn) { const date = (msg.date || "").substring(0, 19); const user = msg.userName || msg.user || ""; - let text = msg.text || ""; - // Truncate bot messages (tool results can be huge) - if (msg.isBot) { - text = truncateForContext(text, 50000, 2000, msg.ts); - } + const text = msg.text || ""; const attachments = (msg.attachments || []).map((a) => a.local).join(","); formatted.push(`${date}\t${user}\t${text}\t${attachments}`); } @@ -140,43 +136,6 @@ function getRecentMessages(channelDir: string, turnCount: number): string { return formatted.join("\n"); } -/** - * Truncate text to maxChars or maxLines, whichever comes first. - * Adds a note with stats and instructions if truncation occurred. - */ -function truncateForContext(text: string, maxChars: number, maxLines: number, ts?: string): string { - const lines = text.split("\n"); - const originalLines = lines.length; - const originalChars = text.length; - let truncated = false; - let result = text; - - // Check line limit first - if (lines.length > maxLines) { - result = lines.slice(0, maxLines).join("\n"); - truncated = true; - } - - // Check char limit - if (result.length > maxChars) { - result = result.substring(0, maxChars); - truncated = true; - } - - if (truncated) { - const remainingLines = originalLines - result.split("\n").length; - const remainingChars = originalChars - result.length; - result += `\n[... truncated ${remainingLines} more lines, ${remainingChars} more chars. `; - if (ts) { - result += `To get full content: jq -r 'select(.ts=="${ts}") | .text' log.jsonl > /tmp/msg.txt, then read /tmp/msg.txt in segments]`; - } else { - result += `Search log.jsonl for full content]`; - } - } - - return result; -} - function getMemory(channelDir: string): string { const parts: string[] = []; diff --git a/packages/mom/src/tools/bash.ts b/packages/mom/src/tools/bash.ts index 3a88e03a..dbda5e43 100644 --- a/packages/mom/src/tools/bash.ts +++ b/packages/mom/src/tools/bash.ts @@ -1,6 +1,19 @@ +import { randomBytes } from "node:crypto"; +import { createWriteStream } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; import type { AgentTool } from "@mariozechner/pi-ai"; import { Type } from "@sinclair/typebox"; import type { Executor } from "../sandbox.js"; +import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateTail } from "./truncate.js"; + +/** + * Generate a unique temp file path for bash output + */ +function getTempFilePath(): string { + const id = randomBytes(8).toString("hex"); + return join(tmpdir(), `mom-bash-${id}.log`); +} const bashSchema = Type.Object({ label: Type.String({ description: "Brief description of what this command does (shown to user)" }), @@ -8,18 +21,26 @@ const bashSchema = Type.Object({ timeout: Type.Optional(Type.Number({ description: "Timeout in seconds (optional, no default timeout)" })), }); +interface BashToolDetails { + truncation?: TruncationResult; + fullOutputPath?: string; +} + export function createBashTool(executor: Executor): AgentTool { return { name: "bash", label: "bash", - description: - "Execute a bash command in the current working directory. Returns stdout and stderr. Optionally provide a timeout in seconds.", + description: `Execute a bash command in the current working directory. Returns stdout and stderr. Output is truncated to last ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). If truncated, full output is saved to a temp file. Optionally provide a timeout in seconds.`, parameters: bashSchema, execute: async ( _toolCallId: string, { command, timeout }: { label: string; command: string; timeout?: number }, signal?: AbortSignal, ) => { + // Track output for potential temp file writing + let tempFilePath: string | undefined; + let tempFileStream: ReturnType | undefined; + const result = await executor.exec(command, { timeout, signal }); let output = ""; if (result.stdout) output += result.stdout; @@ -28,11 +49,49 @@ export function createBashTool(executor: Executor): AgentTool output += result.stderr; } - if (result.code !== 0) { - throw new Error(`${output}\n\nCommand exited with code ${result.code}`.trim()); + const totalBytes = Buffer.byteLength(output, "utf-8"); + + // Write to temp file if output exceeds limit + if (totalBytes > DEFAULT_MAX_BYTES) { + tempFilePath = getTempFilePath(); + tempFileStream = createWriteStream(tempFilePath); + tempFileStream.write(output); + tempFileStream.end(); } - return { content: [{ type: "text", text: output || "(no output)" }], details: undefined }; + // Apply tail truncation + const truncation = truncateTail(output); + let outputText = truncation.content || "(no output)"; + + // Build details with truncation info + let details: BashToolDetails | undefined; + + if (truncation.truncated) { + details = { + truncation, + fullOutputPath: tempFilePath, + }; + + // Build actionable notice + const startLine = truncation.totalLines - truncation.outputLines + 1; + const endLine = truncation.totalLines; + + if (truncation.lastLinePartial) { + // Edge case: last line alone > 50KB + const lastLineSize = formatSize(Buffer.byteLength(output.split("\n").pop() || "", "utf-8")); + outputText += `\n\n[Showing last ${formatSize(truncation.outputBytes)} of line ${endLine} (line is ${lastLineSize}). Full output: ${tempFilePath}]`; + } else if (truncation.truncatedBy === "lines") { + outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines}. Full output: ${tempFilePath}]`; + } else { + outputText += `\n\n[Showing lines ${startLine}-${endLine} of ${truncation.totalLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Full output: ${tempFilePath}]`; + } + } + + if (result.code !== 0) { + throw new Error(`${outputText}\n\nCommand exited with code ${result.code}`.trim()); + } + + return { content: [{ type: "text", text: outputText }], details }; }, }; } diff --git a/packages/mom/src/tools/read.ts b/packages/mom/src/tools/read.ts index d59f9d6d..db36d615 100644 --- a/packages/mom/src/tools/read.ts +++ b/packages/mom/src/tools/read.ts @@ -2,6 +2,7 @@ import type { AgentTool, ImageContent, TextContent } from "@mariozechner/pi-ai"; import { Type } from "@sinclair/typebox"; import { extname } from "path"; import type { Executor } from "../sandbox.js"; +import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize, type TruncationResult, truncateHead } from "./truncate.js"; /** * Map of file extensions to MIME types for common image formats @@ -29,21 +30,21 @@ const readSchema = Type.Object({ limit: Type.Optional(Type.Number({ description: "Maximum number of lines to read" })), }); -const MAX_LINES = 2000; -const MAX_LINE_LENGTH = 2000; +interface ReadToolDetails { + truncation?: TruncationResult; +} export function createReadTool(executor: Executor): AgentTool { return { name: "read", label: "read", - description: - "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, defaults to first 2000 lines. Use offset/limit for large files.", + description: `Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to ${DEFAULT_MAX_LINES} lines or ${DEFAULT_MAX_BYTES / 1024}KB (whichever is hit first). Use offset/limit for large files.`, parameters: readSchema, execute: async ( _toolCallId: string, { path, offset, limit }: { label: string; path: string; offset?: number; limit?: number }, signal?: AbortSignal, - ) => { + ): Promise<{ content: (TextContent | ImageContent)[]; details: ReadToolDetails | undefined }> => { const mimeType = isImageFile(path); if (mimeType) { @@ -58,65 +59,95 @@ export function createReadTool(executor: Executor): AgentTool content: [ { type: "text", text: `Read image file [${mimeType}]` }, { type: "image", data: base64, mimeType }, - ] as (TextContent | ImageContent)[], + ], details: undefined, }; } - // Read as text using cat with offset/limit via sed/head/tail - let cmd: string; - const startLine = offset ? Math.max(1, offset) : 1; - const maxLines = limit || MAX_LINES; + // Get total line count first + const countResult = await executor.exec(`wc -l < ${shellEscape(path)}`, { signal }); + if (countResult.code !== 0) { + throw new Error(countResult.stderr || `Failed to read file: ${path}`); + } + const totalFileLines = Number.parseInt(countResult.stdout.trim(), 10) + 1; // wc -l counts newlines, not lines - if (startLine === 1) { - cmd = `head -n ${maxLines} ${shellEscape(path)}`; - } else { - cmd = `sed -n '${startLine},${startLine + maxLines - 1}p' ${shellEscape(path)}`; + // Apply offset if specified (1-indexed) + const startLine = offset ? Math.max(1, offset) : 1; + const startLineDisplay = startLine; + + // Check if offset is out of bounds + if (startLine > totalFileLines) { + throw new Error(`Offset ${offset} is beyond end of file (${totalFileLines} lines total)`); } - // Also get total line count - const countResult = await executor.exec(`wc -l < ${shellEscape(path)}`, { signal }); - const totalLines = Number.parseInt(countResult.stdout.trim(), 10) || 0; + // Read content with offset + let cmd: string; + if (startLine === 1) { + cmd = `cat ${shellEscape(path)}`; + } else { + cmd = `tail -n +${startLine} ${shellEscape(path)}`; + } const result = await executor.exec(cmd, { signal }); if (result.code !== 0) { throw new Error(result.stderr || `Failed to read file: ${path}`); } - const lines = result.stdout.split("\n"); + let selectedContent = result.stdout; + let userLimitedLines: number | undefined; - // Truncate long lines - let hadTruncatedLines = false; - const formattedLines = lines.map((line) => { - if (line.length > MAX_LINE_LENGTH) { - hadTruncatedLines = true; - return line.slice(0, MAX_LINE_LENGTH); + // Apply user limit if specified + if (limit !== undefined) { + const lines = selectedContent.split("\n"); + const endLine = Math.min(limit, lines.length); + selectedContent = lines.slice(0, endLine).join("\n"); + userLimitedLines = endLine; + } + + // Apply truncation (respects both line and byte limits) + const truncation = truncateHead(selectedContent); + + let outputText: string; + let details: ReadToolDetails | undefined; + + if (truncation.firstLineExceedsLimit) { + // First line at offset exceeds 50KB - tell model to use bash + const firstLineSize = formatSize(Buffer.byteLength(selectedContent.split("\n")[0], "utf-8")); + outputText = `[Line ${startLineDisplay} is ${firstLineSize}, exceeds ${formatSize(DEFAULT_MAX_BYTES)} limit. Use bash: sed -n '${startLineDisplay}p' ${path} | head -c ${DEFAULT_MAX_BYTES}]`; + details = { truncation }; + } else if (truncation.truncated) { + // Truncation occurred - build actionable notice + const endLineDisplay = startLineDisplay + truncation.outputLines - 1; + const nextOffset = endLineDisplay + 1; + + outputText = truncation.content; + + if (truncation.truncatedBy === "lines") { + outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines}. Use offset=${nextOffset} to continue]`; + } else { + outputText += `\n\n[Showing lines ${startLineDisplay}-${endLineDisplay} of ${totalFileLines} (${formatSize(DEFAULT_MAX_BYTES)} limit). Use offset=${nextOffset} to continue]`; } - return line; - }); + details = { truncation }; + } else if (userLimitedLines !== undefined) { + // User specified limit, check if there's more content + const linesFromStart = startLine - 1 + userLimitedLines; + if (linesFromStart < totalFileLines) { + const remaining = totalFileLines - linesFromStart; + const nextOffset = startLine + userLimitedLines; - let outputText = formattedLines.join("\n"); - - // Add notices - const notices: string[] = []; - const endLine = startLine + lines.length - 1; - - if (hadTruncatedLines) { - notices.push(`Some lines were truncated to ${MAX_LINE_LENGTH} characters for display`); - } - - if (endLine < totalLines) { - const remaining = totalLines - endLine; - notices.push(`${remaining} more lines not shown. Use offset=${endLine + 1} to continue reading`); - } - - if (notices.length > 0) { - outputText += `\n\n... (${notices.join(". ")})`; + outputText = truncation.content; + outputText += `\n\n[${remaining} more lines in file. Use offset=${nextOffset} to continue]`; + } else { + outputText = truncation.content; + } + } else { + // No truncation, no user limit exceeded + outputText = truncation.content; } return { - content: [{ type: "text", text: outputText }] as (TextContent | ImageContent)[], - details: undefined, + content: [{ type: "text", text: outputText }], + details, }; }, }; diff --git a/packages/mom/src/tools/truncate.ts b/packages/mom/src/tools/truncate.ts new file mode 100644 index 00000000..0eff9a0b --- /dev/null +++ b/packages/mom/src/tools/truncate.ts @@ -0,0 +1,236 @@ +/** + * Shared truncation utilities for tool outputs. + * + * Truncation is based on two independent limits - whichever is hit first wins: + * - Line limit (default: 2000 lines) + * - Byte limit (default: 50KB) + * + * Never returns partial lines (except bash tail truncation edge case). + */ + +export const DEFAULT_MAX_LINES = 2000; +export const DEFAULT_MAX_BYTES = 50 * 1024; // 50KB + +export interface TruncationResult { + /** The truncated content */ + content: string; + /** Whether truncation occurred */ + truncated: boolean; + /** Which limit was hit: "lines", "bytes", or null if not truncated */ + truncatedBy: "lines" | "bytes" | null; + /** Total number of lines in the original content */ + totalLines: number; + /** Total number of bytes in the original content */ + totalBytes: number; + /** Number of complete lines in the truncated output */ + outputLines: number; + /** Number of bytes in the truncated output */ + outputBytes: number; + /** Whether the last line was partially truncated (only for tail truncation edge case) */ + lastLinePartial: boolean; + /** Whether the first line exceeded the byte limit (for head truncation) */ + firstLineExceedsLimit: boolean; +} + +export interface TruncationOptions { + /** Maximum number of lines (default: 2000) */ + maxLines?: number; + /** Maximum number of bytes (default: 50KB) */ + maxBytes?: number; +} + +/** + * Format bytes as human-readable size. + */ +export function formatSize(bytes: number): string { + if (bytes < 1024) { + return `${bytes}B`; + } else if (bytes < 1024 * 1024) { + return `${(bytes / 1024).toFixed(1)}KB`; + } else { + return `${(bytes / (1024 * 1024)).toFixed(1)}MB`; + } +} + +/** + * Truncate content from the head (keep first N lines/bytes). + * Suitable for file reads where you want to see the beginning. + * + * Never returns partial lines. If first line exceeds byte limit, + * returns empty content with firstLineExceedsLimit=true. + */ +export function truncateHead(content: string, options: TruncationOptions = {}): TruncationResult { + const maxLines = options.maxLines ?? DEFAULT_MAX_LINES; + const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES; + + const totalBytes = Buffer.byteLength(content, "utf-8"); + const lines = content.split("\n"); + const totalLines = lines.length; + + // Check if no truncation needed + if (totalLines <= maxLines && totalBytes <= maxBytes) { + return { + content, + truncated: false, + truncatedBy: null, + totalLines, + totalBytes, + outputLines: totalLines, + outputBytes: totalBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + }; + } + + // Check if first line alone exceeds byte limit + const firstLineBytes = Buffer.byteLength(lines[0], "utf-8"); + if (firstLineBytes > maxBytes) { + return { + content: "", + truncated: true, + truncatedBy: "bytes", + totalLines, + totalBytes, + outputLines: 0, + outputBytes: 0, + lastLinePartial: false, + firstLineExceedsLimit: true, + }; + } + + // Collect complete lines that fit + const outputLinesArr: string[] = []; + let outputBytesCount = 0; + let truncatedBy: "lines" | "bytes" = "lines"; + + for (let i = 0; i < lines.length && i < maxLines; i++) { + const line = lines[i]; + const lineBytes = Buffer.byteLength(line, "utf-8") + (i > 0 ? 1 : 0); // +1 for newline + + if (outputBytesCount + lineBytes > maxBytes) { + truncatedBy = "bytes"; + break; + } + + outputLinesArr.push(line); + outputBytesCount += lineBytes; + } + + // If we exited due to line limit + if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) { + truncatedBy = "lines"; + } + + const outputContent = outputLinesArr.join("\n"); + const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8"); + + return { + content: outputContent, + truncated: true, + truncatedBy, + totalLines, + totalBytes, + outputLines: outputLinesArr.length, + outputBytes: finalOutputBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + }; +} + +/** + * Truncate content from the tail (keep last N lines/bytes). + * Suitable for bash output where you want to see the end (errors, final results). + * + * May return partial first line if the last line of original content exceeds byte limit. + */ +export function truncateTail(content: string, options: TruncationOptions = {}): TruncationResult { + const maxLines = options.maxLines ?? DEFAULT_MAX_LINES; + const maxBytes = options.maxBytes ?? DEFAULT_MAX_BYTES; + + const totalBytes = Buffer.byteLength(content, "utf-8"); + const lines = content.split("\n"); + const totalLines = lines.length; + + // Check if no truncation needed + if (totalLines <= maxLines && totalBytes <= maxBytes) { + return { + content, + truncated: false, + truncatedBy: null, + totalLines, + totalBytes, + outputLines: totalLines, + outputBytes: totalBytes, + lastLinePartial: false, + firstLineExceedsLimit: false, + }; + } + + // Work backwards from the end + const outputLinesArr: string[] = []; + let outputBytesCount = 0; + let truncatedBy: "lines" | "bytes" = "lines"; + let lastLinePartial = false; + + for (let i = lines.length - 1; i >= 0 && outputLinesArr.length < maxLines; i--) { + const line = lines[i]; + const lineBytes = Buffer.byteLength(line, "utf-8") + (outputLinesArr.length > 0 ? 1 : 0); // +1 for newline + + if (outputBytesCount + lineBytes > maxBytes) { + truncatedBy = "bytes"; + // Edge case: if we haven't added ANY lines yet and this line exceeds maxBytes, + // take the end of the line (partial) + if (outputLinesArr.length === 0) { + const truncatedLine = truncateStringToBytesFromEnd(line, maxBytes); + outputLinesArr.unshift(truncatedLine); + outputBytesCount = Buffer.byteLength(truncatedLine, "utf-8"); + lastLinePartial = true; + } + break; + } + + outputLinesArr.unshift(line); + outputBytesCount += lineBytes; + } + + // If we exited due to line limit + if (outputLinesArr.length >= maxLines && outputBytesCount <= maxBytes) { + truncatedBy = "lines"; + } + + const outputContent = outputLinesArr.join("\n"); + const finalOutputBytes = Buffer.byteLength(outputContent, "utf-8"); + + return { + content: outputContent, + truncated: true, + truncatedBy, + totalLines, + totalBytes, + outputLines: outputLinesArr.length, + outputBytes: finalOutputBytes, + lastLinePartial, + firstLineExceedsLimit: false, + }; +} + +/** + * Truncate a string to fit within a byte limit (from the end). + * Handles multi-byte UTF-8 characters correctly. + */ +function truncateStringToBytesFromEnd(str: string, maxBytes: number): string { + const buf = Buffer.from(str, "utf-8"); + if (buf.length <= maxBytes) { + return str; + } + + // Start from the end, skip maxBytes back + let start = buf.length - maxBytes; + + // Find a valid UTF-8 boundary (start of a character) + while (start < buf.length && (buf[start] & 0xc0) === 0x80) { + start++; + } + + return buf.slice(start).toString("utf-8"); +}