mom: add centralized logging, usage tracking, and improve prompt caching

Major improvements to mom's logging and cost reporting:

Centralized Logging System:
- Add src/log.ts with type-safe logging functions
- Colored console output (green=user, yellow=mom, dim=details)
- Consistent format: [HH:MM:SS] [context] message
- Replace scattered console.log/error calls throughout codebase

Usage Tracking & Cost Reporting:
- Track tokens (input, output, cache read/write) and costs per run
- Display summary at end of each run in console and Slack thread
- Example: 💰 Usage: 12,543 in + 847 out (5,234 cache read) = $0.0234

Prompt Caching Optimization:
- Move recent messages from system prompt to user message
- System prompt now mostly static (only changes with memory files)
- Enables effective use of Anthropic's prompt caching
- Significantly reduces costs on subsequent requests

Model & Cost Improvements:
- Switch from Claude Opus 4.5 to Sonnet 4.5 (~40% cost reduction)
- Fix Claude Opus 4.5 cache pricing in ai package (was 3x too expensive)
- Add manual override in generate-models.ts until upstream fix merges
- Submitted PR to models.dev: https://github.com/sst/models.dev/pull/439

UI/UX Improvements:
- Extract actual text from tool results instead of JSON wrapper
- Cleaner Slack thread formatting with duration and labels
- Tool args formatting shows paths with offset:limit notation
- Add chalk for colored terminal output

Dependencies:
- Add chalk package for terminal colors
This commit is contained in:
Mario Zechner 2025-11-26 18:04:16 +01:00
parent 82d4ac93e1
commit 213bc4df1c
11 changed files with 478 additions and 63 deletions

View file

@ -3,14 +3,14 @@ import { getModel } from "@mariozechner/pi-ai";
import { existsSync, readFileSync } from "fs";
import { mkdir } from "fs/promises";
import { join } from "path";
import * as log from "./log.js";
import { createExecutor, type SandboxConfig } from "./sandbox.js";
import type { SlackContext } from "./slack.js";
import type { ChannelStore } from "./store.js";
import { createMomTools, setUploadFunction } from "./tools/index.js";
// Hardcoded model for now
const model = getModel("anthropic", "claude-opus-4-5");
const model = getModel("anthropic", "claude-sonnet-4-5");
export interface AgentRunner {
run(ctx: SlackContext, channelDir: string, store: ChannelStore): Promise<void>;
@ -67,7 +67,7 @@ function getMemory(channelDir: string): string {
parts.push("### Global Workspace Memory\n" + content);
}
} catch (error) {
console.error(`Failed to read workspace memory: ${error}`);
log.logWarning("Failed to read workspace memory", `${workspaceMemoryPath}: ${error}`);
}
}
@ -80,7 +80,7 @@ function getMemory(channelDir: string): string {
parts.push("### Channel-Specific Memory\n" + content);
}
} catch (error) {
console.error(`Failed to read channel memory: ${error}`);
log.logWarning("Failed to read channel memory", `${channelMemoryPath}: ${error}`);
}
}
@ -94,7 +94,6 @@ function getMemory(channelDir: string): string {
function buildSystemPrompt(
workspacePath: string,
channelId: string,
recentMessages: string,
memory: string,
sandboxConfig: SandboxConfig,
): string {
@ -224,10 +223,6 @@ You can maintain working memory across conversations by writing MEMORY.md files.
### Current Working Memory
${memory}
### Recent Messages (last 50)
Format: date TAB user TAB text TAB attachments
${recentMessages}
## Tools
You have access to: bash, read, edit, write, attach tools.
- bash: Run shell commands (this is your main tool)
@ -254,6 +249,68 @@ function truncate(text: string, maxLen: number): string {
return text.substring(0, maxLen - 3) + "...";
}
function extractToolResultText(result: unknown): string {
// If it's already a string, return it
if (typeof result === "string") {
return result;
}
// If it's an object with content array (tool result format)
if (
result &&
typeof result === "object" &&
"content" in result &&
Array.isArray((result as { content: unknown }).content)
) {
const content = (result as { content: Array<{ type: string; text?: string }> }).content;
const textParts: string[] = [];
for (const part of content) {
if (part.type === "text" && part.text) {
textParts.push(part.text);
}
}
if (textParts.length > 0) {
return textParts.join("\n");
}
}
// Fallback to JSON
return JSON.stringify(result);
}
function formatToolArgsForSlack(toolName: string, args: Record<string, unknown>): string {
const lines: string[] = [];
for (const [key, value] of Object.entries(args)) {
// Skip the label - it's already shown
if (key === "label") continue;
// For read tool, format path with offset/limit
if (key === "path" && typeof value === "string") {
const offset = args.offset as number | undefined;
const limit = args.limit as number | undefined;
if (offset !== undefined && limit !== undefined) {
lines.push(`${value}:${offset}-${offset + limit}`);
} else {
lines.push(value);
}
continue;
}
// Skip offset/limit since we already handled them
if (key === "offset" || key === "limit") continue;
// For other values, format them
if (typeof value === "string") {
lines.push(value);
} else {
lines.push(JSON.stringify(value));
}
}
return lines.join("\n");
}
export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
let agent: Agent | null = null;
const executor = createExecutor(sandboxConfig);
@ -267,7 +324,7 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
const workspacePath = executor.getWorkspacePath(channelDir.replace(`/${channelId}`, ""));
const recentMessages = getRecentMessages(channelDir, 50);
const memory = getMemory(channelDir);
const systemPrompt = buildSystemPrompt(workspacePath, channelId, recentMessages, memory, sandboxConfig);
const systemPrompt = buildSystemPrompt(workspacePath, channelId, memory, sandboxConfig);
// Set up file upload function for the attach tool
// For Docker, we need to translate paths back to host
@ -292,8 +349,30 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
}),
});
// Track pending tool calls to pair args with results
const pendingTools = new Map<string, { toolName: string; args: unknown }>();
// Create logging context
const logCtx = {
channelId: ctx.message.channel,
userName: ctx.message.userName,
channelName: ctx.channelName,
};
// Track pending tool calls to pair args with results and timing
const pendingTools = new Map<string, { toolName: string; args: unknown; startTime: number }>();
// Track usage across all assistant messages in this run
const totalUsage = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
cost: {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
total: 0,
},
};
// Subscribe to events
agent.subscribe(async (event: AgentEvent) => {
@ -303,10 +382,14 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
const label = args.label || event.toolName;
// Store args to pair with result later
pendingTools.set(event.toolCallId, { toolName: event.toolName, args: event.args });
pendingTools.set(event.toolCallId, {
toolName: event.toolName,
args: event.args,
startTime: Date.now(),
});
// Log to console
console.log(`\n[Tool] ${event.toolName}: ${JSON.stringify(event.args)}`);
log.logToolStart(logCtx, event.toolName, label, event.args as Record<string, unknown>);
// Log to jsonl
await store.logMessage(ctx.message.channel, {
@ -324,12 +407,18 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
}
case "tool_execution_end": {
const resultStr = typeof event.result === "string" ? event.result : JSON.stringify(event.result);
const resultStr = extractToolResultText(event.result);
const pending = pendingTools.get(event.toolCallId);
pendingTools.delete(event.toolCallId);
const durationMs = pending ? Date.now() - pending.startTime : 0;
// Log to console
console.log(`[Tool Result] ${event.isError ? "ERROR: " : ""}${truncate(resultStr, 1000)}\n`);
if (event.isError) {
log.logToolError(logCtx, event.toolName, durationMs, resultStr);
} else {
log.logToolSuccess(logCtx, event.toolName, durationMs, resultStr);
}
// Log to jsonl
await store.logMessage(ctx.message.channel, {
@ -342,17 +431,26 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
});
// Post args + result together in thread
const argsStr = pending ? JSON.stringify(pending.args, null, 2) : "(args not found)";
const label = pending?.args ? (pending.args as { label?: string }).label : undefined;
const argsFormatted = pending
? formatToolArgsForSlack(event.toolName, pending.args as Record<string, unknown>)
: "(args not found)";
const duration = (durationMs / 1000).toFixed(1);
const threadResult = truncate(resultStr, 2000);
await ctx.respondInThread(
`*[${event.toolName}]* ${event.isError ? "❌" : "✓"}\n` +
"```\n" +
argsStr +
"\n```\n" +
"*Result:*\n```\n" +
threadResult +
"\n```",
);
let threadMessage = `*${event.isError ? "✗" : "✓"} ${event.toolName}*`;
if (label) {
threadMessage += `: ${label}`;
}
threadMessage += ` (${duration}s)\n`;
if (argsFormatted) {
threadMessage += "```\n" + argsFormatted + "\n```\n";
}
threadMessage += "*Result:*\n```\n" + threadResult + "\n```";
await ctx.respondInThread(threadMessage);
// Show brief error in main message if failed
if (event.isError) {
@ -362,25 +460,33 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
}
case "message_update": {
const ev = event.assistantMessageEvent;
// Stream deltas to console
if (ev.type === "text_delta") {
process.stdout.write(ev.delta);
} else if (ev.type === "thinking_delta") {
process.stdout.write(ev.delta);
}
// No longer stream to console - just track that we're streaming
break;
}
case "message_start":
if (event.message.role === "assistant") {
process.stdout.write("\n");
log.logResponseStart(logCtx);
}
break;
case "message_end":
if (event.message.role === "assistant") {
process.stdout.write("\n");
const assistantMsg = event.message as any; // AssistantMessage type
// Accumulate usage
if (assistantMsg.usage) {
totalUsage.input += assistantMsg.usage.input;
totalUsage.output += assistantMsg.usage.output;
totalUsage.cacheRead += assistantMsg.usage.cacheRead;
totalUsage.cacheWrite += assistantMsg.usage.cacheWrite;
totalUsage.cost.input += assistantMsg.usage.cost.input;
totalUsage.cost.output += assistantMsg.usage.cost.output;
totalUsage.cost.cacheRead += assistantMsg.usage.cost.cacheRead;
totalUsage.cost.cacheWrite += assistantMsg.usage.cost.cacheWrite;
totalUsage.cost.total += assistantMsg.usage.cost.total;
}
// Extract text from assistant message
const content = event.message.content;
let text = "";
@ -391,6 +497,7 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
}
if (text.trim()) {
await ctx.respond(text);
log.logResponseComplete(logCtx, text.length);
}
}
break;
@ -398,7 +505,21 @@ export function createAgentRunner(sandboxConfig: SandboxConfig): AgentRunner {
});
// Run the agent with user's message
await agent.prompt(ctx.message.text || "(attached files)");
// Prepend recent messages to the user prompt (not system prompt) for better caching
const userPrompt =
`Recent conversation history (last 50 messages):\n` +
`Format: date TAB user TAB text TAB attachments\n\n` +
`${recentMessages}\n\n` +
`---\n\n` +
`Current message: ${ctx.message.text || "(attached files)"}`;
await agent.prompt(userPrompt);
// Log usage summary if there was any usage
if (totalUsage.cost.total > 0) {
const summary = log.logUsageSummary(logCtx, totalUsage);
await ctx.respondInThread(summary);
}
},
abort(): void {