feat(agent): Add /tokens command for cumulative token usage tracking

Added /tokens slash command to TUI that displays session-wide token statistics.
Key changes:
- Fixed SessionManager to accumulate token usage instead of storing only last event
- Added cumulative token tracking to TUI renderer alongside per-request totals
- Implemented slash command infrastructure with /tokens autocomplete support
- Fixed file autocompletion that was missing from Tab key handling
- Clean minimal display format showing input/output/reasoning/cache/tool counts

The /tokens command shows:
Total usage
   input: 1,234
   output: 567
   reasoning: 89
   cache read: 100
   cache write: 50
   tool calls: 2
This commit is contained in:
Mario Zechner 2025-08-11 15:43:48 +02:00
parent 7e3b94ade6
commit e21a46e68f
10 changed files with 303 additions and 283 deletions

View file

View file

@ -2,6 +2,7 @@ import {
CombinedAutocompleteProvider,
Container,
MarkdownComponent,
SlashCommand,
TextComponent,
TextEditor,
TUI,
@ -63,6 +64,13 @@ export class TuiRenderer implements AgentEventReceiver {
private lastCacheWriteTokens = 0;
private lastReasoningTokens = 0;
private toolCallCount = 0;
// Cumulative token tracking
private cumulativeInputTokens = 0;
private cumulativeOutputTokens = 0;
private cumulativeCacheReadTokens = 0;
private cumulativeCacheWriteTokens = 0;
private cumulativeReasoningTokens = 0;
private cumulativeToolCallCount = 0;
private tokenStatusComponent: TextComponent | null = null;
constructor() {
@ -74,7 +82,12 @@ export class TuiRenderer implements AgentEventReceiver {
// Setup autocomplete for file paths and slash commands
const autocompleteProvider = new CombinedAutocompleteProvider(
[],
[
{
name: "tokens",
description: "Show cumulative token usage for this session",
},
],
process.cwd(), // Base directory for file path completion
);
this.editor.setAutocompleteProvider(autocompleteProvider);
@ -148,6 +161,17 @@ export class TuiRenderer implements AgentEventReceiver {
text = text.trim();
if (!text) return;
// Handle slash commands
if (text.startsWith("/")) {
const [command, ...args] = text.slice(1).split(" ");
if (command === "tokens") {
this.showTokenUsage();
return;
}
// Unknown slash command, ignore
return;
}
if (this.onInputCallback) {
this.onInputCallback(text);
}
@ -192,6 +216,7 @@ export class TuiRenderer implements AgentEventReceiver {
case "tool_call":
this.toolCallCount++;
this.cumulativeToolCallCount++;
this.updateTokenDisplay();
this.chatContainer.addChild(new TextComponent(chalk.yellow(`[tool] ${event.name}(${event.args})`)));
break;
@ -255,6 +280,14 @@ export class TuiRenderer implements AgentEventReceiver {
this.lastCacheReadTokens = event.cacheReadTokens;
this.lastCacheWriteTokens = event.cacheWriteTokens;
this.lastReasoningTokens = event.reasoningTokens;
// Accumulate cumulative totals
this.cumulativeInputTokens += event.inputTokens;
this.cumulativeOutputTokens += event.outputTokens;
this.cumulativeCacheReadTokens += event.cacheReadTokens;
this.cumulativeCacheWriteTokens += event.cacheWriteTokens;
this.cumulativeReasoningTokens += event.reasoningTokens;
this.updateTokenDisplay();
break;
@ -282,21 +315,21 @@ export class TuiRenderer implements AgentEventReceiver {
this.tokenContainer.clear();
// Build token display text
let tokenText = chalk.dim(`${this.lastInputTokens.toLocaleString()}${this.lastOutputTokens.toLocaleString()}`);
let tokenText = chalk.dim(` ${this.lastInputTokens.toLocaleString()} ${this.lastOutputTokens.toLocaleString()}`);
// Add reasoning tokens if present
if (this.lastReasoningTokens > 0) {
tokenText += chalk.dim(`${this.lastReasoningTokens.toLocaleString()}`);
tokenText += chalk.dim(` ${this.lastReasoningTokens.toLocaleString()}`);
}
// Add cache info if available
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
const cacheText: string[] = [];
if (this.lastCacheReadTokens > 0) {
cacheText.push(`${this.lastCacheReadTokens.toLocaleString()}`);
cacheText.push(` cache read: ${this.lastCacheReadTokens.toLocaleString()}`);
}
if (this.lastCacheWriteTokens > 0) {
cacheText.push(`${this.lastCacheWriteTokens.toLocaleString()}`);
cacheText.push(` cache write: ${this.lastCacheWriteTokens.toLocaleString()}`);
}
tokenText += chalk.dim(` (${cacheText.join(" ")})`);
}
@ -346,6 +379,35 @@ export class TuiRenderer implements AgentEventReceiver {
this.ui.requestRender();
}
/**
 * Handler for the "/tokens" slash command: renders the cumulative,
 * session-wide token usage into the chat container as a dimmed, italic
 * multi-line summary. Optional sections (reasoning, cache, tool calls)
 * are only emitted when their counters are non-zero.
 */
private showTokenUsage(): void {
	let tokenText = chalk.dim(
		`Total usage\n   input: ${this.cumulativeInputTokens.toLocaleString()}\n   output: ${this.cumulativeOutputTokens.toLocaleString()}`,
	);
	if (this.cumulativeReasoningTokens > 0) {
		tokenText += chalk.dim(`\n   reasoning: ${this.cumulativeReasoningTokens.toLocaleString()}`);
	}
	if (this.cumulativeCacheReadTokens > 0 || this.cumulativeCacheWriteTokens > 0) {
		const cacheLines: string[] = [];
		if (this.cumulativeCacheReadTokens > 0) {
			cacheLines.push(`\n   cache read: ${this.cumulativeCacheReadTokens.toLocaleString()}`);
		}
		if (this.cumulativeCacheWriteTokens > 0) {
			// Bug fix: label previously read "cache right"; the documented
			// output format is "cache write".
			cacheLines.push(`\n   cache write: ${this.cumulativeCacheWriteTokens.toLocaleString()}`);
		}
		// Each entry already begins with "\n   ", so join with no separator —
		// joining with " " (as before) put a stray space at each line start.
		tokenText += chalk.dim(cacheLines.join(""));
	}
	if (this.cumulativeToolCallCount > 0) {
		tokenText += chalk.dim(`\n   tool calls: ${this.cumulativeToolCallCount}`);
	}
	const tokenSummary = new TextComponent(chalk.italic(tokenText), { bottom: 1 });
	this.chatContainer.addChild(tokenSummary);
	this.ui.requestRender();
}
stop(): void {
if (this.currentLoadingAnimation) {
this.currentLoadingAnimation.stop();

View file

@ -156,7 +156,17 @@ export class SessionManager implements AgentEventReceiver {
const eventEntry: SessionEvent = entry as SessionEvent;
events.push(eventEntry);
if (eventEntry.event.type === "token_usage") {
totalUsage = entry.event as Extract<AgentEvent, { type: "token_usage" }>;
const usage = entry.event as Extract<AgentEvent, { type: "token_usage" }>;
if (!totalUsage) {
totalUsage = { ...usage };
} else {
totalUsage.inputTokens += usage.inputTokens;
totalUsage.outputTokens += usage.outputTokens;
totalUsage.totalTokens += usage.totalTokens;
totalUsage.cacheReadTokens += usage.cacheReadTokens;
totalUsage.cacheWriteTokens += usage.cacheWriteTokens;
totalUsage.reasoningTokens += usage.reasoningTokens;
}
}
}
} catch {

View file

@ -649,6 +649,8 @@ export class TextEditor implements Component {
// Check if we're in a slash command context
if (beforeCursor.trimStart().startsWith("/")) {
this.handleSlashCommandCompletion();
} else {
this.forceFileAutocomplete();
}
}
@ -656,8 +658,34 @@ export class TextEditor implements Component {
// For now, fall back to regular autocomplete (slash commands)
// This can be extended later to handle command-specific argument completion
this.tryTriggerAutocomplete(true);
}
/**
 * Explicitly opens file-path autocomplete (used when Tab is pressed outside
 * a slash-command context). Falls back to the ordinary autocomplete trigger
 * when the provider does not expose the force-file hook.
 */
private forceFileAutocomplete(): void {
	if (!this.autocompleteProvider) return;
	// The force-suggestions hook is optional on providers, so probe for it
	// dynamically; absent hook means fall back to the regular trigger.
	const provider = this.autocompleteProvider as any;
	if (!provider.getForceFileSuggestions) {
		this.tryTriggerAutocomplete(true);
		return;
	}
	const result = provider.getForceFileSuggestions(this.state.lines, this.state.cursorLine, this.state.cursorCol);
	if (!result || result.items.length === 0) {
		// Nothing to offer at the cursor position — close any open popup.
		this.cancelAutocomplete();
		return;
	}
	this.autocompletePrefix = result.prefix;
	this.autocompleteList = new SelectList(result.items, 5);
	this.isAutocompleting = true;
}
private cancelAutocomplete(): void {
this.isAutocompleting = false;
this.autocompleteList = undefined as any;

View file

@ -272,9 +272,6 @@ export class TUI extends Container {
this.renderInitial(currentRenderCommands);
this.isFirstRender = false;
} else {
// this.executeDifferentialRender(currentRenderCommands, termHeight);
// this.renderDifferential(currentRenderCommands, termHeight);
// this.renderDifferentialSurgical(currentRenderCommands, termHeight);
this.renderLineBased(currentRenderCommands, termHeight);
}
@ -464,272 +461,6 @@ export class TUI extends Container {
this.totalLinesRedrawn += linesRedrawn;
}
/**
 * Experimental differential renderer that updates individual changed lines
 * in place ("surgical" strategy) when component structure and line counts
 * are stable, falling back to partial or full redraws otherwise.
 * NOTE(review): currently unused — only referenced from a commented-out call
 * site in render(); kept as an implementation reference.
 */
private renderDifferentialSurgical(currentCommands: RenderCommand[], termHeight: number): void {
const viewportHeight = termHeight - 1; // Leave one line for cursor
// Build the new lines array
const newLines: string[] = [];
for (const command of currentCommands) {
newLines.push(...command.lines);
}
const totalNewLines = newLines.length;
const totalOldLines = this.previousLines.length;
// Phase 1: Analyze - categorize all changes
let firstChangeOffset = -1;
let hasLineCountChange = false;
let hasStructuralChange = false;
const changedLines: Array<{ lineIndex: number; newContent: string }> = [];
let currentLineOffset = 0;
for (let i = 0; i < Math.max(currentCommands.length, this.previousRenderCommands.length); i++) {
const current = i < currentCommands.length ? currentCommands[i] : null;
const previous = i < this.previousRenderCommands.length ? this.previousRenderCommands[i] : null;
// Structural change: component added/removed/reordered
if (!current || !previous || current.id !== previous.id) {
hasStructuralChange = true;
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset;
}
break;
}
// Line count change
if (current.changed && current.lines.length !== previous.lines.length) {
hasLineCountChange = true;
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset;
}
break;
}
// Content change with same line count - track individual line changes
if (current.changed) {
for (let j = 0; j < current.lines.length; j++) {
const oldLine =
currentLineOffset + j < this.previousLines.length ? this.previousLines[currentLineOffset + j] : "";
const newLine = current.lines[j];
if (oldLine !== newLine) {
changedLines.push({
lineIndex: currentLineOffset + j,
newContent: newLine,
});
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset + j;
}
}
}
}
currentLineOffset += current ? current.lines.length : 0;
}
// If nothing changed, do nothing
if (firstChangeOffset === -1) {
this.previousLines = newLines;
return;
}
// Phase 2: Decision - pick rendering strategy
// Lines scrolled out of the viewport cannot be reached with cursor moves,
// so a change above the viewport forces a full redraw.
const contentStartInViewport = Math.max(0, totalOldLines - viewportHeight);
const changePositionInViewport = firstChangeOffset - contentStartInViewport;
let output = "";
let linesRedrawn = 0;
if (changePositionInViewport < 0) {
// Strategy: FULL - change is above viewport, must clear scrollback and re-render all
output = "\x1b[3J\x1b[H"; // Clear scrollback and screen, then home cursor
for (let i = 0; i < newLines.length; i++) {
if (i > 0) output += "\r\n";
output += newLines[i];
}
if (newLines.length > 0) output += "\r\n";
linesRedrawn = newLines.length;
} else if (hasStructuralChange || hasLineCountChange) {
// Strategy: PARTIAL - changes in viewport but with shifts, clear from change to end
// After rendering with a final newline, cursor is one line below the last content line
// So if we have N lines (0 to N-1), cursor is at line N
// To move to line firstChangeOffset, we need to move up (N - firstChangeOffset) lines
// But since cursor is at N (not N-1), we actually need to move up (N - firstChangeOffset) lines
// which is totalOldLines - firstChangeOffset
const cursorLine = totalOldLines; // Cursor is one past the last line
const targetLine = firstChangeOffset;
const linesToMoveUp = cursorLine - targetLine;
if (linesToMoveUp > 0) {
output += `\x1b[${linesToMoveUp}A`;
}
// Clear from cursor to end of screen
// First ensure we're at the beginning of the line
output += "\r";
output += "\x1b[0J"; // Clear from cursor to end of screen
const linesToRender = newLines.slice(firstChangeOffset);
for (let i = 0; i < linesToRender.length; i++) {
if (i > 0) output += "\r\n";
output += linesToRender[i];
}
if (linesToRender.length > 0) output += "\r\n";
linesRedrawn = linesToRender.length;
} else {
// Strategy: SURGICAL - only content changes with same line counts, update only changed lines
// The cursor starts at the line after our last content
let currentCursorLine = totalOldLines;
for (const change of changedLines) {
// Move cursor to the line that needs updating
const linesToMove = currentCursorLine - change.lineIndex;
if (linesToMove > 0) {
output += `\x1b[${linesToMove}A`; // Move up
} else if (linesToMove < 0) {
output += `\x1b[${-linesToMove}B`; // Move down
}
// Clear the line and write new content
output += "\x1b[2K"; // Clear entire line
output += "\r"; // Move to start of line
output += change.newContent;
// Cursor is now at the end of the content on this line
// NOTE(review): cursor column is actually at end-of-content, not col 0;
// the "\r" before the next move below compensates — verify against terminal.
currentCursorLine = change.lineIndex;
linesRedrawn++;
}
// Return cursor to end position
// We need to be on the line after our last content line
// First ensure we're at start of current line
output += "\r";
// Move to last content line
const lastContentLine = totalNewLines - 1;
const linesToMove = lastContentLine - currentCursorLine;
if (linesToMove > 0) {
output += `\x1b[${linesToMove}B`;
} else if (linesToMove < 0) {
output += `\x1b[${-linesToMove}A`;
}
// Now add final newline to position cursor on next line
output += "\r\n";
}
// Single write keeps the update atomic from the terminal's perspective.
this.terminal.write(output);
// Save what we rendered
this.previousLines = newLines;
this.totalLinesRedrawn += linesRedrawn;
}
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: Keeping this around as reference for LLM
/**
 * Simpler differential renderer: finds the first changed line and redraws
 * everything from there to the end. When the first change has scrolled above
 * the viewport it cannot be reached with cursor movement, so the whole
 * screen (including scrollback) is cleared and re-rendered.
 */
private renderDifferential(currentCommands: RenderCommand[], termHeight: number): void {
const viewportHeight = termHeight - 1; // Leave one line for cursor
// Build the new lines array
const newLines: string[] = [];
for (const command of currentCommands) {
newLines.push(...command.lines);
}
const totalNewLines = newLines.length;
const totalOldLines = this.previousLines.length;
// Find the first line that changed
let firstChangedLineOffset = -1;
let currentLineOffset = 0;
for (let i = 0; i < currentCommands.length; i++) {
const current = currentCommands[i];
const previous = i < this.previousRenderCommands.length ? this.previousRenderCommands[i] : null;
// Check if this is a new component or component was removed/reordered
if (!previous || previous.id !== current.id) {
firstChangedLineOffset = currentLineOffset;
break;
}
// Check if component content or size changed
if (current.changed) {
firstChangedLineOffset = currentLineOffset;
break;
}
currentLineOffset += current.lines.length;
}
// Also check if we have fewer components now (components removed from end)
if (firstChangedLineOffset === -1 && currentCommands.length < this.previousRenderCommands.length) {
firstChangedLineOffset = currentLineOffset;
}
// If nothing changed, do nothing
if (firstChangedLineOffset === -1) {
this.previousLines = newLines;
return;
}
// Calculate where the first change is relative to the viewport
// If our content exceeds viewport, some is in scrollback
const contentStartInViewport = Math.max(0, totalOldLines - viewportHeight);
const changePositionInViewport = firstChangedLineOffset - contentStartInViewport;
let output = "";
let linesRedrawn = 0;
if (changePositionInViewport < 0) {
// The change is above the viewport - we cannot reach it with cursor
// MUST do full re-render
output = "\x1b[3J\x1b[H"; // Clear scrollback and screen, then home cursor
// Render ALL lines
for (let i = 0; i < newLines.length; i++) {
if (i > 0) output += "\r\n";
output += newLines[i];
}
// Add final newline
if (newLines.length > 0) output += "\r\n";
linesRedrawn = newLines.length;
} else {
// The change is in the viewport - we can update from there
// Calculate how many lines up to move from current cursor position
// (cursor sits one line below the previously rendered content)
const linesToMoveUp = totalOldLines - firstChangedLineOffset;
if (linesToMoveUp > 0) {
output += `\x1b[${linesToMoveUp}A`;
}
// Clear from here to end of screen
output += "\x1b[0J";
// Render everything from the first change onwards
const linesToRender = newLines.slice(firstChangedLineOffset);
for (let i = 0; i < linesToRender.length; i++) {
if (i > 0) output += "\r\n";
output += linesToRender[i];
}
// Add final newline
if (linesToRender.length > 0) output += "\r\n";
linesRedrawn = linesToRender.length;
}
// Single write keeps the update atomic from the terminal's perspective.
this.terminal.write(output);
// Save what we rendered
this.previousLines = newLines;
this.totalLinesRedrawn += linesRedrawn;
}
private handleResize(): void {
// Clear screen and reset
this.terminal.write("\x1b[2J\x1b[H\x1b[?25l");

View file

@ -0,0 +1,161 @@
# Token Usage Tracking Analysis - pi-agent Codebase
## 1. Token Usage Event Structure and Flow
### Per-Request vs Cumulative Analysis
After reading `/Users/badlogic/workspaces/pi-mono/packages/agent/src/agent.ts` in full, I can confirm that **token usage events are per-request, NOT cumulative**.
**Evidence:**
- Lines 296-308 in `callModelResponsesApi()`: Token usage is reported directly from API response usage object
- Lines 435-447 in `callModelChatCompletionsApi()`: Token usage is reported directly from API response usage object
- The token counts represent what was used for that specific LLM request only
### TokenUsageEvent Definition
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/agent.ts:16-24`
```typescript
{
type: "token_usage";
inputTokens: number;
outputTokens: number;
totalTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
reasoningTokens: number;
}
```
## 2. Current Token Usage Display Implementation
### TUI Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/tui-renderer.ts`
**Current Behavior:**
- Lines 60-66: Stores "last" token values (not cumulative)
- Lines 251-259: Updates token counts on `token_usage` events
- Lines 280-311: Displays current request tokens in `updateTokenDisplay()`
- Format: `↑{input} ↓{output} ⚡{reasoning} ⟲{cache_read} ⟳{cache_write} ⚒ {tool_calls}`
**Comment on line 252:** "Store the latest token counts (not cumulative since prompt includes full context)"
### Console Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/console-renderer.ts`
**Current Behavior:**
- Lines 11-16: Stores "last" token values
- Lines 165-172: Updates token counts on `token_usage` events
- Lines 52-82: Displays tokens after each assistant message
## 3. Session Storage
### SessionManager
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/session-manager.ts`
**Current Implementation:**
- Lines 138-146: Has a `totalUsage` field in `SessionData` interface
- Lines 158-160: **BUG**: Only stores the LAST token_usage event, not cumulative totals
- This should accumulate all token usage across the session
## 4. Slash Command Infrastructure
### Existing Slash Command Support
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/tui/src/autocomplete.ts`
**Available Infrastructure:**
- `SlashCommand` interface with `name`, `description`, optional `getArgumentCompletions`
- `CombinedAutocompleteProvider` handles slash command detection and completion
- Text editor auto-triggers on "/" at start of line
### Current Usage in TUI Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/tui-renderer.ts:75-80`
```typescript
const autocompleteProvider = new CombinedAutocompleteProvider(
[], // <-- Empty command array!
process.cwd(),
);
```
**No slash commands are currently implemented in the agent TUI!**
### Example Implementation
**Reference:** `/Users/badlogic/workspaces/pi-mono/packages/tui/test/chat-app.ts:25-60`
Shows how to:
1. Define slash commands with `CombinedAutocompleteProvider`
2. Handle slash command execution in `editor.onSubmit`
3. Add responses to chat container
## 5. Implementation Requirements for /tokens Command
### What Needs to Change
1. **Add Cumulative Token Tracking to TUI Renderer**
- Add cumulative token counters alongside current "last" counters
- Update cumulative totals on each `token_usage` event
2. **Add /tokens Slash Command**
- Add to `CombinedAutocompleteProvider` in tui-renderer.ts
- Handle in `editor.onSubmit` callback
- Display formatted token summary as `TextComponent` in chat container
3. **Fix SessionManager Bug**
- Change `totalUsage` calculation to accumulate all token_usage events
- This will enable session-wide token tracking
4. **Message Handling in TUI**
- Need to capture user input before it goes to agent
- Check if it's a slash command vs regular message
- Route accordingly
### Current User Input Flow
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/main.ts:190-198`
```typescript
while (true) {
const userInput = await renderer.getUserInput();
try {
await agent.ask(userInput); // All input goes to agent
} catch (e: any) {
await renderer.on({ type: "error", message: e.message });
}
}
```
**Problem:** All user input goes directly to the agent - no interception for slash commands!
### Required Architecture Change
Need to modify the TUI interactive loop to:
1. Check if user input starts with "/"
2. If slash command: handle locally in renderer
3. If regular message: pass to agent as before
## 6. Token Display Format Recommendations
Based on existing format patterns, the `/tokens` command should display:
```
Session Token Usage:
↑ 1,234 input tokens
↓ 5,678 output tokens
⚡ 2,345 reasoning tokens
⟲ 890 cache read tokens
⟳ 123 cache write tokens
📊 12,270 total tokens
⚒ 5 tool calls
```
## Summary
The current implementation tracks per-request token usage only. To add cumulative token tracking with a `/tokens` command, we need to:
1. **Fix SessionManager** to properly accumulate token usage
2. **Add cumulative tracking** to TUI renderer
3. **Implement slash command infrastructure** in the agent (currently missing)
4. **Modify user input handling** to intercept slash commands before they reach the agent
5. **Add /tokens command** that displays formatted cumulative statistics
The TUI framework already supports slash commands, but the agent TUI renderer doesn't use them yet.

View file

@ -0,0 +1,28 @@
# Add Token Usage Tracking Command
**Status:** Done
**Agent PID:** 71159
## Original Todo
- agent: we get token_usage events. the last we get tells us how many input/output/cache read/cache write/reasoning tokens were used for the last request to the LLM endpoint. We want to:
- have a /tokens command that outputs the accumulative counts, can just add it to the chat messages container as a nicely formatted TextComponent
- means the tui-renderer needs to keep track of accumulative stats as well, not just last request stats.
- please check agent.ts (read in full) to see if token_usage is actually some form of accumulative thing, or a per request to llm thing. want to understand what we get.
## Description
Add a `/tokens` slash command to the TUI that displays cumulative token usage statistics for the current session. This includes fixing the SessionManager to properly accumulate token usage and implementing slash command infrastructure in the agent's TUI renderer.
*Read [analysis.md](./analysis.md) in full for detailed codebase research and context*
## Implementation Plan
- [x] Fix SessionManager to accumulate token usage instead of storing only the last event (packages/agent/src/session-manager.ts:158-160)
- [x] Add cumulative token tracking properties to TUI renderer (packages/agent/src/renderers/tui-renderer.ts:60-66)
- [x] Add /tokens slash command to CombinedAutocompleteProvider (packages/agent/src/renderers/tui-renderer.ts:75-80)
- [x] Modify TUI renderer's onSubmit to handle slash commands locally (packages/agent/src/renderers/tui-renderer.ts:159-177)
- [x] Implement /tokens command handler that displays formatted cumulative statistics
- [x] Update token_usage event handler to accumulate totals (packages/agent/src/renderers/tui-renderer.ts:275-291)
- [x] Test: Verify /tokens command displays correct cumulative totals
- [x] Test: Send multiple messages and confirm accumulation works correctly
- [x] Fix file autocompletion that was broken by slash command implementation
## Notes
[Implementation notes]

View file

@ -1,3 +1,10 @@
- agent: max output tokens is fixed to 2000 in responses and chat completions calls
- tui: if text editor gets bigger than viewport, we get artifacts in scrollbuffer
- tui: need to benchmark our renderer. always compares old lines vs new lines and does a diff. might be a bit much for 100k+ lines.
- pods: pi start outputs all models that can be run on the pod. however, it doesn't check the vllm version. e.g. gpt-oss can only run via vllm+gpt-oss. glm4.5 can only run on vllm nightly.
- agent: improve reasoning section in README.md
@ -25,15 +32,10 @@
- Anthropic: ❌ `/v1/models` (no context info)
- OpenAI: ❌ `/v1/models` (no context info)
- For Anthropic/OpenAI, may need hardcoded fallback values or separate lookup table
- Display how much of the context window is used by the current context
- agent: compaction & micro compaction
- agent: token usage output sucks, make it better
- current: ↑1,706 ↓409 ⚒ 2
- maybe: ↑ 1,706 - ↓ 409 - ⚒ 2 (or dot?)
- add context window usage percentage (e.g., "23% context used")
- requires context length detection from models endpoint (see todo above)
- agent: test for basic functionality, including thinking, completions & responses API support for all the known providers and their endpoints.
- agent: groq responses api throws on second message
@ -72,6 +74,4 @@
- pods: if a pod is down and i run `pi list`, verifying processes says All processes verified. But that can't be true, as we can no longer SSH into the pod to check.
- agent: start a new agent session. when i press CTRL+C, "Press Ctrl+C again to exit" appears above the text editor followed by an empty line. After about 1 second, the empty line disappears. We should either not show the empty line, or always show the empty line. Maybe Ctrl+C info should be displayed below the text editor.
- agent: we need to make system prompt and tools pluggable. We need to figure out the simplest way for users to define system prompts and toolkits. A toolkit could be a subset of the built-in tools, a mixture of a subset of the built-in tools plus custom self-made tools, maybe include MCP servers, and so on. We need to figure out a way to make this super easy. users should be able to write their tools in whatever language they fancy. which means that probably something like process spawning plus studio communication transport would make the most sense. but then we were back at MCP basically. And that does not support interruptibility, which we need for the agent. So if the agent invokes the tool and the user presses escape in the interface, then the tool invocation must be interrupted and whatever it's doing must stop, including killing all sub-processes. For MCP this could be solved for studio MCP servers by, since we spawn those on startup or whenever we load the tools, we spawn a process for an MCP server and then reuse that process for subsequent tool invocations. If the user interrupts then we could just kill that process, assuming that anything it's doing or any of its sub-processes will be killed along the way. So I guess tools could all be written as MCP servers, but that's a lot of overhead. It would also be nice to be able to provide tools just as a bash script that gets some inputs and return some outputs based on the inputs Same for Go apps or TypeScript apps invoked by MPX TSX. just make the barrier of entry for writing your own tools super fucking low. not necessarily going full MCP. but we also need to support MCP. So whatever we arrive at, we then need to take our built-in tools and see if those can be refactored to work with our new tools