feat(agent): Add /tokens command for cumulative token usage tracking

Added /tokens slash command to TUI that displays session-wide token statistics.
Key changes:
- Fixed SessionManager to accumulate token usage instead of storing only last event
- Added cumulative token tracking to TUI renderer alongside per-request totals
- Implemented slash command infrastructure with /tokens autocomplete support
- Fixed file autocompletion that was missing from Tab key handling
- Clean minimal display format showing input/output/reasoning/cache/tool counts

The /tokens command shows:
Total usage
   input: 1,234
   output: 567
   reasoning: 89
   cache read: 100
   cache write: 50
   tool calls: 2
This commit is contained in:
Mario Zechner 2025-08-11 15:43:48 +02:00
parent 7e3b94ade6
commit e21a46e68f
10 changed files with 303 additions and 283 deletions

View file

View file

@ -2,6 +2,7 @@ import {
CombinedAutocompleteProvider,
Container,
MarkdownComponent,
SlashCommand,
TextComponent,
TextEditor,
TUI,
@ -63,6 +64,13 @@ export class TuiRenderer implements AgentEventReceiver {
private lastCacheWriteTokens = 0;
private lastReasoningTokens = 0;
private toolCallCount = 0;
// Cumulative token tracking
private cumulativeInputTokens = 0;
private cumulativeOutputTokens = 0;
private cumulativeCacheReadTokens = 0;
private cumulativeCacheWriteTokens = 0;
private cumulativeReasoningTokens = 0;
private cumulativeToolCallCount = 0;
private tokenStatusComponent: TextComponent | null = null;
constructor() {
@ -74,7 +82,12 @@ export class TuiRenderer implements AgentEventReceiver {
// Setup autocomplete for file paths and slash commands
const autocompleteProvider = new CombinedAutocompleteProvider(
[],
[
{
name: "tokens",
description: "Show cumulative token usage for this session",
},
],
process.cwd(), // Base directory for file path completion
);
this.editor.setAutocompleteProvider(autocompleteProvider);
@ -148,6 +161,17 @@ export class TuiRenderer implements AgentEventReceiver {
text = text.trim();
if (!text) return;
// Handle slash commands
if (text.startsWith("/")) {
const [command, ...args] = text.slice(1).split(" ");
if (command === "tokens") {
this.showTokenUsage();
return;
}
// Unknown slash command, ignore
return;
}
if (this.onInputCallback) {
this.onInputCallback(text);
}
@ -192,6 +216,7 @@ export class TuiRenderer implements AgentEventReceiver {
case "tool_call":
this.toolCallCount++;
this.cumulativeToolCallCount++;
this.updateTokenDisplay();
this.chatContainer.addChild(new TextComponent(chalk.yellow(`[tool] ${event.name}(${event.args})`)));
break;
@ -255,6 +280,14 @@ export class TuiRenderer implements AgentEventReceiver {
this.lastCacheReadTokens = event.cacheReadTokens;
this.lastCacheWriteTokens = event.cacheWriteTokens;
this.lastReasoningTokens = event.reasoningTokens;
// Accumulate cumulative totals
this.cumulativeInputTokens += event.inputTokens;
this.cumulativeOutputTokens += event.outputTokens;
this.cumulativeCacheReadTokens += event.cacheReadTokens;
this.cumulativeCacheWriteTokens += event.cacheWriteTokens;
this.cumulativeReasoningTokens += event.reasoningTokens;
this.updateTokenDisplay();
break;
@ -282,21 +315,21 @@ export class TuiRenderer implements AgentEventReceiver {
this.tokenContainer.clear();
// Build token display text
let tokenText = chalk.dim(`${this.lastInputTokens.toLocaleString()}${this.lastOutputTokens.toLocaleString()}`);
let tokenText = chalk.dim(` ${this.lastInputTokens.toLocaleString()} ${this.lastOutputTokens.toLocaleString()}`);
// Add reasoning tokens if present
if (this.lastReasoningTokens > 0) {
tokenText += chalk.dim(`${this.lastReasoningTokens.toLocaleString()}`);
tokenText += chalk.dim(` ${this.lastReasoningTokens.toLocaleString()}`);
}
// Add cache info if available
if (this.lastCacheReadTokens > 0 || this.lastCacheWriteTokens > 0) {
const cacheText: string[] = [];
if (this.lastCacheReadTokens > 0) {
cacheText.push(`${this.lastCacheReadTokens.toLocaleString()}`);
cacheText.push(` cache read: ${this.lastCacheReadTokens.toLocaleString()}`);
}
if (this.lastCacheWriteTokens > 0) {
cacheText.push(`${this.lastCacheWriteTokens.toLocaleString()}`);
cacheText.push(` cache write: ${this.lastCacheWriteTokens.toLocaleString()}`);
}
tokenText += chalk.dim(` (${cacheText.join(" ")})`);
}
@ -346,6 +379,35 @@ export class TuiRenderer implements AgentEventReceiver {
this.ui.requestRender();
}
/**
 * Handler for the "/tokens" slash command: renders the cumulative,
 * session-wide token usage into the chat container as a dimmed, italic
 * multi-line summary. Optional sections (reasoning, cache, tool calls)
 * are only emitted when their counters are non-zero.
 */
private showTokenUsage(): void {
	let tokenText = chalk.dim(
		`Total usage\n   input: ${this.cumulativeInputTokens.toLocaleString()}\n   output: ${this.cumulativeOutputTokens.toLocaleString()}`,
	);
	if (this.cumulativeReasoningTokens > 0) {
		tokenText += chalk.dim(`\n   reasoning: ${this.cumulativeReasoningTokens.toLocaleString()}`);
	}
	if (this.cumulativeCacheReadTokens > 0 || this.cumulativeCacheWriteTokens > 0) {
		const cacheLines: string[] = [];
		if (this.cumulativeCacheReadTokens > 0) {
			cacheLines.push(`\n   cache read: ${this.cumulativeCacheReadTokens.toLocaleString()}`);
		}
		if (this.cumulativeCacheWriteTokens > 0) {
			// Bug fix: label previously read "cache right"; the documented
			// output format is "cache write".
			cacheLines.push(`\n   cache write: ${this.cumulativeCacheWriteTokens.toLocaleString()}`);
		}
		// Each entry already begins with "\n   ", so join with no separator —
		// joining with " " (as before) put a stray space at each line start.
		tokenText += chalk.dim(cacheLines.join(""));
	}
	if (this.cumulativeToolCallCount > 0) {
		tokenText += chalk.dim(`\n   tool calls: ${this.cumulativeToolCallCount}`);
	}
	const tokenSummary = new TextComponent(chalk.italic(tokenText), { bottom: 1 });
	this.chatContainer.addChild(tokenSummary);
	this.ui.requestRender();
}
stop(): void {
if (this.currentLoadingAnimation) {
this.currentLoadingAnimation.stop();

View file

@ -156,7 +156,17 @@ export class SessionManager implements AgentEventReceiver {
const eventEntry: SessionEvent = entry as SessionEvent;
events.push(eventEntry);
if (eventEntry.event.type === "token_usage") {
totalUsage = entry.event as Extract<AgentEvent, { type: "token_usage" }>;
const usage = entry.event as Extract<AgentEvent, { type: "token_usage" }>;
if (!totalUsage) {
totalUsage = { ...usage };
} else {
totalUsage.inputTokens += usage.inputTokens;
totalUsage.outputTokens += usage.outputTokens;
totalUsage.totalTokens += usage.totalTokens;
totalUsage.cacheReadTokens += usage.cacheReadTokens;
totalUsage.cacheWriteTokens += usage.cacheWriteTokens;
totalUsage.reasoningTokens += usage.reasoningTokens;
}
}
}
} catch {

View file

@ -649,6 +649,8 @@ export class TextEditor implements Component {
// Check if we're in a slash command context
if (beforeCursor.trimStart().startsWith("/")) {
this.handleSlashCommandCompletion();
} else {
this.forceFileAutocomplete();
}
}
@ -656,8 +658,34 @@ export class TextEditor implements Component {
// For now, fall back to regular autocomplete (slash commands)
// This can be extended later to handle command-specific argument completion
this.tryTriggerAutocomplete(true);
}
/**
 * Explicitly opens file-path autocomplete (used when Tab is pressed outside
 * a slash-command context). Falls back to the ordinary autocomplete trigger
 * when the provider does not expose the force-file hook.
 */
private forceFileAutocomplete(): void {
	if (!this.autocompleteProvider) return;
	// The force-suggestions hook is optional on providers, so probe for it
	// dynamically; absent hook means fall back to the regular trigger.
	const provider = this.autocompleteProvider as any;
	if (!provider.getForceFileSuggestions) {
		this.tryTriggerAutocomplete(true);
		return;
	}
	const result = provider.getForceFileSuggestions(this.state.lines, this.state.cursorLine, this.state.cursorCol);
	if (!result || result.items.length === 0) {
		// Nothing to offer at the cursor position — close any open popup.
		this.cancelAutocomplete();
		return;
	}
	this.autocompletePrefix = result.prefix;
	this.autocompleteList = new SelectList(result.items, 5);
	this.isAutocompleting = true;
}
private cancelAutocomplete(): void {
this.isAutocompleting = false;
this.autocompleteList = undefined as any;

View file

@ -272,9 +272,6 @@ export class TUI extends Container {
this.renderInitial(currentRenderCommands);
this.isFirstRender = false;
} else {
// this.executeDifferentialRender(currentRenderCommands, termHeight);
// this.renderDifferential(currentRenderCommands, termHeight);
// this.renderDifferentialSurgical(currentRenderCommands, termHeight);
this.renderLineBased(currentRenderCommands, termHeight);
}
@ -464,272 +461,6 @@ export class TUI extends Container {
this.totalLinesRedrawn += linesRedrawn;
}
/**
 * Experimental differential renderer that updates individual changed lines
 * in place ("surgical" strategy) when component structure and line counts
 * are stable, falling back to partial or full redraws otherwise.
 * NOTE(review): currently unused — only referenced from a commented-out call
 * site in render(); kept as an implementation reference.
 */
private renderDifferentialSurgical(currentCommands: RenderCommand[], termHeight: number): void {
const viewportHeight = termHeight - 1; // Leave one line for cursor
// Build the new lines array
const newLines: string[] = [];
for (const command of currentCommands) {
newLines.push(...command.lines);
}
const totalNewLines = newLines.length;
const totalOldLines = this.previousLines.length;
// Phase 1: Analyze - categorize all changes
let firstChangeOffset = -1;
let hasLineCountChange = false;
let hasStructuralChange = false;
const changedLines: Array<{ lineIndex: number; newContent: string }> = [];
let currentLineOffset = 0;
for (let i = 0; i < Math.max(currentCommands.length, this.previousRenderCommands.length); i++) {
const current = i < currentCommands.length ? currentCommands[i] : null;
const previous = i < this.previousRenderCommands.length ? this.previousRenderCommands[i] : null;
// Structural change: component added/removed/reordered
if (!current || !previous || current.id !== previous.id) {
hasStructuralChange = true;
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset;
}
break;
}
// Line count change
if (current.changed && current.lines.length !== previous.lines.length) {
hasLineCountChange = true;
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset;
}
break;
}
// Content change with same line count - track individual line changes
if (current.changed) {
for (let j = 0; j < current.lines.length; j++) {
const oldLine =
currentLineOffset + j < this.previousLines.length ? this.previousLines[currentLineOffset + j] : "";
const newLine = current.lines[j];
if (oldLine !== newLine) {
changedLines.push({
lineIndex: currentLineOffset + j,
newContent: newLine,
});
if (firstChangeOffset === -1) {
firstChangeOffset = currentLineOffset + j;
}
}
}
}
currentLineOffset += current ? current.lines.length : 0;
}
// If nothing changed, do nothing
if (firstChangeOffset === -1) {
this.previousLines = newLines;
return;
}
// Phase 2: Decision - pick rendering strategy
// Lines scrolled out of the viewport cannot be reached with cursor moves,
// so a change above the viewport forces a full redraw.
const contentStartInViewport = Math.max(0, totalOldLines - viewportHeight);
const changePositionInViewport = firstChangeOffset - contentStartInViewport;
let output = "";
let linesRedrawn = 0;
if (changePositionInViewport < 0) {
// Strategy: FULL - change is above viewport, must clear scrollback and re-render all
output = "\x1b[3J\x1b[H"; // Clear scrollback and screen, then home cursor
for (let i = 0; i < newLines.length; i++) {
if (i > 0) output += "\r\n";
output += newLines[i];
}
if (newLines.length > 0) output += "\r\n";
linesRedrawn = newLines.length;
} else if (hasStructuralChange || hasLineCountChange) {
// Strategy: PARTIAL - changes in viewport but with shifts, clear from change to end
// After rendering with a final newline, cursor is one line below the last content line
// So if we have N lines (0 to N-1), cursor is at line N
// To move to line firstChangeOffset, we need to move up (N - firstChangeOffset) lines
// But since cursor is at N (not N-1), we actually need to move up (N - firstChangeOffset) lines
// which is totalOldLines - firstChangeOffset
const cursorLine = totalOldLines; // Cursor is one past the last line
const targetLine = firstChangeOffset;
const linesToMoveUp = cursorLine - targetLine;
if (linesToMoveUp > 0) {
output += `\x1b[${linesToMoveUp}A`;
}
// Clear from cursor to end of screen
// First ensure we're at the beginning of the line
output += "\r";
output += "\x1b[0J"; // Clear from cursor to end of screen
const linesToRender = newLines.slice(firstChangeOffset);
for (let i = 0; i < linesToRender.length; i++) {
if (i > 0) output += "\r\n";
output += linesToRender[i];
}
if (linesToRender.length > 0) output += "\r\n";
linesRedrawn = linesToRender.length;
} else {
// Strategy: SURGICAL - only content changes with same line counts, update only changed lines
// The cursor starts at the line after our last content
let currentCursorLine = totalOldLines;
for (const change of changedLines) {
// Move cursor to the line that needs updating
const linesToMove = currentCursorLine - change.lineIndex;
if (linesToMove > 0) {
output += `\x1b[${linesToMove}A`; // Move up
} else if (linesToMove < 0) {
output += `\x1b[${-linesToMove}B`; // Move down
}
// Clear the line and write new content
output += "\x1b[2K"; // Clear entire line
output += "\r"; // Move to start of line
output += change.newContent;
// Cursor is now at the end of the content on this line
// NOTE(review): cursor column is actually at end-of-content, not col 0;
// the "\r" before the next move below compensates — verify against terminal.
currentCursorLine = change.lineIndex;
linesRedrawn++;
}
// Return cursor to end position
// We need to be on the line after our last content line
// First ensure we're at start of current line
output += "\r";
// Move to last content line
const lastContentLine = totalNewLines - 1;
const linesToMove = lastContentLine - currentCursorLine;
if (linesToMove > 0) {
output += `\x1b[${linesToMove}B`;
} else if (linesToMove < 0) {
output += `\x1b[${-linesToMove}A`;
}
// Now add final newline to position cursor on next line
output += "\r\n";
}
// Single write keeps the update atomic from the terminal's perspective.
this.terminal.write(output);
// Save what we rendered
this.previousLines = newLines;
this.totalLinesRedrawn += linesRedrawn;
}
// biome-ignore lint/correctness/noUnusedPrivateClassMembers: Keeping this around as reference for LLM
/**
 * Simpler differential renderer: finds the first changed line and redraws
 * everything from there to the end. When the first change has scrolled above
 * the viewport it cannot be reached with cursor movement, so the whole
 * screen (including scrollback) is cleared and re-rendered.
 */
private renderDifferential(currentCommands: RenderCommand[], termHeight: number): void {
const viewportHeight = termHeight - 1; // Leave one line for cursor
// Build the new lines array
const newLines: string[] = [];
for (const command of currentCommands) {
newLines.push(...command.lines);
}
const totalNewLines = newLines.length;
const totalOldLines = this.previousLines.length;
// Find the first line that changed
let firstChangedLineOffset = -1;
let currentLineOffset = 0;
for (let i = 0; i < currentCommands.length; i++) {
const current = currentCommands[i];
const previous = i < this.previousRenderCommands.length ? this.previousRenderCommands[i] : null;
// Check if this is a new component or component was removed/reordered
if (!previous || previous.id !== current.id) {
firstChangedLineOffset = currentLineOffset;
break;
}
// Check if component content or size changed
if (current.changed) {
firstChangedLineOffset = currentLineOffset;
break;
}
currentLineOffset += current.lines.length;
}
// Also check if we have fewer components now (components removed from end)
if (firstChangedLineOffset === -1 && currentCommands.length < this.previousRenderCommands.length) {
firstChangedLineOffset = currentLineOffset;
}
// If nothing changed, do nothing
if (firstChangedLineOffset === -1) {
this.previousLines = newLines;
return;
}
// Calculate where the first change is relative to the viewport
// If our content exceeds viewport, some is in scrollback
const contentStartInViewport = Math.max(0, totalOldLines - viewportHeight);
const changePositionInViewport = firstChangedLineOffset - contentStartInViewport;
let output = "";
let linesRedrawn = 0;
if (changePositionInViewport < 0) {
// The change is above the viewport - we cannot reach it with cursor
// MUST do full re-render
output = "\x1b[3J\x1b[H"; // Clear scrollback and screen, then home cursor
// Render ALL lines
for (let i = 0; i < newLines.length; i++) {
if (i > 0) output += "\r\n";
output += newLines[i];
}
// Add final newline
if (newLines.length > 0) output += "\r\n";
linesRedrawn = newLines.length;
} else {
// The change is in the viewport - we can update from there
// Calculate how many lines up to move from current cursor position
// (cursor sits one line below the previously rendered content)
const linesToMoveUp = totalOldLines - firstChangedLineOffset;
if (linesToMoveUp > 0) {
output += `\x1b[${linesToMoveUp}A`;
}
// Clear from here to end of screen
output += "\x1b[0J";
// Render everything from the first change onwards
const linesToRender = newLines.slice(firstChangedLineOffset);
for (let i = 0; i < linesToRender.length; i++) {
if (i > 0) output += "\r\n";
output += linesToRender[i];
}
// Add final newline
if (linesToRender.length > 0) output += "\r\n";
linesRedrawn = linesToRender.length;
}
// Single write keeps the update atomic from the terminal's perspective.
this.terminal.write(output);
// Save what we rendered
this.previousLines = newLines;
this.totalLinesRedrawn += linesRedrawn;
}
private handleResize(): void {
// Clear screen and reset
this.terminal.write("\x1b[2J\x1b[H\x1b[?25l");

View file

@ -0,0 +1,161 @@
# Token Usage Tracking Analysis - pi-agent Codebase
## 1. Token Usage Event Structure and Flow
### Per-Request vs Cumulative Analysis
After reading `/Users/badlogic/workspaces/pi-mono/packages/agent/src/agent.ts` in full, I can confirm that **token usage events are per-request, NOT cumulative**.
**Evidence:**
- Lines 296-308 in `callModelResponsesApi()`: Token usage is reported directly from API response usage object
- Lines 435-447 in `callModelChatCompletionsApi()`: Token usage is reported directly from API response usage object
- The token counts represent what was used for that specific LLM request only
### TokenUsageEvent Definition
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/agent.ts:16-24`
```typescript
{
type: "token_usage";
inputTokens: number;
outputTokens: number;
totalTokens: number;
cacheReadTokens: number;
cacheWriteTokens: number;
reasoningTokens: number;
}
```
## 2. Current Token Usage Display Implementation
### TUI Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/tui-renderer.ts`
**Current Behavior:**
- Lines 60-66: Stores "last" token values (not cumulative)
- Lines 251-259: Updates token counts on `token_usage` events
- Lines 280-311: Displays current request tokens in `updateTokenDisplay()`
- Format: `↑{input} ↓{output} ⚡{reasoning} ⟲{cache_read} ⟳{cache_write} ⚒ {tool_calls}`
**Comment on line 252:** "Store the latest token counts (not cumulative since prompt includes full context)"
### Console Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/console-renderer.ts`
**Current Behavior:**
- Lines 11-16: Stores "last" token values
- Lines 165-172: Updates token counts on `token_usage` events
- Lines 52-82: Displays tokens after each assistant message
## 3. Session Storage
### SessionManager
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/session-manager.ts`
**Current Implementation:**
- Lines 138-146: Has a `totalUsage` field in `SessionData` interface
- Lines 158-160: **BUG**: Only stores the LAST token_usage event, not cumulative totals
- This should accumulate all token usage across the session
## 4. Slash Command Infrastructure
### Existing Slash Command Support
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/tui/src/autocomplete.ts`
**Available Infrastructure:**
- `SlashCommand` interface with `name`, `description`, optional `getArgumentCompletions`
- `CombinedAutocompleteProvider` handles slash command detection and completion
- Text editor auto-triggers on "/" at start of line
### Current Usage in TUI Renderer
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/renderers/tui-renderer.ts:75-80`
```typescript
const autocompleteProvider = new CombinedAutocompleteProvider(
[], // <-- Empty command array!
process.cwd(),
);
```
**No slash commands are currently implemented in the agent TUI!**
### Example Implementation
**Reference:** `/Users/badlogic/workspaces/pi-mono/packages/tui/test/chat-app.ts:25-60`
Shows how to:
1. Define slash commands with `CombinedAutocompleteProvider`
2. Handle slash command execution in `editor.onSubmit`
3. Add responses to chat container
## 5. Implementation Requirements for /tokens Command
### What Needs to Change
1. **Add Cumulative Token Tracking to TUI Renderer**
- Add cumulative token counters alongside current "last" counters
- Update cumulative totals on each `token_usage` event
2. **Add /tokens Slash Command**
- Add to `CombinedAutocompleteProvider` in tui-renderer.ts
- Handle in `editor.onSubmit` callback
- Display formatted token summary as `TextComponent` in chat container
3. **Fix SessionManager Bug**
- Change `totalUsage` calculation to accumulate all token_usage events
- This will enable session-wide token tracking
4. **Message Handling in TUI**
- Need to capture user input before it goes to agent
- Check if it's a slash command vs regular message
- Route accordingly
### Current User Input Flow
**Location:** `/Users/badlogic/workspaces/pi-mono/packages/agent/src/main.ts:190-198`
```typescript
while (true) {
const userInput = await renderer.getUserInput();
try {
await agent.ask(userInput); // All input goes to agent
} catch (e: any) {
await renderer.on({ type: "error", message: e.message });
}
}
```
**Problem:** All user input goes directly to the agent - no interception for slash commands!
### Required Architecture Change
Need to modify the TUI interactive loop to:
1. Check if user input starts with "/"
2. If slash command: handle locally in renderer
3. If regular message: pass to agent as before
## 6. Token Display Format Recommendations
Based on existing format patterns, the `/tokens` command should display:
```
Session Token Usage:
↑ 1,234 input tokens
↓ 5,678 output tokens
⚡ 2,345 reasoning tokens
⟲ 890 cache read tokens
⟳ 123 cache write tokens
📊 12,270 total tokens
⚒ 5 tool calls
```
## Summary
The current implementation tracks per-request token usage only. To add cumulative token tracking with a `/tokens` command, we need to:
1. **Fix SessionManager** to properly accumulate token usage
2. **Add cumulative tracking** to TUI renderer
3. **Implement slash command infrastructure** in the agent (currently missing)
4. **Modify user input handling** to intercept slash commands before they reach the agent
5. **Add /tokens command** that displays formatted cumulative statistics
The TUI framework already supports slash commands, but the agent TUI renderer doesn't use them yet.

View file

@ -0,0 +1,28 @@
# Add Token Usage Tracking Command
**Status:** Done
**Agent PID:** 71159
## Original Todo
- agent: we get token_usage events. the last we get tells us how many input/output/cache read/cache write/reasoning tokens were used for the last request to the LLM endpoint. We want to:
- have a /tokens command that outputs the accumulative counts, can just add it to the chat messages container as a nicely formatted TextComponent
- means the tui-renderer needs to keep track of accumulative stats as well, not just last request stats.
- please check agent.ts (read in full) to see if token_usage is actually some form of accumulative thing, or a per request to llm thing. want to understand what we get.
## Description
Add a `/tokens` slash command to the TUI that displays cumulative token usage statistics for the current session. This includes fixing the SessionManager to properly accumulate token usage and implementing slash command infrastructure in the agent's TUI renderer.
*Read [analysis.md](./analysis.md) in full for detailed codebase research and context*
## Implementation Plan
- [x] Fix SessionManager to accumulate token usage instead of storing only the last event (packages/agent/src/session-manager.ts:158-160)
- [x] Add cumulative token tracking properties to TUI renderer (packages/agent/src/renderers/tui-renderer.ts:60-66)
- [x] Add /tokens slash command to CombinedAutocompleteProvider (packages/agent/src/renderers/tui-renderer.ts:75-80)
- [x] Modify TUI renderer's onSubmit to handle slash commands locally (packages/agent/src/renderers/tui-renderer.ts:159-177)
- [x] Implement /tokens command handler that displays formatted cumulative statistics
- [x] Update token_usage event handler to accumulate totals (packages/agent/src/renderers/tui-renderer.ts:275-291)
- [x] Test: Verify /tokens command displays correct cumulative totals
- [x] Test: Send multiple messages and confirm accumulation works correctly
- [x] Fix file autocompletion that was broken by slash command implementation
## Notes
[Implementation notes]

View file

@ -1,3 +1,10 @@
- agent: max output tokens is fixed to 2000 in responses and chat completions calls
- tui: if text editor gets bigger than viewport, we get artifacts in scrollbuffer
- tui: need to benchmark our renderer. always compares old lines vs new lines and does a diff. might be a bit much for 100k+ lines.
- pods: pi start outputs all models that can be run on the pod. however, it doesn't check the vllm version. e.g. gpt-oss can only run via vllm+gpt-oss. glm4.5 can only run on vllm nightly.
- agent: improve reasoning section in README.md
@ -25,15 +32,10 @@
- Anthropic: ❌ `/v1/models` (no context info)
- OpenAI: ❌ `/v1/models` (no context info)
- For Anthropic/OpenAI, may need hardcoded fallback values or separate lookup table
- Display how much of the context window is used by the current context
- agent: compaction & micro compaction
- agent: token usage output sucks, make it better
- current: ↑1,706 ↓409 ⚒ 2
- maybe: ↑ 1,706 - ↓ 409 - ⚒ 2 (or dot?)
- add context window usage percentage (e.g., "23% context used")
- requires context length detection from models endpoint (see todo above)
- agent: test for basic functionality, including thinking, completions & responses API support for all the known providers and their endpoints.
- agent: groq responses api throws on second message
@ -72,6 +74,4 @@
- pods: if a pod is down and i run `pi list`, verifying processes says All processes verified. But that can't be true, as we can no longer SSH into the pod to check.
- agent: start a new agent session. when i press CTRL+C, "Press Ctrl+C again to exit" appears above the text editor followed by an empty line. After about 1 second, the empty line disappears. We should either not show the empty line, or always show the empty line. Maybe Ctrl+C info should be displayed below the text editor.
- agent: we need to make system prompt and tools pluggable. We need to figure out the simplest way for users to define system prompts and toolkits. A toolkit could be a subset of the built-in tools, a mixture of a subset of the built-in tools plus custom self-made tools, maybe include MCP servers, and so on. We need to figure out a way to make this super easy. users should be able to write their tools in whatever language they fancy. which means that probably something like process spawning plus studio communication transport would make the most sense. but then we were back at MCP basically. And that does not support interruptibility, which we need for the agent. So if the agent invokes the tool and the user presses escape in the interface, then the tool invocation must be interrupted and whatever it's doing must stop, including killing all sub-processes. For MCP this could be solved for studio MCP servers by, since we spawn those on startup or whenever we load the tools, we spawn a process for an MCP server and then reuse that process for subsequent tool invocations. If the user interrupts then we could just kill that process, assuming that anything it's doing or any of its sub-processes will be killed along the way. So I guess tools could all be written as MCP servers, but that's a lot of overhead. It would also be nice to be able to provide tools just as a bash script that gets some inputs and return some outputs based on the inputs Same for Go apps or TypeScript apps invoked by MPX TSX. just make the barrier of entry for writing your own tools super fucking low. not necessarily going full MCP. but we also need to support MCP. So whatever we arrive at, we then need to take our built-in tools and see if those can be refactored to work with our new tools