From ae553890517e48435e8573a52d328523140afdf9 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Tue, 30 Dec 2025 23:40:49 +0100 Subject: [PATCH] Rewrite agent README with clearer structure and event flow documentation --- packages/agent/README.md | 503 ++++++++++++++++++--------------------- 1 file changed, 238 insertions(+), 265 deletions(-) diff --git a/packages/agent/README.md b/packages/agent/README.md index 0d9b1403..eb523d57 100644 --- a/packages/agent/README.md +++ b/packages/agent/README.md @@ -1,127 +1,160 @@ -# @mariozechner/pi-agent-core +# @mariozechner/pi-agent -Stateful agent with tool execution, event streaming, and extensible message types. Built on `@mariozechner/pi-ai`. +Stateful agent with tool execution and event streaming. Built on `@mariozechner/pi-ai`. ## Installation ```bash -npm install @mariozechner/pi-agent-core +npm install @mariozechner/pi-agent ``` ## Quick Start ```typescript -import { Agent } from '@mariozechner/pi-agent-core'; -import { getModel } from '@mariozechner/pi-ai'; +import { Agent } from "@mariozechner/pi-agent"; +import { getModel } from "@mariozechner/pi-ai"; const agent = new Agent({ initialState: { - systemPrompt: 'You are a helpful assistant.', - model: getModel('anthropic', 'claude-sonnet-4-20250514'), - thinkingLevel: 'medium', - tools: [] - } + systemPrompt: "You are a helpful assistant.", + model: getModel("anthropic", "claude-sonnet-4-20250514"), + }, }); -// Subscribe to events for reactive UI updates agent.subscribe((event) => { - switch (event.type) { - case 'message_start': - console.log(`${event.message.role} message started`); - break; - case 'message_update': - // Only emitted for assistant messages during streaming - // event.message is partial - may have incomplete content - for (const block of event.message.content) { - if (block.type === 'text') process.stdout.write(block.text); - } - break; - case 'message_end': - console.log(`${event.message.role} message complete`); - break; - case 
'tool_execution_start': - console.log(`Calling ${event.toolName}...`); - break; - case 'tool_execution_end': - console.log(`Result:`, event.result.content); - break; + if (event.type === "message_update") { + // Stream assistant response + for (const block of event.message.content) { + if (block.type === "text") process.stdout.write(block.text); + } } }); -await agent.prompt('Hello, world!'); -console.log(agent.state.messages); +await agent.prompt("Hello!"); ``` -## AgentMessage vs LLM Message +## Core Concepts -The agent internally works with `AgentMessage`, a flexible type that can include: +### AgentMessage vs LLM Message + +The agent works with `AgentMessage`, a flexible type that can include: - Standard LLM messages (`user`, `assistant`, `toolResult`) -- Custom app-specific message types (via declaration merging) +- Custom app-specific message types via declaration merging -LLMs only understand a subset: `user`, `assistant`, and `toolResult` messages with specific content formats. The `convertToLlm` function bridges this gap. +LLMs only understand `user`, `assistant`, and `toolResult`. The `convertToLlm` function bridges this gap by filtering and transforming messages before each LLM call. -### Why This Separation? - -1. **Rich UI state**: Store UI-specific data (attachments metadata, custom message types) alongside the conversation -2. **Session persistence**: Save the full conversation state including app-specific messages -3. **Context manipulation**: Transform messages before sending to LLM (compaction, injection, filtering) - -### The Conversion Flow +### Message Flow ``` -AgentMessage[] → transformContext() → AgentMessage[] → convertToLlm() → Message[] → LLM - ↑ (optional) (required) - | - App state with custom types, - attachments, UI metadata +AgentMessage[] → transformContext() → AgentMessage[] → convertToLlm() → Message[] → LLM + (optional) (required) ``` -### Constraints +1. **transformContext**: Prune old messages, inject external context +2. 
**convertToLlm**: Filter out UI-only messages, convert custom types to LLM format -**Messages passed to `prompt()` or queued via `queueMessage()` must convert to LLM messages with `role: "user"` or `role: "toolResult"`.** +## Event Flow -When calling `continue()`, the last message in the context must also convert to `user` or `toolResult`. The LLM expects to respond to a user or tool result, not to its own assistant message. +The agent emits events for UI updates. Understanding the event sequence helps build responsive interfaces. + +### prompt() Event Sequence + +When you call `prompt("Hello")`: + +``` +prompt("Hello") +├─ agent_start +├─ turn_start +├─ message_start { message: userMessage } // Your prompt +├─ message_end { message: userMessage } +├─ message_start { message: assistantMessage } // LLM starts responding +├─ message_update { message: partial... } // Streaming chunks +├─ message_update { message: partial... } +├─ message_end { message: assistantMessage } // Complete response +├─ turn_end { message, toolResults: [] } +└─ agent_end { messages: [...] } +``` + +### With Tool Calls + +If the assistant calls tools, the loop continues: + +``` +prompt("Read config.json") +├─ agent_start +├─ turn_start +├─ message_start/end { userMessage } +├─ message_start { assistantMessage with toolCall } +├─ message_update... +├─ message_end { assistantMessage } +├─ tool_execution_start { toolCallId, toolName, args } +├─ tool_execution_update { partialResult } // If tool streams +├─ tool_execution_end { toolCallId, result } +├─ message_start/end { toolResultMessage } +├─ turn_end { message, toolResults: [toolResult] } +│ +├─ turn_start // Next turn +├─ message_start { assistantMessage } // LLM responds to tool result +├─ message_update... +├─ message_end +├─ turn_end +└─ agent_end +``` + +### continue() Event Sequence + +`continue()` resumes from existing context without adding a new message. Use it for retries after errors. 
```typescript -// OK: Standard user message -await agent.prompt('Hello'); - -// OK: Custom type that converts to user message -await agent.prompt({ role: 'hookMessage', content: 'System notification', timestamp: Date.now() }); -// But convertToLlm must handle this: -convertToLlm: (messages) => messages.map(m => { - if (m.role === 'hookMessage') { - return { role: 'user', content: m.content, timestamp: m.timestamp }; - } - return m; -}) - -// ERROR: Cannot prompt with assistant message -await agent.prompt({ role: 'assistant', content: [...], ... }); // Will fail at LLM +// After an error, retry from current state +await agent.continue(); ``` +The last message in context must be `user` or `toolResult` (not `assistant`). + +### Event Types + +| Event | Description | +|-------|-------------| +| `agent_start` | Agent begins processing | +| `agent_end` | Agent completes with all new messages | +| `turn_start` | New turn begins (one LLM call + tool executions) | +| `turn_end` | Turn completes with assistant message and tool results | +| `message_start` | Any message begins (user, assistant, toolResult) | +| `message_update` | **Assistant only.** Partial message during streaming | +| `message_end` | Message completes | +| `tool_execution_start` | Tool begins | +| `tool_execution_update` | Tool streams progress | +| `tool_execution_end` | Tool completes | + ## Agent Options ```typescript -interface AgentOptions { - initialState?: Partial<AgentState>; +const agent = new Agent({ + // Initial state + initialState: { + systemPrompt: string, + model: Model, + thinkingLevel: "off" | "minimal" | "low" | "medium" | "high" | "xhigh", + tools: AgentTool[], + messages: AgentMessage[], + }, - // Converts AgentMessage[] to LLM-compatible Message[] before each LLM call. - // Default: filters to user/assistant/toolResult and converts image attachments. 
- convertToLlm?: (messages: AgentMessage[]) => Message[] | Promise<Message[]>; + // Convert AgentMessage[] to LLM Message[] (required for custom message types) + convertToLlm: (messages) => messages.filter(...), - // Transform context before convertToLlm (for pruning, compaction, injecting context) - transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise<AgentMessage[]>; + // Transform context before convertToLlm (for pruning, compaction) + transformContext: async (messages, signal) => pruneOldMessages(messages), - // Queue mode: 'all' sends all queued messages, 'one-at-a-time' sends one per turn - queueMode?: 'all' | 'one-at-a-time'; + // How to handle queued messages: "one-at-a-time" (default) or "all" + queueMode: "one-at-a-time", - // Custom stream function (for proxy backends). Default: streamSimple from pi-ai - streamFn?: StreamFn; + // Custom stream function (for proxy backends) + streamFn: streamProxy, - // Dynamic API key resolution (useful for expiring OAuth tokens) - getApiKey?: (provider: string) => Promise<string | undefined> | string | undefined; -} + // Dynamic API key resolution (for expiring OAuth tokens) + getApiKey: async (provider) => refreshToken(), +}); ``` ## Agent State @@ -130,250 +163,190 @@ interface AgentOptions { interface AgentState { systemPrompt: string; model: Model; - thinkingLevel: ThinkingLevel; // 'off' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' + thinkingLevel: ThinkingLevel; tools: AgentTool[]; - messages: AgentMessage[]; // Full conversation including custom types + messages: AgentMessage[]; isStreaming: boolean; - streamMessage: AgentMessage | null; // Current partial message during streaming + streamMessage: AgentMessage | null; // Current partial during streaming pendingToolCalls: Set<string>; error?: string; } ``` -## Events +Access via `agent.state`. During streaming, `streamMessage` contains the partial assistant message. -Events provide fine-grained lifecycle information for building reactive UIs. 
+## Methods -### Event Types - -| Event | Description | -|-------|-------------| -| `agent_start` | Agent begins processing | -| `agent_end` | Agent completes, contains all generated messages | -| `turn_start` | New turn begins (one LLM response + tool executions) | -| `turn_end` | Turn completes with assistant message and tool results | -| `message_start` | Message begins (user, assistant, or toolResult) | -| `message_update` | **Assistant messages only.** Partial message during streaming | -| `message_end` | Message completes | -| `tool_execution_start` | Tool begins execution | -| `tool_execution_update` | Tool streams progress | -| `tool_execution_end` | Tool completes with result | - -### Message Events for prompt() and queueMessage() - -When you call `prompt(message)`, the agent emits `message_start` and `message_end` events for that message before the assistant responds: - -``` -prompt(userMessage) - → agent_start - → turn_start - → message_start { message: userMessage } - → message_end { message: userMessage } - → message_start { message: assistantMessage } // LLM starts responding - → message_update { message: partialAssistant } // streaming... - → message_end { message: assistantMessage } - ... -``` - -Queued messages (via `queueMessage()`) emit the same events when injected: - -``` -// During tool execution, a message is queued -agent.queueMessage(interruptMessage) - -// After tool completes, before next LLM call: - → message_start { message: interruptMessage } - → message_end { message: interruptMessage } - → message_start { message: assistantMessage } // LLM responds to interrupt - ... -``` - -### Handling Partial Messages in Reactive UIs - -`message_update` events contain partial assistant messages during streaming. 
The `event.message` may have: -- Incomplete text (truncated mid-word) -- Partial tool call arguments -- Missing content blocks that haven't started streaming yet - -**Pattern for reactive UIs:** +### Prompting ```typescript -agent.subscribe((event) => { - switch (event.type) { - case 'message_start': - if (event.message.role === 'assistant') { - // Create placeholder in UI - ui.addMessage({ id: tempId, role: 'assistant', content: [] }); - } - break; +// Text prompt +await agent.prompt("Hello"); - case 'message_update': - // Replace placeholder content with partial content - // This is only emitted for assistant messages - ui.updateMessage(tempId, event.message.content); - break; +// With images +await agent.prompt("What's in this image?", [ + { type: "image", data: base64Data, mimeType: "image/jpeg" } +]); - case 'message_end': - if (event.message.role === 'assistant') { - // Finalize with complete message - ui.finalizeMessage(tempId, event.message); - } - break; - } -}); -``` - -**Accessing the current partial message:** - -During streaming, `agent.state.streamMessage` contains the current partial message. This is useful for rendering outside the event handler: - -```typescript -// In a render loop or reactive binding -if (agent.state.isStreaming && agent.state.streamMessage) { - renderPartialMessage(agent.state.streamMessage); -} -``` - -## Custom Message Types - -Extend `AgentMessage` for app-specific messages via declaration merging: - -```typescript -declare module '@mariozechner/pi-agent-core' { - interface CustomAgentMessages { - artifact: { role: 'artifact'; code: string; language: string; timestamp: number }; - notification: { role: 'notification'; text: string; timestamp: number }; - } -} - -// AgentMessage now includes your custom types -const msg: AgentMessage = { role: 'artifact', code: '...', language: 'typescript', timestamp: Date.now() }; -``` - -Custom messages are stored in state but filtered out by the default `convertToLlm`. 
Provide your own converter to handle them: - -```typescript -const agent = new Agent({ - convertToLlm: (messages) => { - return messages - .filter(m => m.role !== 'notification') // Filter out UI-only messages - .map(m => { - if (m.role === 'artifact') { - // Convert to user message so LLM sees the artifact - return { role: 'user', content: `[Artifact: ${m.language}]\n${m.code}`, timestamp: m.timestamp }; - } - return m; - }); - } +// AgentMessage directly +await agent.prompt({ role: "user", content: "Hello", timestamp: Date.now() }); + +// Continue from current context (last message must be user or toolResult) +await agent.continue(); +``` + +### State Management + +```typescript +agent.setSystemPrompt("New prompt"); +agent.setModel(getModel("openai", "gpt-4o")); +agent.setThinkingLevel("medium"); +agent.setTools([myTool]); +agent.replaceMessages(newMessages); +agent.appendMessage(message); +agent.clearMessages(); +agent.reset(); // Clear everything +``` + +### Control + +```typescript +agent.abort(); // Cancel current operation +await agent.waitForIdle(); // Wait for completion +``` + +### Events + +```typescript +const unsubscribe = agent.subscribe((event) => { + console.log(event.type); }); +unsubscribe(); ``` ## Message Queue -Queue messages to inject at the next turn: +Queue messages to inject during tool execution (for user interruptions): ```typescript -agent.setQueueMode('one-at-a-time'); +agent.setQueueMode("one-at-a-time"); -// Queue while agent is streaming +// While agent is running tools agent.queueMessage({ - role: 'user', - content: 'Stop what you are doing and focus on this instead.', - timestamp: Date.now() + role: "user", + content: "Stop! Do this instead.", + timestamp: Date.now(), }); ``` -When queued messages are detected after a tool call, remaining tool calls are skipped with error results ("Skipped due to queued user message"). The queued message is then injected before the next assistant response. 
+When queued messages are detected after a tool completes: +1. Remaining tools are skipped with error results +2. Queued message is injected +3. LLM responds to the interruption -## Images +## Custom Message Types -User messages can include images: +Extend `AgentMessage` via declaration merging: ```typescript -await agent.prompt('What is in this image?', [ - { type: 'image', data: base64ImageData, mimeType: 'image/jpeg' } -]); +declare module "@mariozechner/pi-agent" { + interface CustomAgentMessages { + notification: { role: "notification"; text: string; timestamp: number }; + } +} + +// Now valid +const msg: AgentMessage = { role: "notification", text: "Info", timestamp: Date.now() }; +``` + +Handle custom types in `convertToLlm`: + +```typescript +const agent = new Agent({ + convertToLlm: (messages) => messages.flatMap(m => { + if (m.role === "notification") return []; // Filter out + return [m]; + }), +}); +``` + +## Tools + +Tools extend `Tool` from pi-ai with an `execute` function: + +```typescript +import { Type } from "@sinclair/typebox"; + +const readFileTool: AgentTool = { + name: "read_file", + label: "Read File", // For UI display + description: "Read a file's contents", + parameters: Type.Object({ + path: Type.String({ description: "File path" }), + }), + execute: async (toolCallId, params, signal, onUpdate) => { + const content = await fs.readFile(params.path, "utf-8"); + + // Optional: stream progress + onUpdate?.({ content: [{ type: "text", text: "Reading..." 
}], details: {} }); + + return { + content: [{ type: "text", text: content }], + details: { path: params.path, size: content.length }, + }; + }, +}; + +agent.setTools([readFileTool]); ``` ## Proxy Usage -For browser apps that need to proxy through a backend, use `streamProxy`: +For browser apps that proxy through a backend: ```typescript -import { Agent, streamProxy } from '@mariozechner/pi-agent-core'; +import { Agent, streamProxy } from "@mariozechner/pi-agent"; const agent = new Agent({ - streamFn: (model, context, options) => streamProxy( - '/api/agent', - model, - context, - options, - { 'Authorization': 'Bearer ...' } - ) + streamFn: (model, context, options) => + streamProxy(model, context, { + ...options, + authToken: "...", + proxyUrl: "https://your-server.com", + }), }); ``` ## Low-Level API -For more control, use `agentLoop` and `agentLoopContinue` directly: +For direct control without the Agent class: ```typescript -import { agentLoop, agentLoopContinue, AgentContext, AgentLoopConfig } from '@mariozechner/pi-agent-core'; -import { getModel, streamSimple } from '@mariozechner/pi-ai'; +import { agentLoop, agentLoopContinue } from "@mariozechner/pi-agent"; const context: AgentContext = { - systemPrompt: 'You are helpful.', + systemPrompt: "You are helpful.", messages: [], - tools: [myTool] + tools: [], }; const config: AgentLoopConfig = { - model: getModel('openai', 'gpt-4o-mini'), - convertToLlm: (msgs) => msgs.filter(m => ['user', 'assistant', 'toolResult'].includes(m.role)) + model: getModel("openai", "gpt-4o"), + convertToLlm: (msgs) => msgs.filter(m => ["user", "assistant", "toolResult"].includes(m.role)), }; -const userMessage = { role: 'user', content: 'Hello', timestamp: Date.now() }; +const userMessage = { role: "user", content: "Hello", timestamp: Date.now() }; -for await (const event of agentLoop(userMessage, context, config, undefined, streamSimple)) { +for await (const event of agentLoop([userMessage], context, config)) { 
console.log(event.type); } -// Continue from existing context (e.g., after overflow recovery) -// Last message in context must convert to 'user' or 'toolResult' -for await (const event of agentLoopContinue(context, config, undefined, streamSimple)) { +// Continue from existing context +for await (const event of agentLoopContinue(context, config)) { console.log(event.type); } ``` -## API Reference - -### Agent Methods - -| Method | Description | -|--------|-------------| -| `prompt(text, images?)` | Send a user prompt with optional images | -| `prompt(message)` | Send an AgentMessage directly (must convert to user/toolResult) | -| `continue()` | Continue from current context (last message must convert to user/toolResult) | -| `abort()` | Abort current operation | -| `waitForIdle()` | Promise that resolves when agent is idle | -| `reset()` | Clear all messages and state | -| `subscribe(fn)` | Subscribe to events, returns unsubscribe function | -| `queueMessage(msg)` | Queue message for next turn (must convert to user/toolResult) | -| `clearMessageQueue()` | Clear queued messages | - -### State Mutators - -| Method | Description | -|--------|-------------| -| `setSystemPrompt(v)` | Update system prompt | -| `setModel(m)` | Switch model | -| `setThinkingLevel(l)` | Set reasoning level | -| `setQueueMode(m)` | Set queue mode | -| `setTools(t)` | Update available tools | -| `replaceMessages(ms)` | Replace all messages | -| `appendMessage(m)` | Append a message | -| `clearMessages()` | Clear all messages | - ## License MIT