diff --git a/packages/agent/README.md b/packages/agent/README.md index ef545216..ab3daeb9 100644 --- a/packages/agent/README.md +++ b/packages/agent/README.md @@ -26,11 +26,19 @@ const agent = new Agent({ // Subscribe to events for reactive UI updates agent.subscribe((event) => { switch (event.type) { + case 'message_start': + console.log(`${event.message.role} message started`); + break; case 'message_update': + // Only emitted for assistant messages during streaming + // event.message is partial - may have incomplete content for (const block of event.message.content) { if (block.type === 'text') process.stdout.write(block.text); } break; + case 'message_end': + console.log(`${event.message.role} message complete`); + break; case 'tool_execution_start': console.log(`Calling ${event.toolName}...`); break; @@ -44,17 +52,66 @@ await agent.prompt('Hello, world!'); console.log(agent.state.messages); ``` +## AgentMessage vs LLM Message + +The agent internally works with `AgentMessage`, a flexible type that can include: +- Standard LLM messages (`user`, `assistant`, `toolResult`) +- User messages with attachments +- Custom app-specific message types (via declaration merging) + +LLMs only understand a subset: `user`, `assistant`, and `toolResult` messages with specific content formats. The `convertToLlm` function bridges this gap. + +### Why This Separation? + +1. **Rich UI state**: Store UI-specific data (attachments metadata, custom message types) alongside the conversation +2. **Session persistence**: Save the full conversation state including app-specific messages +3. **Context manipulation**: Transform messages before sending to LLM (compaction, injection, filtering) + +### The Conversion Flow + +``` +AgentMessage[] → transformContext() → AgentMessage[] → convertToLlm() → Message[] → LLM + ↑ (optional) (required) + | + App state with custom types, + attachments, UI metadata +``` + +### Constraints + +**Messages passed to `prompt()` or queued via `queueMessage()` must convert to LLM messages with `role: "user"` or `role: "toolResult"`.** + +When calling `continue()`, the last message in the context must also convert to `user` or `toolResult`. The LLM expects to respond to a user or tool result, not to its own assistant message. + +```typescript +// OK: Standard user message +await agent.prompt('Hello'); + +// OK: Custom type that converts to user message +await agent.prompt({ role: 'hookMessage', content: 'System notification', timestamp: Date.now() }); +// But convertToLlm must handle this: +convertToLlm: (messages) => messages.map(m => { + if (m.role === 'hookMessage') { + return { role: 'user', content: m.content, timestamp: m.timestamp }; + } + return m; +}) + +// ERROR: Cannot prompt with assistant message +await agent.prompt({ role: 'assistant', content: [...], ... }); // Will fail at LLM +``` + ## Agent Options ```typescript interface AgentOptions { initialState?: Partial; - // Converts AgentMessage[] to LLM-compatible Message[] before each call. - // Default: filters to user/assistant/toolResult and converts attachments. + // Converts AgentMessage[] to LLM-compatible Message[] before each LLM call. + // Default: filters to user/assistant/toolResult and converts image attachments. convertToLlm?: (messages: AgentMessage[]) => Message[] | Promise; - // Transform context before convertToLlm (for pruning, injecting context, etc.) + // Transform context before convertToLlm (for pruning, compaction, injecting context) transformContext?: (messages: AgentMessage[], signal?: AbortSignal) => Promise; // Queue mode: 'all' sends all queued messages, 'one-at-a-time' sends one per turn @@ -76,9 +133,9 @@ interface AgentState { model: Model; thinkingLevel: ThinkingLevel; // 'off' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' tools: AgentTool[]; - messages: AgentMessage[]; + messages: AgentMessage[]; // Full conversation including custom types isStreaming: boolean; - streamMessage: AgentMessage | null; + streamMessage: AgentMessage | null; // Current partial message during streaming pendingToolCalls: Set; error?: string; } @@ -86,7 +143,9 @@ interface AgentState { ## Events -Events provide fine-grained lifecycle information for building reactive UIs: +Events provide fine-grained lifecycle information for building reactive UIs. + +### Event Types | Event | Description | |-------|-------------| @@ -95,12 +154,84 @@ Events provide fine-grained lifecycle information for building reactive UIs: | `turn_start` | New turn begins (one LLM response + tool executions) | | `turn_end` | Turn completes with assistant message and tool results | | `message_start` | Message begins (user, assistant, or toolResult) | -| `message_update` | Assistant message streaming update | +| `message_update` | **Assistant messages only.** Partial message during streaming | | `message_end` | Message completes | | `tool_execution_start` | Tool begins execution | | `tool_execution_update` | Tool streams progress | | `tool_execution_end` | Tool completes with result | +### Message Events for prompt() and queueMessage() + +When you call `prompt(message)`, the agent emits `message_start` and `message_end` events for that message before the assistant responds: + +``` +prompt(userMessage) + → message_start { message: userMessage } + → message_end { message: userMessage } + → message_start { message: assistantMessage } // LLM starts responding + → message_update { message: partialAssistant } // streaming... + → message_end { message: assistantMessage } +``` + +Queued messages (via `queueMessage()`) emit the same events when injected: + +``` +// During tool execution, a message is queued +agent.queueMessage(interruptMessage) + +// After tool completes, before next LLM call: + → message_start { message: interruptMessage } + → message_end { message: interruptMessage } + → message_start { message: assistantMessage } // LLM responds to interrupt + ... +``` + +### Handling Partial Messages in Reactive UIs + +`message_update` events contain partial assistant messages during streaming. The `event.message` may have: +- Incomplete text (truncated mid-word) +- Partial tool call arguments +- Missing content blocks that haven't started streaming yet + +**Pattern for reactive UIs:** + +```typescript +agent.subscribe((event) => { + switch (event.type) { + case 'message_start': + if (event.message.role === 'assistant') { + // Create placeholder in UI + ui.addMessage({ id: tempId, role: 'assistant', content: [] }); + } + break; + + case 'message_update': + // Replace placeholder content with partial content + // This is only emitted for assistant messages + ui.updateMessage(tempId, event.message.content); + break; + + case 'message_end': + if (event.message.role === 'assistant') { + // Finalize with complete message + ui.finalizeMessage(tempId, event.message); + } + break; + } +}); +``` + +**Accessing the current partial message:** + +During streaming, `agent.state.streamMessage` contains the current partial message. This is useful for rendering outside the event handler: + +```typescript +// In a render loop or reactive binding +if (agent.state.isStreaming && agent.state.streamMessage) { + renderPartialMessage(agent.state.streamMessage); +} +``` + ## Custom Message Types Extend `AgentMessage` for app-specific messages via declaration merging: @@ -123,10 +254,10 @@ Custom messages are stored in state but filtered out by the default `convertToLl const agent = new Agent({ convertToLlm: (messages) => { return messages - .filter(m => m.role === 'user' || m.role === 'assistant' || m.role === 'toolResult') + .filter(m => m.role !== 'notification') // Filter out UI-only messages .map(m => { - // Convert custom types or pass through if (m.role === 'artifact') { + // Convert to user message so LLM sees the artifact return { role: 'user', content: `[Artifact: ${m.language}]\n${m.code}`, timestamp: m.timestamp }; } return m; @@ -150,7 +281,7 @@ agent.queueMessage({ }); ``` -When queued messages are detected after a tool call, remaining tool calls are skipped with error results. +When queued messages are detected after a tool call, remaining tool calls are skipped with error results ("Skipped due to queued user message"). The queued message is then injected before the next assistant response. ## Images @@ -206,6 +337,7 @@ for await (const event of agentLoop(userMessage, context, config, undefined, str } // Continue from existing context (e.g., after overflow recovery) +// Last message in context must convert to 'user' or 'toolResult' for await (const event of agentLoopContinue(context, config, undefined, streamSimple)) { console.log(event.type); } @@ -217,14 +349,14 @@ for await (const event of agentLoopContinue(context, config, undefined, streamSi | Method | Description | |--------|-------------| -| `prompt(text, attachments?)` | Send a user prompt | -| `prompt(message)` | Send an AgentMessage directly | -| `continue()` | Continue from current context | +| `prompt(text, images?)` | Send a user prompt with optional images | +| `prompt(message)` | Send an AgentMessage directly (must convert to user/toolResult) | +| `continue()` | Continue from current context (last message must convert to user/toolResult) | | `abort()` | Abort current operation | | `waitForIdle()` | Promise that resolves when agent is idle | | `reset()` | Clear all messages and state | | `subscribe(fn)` | Subscribe to events, returns unsubscribe function | -| `queueMessage(msg)` | Queue message for next turn | +| `queueMessage(msg)` | Queue message for next turn (must convert to user/toolResult) | | `clearMessageQueue()` | Clear queued messages | ### State Mutators