From b315abf998300b378d5e82cac189f32ee8f375b5 Mon Sep 17 00:00:00 2001 From: Aliou Diallo Date: Thu, 5 Feb 2026 02:33:05 +0100 Subject: [PATCH 1/2] fix(coding-agent): forward images through steer/followUp during streaming prompt() computed currentImages but never passed them to _queueSteer() or _queueFollowUp() in the streaming branch. Both methods only accepted text and built content as [{ type: 'text', text }], dropping images. - _queueSteer/_queueFollowUp now accept optional ImageContent[] - streaming branch in prompt() passes currentImages through - public steer()/followUp() accept and forward optional images - RPC types, handler, and client updated for steer/follow_up images - rpc.md: document images on steer/follow_up, fix ImageContent examples --- packages/coding-agent/docs/rpc.md | 18 +++++++++-- .../coding-agent/src/core/agent-session.ts | 30 ++++++++++++------- .../coding-agent/src/modes/rpc/rpc-client.ts | 8 ++--- .../coding-agent/src/modes/rpc/rpc-mode.ts | 4 +-- .../coding-agent/src/modes/rpc/rpc-types.ts | 4 +-- 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/packages/coding-agent/docs/rpc.md b/packages/coding-agent/docs/rpc.md index 880ca29f..c49e8a90 100644 --- a/packages/coding-agent/docs/rpc.md +++ b/packages/coding-agent/docs/rpc.md @@ -38,7 +38,7 @@ Send a user prompt to the agent. Returns immediately; events stream asynchronous With images: ```json -{"type": "prompt", "message": "What's in this image?", "images": [{"type": "image", "source": {"type": "base64", "mediaType": "image/png", "data": "..."}}]} +{"type": "prompt", "message": "What's in this image?", "images": [{"type": "image", "data": "base64-encoded-data", "mimeType": "image/png"}]} ``` **During streaming**: If the agent is already streaming, you must specify `streamingBehavior` to queue the message: @@ -61,7 +61,7 @@ Response: {"id": "req-1", "type": "response", "command": "prompt", "success": true} ``` -The `images` field is optional. Each image uses `ImageContent` format with base64 or URL source. +The `images` field is optional. Each image uses `ImageContent` format: `{"type": "image", "data": "base64-encoded-data", "mimeType": "image/png"}`. #### steer @@ -71,6 +71,13 @@ Queue a steering message to interrupt the agent mid-run. Delivered after current {"type": "steer", "message": "Stop and do this instead"} ``` +With images: +```json +{"type": "steer", "message": "Look at this instead", "images": [{"type": "image", "data": "base64-encoded-data", "mimeType": "image/png"}]} +``` + +The `images` field is optional. Each image uses `ImageContent` format (same as `prompt`). + Response: ```json {"type": "response", "command": "steer", "success": true} @@ -86,6 +93,13 @@ Queue a follow-up message to be processed after the agent finishes. Delivered on {"type": "follow_up", "message": "After you're done, also do this"} ``` +With images: +```json +{"type": "follow_up", "message": "Also check this image", "images": [{"type": "image", "data": "base64-encoded-data", "mimeType": "image/png"}]} +``` + +The `images` field is optional. Each image uses `ImageContent` format (same as `prompt`). + Response: ```json {"type": "response", "command": "follow_up", "success": true} diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index 4eba14f1..20e5dadf 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -695,9 +695,9 @@ export class AgentSession { ); } if (options.streamingBehavior === "followUp") { - await this._queueFollowUp(expandedText); + await this._queueFollowUp(expandedText, currentImages); } else { - await this._queueSteer(expandedText); + await this._queueSteer(expandedText, currentImages); } return; } @@ -856,9 +856,10 @@ export class AgentSession { * Queue a steering message to interrupt the agent mid-run. * Delivered after current tool execution, skips remaining tools. * Expands skill commands and prompt templates. Errors on extension commands. + * @param images Optional image attachments to include with the message * @throws Error if text is an extension command */ - async steer(text: string): Promise { + async steer(text: string, images?: ImageContent[]): Promise { // Check for extension commands (cannot be queued) if (text.startsWith("/")) { this._throwIfExtensionCommand(text); @@ -868,16 +869,17 @@ export class AgentSession { let expandedText = this._expandSkillCommand(text); expandedText = expandPromptTemplate(expandedText, [...this.promptTemplates]); - await this._queueSteer(expandedText); + await this._queueSteer(expandedText, images); } /** * Queue a follow-up message to be processed after the agent finishes. * Delivered only when agent has no more tool calls or steering messages. * Expands skill commands and prompt templates. Errors on extension commands. + * @param images Optional image attachments to include with the message * @throws Error if text is an extension command */ - async followUp(text: string): Promise { + async followUp(text: string, images?: ImageContent[]): Promise { // Check for extension commands (cannot be queued) if (text.startsWith("/")) { this._throwIfExtensionCommand(text); @@ -887,17 +889,21 @@ export class AgentSession { let expandedText = this._expandSkillCommand(text); expandedText = expandPromptTemplate(expandedText, [...this.promptTemplates]); - await this._queueFollowUp(expandedText); + await this._queueFollowUp(expandedText, images); } /** * Internal: Queue a steering message (already expanded, no extension command check). */ - private async _queueSteer(text: string): Promise { + private async _queueSteer(text: string, images?: ImageContent[]): Promise { this._steeringMessages.push(text); + const content: (TextContent | ImageContent)[] = [{ type: "text", text }]; + if (images) { + content.push(...images); + } this.agent.steer({ role: "user", - content: [{ type: "text", text }], + content, timestamp: Date.now(), }); } @@ -905,11 +911,15 @@ export class AgentSession { /** * Internal: Queue a follow-up message (already expanded, no extension command check). */ - private async _queueFollowUp(text: string): Promise { + private async _queueFollowUp(text: string, images?: ImageContent[]): Promise { this._followUpMessages.push(text); + const content: (TextContent | ImageContent)[] = [{ type: "text", text }]; + if (images) { + content.push(...images); + } this.agent.followUp({ role: "user", - content: [{ type: "text", text }], + content, timestamp: Date.now(), }); } diff --git a/packages/coding-agent/src/modes/rpc/rpc-client.ts b/packages/coding-agent/src/modes/rpc/rpc-client.ts index 91d2b0f8..b5c61dfb 100644 --- a/packages/coding-agent/src/modes/rpc/rpc-client.ts +++ b/packages/coding-agent/src/modes/rpc/rpc-client.ts @@ -175,15 +175,15 @@ export class RpcClient { /** * Queue a steering message to interrupt the agent mid-run. */ - async steer(message: string): Promise { - await this.send({ type: "steer", message }); + async steer(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "steer", message, images }); } /** * Queue a follow-up message to be processed after the agent finishes. */ - async followUp(message: string): Promise { - await this.send({ type: "follow_up", message }); + async followUp(message: string, images?: ImageContent[]): Promise { + await this.send({ type: "follow_up", message, images }); } /** diff --git a/packages/coding-agent/src/modes/rpc/rpc-mode.ts b/packages/coding-agent/src/modes/rpc/rpc-mode.ts index 0408b2ce..78b9849e 100644 --- a/packages/coding-agent/src/modes/rpc/rpc-mode.ts +++ b/packages/coding-agent/src/modes/rpc/rpc-mode.ts @@ -328,12 +328,12 @@ export async function runRpcMode(session: AgentSession): Promise { } case "steer": { - await session.steer(command.message); + await session.steer(command.message, command.images); return success(id, "steer"); } case "follow_up": { - await session.followUp(command.message); + await session.followUp(command.message, command.images); return success(id, "follow_up"); } diff --git a/packages/coding-agent/src/modes/rpc/rpc-types.ts b/packages/coding-agent/src/modes/rpc/rpc-types.ts index 84c8d088..d21bf14f 100644 --- a/packages/coding-agent/src/modes/rpc/rpc-types.ts +++ b/packages/coding-agent/src/modes/rpc/rpc-types.ts @@ -18,8 +18,8 @@ import type { CompactionResult } from "../../core/compaction/index.js"; export type RpcCommand = // Prompting | { id?: string; type: "prompt"; message: string; images?: ImageContent[]; streamingBehavior?: "steer" | "followUp" } - | { id?: string; type: "steer"; message: string } - | { id?: string; type: "follow_up"; message: string } + | { id?: string; type: "steer"; message: string; images?: ImageContent[] } + | { id?: string; type: "follow_up"; message: string; images?: ImageContent[] } | { id?: string; type: "abort" } | { id?: string; type: "new_session"; parentSession?: string } From 2fb9f90150ae1fe90eb5e3a2b64f891fe9734fde Mon Sep 17 00:00:00 2001 From: Aliou Diallo Date: Thu, 5 Feb 2026 02:39:04 +0100 Subject: [PATCH 2/2] docs: update changelog for PR #1271 --- packages/coding-agent/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 46805a0e..81940412 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -7,6 +7,10 @@ - API keys in `auth.json` now support shell command resolution (`!command`) and environment variable lookup, matching the behavior in `models.json` - Added `minimal-mode.ts` example extension demonstrating how to override built-in tool rendering for a minimal display mode +### Fixed + +- Fixed images being silently dropped when `prompt()` is called with both `images` and `streamingBehavior` during streaming. `steer()`, `followUp()`, and the corresponding RPC commands now accept optional images. ([#1271](https://github.com/badlogic/pi-mono/pull/1271) by [@aliou](https://github.com/aliou)) + ## [0.51.6] - 2026-02-04 ### New Features