Add image support in tool results across all providers

Tool results now use content blocks and can include both text and images.
All providers (Anthropic, Google, OpenAI Completions, OpenAI Responses)
correctly pass images from tool results to LLMs.

- Update ToolResultMessage type to use content blocks
- Add placeholder text for image-only tool results in Google/Anthropic
- OpenAI providers send tool result + follow-up user message with images
- Fix Anthropic JSON parsing for empty tool arguments
- Add comprehensive tests for image-only and text+image tool results
- Update README with tool result content blocks API
This commit is contained in:
Mario Zechner 2025-11-12 10:45:56 +01:00
parent 9dac37d836
commit 84dcab219b
37 changed files with 720 additions and 544 deletions

View file

@@ -95,7 +95,7 @@ export const bashTool: AgentTool<typeof bashSchema> = {
if (output) output += "\n\n";
reject(new Error(`${output}Command exited with code ${code}`));
} else {
resolve({ output: output || "(no output)", details: undefined });
resolve({ content: [{ type: "text", text: output || "(no output)" }], details: undefined });
}
});

View file

@@ -37,7 +37,7 @@ export const editTool: AgentTool<typeof editSchema> = {
) => {
const absolutePath = resolvePath(expandPath(path));
return new Promise<{ output: string; details: undefined }>((resolve, reject) => {
return new Promise<{ content: Array<{ type: "text"; text: string }>; details: undefined }>((resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error("Operation aborted"));
@@ -131,7 +131,12 @@ export const editTool: AgentTool<typeof editSchema> = {
}
resolve({
output: `Successfully replaced text in ${path}. Changed ${oldText.length} characters to ${newText.length} characters.`,
content: [
{
type: "text",
text: `Successfully replaced text in ${path}. Changed ${oldText.length} characters to ${newText.length} characters.`,
},
],
details: undefined,
});
} catch (error: any) {

View file

@@ -1,9 +1,9 @@
import * as os from "node:os";
import type { AgentTool } from "@mariozechner/pi-ai";
import type { AgentTool, ImageContent, TextContent } from "@mariozechner/pi-ai";
import { Type } from "@sinclair/typebox";
import { constants } from "fs";
import { access, readFile } from "fs/promises";
import { resolve as resolvePath } from "path";
import { extname, resolve as resolvePath } from "path";
/**
* Expand ~ to home directory
@@ -18,6 +18,27 @@ function expandPath(filePath: string): string {
return filePath;
}
/**
 * Map of file extensions to MIME types for common image formats.
 * Keys are lowercase extensions including the leading dot.
 */
const IMAGE_MIME_TYPES: Record<string, string> = {
	".jpg": "image/jpeg",
	".jpeg": "image/jpeg",
	".png": "image/png",
	".gif": "image/gif",
	".webp": "image/webp",
	".bmp": "image/bmp",
	".svg": "image/svg+xml",
};

/**
 * Resolve a file path to its image MIME type based on extension,
 * or null when the extension is not a recognized image format.
 */
function isImageFile(filePath: string): string | null {
	const mime = IMAGE_MIME_TYPES[extname(filePath).toLowerCase()];
	return mime ?? null;
}
const readSchema = Type.Object({
path: Type.String({ description: "Path to the file to read (relative or absolute)" }),
});
@@ -25,12 +46,14 @@ const readSchema = Type.Object({
export const readTool: AgentTool<typeof readSchema> = {
name: "read",
label: "read",
description: "Read the contents of a file. Returns the full file content as text.",
description:
"Read the contents of a file. Supports text files and images (jpg, png, gif, webp, bmp, svg). Images are sent as attachments to the model.",
parameters: readSchema,
execute: async (_toolCallId: string, { path }: { path: string }, signal?: AbortSignal) => {
const absolutePath = resolvePath(expandPath(path));
const mimeType = isImageFile(absolutePath);
return new Promise<{ output: string; details: undefined }>((resolve, reject) => {
return new Promise<{ content: (TextContent | ImageContent)[]; details: undefined }>((resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error("Operation aborted"));
@@ -68,8 +91,23 @@ export const readTool: AgentTool<typeof readSchema> = {
return;
}
// Read the file
const content = await readFile(absolutePath, "utf-8");
// Read the file based on type
let content: (TextContent | ImageContent)[];
if (mimeType) {
// Read as image (binary)
const buffer = await readFile(absolutePath);
const base64 = buffer.toString("base64");
content = [
{ type: "text", text: `Read image file: ${path}` },
{ type: "image", data: base64, mimeType },
];
} else {
// Read as text
const textContent = await readFile(absolutePath, "utf-8");
content = [{ type: "text", text: textContent }];
}
// Check if aborted after reading
if (aborted) {
@@ -81,7 +119,7 @@ export const readTool: AgentTool<typeof readSchema> = {
signal.removeEventListener("abort", onAbort);
}
resolve({ output: content, details: undefined });
resolve({ content, details: undefined });
} catch (error: any) {
// Clean up abort handler
if (signal) {

View file

@@ -32,7 +32,7 @@ export const writeTool: AgentTool<typeof writeSchema> = {
const absolutePath = resolvePath(expandPath(path));
const dir = dirname(absolutePath);
return new Promise<{ output: string; details: undefined }>((resolve, reject) => {
return new Promise<{ content: Array<{ type: "text"; text: string }>; details: undefined }>((resolve, reject) => {
// Check if already aborted
if (signal?.aborted) {
reject(new Error("Operation aborted"));
@@ -75,7 +75,10 @@ export const writeTool: AgentTool<typeof writeSchema> = {
signal.removeEventListener("abort", onAbort);
}
resolve({ output: `Successfully wrote ${content.length} bytes to ${path}`, details: undefined });
resolve({
content: [{ type: "text", text: `Successfully wrote ${content.length} bytes to ${path}` }],
details: undefined,
});
} catch (error: any) {
// Clean up abort handler
if (signal) {

View file

@@ -60,7 +60,10 @@ export class ToolExecutionComponent extends Container {
private contentText: Text;
private toolName: string;
private args: any;
private result?: { output: string; isError: boolean };
private result?: {
content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
isError: boolean;
};
constructor(toolName: string, args: any) {
super();
@@ -78,7 +81,10 @@ export class ToolExecutionComponent extends Container {
this.updateDisplay();
}
updateResult(result: { output: string; isError: boolean }): void {
updateResult(result: {
content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
isError: boolean;
}): void {
this.result = result;
this.updateDisplay();
}
@@ -94,6 +100,24 @@ export class ToolExecutionComponent extends Container {
this.contentText.setText(this.formatToolExecution());
}
/**
 * Flatten the tool result's content blocks into a single display string.
 * Text blocks are joined with newlines; each image block contributes a
 * "[Image: <mimeType>]" marker appended after the text.
 */
private getTextOutput(): string {
	if (!this.result) return "";

	const blocks: any[] = this.result.content || [];
	const texts: any[] = [];
	const imageMarkers: string[] = [];
	for (const block of blocks) {
		if (block.type === "text") {
			texts.push(block.text);
		} else if (block.type === "image") {
			imageMarkers.push(`[Image: ${block.mimeType}]`);
		}
	}

	const pieces: string[] = [];
	const textPart = texts.join("\n");
	if (textPart) pieces.push(textPart);
	if (imageMarkers.length > 0) pieces.push(imageMarkers.join("\n"));
	return pieces.join("\n");
}
private formatToolExecution(): string {
let text = "";
@@ -104,7 +128,7 @@ export class ToolExecutionComponent extends Container {
if (this.result) {
// Show output without code fences - more minimal
const output = this.result.output.trim();
const output = this.getTextOutput().trim();
if (output) {
const lines = output.split("\n");
const maxLines = 5;
@@ -122,7 +146,8 @@ export class ToolExecutionComponent extends Container {
text = chalk.bold("read") + " " + (path ? chalk.cyan(path) : chalk.dim("..."));
if (this.result) {
const lines = this.result.output.split("\n");
const output = this.getTextOutput();
const lines = output.split("\n");
const maxLines = 10;
const displayLines = lines.slice(0, maxLines);
const remaining = lines.length - maxLines;
@@ -168,8 +193,9 @@ export class ToolExecutionComponent extends Container {
const content = JSON.stringify(this.args, null, 2);
text += "\n\n" + content;
if (this.result?.output) {
text += "\n" + this.result.output;
const output = this.getTextOutput();
if (output) {
text += "\n" + output;
}
}

View file

@@ -244,7 +244,7 @@ export class TuiRenderer {
assistantMsg.stopReason === "aborted" ? "Operation aborted" : assistantMsg.errorMessage || "Error";
for (const [toolCallId, component] of this.pendingTools.entries()) {
component.updateResult({
output: errorMessage,
content: [{ type: "text", text: errorMessage }],
isError: true,
});
}
@@ -273,8 +273,12 @@ export class TuiRenderer {
const component = this.pendingTools.get(event.toolCallId);
if (component) {
// Update the component with the result
const content =
typeof event.result === "string"
? [{ type: "text" as const, text: event.result }]
: event.result.content;
component.updateResult({
output: typeof event.result === "string" ? event.result : event.result.output,
content,
isError: event.isError,
});
this.pendingTools.delete(event.toolCallId);
@@ -358,7 +362,7 @@ export class TuiRenderer {
? "Operation aborted"
: assistantMsg.errorMessage || "Error";
component.updateResult({
output: errorMessage,
content: [{ type: "text", text: errorMessage }],
isError: true,
});
} else {
@@ -373,7 +377,7 @@ export class TuiRenderer {
const component = this.pendingTools.get(toolResultMsg.toolCallId);
if (component) {
component.updateResult({
output: toolResultMsg.output,
content: toolResultMsg.content,
isError: toolResultMsg.isError,
});
// Remove from pending map since it's complete