diff --git a/packages/agent/test/e2e.test.ts b/packages/agent/test/e2e.test.ts index 25475c3a..1087e7bd 100644 --- a/packages/agent/test/e2e.test.ts +++ b/packages/agent/test/e2e.test.ts @@ -80,10 +80,15 @@ async function toolExecution(model: Model) { const toolResultMsg = agent.state.messages.find((m) => m.role === "toolResult"); expect(toolResultMsg).toBeDefined(); if (toolResultMsg?.role !== "toolResult") throw new Error("Expected tool result message"); - expect(toolResultMsg.output).toBeDefined(); + const textContent = + toolResultMsg.content + ?.filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; + expect(textContent).toBeDefined(); const expectedResult = 123 * 456; - expect(toolResultMsg.output).toContain(String(expectedResult)); + expect(textContent).toContain(String(expectedResult)); const finalMessage = agent.state.messages[agent.state.messages.length - 1]; if (finalMessage.role !== "assistant") throw new Error("Expected final assistant message"); diff --git a/packages/ai/README.md b/packages/ai/README.md index 51abfd6a..ad4bb21f 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -98,7 +98,6 @@ for await (const event of s) { const finalMessage = await s.result(); context.messages.push(finalMessage); -// Handle tool calls if any // Handle tool calls if any const toolCalls = finalMessage.content.filter(b => b.type === 'toolCall'); for (const call of toolCalls) { @@ -111,13 +110,14 @@ for (const call of toolCalls) { }) : 'Unknown tool'; - // Add tool result to context + // Add tool result to context (supports text and images) context.messages.push({ role: 'toolResult', toolCallId: call.id, toolName: call.name, - output: result, - isError: false + content: [{ type: 'text', text: result }], + isError: false, + timestamp: Date.now() }); } @@ -179,7 +179,11 @@ const bookMeetingTool: Tool = { ### Handling Tool Calls +Tool results use content blocks and can include both text and images: + ```typescript +import { readFileSync } from 'fs'; + const context: Context = { messages: [{ role: 'user', content: 'What is the weather in London?' }], tools: [weatherTool] @@ -194,16 +198,31 @@ for (const block of response.content) { // If validation fails, an error event is emitted const result = await executeWeatherApi(block.arguments); - // Add tool result to continue the conversation + // Add tool result with text content context.messages.push({ role: 'toolResult', toolCallId: block.id, toolName: block.name, - output: JSON.stringify(result), - isError: false + content: [{ type: 'text', text: JSON.stringify(result) }], + isError: false, + timestamp: Date.now() }); } } + +// Tool results can also include images (for vision-capable models) +const imageBuffer = readFileSync('chart.png'); +context.messages.push({ + role: 'toolResult', + toolCallId: 'tool_xyz', + toolName: 'generate_chart', + content: [ + { type: 'text', text: 'Generated chart showing temperature trends' }, + { type: 'image', data: imageBuffer.toString('base64'), mimeType: 'image/png' } + ], + isError: false, + timestamp: Date.now() +}); ``` ### Streaming Tool Calls with Partial JSON @@ -625,7 +644,7 @@ const geminiResponse = await complete(gemini, context); All providers can handle messages from other providers, including: - Text content -- Tool calls and tool results +- Tool calls and tool results (including images in tool results) - Thinking/reasoning blocks (transformed to tagged text for cross-provider compatibility) - Aborted messages with partial content @@ -818,6 +837,23 @@ const weatherTool: AgentTool = { }; } }; + +// Tools can also return images alongside text +const chartTool: AgentTool = { + label: 'Generate Chart', + name: 'generate_chart', + description: 'Generate a chart from data', + parameters: Type.Object({ data: Type.Array(Type.Number()) }), + execute: async (toolCallId, args) => { + const chartImage = await generateChartImage(args.data); + return { + content: [ + { type: 'text', text: `Generated chart with ${args.data.length} data points` }, + { type: 'image', data: chartImage.toString('base64'), mimeType: 'image/png' } + ] + }; + } +}; ``` ### Validation and Error Handling diff --git a/packages/ai/src/agent/agent-loop.ts b/packages/ai/src/agent/agent-loop.ts index db2470d1..e93d5f7d 100644 --- a/packages/ai/src/agent/agent-loop.ts +++ b/packages/ai/src/agent/agent-loop.ts @@ -216,11 +216,15 @@ async function executeToolCalls( isError, }); + // Convert result to content blocks + const content: ToolResultMessage["content"] = + typeof resultOrError === "string" ? [{ type: "text", text: resultOrError }] : resultOrError.content; + const toolResultMessage: ToolResultMessage = { role: "toolResult", toolCallId: toolCall.id, toolName: toolCall.name, - output: typeof resultOrError === "string" ? resultOrError : resultOrError.output, + content, details: typeof resultOrError === "string" ? ({} as T) : resultOrError.details, isError, timestamp: Date.now(), diff --git a/packages/ai/src/agent/tools/calculate.ts b/packages/ai/src/agent/tools/calculate.ts index c2a4e5a8..ef34359d 100644 --- a/packages/ai/src/agent/tools/calculate.ts +++ b/packages/ai/src/agent/tools/calculate.ts @@ -1,15 +1,15 @@ import { type Static, Type } from "@sinclair/typebox"; -import type { AgentTool } from "../../agent/types.js"; +import type { AgentTool, AgentToolResult } from "../../agent/types.js"; -export interface CalculateResult { - output: string; +export interface CalculateResult extends AgentToolResult { + content: Array<{ type: "text"; text: string }>; details: undefined; } export function calculate(expression: string): CalculateResult { try { const result = new Function("return " + expression)(); - return { output: `${expression} = ${result}`, details: undefined }; + return { content: [{ type: "text", text: `${expression} = ${result}` }], details: undefined }; } catch (e: any) { throw new Error(e.message || String(e)); } diff --git a/packages/ai/src/agent/tools/get-current-time.ts b/packages/ai/src/agent/tools/get-current-time.ts index 94afd7be..a9ffed25 100644 --- a/packages/ai/src/agent/tools/get-current-time.ts +++ b/packages/ai/src/agent/tools/get-current-time.ts @@ -8,20 +8,22 @@ export async function getCurrentTime(timezone?: string): Promise { - // Output of the tool to be given to the LLM in ToolResultMessage.content - output: string; - // Details to be displayed in a UI or loggedty + // Content blocks supporting text and images + content: (TextContent | ImageContent)[]; + // Details to be displayed in a UI or logged details: T; } diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index b7fedd7a..ed76a6bc 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -9,6 +9,7 @@ import type { Api, AssistantMessage, Context, + ImageContent, Message, Model, StopReason, @@ -26,6 +27,58 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; import { validateToolArguments } from "../utils/validation.js"; import { transformMessages } from "./transorm-messages.js"; +/** + * Convert content blocks to Anthropic API format + */ +function convertContentBlocks(content: (TextContent | ImageContent)[]): + | string + | Array< + | { type: "text"; text: string } + | { + type: "image"; + source: { + type: "base64"; + media_type: "image/jpeg" | "image/png" | "image/gif" | "image/webp"; + data: string; + }; + } + > { + // If only text blocks, return as concatenated string for simplicity + const hasImages = content.some((c) => c.type === "image"); + if (!hasImages) { + return sanitizeSurrogates(content.map((c) => (c as TextContent).text).join("\n")); + } + + // If we have images, convert to content block array + const blocks = content.map((block) => { + if (block.type === "text") { + return { + type: "text" as const, + text: sanitizeSurrogates(block.text), + }; + } + return { + type: "image" as const, + source: { + type: "base64" as const, + media_type: block.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp", + data: block.data, + }, + }; + }); + + // If only images (no text), add placeholder text block + const hasText = blocks.some((b) => b.type === "text"); + if (!hasText) { + blocks.unshift({ + type: "text" as const, + text: "(see attached image)", + }); + } + + return blocks; +} + export interface AnthropicOptions extends StreamOptions { thinkingEnabled?: boolean; thinkingBudgetTokens?: number; @@ -171,7 +224,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = ( partial: output, }); } else if (block.type === "toolCall") { - block.arguments = JSON.parse(block.partialJson); + block.arguments = parseStreamingJson(block.partialJson); // Validate tool arguments if tool definition is available if (context.tools) { @@ -432,7 +485,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages"> toolResults.push({ type: "tool_result", tool_use_id: sanitizeToolCallId(msg.toolCallId), - content: sanitizeSurrogates(msg.output), + content: convertContentBlocks(msg.content), is_error: msg.isError, }); @@ -443,7 +496,7 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages"> toolResults.push({ type: "tool_result", tool_use_id: sanitizeToolCallId(nextMsg.toolCallId), - content: sanitizeSurrogates(nextMsg.output), + content: convertContentBlocks(nextMsg.content), is_error: nextMsg.isError, }); j++; diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index db84f225..132d3a57 100644 --- a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -377,20 +377,44 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context) parts, }); } else if (msg.role === "toolResult") { + // Build parts array with functionResponse and/or images + const parts: Part[] = []; + + // Extract text and image content + const textResult = msg.content + .filter((c) => c.type === "text") + .map((c) => (c as any).text) + .join("\n"); + const imageBlocks = model.input.includes("image") ? msg.content.filter((c) => c.type === "image") : []; + + // Always add functionResponse with text result (or placeholder if only images) + const hasText = textResult.length > 0; + const hasImages = imageBlocks.length > 0; + + parts.push({ + functionResponse: { + id: msg.toolCallId, + name: msg.toolName, + response: { + result: hasText ? sanitizeSurrogates(textResult) : hasImages ? "(see attached image)" : "", + isError: msg.isError, + }, + }, + }); + + // Add any images as inlineData parts + for (const imageBlock of imageBlocks) { + parts.push({ + inlineData: { + mimeType: (imageBlock as any).mimeType, + data: (imageBlock as any).data, + }, + }); + } + contents.push({ role: "user", - parts: [ - { - functionResponse: { - id: msg.toolCallId, - name: msg.toolName, - response: { - result: sanitizeSurrogates(msg.output), - isError: msg.isError, - }, - }, - }, - ], + parts, }); } } diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index ca6197fc..8740d0fe 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -386,11 +386,50 @@ function convertMessages(model: Model<"openai-completions">, context: Context): } params.push(assistantMsg); } else if (msg.role === "toolResult") { + // Extract text and image content + const textResult = msg.content + .filter((c) => c.type === "text") + .map((c) => (c as any).text) + .join("\n"); + const hasImages = msg.content.some((c) => c.type === "image"); + + // Always send tool result with text (or placeholder if only images) + const hasText = textResult.length > 0; params.push({ role: "tool", - content: sanitizeSurrogates(msg.output), + content: sanitizeSurrogates(hasText ? textResult : "(see attached image)"), tool_call_id: msg.toolCallId, }); + + // If there are images and model supports them, send a follow-up user message with images + if (hasImages && model.input.includes("image")) { + const contentBlocks: Array< + { type: "text"; text: string } | { type: "image_url"; image_url: { url: string } } + > = []; + + // Add text prefix + contentBlocks.push({ + type: "text", + text: "Attached image(s) from tool result:", + }); + + // Add images + for (const block of msg.content) { + if (block.type === "image") { + contentBlocks.push({ + type: "image_url", + image_url: { + url: `data:${(block as any).mimeType};base64,${(block as any).data}`, + }, + }); + } + } + + params.push({ + role: "user", + content: contentBlocks, + }); + } } } diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index da5ff586..accf5092 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -435,11 +435,47 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re if (output.length === 0) continue; messages.push(...output); } else if (msg.role === "toolResult") { + // Extract text and image content + const textResult = msg.content + .filter((c) => c.type === "text") + .map((c) => (c as any).text) + .join("\n"); + const hasImages = msg.content.some((c) => c.type === "image"); + + // Always send function_call_output with text (or placeholder if only images) + const hasText = textResult.length > 0; messages.push({ type: "function_call_output", call_id: msg.toolCallId.split("|")[0], - output: sanitizeSurrogates(msg.output), + output: sanitizeSurrogates(hasText ? textResult : "(see attached image)"), }); + + // If there are images and model supports them, send a follow-up user message with images + if (hasImages && model.input.includes("image")) { + const contentParts: ResponseInputContent[] = []; + + // Add text prefix + contentParts.push({ + type: "input_text", + text: "Attached image(s) from tool result:", + } satisfies ResponseInputText); + + // Add images + for (const block of msg.content) { + if (block.type === "image") { + contentParts.push({ + type: "input_image", + detail: "auto", + image_url: `data:${(block as any).mimeType};base64,${(block as any).data}`, + } satisfies ResponseInputImage); + } + } + + messages.push({ + role: "user", + content: contentParts, + }); + } } } diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 2afcdaba..0328bd50 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -114,7 +114,7 @@ export interface ToolResultMessage { role: "toolResult"; toolCallId: string; toolName: string; - output: string; + content: (TextContent | ImageContent)[]; // Supports text and images details?: TDetails; isError: boolean; timestamp: number; // Unix timestamp in milliseconds diff --git a/packages/ai/test/agent.test.ts b/packages/ai/test/agent.test.ts index f4e6222c..c939ff00 100644 --- a/packages/ai/test/agent.test.ts +++ b/packages/ai/test/agent.test.ts @@ -60,14 +60,18 @@ async function calculateTest(model: Model, options: Opti break; case "tool_execution_end": - if (!event.isError && typeof event.result === "object" && event.result.output) { + if (!event.isError && typeof event.result === "object" && event.result.content) { + const textOutput = event.result.content + .filter((c: any) => c.type === "text") + .map((c: any) => c.text) + .join("\n"); toolCallCount++; // Extract number from output like "expression = result" - const match = event.result.output.match(/=\s*([\d.]+)/); + const match = textOutput.match(/=\s*([\d.]+)/); if (match) { const value = parseFloat(match[1]); toolResults.push(value); - console.log(`Tool ${toolCallCount}: ${event.result.output}`); + console.log(`Tool ${toolCallCount}: ${textOutput}`); } } break; diff --git a/packages/ai/test/handoff.test.ts b/packages/ai/test/handoff.test.ts index b38110a3..fad942c6 100644 --- a/packages/ai/test/handoff.test.ts +++ b/packages/ai/test/handoff.test.ts @@ -55,7 +55,7 @@ const providerContexts = { role: "toolResult" as const, toolCallId: "toolu_01abc123", toolName: "get_weather", - output: "Weather in Tokyo: 18°C, partly cloudy", + content: [{ type: "text", text: "Weather in Tokyo: 18°C, partly cloudy" }], isError: false, timestamp: Date.now(), } satisfies ToolResultMessage, @@ -106,7 +106,7 @@ const providerContexts = { role: "toolResult" as const, toolCallId: "call_gemini_123", toolName: "get_weather", - output: "Weather in Berlin: 22°C, sunny", + content: [{ type: "text", text: "Weather in Berlin: 22°C, sunny" }], isError: false, timestamp: Date.now(), } satisfies ToolResultMessage, @@ -156,7 +156,7 @@ const providerContexts = { role: "toolResult" as const, toolCallId: "call_abc123", toolName: "get_weather", - output: "Weather in London: 15°C, rainy", + content: [{ type: "text", text: "Weather in London: 15°C, rainy" }], isError: false, timestamp: Date.now(), } satisfies ToolResultMessage, @@ -208,7 +208,7 @@ const providerContexts = { role: "toolResult" as const, toolCallId: "call_789_item_012", // Match the updated ID format toolName: "get_weather", - output: "Weather in Sydney: 25°C, clear", + content: [{ type: "text", text: "Weather in Sydney: 25°C, clear" }], isError: false, timestamp: Date.now(), } satisfies ToolResultMessage, diff --git a/packages/ai/test/image-tool-result.test.ts b/packages/ai/test/image-tool-result.test.ts new file mode 100644 index 00000000..37a12e72 --- /dev/null +++ b/packages/ai/test/image-tool-result.test.ts @@ -0,0 +1,263 @@ +import { readFileSync } from "node:fs"; +import { join } from "node:path"; +import { Type } from "@sinclair/typebox"; +import { describe, expect, it } from "vitest"; +import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js"; +import { complete, getModel } from "../src/index.js"; +import type { OptionsForApi } from "../src/types.js"; + +/** + * Test that tool results containing only images work correctly across all providers. + * This verifies that: + * 1. Tool results can contain image content blocks + * 2. Providers correctly pass images from tool results to the LLM + * 3. The LLM can see and describe images returned by tools + */ +async function handleToolWithImageResult(model: Model, options?: OptionsForApi) { + // Check if the model supports images + if (!model.input.includes("image")) { + console.log(`Skipping tool image result test - model ${model.id} doesn't support images`); + return; + } + + // Read the test image + const imagePath = join(__dirname, "data", "red-circle.png"); + const imageBuffer = readFileSync(imagePath); + const base64Image = imageBuffer.toString("base64"); + + // Define a tool that returns only an image (no text) + const getImageSchema = Type.Object({}); + const getImageTool: Tool = { + name: "get_circle", + description: "Returns a circle image for visualization", + parameters: getImageSchema, + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant that uses tools when asked.", + messages: [ + { + role: "user", + content: "Use the get_circle tool to get an image, and describe what you see, shapes, colors, etc.", + timestamp: Date.now(), + }, + ], + tools: [getImageTool], + }; + + // First request - LLM should call the tool + const firstResponse = await complete(model, context, options); + expect(firstResponse.stopReason).toBe("toolUse"); + + // Find the tool call + const toolCall = firstResponse.content.find((b) => b.type === "toolCall"); + expect(toolCall).toBeTruthy(); + if (!toolCall || toolCall.type !== "toolCall") { + throw new Error("Expected tool call"); + } + expect(toolCall.name).toBe("get_circle"); + + // Add the tool call to context + context.messages.push(firstResponse); + + // Create tool result with ONLY an image (no text) + const toolResult: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [ + { + type: "image", + data: base64Image, + mimeType: "image/png", + }, + ], + isError: false, + timestamp: Date.now(), + }; + + context.messages.push(toolResult); + + // Second request - LLM should describe the image from the tool result + const secondResponse = await complete(model, context, options); + expect(secondResponse.stopReason).toBe("stop"); + expect(secondResponse.errorMessage).toBeFalsy(); + + // Verify the LLM can see and describe the image + const textContent = secondResponse.content.find((b) => b.type === "text"); + expect(textContent).toBeTruthy(); + if (textContent && textContent.type === "text") { + const lowerContent = textContent.text.toLowerCase(); + // Should mention red and circle since that's what the image shows + expect(lowerContent).toContain("red"); + expect(lowerContent).toContain("circle"); + } +} + +/** + * Test that tool results containing both text and images work correctly across all providers. + * This verifies that: + * 1. Tool results can contain mixed content blocks (text + images) + * 2. Providers correctly pass both text and images from tool results to the LLM + * 3. The LLM can see both the text and images in tool results + */ +async function handleToolWithTextAndImageResult(model: Model, options?: OptionsForApi) { + // Check if the model supports images + if (!model.input.includes("image")) { + console.log(`Skipping tool text+image result test - model ${model.id} doesn't support images`); + return; + } + + // Read the test image + const imagePath = join(__dirname, "data", "red-circle.png"); + const imageBuffer = readFileSync(imagePath); + const base64Image = imageBuffer.toString("base64"); + + // Define a tool that returns both text and an image + const getImageSchema = Type.Object({}); + const getImageTool: Tool = { + name: "get_circle_with_description", + description: "Returns a circle image with a text description", + parameters: getImageSchema, + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant that uses tools when asked.", + messages: [ + { + role: "user", + content: "Use the get_circle_with_description tool and tell me what you learned.", + timestamp: Date.now(), + }, + ], + tools: [getImageTool], + }; + + // First request - LLM should call the tool + const firstResponse = await complete(model, context, options); + expect(firstResponse.stopReason).toBe("toolUse"); + + // Find the tool call + const toolCall = firstResponse.content.find((b) => b.type === "toolCall"); + expect(toolCall).toBeTruthy(); + if (!toolCall || toolCall.type !== "toolCall") { + throw new Error("Expected tool call"); + } + expect(toolCall.name).toBe("get_circle_with_description"); + + // Add the tool call to context + context.messages.push(firstResponse); + + // Create tool result with BOTH text and image + const toolResult: ToolResultMessage = { + role: "toolResult", + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [ + { + type: "text", + text: "This is a geometric shape with specific properties: it has a diameter of 100 pixels.", + }, + { + type: "image", + data: base64Image, + mimeType: "image/png", + }, + ], + isError: false, + timestamp: Date.now(), + }; + + context.messages.push(toolResult); + + // Second request - LLM should describe both the text and image from the tool result + const secondResponse = await complete(model, context, options); + expect(secondResponse.stopReason).toBe("stop"); + expect(secondResponse.errorMessage).toBeFalsy(); + + // Verify the LLM can see both text and image + const textContent = secondResponse.content.find((b) => b.type === "text"); + expect(textContent).toBeTruthy(); + if (textContent && textContent.type === "text") { + const lowerContent = textContent.text.toLowerCase(); + // Should mention details from the text (diameter/pixels) + expect(lowerContent.match(/diameter|100|pixel/)).toBeTruthy(); + // Should also mention the visual properties (red and circle) + expect(lowerContent).toContain("red"); + expect(lowerContent).toContain("circle"); + } +} + +describe("Tool Results with Images", () => { + describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider (gemini-2.5-flash)", () => { + const llm = getModel("google", "gemini-2.5-flash"); + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(llm); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(llm); + }); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => { + const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" }; + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(llm); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(llm); + }); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => { + const llm = getModel("openai", "gpt-5-mini"); + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(llm); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(llm); + }); + }); + + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-haiku-4-5)", () => { + const model = getModel("anthropic", "claude-haiku-4-5"); + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(model); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(model); + }); + }); + + describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-5)", () => { + const model = getModel("anthropic", "claude-sonnet-4-5"); + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(model); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(model); + }); + }); + + describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v)", () => { + const llm = getModel("openrouter", "z-ai/glm-4.5v"); + + it("should handle tool result with only image", async () => { + await handleToolWithImageResult(llm); + }); + + it("should handle tool result with text and image", async () => { + await handleToolWithTextAndImageResult(llm); + }); + }); +}); diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 5a7f58e5..3ec28db1 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -305,7 +305,7 @@ async function multiTurn(model: Model, options?: Options role: "toolResult", toolCallId: block.id, toolName: block.name, - output: `${result}`, + content: [{ type: "text", text: `${result}` }], isError: false, timestamp: Date.now(), }); diff --git a/packages/ai/test/tool-validation.test.ts b/packages/ai/test/tool-validation.test.ts index 27f3e46b..709d0946 100644 --- a/packages/ai/test/tool-validation.test.ts +++ b/packages/ai/test/tool-validation.test.ts @@ -27,7 +27,7 @@ describe("Tool Validation with TypeBox and AJV", () => { parameters: testSchema, execute: async (_toolCallId, args) => { return { - output: `Processed: ${args.name}, ${args.age}, ${args.email}`, + content: [{ type: "text", text: `Processed: ${args.name}, ${args.age}, ${args.email}` }], details: undefined, }; }, @@ -130,7 +130,11 @@ describe("Tool Validation with TypeBox and AJV", () => { const result = await testTool.execute("test-id", validInput as TestParams); - expect(result.output).toBe("Processed: John Doe, 30, john@example.com"); + const textOutput = result.content + .filter((c: any) => c.type === "text") + .map((c: any) => c.text) + .join("\n"); + expect(textOutput).toBe("Processed: John Doe, 30, john@example.com"); expect(result.details).toBeUndefined(); }); }); diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts index 1ca80e46..c52a311a 100644 --- a/packages/ai/test/unicode-surrogate.test.ts +++ b/packages/ai/test/unicode-surrogate.test.ts @@ -62,7 +62,10 @@ async function testEmojiInToolResults(llm: Model, option role: "toolResult", toolCallId: "test_1", toolName: "test_tool", - output: `Test with emoji 🙈 and other characters: + content: [ + { + type: "text", + text: `Test with emoji 🙈 and other characters: - Monkey emoji: 🙈 - Thumbs up: 👍 - Heart: ❤️ @@ -73,6 +76,8 @@ async function testEmojiInToolResults(llm: Model, option - Chinese: 你好 - Mathematical symbols: ∑∫∂√ - Special quotes: "curly" 'quotes'`, + }, + ], isError: false, timestamp: Date.now(), }; @@ -141,7 +146,10 @@ async function testRealWorldLinkedInData(llm: Model, opt role: "toolResult", toolCallId: "linkedin_1", toolName: "linkedin_skill", - output: `Post: Hab einen "Generative KI für Nicht-Techniker" Workshop gebaut. + content: [ + { + type: "text", + text: `Post: Hab einen "Generative KI für Nicht-Techniker" Workshop gebaut. Unanswered Comments: 2 => { @@ -156,6 +164,8 @@ Unanswered Comments: 2 } ] }`, + }, + ], isError: false, timestamp: Date.now(), }; @@ -226,7 +236,7 @@ async function testUnpairedHighSurrogate(llm: Model, opt role: "toolResult", toolCallId: "test_2", toolName: "test_tool", - output: `Text with unpaired surrogate: ${unpairedSurrogate} <- should be sanitized`, + content: [{ type: "text", text: `Text with unpaired surrogate: ${unpairedSurrogate} <- should be sanitized` }], isError: false, timestamp: Date.now(), }; diff --git a/packages/coding-agent/example.json b/packages/coding-agent/example.json deleted file mode 100644 index 32691ff9..00000000 --- a/packages/coding-agent/example.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "name": "example", - "version": "1.0.0", - "description": "A JSON file formatted with tabs", - "main": "index.js", - "type": "module", - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1", - "start": "node index.js", - "dev": "nodemon index.js", - "build": "tsc", - "lint": "eslint .", - "format": "prettier --write .", - "clean": "rm -rf dist node_modules" - }, - "keywords": [ - "example", - "json", - "tabs", - "nodejs", - "typescript", - "api" - ], - "author": "Assistant", - "license": "MIT", - "repository": { - "type": "git", - "url": "https://github.com/example/example-repo.git" - }, - "bugs": { - "url": "https://github.com/example/example-repo/issues" - }, - "homepage": "https://github.com/example/example-repo#readme", - "engines": { - "node": ">=18.0.0", - "npm": ">=9.0.0" - }, - "dependencies": { - "express": "^4.18.0", - "dotenv": "^16.0.3", - "axios": "^1.6.0", - "lodash": "^4.17.21", - "mongoose": "^8.0.0", - "redis": "^4.6.0", - "jsonwebtoken": "^9.0.2", - "bcrypt": "^5.1.1", - "winston": "^3.11.0" - }, - "devDependencies": { - "@types/node": "^20.10.0", - "@types/express": "^4.17.21", - "@types/bcrypt": "^5.0.2", - "@types/jsonwebtoken": "^9.0.5", - "typescript": "^5.3.3", - "nodemon": "^3.0.2", - "eslint": "^8.55.0", - "prettier": "^3.1.1", - "vitest": "^1.0.4", - "supertest": "^6.3.3" - }, - "config": { - "port": 3000, - "env": "development" - }, - "private": false -} diff --git a/packages/coding-agent/example.txt b/packages/coding-agent/example.txt deleted file mode 100644 index 15f02622..00000000 --- a/packages/coding-agent/example.txt +++ /dev/null @@ -1,79 +0,0 @@ -The Amazing Adventures of Fox and Dog -====================================== - -Long ago, in a mystical forest clearing, there lived an incredibly fast brown fox. -This legendary fox was renowned throughout the entire woodland for its incredible speed and agility. - -Each dawn, the fox would sprint through the ancient trees, soaring over logs and babbling brooks. -The woodland creatures gazed in wonder as it flashed past them like a streak of copper lightning. - -At the clearing's edge, there also lived a very lazy dog. -This happy dog much preferred napping in the warm sunshine to any kind of adventure. - -One fateful morning, the fox challenged the dog to an epic race across the meadow. -The dog yawned deeply and declined, saying "Why rush around when you can rest peacefully?" - -The fox laughed and zipped away, exploring distant hills and valleys. -The dog simply rolled over and continued its peaceful slumber. - -As the sun set, the fox returned, exhausted from its day of running. -The dog opened one eye and wagged its tail contentedly. - -"I've seen the whole world today!" exclaimed the tired fox proudly. -"And I've enjoyed every moment right here," replied the lazy dog. - -Sometimes speed and adventure bring joy to life's journey. -Other times, stillness and contentment are the greatest treasures. - -Both the quick fox and the lazy dog lived happily in their own ways. - -And so their friendship grew stronger with each passing season. -The fox would return from adventures with tales of distant lands. -The dog would listen contentedly, never needing to leave home. -They learned that happiness comes in many different forms. -The forest creatures admired their unlikely bond. -Some days the fox would rest beside the dog in the sunshine. -Other days the dog would take a short stroll with the fox. -They discovered balance between motion and stillness. -The wise old owl observed them from his towering oak. -He noted that both had found their true nature. -Winter came and blanketed the forest in sparkling snow. -The fox's copper fur stood out against the white landscape. -The dog found a cozy spot by the warmest rock. -They shared stories as snowflakes drifted down around them. -Spring arrived with flowers blooming across the meadow. -The fox chased butterflies through fields of wildflowers. -The dog rolled in patches of soft clover and sweet grass. -Summer brought long days of golden light and warmth. -The fox discovered hidden streams in the deep forest. -The dog found the perfect shady spot beneath an elm tree. -Autumn painted the woods in brilliant reds and golds. -The fox leaped through piles of crunchy fallen leaves. -The dog watched the changing colors from his favorite perch. -Years passed and both grew wiser in their own ways. -The fox learned when to rest and the dog learned when to play. -Young animals would visit to hear their wisdom. -"Be true to yourself," the fox would always say. -"Find joy in your own path," the dog would add. -Their story spread throughout the woodland realm. -It became a tale told to every new generation. -Parents would share it with their curious young ones. -Teachers would use it in lessons about acceptance. -Travelers would stop to see the famous pair. -Artists painted pictures of the fox and dog together. -Poets wrote verses about their enduring friendship. -Musicians composed songs celebrating their harmony. -The clearing became a place of peace and understanding. -All creatures were welcome to rest there. -The fox still runs when the spirit moves him. -The dog still naps when the mood strikes him. -Neither judges the other for their choices. -Both have found contentment in being themselves. -The moon rises over the peaceful forest each night. -Stars twinkle above the quiet clearing. -The fox and dog sleep side by side. -Dreams of adventure and rest mingle together. -Morning will bring new possibilities for both. -But tonight, all is calm and perfect. -This is how true friendship looks. -The End. diff --git a/packages/coding-agent/long-data.json b/packages/coding-agent/long-data.json deleted file mode 100644 index b7f8943a..00000000 --- a/packages/coding-agent/long-data.json +++ /dev/null @@ -1,263 +0,0 @@ -{ - "project": { - "id": "proj_9876543210", - "name": "Advanced E-Commerce Platform", - "description": "A comprehensive multi-vendor marketplace with real-time analytics", - "status": "active", - "created": "2024-01-15T08:30:00Z", - "updated": "2024-03-20T14:45:00Z", - "version": "2.4.1" - }, - "team": { - "members": [ - { - "id": "usr_001", - "name": "Sarah Chen", - "role": "Lead Developer", - "email": "sarah.chen@example.com", - "skills": ["TypeScript", "React", "Node.js", "PostgreSQL"], - "joined": "2023-06-01", - "active": true - }, - { - "id": "usr_002", - "name": "Marcus Johnson", - "role": "Backend Engineer", - "email": "marcus.j@example.com", - "skills": ["Python", "Django", "Redis", "Docker"], - "joined": "2023-07-15", - "active": true - }, - { - "id": "usr_003", - "name": "Elena Rodriguez", - "role": "UX Designer", - "email": "elena.r@example.com", - "skills": ["Figma", "UI/UX", "Prototyping", "User Research"], - "joined": "2023-08-20", - "active": true - }, - { - "id": "usr_004", - "name": "Ahmed Hassan", - "role": "DevOps Engineer", - "email": "ahmed.h@example.com", - "skills": ["Kubernetes", "AWS", "Terraform", "CI/CD"], - "joined": "2023-09-10", - "active": true - } - ], - "departments": ["Engineering", "Design", "Operations", "Marketing"] - }, - "features": { - "authentication": { - "enabled": true, - "providers": ["email", "google", "github", "facebook"], - "mfa": true, - "sessionTimeout": 3600, - "passwordPolicy": { - "minLength": 12, - "requireUppercase": true, - "requireNumbers": true, - "requireSpecialChars": true - } - }, - "payments": { - "enabled": true, - "gateways": ["stripe", "paypal", "square"], - "currencies": ["USD", "EUR", "GBP", "JPY", "CAD", "AUD"], - "refunds": true, - "subscriptions": true - }, - "analytics": { - "enabled": true, - "realtime": true, - "metrics": ["pageViews", "conversions", "revenue", "userActivity"], - "reporting": { - "daily": true, - "weekly": true, - "monthly": true, - "custom": true - } - } - }, - "infrastructure": { - "cloud": { - "provider": "AWS", - "region": "us-east-1", - "zones": ["us-east-1a", "us-east-1b", "us-east-1c"], - "services": { - "compute": ["EC2", "Lambda", "ECS"], - "storage": ["S3", "EBS", "EFS"], - "database": ["RDS", "DynamoDB", "ElastiCache"], - "networking": ["VPC", "CloudFront", "Route53"] - } - }, - "monitoring": { - "tools": ["Prometheus", "Grafana", "DataDog", "Sentry"], - "alerts": { - "email": true, - "slack": true, - "pagerduty": true - } - } - }, - "api": { - "version": "v2", - "baseUrl": "https://api.example.com", - "endpoints": [ - { - "path": "/users", - "methods": ["GET", "POST", "PUT", "DELETE"], - "auth": true, - "rateLimit": 1000 - }, - { - "path": "/products", - "methods": ["GET", "POST", "PUT", "DELETE"], - "auth": true, - "rateLimit": 5000 - }, - { - "path": "/orders", - "methods": ["GET", "POST", "PUT"], - "auth": true, - "rateLimit": 2000 - }, - { - "path": "/analytics", - "methods": ["GET"], - "auth": true, - "rateLimit": 500 - } - ], - "documentation": "https://docs.example.com/api" - }, - "database": { - "primary": { - "type": "PostgreSQL", - "version": "15.2", - "host": "db-primary.example.com", - "port": 5432, - "replicas": 3, - "backup": { - "enabled": true, - "frequency": "hourly", - "retention": 30 - } - }, - "cache": { - "type": "Redis", - "version": "7.0", - "host": "cache.example.com", - "port": 6379, - "ttl": 3600 - } - }, - "security": { - "ssl": { - "enabled": true, - "provider": "LetsEncrypt", - "autoRenew": true - }, - "firewall": { - "enabled": true, - "rules": [ - { - "name": "allow-https", - "port": 443, - "protocol": "TCP", - "source": "0.0.0.0/0" - }, - { - "name": "allow-http", - "port": 80, - "protocol": "TCP", - "source": "0.0.0.0/0" - }, - { - "name": "allow-ssh", - "port": 22, - "protocol": "TCP", - "source": "10.0.0.0/8" - } - ] - }, - "scanning": { - "vulnerabilities": true, - "dependencies": true, - "secrets": true - } - }, - "testing": { - "unit": { - "framework": "Vitest", - "coverage": 87.5, - "threshold": 80 - }, - "integration": { - "framework": "Playwright", - "browsers": ["chromium", "firefox", "webkit"], - "coverage": 72.3 - }, - "e2e": { - "framework": "Cypress", - "coverage": 65.8 - } - }, - "deployment": { - "strategy": "blue-green", - "automation": true, - "environments": [ - { - "name": "development", - "url": "https://dev.example.com", - "branch": "develop", - "autoDeployOn": ["push"] - }, - { - "name": "staging", - "url": "https://staging.example.com", - "branch": "staging", - "autoDeployOn": ["pull_request"] - }, - { - "name": "production", - "url": "https://example.com", - "branch": "main", - "autoDeployOn": ["tag"] - } - ] - }, - "logs": { - "level": "info", - "format": "json", - "retention": 90, - "aggregation": { - "enabled": true, - "service": "CloudWatch", - "queries": [ - "error count by hour", - "request latency p95", - "unique users per day" - ] - } - }, - "compliance": { - "gdpr": true, - "ccpa": true, - "hipaa": false, - "soc2": true, - "dataRetention": { - "user": 2555, - "logs": 90, - "backups": 30 - } - }, - "metadata": { - "tags": ["production", "ecommerce", "marketplace", "saas"], - "owner": "engineering-team", - "costCenter": "CC-2024-001", - "criticality": "high" - } -} diff --git a/packages/coding-agent/src/tools/bash.ts b/packages/coding-agent/src/tools/bash.ts index 3116d12d..d34b5d77 100644 --- a/packages/coding-agent/src/tools/bash.ts +++ b/packages/coding-agent/src/tools/bash.ts @@ -95,7 +95,7 @@ export const bashTool: AgentTool = { if (output) output += "\n\n"; reject(new Error(`${output}Command exited with code ${code}`)); } else { - resolve({ output: output || "(no output)", details: undefined }); + resolve({ content: [{ type: "text", text: output || "(no output)" }], details: undefined }); } }); diff --git a/packages/coding-agent/src/tools/edit.ts b/packages/coding-agent/src/tools/edit.ts index bf42ed41..3d5832f4 100644 --- a/packages/coding-agent/src/tools/edit.ts +++ b/packages/coding-agent/src/tools/edit.ts @@ -37,7 +37,7 @@ export const editTool: AgentTool = { ) => { const absolutePath = resolvePath(expandPath(path)); - return new Promise<{ output: string; details: undefined }>((resolve, reject) => { + return new Promise<{ content: Array<{ type: "text"; text: string }>; details: undefined }>((resolve, reject) => { // Check if already aborted if (signal?.aborted) { reject(new Error("Operation aborted")); @@ -131,7 +131,12 @@ export const editTool: AgentTool = { } resolve({ - output: `Successfully replaced text in ${path}. Changed ${oldText.length} characters to ${newText.length} characters.`, + content: [ + { + type: "text", + text: `Successfully replaced text in ${path}. Changed ${oldText.length} characters to ${newText.length} characters.`, + }, + ], details: undefined, }); } catch (error: any) { diff --git a/packages/coding-agent/src/tools/read.ts b/packages/coding-agent/src/tools/read.ts index 2c60b61f..6ffe69e9 100644 --- a/packages/coding-agent/src/tools/read.ts +++ b/packages/coding-agent/src/tools/read.ts @@ -1,9 +1,9 @@ import * as os from "node:os"; -import type { AgentTool } from "@mariozechner/pi-ai"; +import type { AgentTool, ImageContent, TextContent } from "@mariozechner/pi-ai"; import { Type } from "@sinclair/typebox"; import { constants } from "fs"; import { access, readFile } from "fs/promises"; -import { resolve as resolvePath } from "path"; +import { extname, resolve as resolvePath } from "path"; /** * Expand ~ to home directory @@ -18,6 +18,27 @@ function expandPath(filePath: string): string { return filePath; } +/** + * Map of file extensions to MIME types for common image formats + */ +const IMAGE_MIME_TYPES: Record = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".gif": "image/gif", + ".webp": "image/webp", + ".bmp": "image/bmp", + ".svg": "image/svg+xml", +}; + +/** + * Check if a file is an image based on its extension + */ +function isImageFile(filePath: string): string | null { + const ext = extname(filePath).toLowerCase(); + return IMAGE_MIME_TYPES[ext] || null; +} + const readSchema = Type.Object({ path: Type.String({ description: "Path to the file to read (relative or absolute)" }), }); @@ -25,12 +46,14 @@ const readSchema = Type.Object({ export const readTool: AgentTool = { name: "read", label: "read", - description: "Read the contents of a file. Returns the full file content as text.", + description: + "Read the contents of a file. Supports text files and images (jpg, png, gif, webp, bmp, svg). Images are sent as attachments to the model.", parameters: readSchema, execute: async (_toolCallId: string, { path }: { path: string }, signal?: AbortSignal) => { const absolutePath = resolvePath(expandPath(path)); + const mimeType = isImageFile(absolutePath); - return new Promise<{ output: string; details: undefined }>((resolve, reject) => { + return new Promise<{ content: (TextContent | ImageContent)[]; details: undefined }>((resolve, reject) => { // Check if already aborted if (signal?.aborted) { reject(new Error("Operation aborted")); @@ -68,8 +91,23 @@ export const readTool: AgentTool = { return; } - // Read the file - const content = await readFile(absolutePath, "utf-8"); + // Read the file based on type + let content: (TextContent | ImageContent)[]; + + if (mimeType) { + // Read as image (binary) + const buffer = await readFile(absolutePath); + const base64 = buffer.toString("base64"); + + content = [ + { type: "text", text: `Read image file: ${path}` }, + { type: "image", data: base64, mimeType }, + ]; + } else { + // Read as text + const textContent = await readFile(absolutePath, "utf-8"); + content = [{ type: "text", text: textContent }]; + } // Check if aborted after reading if (aborted) { @@ -81,7 +119,7 @@ export const readTool: AgentTool = { signal.removeEventListener("abort", onAbort); } - resolve({ output: content, details: undefined }); + resolve({ content, details: undefined }); } catch (error: any) { // Clean up abort handler if (signal) { diff --git a/packages/coding-agent/src/tools/write.ts b/packages/coding-agent/src/tools/write.ts index 37bcf5ba..b70a9fff 100644 --- a/packages/coding-agent/src/tools/write.ts +++ b/packages/coding-agent/src/tools/write.ts @@ -32,7 +32,7 @@ export const writeTool: AgentTool = { const absolutePath = resolvePath(expandPath(path)); const dir = dirname(absolutePath); - return new Promise<{ output: string; details: undefined }>((resolve, reject) => { + return new Promise<{ content: Array<{ type: "text"; text: string }>; details: undefined }>((resolve, reject) => { // Check if already aborted if (signal?.aborted) { reject(new Error("Operation aborted")); @@ -75,7 +75,10 @@ export const writeTool: AgentTool = { signal.removeEventListener("abort", onAbort); } - resolve({ output: `Successfully wrote ${content.length} bytes to ${path}`, details: undefined }); + resolve({ + content: [{ type: "text", text: `Successfully wrote ${content.length} bytes to ${path}` }], + details: undefined, + }); } catch (error: any) { // Clean up abort handler if (signal) { diff --git a/packages/coding-agent/src/tui/tool-execution.ts b/packages/coding-agent/src/tui/tool-execution.ts index 1ddf349b..5f1b33a5 100644 --- a/packages/coding-agent/src/tui/tool-execution.ts +++ b/packages/coding-agent/src/tui/tool-execution.ts @@ -60,7 +60,10 @@ export class ToolExecutionComponent extends Container { private contentText: Text; private toolName: string; private args: any; - private result?: { output: string; isError: boolean }; + private result?: { + content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>; + isError: boolean; + }; constructor(toolName: string, args: any) { super(); @@ -78,7 +81,10 @@ export class ToolExecutionComponent extends Container { this.updateDisplay(); } - updateResult(result: { output: string; isError: boolean }): void { + updateResult(result: { + content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>; + isError: boolean; + }): void { this.result = result; this.updateDisplay(); } @@ -94,6 +100,24 @@ export class ToolExecutionComponent extends Container { this.contentText.setText(this.formatToolExecution()); } + private getTextOutput(): string { + if (!this.result) return ""; + + // Extract text from content blocks + const textBlocks = this.result.content?.filter((c: any) => c.type === "text") || []; + const imageBlocks = this.result.content?.filter((c: any) => c.type === "image") || []; + + let output = textBlocks.map((c: any) => c.text).join("\n"); + + // Add indicator for images + if (imageBlocks.length > 0) { + const imageIndicators = imageBlocks.map((img: any) => `[Image: ${img.mimeType}]`).join("\n"); + output = output ? `${output}\n${imageIndicators}` : imageIndicators; + } + + return output; + } + private formatToolExecution(): string { let text = ""; @@ -104,7 +128,7 @@ export class ToolExecutionComponent extends Container { if (this.result) { // Show output without code fences - more minimal - const output = this.result.output.trim(); + const output = this.getTextOutput().trim(); if (output) { const lines = output.split("\n"); const maxLines = 5; @@ -122,7 +146,8 @@ export class ToolExecutionComponent extends Container { text = chalk.bold("read") + " " + (path ? chalk.cyan(path) : chalk.dim("...")); if (this.result) { - const lines = this.result.output.split("\n"); + const output = this.getTextOutput(); + const lines = output.split("\n"); const maxLines = 10; const displayLines = lines.slice(0, maxLines); const remaining = lines.length - maxLines; @@ -168,8 +193,9 @@ export class ToolExecutionComponent extends Container { const content = JSON.stringify(this.args, null, 2); text += "\n\n" + content; - if (this.result?.output) { - text += "\n" + this.result.output; + const output = this.getTextOutput(); + if (output) { + text += "\n" + output; } } diff --git a/packages/coding-agent/src/tui/tui-renderer.ts b/packages/coding-agent/src/tui/tui-renderer.ts index b6f32574..77f6f441 100644 --- a/packages/coding-agent/src/tui/tui-renderer.ts +++ b/packages/coding-agent/src/tui/tui-renderer.ts @@ -244,7 +244,7 @@ export class TuiRenderer { assistantMsg.stopReason === "aborted" ? "Operation aborted" : assistantMsg.errorMessage || "Error"; for (const [toolCallId, component] of this.pendingTools.entries()) { component.updateResult({ - output: errorMessage, + content: [{ type: "text", text: errorMessage }], isError: true, }); } @@ -273,8 +273,12 @@ export class TuiRenderer { const component = this.pendingTools.get(event.toolCallId); if (component) { // Update the component with the result + const content = + typeof event.result === "string" + ? [{ type: "text" as const, text: event.result }] + : event.result.content; component.updateResult({ - output: typeof event.result === "string" ? event.result : event.result.output, + content, isError: event.isError, }); this.pendingTools.delete(event.toolCallId); @@ -358,7 +362,7 @@ export class TuiRenderer { ? "Operation aborted" : assistantMsg.errorMessage || "Error"; component.updateResult({ - output: errorMessage, + content: [{ type: "text", text: errorMessage }], isError: true, }); } else { @@ -373,7 +377,7 @@ export class TuiRenderer { const component = this.pendingTools.get(toolResultMsg.toolCallId); if (component) { component.updateResult({ - output: toolResultMsg.output, + content: toolResultMsg.content, isError: toolResultMsg.isError, }); // Remove from pending map since it's complete diff --git a/packages/coding-agent/test.json b/packages/coding-agent/test.json deleted file mode 100644 index f8001741..00000000 --- a/packages/coding-agent/test.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "name": "test-file", - "version": "1.0.0", - "description": "A test JSON file with tab indentation", - "author": "coding-agent", - "data": { - "items": [ - { - "id": 1, - "name": "First item", - "active": true - }, - { - "id": 2, - "name": "Second item", - "active": false - } - ], - "metadata": { - "created": "2024-11-11", - "tags": [ - "test", - "example", - "json" - ] - } - } -} diff --git a/packages/coding-agent/test/tools.test.ts b/packages/coding-agent/test/tools.test.ts index 7f019402..36277bfd 100644 --- a/packages/coding-agent/test/tools.test.ts +++ b/packages/coding-agent/test/tools.test.ts @@ -7,6 +7,16 @@ import { editTool } from "../src/tools/edit.js"; import { readTool } from "../src/tools/read.js"; import { writeTool } from "../src/tools/write.js"; +// Helper to extract text from content blocks +function getTextOutput(result: any): string { + return ( + result.content + ?.filter((c: any) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || "" + ); +} + describe("Coding Agent Tools", () => { let testDir: string; @@ -29,7 +39,7 @@ describe("Coding Agent Tools", () => { const result = await readTool.execute("test-call-1", { path: testFile }); - expect(result.output).toBe(content); + expect(getTextOutput(result)).toBe(content); expect(result.details).toBeUndefined(); }); @@ -38,8 +48,8 @@ describe("Coding Agent Tools", () => { const result = await readTool.execute("test-call-2", { path: testFile }); - expect(result.output).toContain("Error"); - expect(result.output).toContain("File not found"); + expect(getTextOutput(result)).toContain("Error"); + expect(getTextOutput(result)).toContain("File not found"); }); }); @@ -50,8 +60,8 @@ describe("Coding Agent Tools", () => { const result = await writeTool.execute("test-call-3", { path: testFile, content }); - expect(result.output).toContain("Successfully wrote"); - expect(result.output).toContain(testFile); + expect(getTextOutput(result)).toContain("Successfully wrote"); + expect(getTextOutput(result)).toContain(testFile); expect(result.details).toBeUndefined(); }); @@ -61,7 +71,7 @@ describe("Coding Agent Tools", () => { const result = await writeTool.execute("test-call-4", { path: testFile, content }); - expect(result.output).toContain("Successfully wrote"); + expect(getTextOutput(result)).toContain("Successfully wrote"); }); }); @@ -77,7 +87,7 @@ describe("Coding Agent Tools", () => { newText: "testing", }); - expect(result.output).toContain("Successfully replaced"); + expect(getTextOutput(result)).toContain("Successfully replaced"); expect(result.details).toBeUndefined(); }); @@ -92,7 +102,7 @@ describe("Coding Agent Tools", () => { newText: "testing", }); - expect(result.output).toContain("Could not find the exact text"); + expect(getTextOutput(result)).toContain("Could not find the exact text"); }); it("should fail if text appears multiple times", async () => { @@ -106,7 +116,7 @@ describe("Coding Agent Tools", () => { newText: "bar", }); - expect(result.output).toContain("Found 3 occurrences"); + expect(getTextOutput(result)).toContain("Found 3 occurrences"); }); }); @@ -114,20 +124,20 @@ describe("Coding Agent Tools", () => { it("should execute simple commands", async () => { const result = await bashTool.execute("test-call-8", { command: "echo 'test output'" }); - expect(result.output).toContain("test output"); + expect(getTextOutput(result)).toContain("test output"); expect(result.details).toBeUndefined(); }); it("should handle command errors", async () => { const result = await bashTool.execute("test-call-9", { command: "exit 1" }); - expect(result.output).toContain("Command failed"); + expect(getTextOutput(result)).toContain("Command failed"); }); it("should respect timeout", async () => { const result = await bashTool.execute("test-call-10", { command: "sleep 35" }); - expect(result.output).toContain("Command failed"); + expect(getTextOutput(result)).toContain("Command failed"); }, 35000); }); }); diff --git a/packages/web-ui/src/components/Messages.ts b/packages/web-ui/src/components/Messages.ts index 46304327..267ec223 100644 --- a/packages/web-ui/src/components/Messages.ts +++ b/packages/web-ui/src/components/Messages.ts @@ -191,7 +191,12 @@ export class ToolMessageDebugView extends LitElement { } override render() { - const output = this.pretty(this.result?.output); + const textOutput = + this.result?.content + ?.filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; + const output = this.pretty(textOutput); const details = this.pretty(this.result?.details); return html` @@ -240,7 +245,7 @@ export class ToolMessage extends LitElement { ? { role: "toolResult", isError: true, - output: "", + content: [], toolCallId: this.toolCall.id, toolName: this.toolCall.name, timestamp: Date.now(), diff --git a/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts b/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts index 58469ef5..7c909258 100644 --- a/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts +++ b/packages/web-ui/src/tools/artifacts/artifacts-tool-renderer.ts @@ -10,6 +10,17 @@ import type { ToolRenderer, ToolRenderResult } from "../types.js"; import { ArtifactPill } from "./ArtifactPill.js"; import type { ArtifactsPanel, ArtifactsParams } from "./artifacts.js"; +// Helper to extract text from content blocks +function getTextOutput(result: ToolResultMessage | undefined): string { + if (!result) return ""; + return ( + result.content + ?.filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || "" + ); +} + // Helper to determine language for syntax highlighting function getLanguageFromFilename(filename?: string): string { if (!filename) return "text"; @@ -109,8 +120,8 @@ export class ArtifactsToolRenderer implements ToolRenderer` : ""} ${ isHtml - ? html`` - : html`
${result.output || i18n("An error occurred")}
` + ? html`` + : html`
${getTextOutput(result) || i18n("An error occurred")}
` } @@ -124,7 +135,7 @@ export class ArtifactsToolRenderer implements ToolRenderer ${renderHeader(state, FileCode2, headerText)} -
${result.output || i18n("An error occurred")}
+
${getTextOutput(result) || i18n("An error occurred")}
`, isCustom: false, @@ -141,7 +152,7 @@ export class ArtifactsToolRenderer implements ToolRenderer @@ -157,7 +168,7 @@ export class ArtifactsToolRenderer implements ToolRenderer @@ -175,7 +186,7 @@ export class ArtifactsToolRenderer implements ToolRenderer diff --git a/packages/web-ui/src/tools/artifacts/artifacts.ts b/packages/web-ui/src/tools/artifacts/artifacts.ts index 17b86e29..b9bdef7f 100644 --- a/packages/web-ui/src/tools/artifacts/artifacts.ts +++ b/packages/web-ui/src/tools/artifacts/artifacts.ts @@ -284,7 +284,7 @@ export class ArtifactsPanel extends LitElement { // Execute mutates our local store and returns a plain output execute: async (_toolCallId: string, args: Static, _signal?: AbortSignal) => { const output = await this.executeCommand(args); - return { output, details: undefined }; + return { content: [{ type: "text", text: output }], details: undefined }; }, }; } diff --git a/packages/web-ui/src/tools/extract-document.ts b/packages/web-ui/src/tools/extract-document.ts index 5188b423..1f5ec645 100644 --- a/packages/web-ui/src/tools/extract-document.ts +++ b/packages/web-ui/src/tools/extract-document.ts @@ -166,7 +166,7 @@ export function createExtractDocumentTool(): AgentTool c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; return { content: html` diff --git a/packages/web-ui/src/tools/javascript-repl.ts b/packages/web-ui/src/tools/javascript-repl.ts index 697f553b..2c214173 100644 --- a/packages/web-ui/src/tools/javascript-repl.ts +++ b/packages/web-ui/src/tools/javascript-repl.ts @@ -187,7 +187,7 @@ export function createJavaScriptReplTool(): AgentTool c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; const files = result.details?.files || []; const attachments: Attachment[] = files.map((f, i) => { diff --git a/packages/web-ui/src/tools/renderers/BashRenderer.ts b/packages/web-ui/src/tools/renderers/BashRenderer.ts index 682792c5..0acc1b94 100644 --- a/packages/web-ui/src/tools/renderers/BashRenderer.ts +++ b/packages/web-ui/src/tools/renderers/BashRenderer.ts @@ -16,7 +16,11 @@ export class BashRenderer implements ToolRenderer { // With result: show command + output if (result && params?.command) { - const output = result.output || ""; + const output = + result.content + ?.filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; const combined = output ? `> ${params.command}\n\n${output}` : `> ${params.command}`; return { content: html` diff --git a/packages/web-ui/src/tools/renderers/CalculateRenderer.ts b/packages/web-ui/src/tools/renderers/CalculateRenderer.ts index 3c595cfb..e3e895d9 100644 --- a/packages/web-ui/src/tools/renderers/CalculateRenderer.ts +++ b/packages/web-ui/src/tools/renderers/CalculateRenderer.ts @@ -16,7 +16,11 @@ export class CalculateRenderer implements ToolRenderer c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; // Error: show expression in header, error below if (result.isError) { diff --git a/packages/web-ui/src/tools/renderers/DefaultRenderer.ts b/packages/web-ui/src/tools/renderers/DefaultRenderer.ts index 266742b1..619dc596 100644 --- a/packages/web-ui/src/tools/renderers/DefaultRenderer.ts +++ b/packages/web-ui/src/tools/renderers/DefaultRenderer.ts @@ -25,7 +25,11 @@ export class DefaultRenderer implements ToolRenderer { // With result: show header + params + result if (result) { - let outputJson = result.output || i18n("(no output)"); + let outputJson = + result.content + ?.filter((c) => c.type === "text") + .map((c: any) => c.text) + .join("\n") || i18n("(no output)"); let outputLanguage = "text"; // Try to parse and pretty-print if it's valid JSON diff --git a/packages/web-ui/src/tools/renderers/GetCurrentTimeRenderer.ts b/packages/web-ui/src/tools/renderers/GetCurrentTimeRenderer.ts index ca954f7d..cadd1af7 100644 --- a/packages/web-ui/src/tools/renderers/GetCurrentTimeRenderer.ts +++ b/packages/web-ui/src/tools/renderers/GetCurrentTimeRenderer.ts @@ -19,7 +19,11 @@ export class GetCurrentTimeRenderer implements ToolRenderer c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; const headerText = params.timezone ? `${i18n("Getting current time in")} ${params.timezone}` : i18n("Getting current date and time"); @@ -43,7 +47,11 @@ export class GetCurrentTimeRenderer implements ToolRenderer c.type === "text") + .map((c: any) => c.text) + .join("\n") || ""; // Error: show header, error below if (result.isError) {