diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index eda79305..a333e15b 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -2,6 +2,14 @@ ## [Unreleased] +### Breaking Changes + +- Removed provider-level tool argument validation. Validation now happens in `agentLoop` via `executeToolCalls`, allowing models to retry on validation errors. For manual tool execution, use `validateToolCall(tools, toolCall)` or `validateToolArguments(tool, toolCall)`. + +### Added + +- Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments. + ## [0.13.0] - 2025-12-06 ### Breaking Changes diff --git a/packages/ai/README.md b/packages/ai/README.md index c633fa76..52d69605 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -194,8 +194,8 @@ const response = await complete(model, context); // Check for tool calls in the response for (const block of response.content) { if (block.type === 'toolCall') { - // Arguments are automatically validated against the TypeBox schema using AJV - // If validation fails, an error event is emitted + // Execute your tool with the arguments + // See "Validating Tool Arguments" section for validation const result = await executeWeatherApi(block.arguments); // Add tool result with text content @@ -253,7 +253,7 @@ for await (const event of s) { } if (event.type === 'toolcall_end') { - // Here toolCall.arguments is complete and validated + // Here toolCall.arguments is complete (but not yet validated) const toolCall = event.toolCall; console.log(`Tool completed: ${toolCall.name}`, toolCall.arguments); } @@ -267,9 +267,44 @@ for await (const event of s) { - Arrays may be incomplete - Nested objects may be partially populated - At minimum, `arguments` will be an empty object `{}`, never `undefined` -- Full validation only occurs at `toolcall_end` when arguments are complete - The Google provider does not support function call streaming. 
Instead, you will receive a single `toolcall_delta` event with the full arguments. +### Validating Tool Arguments + +When using `agentLoop`, tool arguments are automatically validated against your TypeBox schemas before execution. If validation fails, the error is returned to the model as a tool result, allowing it to retry. + +When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools: + +```typescript +import { stream, validateToolCall, Tool } from '@mariozechner/pi-ai'; + +const tools: Tool[] = [weatherTool, calculatorTool]; +const s = stream(model, { messages, tools }); + +for await (const event of s) { + if (event.type === 'toolcall_end') { + const toolCall = event.toolCall; + + try { + // Validate arguments against the tool's schema (throws on invalid args) + const validatedArgs = validateToolCall(tools, toolCall); + const result = await executeMyTool(toolCall.name, validatedArgs); + // ... 
add tool result to context + } catch (error) { + // Validation failed - return error as tool result so model can retry + messages.push({ + role: 'toolResult', + toolCallId: toolCall.id, + toolName: toolCall.name, + content: [{ type: 'text', text: error instanceof Error ? error.message : String(error) }], + isError: true, + timestamp: Date.now() + }); + } + } +} +``` + ### Complete Event Reference All streaming events emitted during assistant message generation: diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts index 874686b7..f9ed0753 100644 --- a/packages/ai/src/index.ts +++ b/packages/ai/src/index.ts @@ -8,3 +8,4 @@ export * from "./stream.js"; export * from "./types.js"; export * from "./utils/overflow.js"; export * from "./utils/typebox-helpers.js"; +export * from "./utils/validation.js"; diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index e2e91be2..ff6e60e2 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -25,7 +25,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { validateToolArguments } from "../utils/validation.js"; + import { transformMessages } from "./transorm-messages.js"; /** @@ -231,15 +231,6 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = ( }); } else if (block.type === "toolCall") { block.arguments = parseStreamingJson(block.partialJson); - - // Validate tool arguments if tool definition is available - if (context.tools) { - const tool = context.tools.find((t) => t.name === block.name); - if (tool) { - block.arguments = validateToolArguments(tool, block); - } - } - delete (block as any).partialJson; stream.push({ type: "toolcall_end", diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index 9d3ade4f..5b5a0356 100644 --- 
a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -23,7 +23,7 @@ import type { } from "../types.js"; import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { validateToolArguments } from "../utils/validation.js"; + import { transformMessages } from "./transorm-messages.js"; export interface GoogleOptions extends StreamOptions { @@ -166,14 +166,6 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = ( ...(part.thoughtSignature && { thoughtSignature: part.thoughtSignature }), }; - // Validate tool arguments if tool definition is available - if (context.tools) { - const tool = context.tools.find((t) => t.name === toolCall.name); - if (tool) { - toolCall.arguments = validateToolArguments(tool, toolCall); - } - } - output.content.push(toolCall); stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output }); stream.push({ diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index a3c0a17e..ca9f1c30 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -23,7 +23,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { validateToolArguments } from "../utils/validation.js"; + import { transformMessages } from "./transorm-messages.js"; export interface OpenAICompletionsOptions extends StreamOptions { @@ -84,15 +84,6 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = ( }); } else if (block.type === "toolCall") { block.arguments = JSON.parse(block.partialArgs || "{}"); - - // Validate tool arguments if tool definition is available - if (context.tools) { - const tool = context.tools.find((t) => 
t.name === block.name); - if (tool) { - block.arguments = validateToolArguments(tool, block); - } - } - delete block.partialArgs; stream.push({ type: "toolcall_end", diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index 76a582be..c36e5254 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -27,7 +27,7 @@ import type { import { AssistantMessageEventStream } from "../utils/event-stream.js"; import { parseStreamingJson } from "../utils/json-parse.js"; import { sanitizeSurrogates } from "../utils/sanitize-unicode.js"; -import { validateToolArguments } from "../utils/validation.js"; + import { transformMessages } from "./transorm-messages.js"; // OpenAI Responses-specific options @@ -239,14 +239,6 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = ( arguments: JSON.parse(item.arguments), }; - // Validate tool arguments if tool definition is available - if (context.tools) { - const tool = context.tools.find((t) => t.name === toolCall.name); - if (tool) { - toolCall.arguments = validateToolArguments(tool, toolCall); - } - } - stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output }); } } diff --git a/packages/ai/src/utils/validation.ts b/packages/ai/src/utils/validation.ts index 08335807..4c778880 100644 --- a/packages/ai/src/utils/validation.ts +++ b/packages/ai/src/utils/validation.ts @@ -27,6 +27,21 @@ if (!isBrowserExtension) { } } +/** + * Finds a tool by name and validates the tool call arguments against its TypeBox schema + * @param tools Array of tool definitions + * @param toolCall The tool call from the LLM + * @returns The validated arguments + * @throws Error if tool is not found or validation fails + */ +export function validateToolCall(tools: Tool[], toolCall: ToolCall): any { + const tool = tools.find((t) => t.name === toolCall.name); + if (!tool) { + throw new Error(`Tool 
"${toolCall.name}" not found`); + } + return validateToolArguments(tool, toolCall); +} + /** * Validates tool call arguments against the tool's TypeBox schema * @param tool The tool definition with TypeBox schema diff --git a/packages/coding-agent/test/tools.test.ts b/packages/coding-agent/test/tools.test.ts index 8331315b..9eeebd1e 100644 --- a/packages/coding-agent/test/tools.test.ts +++ b/packages/coding-agent/test/tools.test.ts @@ -43,7 +43,8 @@ describe("Coding Agent Tools", () => { const result = await readTool.execute("test-call-1", { path: testFile }); expect(getTextOutput(result)).toBe(content); - expect(getTextOutput(result)).not.toContain("more lines not shown"); + // No truncation message since file fits within limits + expect(getTextOutput(result)).not.toContain("Use offset="); expect(result.details).toBeUndefined(); }); @@ -64,23 +65,21 @@ describe("Coding Agent Tools", () => { expect(output).toContain("Line 1"); expect(output).toContain("Line 2000"); expect(output).not.toContain("Line 2001"); - expect(output).toContain("500 more lines not shown"); - expect(output).toContain("Use offset=2001 to continue reading"); + expect(output).toContain("[Showing lines 1-2000 of 2500. 
Use offset=2001 to continue]"); }); - it("should truncate long lines and show notice", async () => { - const testFile = join(testDir, "long-lines.txt"); - const longLine = "a".repeat(3000); - const content = `Short line\n${longLine}\nAnother short line`; - writeFileSync(testFile, content); + it("should truncate when byte limit exceeded", async () => { + const testFile = join(testDir, "large-bytes.txt"); + // Create file that exceeds 50KB byte limit but has fewer than 2000 lines + const lines = Array.from({ length: 500 }, (_, i) => `Line ${i + 1}: ${"x".repeat(200)}`); + writeFileSync(testFile, lines.join("\n")); const result = await readTool.execute("test-call-4", { path: testFile }); const output = getTextOutput(result); - expect(output).toContain("Short line"); - expect(output).toContain("Another short line"); - expect(output).toContain("Some lines were truncated to 2000 characters"); - expect(output.split("\n")[1].length).toBe(2000); + expect(output).toContain("Line 1:"); + // Should show byte limit message + expect(output).toMatch(/\[Showing lines 1-\d+ of 500 \(.* limit\)\. Use offset=\d+ to continue\]/); }); it("should handle offset parameter", async () => { @@ -94,7 +93,8 @@ describe("Coding Agent Tools", () => { expect(output).not.toContain("Line 50"); expect(output).toContain("Line 51"); expect(output).toContain("Line 100"); - expect(output).not.toContain("more lines not shown"); + // No truncation message since file fits within limits + expect(output).not.toContain("Use offset="); }); it("should handle limit parameter", async () => { @@ -108,8 +108,7 @@ describe("Coding Agent Tools", () => { expect(output).toContain("Line 1"); expect(output).toContain("Line 10"); expect(output).not.toContain("Line 11"); - expect(output).toContain("90 more lines not shown"); - expect(output).toContain("Use offset=11 to continue reading"); + expect(output).toContain("[90 more lines in file. 
Use offset=11 to continue]"); }); it("should handle offset + limit together", async () => { @@ -128,8 +127,7 @@ describe("Coding Agent Tools", () => { expect(output).toContain("Line 41"); expect(output).toContain("Line 60"); expect(output).not.toContain("Line 61"); - expect(output).toContain("40 more lines not shown"); - expect(output).toContain("Use offset=61 to continue reading"); + expect(output).toContain("[40 more lines in file. Use offset=61 to continue]"); }); it("should show error when offset is beyond file length", async () => { @@ -141,17 +139,19 @@ describe("Coding Agent Tools", () => { ); }); - it("should show both truncation notices when applicable", async () => { - const testFile = join(testDir, "both-truncations.txt"); - const longLine = "b".repeat(3000); - const lines = Array.from({ length: 2500 }, (_, i) => (i === 500 ? longLine : `Line ${i + 1}`)); + it("should include truncation details when truncated", async () => { + const testFile = join(testDir, "large-file.txt"); + const lines = Array.from({ length: 2500 }, (_, i) => `Line ${i + 1}`); writeFileSync(testFile, lines.join("\n")); const result = await readTool.execute("test-call-9", { path: testFile }); - const output = getTextOutput(result); - expect(output).toContain("Some lines were truncated to 2000 characters"); - expect(output).toContain("500 more lines not shown"); + expect(result.details).toBeDefined(); + expect(result.details?.truncation).toBeDefined(); + expect(result.details?.truncation?.truncated).toBe(true); + expect(result.details?.truncation?.truncatedBy).toBe("lines"); + expect(result.details?.truncation?.totalLines).toBe(2500); + expect(result.details?.truncation?.outputLines).toBe(2000); }); }); @@ -276,7 +276,7 @@ describe("Coding Agent Tools", () => { expect(output).toContain("context.txt-1- before"); expect(output).toContain("context.txt:2: match one"); expect(output).toContain("context.txt-3- after"); - expect(output).toContain("(truncated, limit of 1 matches reached)"); + 
expect(output).toContain("[1 matches limit reached. Use limit=2 for more, or refine pattern]"); // Ensure second match is not present expect(output).not.toContain("match two"); });