From 51f5448a5c20ef2bb8ff92054506551acf807fef Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Wed, 1 Oct 2025 22:18:30 +0200 Subject: [PATCH] Remove tool calls for which there are no results in subsequent user messages. --- CLAUDE.md | 7 +- README.md | 1 + .../ai/src/providers/transorm-messages.ts | 108 +++++++++++++----- .../ai/test/tool-call-without-result.test.ts | 82 +++++++++++++ 4 files changed, 169 insertions(+), 29 deletions(-) create mode 100644 packages/ai/test/tool-call-without-result.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index 24a407a6..20574e1e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,9 +1,12 @@ -- When receiving the first user message, you MUST read the following files in full, in parallel: +- When receiving the first user message, ask the user which module(s) they want to work on. Then you MUST read the corresponding README.md files in full, in parallel: - README.md - packages/ai/README.md - packages/tui/README.md - packages/agent/README.md - packages/pods/README.md + - packages/browser-extension/README.md - We must NEVER have type any anywhere, unless absolutely, positively necessary. - If you are working with an external API, check node_modules for the type definitions as needed instead of assuming things. -- Always run `npm run check` in the project's root directory after making code changes. \ No newline at end of file +- Always run `npm run check` in the project's root directory after making code changes. +- You must NEVER run `npm run dev` yourself. Doing is means you failed the user hard. +- \ No newline at end of file diff --git a/README.md b/README.md index f3fe3686..d740dab3 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ A collection of tools for managing LLM deployments and building AI agents. - **[@mariozechner/pi-ai](packages/ai)** - Unified multi-provider LLM API - **[@mariozechner/pi-tui](packages/tui)** - Terminal UI library with differential rendering +- **[@mariozechner/pi](packages/browser-extension)** - CLI for managing vLLM deployments on GPU pods - **[@mariozechner/pi-agent](packages/agent)** - General-purpose agent with tool calling and session persistence - **[@mariozechner/pi](packages/pods)** - CLI for managing vLLM deployments on GPU pods diff --git a/packages/ai/src/providers/transorm-messages.ts b/packages/ai/src/providers/transorm-messages.ts index bd876b8d..72f32ba7 100644 --- a/packages/ai/src/providers/transorm-messages.ts +++ b/packages/ai/src/providers/transorm-messages.ts @@ -1,40 +1,94 @@ import type { Api, AssistantMessage, Message, Model } from "../types.js"; export function transformMessages(messages: Message[], model: Model): Message[] { - return messages.map((msg) => { - // User and toolResult messages pass through unchanged - if (msg.role === "user" || msg.role === "toolResult") { - return msg; - } - - // Assistant messages need transformation check - if (msg.role === "assistant") { - const assistantMsg = msg as AssistantMessage; - - // If message is from the same provider and API, keep as is - if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) { + return messages + .map((msg) => { + // User and toolResult messages pass through unchanged + if (msg.role === "user" || msg.role === "toolResult") { return msg; } - // Transform message from different provider/model - const transformedContent = assistantMsg.content.map((block) => { - if (block.type === "thinking") { - // Convert thinking block to text block with tags - return { - type: "text" as const, - text: `\n${block.thinking}\n`, - }; + // Assistant messages need transformation check + if (msg.role === "assistant") { + const assistantMsg = msg as AssistantMessage; + + // If message is from the same provider and API, keep as is + if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) { + return msg; } - // All other blocks (text, toolCall) pass through unchanged - return block; + + // Transform message from different provider/model + const transformedContent = assistantMsg.content.map((block) => { + if (block.type === "thinking") { + // Convert thinking block to text block with tags + return { + type: "text" as const, + text: `\n${block.thinking}\n`, + }; + } + // All other blocks (text, toolCall) pass through unchanged + return block; + }); + + // Return transformed assistant message + return { + ...assistantMsg, + content: transformedContent, + }; + } + return msg; + }) + .map((msg, index, allMessages) => { + // Second pass: filter out tool calls without corresponding tool results + if (msg.role !== "assistant") { + return msg; + } + + const assistantMsg = msg as AssistantMessage; + const isLastMessage = index === allMessages.length - 1; + + // If this is the last message, keep all tool calls (ongoing turn) + if (isLastMessage) { + return msg; + } + + // Extract tool call IDs from this message + const toolCallIds = assistantMsg.content + .filter((block) => block.type === "toolCall") + .map((block) => (block.type === "toolCall" ? block.id : "")); + + // If no tool calls, return as is + if (toolCallIds.length === 0) { + return msg; + } + + // Scan forward through subsequent messages to find matching tool results + const matchedToolCallIds = new Set(); + for (let i = index + 1; i < allMessages.length; i++) { + const nextMsg = allMessages[i]; + + // Stop scanning when we hit another assistant message + if (nextMsg.role === "assistant") { + break; + } + + // Check tool result messages for matching IDs + if (nextMsg.role === "toolResult") { + matchedToolCallIds.add(nextMsg.toolCallId); + } + } + + // Filter out tool calls that don't have corresponding results + const filteredContent = assistantMsg.content.filter((block) => { + if (block.type === "toolCall") { + return matchedToolCallIds.has(block.id); + } + return true; // Keep all non-toolCall blocks }); - // Return transformed assistant message return { ...assistantMsg, - content: transformedContent, + content: filteredContent, }; - } - return msg; - }); + }); } diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts new file mode 100644 index 00000000..fbf92c2e --- /dev/null +++ b/packages/ai/test/tool-call-without-result.test.ts @@ -0,0 +1,82 @@ +import { type Static, Type } from "@sinclair/typebox"; +import { describe, expect, it } from "vitest"; +import { getModel } from "../src/models.js"; +import { complete } from "../src/stream.js"; +import type { Context, Tool } from "../src/types.js"; + +// Simple calculate tool +const calculateSchema = Type.Object({ + expression: Type.String({ description: "The mathematical expression to evaluate" }), +}); + +type CalculateParams = Static; + +const calculateTool: Tool = { + name: "calculate", + description: "Evaluate mathematical expressions", + parameters: calculateSchema, +}; + +describe("Tool Call Without Result Tests", () => { + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider - Missing Tool Result", () => { + const model = getModel("anthropic", "claude-3-5-haiku-20241022"); + + it("should filter out tool calls without corresponding tool results", async () => { + // Step 1: Create context with the calculate tool + const context: Context = { + systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.", + messages: [], + tools: [calculateTool], + }; + + // Step 2: Ask the LLM to make a tool call + context.messages.push({ + role: "user", + content: "Please calculate 25 * 18 using the calculate tool.", + }); + + // Step 3: Get the assistant's response (should contain a tool call) + const firstResponse = await complete(model, context); + context.messages.push(firstResponse); + + console.log("First response:", JSON.stringify(firstResponse, null, 2)); + + // Verify the response contains a tool call + const hasToolCall = firstResponse.content.some((block) => block.type === "toolCall"); + expect(hasToolCall).toBe(true); + + if (!hasToolCall) { + throw new Error("Expected assistant to make a tool call, but none was found"); + } + + // Step 4: Send a user message WITHOUT providing tool result + // This simulates the scenario where a tool call was aborted/cancelled + context.messages.push({ + role: "user", + content: "Never mind, just tell me what is 2+2?", + }); + + // Step 5: The fix should filter out the orphaned tool call, and the request should succeed + const secondResponse = await complete(model, context); + console.log("Second response:", JSON.stringify(secondResponse, null, 2)); + + // The request should succeed (not error) - that's the main thing we're testing + expect(secondResponse.stopReason).not.toBe("error"); + + // Should have some content in the response + expect(secondResponse.content.length).toBeGreaterThan(0); + + // The LLM may choose to answer directly or make a new tool call - either is fine + // The important thing is it didn't fail with the orphaned tool call error + const textContent = secondResponse.content + .filter((block) => block.type === "text") + .map((block) => (block.type === "text" ? block.text : "")) + .join(" "); + expect(textContent.length).toBeGreaterThan(0); + console.log("Answer:", textContent); + + // Verify the stop reason is either "stop" or "toolUse" (new tool call) + expect(["stop", "toolUse"]).toContain(secondResponse.stopReason); + }, 30000); + }); +});