From 51f5448a5c20ef2bb8ff92054506551acf807fef Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Wed, 1 Oct 2025 22:18:30 +0200
Subject: [PATCH] Remove tool calls for which there are no results in
 subsequent user messages.

---
 CLAUDE.md                                     |   7 +-
 README.md                                     |   1 +
 .../ai/src/providers/transorm-messages.ts     | 108 +++++++++++++-----
 .../ai/test/tool-call-without-result.test.ts  |  82 +++++++++++++
 4 files changed, 169 insertions(+), 29 deletions(-)
 create mode 100644 packages/ai/test/tool-call-without-result.test.ts
diff --git a/CLAUDE.md b/CLAUDE.md
index 24a407a6..20574e1e 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1,9 +1,12 @@
-- When receiving the first user message, you MUST read the following files in full, in parallel:
+- When receiving the first user message, ask the user which module(s) they want to work on. Then you MUST read the corresponding README.md files in full, in parallel:
     - README.md
     - packages/ai/README.md
     - packages/tui/README.md
     - packages/agent/README.md
     - packages/pods/README.md
+    - packages/browser-extension/README.md
 - We must NEVER have type any anywhere, unless absolutely, positively necessary.
 - If you are working with an external API, check node_modules for the type definitions as needed instead of assuming things.
-- Always run `npm run check` in the project's root directory after making code changes.
\ No newline at end of file
+- Always run `npm run check` in the project's root directory after making code changes.
+- You must NEVER run `npm run dev` yourself. Doing is means you failed the user hard.
+-
\ No newline at end of file
diff --git a/README.md b/README.md
index f3fe3686..d740dab3 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,7 @@ A collection of tools for managing LLM deployments and building AI agents.
 
 - **[@mariozechner/pi-ai](packages/ai)** - Unified multi-provider LLM API
 - **[@mariozechner/pi-tui](packages/tui)** - Terminal UI library with differential rendering
+- **[@mariozechner/pi](packages/browser-extension)** - CLI for managing vLLM deployments on GPU pods
 - **[@mariozechner/pi-agent](packages/agent)** - General-purpose agent with tool calling and session persistence
 - **[@mariozechner/pi](packages/pods)** - CLI for managing vLLM deployments on GPU pods
 
diff --git a/packages/ai/src/providers/transorm-messages.ts b/packages/ai/src/providers/transorm-messages.ts
index bd876b8d..72f32ba7 100644
--- a/packages/ai/src/providers/transorm-messages.ts
+++ b/packages/ai/src/providers/transorm-messages.ts
@@ -1,40 +1,94 @@
 import type { Api, AssistantMessage, Message, Model } from "../types.js";
 
 export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
-	return messages.map((msg) => {
-		// User and toolResult messages pass through unchanged
-		if (msg.role === "user" || msg.role === "toolResult") {
-			return msg;
-		}
-
-		// Assistant messages need transformation check
-		if (msg.role === "assistant") {
-			const assistantMsg = msg as AssistantMessage;
-
-			// If message is from the same provider and API, keep as is
-			if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
+	return messages
+		.map((msg) => {
+			// User and toolResult messages pass through unchanged
+			if (msg.role === "user" || msg.role === "toolResult") {
 				return msg;
 			}
 
-			// Transform message from different provider/model
-			const transformedContent = assistantMsg.content.map((block) => {
-				if (block.type === "thinking") {
-					// Convert thinking block to text block with <thinking> tags
-					return {
-						type: "text" as const,
-						text: `<thinking>\n${block.thinking}\n</thinking>`,
-					};
+			// Assistant messages need transformation check
+			if (msg.role === "assistant") {
+				const assistantMsg = msg as AssistantMessage;
+
+				// If message is from the same provider and API, keep as is
+				if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
+					return msg;
 				}
-				// All other blocks (text, toolCall) pass through unchanged
-				return block;
+
+				// Transform message from different provider/model
+				const transformedContent = assistantMsg.content.map((block) => {
+					if (block.type === "thinking") {
+						// Convert thinking block to text block with <thinking> tags
+						return {
+							type: "text" as const,
+							text: `<thinking>\n${block.thinking}\n</thinking>`,
+						};
+					}
+					// All other blocks (text, toolCall) pass through unchanged
+					return block;
+				});
+
+				// Return transformed assistant message
+				return {
+					...assistantMsg,
+					content: transformedContent,
+				};
+			}
+			return msg;
+		})
+		.map((msg, index, allMessages) => {
+			// Second pass: filter out tool calls without corresponding tool results
+			if (msg.role !== "assistant") {
+				return msg;
+			}
+
+			const assistantMsg = msg as AssistantMessage;
+			const isLastMessage = index === allMessages.length - 1;
+
+			// If this is the last message, keep all tool calls (ongoing turn)
+			if (isLastMessage) {
+				return msg;
+			}
+
+			// Extract tool call IDs from this message
+			const toolCallIds = assistantMsg.content
+				.filter((block) => block.type === "toolCall")
+				.map((block) => (block.type === "toolCall" ? block.id : ""));
+
+			// If no tool calls, return as is
+			if (toolCallIds.length === 0) {
+				return msg;
+			}
+
+			// Scan forward through subsequent messages to find matching tool results
+			const matchedToolCallIds = new Set<string>();
+			for (let i = index + 1; i < allMessages.length; i++) {
+				const nextMsg = allMessages[i];
+
+				// Stop scanning when we hit another assistant message
+				if (nextMsg.role === "assistant") {
+					break;
+				}
+
+				// Check tool result messages for matching IDs
+				if (nextMsg.role === "toolResult") {
+					matchedToolCallIds.add(nextMsg.toolCallId);
+				}
+			}
+
+			// Filter out tool calls that don't have corresponding results
+			const filteredContent = assistantMsg.content.filter((block) => {
+				if (block.type === "toolCall") {
+					return matchedToolCallIds.has(block.id);
+				}
+				return true; // Keep all non-toolCall blocks
 			});
 
-			// Return transformed assistant message
 			return {
 				...assistantMsg,
-				content: transformedContent,
+				content: filteredContent,
 			};
-		}
-		return msg;
-	});
+		});
 }
diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts
new file mode 100644
index 00000000..fbf92c2e
--- /dev/null
+++ b/packages/ai/test/tool-call-without-result.test.ts
@@ -0,0 +1,82 @@
+import { type Static, Type } from "@sinclair/typebox";
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { complete } from "../src/stream.js";
+import type { Context, Tool } from "../src/types.js";
+
+// Simple calculate tool
+const calculateSchema = Type.Object({
+	expression: Type.String({ description: "The mathematical expression to evaluate" }),
+});
+
+type CalculateParams = Static<typeof calculateSchema>;
+
+const calculateTool: Tool = {
+	name: "calculate",
+	description: "Evaluate mathematical expressions",
+	parameters: calculateSchema,
+};
+
+describe("Tool Call Without Result Tests", () => {
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider - Missing Tool Result", () => {
+		const model = getModel("anthropic", "claude-3-5-haiku-20241022");
+
+		it("should filter out tool calls without corresponding tool results", async () => {
+			// Step 1: Create context with the calculate tool
+			const context: Context = {
+				systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.",
+				messages: [],
+				tools: [calculateTool],
+			};
+
+			// Step 2: Ask the LLM to make a tool call
+			context.messages.push({
+				role: "user",
+				content: "Please calculate 25 * 18 using the calculate tool.",
+			});
+
+			// Step 3: Get the assistant's response (should contain a tool call)
+			const firstResponse = await complete(model, context);
+			context.messages.push(firstResponse);
+
+			console.log("First response:", JSON.stringify(firstResponse, null, 2));
+
+			// Verify the response contains a tool call
+			const hasToolCall = firstResponse.content.some((block) => block.type === "toolCall");
+			expect(hasToolCall).toBe(true);
+
+			if (!hasToolCall) {
+				throw new Error("Expected assistant to make a tool call, but none was found");
+			}
+
+			// Step 4: Send a user message WITHOUT providing tool result
+			// This simulates the scenario where a tool call was aborted/cancelled
+			context.messages.push({
+				role: "user",
+				content: "Never mind, just tell me what is 2+2?",
+			});
+
+			// Step 5: The fix should filter out the orphaned tool call, and the request should succeed
+			const secondResponse = await complete(model, context);
+			console.log("Second response:", JSON.stringify(secondResponse, null, 2));
+
+			// The request should succeed (not error) - that's the main thing we're testing
+			expect(secondResponse.stopReason).not.toBe("error");
+
+			// Should have some content in the response
+			expect(secondResponse.content.length).toBeGreaterThan(0);
+
+			// The LLM may choose to answer directly or make a new tool call - either is fine
+			// The important thing is it didn't fail with the orphaned tool call error
+			const textContent = secondResponse.content
+				.filter((block) => block.type === "text")
+				.map((block) => (block.type === "text" ? block.text : ""))
+				.join(" ");
+			expect(textContent.length).toBeGreaterThan(0);
+			console.log("Answer:", textContent);
+
+			// Verify the stop reason is either "stop" or "toolUse" (new tool call)
+			expect(["stop", "toolUse"]).toContain(secondResponse.stopReason);
+		}, 30000);
+	});
+});