From fb1fdb6006241125c729c229675708c8201dfcfe Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sat, 20 Dec 2025 20:30:57 +0100 Subject: [PATCH] Fix orphaned tool calls by inserting synthetic empty results When a user interrupts a tool call flow (sends a message without providing tool results), APIs like OpenAI Responses and Anthropic fail because: - OpenAI requires tool outputs for function calls - OpenAI requires reasoning items to have their following items - Anthropic requires non-empty content for error tool results Instead of filtering out orphaned tool calls (which breaks thinking signatures), we now insert synthetic empty tool results with isError: true and content 'No result provided'. This preserves the conversation structure and satisfies all API requirements. --- .../ai/src/providers/transorm-messages.ts | 209 ++++++++++-------- packages/ai/test/tokens.test.ts | 30 ++- 2 files changed, 133 insertions(+), 106 deletions(-) diff --git a/packages/ai/src/providers/transorm-messages.ts b/packages/ai/src/providers/transorm-messages.ts index c1c047f6..a7226977 100644 --- a/packages/ai/src/providers/transorm-messages.ts +++ b/packages/ai/src/providers/transorm-messages.ts @@ -1,4 +1,4 @@ -import type { Api, AssistantMessage, Message, Model, ToolCall } from "../types.js"; +import type { Api, AssistantMessage, Message, Model, ToolCall, ToolResultMessage } from "../types.js"; /** * Normalize tool call ID for GitHub Copilot cross-API compatibility. 
@@ -13,118 +13,131 @@ export function transformMessages(messages: Message[], model: // Build a map of original tool call IDs to normalized IDs for github-copilot cross-API switches const toolCallIdMap = new Map(); - return messages - .map((msg) => { - // User messages pass through unchanged - if (msg.role === "user") { - return msg; - } + // First pass: transform messages (thinking blocks, tool call ID normalization) + const transformed = messages.map((msg) => { + // User messages pass through unchanged + if (msg.role === "user") { + return msg; + } - // Handle toolResult messages - normalize toolCallId if we have a mapping - if (msg.role === "toolResult") { - const normalizedId = toolCallIdMap.get(msg.toolCallId); - if (normalizedId && normalizedId !== msg.toolCallId) { - return { ...msg, toolCallId: normalizedId }; - } - return msg; - } - - // Assistant messages need transformation check - if (msg.role === "assistant") { - const assistantMsg = msg as AssistantMessage; - - // If message is from the same provider and API, keep as is - if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) { - return msg; - } - - // Check if we need to normalize tool call IDs (github-copilot cross-API) - const needsToolCallIdNormalization = - assistantMsg.provider === "github-copilot" && - model.provider === "github-copilot" && - assistantMsg.api !== model.api; - - // Transform message from different provider/model - const transformedContent = assistantMsg.content.map((block) => { - if (block.type === "thinking") { - // Convert thinking block to text block with tags - return { - type: "text" as const, - text: `\n${block.thinking}\n`, - }; - } - // Normalize tool call IDs for github-copilot cross-API switches - if (block.type === "toolCall" && needsToolCallIdNormalization) { - const toolCall = block as ToolCall; - const normalizedId = normalizeCopilotToolCallId(toolCall.id); - if (normalizedId !== toolCall.id) { - toolCallIdMap.set(toolCall.id, normalizedId); 
- return { ...toolCall, id: normalizedId }; - } - } - // All other blocks pass through unchanged - return block; - }); - - // Return transformed assistant message - return { - ...assistantMsg, - content: transformedContent, - }; + // Handle toolResult messages - normalize toolCallId if we have a mapping + if (msg.role === "toolResult") { + const normalizedId = toolCallIdMap.get(msg.toolCallId); + if (normalizedId && normalizedId !== msg.toolCallId) { + return { ...msg, toolCallId: normalizedId }; } return msg; - }) - .map((msg, index, allMessages) => { - // Second pass: filter out tool calls without corresponding tool results - if (msg.role !== "assistant") { - return msg; - } + } + // Assistant messages need transformation check + if (msg.role === "assistant") { const assistantMsg = msg as AssistantMessage; - const isLastMessage = index === allMessages.length - 1; - // If this is the last message, keep all tool calls (ongoing turn) - if (isLastMessage) { + // If message is from the same provider and API, keep as is + if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) { return msg; } - // Extract tool call IDs from this message - const toolCallIds = assistantMsg.content - .filter((block) => block.type === "toolCall") - .map((block) => (block.type === "toolCall" ? 
block.id : "")); + // Check if we need to normalize tool call IDs (github-copilot cross-API) + const needsToolCallIdNormalization = + assistantMsg.provider === "github-copilot" && + model.provider === "github-copilot" && + assistantMsg.api !== model.api; - // If no tool calls, return as is - if (toolCallIds.length === 0) { - return msg; - } - - // Scan forward through subsequent messages to find matching tool results - const matchedToolCallIds = new Set(); - for (let i = index + 1; i < allMessages.length; i++) { - const nextMsg = allMessages[i]; - - // Stop scanning when we hit another assistant message - if (nextMsg.role === "assistant") { - break; + // Transform message from different provider/model + const transformedContent = assistantMsg.content.map((block) => { + if (block.type === "thinking") { + // Convert thinking block to text block with tags + return { + type: "text" as const, + text: `\n${block.thinking}\n`, + }; } - - // Check tool result messages for matching IDs - if (nextMsg.role === "toolResult") { - matchedToolCallIds.add(nextMsg.toolCallId); + // Normalize tool call IDs for github-copilot cross-API switches + if (block.type === "toolCall" && needsToolCallIdNormalization) { + const toolCall = block as ToolCall; + const normalizedId = normalizeCopilotToolCallId(toolCall.id); + if (normalizedId !== toolCall.id) { + toolCallIdMap.set(toolCall.id, normalizedId); + return { ...toolCall, id: normalizedId }; + } } - } - - // Filter out tool calls that don't have corresponding results - const filteredContent = assistantMsg.content.filter((block) => { - if (block.type === "toolCall") { - return matchedToolCallIds.has(block.id); - } - return true; // Keep all non-toolCall blocks + // All other blocks pass through unchanged + return block; }); + // Return transformed assistant message return { ...assistantMsg, - content: filteredContent, + content: transformedContent, }; - }); + } + return msg; + }); + + // Second pass: insert synthetic empty tool results 
for orphaned tool calls + // This preserves thinking signatures and satisfies API requirements + const result: Message[] = []; + let pendingToolCalls: ToolCall[] = []; + let existingToolResultIds = new Set(); + + for (let i = 0; i < transformed.length; i++) { + const msg = transformed[i]; + + if (msg.role === "assistant") { + // If we have pending orphaned tool calls from a previous assistant, insert synthetic results now + if (pendingToolCalls.length > 0) { + for (const tc of pendingToolCalls) { + if (!existingToolResultIds.has(tc.id)) { + result.push({ + role: "toolResult", + toolCallId: tc.id, + toolName: tc.name, + content: [{ type: "text", text: "No result provided" }], + isError: true, + timestamp: Date.now(), + } as ToolResultMessage); + } + } + pendingToolCalls = []; + existingToolResultIds = new Set(); + } + + // Track tool calls from this assistant message + const assistantMsg = msg as AssistantMessage; + const toolCalls = assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[]; + if (toolCalls.length > 0) { + pendingToolCalls = toolCalls; + existingToolResultIds = new Set(); + } + + result.push(msg); + } else if (msg.role === "toolResult") { + existingToolResultIds.add(msg.toolCallId); + result.push(msg); + } else if (msg.role === "user") { + // User message interrupts tool flow - insert synthetic results for orphaned calls + if (pendingToolCalls.length > 0) { + for (const tc of pendingToolCalls) { + if (!existingToolResultIds.has(tc.id)) { + result.push({ + role: "toolResult", + toolCallId: tc.id, + toolName: tc.name, + content: [{ type: "text", text: "No result provided" }], + isError: true, + timestamp: Date.now(), + } as ToolResultMessage); + } + } + pendingToolCalls = []; + existingToolResultIds = new Set(); + } + result.push(msg); + } else { + result.push(msg); + } + } + + return result; } diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts index 489e09d4..877cb60c 100644 --- 
a/packages/ai/test/tokens.test.ts +++ b/packages/ai/test/tokens.test.ts @@ -17,7 +17,7 @@ async function testTokensOnAbort(llm: Model, options: Op messages: [ { role: "user", - content: "Write a long poem with 10 stanzas about the beauty of nature.", + content: "Write a long poem with 20 stanzas about the beauty of nature.", timestamp: Date.now(), }, ], @@ -27,10 +27,14 @@ async function testTokensOnAbort(llm: Model, options: Op const response = stream(llm, context, { ...options, signal: controller.signal }); let abortFired = false; + let text = ""; for await (const event of response) { if (!abortFired && (event.type === "text_delta" || event.type === "thinking_delta")) { - abortFired = true; - setTimeout(() => controller.abort(), 3000); + text += event.delta; + if (text.length >= 1000) { + abortFired = true; + controller.abort(); + } } } @@ -38,16 +42,26 @@ async function testTokensOnAbort(llm: Model, options: Op expect(msg.stopReason).toBe("aborted"); - // OpenAI providers only send usage in the final chunk, so when aborted they have no token stats - // Anthropic and Google send usage information early in the stream + // OpenAI providers, Gemini CLI, zai, and the GPT-OSS model on Antigravity only send usage in the final chunk, + // so when aborted they have no token stats. Anthropic and Google send usage information early in the stream + if ( + llm.api === "openai-completions" || + llm.api === "openai-responses" || + llm.provider === "google-gemini-cli" || + llm.provider === "zai" || + (llm.provider === "google-antigravity" && llm.id.includes("gpt-oss")) + ) { expect(msg.usage.input).toBe(0); expect(msg.usage.output).toBe(0); } else { expect(msg.usage.input).toBeGreaterThan(0); expect(msg.usage.output).toBeGreaterThan(0); - expect(msg.usage.cost.input).toBeGreaterThan(0); - expect(msg.usage.cost.total).toBeGreaterThan(0); + + // Antigravity Gemini and Claude models report token usage, but 
no cost + if (llm.provider !== "google-antigravity") { + expect(msg.usage.cost.input).toBeGreaterThan(0); + expect(msg.usage.cost.total).toBeGreaterThan(0); + } } }