From d327b9c7681732917b4006f7f4386c6b7deeb0d3 Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Thu, 22 Jan 2026 00:58:49 +0100
Subject: [PATCH] fix(ai): handle same-provider different-model handoff in
 OpenAI Responses API

When switching between OpenAI models (e.g., gpt-5-mini to gpt-5.2-codex),
function_call IDs with fc_ prefix trigger pairing validation errors because
OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.

The fix omits the id field for function_calls from different models, which
avoids the pairing validation while keeping call_id for matching with
function_call_output.

Fixes #886
---
 packages/ai/CHANGELOG.md                      |   4 +
 packages/ai/src/providers/openai-responses.ts |  23 +-
 ...nai-responses-reasoning-replay-e2e.test.ts | 394 +++++++++++-------
 3 files changed, 279 insertions(+), 142 deletions(-)

diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index cca9b6e7..f29d7d32 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -7,6 +7,10 @@
 - Added `headers` option to `StreamOptions` for custom HTTP headers in API requests. Supported by all providers except Amazon Bedrock (which uses AWS SDK auth). Headers are merged with provider defaults and `model.headers`, with `options.headers` taking precedence.
 - Added `originator` option to `loginOpenAICodex()` for custom OAuth client identification
 
+### Fixed
+
+- Fixed OpenAI Responses API 400 error "function_call without required reasoning item" when switching between models (same provider, different model). The fix omits the `id` field for function_calls from different models to avoid triggering OpenAI's reasoning/function_call pairing validation ([#886](https://github.com/badlogic/pi-mono/issues/886))
+
 ## [0.49.2] - 2026-01-19
 
 ### Added
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index 5e9ce8bf..a68bda11 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -488,6 +488,15 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
 			}
 		} else if (msg.role === "assistant") {
 			const output: ResponseInput = [];
+			const assistantMsg = msg as AssistantMessage;
+
+			// Check if this message is from a different model (same provider and API, different model ID).
+			// For such messages, the fc_-prefixed item ID (the part after "|" in the tool call ID) is
+			// omitted, while call_id is kept, to avoid OpenAI's reasoning/function_call pairing validation errors.
+			const isDifferentModel =
+				assistantMsg.model !== model.id &&
+				assistantMsg.provider === model.provider &&
+				assistantMsg.api === model.api;
 
 			for (const block of msg.content) {
 				if (block.type === "thinking") {
@@ -513,10 +522,20 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
 					} satisfies ResponseOutputMessage);
 				} else if (block.type === "toolCall") {
 					const toolCall = block as ToolCall;
+					const callId = toolCall.id.split("|")[0];
+					let itemId: string | undefined = toolCall.id.split("|")[1];
+
+					// For different-model messages, set id to undefined to avoid pairing validation.
+					// OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items.
+					// By omitting the id, we avoid triggering that validation (like cross-provider does).
+					if (isDifferentModel && itemId?.startsWith("fc_")) {
+						itemId = undefined;
+					}
+
 					output.push({
 						type: "function_call",
-						id: toolCall.id.split("|")[1],
-						call_id: toolCall.id.split("|")[0],
+						id: itemId,
+						call_id: callId,
 						name: toolCall.name,
 						arguments: JSON.stringify(toolCall.arguments),
 					});
diff --git a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts
index 181ddb23..4d6899d4 100644
--- a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts
+++ b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts
@@ -2,7 +2,7 @@ import { Type } from "@sinclair/typebox";
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
 import { complete, getEnvApiKey } from "../src/stream.js";
-import type { AssistantMessage, Context, Message, ThinkingContent, Tool, ToolCall } from "../src/types.js";
+import type { AssistantMessage, Context, Message, Tool, ToolCall } from "../src/types.js";
 
 const testToolSchema = Type.Object({
 	value: Type.Number({ description: "A number to double" }),
@@ -14,165 +14,279 @@ const testTool: Tool<typeof testToolSchema> = {
 	parameters: testToolSchema,
 };
 
-describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses reasoning replay e2e", () => {
-	it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => {
-		const model = getModel("openai", "gpt-5-mini");
+describe.skipIf(!process.env.OPENAI_API_KEY || !process.env.ANTHROPIC_API_KEY)(
+	"OpenAI Responses reasoning replay e2e",
+	() => {
+		it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => {
+			const model = getModel("openai", "gpt-5-mini");
 
-		const apiKey = getEnvApiKey("openai");
-		if (!apiKey) {
-			throw new Error("Missing OPENAI_API_KEY");
-		}
+			const apiKey = getEnvApiKey("openai");
+			if (!apiKey) {
+				throw new Error("Missing OPENAI_API_KEY");
+			}
 
-		const userMessage: Message = {
-			role: "user",
-			content: "Use the double_number tool to double 21.",
-			timestamp: Date.now(),
-		};
+			const userMessage: Message = {
+				role: "user",
+				content: "Use the double_number tool to double 21.",
+				timestamp: Date.now(),
+			};
 
-		const assistantResponse = await complete(
-			model,
-			{
-				systemPrompt: "You are a helpful assistant. Use the tool.",
-				messages: [userMessage],
+			const assistantResponse = await complete(
+				model,
+				{
+					systemPrompt: "You are a helpful assistant. Use the tool.",
+					messages: [userMessage],
+					tools: [testTool],
+				},
+				{
+					apiKey,
+					reasoningEffort: "high",
+				},
+			);
+
+			const thinkingBlock = assistantResponse.content.find(
+				(block) => block.type === "thinking" && block.thinkingSignature,
+			);
+			if (!thinkingBlock || thinkingBlock.type !== "thinking") {
+				throw new Error("Missing thinking signature from OpenAI Responses");
+			}
+
+			const corruptedAssistant: AssistantMessage = {
+				...assistantResponse,
+				content: [thinkingBlock],
+				stopReason: "aborted",
+			};
+
+			const followUp: Message = {
+				role: "user",
+				content: "Say hello to confirm you can continue.",
+				timestamp: Date.now(),
+			};
+
+			const context: Context = {
+				systemPrompt: "You are a helpful assistant.",
+				messages: [userMessage, corruptedAssistant, followUp],
 				tools: [testTool],
-			},
-			{
+			};
+
+			const response = await complete(model, context, {
 				apiKey,
 				reasoningEffort: "high",
-			},
-		);
+			});
 
-		const thinkingBlock = assistantResponse.content.find(
-			(block) => block.type === "thinking" && block.thinkingSignature,
-		);
-		if (!thinkingBlock || thinkingBlock.type !== "thinking") {
-			throw new Error("Missing thinking signature from OpenAI Responses");
-		}
-
-		const corruptedAssistant: AssistantMessage = {
-			...assistantResponse,
-			content: [thinkingBlock],
-			stopReason: "aborted",
-		};
-
-		const followUp: Message = {
-			role: "user",
-			content: "Say hello to confirm you can continue.",
-			timestamp: Date.now(),
-		};
-
-		const context: Context = {
-			systemPrompt: "You are a helpful assistant.",
-			messages: [userMessage, corruptedAssistant, followUp],
-			tools: [testTool],
-		};
-
-		const response = await complete(model, context, {
-			apiKey,
-			reasoningEffort: "high",
+			// The key assertion: no 400 error from orphaned reasoning item
+			expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
+			expect(response.errorMessage).toBeFalsy();
+			// Model should respond (text or tool call)
+			expect(response.content.length).toBeGreaterThan(0);
 		});
 
-		// The key assertion: no 400 error from orphaned reasoning item
-		expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
-		expect(response.errorMessage).toBeFalsy();
-		// Model should respond (text or tool call)
-		expect(response.content.length).toBeGreaterThan(0);
-	});
+		it("handles same-provider different-model handoff with tool calls", { retry: 2 }, async () => {
+			// This tests the scenario where:
+			// 1. Model A (gpt-5-mini) generates reasoning + function_call
+			// 2. User switches to Model B (gpt-5.2-codex) - same provider, different model
+			// 3. transform-messages: isSameModel=false, thinking converted to text
+			// 4. But tool call ID still has OpenAI pairing history (fc_xxx paired with rs_xxx)
+			// 5. Without fix: OpenAI returns 400 "function_call without required reasoning item"
+			// 6. With fix: the fc_ item id is omitted from the function_call (call_id is kept), conversation continues
 
-	it("drops orphaned tool calls when reasoning signature is missing", { retry: 2 }, async () => {
-		// This tests the scenario where:
-		// 1. A completed turn has reasoning + function_call
-		// 2. The thinking signature gets lost (e.g., cross-provider handoff, isSameModel=false filtering)
-		// 3. The toolCall remains but reasoning is gone
-		// 4. Without the fix: Azure/OpenAI returns 400 "function_call without required reasoning item"
-		// 5. With the fix: orphaned toolCalls are dropped, conversation continues
+			const modelA = getModel("openai", "gpt-5-mini");
+			const modelB = getModel("openai", "gpt-5.2-codex");
 
-		const model = getModel("openai", "gpt-5-mini");
+			const apiKey = getEnvApiKey("openai");
+			if (!apiKey) {
+				throw new Error("Missing OPENAI_API_KEY");
+			}
 
-		const apiKey = getEnvApiKey("openai");
-		if (!apiKey) {
-			throw new Error("Missing OPENAI_API_KEY");
-		}
+			const userMessage: Message = {
+				role: "user",
+				content: "Use the double_number tool to double 21.",
+				timestamp: Date.now(),
+			};
 
-		const userMessage: Message = {
-			role: "user",
-			content: "Use the double_number tool to double 21.",
-			timestamp: Date.now(),
-		};
+			// Get a real response from Model A with reasoning + tool call
+			const assistantResponse = await complete(
+				modelA,
+				{
+					systemPrompt: "You are a helpful assistant. Always use the tool when asked.",
+					messages: [userMessage],
+					tools: [testTool],
+				},
+				{
+					apiKey,
+					reasoningEffort: "high",
+				},
+			);
 
-		// Get a real response with reasoning + tool call
-		const assistantResponse = await complete(
-			model,
-			{
-				systemPrompt: "You are a helpful assistant. Always use the tool when asked.",
-				messages: [userMessage],
+			const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as
+				| ToolCall
+				| undefined;
+
+			if (!toolCallBlock) {
+				throw new Error("Missing tool call from OpenAI Responses - model did not use the tool");
+			}
+
+			// Provide a tool result
+			const toolResult: Message = {
+				role: "toolResult",
+				toolCallId: toolCallBlock.id,
+				toolName: toolCallBlock.name,
+				content: [{ type: "text", text: "42" }],
+				isError: false,
+				timestamp: Date.now(),
+			};
+
+			const followUp: Message = {
+				role: "user",
+				content: "What was the result? Answer with just the number.",
+				timestamp: Date.now(),
+			};
+
+			// Now continue with Model B (different model, same provider)
+			const context: Context = {
+				systemPrompt: "You are a helpful assistant. Answer concisely.",
+				messages: [userMessage, assistantResponse, toolResult, followUp],
 				tools: [testTool],
-			},
-			{
+			};
+
+			let capturedPayload: any = null;
+			const response = await complete(modelB, context, {
 				apiKey,
 				reasoningEffort: "high",
-			},
-		);
+				onPayload: (payload) => {
+					capturedPayload = payload;
+				},
+			});
 
-		const thinkingBlock = assistantResponse.content.find(
-			(block) => block.type === "thinking" && block.thinkingSignature,
-		) as ThinkingContent | undefined;
-		const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as
-			| ToolCall
-			| undefined;
+			// The key assertion: no 400 error from orphaned function_call
+			expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
+			expect(response.errorMessage).toBeFalsy();
+			expect(response.content.length).toBeGreaterThan(0);
 
-		if (!thinkingBlock) {
-			throw new Error("Missing thinking block from OpenAI Responses");
-		}
-		if (!toolCallBlock) {
-			throw new Error("Missing tool call from OpenAI Responses - model did not use the tool");
-		}
+			// Log what was sent for debugging
+			const input = capturedPayload?.input as any[];
+			const functionCalls = input?.filter((item: any) => item.type === "function_call") || [];
+			const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || [];
 
-		// Simulate corruption: keep toolCall but strip thinkingSignature
-		// This mimics what happens when isSameModel=false and thinking text is empty
-		const corruptedThinking: ThinkingContent = {
-			type: "thinking",
-			thinking: thinkingBlock.thinking,
-			// thinkingSignature intentionally omitted - simulates it being lost
-		};
+			console.log("Payload sent to API:");
+			console.log("- function_calls:", functionCalls.length);
+			console.log("- reasoning items:", reasoningItems.length);
+			console.log("- full input:", JSON.stringify(input, null, 2));
 
-		const corruptedAssistant: AssistantMessage = {
-			...assistantResponse,
-			content: [corruptedThinking, toolCallBlock],
-			stopReason: "toolUse", // Completed successfully, not aborted
-		};
-
-		// Provide a tool result to continue the conversation
-		const toolResult: Message = {
-			role: "toolResult",
-			toolCallId: toolCallBlock.id,
-			toolName: toolCallBlock.name,
-			content: [{ type: "text", text: "42" }],
-			isError: false,
-			timestamp: Date.now(),
-		};
-
-		const followUp: Message = {
-			role: "user",
-			content: "What was the result?",
-			timestamp: Date.now(),
-		};
-
-		const context: Context = {
-			systemPrompt: "You are a helpful assistant.",
-			messages: [userMessage, corruptedAssistant, toolResult, followUp],
-			tools: [testTool],
-		};
-
-		const response = await complete(model, context, {
-			apiKey,
-			reasoningEffort: "high",
+			// Verify the model understood the context
+			const responseText = response.content
+				.filter((b) => b.type === "text")
+				.map((b) => (b as any).text)
+				.join("");
+			expect(responseText).toContain("42");
 		});
 
-		// The key assertion: no 400 error from orphaned function_call
-		// Error would be: "function_call was provided without its required reasoning item"
-		expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
-		expect(response.errorMessage).toBeFalsy();
-		expect(response.content.length).toBeGreaterThan(0);
-	});
-});
+		it("handles cross-provider handoff from Anthropic to OpenAI Codex", { retry: 2 }, async () => {
+			// This tests cross-provider handoff:
+			// 1. Anthropic model generates thinking + function_call (toolu_xxx ID)
+			// 2. User switches to OpenAI Codex
+			// 3. transform-messages: isSameModel=false, thinking converted to text
+			// 4. Tool call ID is Anthropic format (toolu_xxx), no OpenAI pairing history
+			// 5. Should work because foreign IDs have no pairing expectation
+
+			const anthropicModel = getModel("anthropic", "claude-sonnet-4-5");
+			const codexModel = getModel("openai", "gpt-5.2-codex");
+
+			const anthropicApiKey = getEnvApiKey("anthropic");
+			const openaiApiKey = getEnvApiKey("openai");
+			if (!anthropicApiKey || !openaiApiKey) {
+				throw new Error("Missing API keys");
+			}
+
+			const userMessage: Message = {
+				role: "user",
+				content: "Use the double_number tool to double 21.",
+				timestamp: Date.now(),
+			};
+
+			// Get a real response from Anthropic with thinking + tool call
+			const assistantResponse = await complete(
+				anthropicModel,
+				{
+					systemPrompt: "You are a helpful assistant. Always use the tool when asked.",
+					messages: [userMessage],
+					tools: [testTool],
+				},
+				{
+					apiKey: anthropicApiKey,
+					thinkingEnabled: true,
+					thinkingBudgetTokens: 5000,
+				},
+			);
+
+			const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as
+				| ToolCall
+				| undefined;
+
+			if (!toolCallBlock) {
+				throw new Error("Missing tool call from Anthropic - model did not use the tool");
+			}
+
+			console.log("Anthropic tool call ID:", toolCallBlock.id);
+
+			// Provide a tool result
+			const toolResult: Message = {
+				role: "toolResult",
+				toolCallId: toolCallBlock.id,
+				toolName: toolCallBlock.name,
+				content: [{ type: "text", text: "42" }],
+				isError: false,
+				timestamp: Date.now(),
+			};
+
+			const followUp: Message = {
+				role: "user",
+				content: "What was the result? Answer with just the number.",
+				timestamp: Date.now(),
+			};
+
+			// Now continue with Codex (different provider)
+			const context: Context = {
+				systemPrompt: "You are a helpful assistant. Answer concisely.",
+				messages: [userMessage, assistantResponse, toolResult, followUp],
+				tools: [testTool],
+			};
+
+			let capturedPayload: any = null;
+			const response = await complete(codexModel, context, {
+				apiKey: openaiApiKey,
+				reasoningEffort: "high",
+				onPayload: (payload) => {
+					capturedPayload = payload;
+				},
+			});
+
+			// Log what was sent
+			const input = capturedPayload?.input as any[];
+			const functionCalls = input?.filter((item: any) => item.type === "function_call") || [];
+			const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || [];
+
+			console.log("Payload sent to Codex:");
+			console.log("- function_calls:", functionCalls.length);
+			console.log("- reasoning items:", reasoningItems.length);
+			if (functionCalls.length > 0) {
+				console.log(
+					"- function_call IDs:",
+					functionCalls.map((fc: any) => fc.id),
+				);
+			}
+
+			// The key assertion: no 400 error
+			expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
+			expect(response.errorMessage).toBeFalsy();
+			expect(response.content.length).toBeGreaterThan(0);
+
+			// Verify the model understood the context
+			const responseText = response.content
+				.filter((b) => b.type === "text")
+				.map((b) => (b as any).text)
+				.join("");
+			expect(responseText).toContain("42");
+		});
+	},
+);