From d327b9c7681732917b4006f7f4386c6b7deeb0d3 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Thu, 22 Jan 2026 00:58:49 +0100 Subject: [PATCH] fix(ai): handle same-provider different-model handoff in OpenAI Responses API When switching between OpenAI models (e.g., gpt-5-mini to gpt-5.2-codex), function_call IDs with fc_ prefix trigger pairing validation errors because OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items. The fix omits the id field for function_calls from different models, which avoids the pairing validation while keeping call_id for matching with function_call_output. Fixes #886 --- packages/ai/CHANGELOG.md | 4 + packages/ai/src/providers/openai-responses.ts | 23 +- ...nai-responses-reasoning-replay-e2e.test.ts | 394 +++++++++++------- 3 files changed, 279 insertions(+), 142 deletions(-) diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index cca9b6e7..f29d7d32 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -7,6 +7,10 @@ - Added `headers` option to `StreamOptions` for custom HTTP headers in API requests. Supported by all providers except Amazon Bedrock (which uses AWS SDK auth). Headers are merged with provider defaults and `model.headers`, with `options.headers` taking precedence. - Added `originator` option to `loginOpenAICodex()` for custom OAuth client identification +### Fixed + +- Fixed OpenAI Responses API 400 error "function_call without required reasoning item" when switching between models (same provider, different model). 
The fix omits the `id` field for function_calls from different models to avoid triggering OpenAI's reasoning/function_call pairing validation ([#886](https://github.com/badlogic/pi-mono/issues/886)) + ## [0.49.2] - 2026-01-19 ### Added diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index 5e9ce8bf..a68bda11 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -488,6 +488,15 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re } } else if (msg.role === "assistant") { const output: ResponseInput = []; + const assistantMsg = msg as AssistantMessage; + + // Check if this message is from a different model (same provider, different model ID). + // For such messages, tool call IDs with fc_ prefix need to be stripped to avoid + // OpenAI's reasoning/function_call pairing validation errors. + const isDifferentModel = + assistantMsg.model !== model.id && + assistantMsg.provider === model.provider && + assistantMsg.api === model.api; for (const block of msg.content) { if (block.type === "thinking") { @@ -513,10 +522,20 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re } satisfies ResponseOutputMessage); } else if (block.type === "toolCall") { const toolCall = block as ToolCall; + const callId = toolCall.id.split("|")[0]; + let itemId: string | undefined = toolCall.id.split("|")[1]; + + // For different-model messages, set id to undefined to avoid pairing validation. + // OpenAI tracks which fc_xxx IDs were paired with rs_xxx reasoning items. + // By omitting the id, we avoid triggering that validation (like cross-provider does). 
+ if (isDifferentModel && itemId?.startsWith("fc_")) { + itemId = undefined; + } + output.push({ type: "function_call", - id: toolCall.id.split("|")[1], - call_id: toolCall.id.split("|")[0], + id: itemId, + call_id: callId, name: toolCall.name, arguments: JSON.stringify(toolCall.arguments), }); diff --git a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts index 181ddb23..4d6899d4 100644 --- a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts +++ b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts @@ -2,7 +2,7 @@ import { Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; import { complete, getEnvApiKey } from "../src/stream.js"; -import type { AssistantMessage, Context, Message, ThinkingContent, Tool, ToolCall } from "../src/types.js"; +import type { AssistantMessage, Context, Message, Tool, ToolCall } from "../src/types.js"; const testToolSchema = Type.Object({ value: Type.Number({ description: "A number to double" }), @@ -14,165 +14,279 @@ const testTool: Tool = { parameters: testToolSchema, }; -describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses reasoning replay e2e", () => { - it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => { - const model = getModel("openai", "gpt-5-mini"); +describe.skipIf(!process.env.OPENAI_API_KEY || !process.env.ANTHROPIC_API_KEY)( + "OpenAI Responses reasoning replay e2e", + () => { + it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => { + const model = getModel("openai", "gpt-5-mini"); - const apiKey = getEnvApiKey("openai"); - if (!apiKey) { - throw new Error("Missing OPENAI_API_KEY"); - } + const apiKey = getEnvApiKey("openai"); + if (!apiKey) { + throw new Error("Missing OPENAI_API_KEY"); + } - const userMessage: Message = { - role: "user", - content: "Use the double_number tool to 
double 21.", - timestamp: Date.now(), - }; + const userMessage: Message = { + role: "user", + content: "Use the double_number tool to double 21.", + timestamp: Date.now(), + }; - const assistantResponse = await complete( - model, - { - systemPrompt: "You are a helpful assistant. Use the tool.", - messages: [userMessage], + const assistantResponse = await complete( + model, + { + systemPrompt: "You are a helpful assistant. Use the tool.", + messages: [userMessage], + tools: [testTool], + }, + { + apiKey, + reasoningEffort: "high", + }, + ); + + const thinkingBlock = assistantResponse.content.find( + (block) => block.type === "thinking" && block.thinkingSignature, + ); + if (!thinkingBlock || thinkingBlock.type !== "thinking") { + throw new Error("Missing thinking signature from OpenAI Responses"); + } + + const corruptedAssistant: AssistantMessage = { + ...assistantResponse, + content: [thinkingBlock], + stopReason: "aborted", + }; + + const followUp: Message = { + role: "user", + content: "Say hello to confirm you can continue.", + timestamp: Date.now(), + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant.", + messages: [userMessage, corruptedAssistant, followUp], tools: [testTool], - }, - { + }; + + const response = await complete(model, context, { apiKey, reasoningEffort: "high", - }, - ); + }); - const thinkingBlock = assistantResponse.content.find( - (block) => block.type === "thinking" && block.thinkingSignature, - ); - if (!thinkingBlock || thinkingBlock.type !== "thinking") { - throw new Error("Missing thinking signature from OpenAI Responses"); - } - - const corruptedAssistant: AssistantMessage = { - ...assistantResponse, - content: [thinkingBlock], - stopReason: "aborted", - }; - - const followUp: Message = { - role: "user", - content: "Say hello to confirm you can continue.", - timestamp: Date.now(), - }; - - const context: Context = { - systemPrompt: "You are a helpful assistant.", - messages: [userMessage, 
corruptedAssistant, followUp], - tools: [testTool], - }; - - const response = await complete(model, context, { - apiKey, - reasoningEffort: "high", + // The key assertion: no 400 error from orphaned reasoning item + expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); + expect(response.errorMessage).toBeFalsy(); + // Model should respond (text or tool call) + expect(response.content.length).toBeGreaterThan(0); }); - // The key assertion: no 400 error from orphaned reasoning item - expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); - expect(response.errorMessage).toBeFalsy(); - // Model should respond (text or tool call) - expect(response.content.length).toBeGreaterThan(0); - }); + it("handles same-provider different-model handoff with tool calls", { retry: 2 }, async () => { + // This tests the scenario where: + // 1. Model A (gpt-5-mini) generates reasoning + function_call + // 2. User switches to Model B (gpt-5.2-codex) - same provider, different model + // 3. transform-messages: isSameModel=false, thinking converted to text + // 4. But tool call ID still has OpenAI pairing history (fc_xxx paired with rs_xxx) + // 5. Without fix: OpenAI returns 400 "function_call without required reasoning item" + // 6. With fix: the function_call id (fc_xxx) is omitted while call_id is kept, conversation continues - it("drops orphaned tool calls when reasoning signature is missing", { retry: 2 }, async () => { - // This tests the scenario where: - // 1. A completed turn has reasoning + function_call - // 2. The thinking signature gets lost (e.g., cross-provider handoff, isSameModel=false filtering) - // 3. The toolCall remains but reasoning is gone - // 4. Without the fix: Azure/OpenAI returns 400 "function_call without required reasoning item" - // 5. 
With the fix: orphaned toolCalls are dropped, conversation continues + const modelA = getModel("openai", "gpt-5-mini"); + const modelB = getModel("openai", "gpt-5.2-codex"); - const model = getModel("openai", "gpt-5-mini"); + const apiKey = getEnvApiKey("openai"); + if (!apiKey) { + throw new Error("Missing OPENAI_API_KEY"); + } - const apiKey = getEnvApiKey("openai"); - if (!apiKey) { - throw new Error("Missing OPENAI_API_KEY"); - } + const userMessage: Message = { + role: "user", + content: "Use the double_number tool to double 21.", + timestamp: Date.now(), + }; - const userMessage: Message = { - role: "user", - content: "Use the double_number tool to double 21.", - timestamp: Date.now(), - }; + // Get a real response from Model A with reasoning + tool call + const assistantResponse = await complete( + modelA, + { + systemPrompt: "You are a helpful assistant. Always use the tool when asked.", + messages: [userMessage], + tools: [testTool], + }, + { + apiKey, + reasoningEffort: "high", + }, + ); - // Get a real response with reasoning + tool call - const assistantResponse = await complete( - model, - { - systemPrompt: "You are a helpful assistant. Always use the tool when asked.", - messages: [userMessage], + const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as + | ToolCall + | undefined; + + if (!toolCallBlock) { + throw new Error("Missing tool call from OpenAI Responses - model did not use the tool"); + } + + // Provide a tool result + const toolResult: Message = { + role: "toolResult", + toolCallId: toolCallBlock.id, + toolName: toolCallBlock.name, + content: [{ type: "text", text: "42" }], + isError: false, + timestamp: Date.now(), + }; + + const followUp: Message = { + role: "user", + content: "What was the result? Answer with just the number.", + timestamp: Date.now(), + }; + + // Now continue with Model B (different model, same provider) + const context: Context = { + systemPrompt: "You are a helpful assistant. 
Answer concisely.", + messages: [userMessage, assistantResponse, toolResult, followUp], tools: [testTool], - }, - { + }; + + let capturedPayload: any = null; + const response = await complete(modelB, context, { apiKey, reasoningEffort: "high", - }, - ); + onPayload: (payload) => { + capturedPayload = payload; + }, + }); - const thinkingBlock = assistantResponse.content.find( - (block) => block.type === "thinking" && block.thinkingSignature, - ) as ThinkingContent | undefined; - const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as - | ToolCall - | undefined; + // The key assertion: no 400 error from orphaned function_call + expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); + expect(response.errorMessage).toBeFalsy(); + expect(response.content.length).toBeGreaterThan(0); - if (!thinkingBlock) { - throw new Error("Missing thinking block from OpenAI Responses"); - } - if (!toolCallBlock) { - throw new Error("Missing tool call from OpenAI Responses - model did not use the tool"); - } + // Log what was sent for debugging + const input = capturedPayload?.input as any[]; + const functionCalls = input?.filter((item: any) => item.type === "function_call") || []; + const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || []; - // Simulate corruption: keep toolCall but strip thinkingSignature - // This mimics what happens when isSameModel=false and thinking text is empty - const corruptedThinking: ThinkingContent = { - type: "thinking", - thinking: thinkingBlock.thinking, - // thinkingSignature intentionally omitted - simulates it being lost - }; + console.log("Payload sent to API:"); + console.log("- function_calls:", functionCalls.length); + console.log("- reasoning items:", reasoningItems.length); + console.log("- full input:", JSON.stringify(input, null, 2)); - const corruptedAssistant: AssistantMessage = { - ...assistantResponse, - content: [corruptedThinking, toolCallBlock], 
- stopReason: "toolUse", // Completed successfully, not aborted - }; - - // Provide a tool result to continue the conversation - const toolResult: Message = { - role: "toolResult", - toolCallId: toolCallBlock.id, - toolName: toolCallBlock.name, - content: [{ type: "text", text: "42" }], - isError: false, - timestamp: Date.now(), - }; - - const followUp: Message = { - role: "user", - content: "What was the result?", - timestamp: Date.now(), - }; - - const context: Context = { - systemPrompt: "You are a helpful assistant.", - messages: [userMessage, corruptedAssistant, toolResult, followUp], - tools: [testTool], - }; - - const response = await complete(model, context, { - apiKey, - reasoningEffort: "high", + // Verify the model understood the context + const responseText = response.content + .filter((b) => b.type === "text") + .map((b) => (b as any).text) + .join(""); + expect(responseText).toContain("42"); }); - // The key assertion: no 400 error from orphaned function_call - // Error would be: "function_call was provided without its required reasoning item" - expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); - expect(response.errorMessage).toBeFalsy(); - expect(response.content.length).toBeGreaterThan(0); - }); -}); + it("handles cross-provider handoff from Anthropic to OpenAI Codex", { retry: 2 }, async () => { + // This tests cross-provider handoff: + // 1. Anthropic model generates thinking + function_call (toolu_xxx ID) + // 2. User switches to OpenAI Codex + // 3. transform-messages: isSameModel=false, thinking converted to text + // 4. Tool call ID is Anthropic format (toolu_xxx), no OpenAI pairing history + // 5. 
Should work because foreign IDs have no pairing expectation + + const anthropicModel = getModel("anthropic", "claude-sonnet-4-5"); + const codexModel = getModel("openai", "gpt-5.2-codex"); + + const anthropicApiKey = getEnvApiKey("anthropic"); + const openaiApiKey = getEnvApiKey("openai"); + if (!anthropicApiKey || !openaiApiKey) { + throw new Error("Missing API keys"); + } + + const userMessage: Message = { + role: "user", + content: "Use the double_number tool to double 21.", + timestamp: Date.now(), + }; + + // Get a real response from Anthropic with thinking + tool call + const assistantResponse = await complete( + anthropicModel, + { + systemPrompt: "You are a helpful assistant. Always use the tool when asked.", + messages: [userMessage], + tools: [testTool], + }, + { + apiKey: anthropicApiKey, + thinkingEnabled: true, + thinkingBudgetTokens: 5000, + }, + ); + + const toolCallBlock = assistantResponse.content.find((block) => block.type === "toolCall") as + | ToolCall + | undefined; + + if (!toolCallBlock) { + throw new Error("Missing tool call from Anthropic - model did not use the tool"); + } + + console.log("Anthropic tool call ID:", toolCallBlock.id); + + // Provide a tool result + const toolResult: Message = { + role: "toolResult", + toolCallId: toolCallBlock.id, + toolName: toolCallBlock.name, + content: [{ type: "text", text: "42" }], + isError: false, + timestamp: Date.now(), + }; + + const followUp: Message = { + role: "user", + content: "What was the result? Answer with just the number.", + timestamp: Date.now(), + }; + + // Now continue with Codex (different provider) + const context: Context = { + systemPrompt: "You are a helpful assistant. 
Answer concisely.", + messages: [userMessage, assistantResponse, toolResult, followUp], + tools: [testTool], + }; + + let capturedPayload: any = null; + const response = await complete(codexModel, context, { + apiKey: openaiApiKey, + reasoningEffort: "high", + onPayload: (payload) => { + capturedPayload = payload; + }, + }); + + // Log what was sent + const input = capturedPayload?.input as any[]; + const functionCalls = input?.filter((item: any) => item.type === "function_call") || []; + const reasoningItems = input?.filter((item: any) => item.type === "reasoning") || []; + + console.log("Payload sent to Codex:"); + console.log("- function_calls:", functionCalls.length); + console.log("- reasoning items:", reasoningItems.length); + if (functionCalls.length > 0) { + console.log( + "- function_call IDs:", + functionCalls.map((fc: any) => fc.id), + ); + } + + // The key assertion: no 400 error + expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); + expect(response.errorMessage).toBeFalsy(); + expect(response.content.length).toBeGreaterThan(0); + + // Verify the model understood the context + const responseText = response.content + .filter((b) => b.type === "text") + .map((b) => (b as any).text) + .join(""); + expect(responseText).toContain("42"); + }); + }, +);