From 2d27a2c7285feba21699cadec6beadb6f095b5b6 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Mon, 19 Jan 2026 15:55:18 +0100 Subject: [PATCH] fix(ai): skip errored/aborted assistant messages in transform-messages Fixes OpenAI Responses 400 error 'reasoning without following item' by skipping errored/aborted assistant messages entirely rather than filtering at the provider level. This covers openai-responses, openai-codex-responses, and future providers. Removes strictResponsesPairing compat option (no longer needed). Closes #838 --- packages/ai/CHANGELOG.md | 10 ++- packages/ai/README.md | 2 +- .../src/providers/openai-codex-responses.ts | 4 +- packages/ai/src/providers/openai-responses.ts | 23 +----- .../ai/src/providers/transform-messages.ts | 26 +++--- packages/ai/src/types.ts | 3 +- ...nai-responses-reasoning-replay-e2e.test.ts | 81 +++++++++++++++++++ packages/coding-agent/CHANGELOG.md | 2 +- packages/coding-agent/README.md | 8 -- .../coding-agent/src/core/model-registry.ts | 2 +- 10 files changed, 109 insertions(+), 52 deletions(-) create mode 100644 packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index 8eab3c78..47e53379 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -2,11 +2,19 @@ ## [Unreleased] +### Fixed + +- Fixed OpenAI Responses 400 error "reasoning without following item" by skipping errored/aborted assistant messages entirely in transform-messages.ts ([#838](https://github.com/badlogic/pi-mono/pull/838)) + +### Removed + +- Removed `strictResponsesPairing` compat option (no longer needed after the transform-messages fix) + ## [0.49.1] - 2026-01-18 ### Added -- Added `OpenAIResponsesCompat` interface with `strictResponsesPairing` option for Azure OpenAI Responses API, which requires strict reasoning/message pairing in history replay ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@nicobako](https://github.com/nicobako)) +- Added `OpenAIResponsesCompat` interface with `strictResponsesPairing` option for Azure OpenAI Responses API, which requires strict reasoning/message pairing in history replay ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@prateekmedia](https://github.com/prateekmedia)) ### Changed diff --git a/packages/ai/README.md b/packages/ai/README.md index c8347acf..c033da38 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -729,7 +729,7 @@ interface OpenAICompletionsCompat { } interface OpenAIResponsesCompat { - strictResponsesPairing?: boolean; // Enforce strict reasoning/message pairing for OpenAI Responses history replay on providers like Azure (default: false) + // Reserved for future use } ``` diff --git a/packages/ai/src/providers/openai-codex-responses.ts b/packages/ai/src/providers/openai-codex-responses.ts index 5c3f55f3..fcfed917 100644 --- a/packages/ai/src/providers/openai-codex-responses.ts +++ b/packages/ai/src/providers/openai-codex-responses.ts @@ -329,7 +329,7 @@ function convertAssistantMessage(msg: AssistantMessage): unknown[] { const output: unknown[] = []; for (const block of msg.content) { - if (block.type === "thinking" && msg.stopReason !== "error" && block.thinkingSignature) { + if (block.type === "thinking" && block.thinkingSignature) { output.push(JSON.parse(block.thinkingSignature)); } else if (block.type === "text") { output.push({ @@ -338,7 +338,7 @@ function convertAssistantMessage(msg: AssistantMessage): unknown[] { content: [{ type: "output_text", text: sanitizeSurrogates(block.text), annotations: [] }], status: "completed", }); - } else if (block.type === "toolCall" && msg.stopReason !== "error") { + } else if (block.type === "toolCall") { const [callId, id] = block.id.split("|"); output.push({ type: "function_call", diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index d685126d..2cbb9dc5 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -478,22 +478,9 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re } } else if (msg.role === "assistant") { const output: ResponseInput = []; - const strictResponsesPairing = model.compat?.strictResponsesPairing ?? false; - let isIncomplete = false; - let shouldReplayReasoning = msg.stopReason !== "error"; - let allowToolCalls = msg.stopReason !== "error"; - if (strictResponsesPairing) { - isIncomplete = msg.stopReason === "error" || msg.stopReason === "aborted"; - const hasPairedContent = msg.content.some( - (b) => b.type === "toolCall" || (b.type === "text" && (b as TextContent).text.trim().length > 0), - ); - shouldReplayReasoning = !isIncomplete && hasPairedContent; - allowToolCalls = !isIncomplete; - } for (const block of msg.content) { - // Do not submit thinking blocks if the completion had an error (i.e. abort) - if (block.type === "thinking" && shouldReplayReasoning) { + if (block.type === "thinking") { if (block.thinkingSignature) { const reasoningItem = JSON.parse(block.thinkingSignature); output.push(reasoningItem); @@ -504,11 +491,6 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re let msgId = textBlock.textSignature; if (!msgId) { msgId = `msg_${msgIndex}`; - } - // For incomplete turns, never replay the original message id (if any). - // Generate a stable synthetic id so strict pairing providers do not expect a paired reasoning item. - if (strictResponsesPairing && isIncomplete) { - msgId = `msg_${msgIndex}_${shortHash(textBlock.text)}`; } else if (msgId.length > 64) { msgId = `msg_${shortHash(msgId)}`; } @@ -519,8 +501,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re status: "completed", id: msgId, } satisfies ResponseOutputMessage); - // Do not submit toolcall blocks if the completion had an error (i.e. abort) - } else if (block.type === "toolCall" && allowToolCalls) { + } else if (block.type === "toolCall") { const toolCall = block as ToolCall; output.push({ type: "function_call", diff --git a/packages/ai/src/providers/transform-messages.ts b/packages/ai/src/providers/transform-messages.ts index 22cc35cb..f4e37b35 100644 --- a/packages/ai/src/providers/transform-messages.ts +++ b/packages/ai/src/providers/transform-messages.ts @@ -118,27 +118,23 @@ export function transformMessages( existingToolResultIds = new Set(); } - // Track tool calls from this assistant message - // Don't track tool calls from errored messages - they will be dropped by - // provider-specific converters, so we shouldn't create synthetic results for them + // Skip errored/aborted assistant messages entirely. + // These are incomplete turns that shouldn't be replayed: + // - May have partial content (reasoning without message, incomplete tool calls) + // - Replaying them can cause API errors (e.g., OpenAI "reasoning without following item") + // - The model should retry from the last valid state const assistantMsg = msg as AssistantMessage; - const toolCalls = - assistantMsg.stopReason === "error" - ? [] - : (assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[]); + if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") { + continue; + } + + // Track tool calls from this assistant message + const toolCalls = assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[]; if (toolCalls.length > 0) { pendingToolCalls = toolCalls; existingToolResultIds = new Set(); } - // Skip empty assistant messages (no content and no tool calls) - // This handles error responses (e.g., 429/500) that produced no content - // All providers already filter these in convertMessages, but we do it here - // centrally to prevent issues with the tool_use -> tool_result chain - if (assistantMsg.content.length === 0 && toolCalls.length === 0) { - continue; - } - result.push(msg); } else if (msg.role === "toolResult") { existingToolResultIds.add(msg.toolCallId); diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 2e32601a..d64f3fd1 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -236,8 +236,7 @@ export interface OpenAICompletionsCompat { /** Compatibility settings for OpenAI Responses APIs. */ export interface OpenAIResponsesCompat { - /** Whether OpenAI Responses history replay requires strict reasoning/message pairing (for providers like Azure). */ - strictResponsesPairing?: boolean; + // Reserved for future use } // Model interface for the unified model system diff --git a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts new file mode 100644 index 00000000..114da76e --- /dev/null +++ b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts @@ -0,0 +1,81 @@ +import { Type } from "@sinclair/typebox"; +import { describe, expect, it } from "vitest"; +import { getModel } from "../src/models.js"; +import { complete, getEnvApiKey } from "../src/stream.js"; +import type { AssistantMessage, Context, Message, Tool } from "../src/types.js"; + +const testToolSchema = Type.Object({ + value: Type.Number({ description: "A number to double" }), +}); + +const testTool: Tool = { + name: "double_number", + description: "Doubles a number and returns the result", + parameters: testToolSchema, +}; + +describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses reasoning replay e2e", () => { + it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => { + const model = getModel("openai", "gpt-5-mini"); + + const apiKey = getEnvApiKey("openai"); + if (!apiKey) { + throw new Error("Missing OPENAI_API_KEY"); + } + + const userMessage: Message = { + role: "user", + content: "Use the double_number tool to double 21.", + timestamp: Date.now(), + }; + + const assistantResponse = await complete( + model, + { + systemPrompt: "You are a helpful assistant. Use the tool.", + messages: [userMessage], + tools: [testTool], + }, + { + apiKey, + reasoningEffort: "high", + }, + ); + + const thinkingBlock = assistantResponse.content.find( + (block) => block.type === "thinking" && block.thinkingSignature, + ); + if (!thinkingBlock || thinkingBlock.type !== "thinking") { + throw new Error("Missing thinking signature from OpenAI Responses"); + } + + const corruptedAssistant: AssistantMessage = { + ...assistantResponse, + content: [thinkingBlock], + stopReason: "aborted", + }; + + const followUp: Message = { + role: "user", + content: "Say hello to confirm you can continue.", + timestamp: Date.now(), + }; + + const context: Context = { + systemPrompt: "You are a helpful assistant.", + messages: [userMessage, corruptedAssistant, followUp], + tools: [testTool], + }; + + const response = await complete(model, context, { + apiKey, + reasoningEffort: "high", + }); + + // The key assertion: no 400 error from orphaned reasoning item + expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error"); + expect(response.errorMessage).toBeFalsy(); + // Model should respond (text or tool call) + expect(response.content.length).toBeGreaterThan(0); + }); +}); diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 708caeed..abb284b5 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -14,7 +14,7 @@ ### Added -- Added `strictResponsesPairing` compat option for custom OpenAI Responses models on Azure ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@nicobako](https://github.com/nicobako)) +- Added `strictResponsesPairing` compat option for custom OpenAI Responses models on Azure ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@prateekmedia](https://github.com/prateekmedia)) - Session selector (`/resume`) now supports path display toggle (`Ctrl+P`) and session deletion (`Ctrl+D`) with inline confirmation ([#816](https://github.com/badlogic/pi-mono/pull/816) by [@w-winter](https://github.com/w-winter)) - Added undo support in interactive mode with Ctrl+- hotkey. ([#831](https://github.com/badlogic/pi-mono/pull/831) by [@Perlence](https://github.com/Perlence)) diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index 70fe6f0e..b491575a 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -751,14 +751,6 @@ To fully replace a built-in provider with custom models, include the `models` ar | `supportsUsageInStreaming` | Whether provider supports `stream_options: { include_usage: true }`. Default: `true` | | `maxTokensField` | Use `max_completion_tokens` or `max_tokens` | -**OpenAI Responses (`openai-responses`):** - -| Field | Description | -|-------|-------------| -| `strictResponsesPairing` | Enforce strict reasoning/message pairing when replaying OpenAI Responses history on providers like Azure (default: `false`) | - -If you see 400 errors like "item of type 'reasoning' was provided without its required following item" or "message/function_call was provided without its required reasoning item", set `compat.strictResponsesPairing: true` on the affected model in `models.json`. - **Live reload:** The file reloads each time you open `/model`. Edit during session; no restart needed. **Model selection priority:** diff --git a/packages/coding-agent/src/core/model-registry.ts b/packages/coding-agent/src/core/model-registry.ts index b2010e95..5ccec0d7 100644 --- a/packages/coding-agent/src/core/model-registry.ts +++ b/packages/coding-agent/src/core/model-registry.ts @@ -28,7 +28,7 @@ const OpenAICompletionsCompatSchema = Type.Object({ }); const OpenAIResponsesCompatSchema = Type.Object({ - strictResponsesPairing: Type.Optional(Type.Boolean()), + // Reserved for future use }); const OpenAICompatSchema = Type.Union([OpenAICompletionsCompatSchema, OpenAIResponsesCompatSchema]);