From 2d27a2c7285feba21699cadec6beadb6f095b5b6 Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Mon, 19 Jan 2026 15:55:18 +0100
Subject: [PATCH] fix(ai): skip errored/aborted assistant messages in
 transform-messages

Fixes OpenAI Responses 400 error 'reasoning without following item' by
skipping errored/aborted assistant messages entirely rather than filtering
at the provider level. This covers openai-responses, openai-codex-responses,
and future providers.

Removes strictResponsesPairing compat option (no longer needed).

Closes #838
---
 packages/ai/CHANGELOG.md                      | 10 ++-
 packages/ai/README.md                         |  2 +-
 .../src/providers/openai-codex-responses.ts   |  4 +-
 packages/ai/src/providers/openai-responses.ts | 23 +-----
 .../ai/src/providers/transform-messages.ts    | 26 +++---
 packages/ai/src/types.ts                      |  3 +-
 ...nai-responses-reasoning-replay-e2e.test.ts | 81 +++++++++++++++++++
 packages/coding-agent/CHANGELOG.md            |  2 +-
 packages/coding-agent/README.md               |  8 --
 .../coding-agent/src/core/model-registry.ts   |  2 +-
 10 files changed, 109 insertions(+), 52 deletions(-)
 create mode 100644 packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts

diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index 8eab3c78..47e53379 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -2,11 +2,19 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- Fixed OpenAI Responses 400 error "reasoning without following item" by skipping errored/aborted assistant messages entirely in `transform-messages.ts` ([#838](https://github.com/badlogic/pi-mono/pull/838))
+
+### Removed
+
+- Removed `strictResponsesPairing` compat option (no longer needed after the transform-messages fix)
+
 ## [0.49.1] - 2026-01-18
 
 ### Added
 
-- Added `OpenAIResponsesCompat` interface with `strictResponsesPairing` option for Azure OpenAI Responses API, which requires strict reasoning/message pairing in history replay ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@nicobako](https://github.com/nicobako))
+- Added `OpenAIResponsesCompat` interface with `strictResponsesPairing` option for Azure OpenAI Responses API, which requires strict reasoning/message pairing in history replay ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@prateekmedia](https://github.com/prateekmedia))
 
 ### Changed
 
diff --git a/packages/ai/README.md b/packages/ai/README.md
index c8347acf..c033da38 100644
--- a/packages/ai/README.md
+++ b/packages/ai/README.md
@@ -729,7 +729,7 @@ interface OpenAICompletionsCompat {
 }
 
 interface OpenAIResponsesCompat {
-  strictResponsesPairing?: boolean; // Enforce strict reasoning/message pairing for OpenAI Responses history replay on providers like Azure (default: false)
+  // Reserved for future use
 }
 ```
 
diff --git a/packages/ai/src/providers/openai-codex-responses.ts b/packages/ai/src/providers/openai-codex-responses.ts
index 5c3f55f3..fcfed917 100644
--- a/packages/ai/src/providers/openai-codex-responses.ts
+++ b/packages/ai/src/providers/openai-codex-responses.ts
@@ -329,7 +329,7 @@ function convertAssistantMessage(msg: AssistantMessage): unknown[] {
 	const output: unknown[] = [];
 
 	for (const block of msg.content) {
-		if (block.type === "thinking" && msg.stopReason !== "error" && block.thinkingSignature) {
+		if (block.type === "thinking" && block.thinkingSignature) {
 			output.push(JSON.parse(block.thinkingSignature));
 		} else if (block.type === "text") {
 			output.push({
@@ -338,7 +338,7 @@ function convertAssistantMessage(msg: AssistantMessage): unknown[] {
 				content: [{ type: "output_text", text: sanitizeSurrogates(block.text), annotations: [] }],
 				status: "completed",
 			});
-		} else if (block.type === "toolCall" && msg.stopReason !== "error") {
+		} else if (block.type === "toolCall") {
 			const [callId, id] = block.id.split("|");
 			output.push({
 				type: "function_call",
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index d685126d..2cbb9dc5 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -478,22 +478,9 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
 			}
 		} else if (msg.role === "assistant") {
 			const output: ResponseInput = [];
-			const strictResponsesPairing = model.compat?.strictResponsesPairing ?? false;
-			let isIncomplete = false;
-			let shouldReplayReasoning = msg.stopReason !== "error";
-			let allowToolCalls = msg.stopReason !== "error";
-			if (strictResponsesPairing) {
-				isIncomplete = msg.stopReason === "error" || msg.stopReason === "aborted";
-				const hasPairedContent = msg.content.some(
-					(b) => b.type === "toolCall" || (b.type === "text" && (b as TextContent).text.trim().length > 0),
-				);
-				shouldReplayReasoning = !isIncomplete && hasPairedContent;
-				allowToolCalls = !isIncomplete;
-			}
 
 			for (const block of msg.content) {
-				// Do not submit thinking blocks if the completion had an error (i.e. abort)
-				if (block.type === "thinking" && shouldReplayReasoning) {
+				if (block.type === "thinking") {
 					if (block.thinkingSignature) {
 						const reasoningItem = JSON.parse(block.thinkingSignature);
 						output.push(reasoningItem);
@@ -504,11 +491,6 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
 					let msgId = textBlock.textSignature;
 					if (!msgId) {
 						msgId = `msg_${msgIndex}`;
-					}
-					// For incomplete turns, never replay the original message id (if any).
-					// Generate a stable synthetic id so strict pairing providers do not expect a paired reasoning item.
-					if (strictResponsesPairing && isIncomplete) {
-						msgId = `msg_${msgIndex}_${shortHash(textBlock.text)}`;
 					} else if (msgId.length > 64) {
 						msgId = `msg_${shortHash(msgId)}`;
 					}
@@ -519,8 +501,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
 						status: "completed",
 						id: msgId,
 					} satisfies ResponseOutputMessage);
-					// Do not submit toolcall blocks if the completion had an error (i.e. abort)
-				} else if (block.type === "toolCall" && allowToolCalls) {
+				} else if (block.type === "toolCall") {
 					const toolCall = block as ToolCall;
 					output.push({
 						type: "function_call",
diff --git a/packages/ai/src/providers/transform-messages.ts b/packages/ai/src/providers/transform-messages.ts
index 22cc35cb..f4e37b35 100644
--- a/packages/ai/src/providers/transform-messages.ts
+++ b/packages/ai/src/providers/transform-messages.ts
@@ -118,27 +118,23 @@ export function transformMessages<TApi extends Api>(
 				existingToolResultIds = new Set();
 			}
 
-			// Track tool calls from this assistant message
-			// Don't track tool calls from errored messages - they will be dropped by
-			// provider-specific converters, so we shouldn't create synthetic results for them
+			// Skip errored/aborted assistant messages entirely.
+			// These are incomplete turns that shouldn't be replayed:
+			// - They may hold partial content (reasoning without a message, incomplete tool calls)
+			// - Replaying them can cause API errors (e.g., OpenAI "reasoning without following item")
+			// - The model should retry from the last valid state
 			const assistantMsg = msg as AssistantMessage;
-			const toolCalls =
-				assistantMsg.stopReason === "error"
-					? []
-					: (assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[]);
+			if (assistantMsg.stopReason === "error" || assistantMsg.stopReason === "aborted") {
+				continue;
+			}
+
+			// Track tool calls from this assistant message
+			const toolCalls = assistantMsg.content.filter((b) => b.type === "toolCall") as ToolCall[];
 			if (toolCalls.length > 0) {
 				pendingToolCalls = toolCalls;
 				existingToolResultIds = new Set();
 			}
 
-			// Skip empty assistant messages (no content and no tool calls)
-			// This handles error responses (e.g., 429/500) that produced no content
-			// All providers already filter these in convertMessages, but we do it here
-			// centrally to prevent issues with the tool_use -> tool_result chain
-			if (assistantMsg.content.length === 0 && toolCalls.length === 0) {
-				continue;
-			}
-
 			result.push(msg);
 		} else if (msg.role === "toolResult") {
 			existingToolResultIds.add(msg.toolCallId);
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
index 2e32601a..d64f3fd1 100644
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@@ -236,8 +236,7 @@ export interface OpenAICompletionsCompat {
 
 /** Compatibility settings for OpenAI Responses APIs. */
 export interface OpenAIResponsesCompat {
-	/** Whether OpenAI Responses history replay requires strict reasoning/message pairing (for providers like Azure). */
-	strictResponsesPairing?: boolean;
+	// Reserved for future use
 }
 
 // Model interface for the unified model system
diff --git a/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts
new file mode 100644
index 00000000..114da76e
--- /dev/null
+++ b/packages/ai/test/openai-responses-reasoning-replay-e2e.test.ts
@@ -0,0 +1,81 @@
+import { Type } from "@sinclair/typebox";
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { complete, getEnvApiKey } from "../src/stream.js";
+import type { AssistantMessage, Context, Message, Tool } from "../src/types.js";
+
+const testToolSchema = Type.Object({
+	value: Type.Number({ description: "A number to double" }),
+});
+
+const testTool: Tool<typeof testToolSchema> = {
+	name: "double_number",
+	description: "Doubles a number and returns the result",
+	parameters: testToolSchema,
+};
+
+describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses reasoning replay e2e", () => {
+	it("skips reasoning-only history after an aborted turn", { retry: 2 }, async () => {
+		const model = getModel("openai", "gpt-5-mini");
+
+		const apiKey = getEnvApiKey("openai");
+		if (!apiKey) {
+			throw new Error("Missing OPENAI_API_KEY");
+		}
+
+		const userMessage: Message = {
+			role: "user",
+			content: "Use the double_number tool to double 21.",
+			timestamp: Date.now(),
+		};
+
+		const assistantResponse = await complete(
+			model,
+			{
+				systemPrompt: "You are a helpful assistant. Use the tool.",
+				messages: [userMessage],
+				tools: [testTool],
+			},
+			{
+				apiKey,
+				reasoningEffort: "high",
+			},
+		);
+
+		const thinkingBlock = assistantResponse.content.find(
+			(block) => block.type === "thinking" && block.thinkingSignature,
+		);
+		if (!thinkingBlock || thinkingBlock.type !== "thinking") {
+			throw new Error("Missing thinking signature from OpenAI Responses");
+		}
+
+		const corruptedAssistant: AssistantMessage = {
+			...assistantResponse,
+			content: [thinkingBlock],
+			stopReason: "aborted",
+		};
+
+		const followUp: Message = {
+			role: "user",
+			content: "Say hello to confirm you can continue.",
+			timestamp: Date.now(),
+		};
+
+		const context: Context = {
+			systemPrompt: "You are a helpful assistant.",
+			messages: [userMessage, corruptedAssistant, followUp],
+			tools: [testTool],
+		};
+
+		const response = await complete(model, context, {
+			apiKey,
+			reasoningEffort: "high",
+		});
+
+		// The key assertion: no 400 error from orphaned reasoning item
+		expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
+		expect(response.errorMessage).toBeFalsy();
+		// Model should respond (text or tool call)
+		expect(response.content.length).toBeGreaterThan(0);
+	});
+});
diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md
index 708caeed..abb284b5 100644
--- a/packages/coding-agent/CHANGELOG.md
+++ b/packages/coding-agent/CHANGELOG.md
@@ -14,7 +14,7 @@
 
 ### Added
 
-- Added `strictResponsesPairing` compat option for custom OpenAI Responses models on Azure ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@nicobako](https://github.com/nicobako))
+- Added `strictResponsesPairing` compat option for custom OpenAI Responses models on Azure ([#768](https://github.com/badlogic/pi-mono/pull/768) by [@prateekmedia](https://github.com/prateekmedia))
 - Session selector (`/resume`) now supports path display toggle (`Ctrl+P`) and session deletion (`Ctrl+D`) with inline confirmation ([#816](https://github.com/badlogic/pi-mono/pull/816) by [@w-winter](https://github.com/w-winter))
 - Added undo support in interactive mode with Ctrl+- hotkey. ([#831](https://github.com/badlogic/pi-mono/pull/831) by [@Perlence](https://github.com/Perlence))
 
diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md
index 70fe6f0e..b491575a 100644
--- a/packages/coding-agent/README.md
+++ b/packages/coding-agent/README.md
@@ -751,14 +751,6 @@ To fully replace a built-in provider with custom models, include the `models` ar
 | `supportsUsageInStreaming` | Whether provider supports `stream_options: { include_usage: true }`. Default: `true` |
 | `maxTokensField` | Use `max_completion_tokens` or `max_tokens` |
 
-**OpenAI Responses (`openai-responses`):**
-
-| Field | Description |
-|-------|-------------|
-| `strictResponsesPairing` | Enforce strict reasoning/message pairing when replaying OpenAI Responses history on providers like Azure (default: `false`) |
-
-If you see 400 errors like "item of type 'reasoning' was provided without its required following item" or "message/function_call was provided without its required reasoning item", set `compat.strictResponsesPairing: true` on the affected model in `models.json`.
-
 **Live reload:** The file reloads each time you open `/model`. Edit during session; no restart needed.
 
 **Model selection priority:**
diff --git a/packages/coding-agent/src/core/model-registry.ts b/packages/coding-agent/src/core/model-registry.ts
index b2010e95..5ccec0d7 100644
--- a/packages/coding-agent/src/core/model-registry.ts
+++ b/packages/coding-agent/src/core/model-registry.ts
@@ -28,7 +28,7 @@ const OpenAICompletionsCompatSchema = Type.Object({
 });
 
 const OpenAIResponsesCompatSchema = Type.Object({
-	strictResponsesPairing: Type.Optional(Type.Boolean()),
+	// Reserved for future use
 });
 
 const OpenAICompatSchema = Type.Union([OpenAICompletionsCompatSchema, OpenAIResponsesCompatSchema]);