From 36e774282d0a36d3aa21e13b07b7c56394876f56 Mon Sep 17 00:00:00 2001
From: butelo <xesgarcia@gmail.com>
Date: Sun, 4 Jan 2026 18:12:09 +0100
Subject: [PATCH] fix duplicated thinking tokens in chutes (#443)

Co-authored-by: xes garcia <xes.garcia@deus.ai>
---
 packages/ai/CHANGELOG.md                      |  1 +
 .../ai/src/providers/openai-completions.ts    | 48 +++++++++++--------
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index c45dfaf4..3862b456 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -46,6 +46,7 @@
 ### Fixed
 
 - **OpenAI completions empty content blocks**: Empty text or thinking blocks in assistant messages are now filtered out before sending to the OpenAI completions API, preventing validation errors. ([#344](https://github.com/badlogic/pi-mono/pull/344) by [@default-anton](https://github.com/default-anton))
+- **Thinking token duplication**: Fixed duplicated thinking content with the chutes.ai provider. The provider returns the same thinking content in both the `reasoning_content` and `reasoning` fields, which caused each chunk to be processed twice. Now only the first non-empty reasoning field of each chunk is used. ([#443](https://github.com/badlogic/pi-mono/pull/443))
 - **zAi provider API mapping**: Fixed zAi models to use `openai-completions` API with correct base URL (`https://api.z.ai/api/coding/paas/v4`) instead of incorrect Anthropic API mapping. ([#344](https://github.com/badlogic/pi-mono/pull/344), [#358](https://github.com/badlogic/pi-mono/pull/358) by [@default-anton](https://github.com/default-anton))
 
 ## [0.28.0] - 2025-12-25
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index e13c0e89..30ee6ca2 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -196,34 +196,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 
 					// Some endpoints return reasoning in reasoning_content (llama.cpp),
 					// or reasoning (other openai compatible endpoints)
+					// Use only the first non-empty reasoning field per chunk to avoid duplication
+					// (e.g., chutes.ai sends the same content in both reasoning_content and reasoning)
 					const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
+					let foundReasoningField: string | null = null;
 					for (const field of reasoningFields) {
 						if (
 							(choice.delta as any)[field] !== null &&
 							(choice.delta as any)[field] !== undefined &&
 							(choice.delta as any)[field].length > 0
 						) {
-							if (!currentBlock || currentBlock.type !== "thinking") {
-								finishCurrentBlock(currentBlock);
-								currentBlock = {
-									type: "thinking",
-									thinking: "",
-									thinkingSignature: field,
-								};
-								output.content.push(currentBlock);
-								stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+							if (!foundReasoningField) {
+								foundReasoningField = field;
+								break;
 							}
+						}
+					}
 
-							if (currentBlock.type === "thinking") {
-								const delta = (choice.delta as any)[field];
-								currentBlock.thinking += delta;
-								stream.push({
-									type: "thinking_delta",
-									contentIndex: blockIndex(),
-									delta,
-									partial: output,
-								});
-							}
+					if (foundReasoningField) {
+						if (!currentBlock || currentBlock.type !== "thinking") {
+							finishCurrentBlock(currentBlock);
+							currentBlock = {
+								type: "thinking",
+								thinking: "",
+								thinkingSignature: foundReasoningField,
+							};
+							output.content.push(currentBlock);
+							stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+						}
+
+						if (currentBlock.type === "thinking") {
+							const delta = (choice.delta as any)[foundReasoningField];
+							currentBlock.thinking += delta;
+							stream.push({
+								type: "thinking_delta",
+								contentIndex: blockIndex(),
+								delta,
+								partial: output,
+							});
 						}
 					}