From 36e774282d0a36d3aa21e13b07b7c56394876f56 Mon Sep 17 00:00:00 2001 From: butelo Date: Sun, 4 Jan 2026 18:12:09 +0100 Subject: [PATCH] fix duplicated thinking tokens in chutes (#443) Co-authored-by: xes garcia --- packages/ai/CHANGELOG.md | 1 + .../ai/src/providers/openai-completions.ts | 48 +++++++++++-------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index c45dfaf4..3862b456 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -46,6 +46,7 @@ ### Fixed - **OpenAI completions empty content blocks**: Empty text or thinking blocks in assistant messages are now filtered out before sending to the OpenAI completions API, preventing validation errors. ([#344](https://github.com/badlogic/pi-mono/pull/344) by [@default-anton](https://github.com/default-anton)) +- **Thinking token duplication**: Fixed thinking content duplication with chutes.ai provider. The provider was returning thinking content in both `reasoning_content` and `reasoning` fields, causing each chunk to be processed twice. Now only the first non-empty reasoning field is used. - **zAi provider API mapping**: Fixed zAi models to use `openai-completions` API with correct base URL (`https://api.z.ai/api/coding/paas/v4`) instead of incorrect Anthropic API mapping. ([#344](https://github.com/badlogic/pi-mono/pull/344), [#358](https://github.com/badlogic/pi-mono/pull/358) by [@default-anton](https://github.com/default-anton)) ## [0.28.0] - 2025-12-25 diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index e13c0e89..30ee6ca2 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -196,34 +196,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = ( // Some endpoints return reasoning in reasoning_content (llama.cpp), // or reasoning (other openai compatible endpoints) + // Use the first non-empty reasoning field to avoid duplication + // (e.g., chutes.ai returns both reasoning_content and reasoning with same content) const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"]; + let foundReasoningField: string | null = null; for (const field of reasoningFields) { if ( (choice.delta as any)[field] !== null && (choice.delta as any)[field] !== undefined && (choice.delta as any)[field].length > 0 ) { - if (!currentBlock || currentBlock.type !== "thinking") { - finishCurrentBlock(currentBlock); - currentBlock = { - type: "thinking", - thinking: "", - thinkingSignature: field, - }; - output.content.push(currentBlock); - stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output }); + if (!foundReasoningField) { + foundReasoningField = field; + break; } + } + } - if (currentBlock.type === "thinking") { - const delta = (choice.delta as any)[field]; - currentBlock.thinking += delta; - stream.push({ - type: "thinking_delta", - contentIndex: blockIndex(), - delta, - partial: output, - }); - } + if (foundReasoningField) { + if (!currentBlock || currentBlock.type !== "thinking") { + finishCurrentBlock(currentBlock); + currentBlock = { + type: "thinking", + thinking: "", + thinkingSignature: foundReasoningField, + }; + output.content.push(currentBlock); + stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output }); + } + + if (currentBlock.type === "thinking") { + const delta = (choice.delta as any)[foundReasoningField]; + currentBlock.thinking += delta; + stream.push({ + type: "thinking_delta", + contentIndex: blockIndex(), + delta, + partial: output, + }); } }