fix(ai): handle redacted_thinking blocks, skip interleaved beta for adaptive models, drop temperature with thinking

- Map redacted_thinking to ThinkingContent with redacted: true instead of
  adding a new content type. The opaque payload goes in thinkingSignature, and
  the thinking text is set to "[Reasoning redacted]" so it renders naturally
  everywhere. The cross-model transform drops redacted blocks.
- Skip interleaved-thinking-2025-05-14 beta header for Opus 4.6 / Sonnet 4.6,
  where adaptive thinking makes it deprecated (Opus 4.6) or redundant (Sonnet 4.6).
- Do not send temperature when thinkingEnabled is true (incompatible with
  both adaptive and budget-based thinking).

Based on #1665 by @tctev
This commit is contained in:
Mario Zechner 2026-02-27 21:53:25 +01:00
parent afe9ae06e8
commit 9825c13f5f
4 changed files with 38 additions and 3 deletions

View file

@ -6,6 +6,9 @@
- Restored built-in OAuth providers when unregistering dynamically registered provider IDs and added `resetOAuthProviders()` for registry reset flows.
- Fixed Z.ai thinking control using wrong parameter name (`thinking` instead of `enable_thinking`), causing thinking to always be enabled and wasting tokens/latency ([#1674](https://github.com/badlogic/pi-mono/pull/1674) by [@okuyam2y](https://github.com/okuyam2y))
- Fixed `redacted_thinking` blocks being silently dropped during Anthropic streaming. They are now captured as `ThinkingContent` with `redacted: true`, passed back to the API in multi-turn conversations with the same model, and dropped during cross-model message transformation because the encrypted payload is only valid for the model that produced it ([#1665](https://github.com/badlogic/pi-mono/pull/1665) by [@tctev](https://github.com/tctev))
- Fixed `interleaved-thinking-2025-05-14` beta header being sent for adaptive thinking models (Opus 4.6, Sonnet 4.6) where the header is deprecated or redundant ([#1665](https://github.com/badlogic/pi-mono/pull/1665) by [@tctev](https://github.com/tctev))
- Fixed temperature being sent alongside extended thinking, which is incompatible with both adaptive and budget-based thinking modes ([#1665](https://github.com/badlogic/pi-mono/pull/1665) by [@tctev](https://github.com/tctev))
## [0.55.1] - 2026-02-26

View file

@ -273,6 +273,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "redacted_thinking") {
const block: Block = {
type: "thinking",
thinking: "[Reasoning redacted]",
thinkingSignature: event.content_block.data,
redacted: true,
index: event.index,
};
output.content.push(block);
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
} else if (event.content_block.type === "tool_use") {
const block: Block = {
type: "toolCall",
@ -496,10 +506,14 @@ function createClient(
optionsHeaders?: Record<string, string>,
dynamicHeaders?: Record<string, string>,
): { client: Anthropic; isOAuthToken: boolean } {
// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
// Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)
if (model.provider === "github-copilot") {
const betaFeatures: string[] = [];
if (interleavedThinking) {
if (needsInterleavedBeta) {
betaFeatures.push("interleaved-thinking-2025-05-14");
}
@ -524,7 +538,7 @@ function createClient(
}
const betaFeatures = ["fine-grained-tool-streaming-2025-05-14"];
if (interleavedThinking) {
if (needsInterleavedBeta) {
betaFeatures.push("interleaved-thinking-2025-05-14");
}
@ -611,7 +625,8 @@ function buildParams(
];
}
if (options?.temperature !== undefined) {
// Temperature is incompatible with extended thinking (adaptive or budget-based).
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
params.temperature = options.temperature;
}
@ -723,6 +738,14 @@ function convertMessages(
text: sanitizeSurrogates(block.text),
});
} else if (block.type === "thinking") {
// Redacted thinking: pass the opaque payload back as redacted_thinking
if (block.redacted) {
blocks.push({
type: "redacted_thinking",
data: block.thinkingSignature!,
});
continue;
}
if (block.thinking.trim().length === 0) continue;
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to plain text block without <thinking> tags to avoid API rejection

View file

@ -39,6 +39,11 @@ export function transformMessages<TApi extends Api>(
const transformedContent = assistantMsg.content.flatMap((block) => {
if (block.type === "thinking") {
// Redacted thinking is opaque encrypted content, only valid for the same model.
// Drop it for cross-model to avoid API errors.
if (block.redacted) {
return isSameModel ? block : [];
}
// For same model: keep thinking blocks with signatures (needed for replay)
// even if the thinking text is empty (OpenAI encrypted reasoning)
if (isSameModel && block.thinkingSignature) return block;

View file

@ -128,6 +128,10 @@ export interface ThinkingContent {
type: "thinking";
thinking: string;
thinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID
/** When true, the thinking content was redacted by safety filters. The opaque
* encrypted payload is stored in `thinkingSignature` so it can be passed back
* to the API for multi-turn continuity. */
redacted?: boolean;
}
export interface ImageContent {