fix(ai): use enable_thinking for Z.ai instead of thinking param (#1674)

Z.ai controls reasoning with the same `enable_thinking: boolean` parameter that Qwen uses, not with `thinking: { type: "enabled" | "disabled" }`.

Because the previous code sent the wrong parameter name, Z.ai ignored the disable request and always ran with thinking enabled, wasting tokens and adding latency.

Merge the Z.ai and Qwen branches since they use the same format.

PR by @okuyam2y
This commit is contained in:
Yoshiaki Okuyama 2026-02-28 05:33:14 +09:00 committed by GitHub
parent 8880cd3cb8
commit 22b3be834e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -423,12 +423,8 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
params.tool_choice = options.toolChoice; params.tool_choice = options.toolChoice;
} }
if (compat.thinkingFormat === "zai" && model.reasoning) { if ((compat.thinkingFormat === "zai" || compat.thinkingFormat === "qwen") && model.reasoning) {
// Z.ai uses binary thinking: { type: "enabled" | "disabled" } // Both Z.ai and Qwen use enable_thinking: boolean
// Must explicitly disable since z.ai defaults to thinking enabled
(params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
// Qwen uses enable_thinking: boolean
(params as any).enable_thinking = !!options?.reasoningEffort; (params as any).enable_thinking = !!options?.reasoningEffort;
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) { } else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort // OpenAI-style reasoning_effort