Merge branch 'merge/pr-940'

This commit is contained in:
Mario Zechner 2026-02-01 01:53:26 +01:00
commit 39d69f42fd
13 changed files with 395 additions and 7 deletions

11
package-lock.json generated
View file

@ -12,7 +12,8 @@
"packages/web-ui/example",
"packages/coding-agent/examples/extensions/with-deps",
"packages/coding-agent/examples/extensions/custom-provider-anthropic",
"packages/coding-agent/examples/extensions/custom-provider-gitlab-duo"
"packages/coding-agent/examples/extensions/custom-provider-gitlab-duo",
"packages/coding-agent/examples/extensions/custom-provider-qwen-cli"
],
"dependencies": {
"@mariozechner/jiti": "^2.6.5",
@ -7136,6 +7137,10 @@
"resolved": "packages/coding-agent/examples/extensions/custom-provider-gitlab-duo",
"link": true
},
"node_modules/pi-extension-custom-provider-qwen-cli": {
"resolved": "packages/coding-agent/examples/extensions/custom-provider-qwen-cli",
"link": true
},
"node_modules/pi-extension-with-deps": {
"resolved": "packages/coding-agent/examples/extensions/with-deps",
"link": true
@ -9020,6 +9025,10 @@
"name": "pi-extension-custom-provider-gitlab-duo",
"version": "1.1.7"
},
"packages/coding-agent/examples/extensions/custom-provider-qwen-cli": {
"name": "pi-extension-custom-provider-qwen-cli",
"version": "1.0.0"
},
"packages/coding-agent/examples/extensions/with-deps": {
"name": "pi-extension-with-deps",
"version": "1.14.7",

View file

@ -7,7 +7,8 @@
"packages/web-ui/example",
"packages/coding-agent/examples/extensions/with-deps",
"packages/coding-agent/examples/extensions/custom-provider-anthropic",
"packages/coding-agent/examples/extensions/custom-provider-gitlab-duo"
"packages/coding-agent/examples/extensions/custom-provider-gitlab-duo",
"packages/coding-agent/examples/extensions/custom-provider-qwen-cli"
],
"scripts": {
"clean": "npm run clean --workspaces",

View file

@ -5,6 +5,7 @@
### Added
- Added `maxRetryDelayMs` option to `StreamOptions` to cap server-requested retry delays. When a provider (e.g., Google Gemini CLI) requests a delay longer than this value, the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). Set to 0 to disable the cap. ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
- Added Qwen thinking format support for OpenAI-compatible completions via `enable_thinking`. ([#940](https://github.com/badlogic/pi-mono/pull/940) by [@4h9fbZ](https://github.com/4h9fbZ))
## [0.50.7] - 2026-01-31

View file

@ -732,7 +732,7 @@ interface OpenAICompletionsCompat {
supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens)
thinkingFormat?: 'openai' | 'zai'; // Format for reasoning param: 'openai' uses reasoning_effort, 'zai' uses thinking: { type: "enabled" } (default: openai)
thinkingFormat?: 'openai' | 'zai' | 'qwen'; // Format for reasoning param: 'openai' uses reasoning_effort, 'zai' uses thinking: { type: "enabled" }, 'qwen' uses enable_thinking: boolean (default: openai)
}
interface OpenAIResponsesCompat {

View file

@ -442,6 +442,9 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
// Must explicitly disable since z.ai defaults to thinking enabled
(params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
// Qwen uses enable_thinking: boolean
(params as any).enable_thinking = !!options?.reasoningEffort;
} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
// OpenAI-style reasoning_effort
params.reasoning_effort = options.reasoningEffort;

View file

@ -222,8 +222,8 @@ export interface OpenAICompletionsCompat {
requiresThinkingAsText?: boolean;
/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
requiresMistralToolIds?: boolean;
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }. Default: "openai". */
thinkingFormat?: "openai" | "zai";
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "zai" uses thinking: { type: "enabled" }, "qwen" uses enable_thinking: boolean. Default: "openai". */
thinkingFormat?: "openai" | "zai" | "qwen";
/** OpenRouter-specific routing preferences. Only used when baseUrl points to OpenRouter. */
openRouterRouting?: OpenRouterRouting;
/** Vercel AI Gateway routing preferences. Only used when baseUrl points to Vercel AI Gateway. */

View file

@ -7,6 +7,8 @@
- Added `newSession`, `tree`, and `fork` keybinding actions for `/new`, `/tree`, and `/fork` commands. All unbound by default. ([#1114](https://github.com/badlogic/pi-mono/pull/1114) by [@juanibiapina](https://github.com/juanibiapina))
- Added `retry.maxDelayMs` setting to cap maximum server-requested retry delay. When a provider requests a longer delay (e.g., Google's "quota will reset after 5h"), the request fails immediately with an informative error instead of waiting silently. Default: 60000ms (60 seconds). ([#1123](https://github.com/badlogic/pi-mono/issues/1123))
- `/resume` session picker: new "Threaded" sort mode (now default) displays sessions in a tree structure based on fork relationships. Compact one-line format with message count and age on the right. ([#1124](https://github.com/badlogic/pi-mono/pull/1124) by [@pasky](https://github.com/pasky))
- Added Qwen CLI OAuth provider extension example. ([#940](https://github.com/badlogic/pi-mono/pull/940) by [@4h9fbZ](https://github.com/4h9fbZ))
- Added OAuth `modifyModels` hook support for extension-registered providers at registration time. ([#940](https://github.com/badlogic/pi-mono/pull/940) by [@4h9fbZ](https://github.com/4h9fbZ))
### Fixed

View file

@ -135,6 +135,7 @@ models: [{
maxTokensField: "max_tokens", // instead of "max_completion_tokens"
requiresToolResultName: true, // tool results need name field
requiresMistralToolIds: true, // tool IDs must be 9 alphanumeric chars
thinkingFormat: "qwen" // sends enable_thinking: boolean instead of reasoning_effort
}
}]
```
@ -532,7 +533,7 @@ interface ProviderModelConfig {
requiresAssistantAfterToolResult?: boolean;
requiresThinkingAsText?: boolean;
requiresMistralToolIds?: boolean;
thinkingFormat?: "openai" | "zai";
thinkingFormat?: "openai" | "zai" | "qwen";
};
}
```

View file

@ -108,6 +108,7 @@ cp permission-gate.ts ~/.pi/agent/extensions/
|-----------|-------------|
| `custom-provider-anthropic/` | Custom Anthropic provider with OAuth support and custom streaming implementation |
| `custom-provider-gitlab-duo/` | GitLab Duo provider using pi-ai's built-in Anthropic/OpenAI streaming via proxy |
| `custom-provider-qwen-cli/` | Qwen CLI provider with OAuth device flow and OpenAI-compatible models |
### External Dependencies

View file

@ -0,0 +1 @@
node_modules/

View file

@ -0,0 +1,345 @@
/**
* Qwen CLI Provider Extension
*
* Provides access to Qwen models via OAuth authentication with chat.qwen.ai.
* Uses device code flow with PKCE for secure browser-based authentication.
*
* Usage:
* pi -e ./packages/coding-agent/examples/extensions/custom-provider-qwen-cli
* # Then /login qwen-cli, or set QWEN_CLI_API_KEY=...
*/
import type { OAuthCredentials, OAuthLoginCallbacks } from "@mariozechner/pi-ai";
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
// =============================================================================
// Constants
// =============================================================================
/** Endpoint that startDeviceFlow() POSTs to in order to obtain a device code and user code. */
const QWEN_DEVICE_CODE_ENDPOINT = "https://chat.qwen.ai/api/v1/oauth2/device/code";
/** Token endpoint, used both for device-code polling (pollForToken) and for token refresh (refreshQwenToken). */
const QWEN_TOKEN_ENDPOINT = "https://chat.qwen.ai/api/v1/oauth2/token";
/** Value sent as `client_id` in every OAuth request made by this extension. */
const QWEN_CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56";
/** Space-separated scopes sent as `scope` in the device code request. */
const QWEN_SCOPE = "openid profile email model.completion";
/** `grant_type` value sent while polling the token endpoint with a device code. */
const QWEN_GRANT_TYPE = "urn:ietf:params:oauth:grant-type:device_code";
/** API base URL used at registration and whenever the credentials carry no resource URL. */
const QWEN_DEFAULT_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1";
/** Fallback polling interval (milliseconds) used when the device code response has no usable `interval`. */
const QWEN_POLL_INTERVAL_MS = 2000;
// =============================================================================
// PKCE Helpers
// =============================================================================
/**
 * Creates a fresh PKCE verifier/challenge pair.
 *
 * The verifier is 32 random bytes encoded as unpadded base64url. The challenge
 * is the SHA-256 digest of the verifier text, encoded the same way.
 *
 * @returns The verifier to keep locally and the challenge derived from it.
 */
async function generatePKCE(): Promise<{ verifier: string; challenge: string }> {
	// Standard base64 mapped onto the URL-safe alphabet, with trailing padding removed.
	const toBase64Url = (bytes: Uint8Array): string => {
		let binary = "";
		for (let i = 0; i < bytes.length; i++) {
			binary += String.fromCharCode(bytes[i]);
		}
		return btoa(binary).replace(/\+/g, "-").replace(/\//g, "_").replace(/=+$/, "");
	};

	const randomBytes = new Uint8Array(32);
	crypto.getRandomValues(randomBytes);
	const verifier = toBase64Url(randomBytes);

	const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(verifier));
	const challenge = toBase64Url(new Uint8Array(digest));

	return { verifier, challenge };
}
// =============================================================================
// OAuth Implementation
// =============================================================================
/** JSON body returned by the device code endpoint, as consumed by loginQwen() and pollForToken(). */
interface DeviceCodeResponse {
/** Opaque code sent back as `device_code` on every token poll. */
device_code: string;
/** Code shown to the user ("Enter code: ...") when no complete verification URL is provided. */
user_code: string;
/** URL the user opens to approve the login. */
verification_uri: string;
/** Verification URL preferred over `verification_uri` when present; no separate code instructions are shown with it. */
verification_uri_complete?: string;
/** Lifetime of the device code in seconds; pollForToken() stops polling after this long. */
expires_in: number;
/** Polling interval in seconds; when absent or invalid, QWEN_POLL_INTERVAL_MS is used instead. */
interval?: number;
}
/** JSON body returned by the token endpoint for both device-code polling and token refresh. */
interface TokenResponse {
/** Access token; stored as the credential's `access` value. */
access_token: string;
/** Refresh token; on refresh the previous one is kept when the response omits it. */
refresh_token?: string;
/** Token type reported by the server (not read by the code in this file). */
token_type: string;
/** Access token lifetime in seconds; the stored expiry is shortened by five minutes. */
expires_in: number;
/** Optional resource URL; stored as `enterpriseUrl` and turned into the API base URL by getQwenBaseUrl(). */
resource_url?: string;
}
/**
 * Waits for `ms` milliseconds, rejecting early if `signal` is aborted.
 *
 * @param ms - Delay in milliseconds.
 * @param signal - Optional abort signal; when it fires (or is already aborted)
 *   the promise rejects with `Error("Login cancelled")`.
 * @returns A promise that resolves once the delay has elapsed.
 */
function abortableSleep(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise((resolve, reject) => {
		if (signal?.aborted) {
			reject(new Error("Login cancelled"));
			return;
		}

		const onAbort = (): void => {
			clearTimeout(timeout);
			reject(new Error("Login cancelled"));
		};

		const timeout = setTimeout(() => {
			// The same signal is reused for every poll iteration, so detach the
			// listener here; otherwise each completed sleep leaves one behind.
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);

		signal?.addEventListener("abort", onAbort, { once: true });
	});
}
/**
 * Starts the OAuth device flow by requesting a device code from Qwen.
 *
 * A new PKCE pair is generated for the request; the challenge is sent to the
 * server and the verifier is returned for the later token exchange.
 *
 * @returns The parsed device code response and the PKCE verifier.
 * @throws Error if the HTTP request fails or the response lacks
 *   `device_code`, `user_code` or `verification_uri`.
 */
async function startDeviceFlow(): Promise<{ deviceCode: DeviceCodeResponse; verifier: string }> {
	const pkce = await generatePKCE();

	const form = new URLSearchParams();
	form.set("client_id", QWEN_CLIENT_ID);
	form.set("scope", QWEN_SCOPE);
	form.set("code_challenge", pkce.challenge);
	form.set("code_challenge_method", "S256");

	const requestHeaders: Record<string, string> = {
		"Content-Type": "application/x-www-form-urlencoded",
		Accept: "application/json",
	};
	// Attach a request id only when the runtime can generate one.
	const requestId = globalThis.crypto?.randomUUID?.();
	if (requestId) {
		requestHeaders["x-request-id"] = requestId;
	}

	const response = await fetch(QWEN_DEVICE_CODE_ENDPOINT, {
		method: "POST",
		headers: requestHeaders,
		body: form.toString(),
	});
	if (!response.ok) {
		const text = await response.text();
		throw new Error(`Device code request failed: ${response.status} ${text}`);
	}

	const payload = (await response.json()) as DeviceCodeResponse;
	const hasRequiredFields = Boolean(payload.device_code && payload.user_code && payload.verification_uri);
	if (!hasRequiredFields) {
		throw new Error("Invalid device code response: missing required fields");
	}

	return { deviceCode: payload, verifier: pkce.verifier };
}
/**
 * Polls the token endpoint until the device code is approved, rejected or expired.
 *
 * @param deviceCode - Device code obtained from the device code endpoint.
 * @param verifier - PKCE verifier matching the challenge sent with the device code request.
 * @param intervalSeconds - Server-suggested polling interval in seconds. When missing or
 *   not a positive finite number, QWEN_POLL_INTERVAL_MS is used. The effective interval is
 *   never below one second.
 * @param expiresIn - Lifetime of the device code in seconds; polling stops after this long.
 * @param signal - Optional abort signal used to cancel the login.
 * @returns The token response once it contains an access token.
 * @throws Error("Login cancelled") when `signal` is aborted, and Error with a descriptive
 *   message when the code expires, access is denied, the server reports another error,
 *   or the deadline passes.
 */
async function pollForToken(
	deviceCode: string,
	verifier: string,
	intervalSeconds: number | undefined,
	expiresIn: number,
	signal?: AbortSignal,
): Promise<TokenResponse> {
	type TokenPayload = TokenResponse & { error?: string; error_description?: string };

	const deadline = Date.now() + expiresIn * 1000;
	const resolvedIntervalSeconds =
		typeof intervalSeconds === "number" && Number.isFinite(intervalSeconds) && intervalSeconds > 0
			? intervalSeconds
			: QWEN_POLL_INTERVAL_MS / 1000;
	let intervalMs = Math.max(1000, Math.floor(resolvedIntervalSeconds * 1000));

	// The request body is identical for every attempt, so encode it once.
	const requestBody = new URLSearchParams({
		grant_type: QWEN_GRANT_TYPE,
		client_id: QWEN_CLIENT_ID,
		device_code: deviceCode,
		code_verifier: verifier,
	}).toString();

	// Sends one poll request. The signal is forwarded so cancelling does not have to
	// wait for the in-flight request; an abort is reported with the same message the
	// rest of the login flow uses.
	const requestToken = async () => {
		try {
			const response = await fetch(QWEN_TOKEN_ENDPOINT, {
				method: "POST",
				headers: {
					"Content-Type": "application/x-www-form-urlencoded",
					Accept: "application/json",
				},
				body: requestBody,
				signal,
			});
			return { response, responseText: await response.text() };
		} catch (err) {
			if (signal?.aborted) {
				throw new Error("Login cancelled");
			}
			throw err;
		}
	};

	// Waits and returns true for retryable errors; throws for terminal ones.
	const handleTokenError = async (error: string, description?: string): Promise<boolean> => {
		switch (error) {
			case "authorization_pending":
				await abortableSleep(intervalMs, signal);
				return true;
			case "slow_down":
				// Back off by 5s up to a 10s ceiling, but never go below the current
				// interval: a server-provided interval above 10s must not be shortened
				// in response to a request to slow down.
				intervalMs = Math.max(intervalMs, Math.min(intervalMs + 5000, 10000));
				await abortableSleep(intervalMs, signal);
				return true;
			case "expired_token":
				throw new Error("Device code expired. Please restart authentication.");
			case "access_denied":
				throw new Error("Authorization denied by user.");
			default:
				throw new Error(`Token request failed: ${error} - ${description || ""}`);
		}
	};

	while (Date.now() < deadline) {
		if (signal?.aborted) {
			throw new Error("Login cancelled");
		}

		const { response, responseText } = await requestToken();

		// Error responses are not guaranteed to be JSON; treat unparsable bodies as "no data".
		let data: TokenPayload | null = null;
		if (responseText) {
			try {
				data = JSON.parse(responseText) as TokenPayload;
			} catch {
				data = null;
			}
		}
		const error = data?.error;
		const errorDescription = data?.error_description;

		if (!response.ok) {
			if (error && (await handleTokenError(error, errorDescription))) {
				continue;
			}
			throw new Error(`Token request failed: ${response.status} ${response.statusText}. Response: ${responseText}`);
		}

		if (data?.access_token) {
			return data;
		}

		// Some servers report pending/slow_down with a 2xx status; handle those the same way.
		if (error && (await handleTokenError(error, errorDescription))) {
			continue;
		}
		throw new Error("Token request failed: missing access token in response");
	}

	throw new Error("Authentication timed out. Please try again.");
}
/**
 * Runs the full Qwen device-code login and returns the resulting credentials.
 *
 * @param callbacks - Login callbacks: `onAuth` receives the verification URL (plus the
 *   user code as instructions when the URL does not already embed it) and `signal`
 *   is forwarded to the polling loop.
 * @returns Credentials with the access token, refresh token (empty string when none was
 *   issued), an expiry five minutes earlier than the server-reported one, and the
 *   server-provided resource URL stored as `enterpriseUrl`.
 */
async function loginQwen(callbacks: OAuthLoginCallbacks): Promise<OAuthCredentials> {
	const flow = await startDeviceFlow();
	const device = flow.deviceCode;

	// Tell the user where to approve the login. A "complete" URL already embeds the
	// code, so the code is only spelled out when the plain URL has to be used.
	if (device.verification_uri_complete) {
		callbacks.onAuth({ url: device.verification_uri_complete, instructions: undefined });
	} else {
		callbacks.onAuth({ url: device.verification_uri, instructions: `Enter code: ${device.user_code}` });
	}

	const token = await pollForToken(
		device.device_code,
		flow.verifier,
		device.interval,
		device.expires_in,
		callbacks.signal,
	);

	// Treat the token as expired five minutes early.
	const safetyMarginMs = 5 * 60 * 1000;
	const expiresAt = Date.now() + token.expires_in * 1000 - safetyMarginMs;

	return {
		refresh: token.refresh_token || "",
		access: token.access_token,
		expires: expiresAt,
		// Kept so the API base URL can be derived from it later.
		enterpriseUrl: token.resource_url,
	};
}
/**
 * Exchanges the stored refresh token for a new access token.
 *
 * @param credentials - Current credentials; `refresh` must hold a refresh token.
 * @returns Updated credentials. The previous refresh token and `enterpriseUrl` are kept
 *   when the server response does not include replacements. The expiry is set five
 *   minutes earlier than the server-reported lifetime.
 * @throws Error when no refresh token is stored, the HTTP request fails, or the response
 *   has no access token.
 */
async function refreshQwenToken(credentials: OAuthCredentials): Promise<OAuthCredentials> {
	// Login stores an empty string when the server issued no refresh token; sending that
	// would only produce an opaque server error, so fail early with a clear message.
	if (!credentials.refresh) {
		throw new Error("Token refresh failed: no refresh token available. Please log in again.");
	}

	const body = new URLSearchParams({
		grant_type: "refresh_token",
		refresh_token: credentials.refresh,
		client_id: QWEN_CLIENT_ID,
	});

	const response = await fetch(QWEN_TOKEN_ENDPOINT, {
		method: "POST",
		headers: {
			"Content-Type": "application/x-www-form-urlencoded",
			Accept: "application/json",
		},
		body: body.toString(),
	});

	if (!response.ok) {
		const text = await response.text();
		throw new Error(`Token refresh failed: ${response.status} ${text}`);
	}

	const data = (await response.json()) as TokenResponse;
	if (!data.access_token) {
		throw new Error("Token refresh failed: no access token in response");
	}

	// Treat the token as expired five minutes early.
	const expiresAt = Date.now() + data.expires_in * 1000 - 5 * 60 * 1000;

	return {
		refresh: data.refresh_token || credentials.refresh,
		access: data.access_token,
		expires: expiresAt,
		enterpriseUrl: data.resource_url ?? credentials.enterpriseUrl,
	};
}
/**
 * Derives the OpenAI-compatible API base URL from the resource URL stored with the credentials.
 *
 * @param resourceUrl - Host or URL reported by the token endpoint; may lack a scheme,
 *   may or may not end in `/v1`, and may carry trailing slashes.
 * @returns QWEN_DEFAULT_BASE_URL when `resourceUrl` is missing or blank; otherwise an
 *   `http(s)://` URL ending in exactly one `/v1` with no trailing slash.
 */
function getQwenBaseUrl(resourceUrl?: string): string {
	const trimmed = resourceUrl?.trim();
	if (!trimmed) {
		return QWEN_DEFAULT_BASE_URL;
	}

	// Require a real scheme prefix: a bare host that merely starts with "http"
	// (e.g. "httpbin.example.com") still needs "https://" prepended.
	const withScheme = /^https?:\/\//i.test(trimmed) ? trimmed : `https://${trimmed}`;

	// Drop trailing slashes so ".../" and ".../v1/" do not turn into "...//v1" or ".../v1//v1".
	const withoutTrailingSlash = withScheme.replace(/\/+$/, "");

	return withoutTrailingSlash.endsWith("/v1") ? withoutTrailingSlash : `${withoutTrailingSlash}/v1`;
}
// =============================================================================
// Extension Entry Point
// =============================================================================
/**
 * Extension entry point: registers the "qwen-cli" provider with its model list and OAuth hooks.
 *
 * Models are registered against QWEN_DEFAULT_BASE_URL. The OAuth `modifyModels` hook
 * rewrites the base URL of this provider's models from the resource URL stored with
 * the credentials.
 *
 * @param pi - Extension API used to register the provider.
 */
export default function (pi: ExtensionAPI) {
	const providerId = "qwen-cli";
	// All models are listed at zero cost; build a fresh object per model so none share a reference.
	const noCost = () => ({ input: 0, output: 0, cacheRead: 0, cacheWrite: 0 });

	pi.registerProvider(providerId, {
		baseUrl: QWEN_DEFAULT_BASE_URL,
		// Name of the environment variable that can supply an API key instead of OAuth login.
		apiKey: "QWEN_CLI_API_KEY",
		api: "openai-completions",
		models: [
			{
				id: "qwen3-coder-plus",
				name: "Qwen3 Coder Plus",
				reasoning: false,
				input: ["text"],
				cost: noCost(),
				contextWindow: 1000000,
				maxTokens: 65536,
			},
			{
				id: "qwen3-coder-flash",
				name: "Qwen3 Coder Flash",
				reasoning: false,
				input: ["text"],
				cost: noCost(),
				contextWindow: 1000000,
				maxTokens: 65536,
			},
			{
				id: "vision-model",
				name: "Qwen3 VL Plus",
				reasoning: true,
				input: ["text", "image"],
				cost: noCost(),
				contextWindow: 262144,
				maxTokens: 32768,
				// Thinking is toggled with Qwen's enable_thinking flag.
				compat: { supportsDeveloperRole: false, thinkingFormat: "qwen" },
			},
		],
		oauth: {
			name: "Qwen CLI",
			login: loginQwen,
			refreshToken: refreshQwenToken,
			getApiKey: (cred) => {
				return cred.access;
			},
			modifyModels: (models, cred) => {
				const baseUrl = getQwenBaseUrl(cred.enterpriseUrl as string | undefined);
				return models.map((model) => {
					// Leave models of other providers untouched.
					if (model.provider !== providerId) {
						return model;
					}
					return { ...model, baseUrl };
				});
			},
		},
	});
}

View file

@ -0,0 +1,16 @@
{
"name": "pi-extension-custom-provider-qwen-cli",
"private": true,
"version": "1.0.0",
"type": "module",
"scripts": {
"clean": "echo 'nothing to clean'",
"build": "echo 'nothing to build'",
"check": "echo 'nothing to check'"
},
"pi": {
"extensions": [
"./index.ts"
]
}
}

View file

@ -48,7 +48,7 @@ const OpenAICompletionsCompatSchema = Type.Object({
requiresAssistantAfterToolResult: Type.Optional(Type.Boolean()),
requiresThinkingAsText: Type.Optional(Type.Boolean()),
requiresMistralToolIds: Type.Optional(Type.Boolean()),
thinkingFormat: Type.Optional(Type.Union([Type.Literal("openai"), Type.Literal("zai")])),
thinkingFormat: Type.Optional(Type.Union([Type.Literal("openai"), Type.Literal("zai"), Type.Literal("qwen")])),
openRouterRouting: Type.Optional(OpenRouterRoutingSchema),
vercelGatewayRouting: Type.Optional(VercelGatewayRoutingSchema),
});
@ -544,6 +544,14 @@ export class ModelRegistry {
compat: modelDef.compat,
} as Model<Api>);
}
// Apply OAuth modifyModels if credentials exist (e.g., to update baseUrl)
if (config.oauth?.modifyModels) {
const cred = this.authStorage.get(providerName);
if (cred?.type === "oauth") {
this.models = config.oauth.modifyModels(this.models, cred);
}
}
} else if (config.baseUrl) {
// Override-only: update baseUrl/headers for existing models
const resolvedHeaders = resolveHeaders(config.headers);