From b6b64dff864a52e358d99cc68d7365798f24c780 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Tue, 28 Oct 2025 00:21:54 +0100 Subject: [PATCH] Better proxy handling. --- packages/ai/src/models.generated.ts | 179 ++++++++++-------- packages/ai/src/stream.ts | 4 +- .../src/agent/transports/ProviderTransport.ts | 15 +- .../web-ui/src/components/ProviderKeyInput.ts | 11 +- packages/web-ui/src/tools/extract-document.ts | 137 ++++++++------ packages/web-ui/src/utils/proxy-utils.ts | 112 +++++++++++ 6 files changed, 298 insertions(+), 160 deletions(-) create mode 100644 packages/web-ui/src/utils/proxy-utils.ts diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 083b4b0b..fc3228e8 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -1046,6 +1046,23 @@ export const MODELS = { contextWindow: 400000, maxTokens: 128000, } satisfies Model<"openai-responses">, + "gpt-5-pro": { + id: "gpt-5-pro", + name: "GPT-5 Pro", + api: "openai-responses", + provider: "openai", + baseUrl: "https://api.openai.com/v1", + reasoning: true, + input: ["text", "image"], + cost: { + input: 15, + output: 120, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 400000, + maxTokens: 272000, + } satisfies Model<"openai-responses">, "gpt-5-chat-latest": { id: "gpt-5-chat-latest", name: "GPT-5 Chat Latest", @@ -1915,8 +1932,8 @@ export const MODELS = { reasoning: false, input: ["text"], cost: { - input: 0.39999999999999997, - output: 2, + input: 0.5700000000000001, + output: 2.2800000000000002, cacheRead: 0, cacheWrite: 0, }, @@ -1966,13 +1983,13 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.19999999999999998, - output: 0.7, + input: 0.15, + output: 0.6, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 131072, - maxTokens: 32768, + contextWindow: 4096, + maxTokens: 4000, } satisfies Model<"openai-completions">, "z-ai/glm-4.6": { id: "z-ai/glm-4.6", @@ -1983,8 +2000,8 @@ export const MODELS = { reasoning: true, input: ["text"], cost: { - input: 0.5, - output: 1.75, + input: 0.44999999999999996, + output: 2.0999999999999996, cacheRead: 0, cacheWrite: 0, }, @@ -2051,13 +2068,13 @@ export const MODELS = { reasoning: false, input: ["text", "image"], cost: { - input: 0.3, - output: 1.2, + input: 0.22, + output: 0.88, cacheRead: 0, cacheWrite: 0, }, - contextWindow: 262144, - maxTokens: 262144, + contextWindow: 4096, + maxTokens: 4000, } satisfies Model<"openai-completions">, "qwen/qwen3-max": { id: "qwen/qwen3-max", @@ -3553,7 +3570,7 @@ export const MODELS = { cacheWrite: 0, }, contextWindow: 131072, - maxTokens: 2048, + maxTokens: 4096, } satisfies Model<"openai-completions">, "meta-llama/llama-3.3-70b-instruct": { id: "meta-llama/llama-3.3-70b-instruct", @@ -3810,23 +3827,6 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, - "cohere/command-r-plus-08-2024": { - id: "cohere/command-r-plus-08-2024", - name: "Cohere: Command R+ (08-2024)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 2.5, - output: 10, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4000, - } satisfies Model<"openai-completions">, "cohere/command-r-08-2024": { id: "cohere/command-r-08-2024", name: "Cohere: Command R (08-2024)", @@ -3844,6 +3844,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4000, } satisfies Model<"openai-completions">, + "cohere/command-r-plus-08-2024": { + id: "cohere/command-r-plus-08-2024", + name: "Cohere: Command R+ (08-2024)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 2.5, + output: 10, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4000, + } satisfies Model<"openai-completions">, "sao10k/l3.1-euryale-70b": { id: "sao10k/l3.1-euryale-70b", name: "Sao10K: Llama 3.1 Euryale 70B v2.2", @@ -3912,23 +3929,6 @@ export const MODELS = { contextWindow: 16384, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-405b-instruct": { - id: "meta-llama/llama-3.1-405b-instruct", - name: "Meta: Llama 3.1 405B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.7999999999999999, - output: 0.7999999999999999, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-70b-instruct": { id: "meta-llama/llama-3.1-70b-instruct", name: "Meta: Llama 3.1 70B Instruct", @@ -3946,6 +3946,23 @@ export const MODELS = { contextWindow: 131072, maxTokens: 4096, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-405b-instruct": { + id: "meta-llama/llama-3.1-405b-instruct", + name: "Meta: Llama 3.1 405B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.7999999999999999, + output: 0.7999999999999999, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", @@ -4065,23 +4082,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3-8b-instruct": { - id: "meta-llama/llama-3-8b-instruct", - name: "Meta: Llama 3 8B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.03, - output: 0.06, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3-70b-instruct": { id: "meta-llama/llama-3-70b-instruct", name: "Meta: Llama 3 70B Instruct", @@ -4099,6 +4099,23 @@ export const MODELS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3-8b-instruct": { + id: "meta-llama/llama-3-8b-instruct", + name: "Meta: Llama 3 8B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.03, + output: 0.06, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x22b-instruct": { id: "mistralai/mixtral-8x22b-instruct", name: "Mistral: Mixtral 8x22B Instruct", @@ -4133,23 +4150,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-small": { - id: "mistralai/mistral-small", - name: "Mistral Small", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.19999999999999998, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-tiny": { id: "mistralai/mistral-tiny", name: "Mistral Tiny", @@ -4167,6 +4167,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, + "mistralai/mistral-small": { + id: "mistralai/mistral-small", + name: "Mistral Small", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.19999999999999998, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x7b-instruct": { id: "mistralai/mixtral-8x7b-instruct", name: "Mistral: Mixtral 8x7B Instruct", diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts index c46e7da5..9faf8be7 100644 --- a/packages/ai/src/stream.ts +++ b/packages/ai/src/stream.ts @@ -117,7 +117,7 @@ function mapOptionsForApi( ): OptionsForApi { const base = { temperature: options?.temperature, - maxTokens: options?.maxTokens, + maxTokens: options?.maxTokens || model.maxTokens, signal: options?.signal, apiKey: apiKey || options?.apiKey, }; @@ -130,7 +130,7 @@ function mapOptionsForApi( minimal: 1024, low: 2048, medium: 8192, - high: Math.min(25000, model.maxTokens - 1000), + high: 16384, }; return { diff --git a/packages/web-ui/src/agent/transports/ProviderTransport.ts b/packages/web-ui/src/agent/transports/ProviderTransport.ts index 9682e275..e92f2fa2 100644 --- a/packages/web-ui/src/agent/transports/ProviderTransport.ts +++ b/packages/web-ui/src/agent/transports/ProviderTransport.ts @@ -6,11 +6,12 @@ import { type UserMessage, } from "@mariozechner/pi-ai"; import { getAppStorage } from "../../storage/app-storage.js"; +import { applyProxyIfNeeded } from "../../utils/proxy-utils.js"; import type { AgentRunConfig, AgentTransport } from "./types.js"; /** * Transport that calls LLM providers directly. - * Optionally routes calls through a CORS proxy if enabled in settings. + * Uses CORS proxy only for providers that require it (Anthropic OAuth, Z-AI). */ export class ProviderTransport implements AgentTransport { async *run(messages: Message[], userMessage: Message, cfg: AgentRunConfig, signal?: AbortSignal) { @@ -20,18 +21,12 @@ export class ProviderTransport implements AgentTransport { throw new Error("no-api-key"); } - // Check if CORS proxy is enabled + // Get proxy URL from settings (if available) const proxyEnabled = await getAppStorage().settings.get("proxy.enabled"); const proxyUrl = await getAppStorage().settings.get("proxy.url"); - // Clone model and modify baseUrl if proxy is enabled - let model = cfg.model; - if (proxyEnabled && proxyUrl && cfg.model.baseUrl) { - model = { - ...cfg.model, - baseUrl: `${proxyUrl}/?url=${encodeURIComponent(cfg.model.baseUrl)}`, - }; - } + // Apply proxy only if this provider/key combination requires it + const model = applyProxyIfNeeded(cfg.model, apiKey, proxyEnabled ? proxyUrl || undefined : undefined); // Messages are already LLM-compatible (filtered by Agent) const context: AgentContext = { diff --git a/packages/web-ui/src/components/ProviderKeyInput.ts b/packages/web-ui/src/components/ProviderKeyInput.ts index e74ec0d8..72478c09 100644 --- a/packages/web-ui/src/components/ProviderKeyInput.ts +++ b/packages/web-ui/src/components/ProviderKeyInput.ts @@ -3,6 +3,7 @@ import { type Context, complete, getModel } from "@mariozechner/pi-ai"; import { LitElement } from "lit"; import { customElement, property, state } from "lit/decorators.js"; import { getAppStorage } from "../storage/app-storage.js"; +import { applyProxyIfNeeded } from "../utils/proxy-utils.js"; // Test models for each provider const TEST_MODELS: Record = { @@ -51,16 +52,12 @@ export class ProviderKeyInput extends LitElement { let model = getModel(provider as any, modelId); if (!model) return false; - // Check if CORS proxy is enabled and apply it + // Get proxy URL from settings (if available) const proxyEnabled = await getAppStorage().settings.get("proxy.enabled"); const proxyUrl = await getAppStorage().settings.get("proxy.url"); - if (proxyEnabled && proxyUrl && model.baseUrl) { - model = { - ...model, - baseUrl: `${proxyUrl}/?url=${encodeURIComponent(model.baseUrl)}`, - }; - } + // Apply proxy only if this provider/key combination requires it + model = applyProxyIfNeeded(model, apiKey, proxyEnabled ? proxyUrl || undefined : undefined); const context: Context = { messages: [{ role: "user", content: "Reply with: ok", timestamp: Date.now() }], diff --git a/packages/web-ui/src/tools/extract-document.ts b/packages/web-ui/src/tools/extract-document.ts index 476aab93..5188b423 100644 --- a/packages/web-ui/src/tools/extract-document.ts +++ b/packages/web-ui/src/tools/extract-document.ts @@ -5,6 +5,7 @@ import { createRef, ref } from "lit/directives/ref.js"; import { FileText } from "lucide"; import { EXTRACT_DOCUMENT_DESCRIPTION } from "../prompts/prompts.js"; import { loadAttachment } from "../utils/attachment-utils.js"; +import { isCorsError } from "../utils/proxy-utils.js"; import { registerToolRenderer, renderCollapsibleHeader, renderHeader } from "./renderer-registry.js"; import type { ToolRenderer, ToolRenderResult } from "./types.js"; @@ -34,13 +35,13 @@ export interface ExtractDocumentResult { export function createExtractDocumentTool(): AgentTool & { corsProxyUrl?: string; } { - return { + const tool = { label: "Extract Document", name: "extract_document", - corsProxyUrl: undefined, // Can be set by consumer (e.g., from user settings) + corsProxyUrl: undefined as string | undefined, // Can be set by consumer (e.g., from user settings) description: EXTRACT_DOCUMENT_DESCRIPTION, parameters: extractDocumentSchema, - execute: async function (_toolCallId: string, args: ExtractDocumentParams, signal?: AbortSignal) { + execute: async (_toolCallId: string, args: ExtractDocumentParams, signal?: AbortSignal) => { if (signal?.aborted) { throw new Error("Extract document aborted"); } @@ -57,17 +58,11 @@ export function createExtractDocumentTool(): AgentTool { const response = await fetch(fetchUrl, { signal }); if (!response.ok) { @@ -98,52 +93,31 @@ export function createExtractDocumentTool(): AgentTool` + ? html`` : "" } diff --git a/packages/web-ui/src/utils/proxy-utils.ts b/packages/web-ui/src/utils/proxy-utils.ts new file mode 100644 index 00000000..65c68e57 --- /dev/null +++ b/packages/web-ui/src/utils/proxy-utils.ts @@ -0,0 +1,112 @@ +import type { Api, Model } from "@mariozechner/pi-ai"; + +/** + * Centralized proxy decision logic. + * + * Determines whether to use a CORS proxy for LLM API requests based on: + * - Provider name + * - API key pattern (for providers where it matters) + */ + +/** + * Check if a provider/API key combination requires a CORS proxy. + * + * @param provider - Provider name (e.g., "anthropic", "openai", "zai") + * @param apiKey - API key for the provider + * @returns true if proxy is required, false otherwise + */ +export function shouldUseProxyForProvider(provider: string, apiKey: string): boolean { + switch (provider.toLowerCase()) { + case "zai": + // Z-AI always requires proxy + return true; + + case "anthropic": + // Anthropic OAuth tokens (sk-ant-oat-*) require proxy + // Regular API keys (sk-ant-api-*) do NOT require proxy + return apiKey.startsWith("sk-ant-oat"); + + // These providers work without proxy + case "openai": + case "google": + case "groq": + case "openrouter": + case "cerebras": + case "xai": + case "ollama": + case "lmstudio": + return false; + + // Unknown providers - assume no proxy needed + // This allows new providers to work by default + default: + return false; + } +} + +/** + * Apply CORS proxy to a model's baseUrl if needed. + * + * @param model - The model to potentially proxy + * @param apiKey - API key for the provider + * @param proxyUrl - CORS proxy URL (e.g., "https://proxy.mariozechner.at/proxy") + * @returns Model with modified baseUrl if proxy is needed, otherwise original model + */ +export function applyProxyIfNeeded(model: Model, apiKey: string, proxyUrl?: string): Model { + // If no proxy URL configured, return original model + if (!proxyUrl) { + return model; + } + + // If model has no baseUrl, can't proxy it + if (!model.baseUrl) { + return model; + } + + // Check if this provider/key needs proxy + if (!shouldUseProxyForProvider(model.provider, apiKey)) { + return model; + } + + // Apply proxy to baseUrl + return { + ...model, + baseUrl: `${proxyUrl}/?url=${encodeURIComponent(model.baseUrl)}`, + }; +} + +/** + * Check if an error is likely a CORS error. + * + * CORS errors in browsers typically manifest as: + * - TypeError with message "Failed to fetch" + * - NetworkError + * + * @param error - The error to check + * @returns true if error is likely a CORS error + */ +export function isCorsError(error: unknown): boolean { + if (!(error instanceof Error)) { + return false; + } + + // Check for common CORS error patterns + const message = error.message.toLowerCase(); + + // "Failed to fetch" is the standard CORS error in most browsers + if (error.name === "TypeError" && message.includes("failed to fetch")) { + return true; + } + + // Some browsers report "NetworkError" + if (error.name === "NetworkError") { + return true; + } + + // CORS-specific messages + if (message.includes("cors") || message.includes("cross-origin")) { + return true; + } + + return false; +}