From 87a1a9ded4df489d43b26137b5a22a1d5549f11e Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Mon, 8 Dec 2025 19:00:57 +0100 Subject: [PATCH] Add OpenAICompat for openai-completions provider quirks Fixes #133 --- packages/ai/CHANGELOG.md | 2 + packages/ai/README.md | 36 +++++++++ .../ai/src/providers/openai-completions.ts | 77 +++++++++++++------ packages/ai/src/types.ts | 17 ++++ packages/coding-agent/CHANGELOG.md | 4 + packages/coding-agent/README.md | 41 ++++++++++ packages/coding-agent/src/model-config.ts | 12 ++- 7 files changed, 165 insertions(+), 24 deletions(-) diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md index a333e15b..ff468b87 100644 --- a/packages/ai/CHANGELOG.md +++ b/packages/ai/CHANGELOG.md @@ -10,6 +10,8 @@ - Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments. +- **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR) + ## [0.13.0] - 2025-12-06 ### Breaking Changes diff --git a/packages/ai/README.md b/packages/ai/README.md index 52d69605..67a17c68 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -611,6 +611,23 @@ const ollamaModel: Model<'openai-completions'> = { maxTokens: 32000 }; +// Example: LiteLLM proxy with explicit compat settings +const litellmModel: Model<'openai-completions'> = { + id: 'gpt-4o', + name: 'GPT-4o (via LiteLLM)', + api: 'openai-completions', + provider: 'litellm', + baseUrl: 'http://localhost:4000/v1', + reasoning: false, + input: ['text', 'image'], + cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 16384, + compat: { + supportsStore: false, // LiteLLM doesn't support the store field + } +}; + // Example: Custom endpoint with headers (bypassing Cloudflare bot detection) const proxyModel: Model<'anthropic-messages'> = { id: 'claude-sonnet-4', @@ -635,6 +652,25 @@ const response = await stream(ollamaModel, context, { }); ``` +### OpenAI Compatibility Settings + +The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field: + +```typescript +interface OpenAICompat { + supportsStore?: boolean; // Whether provider supports the `store` field (default: true) + supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true) + supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true) + maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens) +} +``` + +If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. This is useful for: + +- **LiteLLM proxies**: May not support `store` field +- **Custom inference servers**: May use non-standard field names +- **Self-hosted endpoints**: May have different feature support + ### Type Safety Models are typed by their API, ensuring type-safe options: diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index ca9f1c30..582e826f 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -12,6 +12,7 @@ import type { AssistantMessage, Context, Model, + OpenAICompat, StopReason, StreamFunction, StreamOptions, @@ -267,7 +268,8 @@ function createClient(model: Model<"openai-completions">, apiKey?: string) { } function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) { - const messages = convertMessages(model, context); + const compat = getCompat(model); + const messages = convertMessages(model, context, compat); const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: model.id, @@ -276,27 +278,20 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio stream_options: { include_usage: true }, }; - // Cerebras/xAI/Mistral dont like the "store" field - if ( - !model.baseUrl.includes("cerebras.ai") && - !model.baseUrl.includes("api.x.ai") && - !model.baseUrl.includes("mistral.ai") && - !model.baseUrl.includes("chutes.ai") - ) { + if (compat.supportsStore) { params.store = false; } if (options?.maxTokens) { - // Mistral/Chutes uses max_tokens instead of max_completion_tokens - if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) { - (params as any).max_tokens = options?.maxTokens; + if (compat.maxTokensField === "max_tokens") { + (params as any).max_tokens = options.maxTokens; } else { - params.max_completion_tokens = options?.maxTokens; + params.max_completion_tokens = options.maxTokens; } } if (options?.temperature !== undefined) { - params.temperature = options?.temperature; + params.temperature = options.temperature; } if (context.tools) { @@ -307,27 +302,24 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio params.tool_choice = options.toolChoice; } - // Grok models don't like reasoning_effort - if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) { + if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) { params.reasoning_effort = options.reasoningEffort; } return params; } -function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] { +function convertMessages( + model: Model<"openai-completions">, + context: Context, + compat: Required, +): ChatCompletionMessageParam[] { const params: ChatCompletionMessageParam[] = []; const transformedMessages = transformMessages(context.messages, model); if (context.systemPrompt) { - // Cerebras/xAi/Mistral/Chutes don't like the "developer" role - const useDeveloperRole = - model.reasoning && - !model.baseUrl.includes("cerebras.ai") && - !model.baseUrl.includes("api.x.ai") && - !model.baseUrl.includes("mistral.ai") && - !model.baseUrl.includes("chutes.ai"); + const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole; const role = useDeveloperRole ? "developer" : "system"; params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) }); } @@ -482,3 +474,42 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto } } } + +/** + * Detect compatibility settings from baseUrl for known providers. + * Returns a fully resolved OpenAICompat object with all fields set. + */ +function detectCompatFromUrl(baseUrl: string): Required { + const isNonStandard = + baseUrl.includes("cerebras.ai") || + baseUrl.includes("api.x.ai") || + baseUrl.includes("mistral.ai") || + baseUrl.includes("chutes.ai"); + + const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai"); + + const isGrok = baseUrl.includes("api.x.ai"); + + return { + supportsStore: !isNonStandard, + supportsDeveloperRole: !isNonStandard, + supportsReasoningEffort: !isGrok, + maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens", + }; +} + +/** + * Get resolved compatibility settings for a model. + * Uses explicit model.compat if provided, otherwise auto-detects from URL. + */ +function getCompat(model: Model<"openai-completions">): Required { + const detected = detectCompatFromUrl(model.baseUrl); + if (!model.compat) return detected; + + return { + supportsStore: model.compat.supportsStore ?? detected.supportsStore, + supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole, + supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort, + maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField, + }; +} diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index a7269bc8..0f22a3f2 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -152,6 +152,21 @@ export type AssistantMessageEvent = | { type: "done"; reason: Extract; message: AssistantMessage } | { type: "error"; reason: Extract; error: AssistantMessage }; +/** + * Compatibility settings for openai-completions API. + * Use this to override URL-based auto-detection for custom providers. + */ +export interface OpenAICompat { + /** Whether the provider supports the `store` field. Default: auto-detected from URL. */ + supportsStore?: boolean; + /** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */ + supportsDeveloperRole?: boolean; + /** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */ + supportsReasoningEffort?: boolean; + /** Which field to use for max tokens. Default: auto-detected from URL. */ + maxTokensField?: "max_completion_tokens" | "max_tokens"; +} + // Model interface for the unified model system export interface Model { id: string; @@ -170,4 +185,6 @@ export interface Model { contextWindow: number; maxTokens: number; headers?: Record; + /** Compatibility overrides for openai-completions API. If not set, auto-detected from baseUrl. */ + compat?: TApi extends "openai-completions" ? OpenAICompat : never; } diff --git a/packages/coding-agent/CHANGELOG.md b/packages/coding-agent/CHANGELOG.md index 355bd294..6b507fb5 100644 --- a/packages/coding-agent/CHANGELOG.md +++ b/packages/coding-agent/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- **OpenAI compatibility overrides in models.json**: Custom models using `openai-completions` API can now specify a `compat` object to override provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR) + ## [0.13.2] - 2025-12-07 ### Changed diff --git a/packages/coding-agent/README.md b/packages/coding-agent/README.md index 5f47d103..1e6a73c6 100644 --- a/packages/coding-agent/README.md +++ b/packages/coding-agent/README.md @@ -315,6 +315,47 @@ You can add custom HTTP headers to bypass Cloudflare bot detection, add authenti - **Model-level `headers`**: Additional headers for specific models (merged with provider headers) - Model headers override provider headers when keys conflict +### OpenAI Compatibility Settings + +The `openai-completions` API is implemented by many providers with minor differences (Ollama, vLLM, LiteLLM, llama.cpp, etc.). By default, compatibility settings are auto-detected from the `baseUrl`. For custom proxies or unknown endpoints, you can override these via the `compat` field on models: + +```json +{ + "providers": { + "litellm": { + "baseUrl": "http://localhost:4000/v1", + "apiKey": "LITELLM_API_KEY", + "api": "openai-completions", + "models": [ + { + "id": "gpt-4o", + "name": "GPT-4o (via LiteLLM)", + "reasoning": false, + "input": ["text", "image"], + "cost": {"input": 2.5, "output": 10, "cacheRead": 0, "cacheWrite": 0}, + "contextWindow": 128000, + "maxTokens": 16384, + "compat": { + "supportsStore": false + } + } + ] + } + } +} +``` + +Available `compat` fields (all optional, auto-detected if not set): + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `supportsStore` | boolean | auto | Whether provider supports the `store` field | +| `supportsDeveloperRole` | boolean | auto | Whether provider supports `developer` role (vs `system`) | +| `supportsReasoningEffort` | boolean | auto | Whether provider supports `reasoning_effort` parameter | +| `maxTokensField` | string | auto | Use `"max_completion_tokens"` or `"max_tokens"` | + +If `compat` is partially set, unspecified fields use auto-detected values. + ### Authorization Header Some providers require an explicit `Authorization: Bearer ` header. Set `authHeader: true` to automatically add this header using the resolved `apiKey`: diff --git a/packages/coding-agent/src/model-config.ts b/packages/coding-agent/src/model-config.ts index abbebdb1..820fab33 100644 --- a/packages/coding-agent/src/model-config.ts +++ b/packages/coding-agent/src/model-config.ts @@ -9,6 +9,14 @@ import { loadOAuthCredentials } from "./oauth/storage.js"; // Handle both default and named exports const Ajv = (AjvModule as any).default || AjvModule; +// Schema for OpenAI compatibility settings +const OpenAICompatSchema = Type.Object({ + supportsStore: Type.Optional(Type.Boolean()), + supportsDeveloperRole: Type.Optional(Type.Boolean()), + supportsReasoningEffort: Type.Optional(Type.Boolean()), + maxTokensField: Type.Optional(Type.Union([Type.Literal("max_completion_tokens"), Type.Literal("max_tokens")])), +}); + // Schema for custom model definition const ModelDefinitionSchema = Type.Object({ id: Type.String({ minLength: 1 }), @@ -32,6 +40,7 @@ const ModelDefinitionSchema = Type.Object({ contextWindow: Type.Number(), maxTokens: Type.Number(), headers: Type.Optional(Type.Record(Type.String(), Type.String())), + compat: Type.Optional(OpenAICompatSchema), }); const ProviderConfigSchema = Type.Object({ @@ -201,7 +210,8 @@ function parseModels(config: ModelsConfig): Model[] { contextWindow: modelDef.contextWindow, maxTokens: modelDef.maxTokens, headers, - }); + compat: modelDef.compat, + } as Model); } }