Add OpenAICompat for openai-completions provider quirks

Fixes #133
This commit is contained in:
Mario Zechner 2025-12-08 19:00:57 +01:00
parent e34e0c503f
commit 87a1a9ded4
7 changed files with 165 additions and 24 deletions

View file

@ -10,6 +10,8 @@
- Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
- **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
## [0.13.0] - 2025-12-06
### Breaking Changes

View file

@ -611,6 +611,23 @@ const ollamaModel: Model<'openai-completions'> = {
maxTokens: 32000
};
// Example: LiteLLM proxy with explicit compat settings
const litellmModel: Model<'openai-completions'> = {
id: 'gpt-4o',
name: 'GPT-4o (via LiteLLM)',
api: 'openai-completions',
provider: 'litellm',
baseUrl: 'http://localhost:4000/v1',
reasoning: false,
input: ['text', 'image'],
cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 16384,
compat: {
supportsStore: false, // LiteLLM doesn't support the store field
}
};
// Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
const proxyModel: Model<'anthropic-messages'> = {
id: 'claude-sonnet-4',
@ -635,6 +652,25 @@ const response = await stream(ollamaModel, context, {
});
```
### OpenAI Compatibility Settings
The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field:
```typescript
interface OpenAICompat {
supportsStore?: boolean; // Whether provider supports the `store` field (default: true)
supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: true)
supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: true)
maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: max_completion_tokens)
}
```
If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. This is useful for:
- **LiteLLM proxies**: May not support `store` field
- **Custom inference servers**: May use non-standard field names
- **Self-hosted endpoints**: May have different feature support
### Type Safety
Models are typed by their API, ensuring type-safe options:

View file

@ -12,6 +12,7 @@ import type {
AssistantMessage,
Context,
Model,
OpenAICompat,
StopReason,
StreamFunction,
StreamOptions,
@ -267,7 +268,8 @@ function createClient(model: Model<"openai-completions">, apiKey?: string) {
}
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
const messages = convertMessages(model, context);
const compat = getCompat(model);
const messages = convertMessages(model, context, compat);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: model.id,
@ -276,27 +278,20 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
stream_options: { include_usage: true },
};
// Cerebras/xAI/Mistral/Chutes don't like the "store" field
if (
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
!model.baseUrl.includes("mistral.ai") &&
!model.baseUrl.includes("chutes.ai")
) {
if (compat.supportsStore) {
params.store = false;
}
if (options?.maxTokens) {
// Mistral/Chutes use max_tokens instead of max_completion_tokens
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
(params as any).max_tokens = options?.maxTokens;
if (compat.maxTokensField === "max_tokens") {
(params as any).max_tokens = options.maxTokens;
} else {
params.max_completion_tokens = options?.maxTokens;
params.max_completion_tokens = options.maxTokens;
}
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
params.temperature = options.temperature;
}
if (context.tools) {
@ -307,27 +302,24 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
params.tool_choice = options.toolChoice;
}
// Grok models don't like reasoning_effort
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
params.reasoning_effort = options.reasoningEffort;
}
return params;
}
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
function convertMessages(
model: Model<"openai-completions">,
context: Context,
compat: Required<OpenAICompat>,
): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
// Cerebras/xAI/Mistral/Chutes don't like the "developer" role
const useDeveloperRole =
model.reasoning &&
!model.baseUrl.includes("cerebras.ai") &&
!model.baseUrl.includes("api.x.ai") &&
!model.baseUrl.includes("mistral.ai") &&
!model.baseUrl.includes("chutes.ai");
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
}
@ -482,3 +474,42 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
}
}
}
/**
 * Detect compatibility settings from baseUrl for known providers.
 * Returns a fully resolved OpenAICompat object with all fields set.
 */
function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
	const hosted = (host: string) => baseUrl.includes(host);
	// Cerebras, xAI, Mistral, and Chutes reject the `store` field and the `developer` role.
	const quirkyProvider =
		hosted("cerebras.ai") || hosted("api.x.ai") || hosted("mistral.ai") || hosted("chutes.ai");
	return {
		supportsStore: !quirkyProvider,
		supportsDeveloperRole: !quirkyProvider,
		// xAI (Grok) rejects the reasoning_effort parameter.
		supportsReasoningEffort: !hosted("api.x.ai"),
		// Mistral and Chutes expect the legacy `max_tokens` field name.
		maxTokensField: hosted("mistral.ai") || hosted("chutes.ai") ? "max_tokens" : "max_completion_tokens",
	};
}
/**
 * Get resolved compatibility settings for a model.
 * Uses explicit model.compat if provided, otherwise auto-detects from URL.
 */
function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
	const fallback = detectCompatFromUrl(model.baseUrl);
	const overrides = model.compat;
	if (overrides === undefined) return fallback;
	// Merge field-by-field with ?? (rather than object spread) so an
	// explicitly-undefined override still falls back to the detected value.
	return {
		supportsStore: overrides.supportsStore ?? fallback.supportsStore,
		supportsDeveloperRole: overrides.supportsDeveloperRole ?? fallback.supportsDeveloperRole,
		supportsReasoningEffort: overrides.supportsReasoningEffort ?? fallback.supportsReasoningEffort,
		maxTokensField: overrides.maxTokensField ?? fallback.maxTokensField,
	};
}

View file

@ -152,6 +152,21 @@ export type AssistantMessageEvent =
| { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage }
| { type: "error"; reason: Extract<StopReason, "aborted" | "error">; error: AssistantMessage };
/**
 * Compatibility settings for openai-completions API.
 * Use this to override URL-based auto-detection for custom providers
 * (e.g. LiteLLM proxies or self-hosted endpoints). Any field left
 * unset falls back to the value detected from the model's `baseUrl`.
 */
export interface OpenAICompat {
/** Whether the provider supports the `store` field. Default: auto-detected from URL. */
supportsStore?: boolean;
/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
supportsDeveloperRole?: boolean;
/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
supportsReasoningEffort?: boolean;
/** Which request field to use for the max-tokens limit. Default: auto-detected from URL. */
maxTokensField?: "max_completion_tokens" | "max_tokens";
}
// Model interface for the unified model system
export interface Model<TApi extends Api> {
id: string;
@ -170,4 +185,6 @@ export interface Model<TApi extends Api> {
contextWindow: number;
maxTokens: number;
headers?: Record<string, string>;
/** Compatibility overrides for openai-completions API. If not set, auto-detected from baseUrl. */
compat?: TApi extends "openai-completions" ? OpenAICompat : never;
}