mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 10:00:39 +00:00
parent
e34e0c503f
commit
87a1a9ded4
7 changed files with 165 additions and 24 deletions
|
|
@ -10,6 +10,8 @@
|
||||||
|
|
||||||
- Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
|
- Added `validateToolCall(tools, toolCall)` helper that finds the tool by name and validates arguments.
|
||||||
|
|
||||||
|
- **OpenAI compatibility overrides**: Added `compat` field to `Model` for `openai-completions` API, allowing explicit configuration of provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Falls back to URL-based detection if not set. Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
|
||||||
|
|
||||||
## [0.13.0] - 2025-12-06
|
## [0.13.0] - 2025-12-06
|
||||||
|
|
||||||
### Breaking Changes
|
### Breaking Changes
|
||||||
|
|
|
||||||
|
|
@ -611,6 +611,23 @@ const ollamaModel: Model<'openai-completions'> = {
|
||||||
maxTokens: 32000
|
maxTokens: 32000
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Example: LiteLLM proxy with explicit compat settings
|
||||||
|
const litellmModel: Model<'openai-completions'> = {
|
||||||
|
id: 'gpt-4o',
|
||||||
|
name: 'GPT-4o (via LiteLLM)',
|
||||||
|
api: 'openai-completions',
|
||||||
|
provider: 'litellm',
|
||||||
|
baseUrl: 'http://localhost:4000/v1',
|
||||||
|
reasoning: false,
|
||||||
|
input: ['text', 'image'],
|
||||||
|
cost: { input: 2.5, output: 10, cacheRead: 0, cacheWrite: 0 },
|
||||||
|
contextWindow: 128000,
|
||||||
|
maxTokens: 16384,
|
||||||
|
compat: {
|
||||||
|
supportsStore: false, // LiteLLM doesn't support the store field
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
|
// Example: Custom endpoint with headers (bypassing Cloudflare bot detection)
|
||||||
const proxyModel: Model<'anthropic-messages'> = {
|
const proxyModel: Model<'anthropic-messages'> = {
|
||||||
id: 'claude-sonnet-4',
|
id: 'claude-sonnet-4',
|
||||||
|
|
@ -635,6 +652,25 @@ const response = await stream(ollamaModel, context, {
|
||||||
});
|
});
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### OpenAI Compatibility Settings
|
||||||
|
|
||||||
|
The `openai-completions` API is implemented by many providers with minor differences. By default, the library auto-detects compatibility settings based on `baseUrl` for known providers (Cerebras, xAI, Mistral, Chutes, etc.). For custom proxies or unknown endpoints, you can override these settings via the `compat` field:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
interface OpenAICompat {
|
||||||
|
supportsStore?: boolean; // Whether provider supports the `store` field (default: auto-detected from `baseUrl`; `true` for unknown endpoints)
|
||||||
|
supportsDeveloperRole?: boolean; // Whether provider supports `developer` role vs `system` (default: auto-detected from `baseUrl`; `true` for unknown endpoints)
|
||||||
|
supportsReasoningEffort?: boolean; // Whether provider supports `reasoning_effort` (default: auto-detected from `baseUrl`; `true` for unknown endpoints)
|
||||||
|
maxTokensField?: 'max_completion_tokens' | 'max_tokens'; // Which field name to use (default: auto-detected from `baseUrl`; `max_completion_tokens` for unknown endpoints)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If `compat` is not set, the library falls back to URL-based detection. If `compat` is partially set, unspecified fields use the detected defaults. This is useful for:
|
||||||
|
|
||||||
|
- **LiteLLM proxies**: May not support the `store` field
|
||||||
|
- **Custom inference servers**: May use non-standard field names
|
||||||
|
- **Self-hosted endpoints**: May have different feature support
|
||||||
|
|
||||||
### Type Safety
|
### Type Safety
|
||||||
|
|
||||||
Models are typed by their API, ensuring type-safe options:
|
Models are typed by their API, ensuring type-safe options:
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import type {
|
||||||
AssistantMessage,
|
AssistantMessage,
|
||||||
Context,
|
Context,
|
||||||
Model,
|
Model,
|
||||||
|
OpenAICompat,
|
||||||
StopReason,
|
StopReason,
|
||||||
StreamFunction,
|
StreamFunction,
|
||||||
StreamOptions,
|
StreamOptions,
|
||||||
|
|
@ -267,7 +268,8 @@ function createClient(model: Model<"openai-completions">, apiKey?: string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
|
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
|
||||||
const messages = convertMessages(model, context);
|
const compat = getCompat(model);
|
||||||
|
const messages = convertMessages(model, context, compat);
|
||||||
|
|
||||||
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
||||||
model: model.id,
|
model: model.id,
|
||||||
|
|
@ -276,27 +278,20 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
||||||
stream_options: { include_usage: true },
|
stream_options: { include_usage: true },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Cerebras/xAI/Mistral dont like the "store" field
|
if (compat.supportsStore) {
|
||||||
if (
|
|
||||||
!model.baseUrl.includes("cerebras.ai") &&
|
|
||||||
!model.baseUrl.includes("api.x.ai") &&
|
|
||||||
!model.baseUrl.includes("mistral.ai") &&
|
|
||||||
!model.baseUrl.includes("chutes.ai")
|
|
||||||
) {
|
|
||||||
params.store = false;
|
params.store = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options?.maxTokens) {
|
if (options?.maxTokens) {
|
||||||
// Mistral/Chutes uses max_tokens instead of max_completion_tokens
|
if (compat.maxTokensField === "max_tokens") {
|
||||||
if (model.baseUrl.includes("mistral.ai") || model.baseUrl.includes("chutes.ai")) {
|
(params as any).max_tokens = options.maxTokens;
|
||||||
(params as any).max_tokens = options?.maxTokens;
|
|
||||||
} else {
|
} else {
|
||||||
params.max_completion_tokens = options?.maxTokens;
|
params.max_completion_tokens = options.maxTokens;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (options?.temperature !== undefined) {
|
if (options?.temperature !== undefined) {
|
||||||
params.temperature = options?.temperature;
|
params.temperature = options.temperature;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (context.tools) {
|
if (context.tools) {
|
||||||
|
|
@ -307,27 +302,24 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
||||||
params.tool_choice = options.toolChoice;
|
params.tool_choice = options.toolChoice;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grok models don't like reasoning_effort
|
if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
|
||||||
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
|
|
||||||
params.reasoning_effort = options.reasoningEffort;
|
params.reasoning_effort = options.reasoningEffort;
|
||||||
}
|
}
|
||||||
|
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
|
function convertMessages(
|
||||||
|
model: Model<"openai-completions">,
|
||||||
|
context: Context,
|
||||||
|
compat: Required<OpenAICompat>,
|
||||||
|
): ChatCompletionMessageParam[] {
|
||||||
const params: ChatCompletionMessageParam[] = [];
|
const params: ChatCompletionMessageParam[] = [];
|
||||||
|
|
||||||
const transformedMessages = transformMessages(context.messages, model);
|
const transformedMessages = transformMessages(context.messages, model);
|
||||||
|
|
||||||
if (context.systemPrompt) {
|
if (context.systemPrompt) {
|
||||||
// Cerebras/xAi/Mistral/Chutes don't like the "developer" role
|
const useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;
|
||||||
const useDeveloperRole =
|
|
||||||
model.reasoning &&
|
|
||||||
!model.baseUrl.includes("cerebras.ai") &&
|
|
||||||
!model.baseUrl.includes("api.x.ai") &&
|
|
||||||
!model.baseUrl.includes("mistral.ai") &&
|
|
||||||
!model.baseUrl.includes("chutes.ai");
|
|
||||||
const role = useDeveloperRole ? "developer" : "system";
|
const role = useDeveloperRole ? "developer" : "system";
|
||||||
params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
|
params.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });
|
||||||
}
|
}
|
||||||
|
|
@ -482,3 +474,42 @@ function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): Sto
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Detect compatibility settings from baseUrl for known providers.
 *
 * Matching is a case-insensitive substring test against `baseUrl`, because URL
 * hosts are case-insensitive (`https://API.X.AI/v1` is still xAI).
 *
 * @param baseUrl - Base URL of the OpenAI-compatible endpoint.
 * @returns A fully resolved OpenAICompat object with all fields set. URLs that
 *   match no known provider get the standard settings: `store`, the `developer`
 *   role and `reasoning_effort` are supported, and `max_completion_tokens` is used.
 */
function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
	// Normalize once so every provider check below ignores letter case.
	const url = baseUrl.toLowerCase();
	const matches = (host: string): boolean => url.includes(host);

	// xAI (Grok) endpoints do not accept `reasoning_effort`.
	const isGrok = matches("api.x.ai");

	// Mistral and Chutes expect `max_tokens` instead of `max_completion_tokens`.
	const useMaxTokens = matches("mistral.ai") || matches("chutes.ai");

	// Cerebras, xAI, Mistral and Chutes reject the `store` field and the `developer` role.
	const isNonStandard = matches("cerebras.ai") || isGrok || useMaxTokens;

	return {
		supportsStore: !isNonStandard,
		supportsDeveloperRole: !isNonStandard,
		supportsReasoningEffort: !isGrok,
		maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
	};
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get resolved compatibility settings for a model.
|
||||||
|
* Fields set explicitly in model.compat take precedence; any field left unset falls back to the value auto-detected from the URL.
|
||||||
|
*/
|
||||||
|
function getCompat(model: Model<"openai-completions">): Required<OpenAICompat> {
|
||||||
|
const detected = detectCompatFromUrl(model.baseUrl);
|
||||||
|
if (!model.compat) return detected;
|
||||||
|
|
||||||
|
return {
|
||||||
|
supportsStore: model.compat.supportsStore ?? detected.supportsStore,
|
||||||
|
supportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,
|
||||||
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
||||||
|
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,21 @@ export type AssistantMessageEvent =
|
||||||
| { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage }
|
| { type: "done"; reason: Extract<StopReason, "stop" | "length" | "toolUse">; message: AssistantMessage }
|
||||||
| { type: "error"; reason: Extract<StopReason, "aborted" | "error">; error: AssistantMessage };
|
| { type: "error"; reason: Extract<StopReason, "aborted" | "error">; error: AssistantMessage };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compatibility settings for openai-completions API.
|
||||||
|
* Use this to override URL-based auto-detection for custom providers.
|
||||||
|
*/
|
||||||
|
export interface OpenAICompat {
|
||||||
|
/** Whether the provider supports the `store` field. Default: auto-detected from URL. */
|
||||||
|
supportsStore?: boolean;
|
||||||
|
/** Whether the provider supports the `developer` role (vs `system`). Default: auto-detected from URL. */
|
||||||
|
supportsDeveloperRole?: boolean;
|
||||||
|
/** Whether the provider supports `reasoning_effort`. Default: auto-detected from URL. */
|
||||||
|
supportsReasoningEffort?: boolean;
|
||||||
|
/** Which field to use for max tokens. Default: auto-detected from URL. */
|
||||||
|
maxTokensField?: "max_completion_tokens" | "max_tokens";
|
||||||
|
}
|
||||||
|
|
||||||
// Model interface for the unified model system
|
// Model interface for the unified model system
|
||||||
export interface Model<TApi extends Api> {
|
export interface Model<TApi extends Api> {
|
||||||
id: string;
|
id: string;
|
||||||
|
|
@ -170,4 +185,6 @@ export interface Model<TApi extends Api> {
|
||||||
contextWindow: number;
|
contextWindow: number;
|
||||||
maxTokens: number;
|
maxTokens: number;
|
||||||
headers?: Record<string, string>;
|
headers?: Record<string, string>;
|
||||||
|
/** Compatibility overrides for openai-completions API. If not set, auto-detected from baseUrl. */
|
||||||
|
compat?: TApi extends "openai-completions" ? OpenAICompat : never;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
|
|
||||||
|
### Added
|
||||||
|
|
||||||
|
- **OpenAI compatibility overrides in models.json**: Custom models using `openai-completions` API can now specify a `compat` object to override provider quirks (`supportsStore`, `supportsDeveloperRole`, `supportsReasoningEffort`, `maxTokensField`). Useful for LiteLLM, custom proxies, and other non-standard endpoints. ([#133](https://github.com/badlogic/pi-mono/issues/133), thanks @fink-andreas for the initial idea and PR)
|
||||||
|
|
||||||
## [0.13.2] - 2025-12-07
|
## [0.13.2] - 2025-12-07
|
||||||
|
|
||||||
### Changed
|
### Changed
|
||||||
|
|
|
||||||
|
|
@ -315,6 +315,47 @@ You can add custom HTTP headers to bypass Cloudflare bot detection, add authenti
|
||||||
- **Model-level `headers`**: Additional headers for specific models (merged with provider headers)
|
- **Model-level `headers`**: Additional headers for specific models (merged with provider headers)
|
||||||
- Model headers override provider headers when keys conflict
|
- Model headers override provider headers when keys conflict
|
||||||
|
|
||||||
|
### OpenAI Compatibility Settings
|
||||||
|
|
||||||
|
The `openai-completions` API is implemented by many providers (Ollama, vLLM, LiteLLM, llama.cpp, etc.) with minor differences. By default, compatibility settings are auto-detected from the `baseUrl`. For custom proxies or unknown endpoints, you can override these via the `compat` field on models:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"providers": {
|
||||||
|
"litellm": {
|
||||||
|
"baseUrl": "http://localhost:4000/v1",
|
||||||
|
"apiKey": "LITELLM_API_KEY",
|
||||||
|
"api": "openai-completions",
|
||||||
|
"models": [
|
||||||
|
{
|
||||||
|
"id": "gpt-4o",
|
||||||
|
"name": "GPT-4o (via LiteLLM)",
|
||||||
|
"reasoning": false,
|
||||||
|
"input": ["text", "image"],
|
||||||
|
"cost": {"input": 2.5, "output": 10, "cacheRead": 0, "cacheWrite": 0},
|
||||||
|
"contextWindow": 128000,
|
||||||
|
"maxTokens": 16384,
|
||||||
|
"compat": {
|
||||||
|
"supportsStore": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Available `compat` fields (all optional, auto-detected if not set):
|
||||||
|
|
||||||
|
| Field | Type | Default | Description |
|
||||||
|
|-------|------|---------|-------------|
|
||||||
|
| `supportsStore` | boolean | auto | Whether provider supports the `store` field |
|
||||||
|
| `supportsDeveloperRole` | boolean | auto | Whether provider supports `developer` role (vs `system`) |
|
||||||
|
| `supportsReasoningEffort` | boolean | auto | Whether provider supports `reasoning_effort` parameter |
|
||||||
|
| `maxTokensField` | string | auto | Use `"max_completion_tokens"` or `"max_tokens"` |
|
||||||
|
|
||||||
|
If `compat` is partially set, unspecified fields use auto-detected values.
|
||||||
|
|
||||||
### Authorization Header
|
### Authorization Header
|
||||||
|
|
||||||
Some providers require an explicit `Authorization: Bearer <token>` header. Set `authHeader: true` to automatically add this header using the resolved `apiKey`:
|
Some providers require an explicit `Authorization: Bearer <token>` header. Set `authHeader: true` to automatically add this header using the resolved `apiKey`:
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,14 @@ import { loadOAuthCredentials } from "./oauth/storage.js";
|
||||||
// Handle both default and named exports
|
// Handle both default and named exports
|
||||||
const Ajv = (AjvModule as any).default || AjvModule;
|
const Ajv = (AjvModule as any).default || AjvModule;
|
||||||
|
|
||||||
|
// Schema for OpenAI compatibility settings
|
||||||
|
const OpenAICompatSchema = Type.Object({
|
||||||
|
supportsStore: Type.Optional(Type.Boolean()),
|
||||||
|
supportsDeveloperRole: Type.Optional(Type.Boolean()),
|
||||||
|
supportsReasoningEffort: Type.Optional(Type.Boolean()),
|
||||||
|
maxTokensField: Type.Optional(Type.Union([Type.Literal("max_completion_tokens"), Type.Literal("max_tokens")])),
|
||||||
|
});
|
||||||
|
|
||||||
// Schema for custom model definition
|
// Schema for custom model definition
|
||||||
const ModelDefinitionSchema = Type.Object({
|
const ModelDefinitionSchema = Type.Object({
|
||||||
id: Type.String({ minLength: 1 }),
|
id: Type.String({ minLength: 1 }),
|
||||||
|
|
@ -32,6 +40,7 @@ const ModelDefinitionSchema = Type.Object({
|
||||||
contextWindow: Type.Number(),
|
contextWindow: Type.Number(),
|
||||||
maxTokens: Type.Number(),
|
maxTokens: Type.Number(),
|
||||||
headers: Type.Optional(Type.Record(Type.String(), Type.String())),
|
headers: Type.Optional(Type.Record(Type.String(), Type.String())),
|
||||||
|
compat: Type.Optional(OpenAICompatSchema),
|
||||||
});
|
});
|
||||||
|
|
||||||
const ProviderConfigSchema = Type.Object({
|
const ProviderConfigSchema = Type.Object({
|
||||||
|
|
@ -201,7 +210,8 @@ function parseModels(config: ModelsConfig): Model<Api>[] {
|
||||||
contextWindow: modelDef.contextWindow,
|
contextWindow: modelDef.contextWindow,
|
||||||
maxTokens: modelDef.maxTokens,
|
maxTokens: modelDef.maxTokens,
|
||||||
headers,
|
headers,
|
||||||
});
|
compat: modelDef.compat,
|
||||||
|
} as Model<Api>);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue