mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 09:04:26 +00:00
411 lines
12 KiB
TypeScript
411 lines
12 KiB
TypeScript
import { supportsXhigh } from "./models.js";
|
|
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
|
|
import { type GoogleOptions, streamGoogle } from "./providers/google.js";
|
|
import {
|
|
type GoogleGeminiCliOptions,
|
|
type GoogleThinkingLevel,
|
|
streamGoogleGeminiCli,
|
|
} from "./providers/google-gemini-cli.js";
|
|
import { type GoogleVertexOptions, streamGoogleVertex } from "./providers/google-vertex.js";
|
|
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions.js";
|
|
import { type OpenAIResponsesOptions, streamOpenAIResponses } from "./providers/openai-responses.js";
|
|
import type {
|
|
Api,
|
|
AssistantMessage,
|
|
AssistantMessageEventStream,
|
|
Context,
|
|
KnownProvider,
|
|
Model,
|
|
OptionsForApi,
|
|
ReasoningEffort,
|
|
SimpleStreamOptions,
|
|
} from "./types.js";
|
|
|
|
/**
|
|
* Get API key for provider from known environment variables, e.g. OPENAI_API_KEY.
|
|
*
|
|
* Will not return API keys for providers that require OAuth tokens.
|
|
*/
|
|
export function getEnvApiKey(provider: KnownProvider): string | undefined;
|
|
export function getEnvApiKey(provider: string): string | undefined;
|
|
export function getEnvApiKey(provider: any): string | undefined {
|
|
// Fall back to environment variables
|
|
if (provider === "github-copilot") {
|
|
return process.env.COPILOT_GITHUB_TOKEN || process.env.GH_TOKEN || process.env.GITHUB_TOKEN;
|
|
}
|
|
|
|
// ANTHROPIC_OAUTH_TOKEN takes precedence over ANTHROPIC_API_KEY
|
|
if (provider === "anthropic") {
|
|
return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
|
|
}
|
|
|
|
// Vertex AI uses Application Default Credentials, not API keys.
|
|
// Auth is configured via `gcloud auth application-default login`.
|
|
// Don't return a dummy value - require explicit auth.json configuration.
|
|
|
|
const envMap: Record<string, string> = {
|
|
openai: "OPENAI_API_KEY",
|
|
google: "GEMINI_API_KEY",
|
|
groq: "GROQ_API_KEY",
|
|
cerebras: "CEREBRAS_API_KEY",
|
|
xai: "XAI_API_KEY",
|
|
openrouter: "OPENROUTER_API_KEY",
|
|
zai: "ZAI_API_KEY",
|
|
mistral: "MISTRAL_API_KEY",
|
|
};
|
|
|
|
const envVar = envMap[provider];
|
|
return envVar ? process.env[envVar] : undefined;
|
|
}
|
|
|
|
export function stream<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
context: Context,
|
|
options?: OptionsForApi<TApi>,
|
|
): AssistantMessageEventStream {
|
|
// Vertex AI uses Application Default Credentials, not API keys
|
|
if (model.api === "google-vertex") {
|
|
return streamGoogleVertex(model as Model<"google-vertex">, context, options as GoogleVertexOptions);
|
|
}
|
|
|
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
|
if (!apiKey) {
|
|
throw new Error(`No API key for provider: ${model.provider}`);
|
|
}
|
|
const providerOptions = { ...options, apiKey };
|
|
|
|
const api: Api = model.api;
|
|
switch (api) {
|
|
case "anthropic-messages":
|
|
return streamAnthropic(model as Model<"anthropic-messages">, context, providerOptions);
|
|
|
|
case "openai-completions":
|
|
return streamOpenAICompletions(model as Model<"openai-completions">, context, providerOptions as any);
|
|
|
|
case "openai-responses":
|
|
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
|
|
|
|
case "google-generative-ai":
|
|
return streamGoogle(model as Model<"google-generative-ai">, context, providerOptions);
|
|
|
|
case "google-gemini-cli":
|
|
return streamGoogleGeminiCli(
|
|
model as Model<"google-gemini-cli">,
|
|
context,
|
|
providerOptions as GoogleGeminiCliOptions,
|
|
);
|
|
|
|
default: {
|
|
// This should never be reached if all Api cases are handled
|
|
const _exhaustive: never = api;
|
|
throw new Error(`Unhandled API: ${_exhaustive}`);
|
|
}
|
|
}
|
|
}
|
|
|
|
export async function complete<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
context: Context,
|
|
options?: OptionsForApi<TApi>,
|
|
): Promise<AssistantMessage> {
|
|
const s = stream(model, context, options);
|
|
return s.result();
|
|
}
|
|
|
|
export function streamSimple<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
context: Context,
|
|
options?: SimpleStreamOptions,
|
|
): AssistantMessageEventStream {
|
|
// Vertex AI uses Application Default Credentials, not API keys
|
|
if (model.api === "google-vertex") {
|
|
const providerOptions = mapOptionsForApi(model, options, undefined);
|
|
return stream(model, context, providerOptions);
|
|
}
|
|
|
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
|
if (!apiKey) {
|
|
throw new Error(`No API key for provider: ${model.provider}`);
|
|
}
|
|
|
|
const providerOptions = mapOptionsForApi(model, options, apiKey);
|
|
return stream(model, context, providerOptions);
|
|
}
|
|
|
|
export async function completeSimple<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
context: Context,
|
|
options?: SimpleStreamOptions,
|
|
): Promise<AssistantMessage> {
|
|
const s = streamSimple(model, context, options);
|
|
return s.result();
|
|
}
|
|
|
|
function mapOptionsForApi<TApi extends Api>(
|
|
model: Model<TApi>,
|
|
options?: SimpleStreamOptions,
|
|
apiKey?: string,
|
|
): OptionsForApi<TApi> {
|
|
const base = {
|
|
temperature: options?.temperature,
|
|
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
|
|
signal: options?.signal,
|
|
apiKey: apiKey || options?.apiKey,
|
|
};
|
|
|
|
// Helper to clamp xhigh to high for providers that don't support it
|
|
const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
|
|
|
|
switch (model.api) {
|
|
case "anthropic-messages": {
|
|
// Explicitly disable thinking when reasoning is not specified
|
|
if (!options?.reasoning) {
|
|
return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
|
|
}
|
|
|
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
const anthropicBudgets = {
|
|
minimal: 1024,
|
|
low: 2048,
|
|
medium: 8192,
|
|
high: 16384,
|
|
};
|
|
|
|
const minOutputTokens = 1024;
|
|
let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
|
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
|
|
// If not enough room for thinking + output, reduce thinking budget
|
|
if (maxTokens <= thinkingBudget) {
|
|
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
|
}
|
|
|
|
return {
|
|
...base,
|
|
maxTokens,
|
|
thinkingEnabled: true,
|
|
thinkingBudgetTokens: thinkingBudget,
|
|
} satisfies AnthropicOptions;
|
|
}
|
|
|
|
case "openai-completions":
|
|
return {
|
|
...base,
|
|
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
|
|
} satisfies OpenAICompletionsOptions;
|
|
|
|
case "openai-responses":
|
|
return {
|
|
...base,
|
|
reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
|
|
} satisfies OpenAIResponsesOptions;
|
|
|
|
case "google-generative-ai": {
|
|
// Explicitly disable thinking when reasoning is not specified
|
|
// This is needed because Gemini has "dynamic thinking" enabled by default
|
|
if (!options?.reasoning) {
|
|
return { ...base, thinking: { enabled: false } } satisfies GoogleOptions;
|
|
}
|
|
|
|
const googleModel = model as Model<"google-generative-ai">;
|
|
const effort = clampReasoning(options.reasoning)!;
|
|
|
|
// Gemini 3 models use thinkingLevel exclusively instead of thinkingBudget.
|
|
// https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
|
return {
|
|
...base,
|
|
thinking: {
|
|
enabled: true,
|
|
level: getGemini3ThinkingLevel(effort, googleModel),
|
|
},
|
|
} satisfies GoogleOptions;
|
|
}
|
|
|
|
return {
|
|
...base,
|
|
thinking: {
|
|
enabled: true,
|
|
budgetTokens: getGoogleBudget(googleModel, effort),
|
|
},
|
|
} satisfies GoogleOptions;
|
|
}
|
|
|
|
case "google-gemini-cli": {
|
|
if (!options?.reasoning) {
|
|
return { ...base, thinking: { enabled: false } } satisfies GoogleGeminiCliOptions;
|
|
}
|
|
|
|
const effort = clampReasoning(options.reasoning)!;
|
|
|
|
// Gemini 3 models use thinkingLevel instead of thinkingBudget
|
|
if (model.id.includes("3-pro") || model.id.includes("3-flash")) {
|
|
return {
|
|
...base,
|
|
thinking: {
|
|
enabled: true,
|
|
level: getGeminiCliThinkingLevel(effort, model.id),
|
|
},
|
|
} satisfies GoogleGeminiCliOptions;
|
|
}
|
|
|
|
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
const budgets: Record<ClampedReasoningEffort, number> = {
|
|
minimal: 1024,
|
|
low: 2048,
|
|
medium: 8192,
|
|
high: 16384,
|
|
};
|
|
|
|
const minOutputTokens = 1024;
|
|
let thinkingBudget = budgets[effort];
|
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
|
|
// If not enough room for thinking + output, reduce thinking budget
|
|
if (maxTokens <= thinkingBudget) {
|
|
thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
|
|
}
|
|
|
|
return {
|
|
...base,
|
|
maxTokens,
|
|
thinking: {
|
|
enabled: true,
|
|
budgetTokens: thinkingBudget,
|
|
},
|
|
} satisfies GoogleGeminiCliOptions;
|
|
}
|
|
|
|
case "google-vertex": {
|
|
// Explicitly disable thinking when reasoning is not specified
|
|
if (!options?.reasoning) {
|
|
return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
|
|
}
|
|
|
|
const vertexModel = model as Model<"google-vertex">;
|
|
const effort = clampReasoning(options.reasoning)!;
|
|
const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
|
|
|
|
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
|
return {
|
|
...base,
|
|
thinking: {
|
|
enabled: true,
|
|
level: getGemini3ThinkingLevel(effort, geminiModel),
|
|
},
|
|
} satisfies GoogleVertexOptions;
|
|
}
|
|
|
|
return {
|
|
...base,
|
|
thinking: {
|
|
enabled: true,
|
|
budgetTokens: getGoogleBudget(geminiModel, effort),
|
|
},
|
|
} satisfies GoogleVertexOptions;
|
|
}
|
|
|
|
default: {
|
|
// Exhaustiveness check
|
|
const _exhaustive: never = model.api;
|
|
throw new Error(`Unhandled API in mapOptionsForApi: ${_exhaustive}`);
|
|
}
|
|
}
|
|
}
|
|
|
|
type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
|
|
|
|
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
|
|
// Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
|
|
return model.id.includes("3-pro");
|
|
}
|
|
|
|
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
|
// Covers gemini-3-flash, gemini-3-flash-preview, and possible other prefixed ids in the future
|
|
return model.id.includes("3-flash");
|
|
}
|
|
|
|
function getGemini3ThinkingLevel(
|
|
effort: ClampedReasoningEffort,
|
|
model: Model<"google-generative-ai">,
|
|
): GoogleThinkingLevel {
|
|
if (isGemini3ProModel(model)) {
|
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
switch (effort) {
|
|
case "minimal":
|
|
case "low":
|
|
return "LOW";
|
|
case "medium":
|
|
case "high":
|
|
return "HIGH";
|
|
}
|
|
}
|
|
// Gemini 3 Flash supports all four levels
|
|
switch (effort) {
|
|
case "minimal":
|
|
return "MINIMAL";
|
|
case "low":
|
|
return "LOW";
|
|
case "medium":
|
|
return "MEDIUM";
|
|
case "high":
|
|
return "HIGH";
|
|
}
|
|
}
|
|
|
|
function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
|
|
if (modelId.includes("3-pro")) {
|
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
switch (effort) {
|
|
case "minimal":
|
|
case "low":
|
|
return "LOW";
|
|
case "medium":
|
|
case "high":
|
|
return "HIGH";
|
|
}
|
|
}
|
|
// Gemini 3 Flash supports all four levels
|
|
switch (effort) {
|
|
case "minimal":
|
|
return "MINIMAL";
|
|
case "low":
|
|
return "LOW";
|
|
case "medium":
|
|
return "MEDIUM";
|
|
case "high":
|
|
return "HIGH";
|
|
}
|
|
}
|
|
|
|
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ClampedReasoningEffort): number {
|
|
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
if (model.id.includes("2.5-pro")) {
|
|
const budgets: Record<ClampedReasoningEffort, number> = {
|
|
minimal: 128,
|
|
low: 2048,
|
|
medium: 8192,
|
|
high: 32768,
|
|
};
|
|
return budgets[effort];
|
|
}
|
|
|
|
if (model.id.includes("2.5-flash")) {
|
|
// Covers 2.5-flash-lite as well
|
|
const budgets: Record<ClampedReasoningEffort, number> = {
|
|
minimal: 128,
|
|
low: 2048,
|
|
medium: 8192,
|
|
high: 24576,
|
|
};
|
|
return budgets[effort];
|
|
}
|
|
|
|
// Unknown model - use dynamic
|
|
return -1;
|
|
}
|