Massive refactor of API

- Switch to function based API
- Anthropic SDK style async generator
- Fully typed with escape hatches for custom models
This commit is contained in:
Mario Zechner 2025-09-02 23:59:36 +02:00
parent 004de3c9d0
commit 66cefb236e
29 changed files with 5835 additions and 6225 deletions

View file

@ -1,47 +1,43 @@
import { type AnthropicOptions, streamAnthropic } from "./providers/anthropic.js";
import { type GoogleOptions, streamGoogle } from "./providers/google.js";
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions.js";
import { type OpenAIResponsesOptions, streamOpenAIResponses } from "./providers/openai-responses.js";
import type {
Api,
AssistantMessage,
AssistantMessageEvent,
Context,
GenerateFunction,
GenerateOptionsUnified,
GenerateStream,
KnownProvider,
Model,
OptionsForApi,
ReasoningEffort,
SimpleGenerateOptions,
} from "./types.js";
export class QueuedGenerateStream implements GenerateStream {
private queue: AssistantMessageEvent[] = [];
private waiting: ((value: IteratorResult<AssistantMessageEvent>) => void)[] = [];
private done = false;
private error?: Error;
private finalMessagePromise: Promise<AssistantMessage>;
private resolveFinalMessage!: (message: AssistantMessage) => void;
private rejectFinalMessage!: (error: Error) => void;
constructor() {
this.finalMessagePromise = new Promise((resolve, reject) => {
this.finalMessagePromise = new Promise((resolve) => {
this.resolveFinalMessage = resolve;
this.rejectFinalMessage = reject;
});
}
push(event: AssistantMessageEvent): void {
if (this.done) return;
// If it's the done event, resolve the final message
if (event.type === "done") {
this.done = true;
this.resolveFinalMessage(event.message);
}
// If it's an error event, reject the final message
if (event.type === "error") {
this.error = new Error(event.error);
if (!this.done) {
this.rejectFinalMessage(this.error);
}
this.done = true;
this.resolveFinalMessage(event.partial);
}
// Deliver to waiting consumer or queue it
@ -86,31 +82,14 @@ export class QueuedGenerateStream implements GenerateStream {
}
}
// API implementations registry
const apiImplementations: Map<Api | string, GenerateFunction> = new Map();
/**
* Register a custom API implementation
*/
export function registerApi(api: string, impl: GenerateFunction): void {
apiImplementations.set(api, impl);
}
// API key storage
const apiKeys: Map<string, string> = new Map();
/**
* Set an API key for a provider
*/
export function setApiKey(provider: KnownProvider, key: string): void;
export function setApiKey(provider: string, key: string): void;
export function setApiKey(provider: any, key: string): void {
apiKeys.set(provider, key);
}
/**
* Get API key for a provider
*/
export function getApiKey(provider: KnownProvider): string | undefined;
export function getApiKey(provider: string): string | undefined;
export function getApiKey(provider: any): string | undefined {
@ -133,45 +112,76 @@ export function getApiKey(provider: any): string | undefined {
return envVar ? process.env[envVar] : undefined;
}
/**
* Main generate function
*/
export function generate(model: Model, context: Context, options?: GenerateOptionsUnified): GenerateStream {
// Get implementation
const impl = apiImplementations.get(model.api);
if (!impl) {
throw new Error(`Unsupported API: ${model.api}`);
export function stream<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: OptionsForApi<TApi>,
): GenerateStream {
const apiKey = options?.apiKey || getApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
const providerOptions = { ...options, apiKey };
// Get API key from options or environment
const api: Api = model.api;
switch (api) {
case "anthropic-messages":
return streamAnthropic(model as Model<"anthropic-messages">, context, providerOptions);
case "openai-completions":
return streamOpenAICompletions(model as Model<"openai-completions">, context, providerOptions as any);
case "openai-responses":
return streamOpenAIResponses(model as Model<"openai-responses">, context, providerOptions as any);
case "google-generative-ai":
return streamGoogle(model as Model<"google-generative-ai">, context, providerOptions);
default: {
// This should never be reached if all Api cases are handled
const _exhaustive: never = api;
throw new Error(`Unhandled API: ${_exhaustive}`);
}
}
}
export async function complete<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: OptionsForApi<TApi>,
): Promise<AssistantMessage> {
const s = stream(model, context, options);
return s.finalMessage();
}
export function streamSimple<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: SimpleGenerateOptions,
): GenerateStream {
const apiKey = options?.apiKey || getApiKey(model.provider);
if (!apiKey) {
throw new Error(`No API key for provider: ${model.provider}`);
}
// Map generic options to provider-specific
const providerOptions = mapOptionsForApi(model.api, model, options, apiKey);
// Return the GenerateStream from implementation
return impl(model, context, providerOptions);
const providerOptions = mapOptionsForApi(model, options, apiKey);
return stream(model, context, providerOptions);
}
/**
* Helper to generate and get complete response (no streaming)
*/
export async function generateComplete(
model: Model,
export async function completeSimple<TApi extends Api>(
model: Model<TApi>,
context: Context,
options?: GenerateOptionsUnified,
options?: SimpleGenerateOptions,
): Promise<AssistantMessage> {
const stream = generate(model, context, options);
return stream.finalMessage();
const s = streamSimple(model, context, options);
return s.finalMessage();
}
/**
* Map generic options to provider-specific options
*/
function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOptionsUnified, apiKey?: string): any {
function mapOptionsForApi<TApi extends Api>(
model: Model<TApi>,
options?: SimpleGenerateOptions,
apiKey?: string,
): OptionsForApi<TApi> {
const base = {
temperature: options?.temperature,
maxTokens: options?.maxTokens,
@ -179,18 +189,10 @@ function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOpt
apiKey: apiKey || options?.apiKey,
};
switch (api) {
case "openai-responses":
case "openai-completions":
return {
...base,
reasoning_effort: options?.reasoning,
};
switch (model.api) {
case "anthropic-messages": {
if (!options?.reasoning) return base;
if (!options?.reasoning) return base satisfies AnthropicOptions;
// Map effort to token budget
const anthropicBudgets = {
minimal: 1024,
low: 2048,
@ -200,55 +202,60 @@ function mapOptionsForApi(api: Api | string, model: Model, options?: GenerateOpt
return {
...base,
thinking: {
enabled: true,
budgetTokens: anthropicBudgets[options.reasoning],
},
};
thinkingEnabled: true,
thinkingBudgetTokens: anthropicBudgets[options.reasoning],
} satisfies AnthropicOptions;
}
case "google-generative-ai": {
if (!options?.reasoning) return { ...base, thinking_budget: -1 };
// Model-specific mapping for Google
const googleBudget = getGoogleBudget(model, options.reasoning);
case "openai-completions":
return {
...base,
thinking_budget: googleBudget,
};
reasoningEffort: options?.reasoning,
} satisfies OpenAICompletionsOptions;
case "openai-responses":
return {
...base,
reasoningEffort: options?.reasoning,
} satisfies OpenAIResponsesOptions;
case "google-generative-ai": {
if (!options?.reasoning) return base as any;
const googleBudget = getGoogleBudget(model as Model<"google-generative-ai">, options.reasoning);
return {
...base,
thinking: {
enabled: true,
budgetTokens: googleBudget,
},
} satisfies GoogleOptions;
}
default: {
// Exhaustiveness check
const _exhaustive: never = model.api;
throw new Error(`Unhandled API in mapOptionsForApi: ${_exhaustive}`);
}
default:
return base;
}
}
/**
* Get Google thinking budget based on model and effort
*/
function getGoogleBudget(model: Model, effort: ReasoningEffort): number {
// Model-specific logic
if (model.id.includes("flash-lite")) {
const budgets = {
minimal: 512,
low: 2048,
medium: 8192,
high: 24576,
};
return budgets[effort];
}
if (model.id.includes("pro")) {
function getGoogleBudget(model: Model<"google-generative-ai">, effort: ReasoningEffort): number {
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
if (model.id.includes("2.5-pro")) {
const budgets = {
minimal: 128,
low: 2048,
medium: 8192,
high: Math.min(25000, 32768),
high: 32768,
};
return budgets[effort];
}
if (model.id.includes("flash")) {
if (model.id.includes("2.5-flash")) {
// Covers 2.5-flash-lite as well
const budgets = {
minimal: 0, // Disable thinking
minimal: 128,
low: 2048,
medium: 8192,
high: 24576,
@ -259,10 +266,3 @@ function getGoogleBudget(model: Model, effort: ReasoningEffort): number {
// Unknown model - use dynamic
return -1;
}
// Register built-in API implementations
// Import the new function-based implementations
import { generateAnthropic } from "./providers/anthropic-generate.js";
// Register Anthropic implementation
apiImplementations.set("anthropic-messages", generateAnthropic);

View file

@ -1,37 +1,8 @@
// @mariozechner/pi-ai - Unified LLM API with automatic model discovery
// This package provides a common interface for working with multiple LLM providers
export const version = "0.5.8";
// Export generate API
export {
generate,
generateComplete,
getApiKey,
QueuedGenerateStream,
registerApi,
setApiKey,
} from "./generate.js";
// Export generated models data
export { PROVIDERS } from "./models.generated.js";
// Export model utilities
export {
calculateCost,
getModel,
type KnownProvider,
registerModel,
} from "./models.js";
// Legacy providers (to be deprecated)
export { AnthropicLLM } from "./providers/anthropic.js";
export { GoogleLLM } from "./providers/google.js";
export { OpenAICompletionsLLM } from "./providers/openai-completions.js";
export { OpenAIResponsesLLM } from "./providers/openai-responses.js";
// Export types
export type * from "./types.js";
// TODO: Remove these legacy exports once consumers are updated
export function createLLM(): never {
throw new Error("createLLM is deprecated. Use generate() with getModel() instead.");
}
export * from "./generate.js";
export * from "./models.generated.js";
export * from "./models.js";
export * from "./providers/anthropic.js";
export * from "./providers/google.js";
export * from "./providers/openai-completions.js";
export * from "./providers/openai-responses.js";
export * from "./types.js";

File diff suppressed because it is too large Load diff

View file

@ -1,44 +1,39 @@
import { PROVIDERS } from "./models.generated.js";
import type { KnownProvider, Model, Usage } from "./types.js";
import type { Api, KnownProvider, Model, Usage } from "./types.js";
// Re-export Model type
export type { KnownProvider, Model } from "./types.js";
// Dynamic model registry initialized from PROVIDERS
const modelRegistry: Map<string, Map<string, Model>> = new Map();
const modelRegistry: Map<string, Map<string, Model<Api>>> = new Map();
// Initialize registry from PROVIDERS on module load
for (const [provider, models] of Object.entries(PROVIDERS)) {
const providerModels = new Map<string, Model>();
const providerModels = new Map<string, Model<Api>>();
for (const [id, model] of Object.entries(models)) {
providerModels.set(id, model as Model);
providerModels.set(id, model as Model<Api>);
}
modelRegistry.set(provider, providerModels);
}
/**
* Get a model from the registry - typed overload for known providers
*/
export function getModel<P extends KnownProvider>(provider: P, modelId: keyof (typeof PROVIDERS)[P]): Model;
export function getModel(provider: string, modelId: string): Model | undefined;
export function getModel(provider: any, modelId: any): Model | undefined {
return modelRegistry.get(provider)?.get(modelId);
type ModelApi<
TProvider extends KnownProvider,
TModelId extends keyof (typeof PROVIDERS)[TProvider],
> = (typeof PROVIDERS)[TProvider][TModelId] extends { api: infer TApi } ? (TApi extends Api ? TApi : never) : never;
export function getModel<TProvider extends KnownProvider, TModelId extends keyof (typeof PROVIDERS)[TProvider]>(
provider: TProvider,
modelId: TModelId,
): Model<ModelApi<TProvider, TModelId>>;
export function getModel<TApi extends Api>(provider: string, modelId: string): Model<TApi> | undefined;
export function getModel<TApi extends Api>(provider: any, modelId: any): Model<TApi> | undefined {
return modelRegistry.get(provider)?.get(modelId) as Model<TApi> | undefined;
}
/**
* Register a custom model
*/
export function registerModel(model: Model): void {
export function registerModel<TApi extends Api>(model: Model<TApi>): void {
if (!modelRegistry.has(model.provider)) {
modelRegistry.set(model.provider, new Map());
}
modelRegistry.get(model.provider)!.set(model.id, model);
}
/**
* Calculate cost for token usage
*/
export function calculateCost(model: Model, usage: Usage): Usage["cost"] {
export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage): Usage["cost"] {
usage.cost.input = (model.cost.input / 1000000) * usage.input;
usage.cost.output = (model.cost.output / 1000000) * usage.output;
usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;

View file

@ -1,425 +0,0 @@
import Anthropic from "@anthropic-ai/sdk";
import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
Tool,
} from "@anthropic-ai/sdk/resources/messages.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
// Anthropic-specific options layered on top of the shared GenerateOptions.
export interface AnthropicOptions extends GenerateOptions {
	// Extended-thinking configuration; budgetTokens caps thinking tokens
	// (a fallback of 1024 is applied in buildAnthropicParams when omitted).
	thinking?: {
		enabled: boolean;
		budgetTokens?: number;
	};
	// Mirrors Anthropic's tool_choice parameter; the string forms are wrapped
	// into `{ type: ... }` objects when the request params are built.
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
/**
 * Generate function for the Anthropic Messages API.
 *
 * Returns a GenerateStream immediately and feeds it from a detached async
 * task: Anthropic stream events are translated into the library's unified
 * AssistantMessageEvent sequence (start / text_* / thinking_* / toolCall /
 * done / error), while `output` is built up incrementally and attached to
 * every event as the partial message. Errors never reject — they are
 * surfaced as an "error" event with stopReason "error" on the message.
 */
export const generateAnthropic: GenerateFunction<AnthropicOptions> = (
	model: Model,
	context: Context,
	options: AnthropicOptions,
): GenerateStream => {
	const stream = new QueuedGenerateStream();

	// Start async processing; the caller consumes `stream` while this runs.
	(async () => {
		// Accumulated assistant message; pushed as `partial` with each event.
		const output: AssistantMessage = {
			role: "assistant",
			content: [],
			api: "anthropic-messages" as Api,
			provider: model.provider,
			model: model.id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			stopReason: "stop",
		};
		try {
			// Create Anthropic client (handles OAuth-token vs API-key auth).
			const client = createAnthropicClient(model, options.apiKey!);

			// Convert messages to Anthropic's wire format.
			const messages = convertMessages(context.messages, model, "anthropic-messages");

			// Build params (OAuth tokens require the Claude Code identity).
			const params = buildAnthropicParams(model, context, options, messages, client.isOAuthToken);

			// Create Anthropic stream
			const anthropicStream = client.client.messages.stream(
				{
					...params,
					stream: true,
				},
				{
					signal: options.signal,
				},
			);

			// Emit start event
			stream.push({
				type: "start",
				partial: output,
			});

			// Block currently being streamed. Text/thinking blocks are appended
			// to output.content as soon as they start; tool calls are held back
			// (arguments arrive as raw JSON fragments in `partialJson`) and only
			// appended once complete and parsed.
			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;

			for await (const event of anthropicStream) {
				if (event.type === "content_block_start") {
					if (event.content_block.type === "text") {
						currentBlock = {
							type: "text",
							text: "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "text_start", partial: output });
					} else if (event.content_block.type === "thinking") {
						currentBlock = {
							type: "thinking",
							thinking: "",
							thinkingSignature: "",
						};
						output.content.push(currentBlock);
						stream.push({ type: "thinking_start", partial: output });
					} else if (event.content_block.type === "tool_use") {
						// We wait for the full tool use to be streamed before
						// emitting any event or appending to output.content.
						currentBlock = {
							type: "toolCall",
							id: event.content_block.id,
							name: event.content_block.name,
							arguments: event.content_block.input as Record<string, any>,
							partialJson: "",
						};
					}
				} else if (event.type === "content_block_delta") {
					if (event.delta.type === "text_delta") {
						if (currentBlock && currentBlock.type === "text") {
							currentBlock.text += event.delta.text;
							stream.push({
								type: "text_delta",
								delta: event.delta.text,
								partial: output,
							});
						}
					} else if (event.delta.type === "thinking_delta") {
						if (currentBlock && currentBlock.type === "thinking") {
							currentBlock.thinking += event.delta.thinking;
							stream.push({
								type: "thinking_delta",
								delta: event.delta.thinking,
								partial: output,
							});
						}
					} else if (event.delta.type === "input_json_delta") {
						// Buffer raw JSON; parsed in one go at content_block_stop.
						if (currentBlock && currentBlock.type === "toolCall") {
							currentBlock.partialJson += event.delta.partial_json;
						}
					} else if (event.delta.type === "signature_delta") {
						// Signature needed to replay thinking blocks back to the API.
						if (currentBlock && currentBlock.type === "thinking") {
							currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
							currentBlock.thinkingSignature += event.delta.signature;
						}
					}
				} else if (event.type === "content_block_stop") {
					if (currentBlock) {
						if (currentBlock.type === "text") {
							stream.push({ type: "text_end", content: currentBlock.text, partial: output });
						} else if (currentBlock.type === "thinking") {
							stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
						} else if (currentBlock.type === "toolCall") {
							// Arguments complete: parse and emit the finished tool call.
							const finalToolCall: ToolCall = {
								type: "toolCall",
								id: currentBlock.id,
								name: currentBlock.name,
								arguments: JSON.parse(currentBlock.partialJson),
							};
							output.content.push(finalToolCall);
							stream.push({ type: "toolCall", toolCall: finalToolCall, partial: output });
						}
						currentBlock = null;
					}
				} else if (event.type === "message_delta") {
					if (event.delta.stop_reason) {
						output.stopReason = mapStopReason(event.delta.stop_reason);
					}
					// Accumulate token usage and recompute cost on each delta.
					output.usage.input += event.usage.input_tokens || 0;
					output.usage.output += event.usage.output_tokens || 0;
					output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
					output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
					calculateCost(model, output.usage);
				}
			}

			// Emit done event with final message
			stream.push({ type: "done", reason: output.stopReason, message: output });
			stream.end();
		} catch (error) {
			// Report failures as an "error" event carrying the partial message.
			output.stopReason = "error";
			output.error = error instanceof Error ? error.message : JSON.stringify(error);
			stream.push({ type: "error", error: output.error, partial: output });
			stream.end();
		}
	})();

	return stream;
};
// Result of createAnthropicClient: the SDK client plus whether the key was
// an OAuth token (which forces the Claude Code system-prompt identity).
interface AnthropicClientWrapper {
	client: Anthropic;
	isOAuthToken: boolean;
}
/**
 * Create an Anthropic SDK client for the given model.
 *
 * Keys containing "sk-ant-oat" are OAuth access tokens (Claude Code flow):
 * they are sent as a bearer `authToken` with the oauth beta header, and the
 * caller must inject the Claude Code identity into the system prompt (see
 * buildAnthropicParams). All other keys use normal `apiKey` auth.
 */
function createAnthropicClient(model: Model, apiKey: string): AnthropicClientWrapper {
	if (apiKey.includes("sk-ant-oat")) {
		const defaultHeaders = {
			accept: "application/json",
			"anthropic-dangerous-direct-browser-access": "true",
			"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
		};
		// Remove the env var in Node.js so the SDK cannot fall back to it.
		// Assigning `undefined` would NOT clear it: process.env coerces values
		// to strings, leaving the literal string "undefined" as the key, so
		// the property must be deleted instead.
		if (typeof process !== "undefined" && process.env) {
			delete process.env.ANTHROPIC_API_KEY;
		}
		const client = new Anthropic({
			apiKey: null,
			authToken: apiKey,
			baseURL: model.baseUrl,
			defaultHeaders,
			dangerouslyAllowBrowser: true,
		});
		return { client, isOAuthToken: true };
	} else {
		const defaultHeaders = {
			accept: "application/json",
			"anthropic-dangerous-direct-browser-access": "true",
			"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
		};
		const client = new Anthropic({
			apiKey,
			baseURL: model.baseUrl,
			dangerouslyAllowBrowser: true,
			defaultHeaders,
		});
		return { client, isOAuthToken: false };
	}
}
// Assemble the streaming request parameters for the Anthropic Messages API.
// OAuth (Claude Code) tokens MUST lead the system prompt with the Claude Code
// identity; all system blocks are marked ephemeral for prompt caching.
function buildAnthropicParams(
	model: Model,
	context: Context,
	options: AnthropicOptions,
	messages: MessageParam[],
	isOAuthToken: boolean,
): MessageCreateParamsStreaming {
	const params: MessageCreateParamsStreaming = {
		model: model.id,
		messages,
		max_tokens: options.maxTokens || model.maxTokens,
		stream: true,
	};

	// System prompt handling depends on the auth mode.
	if (isOAuthToken) {
		const systemBlocks = [
			{
				type: "text" as const,
				text: "You are Claude Code, Anthropic's official CLI for Claude.",
				cache_control: { type: "ephemeral" as const },
			},
		];
		if (context.systemPrompt) {
			systemBlocks.push({
				type: "text" as const,
				text: context.systemPrompt,
				cache_control: { type: "ephemeral" as const },
			});
		}
		params.system = systemBlocks;
	} else if (context.systemPrompt) {
		params.system = context.systemPrompt;
	}

	if (options.temperature !== undefined) {
		params.temperature = options.temperature;
	}
	if (context.tools) {
		params.tools = convertTools(context.tools);
	}

	// Extended thinking: only when requested AND the model supports reasoning.
	if (options.thinking?.enabled && model.reasoning) {
		params.thinking = {
			type: "enabled",
			budget_tokens: options.thinking.budgetTokens || 1024,
		};
	}

	// String tool choices are wrapped into the object form the API expects.
	const choice = options.toolChoice;
	if (choice) {
		params.tool_choice = typeof choice === "string" ? { type: choice } : choice;
	}

	return params;
}
// Convert unified messages into Anthropic MessageParam format.
// - user: string content passes through; array content becomes text/image
//   blocks, with image blocks dropped when the model lacks image input.
// - assistant: text / thinking / toolCall blocks map to their Anthropic
//   counterparts (thinking keeps its signature so it can be replayed).
// - toolResult: re-sent as a user message wrapping a tool_result block.
function convertMessages(messages: Message[], model: Model, api: Api): MessageParam[] {
	const params: MessageParam[] = [];

	// Transform messages for cross-provider compatibility (e.g. history that
	// was produced by a different API) before mapping to Anthropic shapes.
	const transformedMessages = transformMessages(messages, model, api);

	for (const msg of transformedMessages) {
		if (msg.role === "user") {
			// Handle both string and array content
			if (typeof msg.content === "string") {
				params.push({
					role: "user",
					content: msg.content,
				});
			} else {
				// Convert array content to Anthropic format
				const blocks: ContentBlockParam[] = msg.content.map((item) => {
					if (item.type === "text") {
						return {
							type: "text",
							text: item.text,
						};
					} else {
						// Image content — always sent base64-encoded.
						return {
							type: "image",
							source: {
								type: "base64",
								media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
								data: item.data,
							},
						};
					}
				});
				// Strip images for models that do not accept image input.
				const filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
				params.push({
					role: "user",
					content: filteredBlocks,
				});
			}
		} else if (msg.role === "assistant") {
			const blocks: ContentBlockParam[] = [];
			for (const block of msg.content) {
				if (block.type === "text") {
					blocks.push({
						type: "text",
						text: block.text,
					});
				} else if (block.type === "thinking") {
					// Signature is required by the API when replaying thinking.
					blocks.push({
						type: "thinking",
						thinking: block.thinking,
						signature: block.thinkingSignature || "",
					});
				} else if (block.type === "toolCall") {
					blocks.push({
						type: "tool_use",
						id: block.id,
						name: block.name,
						input: block.arguments,
					});
				}
			}
			params.push({
				role: "assistant",
				content: blocks,
			});
		} else if (msg.role === "toolResult") {
			// Tool results go back to the API as user messages.
			params.push({
				role: "user",
				content: [
					{
						type: "tool_result",
						tool_use_id: msg.toolCallId,
						content: msg.content,
						is_error: msg.isError,
					},
				],
			});
		}
	}
	return params;
}
// Convert the context's tool definitions into Anthropic's Tool format,
// wrapping each JSON-schema fragment in an object-typed input_schema.
function convertTools(tools: Context["tools"]): Tool[] {
	if (!tools) return [];
	const converted: Tool[] = [];
	for (const tool of tools) {
		converted.push({
			name: tool.name,
			description: tool.description,
			input_schema: {
				type: "object" as const,
				properties: tool.parameters.properties || {},
				required: tool.parameters.required || [],
			},
		});
	}
	return converted;
}
// Translate an Anthropic stop reason into the library's StopReason.
// pause_turn (resubmittable), stop_sequence (we never supply stop sequences),
// null and anything unrecognized all collapse to a plain "stop".
function mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
	if (reason === "max_tokens") return "length";
	if (reason === "tool_use") return "toolUse";
	if (reason === "refusal") return "safety";
	// end_turn / pause_turn / stop_sequence / null / unknown.
	return "stop";
}

View file

@ -3,91 +3,46 @@ import type {
ContentBlockParam,
MessageCreateParamsStreaming,
MessageParam,
Tool,
} from "@anthropic-ai/sdk/resources/messages.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
Tool,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface AnthropicLLMOptions extends LLMOptions {
thinking?: {
enabled: boolean;
budgetTokens?: number;
};
export interface AnthropicOptions extends GenerateOptions {
thinkingEnabled?: boolean;
thinkingBudgetTokens?: number;
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
private client: Anthropic;
private modelInfo: Model;
private isOAuthToken: boolean = false;
export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
model: Model<"anthropic-messages">,
context: Context,
options?: AnthropicOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.ANTHROPIC_API_KEY) {
throw new Error(
"Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.ANTHROPIC_API_KEY;
}
if (apiKey.includes("sk-ant-oat")) {
const defaultHeaders = {
accept: "application/json",
"anthropic-dangerous-direct-browser-access": "true",
"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
};
// Clear the env var if we're in Node.js to prevent SDK from using it
if (typeof process !== "undefined" && process.env) {
process.env.ANTHROPIC_API_KEY = undefined;
}
this.client = new Anthropic({
apiKey: null,
authToken: apiKey,
baseURL: model.baseUrl,
defaultHeaders,
dangerouslyAllowBrowser: true,
});
this.isOAuthToken = true;
} else {
const defaultHeaders = {
accept: "application/json",
"anthropic-dangerous-direct-browser-access": "true",
"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
};
this.client = new Anthropic({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true, defaultHeaders });
this.isOAuthToken = false;
}
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "anthropic-messages";
}
async generate(context: Context, options?: AnthropicLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "anthropic-messages" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -99,77 +54,14 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
};
try {
const messages = this.convertMessages(context.messages);
const params: MessageCreateParamsStreaming = {
model: this.modelInfo.id,
messages,
max_tokens: options?.maxTokens || 4096,
stream: true,
};
// For OAuth tokens, we MUST include Claude Code identity
if (this.isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
},
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: context.systemPrompt,
cache_control: {
type: "ephemeral",
},
});
}
} else if (context.systemPrompt) {
params.system = context.systemPrompt;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (context.tools) {
params.tools = this.convertTools(context.tools);
}
// Only enable thinking if the model supports it
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
params.thinking = {
type: "enabled",
budget_tokens: options.thinking.budgetTokens || 1024,
};
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
const stream = this.client.messages.stream(
{
...params,
stream: true,
},
{
signal: options?.signal,
},
);
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const { client, isOAuthToken } = createClient(model, options?.apiKey!);
const params = buildParams(model, context, isOAuthToken, options);
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
stream.push({ type: "start", partial: output });
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
for await (const event of stream) {
for await (const event of anthropicStream) {
if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
currentBlock = {
@ -177,7 +69,7 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
text: "",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
} else if (event.content_block.type === "thinking") {
currentBlock = {
type: "thinking",
@ -185,9 +77,9 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
thinkingSignature: "",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
} else if (event.content_block.type === "tool_use") {
// We wait for the full tool use to be streamed to send the event
// We wait for the full tool use to be streamed
currentBlock = {
type: "toolCall",
id: event.content_block.id,
@ -200,15 +92,19 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
if (event.delta.type === "text_delta") {
if (currentBlock && currentBlock.type === "text") {
currentBlock.text += event.delta.text;
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: event.delta.text });
stream.push({
type: "text_delta",
delta: event.delta.text,
partial: output,
});
}
} else if (event.delta.type === "thinking_delta") {
if (currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking += event.delta.thinking;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentBlock.thinking,
delta: event.delta.thinking,
partial: output,
});
}
} else if (event.delta.type === "input_json_delta") {
@ -224,9 +120,17 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
} else if (event.type === "content_block_stop") {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
const finalToolCall: ToolCall = {
type: "toolCall",
@ -235,150 +139,274 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
arguments: JSON.parse(currentBlock.partialJson),
};
output.content.push(finalToolCall);
options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
stream.push({
type: "toolCall",
toolCall: finalToolCall,
partial: output,
});
}
currentBlock = null;
}
} else if (event.type === "message_delta") {
if (event.delta.stop_reason) {
output.stopReason = this.mapStopReason(event.delta.stop_reason);
output.stopReason = mapStopReason(event.delta.stop_reason);
}
output.usage.input += event.usage.input_tokens || 0;
output.usage.output += event.usage.output_tokens || 0;
output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Creates an Anthropic SDK client for the given model and API key.
 *
 * Keys containing "sk-ant-oat" are treated as OAuth access tokens and are sent
 * via `authToken` (Bearer auth) with the OAuth beta header; all other keys use
 * standard `apiKey` auth. Returns the client plus a flag so callers can adjust
 * request building (OAuth requires the Claude Code system identity).
 *
 * @param model  Model descriptor; only `baseUrl` is read here.
 * @param apiKey Anthropic API key or OAuth access token.
 * @returns The configured client and whether OAuth-token auth is in effect.
 */
function createClient(
    model: Model<"anthropic-messages">,
    apiKey: string,
): { client: Anthropic; isOAuthToken: boolean } {
    if (apiKey.includes("sk-ant-oat")) {
        const defaultHeaders = {
            accept: "application/json",
            "anthropic-dangerous-direct-browser-access": "true",
            "anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
        };
        // Clear the env var if we're in Node.js to prevent SDK from using it.
        // NOTE: `process.env.X = undefined` would coerce to the string "undefined"
        // (still truthy for the SDK), so we must `delete` the property instead.
        if (typeof process !== "undefined" && process.env) {
            delete process.env.ANTHROPIC_API_KEY;
        }
        const client = new Anthropic({
            apiKey: null,
            authToken: apiKey,
            baseURL: model.baseUrl,
            defaultHeaders,
            dangerouslyAllowBrowser: true,
        });
        return { client, isOAuthToken: true };
    } else {
        const defaultHeaders = {
            accept: "application/json",
            "anthropic-dangerous-direct-browser-access": "true",
            "anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
        };
        const client = new Anthropic({
            apiKey,
            baseURL: model.baseUrl,
            dangerouslyAllowBrowser: true,
            defaultHeaders,
        });
        return { client, isOAuthToken: false };
    }
}
function buildParams(
model: Model<"anthropic-messages">,
context: Context,
isOAuthToken: boolean,
options?: AnthropicOptions,
): MessageCreateParamsStreaming {
const params: MessageCreateParamsStreaming = {
model: model.id,
messages: convertMessages(context.messages, model),
max_tokens: options?.maxTokens || model.maxTokens,
stream: true,
};
// For OAuth tokens, we MUST include Claude Code identity
if (isOAuthToken) {
params.system = [
{
type: "text",
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
},
},
];
if (context.systemPrompt) {
params.system.push({
type: "text",
text: context.systemPrompt,
cache_control: {
type: "ephemeral",
},
});
}
} else if (context.systemPrompt) {
params.system = context.systemPrompt;
}
if (options?.temperature !== undefined) {
params.temperature = options.temperature;
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
if (options?.thinkingEnabled && model.reasoning) {
params.thinking = {
type: "enabled",
budget_tokens: options.thinkingBudgetTokens || 1024,
};
}
if (options?.toolChoice) {
if (typeof options.toolChoice === "string") {
params.tool_choice = { type: options.toolChoice };
} else {
params.tool_choice = options.toolChoice;
}
}
private convertMessages(messages: Message[]): MessageParam[] {
const params: MessageParam[] = [];
return params;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
function convertMessages(messages: Message[], model: Model<"anthropic-messages">): MessageParam[] {
const params: MessageParam[] = [];
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, model);
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
if (msg.content.trim().length > 0) {
params.push({
role: "user",
content: msg.content,
});
} else {
// Convert array content to Anthropic format
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
};
} else {
// Image content
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
});
const filteredBlocks = !this.modelInfo?.input.includes("image")
? blocks.filter((b) => b.type !== "image")
: blocks;
params.push({
role: "user",
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
blocks.push({
} else {
const blocks: ContentBlockParam[] = msg.content.map((item) => {
if (item.type === "text") {
return {
type: "text",
text: block.text,
});
} else if (block.type === "thinking") {
blocks.push({
type: "thinking",
thinking: block.thinking,
signature: block.thinkingSignature || "",
});
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: block.name,
input: block.arguments,
});
text: item.text,
};
} else {
return {
type: "image",
source: {
type: "base64",
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
data: item.data,
},
};
}
}
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
filteredBlocks = filteredBlocks.filter((b) => {
if (b.type === "text") {
return b.text.trim().length > 0;
}
return true;
});
if (filteredBlocks.length === 0) continue;
params.push({
role: "user",
content: [
{
type: "tool_result",
tool_use_id: msg.toolCallId,
content: msg.content,
is_error: msg.isError,
},
],
content: filteredBlocks,
});
}
} else if (msg.role === "assistant") {
const blocks: ContentBlockParam[] = [];
for (const block of msg.content) {
if (block.type === "text") {
if (block.text.trim().length === 0) continue;
blocks.push({
type: "text",
text: block.text,
});
} else if (block.type === "thinking") {
if (block.thinking.trim().length === 0) continue;
blocks.push({
type: "thinking",
thinking: block.thinking,
signature: block.thinkingSignature || "",
});
} else if (block.type === "toolCall") {
blocks.push({
type: "tool_use",
id: block.id,
name: block.name,
input: block.arguments,
});
}
}
if (blocks.length === 0) continue;
params.push({
role: "assistant",
content: blocks,
});
} else if (msg.role === "toolResult") {
params.push({
role: "user",
content: [
{
type: "tool_result",
tool_use_id: msg.toolCallId,
content: msg.content,
is_error: msg.isError,
},
],
});
}
return params;
}
return params;
}
private convertTools(tools: Context["tools"]): Tool[] {
if (!tools) return [];
function convertTools(tools: Tool[]): Anthropic.Messages.Tool[] {
if (!tools) return [];
return tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: {
type: "object" as const,
properties: tool.parameters.properties || {},
required: tool.parameters.required || [],
},
}));
}
return tools.map((tool) => ({
name: tool.name,
description: tool.description,
input_schema: {
type: "object" as const,
properties: tool.parameters.properties || {},
required: tool.parameters.required || [],
},
}));
}
private mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
switch (reason) {
case "end_turn":
return "stop";
case "max_tokens":
return "length";
case "tool_use":
return "toolUse";
case "refusal":
return "safety";
case "pause_turn": // Stop is good enough -> resubmit
return "stop";
case "stop_sequence":
return "stop"; // We don't supply stop sequences, so this should never happen
default:
return "stop";
/**
 * Maps an Anthropic stop reason onto the library's unified StopReason.
 *
 * The Record type forces the table to cover every Anthropic stop reason at
 * compile time (same guarantee as the original exhaustive switch); the runtime
 * guard only fires if an unknown value slips past the type system.
 *
 * @param reason Stop reason reported by the Anthropic Messages API.
 * @returns The unified stop reason.
 * @throws Error if the reason is not in the mapping table.
 */
function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
    const table: Record<Anthropic.Messages.StopReason, StopReason> = {
        end_turn: "stop",
        max_tokens: "length",
        tool_use: "toolUse",
        refusal: "safety",
        pause_turn: "stop", // Stop is good enough -> resubmit
        stop_sequence: "stop", // We don't supply stop sequences, so this should never happen
    };
    const mapped = table[reason];
    if (mapped === undefined) {
        throw new Error(`Unhandled stop reason: ${reason}`);
    }
    return mapped;
}

View file

@ -1,19 +1,21 @@
import {
type Content,
type FinishReason,
FinishReason,
FunctionCallingConfigMode,
type GenerateContentConfig,
type GenerateContentParameters,
GoogleGenAI,
type Part,
} from "@google/genai";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
Message,
GenerateFunction,
GenerateOptions,
GenerateStream,
Model,
StopReason,
TextContent,
@ -23,7 +25,7 @@ import type {
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface GoogleLLMOptions extends LLMOptions {
export interface GoogleOptions extends GenerateOptions {
toolChoice?: "auto" | "none" | "any";
thinking?: {
enabled: boolean;
@ -31,38 +33,20 @@ export interface GoogleLLMOptions extends LLMOptions {
};
}
export class GoogleLLM implements LLM<GoogleLLMOptions> {
private client: GoogleGenAI;
private modelInfo: Model;
export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
model: Model<"google-generative-ai">,
context: Context,
options?: GoogleOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.GEMINI_API_KEY) {
throw new Error(
"Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.GEMINI_API_KEY;
}
this.client = new GoogleGenAI({ apiKey });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "google-generative-ai";
}
async generate(context: Context, options?: GoogleLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "google-generative-ai" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -72,70 +56,20 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
},
stopReason: "stop",
};
try {
const contents = this.convertMessages(context.messages);
const client = createClient(options?.apiKey);
const params = buildParams(model, context, options);
const googleStream = await client.models.generateContentStream(params);
// Build generation config
const generationConfig: GenerateContentConfig = {};
if (options?.temperature !== undefined) {
generationConfig.temperature = options.temperature;
}
if (options?.maxTokens !== undefined) {
generationConfig.maxOutputTokens = options.maxTokens;
}
// Build the config object
const config: GenerateContentConfig = {
...(Object.keys(generationConfig).length > 0 && generationConfig),
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
...(context.tools && { tools: this.convertTools(context.tools) }),
};
// Add tool config if needed
if (context.tools && options?.toolChoice) {
config.toolConfig = {
functionCallingConfig: {
mode: this.mapToolChoice(options.toolChoice),
},
};
}
// Add thinking config if enabled and model supports it
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
config.thinkingConfig = {
includeThoughts: true,
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
};
}
// Abort signal
if (options?.signal) {
if (options.signal.aborted) {
throw new Error("Request aborted");
}
config.abortSignal = options.signal;
}
// Build the request parameters
const params: GenerateContentParameters = {
model: this.modelInfo.id,
contents,
config,
};
const stream = await this.client.models.generateContentStream(params);
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
stream.push({ type: "start", partial: output });
let currentBlock: TextContent | ThinkingContent | null = null;
for await (const chunk of stream) {
// Extract parts from the chunk
for await (const chunk of googleStream) {
const candidate = chunk.candidates?.[0];
if (candidate?.content?.parts) {
for (const part of candidate.content.parts) {
if (part.text !== undefined) {
const isThinking = part.thought === true;
// Check if we need to switch blocks
if (
!currentBlock ||
(isThinking && currentBlock.type !== "thinking") ||
@ -143,50 +77,60 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
) {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
}
}
// Start new block
if (isThinking) {
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
} else {
currentBlock = { type: "text", text: "" };
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
}
output.content.push(currentBlock);
}
// Append content to current block
if (currentBlock.type === "thinking") {
currentBlock.thinking += part.text;
currentBlock.thinkingSignature = part.thoughtSignature;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentBlock.thinking,
delta: part.text,
partial: output,
});
} else {
currentBlock.text += part.text;
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: part.text });
stream.push({ type: "text_delta", delta: part.text, partial: output });
}
}
// Handle function calls
if (part.functionCall) {
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
}
currentBlock = null;
}
// Add tool call
const toolCallId = part.functionCall.id || `${part.functionCall.name}_${Date.now()}`;
const toolCall: ToolCall = {
type: "toolCall",
@ -195,21 +139,18 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
arguments: part.functionCall.args as Record<string, any>,
};
output.content.push(toolCall);
options?.onEvent?.({ type: "toolCall", toolCall });
stream.push({ type: "toolCall", toolCall, partial: output });
}
}
}
// Map finish reason
if (candidate?.finishReason) {
output.stopReason = this.mapStopReason(candidate.finishReason);
// Check if we have tool calls in blocks
output.stopReason = mapStopReason(candidate.finishReason);
if (output.content.some((b) => b.type === "toolCall")) {
output.stopReason = "toolUse";
}
}
// Capture usage metadata if available
if (chunk.usageMetadata) {
output.usage = {
input: chunk.usageMetadata.promptTokenCount || 0,
@ -225,166 +166,223 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
total: 0,
},
};
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
}
// Finalize last block
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({ type: "text_end", content: currentBlock.text, partial: output });
} else {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
}
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Creates a Google GenAI client.
 *
 * Resolves the key from the explicit argument first, falling back to the
 * GEMINI_API_KEY environment variable; empty strings count as missing.
 *
 * @param apiKey Optional explicit Gemini API key.
 * @returns A configured GoogleGenAI client.
 * @throws Error when neither an argument nor the env variable supplies a key.
 */
function createClient(apiKey?: string): GoogleGenAI {
    const resolvedKey = apiKey || process.env.GEMINI_API_KEY;
    if (!resolvedKey) {
        throw new Error(
            "Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new GoogleGenAI({ apiKey: resolvedKey });
}
function buildParams(
model: Model<"google-generative-ai">,
context: Context,
options: GoogleOptions = {},
): GenerateContentParameters {
const contents = convertMessages(model, context);
const generationConfig: GenerateContentConfig = {};
if (options.temperature !== undefined) {
generationConfig.temperature = options.temperature;
}
if (options.maxTokens !== undefined) {
generationConfig.maxOutputTokens = options.maxTokens;
}
private convertMessages(messages: Message[]): Content[] {
const contents: Content[] = [];
const config: GenerateContentConfig = {
...(Object.keys(generationConfig).length > 0 && generationConfig),
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
...(context.tools && { tools: convertTools(context.tools) }),
};
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (context.tools && options.toolChoice) {
config.toolConfig = {
functionCallingConfig: {
mode: mapToolChoice(options.toolChoice),
},
};
}
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [{ text: msg.content }],
});
} else {
// Convert array content to Google format
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: item.text };
} else {
// Image content - Google uses inlineData
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
};
}
});
const filteredParts = !this.modelInfo?.input.includes("image")
? parts.filter((p) => p.text !== undefined)
: parts;
contents.push({
role: "user",
parts: filteredParts,
});
}
} else if (msg.role === "assistant") {
const parts: Part[] = [];
if (options.thinking?.enabled && model.reasoning) {
config.thinkingConfig = {
includeThoughts: true,
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
};
}
// Process content blocks
for (const block of msg.content) {
if (block.type === "text") {
parts.push({ text: block.text });
} else if (block.type === "thinking") {
const thinkingPart: Part = {
thought: true,
thoughtSignature: block.thinkingSignature,
text: block.thinking,
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
}
}
if (options.signal) {
if (options.signal.aborted) {
throw new Error("Request aborted");
}
config.abortSignal = options.signal;
}
if (parts.length > 0) {
contents.push({
role: "model",
parts,
});
}
} else if (msg.role === "toolResult") {
const params: GenerateContentParameters = {
model: model.id,
contents,
config,
};
return params;
}
function convertMessages(model: Model<"google-generative-ai">, context: Context): Content[] {
const contents: Content[] = [];
const transformedMessages = transformMessages(context.messages, model);
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
contents.push({
role: "user",
parts: [
{
functionResponse: {
id: msg.toolCallId,
name: msg.toolName,
response: {
result: msg.content,
isError: msg.isError,
},
parts: [{ text: msg.content }],
});
} else {
const parts: Part[] = msg.content.map((item) => {
if (item.type === "text") {
return { text: item.text };
} else {
return {
inlineData: {
mimeType: item.mimeType,
data: item.data,
},
},
],
};
}
});
const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
if (filteredParts.length === 0) continue;
contents.push({
role: "user",
parts: filteredParts,
});
}
}
} else if (msg.role === "assistant") {
const parts: Part[] = [];
return contents;
}
for (const block of msg.content) {
if (block.type === "text") {
parts.push({ text: block.text });
} else if (block.type === "thinking") {
const thinkingPart: Part = {
thought: true,
thoughtSignature: block.thinkingSignature,
text: block.thinking,
};
parts.push(thinkingPart);
} else if (block.type === "toolCall") {
parts.push({
functionCall: {
id: block.id,
name: block.name,
args: block.arguments,
},
});
}
}
private convertTools(tools: Tool[]): any[] {
return [
{
functionDeclarations: tools.map((tool) => ({
name: tool.name,
description: tool.description,
parameters: tool.parameters,
})),
},
];
}
private mapToolChoice(choice: string): FunctionCallingConfigMode {
switch (choice) {
case "auto":
return FunctionCallingConfigMode.AUTO;
case "none":
return FunctionCallingConfigMode.NONE;
case "any":
return FunctionCallingConfigMode.ANY;
default:
return FunctionCallingConfigMode.AUTO;
if (parts.length === 0) continue;
contents.push({
role: "model",
parts,
});
} else if (msg.role === "toolResult") {
contents.push({
role: "user",
parts: [
{
functionResponse: {
id: msg.toolCallId,
name: msg.toolName,
response: {
result: msg.content,
isError: msg.isError,
},
},
},
],
});
}
}
private mapStopReason(reason: FinishReason): StopReason {
switch (reason) {
case "STOP":
return "stop";
case "MAX_TOKENS":
return "length";
case "BLOCKLIST":
case "PROHIBITED_CONTENT":
case "SPII":
case "SAFETY":
case "IMAGE_SAFETY":
return "safety";
case "RECITATION":
return "safety";
case "FINISH_REASON_UNSPECIFIED":
case "OTHER":
case "LANGUAGE":
case "MALFORMED_FUNCTION_CALL":
case "UNEXPECTED_TOOL_CALL":
return "error";
default:
return "stop";
return contents;
}
/**
 * Converts internal tool definitions to Google GenAI's tools config shape:
 * a single-element array holding all function declarations.
 *
 * @param tools Internal tool definitions.
 * @returns Google-formatted tools array with one functionDeclarations entry.
 */
function convertTools(tools: Tool[]): any[] {
    const functionDeclarations = tools.map((tool) => ({
        name: tool.name,
        description: tool.description,
        parameters: tool.parameters,
    }));
    return [{ functionDeclarations }];
}
/**
 * Maps the library's tool-choice string to Google's function calling mode.
 * Unknown values fall back to AUTO, matching the original default branch.
 *
 * @param choice "auto" | "none" | "any" (anything else -> AUTO).
 * @returns The corresponding FunctionCallingConfigMode.
 */
function mapToolChoice(choice: string): FunctionCallingConfigMode {
    const modes: Record<string, FunctionCallingConfigMode> = {
        auto: FunctionCallingConfigMode.AUTO,
        none: FunctionCallingConfigMode.NONE,
        any: FunctionCallingConfigMode.ANY,
    };
    return modes[choice] ?? FunctionCallingConfigMode.AUTO;
}
/**
 * Maps a Google GenAI finish reason onto the library's unified StopReason.
 *
 * Safety-related reasons (blocklist, prohibited content, SPII, safety,
 * image safety, recitation) collapse to "safety"; unspecified/other/
 * malformed reasons collapse to "error". The Record type keeps the table
 * exhaustive at compile time, like the original switch's never-check.
 *
 * @param reason Finish reason reported by the Google GenAI API.
 * @returns The unified stop reason.
 * @throws Error if the reason is not in the mapping table.
 */
function mapStopReason(reason: FinishReason): StopReason {
    const table: Record<FinishReason, StopReason> = {
        [FinishReason.STOP]: "stop",
        [FinishReason.MAX_TOKENS]: "length",
        [FinishReason.BLOCKLIST]: "safety",
        [FinishReason.PROHIBITED_CONTENT]: "safety",
        [FinishReason.SPII]: "safety",
        [FinishReason.SAFETY]: "safety",
        [FinishReason.IMAGE_SAFETY]: "safety",
        [FinishReason.RECITATION]: "safety",
        [FinishReason.FINISH_REASON_UNSPECIFIED]: "error",
        [FinishReason.OTHER]: "error",
        [FinishReason.LANGUAGE]: "error",
        [FinishReason.MALFORMED_FUNCTION_CALL]: "error",
        [FinishReason.UNEXPECTED_TOOL_CALL]: "error",
    };
    const mapped = table[reason];
    if (mapped === undefined) {
        throw new Error(`Unhandled stop reason: ${reason}`);
    }
    return mapped;
}

View file

@ -1,18 +1,20 @@
import OpenAI from "openai";
import type {
ChatCompletionAssistantMessageParam,
ChatCompletionChunk,
ChatCompletionContentPart,
ChatCompletionContentPartImage,
ChatCompletionContentPartText,
ChatCompletionMessageParam,
} from "openai/resources/chat/completions.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
AssistantMessage,
Context,
LLM,
LLMOptions,
Message,
GenerateFunction,
GenerateOptions,
GenerateStream,
Model,
StopReason,
TextContent,
@ -22,43 +24,25 @@ import type {
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface OpenAICompletionsLLMOptions extends LLMOptions {
export interface OpenAICompletionsOptions extends GenerateOptions {
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
reasoningEffort?: "low" | "medium" | "high";
reasoningEffort?: "minimal" | "low" | "medium" | "high";
}
export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
private client: OpenAI;
private modelInfo: Model;
export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
model: Model<"openai-completions">,
context: Context,
options?: OpenAICompletionsOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.OPENAI_API_KEY) {
throw new Error(
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.OPENAI_API_KEY;
}
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "openai-completions";
}
async generate(request: Context, options?: OpenAICompletionsLLMOptions): Promise<AssistantMessage> {
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: model.api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -70,52 +54,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
};
try {
const messages = this.convertMessages(request.messages, request.systemPrompt);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: this.modelInfo.id,
messages,
stream: true,
stream_options: { include_usage: true },
};
// Cerebras/xAI don't like the "store" field
if (!this.modelInfo.baseUrl?.includes("cerebras.ai") && !this.modelInfo.baseUrl?.includes("api.x.ai")) {
params.store = false;
}
if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (request.tools) {
params.tools = this.convertTools(request.tools);
}
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
if (
options?.reasoningEffort &&
this.modelInfo.reasoning &&
!this.modelInfo.id.toLowerCase().includes("grok")
) {
params.reasoning_effort = options.reasoningEffort;
}
const stream = await this.client.chat.completions.create(params, {
signal: options?.signal,
});
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const client = createClient(model, options?.apiKey);
const params = buildParams(model, context, options);
const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
stream.push({ type: "start", partial: output });
let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
for await (const chunk of stream) {
for await (const chunk of openaiStream) {
if (chunk.usage) {
output.usage = {
input: chunk.usage.prompt_tokens || 0,
@ -132,137 +77,170 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
total: 0,
},
};
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
}
const choice = chunk.choices[0];
if (!choice) continue;
// Capture finish reason
if (choice.finish_reason) {
output.stopReason = this.mapStopReason(choice.finish_reason);
output.stopReason = mapStopReason(choice.finish_reason);
}
if (choice.delta) {
// Handle text content
if (
choice.delta.content !== null &&
choice.delta.content !== undefined &&
choice.delta.content.length > 0
) {
// Check if we need to switch to text block
if (!currentBlock || currentBlock.type !== "text") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new text block
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
options?.onEvent?.({ type: "text_start" });
stream.push({ type: "text_start", partial: output });
}
// Append to text block
if (currentBlock.type === "text") {
currentBlock.text += choice.delta.content;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentBlock.text,
delta: choice.delta.content,
partial: output,
});
}
}
// Handle reasoning_content field
// Some endpoints return reasoning in reasoning_content (llama.cpp)
if (
(choice.delta as any).reasoning_content !== null &&
(choice.delta as any).reasoning_content !== undefined &&
(choice.delta as any).reasoning_content.length > 0
) {
// Check if we need to switch to thinking block
if (!currentBlock || currentBlock.type !== "thinking") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new thinking block
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning_content" };
currentBlock = {
type: "thinking",
thinking: "",
thinkingSignature: "reasoning_content",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
}
// Append to thinking block
if (currentBlock.type === "thinking") {
const delta = (choice.delta as any).reasoning_content;
currentBlock.thinking += delta;
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
stream.push({
type: "thinking_delta",
delta,
partial: output,
});
}
}
// Handle reasoning field
// Some endpoints return reasoning in "reasoning" (ollama, xAI, ...)
if (
(choice.delta as any).reasoning !== null &&
(choice.delta as any).reasoning !== undefined &&
(choice.delta as any).reasoning.length > 0
) {
// Check if we need to switch to thinking block
if (!currentBlock || currentBlock.type !== "thinking") {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new thinking block
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning" };
currentBlock = {
type: "thinking",
thinking: "",
thinkingSignature: "reasoning",
};
output.content.push(currentBlock);
options?.onEvent?.({ type: "thinking_start" });
stream.push({ type: "thinking_start", partial: output });
}
// Append to thinking block
if (currentBlock.type === "thinking") {
const delta = (choice.delta as any).reasoning;
currentBlock.thinking += delta;
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
stream.push({ type: "thinking_delta", delta, partial: output });
}
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
for (const toolCall of choice.delta.tool_calls) {
// Check if we need a new tool call block
if (
!currentBlock ||
currentBlock.type !== "toolCall" ||
(toolCall.id && currentBlock.id !== toolCall.id)
) {
// Save current block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
// Start new tool call block
currentBlock = {
type: "toolCall",
id: toolCall.id || "",
@ -273,7 +251,6 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
output.content.push(currentBlock);
}
// Accumulate tool call data
if (currentBlock.type === "toolCall") {
if (toolCall.id) currentBlock.id = toolCall.id;
if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
@ -286,16 +263,27 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
}
}
// Save final block if exists
if (currentBlock) {
if (currentBlock.type === "text") {
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
} else if (currentBlock.type === "thinking") {
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
} else if (currentBlock.type === "toolCall") {
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
delete currentBlock.partialArgs;
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
stream.push({
type: "toolCall",
toolCall: currentBlock as ToolCall,
partial: output,
});
}
}
@ -303,141 +291,188 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
throw new Error("Request was aborted");
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
return output;
} catch (error) {
// Update output with error information
output.stopReason = "error";
output.error = error instanceof Error ? error.message : String(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Build an OpenAI SDK client for the Chat Completions API.
 *
 * Resolves the key from the explicit argument first, then from the
 * OPENAI_API_KEY environment variable; throws if neither is set.
 * `dangerouslyAllowBrowser` is enabled so the client also works in
 * browser-like environments.
 */
function createClient(model: Model<"openai-completions">, apiKey?: string) {
    const key = apiKey || process.env.OPENAI_API_KEY;
    if (!key) {
        throw new Error(
            "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new OpenAI({ apiKey: key, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
}
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
const messages = convertMessages(model, context);
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model: model.id,
messages,
stream: true,
stream_options: { include_usage: true },
};
// Cerebras/xAI dont like the "store" field
if (!model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai")) {
params.store = false;
}
private convertMessages(messages: Message[], systemPrompt?: string): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
// Add system prompt if provided
if (systemPrompt) {
// Cerebras/xAi don't like the "developer" role
const useDeveloperRole =
this.modelInfo.reasoning &&
!this.modelInfo.baseUrl?.includes("cerebras.ai") &&
!this.modelInfo.baseUrl?.includes("api.x.ai");
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: systemPrompt });
}
if (context.tools) {
params.tools = convertTools(context.tools);
}
// Convert messages
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
params.push({
role: "user",
content: msg.content,
});
} else {
// Convert array content to OpenAI format
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
} satisfies ChatCompletionContentPartText;
} else {
// Image content - OpenAI uses data URLs
return {
type: "image_url",
image_url: {
url: `data:${item.mimeType};base64,${item.data}`,
},
} satisfies ChatCompletionContentPartImage;
}
});
const filteredContent = !this.modelInfo?.input.includes("image")
? content.filter((c) => c.type !== "image_url")
: content;
params.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const assistantMsg: ChatCompletionMessageParam = {
role: "assistant",
content: null,
};
if (options?.toolChoice) {
params.tool_choice = options.toolChoice;
}
// Build content from blocks
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
if (textBlocks.length > 0) {
assistantMsg.content = textBlocks.map((b) => b.text).join("");
}
// Grok models don't like reasoning_effort
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
params.reasoning_effort = options.reasoningEffort;
}
// Handle thinking blocks for llama.cpp server + gpt-oss
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
if (thinkingBlocks.length > 0) {
// Use the signature from the first thinking block if available
const signature = thinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
}
}
return params;
}
// Handle tool calls
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
type: "function" as const,
function: {
name: tc.name,
arguments: JSON.stringify(tc.arguments),
},
}));
}
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
const params: ChatCompletionMessageParam[] = [];
params.push(assistantMsg);
} else if (msg.role === "toolResult") {
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
// Cerebras/xAi don't like the "developer" role
const useDeveloperRole =
model.reasoning && !model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai");
const role = useDeveloperRole ? "developer" : "system";
params.push({ role: role, content: context.systemPrompt });
}
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
params.push({
role: "tool",
role: "user",
content: msg.content,
tool_call_id: msg.toolCallId,
});
} else {
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
if (item.type === "text") {
return {
type: "text",
text: item.text,
} satisfies ChatCompletionContentPartText;
} else {
return {
type: "image_url",
image_url: {
url: `data:${item.mimeType};base64,${item.data}`,
},
} satisfies ChatCompletionContentPartImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "image_url")
: content;
if (filteredContent.length === 0) continue;
params.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const assistantMsg: ChatCompletionAssistantMessageParam = {
role: "assistant",
content: null,
};
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
if (textBlocks.length > 0) {
assistantMsg.content = textBlocks.map((b) => b.text).join("");
}
// Handle thinking blocks for llama.cpp server + gpt-oss
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
if (thinkingBlocks.length > 0) {
// Use the signature from the first thinking block if available
const signature = thinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
}
}
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
type: "function" as const,
function: {
name: tc.name,
arguments: JSON.stringify(tc.arguments),
},
}));
}
params.push(assistantMsg);
} else if (msg.role === "toolResult") {
params.push({
role: "tool",
content: msg.content,
tool_call_id: msg.toolCallId,
});
}
return params;
}
private convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
return tools.map((tool) => ({
type: "function",
function: {
name: tool.name,
description: tool.description,
parameters: tool.parameters,
},
}));
}
return params;
}
private mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | null): StopReason {
switch (reason) {
case "stop":
return "stop";
case "length":
return "length";
case "function_call":
case "tool_calls":
return "toolUse";
case "content_filter":
return "safety";
default:
return "stop";
/**
 * Translate internal Tool definitions into the Chat Completions
 * function-tool wire format expected by the OpenAI SDK.
 */
function convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
    const converted: OpenAI.Chat.Completions.ChatCompletionTool[] = [];
    for (const { name, description, parameters } of tools) {
        converted.push({
            type: "function",
            function: { name, description, parameters },
        });
    }
    return converted;
}
/**
 * Map an OpenAI Chat Completions finish_reason onto the unified StopReason.
 *
 * null (stream still open / no reason reported) is treated as a normal stop.
 * The trailing `never` assignment keeps the mapping exhaustive at compile
 * time: adding a new finish_reason to the SDK union breaks the build here.
 */
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): StopReason {
    if (reason === null) return "stop";
    if (reason === "stop") return "stop";
    if (reason === "length") return "length";
    if (reason === "function_call" || reason === "tool_calls") return "toolUse";
    if (reason === "content_filter") return "safety";
    const _exhaustive: never = reason;
    throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}

View file

@ -10,58 +10,49 @@ import type {
ResponseOutputMessage,
ResponseReasoningItem,
} from "openai/resources/responses/responses.js";
import { QueuedGenerateStream } from "../generate.js";
import { calculateCost } from "../models.js";
import type {
Api,
AssistantMessage,
Context,
LLM,
LLMOptions,
GenerateFunction,
GenerateOptions,
GenerateStream,
Message,
Model,
StopReason,
TextContent,
ThinkingContent,
Tool,
ToolCall,
} from "../types.js";
import { transformMessages } from "./utils.js";
export interface OpenAIResponsesLLMOptions extends LLMOptions {
// OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends GenerateOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high";
reasoningSummary?: "auto" | "detailed" | "concise" | null;
}
export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
private client: OpenAI;
private modelInfo: Model;
/**
* Generate function for OpenAI Responses API
*/
export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
model: Model<"openai-responses">,
context: Context,
options?: OpenAIResponsesOptions,
): GenerateStream => {
const stream = new QueuedGenerateStream();
constructor(model: Model, apiKey?: string) {
if (!apiKey) {
if (!process.env.OPENAI_API_KEY) {
throw new Error(
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
);
}
apiKey = process.env.OPENAI_API_KEY;
}
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
this.modelInfo = model;
}
getModel(): Model {
return this.modelInfo;
}
getApi(): string {
return "openai-responses";
}
async generate(request: Context, options?: OpenAIResponsesLLMOptions): Promise<AssistantMessage> {
// Start async processing
(async () => {
const output: AssistantMessage = {
role: "assistant",
content: [],
api: this.getApi(),
provider: this.modelInfo.provider,
model: this.modelInfo.id,
api: "openai-responses" as Api,
provider: model.provider,
model: model.id,
usage: {
input: 0,
output: 0,
@ -71,77 +62,31 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
},
stopReason: "stop",
};
try {
const input = this.convertToInput(request.messages, request.systemPrompt);
// Create OpenAI client
const client = createClient(model, options?.apiKey);
const params = buildParams(model, context, options);
const openaiStream = await client.responses.create(params, { signal: options?.signal });
stream.push({ type: "start", partial: output });
const params: ResponseCreateParamsStreaming = {
model: this.modelInfo.id,
input,
stream: true,
};
if (options?.maxTokens) {
params.max_output_tokens = options?.maxTokens;
}
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
if (request.tools) {
params.tools = this.convertTools(request.tools);
}
// Add reasoning options for models that support it
if (this.modelInfo?.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary || "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
params.reasoning = {
effort: this.modelInfo.name.startsWith("gpt-5") ? "minimal" : null,
summary: null,
};
if (this.modelInfo.name.startsWith("gpt-5")) {
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
input.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
}
const stream = await this.client.responses.create(params, {
signal: options?.signal,
});
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = [];
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
let currentBlock: ThinkingContent | TextContent | ToolCall | null = null;
for await (const event of stream) {
for await (const event of openaiStream) {
// Handle output item start
if (event.type === "response.output_item.added") {
const item = event.item;
if (item.type === "reasoning") {
options?.onEvent?.({ type: "thinking_start" });
outputItems.push(item);
currentItem = item;
currentBlock = { type: "thinking", thinking: "" };
output.content.push(currentBlock);
stream.push({ type: "thinking_start", partial: output });
} else if (item.type === "message") {
options?.onEvent?.({ type: "text_start" });
outputItems.push(item);
currentItem = item;
currentBlock = { type: "text", text: "" };
output.content.push(currentBlock);
stream.push({ type: "text_start", partial: output });
}
}
// Handle reasoning summary deltas
@ -151,30 +96,42 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
currentItem.summary.push(event.part);
}
} else if (event.type === "response.reasoning_summary_text.delta") {
if (currentItem && currentItem.type === "reasoning") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += event.delta;
lastPart.text += event.delta;
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentItem.summary.map((s) => s.text).join("\n\n"),
delta: event.delta,
partial: output,
});
}
}
}
// Add a new line between summary parts (hack...)
else if (event.type === "response.reasoning_summary_part.done") {
if (currentItem && currentItem.type === "reasoning") {
if (
currentItem &&
currentItem.type === "reasoning" &&
currentBlock &&
currentBlock.type === "thinking"
) {
currentItem.summary = currentItem.summary || [];
const lastPart = currentItem.summary[currentItem.summary.length - 1];
if (lastPart) {
currentBlock.thinking += "\n\n";
lastPart.text += "\n\n";
options?.onEvent?.({
stream.push({
type: "thinking_delta",
content: currentItem.summary.map((s) => s.text).join("\n\n"),
delta: "\n\n",
partial: output,
});
}
}
@ -186,30 +143,28 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
currentItem.content.push(event.part);
}
} else if (event.type === "response.output_text.delta") {
if (currentItem && currentItem.type === "message") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "output_text") {
currentBlock.text += event.delta;
lastPart.text += event.delta;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentItem.content
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
.join(""),
delta: event.delta,
partial: output,
});
}
}
} else if (event.type === "response.refusal.delta") {
if (currentItem && currentItem.type === "message") {
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
const lastPart = currentItem.content[currentItem.content.length - 1];
if (lastPart && lastPart.type === "refusal") {
currentBlock.text += event.delta;
lastPart.refusal += event.delta;
options?.onEvent?.({
stream.push({
type: "text_delta",
content: currentItem.content
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
.join(""),
delta: event.delta,
partial: output,
});
}
}
@ -218,14 +173,24 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
else if (event.type === "response.output_item.done") {
const item = event.item;
if (item.type === "reasoning") {
outputItems[outputItems.length - 1] = item; // Update with final item
const thinkingContent = item.summary?.map((s) => s.text).join("\n\n") || "";
options?.onEvent?.({ type: "thinking_end", content: thinkingContent });
} else if (item.type === "message") {
outputItems[outputItems.length - 1] = item; // Update with final item
const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
options?.onEvent?.({ type: "text_end", content: textContent });
if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
currentBlock.thinkingSignature = JSON.stringify(item);
stream.push({
type: "thinking_end",
content: currentBlock.thinking,
partial: output,
});
currentBlock = null;
} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
currentBlock.textSignature = item.id;
stream.push({
type: "text_end",
content: currentBlock.text,
partial: output,
});
currentBlock = null;
} else if (item.type === "function_call") {
const toolCall: ToolCall = {
type: "toolCall",
@ -233,8 +198,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
name: item.name,
arguments: JSON.parse(item.arguments),
};
options?.onEvent?.({ type: "toolCall", toolCall });
outputItems.push(item);
output.content.push(toolCall);
stream.push({ type: "toolCall", toolCall, partial: output });
}
}
// Handle completion
@ -249,10 +214,10 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}
calculateCost(this.modelInfo, output.usage);
calculateCost(model, output.usage);
// Map status to stop reason
output.stopReason = this.mapStopReason(response?.status);
if (outputItems.some((b) => b.type === "function_call") && output.stopReason === "stop") {
output.stopReason = mapStopReason(response?.status);
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
output.stopReason = "toolUse";
}
}
@ -260,173 +225,215 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
else if (event.type === "error") {
output.stopReason = "error";
output.error = `Code ${event.code}: ${event.message}` || "Unknown error";
options?.onEvent?.({ type: "error", error: output.error });
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
return output;
} else if (event.type === "response.failed") {
output.stopReason = "error";
output.error = "Unknown error";
options?.onEvent?.({ type: "error", error: output.error });
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
return output;
}
}
// Convert output items to blocks
for (const item of outputItems) {
if (item.type === "reasoning") {
output.content.push({
type: "thinking",
thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "",
thinkingSignature: JSON.stringify(item), // Full item for resubmission
});
} else if (item.type === "message") {
output.content.push({
type: "text",
text: item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""),
textSignature: item.id, // ID for resubmission
});
} else if (item.type === "function_call") {
output.content.push({
type: "toolCall",
id: item.call_id + "|" + item.id,
name: item.name,
arguments: JSON.parse(item.arguments),
});
}
}
if (options?.signal?.aborted) {
throw new Error("Request was aborted");
}
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
return output;
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
output.stopReason = "error";
output.error = error instanceof Error ? error.message : JSON.stringify(error);
options?.onEvent?.({ type: "error", error: output.error });
return output;
stream.push({ type: "error", error: output.error, partial: output });
stream.end();
}
})();
return stream;
};
/**
 * Construct an OpenAI SDK client for the Responses API.
 *
 * An explicit key wins; otherwise OPENAI_API_KEY is used. If neither is
 * available the function fails fast with a descriptive error rather than
 * letting the SDK fail on the first request.
 */
function createClient(model: Model<"openai-responses">, apiKey?: string) {
    if (!apiKey && !process.env.OPENAI_API_KEY) {
        throw new Error(
            "OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
        );
    }
    return new OpenAI({
        apiKey: apiKey || process.env.OPENAI_API_KEY,
        baseURL: model.baseUrl,
        dangerouslyAllowBrowser: true,
    });
}
function buildParams(model: Model<"openai-responses">, context: Context, options?: OpenAIResponsesOptions) {
const messages = convertMessages(model, context);
const params: ResponseCreateParamsStreaming = {
model: model.id,
input: messages,
stream: true,
};
if (options?.maxTokens) {
params.max_output_tokens = options?.maxTokens;
}
private convertToInput(messages: Message[], systemPrompt?: string): ResponseInput {
const input: ResponseInput = [];
if (options?.temperature !== undefined) {
params.temperature = options?.temperature;
}
// Transform messages for cross-provider compatibility
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
if (context.tools) {
params.tools = convertTools(context.tools);
}
// Add system prompt if provided
if (systemPrompt) {
const role = this.modelInfo?.reasoning ? "developer" : "system";
input.push({
role,
content: systemPrompt,
});
}
if (model.reasoning) {
if (options?.reasoningEffort || options?.reasoningSummary) {
params.reasoning = {
effort: options?.reasoningEffort || "medium",
summary: options?.reasoningSummary || "auto",
};
params.include = ["reasoning.encrypted_content"];
} else {
params.reasoning = {
effort: model.name.startsWith("gpt-5") ? "minimal" : null,
summary: null,
};
// Convert messages
for (const msg of transformedMessages) {
if (msg.role === "user") {
// Handle both string and array content
if (typeof msg.content === "string") {
input.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
} else {
// Convert array content to OpenAI Responses format
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: item.text,
} satisfies ResponseInputText;
} else {
// Image content - OpenAI Responses uses data URLs
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !this.modelInfo?.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
input.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
// Process content blocks in order
const output: ResponseInput = [];
for (const block of msg.content) {
// Do not submit thinking blocks if the completion had an error (i.e. abort)
if (block.type === "thinking" && msg.stopReason !== "error") {
// Push the full reasoning item(s) from signature
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
status: "completed",
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
} satisfies ResponseOutputMessage);
// Do not submit thinking blocks if the completion had an error (i.e. abort)
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1], // Extract original ID
call_id: toolCall.id.split("|")[0], // Extract call session ID
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
// Add all output items to input
input.push(...output);
} else if (msg.role === "toolResult") {
// Tool results are sent as function_call_output
input.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0], // Extract call session ID
output: msg.content,
if (model.name.startsWith("gpt-5")) {
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
messages.push({
role: "developer",
content: [
{
type: "input_text",
text: "# Juice: 0 !important",
},
],
});
}
}
return input;
}
private convertTools(tools: Tool[]): OpenAITool[] {
return tools.map((tool) => ({
type: "function",
name: tool.name,
description: tool.description,
parameters: tool.parameters,
strict: null,
}));
return params;
}
function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
const messages: ResponseInput = [];
const transformedMessages = transformMessages(context.messages, model);
if (context.systemPrompt) {
const role = model.reasoning ? "developer" : "system";
messages.push({
role,
content: context.systemPrompt,
});
}
private mapStopReason(status: string | undefined): StopReason {
switch (status) {
case "completed":
return "stop";
case "incomplete":
return "length";
case "failed":
case "cancelled":
return "error";
default:
return "stop";
for (const msg of transformedMessages) {
if (msg.role === "user") {
if (typeof msg.content === "string") {
messages.push({
role: "user",
content: [{ type: "input_text", text: msg.content }],
});
} else {
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
if (item.type === "text") {
return {
type: "input_text",
text: item.text,
} satisfies ResponseInputText;
} else {
return {
type: "input_image",
detail: "auto",
image_url: `data:${item.mimeType};base64,${item.data}`,
} satisfies ResponseInputImage;
}
});
const filteredContent = !model.input.includes("image")
? content.filter((c) => c.type !== "input_image")
: content;
if (filteredContent.length === 0) continue;
messages.push({
role: "user",
content: filteredContent,
});
}
} else if (msg.role === "assistant") {
const output: ResponseInput = [];
for (const block of msg.content) {
// Do not submit thinking blocks if the completion had an error (i.e. abort)
if (block.type === "thinking" && msg.stopReason !== "error") {
if (block.thinkingSignature) {
const reasoningItem = JSON.parse(block.thinkingSignature);
output.push(reasoningItem);
}
} else if (block.type === "text") {
const textBlock = block as TextContent;
output.push({
type: "message",
role: "assistant",
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
status: "completed",
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
} satisfies ResponseOutputMessage);
// Do not submit toolcall blocks if the completion had an error (i.e. abort)
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
const toolCall = block as ToolCall;
output.push({
type: "function_call",
id: toolCall.id.split("|")[1],
call_id: toolCall.id.split("|")[0],
name: toolCall.name,
arguments: JSON.stringify(toolCall.arguments),
});
}
}
if (output.length === 0) continue;
messages.push(...output);
} else if (msg.role === "toolResult") {
messages.push({
type: "function_call_output",
call_id: msg.toolCallId.split("|")[0],
output: msg.content,
});
}
}
return messages;
}
/**
 * Translate internal Tool definitions into the flat Responses-API tool
 * shape (name/description/parameters at the top level, strict disabled).
 */
function convertTools(tools: Tool[]): OpenAITool[] {
    return tools.map(({ name, description, parameters }) => ({
        type: "function",
        name,
        description,
        parameters,
        strict: null,
    }));
}
/**
 * Map a Responses-API response status onto the unified StopReason.
 *
 * A missing status defaults to a normal stop. The in-flight states
 * ("in_progress", "queued") should not appear on a finished response but
 * are mapped to "stop" defensively. The `never` assignment at the end keeps
 * the mapping exhaustive against the SDK's ResponseStatus union.
 */
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
    if (!status) return "stop";
    if (status === "incomplete") return "length";
    if (status === "failed" || status === "cancelled") return "error";
    // "completed" plus the wonky in-flight states all count as a clean stop
    if (status === "completed" || status === "in_progress" || status === "queued") return "stop";
    const _exhaustive: never = status;
    throw new Error(`Unhandled stop reason: ${_exhaustive}`);
}

View file

@ -1,18 +1,6 @@
import type { AssistantMessage, Message, Model } from "../types.js";
import type { Api, AssistantMessage, Message, Model } from "../types.js";
/**
* Transform messages for cross-provider compatibility.
*
* - User and toolResult messages are copied verbatim
* - Assistant messages:
* - If from the same provider/model, copied as-is
* - If from different provider/model, thinking blocks are converted to text blocks with <thinking></thinking> tags
*
* @param messages The messages to transform
* @param model The target model that will process these messages
* @returns A copy of the messages array with transformations applied
*/
export function transformMessages(messages: Message[], model: Model, api: string): Message[] {
export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
return messages.map((msg) => {
// User and toolResult messages pass through unchanged
if (msg.role === "user" || msg.role === "toolResult") {
@ -24,7 +12,7 @@ export function transformMessages(messages: Message[], model: Model, api: string
const assistantMsg = msg as AssistantMessage;
// If message is from the same provider and API, keep as is
if (assistantMsg.provider === model.provider && assistantMsg.api === api) {
if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
return msg;
}
@ -47,8 +35,6 @@ export function transformMessages(messages: Message[], model: Model, api: string
content: transformedContent,
};
}
// Should not reach here, but return as-is for safety
return msg;
});
}

View file

@ -1,5 +1,27 @@
export type KnownApi = "openai-completions" | "openai-responses" | "anthropic-messages" | "google-generative-ai";
export type Api = KnownApi | string;
import type { AnthropicOptions } from "./providers/anthropic";
import type { GoogleOptions } from "./providers/google";
import type { OpenAICompletionsOptions } from "./providers/openai-completions";
import type { OpenAIResponsesOptions } from "./providers/openai-responses";
export type Api = "openai-completions" | "openai-responses" | "anthropic-messages" | "google-generative-ai";
export interface ApiOptionsMap {
"anthropic-messages": AnthropicOptions;
"openai-completions": OpenAICompletionsOptions;
"openai-responses": OpenAIResponsesOptions;
"google-generative-ai": GoogleOptions;
}
// Compile-time exhaustiveness check - this will fail if ApiOptionsMap doesn't have all KnownApi keys
type _CheckExhaustive = ApiOptionsMap extends Record<Api, GenerateOptions>
? Record<Api, GenerateOptions> extends ApiOptionsMap
? true
: ["ApiOptionsMap is missing some KnownApi values", Exclude<Api, keyof ApiOptionsMap>]
: ["ApiOptionsMap doesn't extend Record<KnownApi, GenerateOptions>"];
const _exhaustive: _CheckExhaustive = true;
// Helper type to get options for a specific API
export type OptionsForApi<TApi extends Api> = ApiOptionsMap[TApi];
export type KnownProvider = "anthropic" | "google" | "openai" | "xai" | "groq" | "cerebras" | "openrouter";
export type Provider = KnownProvider | string;
@ -21,31 +43,17 @@ export interface GenerateOptions {
}
// Unified options with reasoning (what public generate() accepts)
export interface GenerateOptionsUnified extends GenerateOptions {
export interface SimpleGenerateOptions extends GenerateOptions {
reasoning?: ReasoningEffort;
}
// Generic GenerateFunction with typed options
export type GenerateFunction<TOptions extends GenerateOptions = GenerateOptions> = (
model: Model,
export type GenerateFunction<TApi extends Api> = (
model: Model<TApi>,
context: Context,
options: TOptions,
options: OptionsForApi<TApi>,
) => GenerateStream;
// Legacy LLM interface (to be removed)
export interface LLMOptions {
temperature?: number;
maxTokens?: number;
onEvent?: (event: AssistantMessageEvent) => void;
signal?: AbortSignal;
}
export interface LLM<T extends LLMOptions> {
generate(request: Context, options?: T): Promise<AssistantMessage>;
getModel(): Model;
getApi(): string;
}
export interface TextContent {
type: "text";
text: string;
@ -100,7 +108,7 @@ export interface AssistantMessage {
model: string;
usage: Usage;
stopReason: StopReason;
error?: string | Error;
error?: string;
}
export interface ToolResultMessage {
@ -138,10 +146,10 @@ export type AssistantMessageEvent =
| { type: "error"; error: string; partial: AssistantMessage };
// Model interface for the unified model system
export interface Model {
export interface Model<TApi extends Api> {
id: string;
name: string;
api: Api;
api: TApi;
provider: Provider;
baseUrl: string;
reasoning: boolean;