mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 23:04:41 +00:00
Massive refactor of API
- Switch to function based API - Anthropic SDK style async generator - Fully typed with escape hatches for custom models
This commit is contained in:
parent
004de3c9d0
commit
66cefb236e
29 changed files with 5835 additions and 6225 deletions
|
|
@ -1,425 +0,0 @@
|
|||
import Anthropic from "@anthropic-ai/sdk";
|
||||
import type {
|
||||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
Tool,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Message,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
|
||||
// Anthropic-specific options
export interface AnthropicOptions extends GenerateOptions {
  // Extended-thinking controls; only applied when the model reports `reasoning` support.
  thinking?: {
    enabled: boolean;
    // Token budget for the thinking phase; defaults to 1024 when omitted.
    budgetTokens?: number;
  };
  // Mirrors Anthropic's `tool_choice` parameter: auto / any / none, or force one tool by name.
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}
|
||||
|
||||
/**
|
||||
* Generate function for Anthropic API
|
||||
*/
|
||||
export const generateAnthropic: GenerateFunction<AnthropicOptions> = (
|
||||
model: Model,
|
||||
context: Context,
|
||||
options: AnthropicOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
|
||||
// Start async processing
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: "anthropic-messages" as Api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
};
|
||||
|
||||
try {
|
||||
// Create Anthropic client
|
||||
const client = createAnthropicClient(model, options.apiKey!);
|
||||
|
||||
// Convert messages
|
||||
const messages = convertMessages(context.messages, model, "anthropic-messages");
|
||||
|
||||
// Build params
|
||||
const params = buildAnthropicParams(model, context, options, messages, client.isOAuthToken);
|
||||
|
||||
// Create Anthropic stream
|
||||
const anthropicStream = client.client.messages.stream(
|
||||
{
|
||||
...params,
|
||||
stream: true,
|
||||
},
|
||||
{
|
||||
signal: options.signal,
|
||||
},
|
||||
);
|
||||
|
||||
// Emit start event
|
||||
stream.push({
|
||||
type: "start",
|
||||
partial: output,
|
||||
});
|
||||
|
||||
// Process Anthropic events
|
||||
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
|
||||
|
||||
for await (const event of anthropicStream) {
|
||||
if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
currentBlock = {
|
||||
type: "text",
|
||||
text: "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
} else if (event.content_block.type === "thinking") {
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
} else if (event.content_block.type === "tool_use") {
|
||||
// We wait for the full tool use to be streamed
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: event.content_block.id,
|
||||
name: event.content_block.name,
|
||||
arguments: event.content_block.input as Record<string, any>,
|
||||
partialJson: "",
|
||||
};
|
||||
}
|
||||
} else if (event.type === "content_block_delta") {
|
||||
if (event.delta.type === "text_delta") {
|
||||
if (currentBlock && currentBlock.type === "text") {
|
||||
currentBlock.text += event.delta.text;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
delta: event.delta.text,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "thinking_delta") {
|
||||
if (currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += event.delta.thinking;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
delta: event.delta.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "input_json_delta") {
|
||||
if (currentBlock && currentBlock.type === "toolCall") {
|
||||
currentBlock.partialJson += event.delta.partial_json;
|
||||
}
|
||||
} else if (event.delta.type === "signature_delta") {
|
||||
if (currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
|
||||
currentBlock.thinkingSignature += event.delta.signature;
|
||||
}
|
||||
}
|
||||
} else if (event.type === "content_block_stop") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({ type: "text_end", content: currentBlock.text, partial: output });
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
const finalToolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: currentBlock.id,
|
||||
name: currentBlock.name,
|
||||
arguments: JSON.parse(currentBlock.partialJson),
|
||||
};
|
||||
output.content.push(finalToolCall);
|
||||
stream.push({ type: "toolCall", toolCall: finalToolCall, partial: output });
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
} else if (event.type === "message_delta") {
|
||||
if (event.delta.stop_reason) {
|
||||
output.stopReason = mapStopReason(event.delta.stop_reason);
|
||||
}
|
||||
output.usage.input += event.usage.input_tokens || 0;
|
||||
output.usage.output += event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
||||
// Emit done event with final message
|
||||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
} catch (error) {
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
// Helper to create Anthropic client
|
||||
interface AnthropicClientWrapper {
  // Configured SDK client instance.
  client: Anthropic;
  // True when the credential was an OAuth access token ("sk-ant-oat..."),
  // which requires the Claude Code identity to be injected as the first
  // system block when building request params.
  isOAuthToken: boolean;
}
|
||||
|
||||
function createAnthropicClient(model: Model, apiKey: string): AnthropicClientWrapper {
|
||||
if (apiKey.includes("sk-ant-oat")) {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
|
||||
// Clear the env var if we're in Node.js to prevent SDK from using it
|
||||
if (typeof process !== "undefined" && process.env) {
|
||||
process.env.ANTHROPIC_API_KEY = undefined;
|
||||
}
|
||||
|
||||
const client = new Anthropic({
|
||||
apiKey: null,
|
||||
authToken: apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
defaultHeaders,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
|
||||
return { client, isOAuthToken: true };
|
||||
} else {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
|
||||
const client = new Anthropic({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders,
|
||||
});
|
||||
|
||||
return { client, isOAuthToken: false };
|
||||
}
|
||||
}
|
||||
|
||||
// Build Anthropic API params
|
||||
function buildAnthropicParams(
|
||||
model: Model,
|
||||
context: Context,
|
||||
options: AnthropicOptions,
|
||||
messages: MessageParam[],
|
||||
isOAuthToken: boolean,
|
||||
): MessageCreateParamsStreaming {
|
||||
const params: MessageCreateParamsStreaming = {
|
||||
model: model.id,
|
||||
messages,
|
||||
max_tokens: options.maxTokens || model.maxTokens,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
// For OAuth tokens, we MUST include Claude Code identity
|
||||
if (isOAuthToken) {
|
||||
params.system = [
|
||||
{
|
||||
type: "text",
|
||||
text: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
];
|
||||
if (context.systemPrompt) {
|
||||
params.system.push({
|
||||
type: "text",
|
||||
text: context.systemPrompt,
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (context.systemPrompt) {
|
||||
params.system = context.systemPrompt;
|
||||
}
|
||||
|
||||
if (options.temperature !== undefined) {
|
||||
params.temperature = options.temperature;
|
||||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools);
|
||||
}
|
||||
|
||||
// Only enable thinking if the model supports it
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
params.thinking = {
|
||||
type: "enabled",
|
||||
budget_tokens: options.thinking.budgetTokens || 1024,
|
||||
};
|
||||
}
|
||||
|
||||
if (options.toolChoice) {
|
||||
if (typeof options.toolChoice === "string") {
|
||||
params.tool_choice = { type: options.toolChoice };
|
||||
} else {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
// Convert messages to Anthropic format
|
||||
function convertMessages(messages: Message[], model: Model, api: Api): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, model, api);
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
} else {
|
||||
// Convert array content to Anthropic format
|
||||
const blocks: ContentBlockParam[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
};
|
||||
} else {
|
||||
// Image content
|
||||
return {
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
const filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredBlocks,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const blocks: ContentBlockParam[] = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
blocks.push({
|
||||
type: "text",
|
||||
text: block.text,
|
||||
});
|
||||
} else if (block.type === "thinking") {
|
||||
blocks.push({
|
||||
type: "thinking",
|
||||
thinking: block.thinking,
|
||||
signature: block.thinkingSignature || "",
|
||||
});
|
||||
} else if (block.type === "toolCall") {
|
||||
blocks.push({
|
||||
type: "tool_use",
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
input: block.arguments,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: blocks,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "tool_result",
|
||||
tool_use_id: msg.toolCallId,
|
||||
content: msg.content,
|
||||
is_error: msg.isError,
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
// Convert tools to Anthropic format
|
||||
function convertTools(tools: Context["tools"]): Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: tool.parameters.properties || {},
|
||||
required: tool.parameters.required || [],
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
// Map Anthropic stop reason to our StopReason type
|
||||
function mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "safety";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
default:
|
||||
return "stop";
|
||||
}
|
||||
}
|
||||
|
|
@ -3,91 +3,46 @@ import type {
|
|||
ContentBlockParam,
|
||||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
Tool,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
LLM,
|
||||
LLMOptions,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Message,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
|
||||
export interface AnthropicLLMOptions extends LLMOptions {
|
||||
thinking?: {
|
||||
enabled: boolean;
|
||||
budgetTokens?: number;
|
||||
};
|
||||
export interface AnthropicOptions extends GenerateOptions {
|
||||
thinkingEnabled?: boolean;
|
||||
thinkingBudgetTokens?: number;
|
||||
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
||||
}
|
||||
|
||||
export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
||||
private client: Anthropic;
|
||||
private modelInfo: Model;
|
||||
private isOAuthToken: boolean = false;
|
||||
export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
options?: AnthropicOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
|
||||
constructor(model: Model, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.ANTHROPIC_API_KEY) {
|
||||
throw new Error(
|
||||
"Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.ANTHROPIC_API_KEY;
|
||||
}
|
||||
if (apiKey.includes("sk-ant-oat")) {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
|
||||
// Clear the env var if we're in Node.js to prevent SDK from using it
|
||||
if (typeof process !== "undefined" && process.env) {
|
||||
process.env.ANTHROPIC_API_KEY = undefined;
|
||||
}
|
||||
this.client = new Anthropic({
|
||||
apiKey: null,
|
||||
authToken: apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
defaultHeaders,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
this.isOAuthToken = true;
|
||||
} else {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
this.client = new Anthropic({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true, defaultHeaders });
|
||||
this.isOAuthToken = false;
|
||||
}
|
||||
this.modelInfo = model;
|
||||
}
|
||||
|
||||
getModel(): Model {
|
||||
return this.modelInfo;
|
||||
}
|
||||
|
||||
getApi(): string {
|
||||
return "anthropic-messages";
|
||||
}
|
||||
|
||||
async generate(context: Context, options?: AnthropicLLMOptions): Promise<AssistantMessage> {
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: this.getApi(),
|
||||
provider: this.modelInfo.provider,
|
||||
model: this.modelInfo.id,
|
||||
api: "anthropic-messages" as Api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
@ -99,77 +54,14 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
};
|
||||
|
||||
try {
|
||||
const messages = this.convertMessages(context.messages);
|
||||
|
||||
const params: MessageCreateParamsStreaming = {
|
||||
model: this.modelInfo.id,
|
||||
messages,
|
||||
max_tokens: options?.maxTokens || 4096,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
// For OAuth tokens, we MUST include Claude Code identity
|
||||
if (this.isOAuthToken) {
|
||||
params.system = [
|
||||
{
|
||||
type: "text",
|
||||
text: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
];
|
||||
if (context.systemPrompt) {
|
||||
params.system.push({
|
||||
type: "text",
|
||||
text: context.systemPrompt,
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (context.systemPrompt) {
|
||||
params.system = context.systemPrompt;
|
||||
}
|
||||
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options?.temperature;
|
||||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = this.convertTools(context.tools);
|
||||
}
|
||||
|
||||
// Only enable thinking if the model supports it
|
||||
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
|
||||
params.thinking = {
|
||||
type: "enabled",
|
||||
budget_tokens: options.thinking.budgetTokens || 1024,
|
||||
};
|
||||
}
|
||||
|
||||
if (options?.toolChoice) {
|
||||
if (typeof options.toolChoice === "string") {
|
||||
params.tool_choice = { type: options.toolChoice };
|
||||
} else {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
}
|
||||
|
||||
const stream = this.client.messages.stream(
|
||||
{
|
||||
...params,
|
||||
stream: true,
|
||||
},
|
||||
{
|
||||
signal: options?.signal,
|
||||
},
|
||||
);
|
||||
|
||||
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
|
||||
const { client, isOAuthToken } = createClient(model, options?.apiKey!);
|
||||
const params = buildParams(model, context, isOAuthToken, options);
|
||||
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
|
||||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
|
||||
for await (const event of stream) {
|
||||
|
||||
for await (const event of anthropicStream) {
|
||||
if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
currentBlock = {
|
||||
|
|
@ -177,7 +69,7 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
text: "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
options?.onEvent?.({ type: "text_start" });
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
} else if (event.content_block.type === "thinking") {
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
|
|
@ -185,9 +77,9 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
thinkingSignature: "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
options?.onEvent?.({ type: "thinking_start" });
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
} else if (event.content_block.type === "tool_use") {
|
||||
// We wait for the full tool use to be streamed to send the event
|
||||
// We wait for the full tool use to be streamed
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: event.content_block.id,
|
||||
|
|
@ -200,15 +92,19 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
if (event.delta.type === "text_delta") {
|
||||
if (currentBlock && currentBlock.type === "text") {
|
||||
currentBlock.text += event.delta.text;
|
||||
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: event.delta.text });
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
delta: event.delta.text,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "thinking_delta") {
|
||||
if (currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += event.delta.thinking;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
content: currentBlock.thinking,
|
||||
delta: event.delta.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "input_json_delta") {
|
||||
|
|
@ -224,9 +120,17 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
} else if (event.type === "content_block_stop") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
const finalToolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
|
|
@ -235,150 +139,274 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
|
|||
arguments: JSON.parse(currentBlock.partialJson),
|
||||
};
|
||||
output.content.push(finalToolCall);
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: finalToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
} else if (event.type === "message_delta") {
|
||||
if (event.delta.stop_reason) {
|
||||
output.stopReason = this.mapStopReason(event.delta.stop_reason);
|
||||
output.stopReason = mapStopReason(event.delta.stop_reason);
|
||||
}
|
||||
output.usage.input += event.usage.input_tokens || 0;
|
||||
output.usage.output += event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
|
||||
calculateCost(this.modelInfo, output.usage);
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
||||
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
|
||||
return output;
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
}
|
||||
|
||||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
} catch (error) {
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
return output;
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
function createClient(
|
||||
model: Model<"anthropic-messages">,
|
||||
apiKey: string,
|
||||
): { client: Anthropic; isOAuthToken: boolean } {
|
||||
if (apiKey.includes("sk-ant-oat")) {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "oauth-2025-04-20,fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
|
||||
// Clear the env var if we're in Node.js to prevent SDK from using it
|
||||
if (typeof process !== "undefined" && process.env) {
|
||||
process.env.ANTHROPIC_API_KEY = undefined;
|
||||
}
|
||||
|
||||
const client = new Anthropic({
|
||||
apiKey: null,
|
||||
authToken: apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
defaultHeaders,
|
||||
dangerouslyAllowBrowser: true,
|
||||
});
|
||||
|
||||
return { client, isOAuthToken: true };
|
||||
} else {
|
||||
const defaultHeaders = {
|
||||
accept: "application/json",
|
||||
"anthropic-dangerous-direct-browser-access": "true",
|
||||
"anthropic-beta": "fine-grained-tool-streaming-2025-05-14",
|
||||
};
|
||||
|
||||
const client = new Anthropic({
|
||||
apiKey,
|
||||
baseURL: model.baseUrl,
|
||||
dangerouslyAllowBrowser: true,
|
||||
defaultHeaders,
|
||||
});
|
||||
|
||||
return { client, isOAuthToken: false };
|
||||
}
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
isOAuthToken: boolean,
|
||||
options?: AnthropicOptions,
|
||||
): MessageCreateParamsStreaming {
|
||||
const params: MessageCreateParamsStreaming = {
|
||||
model: model.id,
|
||||
messages: convertMessages(context.messages, model),
|
||||
max_tokens: options?.maxTokens || model.maxTokens,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
// For OAuth tokens, we MUST include Claude Code identity
|
||||
if (isOAuthToken) {
|
||||
params.system = [
|
||||
{
|
||||
type: "text",
|
||||
text: "You are Claude Code, Anthropic's official CLI for Claude.",
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
},
|
||||
];
|
||||
if (context.systemPrompt) {
|
||||
params.system.push({
|
||||
type: "text",
|
||||
text: context.systemPrompt,
|
||||
cache_control: {
|
||||
type: "ephemeral",
|
||||
},
|
||||
});
|
||||
}
|
||||
} else if (context.systemPrompt) {
|
||||
params.system = context.systemPrompt;
|
||||
}
|
||||
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options.temperature;
|
||||
}
|
||||
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools);
|
||||
}
|
||||
|
||||
if (options?.thinkingEnabled && model.reasoning) {
|
||||
params.thinking = {
|
||||
type: "enabled",
|
||||
budget_tokens: options.thinkingBudgetTokens || 1024,
|
||||
};
|
||||
}
|
||||
|
||||
if (options?.toolChoice) {
|
||||
if (typeof options.toolChoice === "string") {
|
||||
params.tool_choice = { type: options.toolChoice };
|
||||
} else {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
}
|
||||
|
||||
private convertMessages(messages: Message[]): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
return params;
|
||||
}
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
|
||||
function convertMessages(messages: Message[], model: Model<"anthropic-messages">): MessageParam[] {
|
||||
const params: MessageParam[] = [];
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, model);
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
if (msg.content.trim().length > 0) {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
} else {
|
||||
// Convert array content to Anthropic format
|
||||
const blocks: ContentBlockParam[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
};
|
||||
} else {
|
||||
// Image content
|
||||
return {
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
const filteredBlocks = !this.modelInfo?.input.includes("image")
|
||||
? blocks.filter((b) => b.type !== "image")
|
||||
: blocks;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredBlocks,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const blocks: ContentBlockParam[] = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
blocks.push({
|
||||
} else {
|
||||
const blocks: ContentBlockParam[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: block.text,
|
||||
});
|
||||
} else if (block.type === "thinking") {
|
||||
blocks.push({
|
||||
type: "thinking",
|
||||
thinking: block.thinking,
|
||||
signature: block.thinkingSignature || "",
|
||||
});
|
||||
} else if (block.type === "toolCall") {
|
||||
blocks.push({
|
||||
type: "tool_use",
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
input: block.arguments,
|
||||
});
|
||||
text: item.text,
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
type: "image",
|
||||
source: {
|
||||
type: "base64",
|
||||
media_type: item.mimeType as "image/jpeg" | "image/png" | "image/gif" | "image/webp",
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: blocks,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
let filteredBlocks = !model?.input.includes("image") ? blocks.filter((b) => b.type !== "image") : blocks;
|
||||
filteredBlocks = filteredBlocks.filter((b) => {
|
||||
if (b.type === "text") {
|
||||
return b.text.trim().length > 0;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
if (filteredBlocks.length === 0) continue;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "tool_result",
|
||||
tool_use_id: msg.toolCallId,
|
||||
content: msg.content,
|
||||
is_error: msg.isError,
|
||||
},
|
||||
],
|
||||
content: filteredBlocks,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const blocks: ContentBlockParam[] = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
if (block.text.trim().length === 0) continue;
|
||||
blocks.push({
|
||||
type: "text",
|
||||
text: block.text,
|
||||
});
|
||||
} else if (block.type === "thinking") {
|
||||
if (block.thinking.trim().length === 0) continue;
|
||||
blocks.push({
|
||||
type: "thinking",
|
||||
thinking: block.thinking,
|
||||
signature: block.thinkingSignature || "",
|
||||
});
|
||||
} else if (block.type === "toolCall") {
|
||||
blocks.push({
|
||||
type: "tool_use",
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
input: block.arguments,
|
||||
});
|
||||
}
|
||||
}
|
||||
if (blocks.length === 0) continue;
|
||||
params.push({
|
||||
role: "assistant",
|
||||
content: blocks,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "tool_result",
|
||||
tool_use_id: msg.toolCallId,
|
||||
content: msg.content,
|
||||
is_error: msg.isError,
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
return params;
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
private convertTools(tools: Context["tools"]): Tool[] {
|
||||
if (!tools) return [];
|
||||
function convertTools(tools: Tool[]): Anthropic.Messages.Tool[] {
|
||||
if (!tools) return [];
|
||||
|
||||
return tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: tool.parameters.properties || {},
|
||||
required: tool.parameters.required || [],
|
||||
},
|
||||
}));
|
||||
}
|
||||
return tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
input_schema: {
|
||||
type: "object" as const,
|
||||
properties: tool.parameters.properties || {},
|
||||
required: tool.parameters.required || [],
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
private mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "safety";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
default:
|
||||
return "stop";
|
||||
function mapStopReason(reason: Anthropic.Messages.StopReason): StopReason {
|
||||
switch (reason) {
|
||||
case "end_turn":
|
||||
return "stop";
|
||||
case "max_tokens":
|
||||
return "length";
|
||||
case "tool_use":
|
||||
return "toolUse";
|
||||
case "refusal":
|
||||
return "safety";
|
||||
case "pause_turn": // Stop is good enough -> resubmit
|
||||
return "stop";
|
||||
case "stop_sequence":
|
||||
return "stop"; // We don't supply stop sequences, so this should never happen
|
||||
default: {
|
||||
const _exhaustive: never = reason;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,19 +1,21 @@
|
|||
import {
|
||||
type Content,
|
||||
type FinishReason,
|
||||
FinishReason,
|
||||
FunctionCallingConfigMode,
|
||||
type GenerateContentConfig,
|
||||
type GenerateContentParameters,
|
||||
GoogleGenAI,
|
||||
type Part,
|
||||
} from "@google/genai";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
LLM,
|
||||
LLMOptions,
|
||||
Message,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
|
|
@ -23,7 +25,7 @@ import type {
|
|||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
|
||||
export interface GoogleLLMOptions extends LLMOptions {
|
||||
export interface GoogleOptions extends GenerateOptions {
|
||||
toolChoice?: "auto" | "none" | "any";
|
||||
thinking?: {
|
||||
enabled: boolean;
|
||||
|
|
@ -31,38 +33,20 @@ export interface GoogleLLMOptions extends LLMOptions {
|
|||
};
|
||||
}
|
||||
|
||||
export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
||||
private client: GoogleGenAI;
|
||||
private modelInfo: Model;
|
||||
export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options?: GoogleOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
|
||||
constructor(model: Model, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.GEMINI_API_KEY) {
|
||||
throw new Error(
|
||||
"Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.GEMINI_API_KEY;
|
||||
}
|
||||
this.client = new GoogleGenAI({ apiKey });
|
||||
this.modelInfo = model;
|
||||
}
|
||||
|
||||
getModel(): Model {
|
||||
return this.modelInfo;
|
||||
}
|
||||
|
||||
getApi(): string {
|
||||
return "google-generative-ai";
|
||||
}
|
||||
|
||||
async generate(context: Context, options?: GoogleLLMOptions): Promise<AssistantMessage> {
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: this.getApi(),
|
||||
provider: this.modelInfo.provider,
|
||||
model: this.modelInfo.id,
|
||||
api: "google-generative-ai" as Api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
@ -72,70 +56,20 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
|||
},
|
||||
stopReason: "stop",
|
||||
};
|
||||
|
||||
try {
|
||||
const contents = this.convertMessages(context.messages);
|
||||
const client = createClient(options?.apiKey);
|
||||
const params = buildParams(model, context, options);
|
||||
const googleStream = await client.models.generateContentStream(params);
|
||||
|
||||
// Build generation config
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options?.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options?.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
// Build the config object
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
|
||||
...(context.tools && { tools: this.convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
// Add tool config if needed
|
||||
if (context.tools && options?.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: this.mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Add thinking config if enabled and model supports it
|
||||
if (options?.thinking?.enabled && this.modelInfo.reasoning) {
|
||||
config.thinkingConfig = {
|
||||
includeThoughts: true,
|
||||
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
|
||||
};
|
||||
}
|
||||
|
||||
// Abort signal
|
||||
if (options?.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
// Build the request parameters
|
||||
const params: GenerateContentParameters = {
|
||||
model: this.modelInfo.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
const stream = await this.client.models.generateContentStream(params);
|
||||
|
||||
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
|
||||
stream.push({ type: "start", partial: output });
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
for await (const chunk of stream) {
|
||||
// Extract parts from the chunk
|
||||
for await (const chunk of googleStream) {
|
||||
const candidate = chunk.candidates?.[0];
|
||||
if (candidate?.content?.parts) {
|
||||
for (const part of candidate.content.parts) {
|
||||
if (part.text !== undefined) {
|
||||
const isThinking = part.thought === true;
|
||||
|
||||
// Check if we need to switch blocks
|
||||
if (
|
||||
!currentBlock ||
|
||||
(isThinking && currentBlock.type !== "thinking") ||
|
||||
|
|
@ -143,50 +77,60 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
|||
) {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Start new block
|
||||
if (isThinking) {
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
||||
options?.onEvent?.({ type: "thinking_start" });
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
} else {
|
||||
currentBlock = { type: "text", text: "" };
|
||||
options?.onEvent?.({ type: "text_start" });
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
}
|
||||
output.content.push(currentBlock);
|
||||
}
|
||||
|
||||
// Append content to current block
|
||||
if (currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += part.text;
|
||||
currentBlock.thinkingSignature = part.thoughtSignature;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
content: currentBlock.thinking,
|
||||
delta: part.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
currentBlock.text += part.text;
|
||||
options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: part.text });
|
||||
stream.push({ type: "text_delta", delta: part.text, partial: output });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle function calls
|
||||
if (part.functionCall) {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
|
||||
// Add tool call
|
||||
const toolCallId = part.functionCall.id || `${part.functionCall.name}_${Date.now()}`;
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
|
|
@ -195,21 +139,18 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
|||
arguments: part.functionCall.args as Record<string, any>,
|
||||
};
|
||||
output.content.push(toolCall);
|
||||
options?.onEvent?.({ type: "toolCall", toolCall });
|
||||
stream.push({ type: "toolCall", toolCall, partial: output });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Map finish reason
|
||||
if (candidate?.finishReason) {
|
||||
output.stopReason = this.mapStopReason(candidate.finishReason);
|
||||
// Check if we have tool calls in blocks
|
||||
output.stopReason = mapStopReason(candidate.finishReason);
|
||||
if (output.content.some((b) => b.type === "toolCall")) {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
}
|
||||
|
||||
// Capture usage metadata if available
|
||||
if (chunk.usageMetadata) {
|
||||
output.usage = {
|
||||
input: chunk.usageMetadata.promptTokenCount || 0,
|
||||
|
|
@ -225,166 +166,223 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
|
|||
total: 0,
|
||||
},
|
||||
};
|
||||
calculateCost(this.modelInfo, output.usage);
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize last block
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({ type: "text_end", content: currentBlock.text, partial: output });
|
||||
} else {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
|
||||
}
|
||||
}
|
||||
|
||||
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
|
||||
return output;
|
||||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
} catch (error) {
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
return output;
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
function createClient(apiKey?: string): GoogleGenAI {
|
||||
if (!apiKey) {
|
||||
if (!process.env.GEMINI_API_KEY) {
|
||||
throw new Error(
|
||||
"Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.GEMINI_API_KEY;
|
||||
}
|
||||
return new GoogleGenAI({ apiKey });
|
||||
}
|
||||
|
||||
function buildParams(
|
||||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options: GoogleOptions = {},
|
||||
): GenerateContentParameters {
|
||||
const contents = convertMessages(model, context);
|
||||
|
||||
const generationConfig: GenerateContentConfig = {};
|
||||
if (options.temperature !== undefined) {
|
||||
generationConfig.temperature = options.temperature;
|
||||
}
|
||||
if (options.maxTokens !== undefined) {
|
||||
generationConfig.maxOutputTokens = options.maxTokens;
|
||||
}
|
||||
|
||||
private convertMessages(messages: Message[]): Content[] {
|
||||
const contents: Content[] = [];
|
||||
const config: GenerateContentConfig = {
|
||||
...(Object.keys(generationConfig).length > 0 && generationConfig),
|
||||
...(context.systemPrompt && { systemInstruction: context.systemPrompt }),
|
||||
...(context.tools && { tools: convertTools(context.tools) }),
|
||||
};
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
|
||||
if (context.tools && options.toolChoice) {
|
||||
config.toolConfig = {
|
||||
functionCallingConfig: {
|
||||
mode: mapToolChoice(options.toolChoice),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: [{ text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
// Convert array content to Google format
|
||||
const parts: Part[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return { text: item.text };
|
||||
} else {
|
||||
// Image content - Google uses inlineData
|
||||
return {
|
||||
inlineData: {
|
||||
mimeType: item.mimeType,
|
||||
data: item.data,
|
||||
},
|
||||
};
|
||||
}
|
||||
});
|
||||
const filteredParts = !this.modelInfo?.input.includes("image")
|
||||
? parts.filter((p) => p.text !== undefined)
|
||||
: parts;
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: filteredParts,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const parts: Part[] = [];
|
||||
if (options.thinking?.enabled && model.reasoning) {
|
||||
config.thinkingConfig = {
|
||||
includeThoughts: true,
|
||||
...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
|
||||
};
|
||||
}
|
||||
|
||||
// Process content blocks
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
parts.push({ text: block.text });
|
||||
} else if (block.type === "thinking") {
|
||||
const thinkingPart: Part = {
|
||||
thought: true,
|
||||
thoughtSignature: block.thinkingSignature,
|
||||
text: block.thinking,
|
||||
};
|
||||
parts.push(thinkingPart);
|
||||
} else if (block.type === "toolCall") {
|
||||
parts.push({
|
||||
functionCall: {
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
args: block.arguments,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
if (options.signal) {
|
||||
if (options.signal.aborted) {
|
||||
throw new Error("Request aborted");
|
||||
}
|
||||
config.abortSignal = options.signal;
|
||||
}
|
||||
|
||||
if (parts.length > 0) {
|
||||
contents.push({
|
||||
role: "model",
|
||||
parts,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "toolResult") {
|
||||
const params: GenerateContentParameters = {
|
||||
model: model.id,
|
||||
contents,
|
||||
config,
|
||||
};
|
||||
|
||||
return params;
|
||||
}
|
||||
function convertMessages(model: Model<"google-generative-ai">, context: Context): Content[] {
|
||||
const contents: Content[] = [];
|
||||
const transformedMessages = transformMessages(context.messages, model);
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
id: msg.toolCallId,
|
||||
name: msg.toolName,
|
||||
response: {
|
||||
result: msg.content,
|
||||
isError: msg.isError,
|
||||
},
|
||||
parts: [{ text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
const parts: Part[] = msg.content.map((item) => {
|
||||
if (item.type === "text") {
|
||||
return { text: item.text };
|
||||
} else {
|
||||
return {
|
||||
inlineData: {
|
||||
mimeType: item.mimeType,
|
||||
data: item.data,
|
||||
},
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
});
|
||||
const filteredParts = !model.input.includes("image") ? parts.filter((p) => p.text !== undefined) : parts;
|
||||
if (filteredParts.length === 0) continue;
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: filteredParts,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const parts: Part[] = [];
|
||||
|
||||
return contents;
|
||||
}
|
||||
for (const block of msg.content) {
|
||||
if (block.type === "text") {
|
||||
parts.push({ text: block.text });
|
||||
} else if (block.type === "thinking") {
|
||||
const thinkingPart: Part = {
|
||||
thought: true,
|
||||
thoughtSignature: block.thinkingSignature,
|
||||
text: block.thinking,
|
||||
};
|
||||
parts.push(thinkingPart);
|
||||
} else if (block.type === "toolCall") {
|
||||
parts.push({
|
||||
functionCall: {
|
||||
id: block.id,
|
||||
name: block.name,
|
||||
args: block.arguments,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private convertTools(tools: Tool[]): any[] {
|
||||
return [
|
||||
{
|
||||
functionDeclarations: tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
})),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
private mapToolChoice(choice: string): FunctionCallingConfigMode {
|
||||
switch (choice) {
|
||||
case "auto":
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
case "none":
|
||||
return FunctionCallingConfigMode.NONE;
|
||||
case "any":
|
||||
return FunctionCallingConfigMode.ANY;
|
||||
default:
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
if (parts.length === 0) continue;
|
||||
contents.push({
|
||||
role: "model",
|
||||
parts,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
contents.push({
|
||||
role: "user",
|
||||
parts: [
|
||||
{
|
||||
functionResponse: {
|
||||
id: msg.toolCallId,
|
||||
name: msg.toolName,
|
||||
response: {
|
||||
result: msg.content,
|
||||
isError: msg.isError,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
private mapStopReason(reason: FinishReason): StopReason {
|
||||
switch (reason) {
|
||||
case "STOP":
|
||||
return "stop";
|
||||
case "MAX_TOKENS":
|
||||
return "length";
|
||||
case "BLOCKLIST":
|
||||
case "PROHIBITED_CONTENT":
|
||||
case "SPII":
|
||||
case "SAFETY":
|
||||
case "IMAGE_SAFETY":
|
||||
return "safety";
|
||||
case "RECITATION":
|
||||
return "safety";
|
||||
case "FINISH_REASON_UNSPECIFIED":
|
||||
case "OTHER":
|
||||
case "LANGUAGE":
|
||||
case "MALFORMED_FUNCTION_CALL":
|
||||
case "UNEXPECTED_TOOL_CALL":
|
||||
return "error";
|
||||
default:
|
||||
return "stop";
|
||||
return contents;
|
||||
}
|
||||
|
||||
function convertTools(tools: Tool[]): any[] {
|
||||
return [
|
||||
{
|
||||
functionDeclarations: tools.map((tool) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
})),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
function mapToolChoice(choice: string): FunctionCallingConfigMode {
|
||||
switch (choice) {
|
||||
case "auto":
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
case "none":
|
||||
return FunctionCallingConfigMode.NONE;
|
||||
case "any":
|
||||
return FunctionCallingConfigMode.ANY;
|
||||
default:
|
||||
return FunctionCallingConfigMode.AUTO;
|
||||
}
|
||||
}
|
||||
|
||||
function mapStopReason(reason: FinishReason): StopReason {
|
||||
switch (reason) {
|
||||
case FinishReason.STOP:
|
||||
return "stop";
|
||||
case FinishReason.MAX_TOKENS:
|
||||
return "length";
|
||||
case FinishReason.BLOCKLIST:
|
||||
case FinishReason.PROHIBITED_CONTENT:
|
||||
case FinishReason.SPII:
|
||||
case FinishReason.SAFETY:
|
||||
case FinishReason.IMAGE_SAFETY:
|
||||
case FinishReason.RECITATION:
|
||||
return "safety";
|
||||
case FinishReason.FINISH_REASON_UNSPECIFIED:
|
||||
case FinishReason.OTHER:
|
||||
case FinishReason.LANGUAGE:
|
||||
case FinishReason.MALFORMED_FUNCTION_CALL:
|
||||
case FinishReason.UNEXPECTED_TOOL_CALL:
|
||||
return "error";
|
||||
default: {
|
||||
const _exhaustive: never = reason;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,20 @@
|
|||
import OpenAI from "openai";
|
||||
import type {
|
||||
ChatCompletionAssistantMessageParam,
|
||||
ChatCompletionChunk,
|
||||
ChatCompletionContentPart,
|
||||
ChatCompletionContentPartImage,
|
||||
ChatCompletionContentPartText,
|
||||
ChatCompletionMessageParam,
|
||||
} from "openai/resources/chat/completions.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
LLM,
|
||||
LLMOptions,
|
||||
Message,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
|
|
@ -22,43 +24,25 @@ import type {
|
|||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
|
||||
export interface OpenAICompletionsLLMOptions extends LLMOptions {
|
||||
export interface OpenAICompletionsOptions extends GenerateOptions {
|
||||
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
|
||||
reasoningEffort?: "low" | "medium" | "high";
|
||||
reasoningEffort?: "minimal" | "low" | "medium" | "high";
|
||||
}
|
||||
|
||||
export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
||||
private client: OpenAI;
|
||||
private modelInfo: Model;
|
||||
export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
||||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
options?: OpenAICompletionsOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
|
||||
constructor(model: Model, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
|
||||
this.modelInfo = model;
|
||||
}
|
||||
|
||||
getModel(): Model {
|
||||
return this.modelInfo;
|
||||
}
|
||||
|
||||
getApi(): string {
|
||||
return "openai-completions";
|
||||
}
|
||||
|
||||
async generate(request: Context, options?: OpenAICompletionsLLMOptions): Promise<AssistantMessage> {
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: this.getApi(),
|
||||
provider: this.modelInfo.provider,
|
||||
model: this.modelInfo.id,
|
||||
api: model.api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
@ -70,52 +54,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
};
|
||||
|
||||
try {
|
||||
const messages = this.convertMessages(request.messages, request.systemPrompt);
|
||||
|
||||
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
||||
model: this.modelInfo.id,
|
||||
messages,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
};
|
||||
|
||||
// Cerebras/xAI dont like the "store" field
|
||||
if (!this.modelInfo.baseUrl?.includes("cerebras.ai") && !this.modelInfo.baseUrl?.includes("api.x.ai")) {
|
||||
params.store = false;
|
||||
}
|
||||
|
||||
if (options?.maxTokens) {
|
||||
params.max_completion_tokens = options?.maxTokens;
|
||||
}
|
||||
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options?.temperature;
|
||||
}
|
||||
|
||||
if (request.tools) {
|
||||
params.tools = this.convertTools(request.tools);
|
||||
}
|
||||
|
||||
if (options?.toolChoice) {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
|
||||
if (
|
||||
options?.reasoningEffort &&
|
||||
this.modelInfo.reasoning &&
|
||||
!this.modelInfo.id.toLowerCase().includes("grok")
|
||||
) {
|
||||
params.reasoning_effort = options.reasoningEffort;
|
||||
}
|
||||
|
||||
const stream = await this.client.chat.completions.create(params, {
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
|
||||
const client = createClient(model, options?.apiKey);
|
||||
const params = buildParams(model, context, options);
|
||||
const openaiStream = await client.chat.completions.create(params, { signal: options?.signal });
|
||||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
|
||||
for await (const chunk of stream) {
|
||||
for await (const chunk of openaiStream) {
|
||||
if (chunk.usage) {
|
||||
output.usage = {
|
||||
input: chunk.usage.prompt_tokens || 0,
|
||||
|
|
@ -132,137 +77,170 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
total: 0,
|
||||
},
|
||||
};
|
||||
calculateCost(this.modelInfo, output.usage);
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
|
||||
const choice = chunk.choices[0];
|
||||
if (!choice) continue;
|
||||
|
||||
// Capture finish reason
|
||||
if (choice.finish_reason) {
|
||||
output.stopReason = this.mapStopReason(choice.finish_reason);
|
||||
output.stopReason = mapStopReason(choice.finish_reason);
|
||||
}
|
||||
|
||||
if (choice.delta) {
|
||||
// Handle text content
|
||||
if (
|
||||
choice.delta.content !== null &&
|
||||
choice.delta.content !== undefined &&
|
||||
choice.delta.content.length > 0
|
||||
) {
|
||||
// Check if we need to switch to text block
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
// Save current block if exists
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "thinking") {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Start new text block
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
options?.onEvent?.({ type: "text_start" });
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
}
|
||||
// Append to text block
|
||||
|
||||
if (currentBlock.type === "text") {
|
||||
currentBlock.text += choice.delta.content;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
content: currentBlock.text,
|
||||
delta: choice.delta.content,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle reasoning_content field
|
||||
// Some endpoints return reasoning in reasoning_content (llama.cpp)
|
||||
if (
|
||||
(choice.delta as any).reasoning_content !== null &&
|
||||
(choice.delta as any).reasoning_content !== undefined &&
|
||||
(choice.delta as any).reasoning_content.length > 0
|
||||
) {
|
||||
// Check if we need to switch to thinking block
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
// Save current block if exists
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Start new thinking block
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning_content" };
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "reasoning_content",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
options?.onEvent?.({ type: "thinking_start" });
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
}
|
||||
// Append to thinking block
|
||||
|
||||
if (currentBlock.type === "thinking") {
|
||||
const delta = (choice.delta as any).reasoning_content;
|
||||
currentBlock.thinking += delta;
|
||||
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle reasoning field
|
||||
// Some endpoints return reasoning in reasining (ollama, xAI, ...)
|
||||
if (
|
||||
(choice.delta as any).reasoning !== null &&
|
||||
(choice.delta as any).reasoning !== undefined &&
|
||||
(choice.delta as any).reasoning.length > 0
|
||||
) {
|
||||
// Check if we need to switch to thinking block
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
// Save current block if exists
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Start new thinking block
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning" };
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "reasoning",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
options?.onEvent?.({ type: "thinking_start" });
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
}
|
||||
// Append to thinking block
|
||||
|
||||
if (currentBlock.type === "thinking") {
|
||||
const delta = (choice.delta as any).reasoning;
|
||||
currentBlock.thinking += delta;
|
||||
options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta });
|
||||
stream.push({ type: "thinking_delta", delta, partial: output });
|
||||
}
|
||||
}
|
||||
|
||||
// Handle tool calls
|
||||
if (choice?.delta?.tool_calls) {
|
||||
for (const toolCall of choice.delta.tool_calls) {
|
||||
// Check if we need a new tool call block
|
||||
if (
|
||||
!currentBlock ||
|
||||
currentBlock.type !== "toolCall" ||
|
||||
(toolCall.id && currentBlock.id !== toolCall.id)
|
||||
) {
|
||||
// Save current block if exists
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Start new tool call block
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: toolCall.id || "",
|
||||
|
|
@ -273,7 +251,6 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
output.content.push(currentBlock);
|
||||
}
|
||||
|
||||
// Accumulate tool call data
|
||||
if (currentBlock.type === "toolCall") {
|
||||
if (toolCall.id) currentBlock.id = toolCall.id;
|
||||
if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
|
||||
|
|
@ -286,16 +263,27 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
}
|
||||
}
|
||||
|
||||
// Save final block if exists
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
options?.onEvent?.({ type: "text_end", content: currentBlock.text });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall });
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -303,141 +291,188 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
|
|||
throw new Error("Request was aborted");
|
||||
}
|
||||
|
||||
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
return output;
|
||||
} catch (error) {
|
||||
// Update output with error information
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : String(error);
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
return output;
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
function createClient(model: Model<"openai-completions">, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
return new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
|
||||
}
|
||||
|
||||
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
|
||||
const messages = convertMessages(model, context);
|
||||
|
||||
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
||||
model: model.id,
|
||||
messages,
|
||||
stream: true,
|
||||
stream_options: { include_usage: true },
|
||||
};
|
||||
|
||||
// Cerebras/xAI dont like the "store" field
|
||||
if (!model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai")) {
|
||||
params.store = false;
|
||||
}
|
||||
|
||||
private convertMessages(messages: Message[], systemPrompt?: string): ChatCompletionMessageParam[] {
|
||||
const params: ChatCompletionMessageParam[] = [];
|
||||
if (options?.maxTokens) {
|
||||
params.max_completion_tokens = options?.maxTokens;
|
||||
}
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options?.temperature;
|
||||
}
|
||||
|
||||
// Add system prompt if provided
|
||||
if (systemPrompt) {
|
||||
// Cerebras/xAi don't like the "developer" role
|
||||
const useDeveloperRole =
|
||||
this.modelInfo.reasoning &&
|
||||
!this.modelInfo.baseUrl?.includes("cerebras.ai") &&
|
||||
!this.modelInfo.baseUrl?.includes("api.x.ai");
|
||||
const role = useDeveloperRole ? "developer" : "system";
|
||||
params.push({ role: role, content: systemPrompt });
|
||||
}
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools);
|
||||
}
|
||||
|
||||
// Convert messages
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
});
|
||||
} else {
|
||||
// Convert array content to OpenAI format
|
||||
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
} else {
|
||||
// Image content - OpenAI uses data URLs
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${item.mimeType};base64,${item.data}`,
|
||||
},
|
||||
} satisfies ChatCompletionContentPartImage;
|
||||
}
|
||||
});
|
||||
const filteredContent = !this.modelInfo?.input.includes("image")
|
||||
? content.filter((c) => c.type !== "image_url")
|
||||
: content;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const assistantMsg: ChatCompletionMessageParam = {
|
||||
role: "assistant",
|
||||
content: null,
|
||||
};
|
||||
if (options?.toolChoice) {
|
||||
params.tool_choice = options.toolChoice;
|
||||
}
|
||||
|
||||
// Build content from blocks
|
||||
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
|
||||
if (textBlocks.length > 0) {
|
||||
assistantMsg.content = textBlocks.map((b) => b.text).join("");
|
||||
}
|
||||
// Grok models don't like reasoning_effort
|
||||
if (options?.reasoningEffort && model.reasoning && !model.id.toLowerCase().includes("grok")) {
|
||||
params.reasoning_effort = options.reasoningEffort;
|
||||
}
|
||||
|
||||
// Handle thinking blocks for llama.cpp server + gpt-oss
|
||||
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
|
||||
if (thinkingBlocks.length > 0) {
|
||||
// Use the signature from the first thinking block if available
|
||||
const signature = thinkingBlocks[0].thinkingSignature;
|
||||
if (signature && signature.length > 0) {
|
||||
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
|
||||
}
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
// Handle tool calls
|
||||
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
|
||||
if (toolCalls.length > 0) {
|
||||
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
||||
id: tc.id,
|
||||
type: "function" as const,
|
||||
function: {
|
||||
name: tc.name,
|
||||
arguments: JSON.stringify(tc.arguments),
|
||||
},
|
||||
}));
|
||||
}
|
||||
function convertMessages(model: Model<"openai-completions">, context: Context): ChatCompletionMessageParam[] {
|
||||
const params: ChatCompletionMessageParam[] = [];
|
||||
|
||||
params.push(assistantMsg);
|
||||
} else if (msg.role === "toolResult") {
|
||||
const transformedMessages = transformMessages(context.messages, model);
|
||||
|
||||
if (context.systemPrompt) {
|
||||
// Cerebras/xAi don't like the "developer" role
|
||||
const useDeveloperRole =
|
||||
model.reasoning && !model.baseUrl.includes("cerebras.ai") && !model.baseUrl.includes("api.x.ai");
|
||||
const role = useDeveloperRole ? "developer" : "system";
|
||||
params.push({ role: role, content: context.systemPrompt });
|
||||
}
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
params.push({
|
||||
role: "tool",
|
||||
role: "user",
|
||||
content: msg.content,
|
||||
tool_call_id: msg.toolCallId,
|
||||
});
|
||||
} else {
|
||||
const content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "text",
|
||||
text: item.text,
|
||||
} satisfies ChatCompletionContentPartText;
|
||||
} else {
|
||||
return {
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: `data:${item.mimeType};base64,${item.data}`,
|
||||
},
|
||||
} satisfies ChatCompletionContentPartImage;
|
||||
}
|
||||
});
|
||||
const filteredContent = !model.input.includes("image")
|
||||
? content.filter((c) => c.type !== "image_url")
|
||||
: content;
|
||||
if (filteredContent.length === 0) continue;
|
||||
params.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const assistantMsg: ChatCompletionAssistantMessageParam = {
|
||||
role: "assistant",
|
||||
content: null,
|
||||
};
|
||||
|
||||
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
|
||||
if (textBlocks.length > 0) {
|
||||
assistantMsg.content = textBlocks.map((b) => b.text).join("");
|
||||
}
|
||||
|
||||
// Handle thinking blocks for llama.cpp server + gpt-oss
|
||||
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
|
||||
if (thinkingBlocks.length > 0) {
|
||||
// Use the signature from the first thinking block if available
|
||||
const signature = thinkingBlocks[0].thinkingSignature;
|
||||
if (signature && signature.length > 0) {
|
||||
(assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join("");
|
||||
}
|
||||
}
|
||||
|
||||
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
|
||||
if (toolCalls.length > 0) {
|
||||
assistantMsg.tool_calls = toolCalls.map((tc) => ({
|
||||
id: tc.id,
|
||||
type: "function" as const,
|
||||
function: {
|
||||
name: tc.name,
|
||||
arguments: JSON.stringify(tc.arguments),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
params.push(assistantMsg);
|
||||
} else if (msg.role === "toolResult") {
|
||||
params.push({
|
||||
role: "tool",
|
||||
content: msg.content,
|
||||
tool_call_id: msg.toolCallId,
|
||||
});
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
private convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
},
|
||||
}));
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
private mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | null): StopReason {
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
return "length";
|
||||
case "function_call":
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "content_filter":
|
||||
return "safety";
|
||||
default:
|
||||
return "stop";
|
||||
function convertTools(tools: Tool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"]): StopReason {
|
||||
if (reason === null) return "stop";
|
||||
switch (reason) {
|
||||
case "stop":
|
||||
return "stop";
|
||||
case "length":
|
||||
return "length";
|
||||
case "function_call":
|
||||
case "tool_calls":
|
||||
return "toolUse";
|
||||
case "content_filter":
|
||||
return "safety";
|
||||
default: {
|
||||
const _exhaustive: never = reason;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,58 +10,49 @@ import type {
|
|||
ResponseOutputMessage,
|
||||
ResponseReasoningItem,
|
||||
} from "openai/resources/responses/responses.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
AssistantMessage,
|
||||
Context,
|
||||
LLM,
|
||||
LLMOptions,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Message,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
|
||||
export interface OpenAIResponsesLLMOptions extends LLMOptions {
|
||||
// OpenAI Responses-specific options
|
||||
export interface OpenAIResponsesOptions extends GenerateOptions {
|
||||
reasoningEffort?: "minimal" | "low" | "medium" | "high";
|
||||
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
||||
}
|
||||
|
||||
export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
||||
private client: OpenAI;
|
||||
private modelInfo: Model;
|
||||
/**
|
||||
* Generate function for OpenAI Responses API
|
||||
*/
|
||||
export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
||||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
options?: OpenAIResponsesOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
|
||||
constructor(model: Model, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
this.client = new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
|
||||
this.modelInfo = model;
|
||||
}
|
||||
|
||||
getModel(): Model {
|
||||
return this.modelInfo;
|
||||
}
|
||||
|
||||
getApi(): string {
|
||||
return "openai-responses";
|
||||
}
|
||||
|
||||
async generate(request: Context, options?: OpenAIResponsesLLMOptions): Promise<AssistantMessage> {
|
||||
// Start async processing
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
role: "assistant",
|
||||
content: [],
|
||||
api: this.getApi(),
|
||||
provider: this.modelInfo.provider,
|
||||
model: this.modelInfo.id,
|
||||
api: "openai-responses" as Api,
|
||||
provider: model.provider,
|
||||
model: model.id,
|
||||
usage: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
@ -71,77 +62,31 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
},
|
||||
stopReason: "stop",
|
||||
};
|
||||
|
||||
try {
|
||||
const input = this.convertToInput(request.messages, request.systemPrompt);
|
||||
// Create OpenAI client
|
||||
const client = createClient(model, options?.apiKey);
|
||||
const params = buildParams(model, context, options);
|
||||
const openaiStream = await client.responses.create(params, { signal: options?.signal });
|
||||
stream.push({ type: "start", partial: output });
|
||||
|
||||
const params: ResponseCreateParamsStreaming = {
|
||||
model: this.modelInfo.id,
|
||||
input,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
if (options?.maxTokens) {
|
||||
params.max_output_tokens = options?.maxTokens;
|
||||
}
|
||||
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options?.temperature;
|
||||
}
|
||||
|
||||
if (request.tools) {
|
||||
params.tools = this.convertTools(request.tools);
|
||||
}
|
||||
|
||||
// Add reasoning options for models that support it
|
||||
if (this.modelInfo?.reasoning) {
|
||||
if (options?.reasoningEffort || options?.reasoningSummary) {
|
||||
params.reasoning = {
|
||||
effort: options?.reasoningEffort || "medium",
|
||||
summary: options?.reasoningSummary || "auto",
|
||||
};
|
||||
params.include = ["reasoning.encrypted_content"];
|
||||
} else {
|
||||
params.reasoning = {
|
||||
effort: this.modelInfo.name.startsWith("gpt-5") ? "minimal" : null,
|
||||
summary: null,
|
||||
};
|
||||
|
||||
if (this.modelInfo.name.startsWith("gpt-5")) {
|
||||
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
||||
input.push({
|
||||
role: "developer",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: "# Juice: 0 !important",
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const stream = await this.client.responses.create(params, {
|
||||
signal: options?.signal,
|
||||
});
|
||||
|
||||
options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });
|
||||
|
||||
const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = [];
|
||||
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
|
||||
let currentBlock: ThinkingContent | TextContent | ToolCall | null = null;
|
||||
|
||||
for await (const event of stream) {
|
||||
for await (const event of openaiStream) {
|
||||
// Handle output item start
|
||||
if (event.type === "response.output_item.added") {
|
||||
const item = event.item;
|
||||
if (item.type === "reasoning") {
|
||||
options?.onEvent?.({ type: "thinking_start" });
|
||||
outputItems.push(item);
|
||||
currentItem = item;
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
} else if (item.type === "message") {
|
||||
options?.onEvent?.({ type: "text_start" });
|
||||
outputItems.push(item);
|
||||
currentItem = item;
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
}
|
||||
}
|
||||
// Handle reasoning summary deltas
|
||||
|
|
@ -151,30 +96,42 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
currentItem.summary.push(event.part);
|
||||
}
|
||||
} else if (event.type === "response.reasoning_summary_text.delta") {
|
||||
if (currentItem && currentItem.type === "reasoning") {
|
||||
if (
|
||||
currentItem &&
|
||||
currentItem.type === "reasoning" &&
|
||||
currentBlock &&
|
||||
currentBlock.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
content: currentItem.summary.map((s) => s.text).join("\n\n"),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add a new line between summary parts (hack...)
|
||||
else if (event.type === "response.reasoning_summary_part.done") {
|
||||
if (currentItem && currentItem.type === "reasoning") {
|
||||
if (
|
||||
currentItem &&
|
||||
currentItem.type === "reasoning" &&
|
||||
currentBlock &&
|
||||
currentBlock.type === "thinking"
|
||||
) {
|
||||
currentItem.summary = currentItem.summary || [];
|
||||
const lastPart = currentItem.summary[currentItem.summary.length - 1];
|
||||
if (lastPart) {
|
||||
currentBlock.thinking += "\n\n";
|
||||
lastPart.text += "\n\n";
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
content: currentItem.summary.map((s) => s.text).join("\n\n"),
|
||||
delta: "\n\n",
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -186,30 +143,28 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
currentItem.content.push(event.part);
|
||||
}
|
||||
} else if (event.type === "response.output_text.delta") {
|
||||
if (currentItem && currentItem.type === "message") {
|
||||
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart && lastPart.type === "output_text") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.text += event.delta;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
content: currentItem.content
|
||||
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
|
||||
.join(""),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (event.type === "response.refusal.delta") {
|
||||
if (currentItem && currentItem.type === "message") {
|
||||
if (currentItem && currentItem.type === "message" && currentBlock && currentBlock.type === "text") {
|
||||
const lastPart = currentItem.content[currentItem.content.length - 1];
|
||||
if (lastPart && lastPart.type === "refusal") {
|
||||
currentBlock.text += event.delta;
|
||||
lastPart.refusal += event.delta;
|
||||
options?.onEvent?.({
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
content: currentItem.content
|
||||
.map((c) => (c.type === "output_text" ? c.text : c.refusal))
|
||||
.join(""),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -218,14 +173,24 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
else if (event.type === "response.output_item.done") {
|
||||
const item = event.item;
|
||||
|
||||
if (item.type === "reasoning") {
|
||||
outputItems[outputItems.length - 1] = item; // Update with final item
|
||||
const thinkingContent = item.summary?.map((s) => s.text).join("\n\n") || "";
|
||||
options?.onEvent?.({ type: "thinking_end", content: thinkingContent });
|
||||
} else if (item.type === "message") {
|
||||
outputItems[outputItems.length - 1] = item; // Update with final item
|
||||
const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
|
||||
options?.onEvent?.({ type: "text_end", content: textContent });
|
||||
if (item.type === "reasoning" && currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinking = item.summary?.map((s) => s.text).join("\n\n") || "";
|
||||
currentBlock.thinkingSignature = JSON.stringify(item);
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "message" && currentBlock && currentBlock.type === "text") {
|
||||
currentBlock.text = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
|
||||
currentBlock.textSignature = item.id;
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
currentBlock = null;
|
||||
} else if (item.type === "function_call") {
|
||||
const toolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
|
|
@ -233,8 +198,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
name: item.name,
|
||||
arguments: JSON.parse(item.arguments),
|
||||
};
|
||||
options?.onEvent?.({ type: "toolCall", toolCall });
|
||||
outputItems.push(item);
|
||||
output.content.push(toolCall);
|
||||
stream.push({ type: "toolCall", toolCall, partial: output });
|
||||
}
|
||||
}
|
||||
// Handle completion
|
||||
|
|
@ -249,10 +214,10 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
calculateCost(this.modelInfo, output.usage);
|
||||
calculateCost(model, output.usage);
|
||||
// Map status to stop reason
|
||||
output.stopReason = this.mapStopReason(response?.status);
|
||||
if (outputItems.some((b) => b.type === "function_call") && output.stopReason === "stop") {
|
||||
output.stopReason = mapStopReason(response?.status);
|
||||
if (output.content.some((b) => b.type === "toolCall") && output.stopReason === "stop") {
|
||||
output.stopReason = "toolUse";
|
||||
}
|
||||
}
|
||||
|
|
@ -260,173 +225,215 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
|
|||
else if (event.type === "error") {
|
||||
output.stopReason = "error";
|
||||
output.error = `Code ${event.code}: ${event.message}` || "Unknown error";
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
return output;
|
||||
} else if (event.type === "response.failed") {
|
||||
output.stopReason = "error";
|
||||
output.error = "Unknown error";
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
||||
// Convert output items to blocks
|
||||
for (const item of outputItems) {
|
||||
if (item.type === "reasoning") {
|
||||
output.content.push({
|
||||
type: "thinking",
|
||||
thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "",
|
||||
thinkingSignature: JSON.stringify(item), // Full item for resubmission
|
||||
});
|
||||
} else if (item.type === "message") {
|
||||
output.content.push({
|
||||
type: "text",
|
||||
text: item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""),
|
||||
textSignature: item.id, // ID for resubmission
|
||||
});
|
||||
} else if (item.type === "function_call") {
|
||||
output.content.push({
|
||||
type: "toolCall",
|
||||
id: item.call_id + "|" + item.id,
|
||||
name: item.name,
|
||||
arguments: JSON.parse(item.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
}
|
||||
|
||||
options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
|
||||
return output;
|
||||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
} catch (error) {
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
options?.onEvent?.({ type: "error", error: output.error });
|
||||
return output;
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
stream.end();
|
||||
}
|
||||
})();
|
||||
|
||||
return stream;
|
||||
};
|
||||
|
||||
function createClient(model: Model<"openai-responses">, apiKey?: string) {
|
||||
if (!apiKey) {
|
||||
if (!process.env.OPENAI_API_KEY) {
|
||||
throw new Error(
|
||||
"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.",
|
||||
);
|
||||
}
|
||||
apiKey = process.env.OPENAI_API_KEY;
|
||||
}
|
||||
return new OpenAI({ apiKey, baseURL: model.baseUrl, dangerouslyAllowBrowser: true });
|
||||
}
|
||||
|
||||
function buildParams(model: Model<"openai-responses">, context: Context, options?: OpenAIResponsesOptions) {
|
||||
const messages = convertMessages(model, context);
|
||||
|
||||
const params: ResponseCreateParamsStreaming = {
|
||||
model: model.id,
|
||||
input: messages,
|
||||
stream: true,
|
||||
};
|
||||
|
||||
if (options?.maxTokens) {
|
||||
params.max_output_tokens = options?.maxTokens;
|
||||
}
|
||||
|
||||
private convertToInput(messages: Message[], systemPrompt?: string): ResponseInput {
|
||||
const input: ResponseInput = [];
|
||||
if (options?.temperature !== undefined) {
|
||||
params.temperature = options?.temperature;
|
||||
}
|
||||
|
||||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, this.modelInfo, this.getApi());
|
||||
if (context.tools) {
|
||||
params.tools = convertTools(context.tools);
|
||||
}
|
||||
|
||||
// Add system prompt if provided
|
||||
if (systemPrompt) {
|
||||
const role = this.modelInfo?.reasoning ? "developer" : "system";
|
||||
input.push({
|
||||
role,
|
||||
content: systemPrompt,
|
||||
});
|
||||
}
|
||||
if (model.reasoning) {
|
||||
if (options?.reasoningEffort || options?.reasoningSummary) {
|
||||
params.reasoning = {
|
||||
effort: options?.reasoningEffort || "medium",
|
||||
summary: options?.reasoningSummary || "auto",
|
||||
};
|
||||
params.include = ["reasoning.encrypted_content"];
|
||||
} else {
|
||||
params.reasoning = {
|
||||
effort: model.name.startsWith("gpt-5") ? "minimal" : null,
|
||||
summary: null,
|
||||
};
|
||||
|
||||
// Convert messages
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
// Handle both string and array content
|
||||
if (typeof msg.content === "string") {
|
||||
input.push({
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
// Convert array content to OpenAI Responses format
|
||||
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "input_text",
|
||||
text: item.text,
|
||||
} satisfies ResponseInputText;
|
||||
} else {
|
||||
// Image content - OpenAI Responses uses data URLs
|
||||
return {
|
||||
type: "input_image",
|
||||
detail: "auto",
|
||||
image_url: `data:${item.mimeType};base64,${item.data}`,
|
||||
} satisfies ResponseInputImage;
|
||||
}
|
||||
});
|
||||
const filteredContent = !this.modelInfo?.input.includes("image")
|
||||
? content.filter((c) => c.type !== "input_image")
|
||||
: content;
|
||||
input.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
// Process content blocks in order
|
||||
const output: ResponseInput = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
// Do not submit thinking blocks if the completion had an error (i.e. abort)
|
||||
if (block.type === "thinking" && msg.stopReason !== "error") {
|
||||
// Push the full reasoning item(s) from signature
|
||||
if (block.thinkingSignature) {
|
||||
const reasoningItem = JSON.parse(block.thinkingSignature);
|
||||
output.push(reasoningItem);
|
||||
}
|
||||
} else if (block.type === "text") {
|
||||
const textBlock = block as TextContent;
|
||||
output.push({
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
|
||||
status: "completed",
|
||||
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
|
||||
} satisfies ResponseOutputMessage);
|
||||
// Do not submit thinking blocks if the completion had an error (i.e. abort)
|
||||
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
|
||||
const toolCall = block as ToolCall;
|
||||
output.push({
|
||||
type: "function_call",
|
||||
id: toolCall.id.split("|")[1], // Extract original ID
|
||||
call_id: toolCall.id.split("|")[0], // Extract call session ID
|
||||
name: toolCall.name,
|
||||
arguments: JSON.stringify(toolCall.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Add all output items to input
|
||||
input.push(...output);
|
||||
} else if (msg.role === "toolResult") {
|
||||
// Tool results are sent as function_call_output
|
||||
input.push({
|
||||
type: "function_call_output",
|
||||
call_id: msg.toolCallId.split("|")[0], // Extract call session ID
|
||||
output: msg.content,
|
||||
if (model.name.startsWith("gpt-5")) {
|
||||
// Jesus Christ, see https://community.openai.com/t/need-reasoning-false-option-for-gpt-5/1351588/7
|
||||
messages.push({
|
||||
role: "developer",
|
||||
content: [
|
||||
{
|
||||
type: "input_text",
|
||||
text: "# Juice: 0 !important",
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return input;
|
||||
}
|
||||
|
||||
private convertTools(tools: Tool[]): OpenAITool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
strict: null,
|
||||
}));
|
||||
return params;
|
||||
}
|
||||
|
||||
function convertMessages(model: Model<"openai-responses">, context: Context): ResponseInput {
|
||||
const messages: ResponseInput = [];
|
||||
|
||||
const transformedMessages = transformMessages(context.messages, model);
|
||||
|
||||
if (context.systemPrompt) {
|
||||
const role = model.reasoning ? "developer" : "system";
|
||||
messages.push({
|
||||
role,
|
||||
content: context.systemPrompt,
|
||||
});
|
||||
}
|
||||
|
||||
private mapStopReason(status: string | undefined): StopReason {
|
||||
switch (status) {
|
||||
case "completed":
|
||||
return "stop";
|
||||
case "incomplete":
|
||||
return "length";
|
||||
case "failed":
|
||||
case "cancelled":
|
||||
return "error";
|
||||
default:
|
||||
return "stop";
|
||||
for (const msg of transformedMessages) {
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: [{ type: "input_text", text: msg.content }],
|
||||
});
|
||||
} else {
|
||||
const content: ResponseInputContent[] = msg.content.map((item): ResponseInputContent => {
|
||||
if (item.type === "text") {
|
||||
return {
|
||||
type: "input_text",
|
||||
text: item.text,
|
||||
} satisfies ResponseInputText;
|
||||
} else {
|
||||
return {
|
||||
type: "input_image",
|
||||
detail: "auto",
|
||||
image_url: `data:${item.mimeType};base64,${item.data}`,
|
||||
} satisfies ResponseInputImage;
|
||||
}
|
||||
});
|
||||
const filteredContent = !model.input.includes("image")
|
||||
? content.filter((c) => c.type !== "input_image")
|
||||
: content;
|
||||
if (filteredContent.length === 0) continue;
|
||||
messages.push({
|
||||
role: "user",
|
||||
content: filteredContent,
|
||||
});
|
||||
}
|
||||
} else if (msg.role === "assistant") {
|
||||
const output: ResponseInput = [];
|
||||
|
||||
for (const block of msg.content) {
|
||||
// Do not submit thinking blocks if the completion had an error (i.e. abort)
|
||||
if (block.type === "thinking" && msg.stopReason !== "error") {
|
||||
if (block.thinkingSignature) {
|
||||
const reasoningItem = JSON.parse(block.thinkingSignature);
|
||||
output.push(reasoningItem);
|
||||
}
|
||||
} else if (block.type === "text") {
|
||||
const textBlock = block as TextContent;
|
||||
output.push({
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
content: [{ type: "output_text", text: textBlock.text, annotations: [] }],
|
||||
status: "completed",
|
||||
id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
|
||||
} satisfies ResponseOutputMessage);
|
||||
// Do not submit toolcall blocks if the completion had an error (i.e. abort)
|
||||
} else if (block.type === "toolCall" && msg.stopReason !== "error") {
|
||||
const toolCall = block as ToolCall;
|
||||
output.push({
|
||||
type: "function_call",
|
||||
id: toolCall.id.split("|")[1],
|
||||
call_id: toolCall.id.split("|")[0],
|
||||
name: toolCall.name,
|
||||
arguments: JSON.stringify(toolCall.arguments),
|
||||
});
|
||||
}
|
||||
}
|
||||
if (output.length === 0) continue;
|
||||
messages.push(...output);
|
||||
} else if (msg.role === "toolResult") {
|
||||
messages.push({
|
||||
type: "function_call_output",
|
||||
call_id: msg.toolCallId.split("|")[0],
|
||||
output: msg.content,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
function convertTools(tools: Tool[]): OpenAITool[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters,
|
||||
strict: null,
|
||||
}));
|
||||
}
|
||||
|
||||
function mapStopReason(status: OpenAI.Responses.ResponseStatus | undefined): StopReason {
|
||||
if (!status) return "stop";
|
||||
switch (status) {
|
||||
case "completed":
|
||||
return "stop";
|
||||
case "incomplete":
|
||||
return "length";
|
||||
case "failed":
|
||||
case "cancelled":
|
||||
return "error";
|
||||
// These two are wonky ...
|
||||
case "in_progress":
|
||||
case "queued":
|
||||
return "stop";
|
||||
default: {
|
||||
const _exhaustive: never = status;
|
||||
throw new Error(`Unhandled stop reason: ${_exhaustive}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,6 @@
|
|||
import type { AssistantMessage, Message, Model } from "../types.js";
|
||||
import type { Api, AssistantMessage, Message, Model } from "../types.js";
|
||||
|
||||
/**
|
||||
* Transform messages for cross-provider compatibility.
|
||||
*
|
||||
* - User and toolResult messages are copied verbatim
|
||||
* - Assistant messages:
|
||||
* - If from the same provider/model, copied as-is
|
||||
* - If from different provider/model, thinking blocks are converted to text blocks with <thinking></thinking> tags
|
||||
*
|
||||
* @param messages The messages to transform
|
||||
* @param model The target model that will process these messages
|
||||
* @returns A copy of the messages array with transformations applied
|
||||
*/
|
||||
export function transformMessages(messages: Message[], model: Model, api: string): Message[] {
|
||||
export function transformMessages<TApi extends Api>(messages: Message[], model: Model<TApi>): Message[] {
|
||||
return messages.map((msg) => {
|
||||
// User and toolResult messages pass through unchanged
|
||||
if (msg.role === "user" || msg.role === "toolResult") {
|
||||
|
|
@ -24,7 +12,7 @@ export function transformMessages(messages: Message[], model: Model, api: string
|
|||
const assistantMsg = msg as AssistantMessage;
|
||||
|
||||
// If message is from the same provider and API, keep as is
|
||||
if (assistantMsg.provider === model.provider && assistantMsg.api === api) {
|
||||
if (assistantMsg.provider === model.provider && assistantMsg.api === model.api) {
|
||||
return msg;
|
||||
}
|
||||
|
||||
|
|
@ -47,8 +35,6 @@ export function transformMessages(messages: Message[], model: Model, api: string
|
|||
content: transformedContent,
|
||||
};
|
||||
}
|
||||
|
||||
// Should not reach here, but return as-is for safety
|
||||
return msg;
|
||||
});
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue