feat(ai): Add OpenAI-compatible provider examples for multiple services

- Add examples for Cerebras, Groq, Ollama, and OpenRouter
- Update OpenAI Completions provider to handle base URL properly
- Simplify README formatting
- All examples use the same OpenAICompletionsLLM provider with different base URLs
This commit is contained in:
Mario Zechner 2025-08-25 17:41:47 +02:00
parent 6112029076
commit 4bb3a5ad02
6 changed files with 371 additions and 74 deletions

View file

@@ -32,6 +32,7 @@ const streamResponse = await llm.complete({
}, { }, {
onText: (chunk) => process.stdout.write(chunk), onText: (chunk) => process.stdout.write(chunk),
onThinking: (chunk) => process.stderr.write(chunk), onThinking: (chunk) => process.stderr.write(chunk),
// Provider specific config
thinking: { enabled: true } thinking: { enabled: true }
}); });
@@ -60,24 +61,6 @@ if (toolResponse.toolCalls) {
} }
``` ```
## Features
- **Unified Interface**: Same API across OpenAI, Anthropic, and Gemini
- **Streaming**: Real-time text and thinking streams with completion signals
- **Tool Calling**: Consistent function calling with automatic ID generation
- **Thinking Mode**: Access reasoning tokens (o1, Claude, Gemini 2.0)
- **Token Tracking**: Input, output, cache, and thinking token counts
- **Error Handling**: Graceful fallbacks with detailed error messages
## Providers
| Provider | Models | Thinking | Tools | Streaming |
|----------|--------|----------|-------|-----------|
| OpenAI Completions | gpt-4o, gpt-4o-mini | ❌ | ✅ | ✅ |
| OpenAI Responses | o1, o3, gpt-5 | ✅ | ✅ | ✅ |
| Anthropic | claude-3.5-sonnet, claude-3.5-haiku | ✅ | ✅ | ✅ |
| Gemini | gemini-2.0-flash, gemini-2.0-pro | ✅ | ✅ | ✅ |
## Development ## Development
This package is part of the pi monorepo. See the main README for development instructions. This package is part of the pi monorepo. See the main README for development instructions.

View file

@@ -43,9 +43,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
messages, messages,
stream: true, stream: true,
stream_options: { include_usage: true }, stream_options: { include_usage: true },
store: false,
}; };
// Cerebras doesn't like the "store" field
if (!this.client.baseURL?.includes("cerebras.ai")) {
(params as any).store = false;
}
if (options?.maxTokens) { if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens; params.max_completion_tokens = options?.maxTokens;
} }
@ -71,6 +75,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
}); });
let content = ""; let content = "";
let reasoningContent = "";
let reasoningField: "reasoning" | "reasoning_content" | null = null;
const toolCallsMap = new Map< const toolCallsMap = new Map<
number, number,
{ {
@ -86,56 +92,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
cacheWrite: 0, cacheWrite: 0,
}; };
let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null; let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null;
let blockType: "text" | "thinking" | null = null;
let inTextBlock = false;
for await (const chunk of stream) { for await (const chunk of stream) {
const choice = chunk.choices[0];
// Handle text content
if (choice?.delta?.content) {
content += choice.delta.content;
options?.onText?.(choice.delta.content, false);
inTextBlock = true;
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
if (inTextBlock) {
// If we were in a text block, signal its end
options?.onText?.("", true);
inTextBlock = false;
}
for (const toolCall of choice.delta.tool_calls) {
const index = toolCall.index;
if (!toolCallsMap.has(index)) {
toolCallsMap.set(index, {
id: toolCall.id || "",
name: toolCall.function?.name || "",
arguments: "",
});
}
const existing = toolCallsMap.get(index)!;
if (toolCall.id) existing.id = toolCall.id;
if (toolCall.function?.name) existing.name = toolCall.function.name;
if (toolCall.function?.arguments) {
existing.arguments += toolCall.function.arguments;
}
}
}
// Capture finish reason
if (choice?.finish_reason) {
if (inTextBlock) {
// If we were in a text block, signal its end
options?.onText?.("", true);
inTextBlock = false;
}
finishReason = choice.finish_reason;
}
// Capture usage
if (chunk.usage) { if (chunk.usage) {
usage = { usage = {
input: chunk.usage.prompt_tokens || 0, input: chunk.usage.prompt_tokens || 0,
@ -143,9 +101,96 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0, cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
cacheWrite: 0, cacheWrite: 0,
}; };
}
// Note: reasoning tokens are in completion_tokens_details?.reasoning_tokens const choice = chunk.choices[0];
// but we don't have actual thinking content from Chat Completions API if (!choice) continue;
if (choice.delta) {
// Handle text content
if (
choice.delta.content !== null &&
choice.delta.content !== undefined &&
choice.delta.content.length > 0
) {
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
content += choice.delta.content;
options?.onText?.(choice.delta.content, false);
blockType = "text";
}
// Handle LLAMA.cpp reasoning_content
if (
(choice.delta as any).reasoning_content !== null &&
(choice.delta as any).reasoning_content !== undefined
) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
reasoningContent += (choice.delta as any).reasoning_content;
reasoningField = "reasoning_content";
options?.onThinking?.((choice.delta as any).reasoning_content, false);
blockType = "thinking";
}
// Handle Ollama reasoning field
if ((choice.delta as any).reasoning !== null && (choice.delta as any).reasoning !== undefined) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
reasoningContent += (choice.delta as any).reasoning;
reasoningField = "reasoning";
options?.onThinking?.((choice.delta as any).reasoning, false);
blockType = "thinking";
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
for (const toolCall of choice.delta.tool_calls) {
const index = toolCall.index;
if (!toolCallsMap.has(index)) {
toolCallsMap.set(index, {
id: toolCall.id || "",
name: toolCall.function?.name || "",
arguments: "",
});
}
const existing = toolCallsMap.get(index)!;
if (toolCall.id) existing.id = toolCall.id;
if (toolCall.function?.name) existing.name = toolCall.function.name;
if (toolCall.function?.arguments) {
existing.arguments += toolCall.function.arguments;
}
}
}
}
// Capture finish reason
if (choice.finish_reason) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
finishReason = choice.finish_reason;
} }
} }
@ -159,7 +204,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
return { return {
role: "assistant", role: "assistant",
content: content || undefined, content: content || undefined,
thinking: undefined, // Chat Completions doesn't provide actual thinking content thinking: reasoningContent || undefined,
thinkingSignature: reasoningField || undefined,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
model: this.model, model: this.model,
usage, usage,
@ -186,7 +232,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
// Add system prompt if provided // Add system prompt if provided
if (systemPrompt) { if (systemPrompt) {
const role = this.isReasoningModel() ? "developer" : "system"; // Cerebras doesn't like the "developer" role
const role = this.isReasoningModel() && !this.client.baseURL?.includes("cerebras.ai") ? "developer" : "system";
params.push({ role: role, content: systemPrompt }); params.push({ role: role, content: systemPrompt });
} }
@ -203,6 +250,11 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
content: msg.content || null, content: msg.content || null,
}; };
// LLama.cpp server + gpt-oss
if (msg.thinking && msg.thinkingSignature && msg.thinkingSignature.length > 0) {
(assistantMsg as any)[msg.thinkingSignature] = msg.thinking;
}
if (msg.toolCalls) { if (msg.toolCalls) {
assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({ assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({
id: tc.id, id: tc.id,
@ -255,7 +307,7 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
} }
private isReasoningModel(): boolean { private isReasoningModel(): boolean {
// TODO base on models.dev data // TODO base on models.dev
return this.model.includes("o1") || this.model.includes("o3"); return true;
} }
} }

View file

@@ -0,0 +1,65 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against Cerebras' OpenAI-compatible endpoint.
// Requires CEREBRAS_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("gpt-oss-120b", process.env.CEREBRAS_API_KEY, "https://api.cerebras.ai/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool. You must use the tool to answer both math questions.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,66 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against Groq's OpenAI-compatible endpoint.
// Requires GROQ_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("openai/gpt-oss-20b", process.env.GROQ_API_KEY, "https://api.groq.com/openai/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    // Dump the raw assistant message each turn for debugging.
    console.log(chalk.yellow(JSON.stringify(msg, null, 2)));
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,66 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against a local Ollama server's OpenAI-compatible
// endpoint. Ollama ignores the API key, so a dummy value is passed.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("gpt-oss:20b", "dummy", "http://localhost:11434/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    // Dump the raw assistant message each turn for debugging.
    console.log(chalk.yellow(JSON.stringify(msg, null, 2)));
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    // BUGFIX: previously broke only on stopReason == "stop", which loops
    // forever on any other terminal reason (e.g. "length"). Break whenever the
    // model is not requesting tools, matching the sibling examples.
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,65 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against OpenRouter's OpenAI-compatible endpoint.
// Requires OPENROUTER_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("z-ai/glm-4.5", process.env.OPENROUTER_API_KEY, "https://openrouter.ai/api/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));