From 4bb3a5ad020eff4df09a07bff1ce208c0fa1a416 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Mon, 25 Aug 2025 17:41:47 +0200 Subject: [PATCH] feat(ai): Add OpenAI-compatible provider examples for multiple services - Add examples for Cerebras, Groq, Ollama, and OpenRouter - Update OpenAI Completions provider to handle base URL properly - Simplify README formatting - All examples use the same OpenAICompletionsLLM provider with different base URLs --- packages/ai/README.md | 19 +- .../ai/src/providers/openai-completions.ts | 164 ++++++++++++------ .../ai/test/examples/cerebras-completions.ts | 65 +++++++ packages/ai/test/examples/groq-completions.ts | 66 +++++++ .../ai/test/examples/ollama-completions.ts | 66 +++++++ .../test/examples/openrouter-completions.ts | 65 +++++++ 6 files changed, 371 insertions(+), 74 deletions(-) create mode 100644 packages/ai/test/examples/cerebras-completions.ts create mode 100644 packages/ai/test/examples/groq-completions.ts create mode 100644 packages/ai/test/examples/ollama-completions.ts create mode 100644 packages/ai/test/examples/openrouter-completions.ts diff --git a/packages/ai/README.md b/packages/ai/README.md index 2cd9ccb9..a238d7cd 100644 --- a/packages/ai/README.md +++ b/packages/ai/README.md @@ -32,6 +32,7 @@ const streamResponse = await llm.complete({ }, { onText: (chunk) => process.stdout.write(chunk), onThinking: (chunk) => process.stderr.write(chunk), + // Provider specific config thinking: { enabled: true } }); @@ -60,24 +61,6 @@ if (toolResponse.toolCalls) { } ``` -## Features - -- **Unified Interface**: Same API across OpenAI, Anthropic, and Gemini -- **Streaming**: Real-time text and thinking streams with completion signals -- **Tool Calling**: Consistent function calling with automatic ID generation -- **Thinking Mode**: Access reasoning tokens (o1, Claude, Gemini 2.0) -- **Token Tracking**: Input, output, cache, and thinking token counts -- **Error Handling**: Graceful fallbacks with detailed error messages - -## Providers - -| Provider | Models | Thinking | Tools | Streaming | -|----------|--------|----------|-------|-----------| -| OpenAI Completions | gpt-4o, gpt-4o-mini | ❌ | ✅ | ✅ | -| OpenAI Responses | o1, o3, gpt-5 | ✅ | ✅ | ✅ | -| Anthropic | claude-3.5-sonnet, claude-3.5-haiku | ✅ | ✅ | ✅ | -| Gemini | gemini-2.0-flash, gemini-2.0-pro | ✅ | ✅ | ✅ | - ## Development This package is part of the pi monorepo. See the main README for development instructions. diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index e2253ba8..4fc6436b 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -43,9 +43,13 @@ export class OpenAICompletionsLLM implements LLM { messages, stream: true, stream_options: { include_usage: true }, - store: false, }; + // Cerebras doesn't like the "store" field + if (!this.client.baseURL?.includes("cerebras.ai")) { + (params as any).store = false; + } + if (options?.maxTokens) { params.max_completion_tokens = options?.maxTokens; } @@ -71,6 +75,8 @@ export class OpenAICompletionsLLM implements LLM { }); let content = ""; + let reasoningContent = ""; + let reasoningField: "reasoning" | "reasoning_content" | null = null; const toolCallsMap = new Map< number, { @@ -86,56 +92,8 @@ export class OpenAICompletionsLLM implements LLM { cacheWrite: 0, }; let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null; - - let inTextBlock = false; + let blockType: "text" | "thinking" | null = null; for await (const chunk of stream) { - const choice = chunk.choices[0]; - - // Handle text content - if (choice?.delta?.content) { - content += choice.delta.content; - options?.onText?.(choice.delta.content, false); - inTextBlock = true; - } - - // Handle tool calls - if (choice?.delta?.tool_calls) { - if (inTextBlock) { - // If we were in a text block, signal its end - options?.onText?.("", true); - inTextBlock = false; - } - for (const toolCall of choice.delta.tool_calls) { - const index = toolCall.index; - - if (!toolCallsMap.has(index)) { - toolCallsMap.set(index, { - id: toolCall.id || "", - name: toolCall.function?.name || "", - arguments: "", - }); - } - - const existing = toolCallsMap.get(index)!; - if (toolCall.id) existing.id = toolCall.id; - if (toolCall.function?.name) existing.name = toolCall.function.name; - if (toolCall.function?.arguments) { - existing.arguments += toolCall.function.arguments; - } - } - } - - // Capture finish reason - if (choice?.finish_reason) { - if (inTextBlock) { - // If we were in a text block, signal its end - options?.onText?.("", true); - inTextBlock = false; - } - finishReason = choice.finish_reason; - } - - // Capture usage if (chunk.usage) { usage = { input: chunk.usage.prompt_tokens || 0, @@ -143,9 +101,96 @@ export class OpenAICompletionsLLM implements LLM { cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0, cacheWrite: 0, }; + } - // Note: reasoning tokens are in completion_tokens_details?.reasoning_tokens - // but we don't have actual thinking content from Chat Completions API + const choice = chunk.choices[0]; + if (!choice) continue; + + if (choice.delta) { + // Handle text content + if ( + choice.delta.content !== null && + choice.delta.content !== undefined && + choice.delta.content.length > 0 + ) { + if (blockType === "thinking") { + options?.onThinking?.("", true); + blockType = null; + } + content += choice.delta.content; + options?.onText?.(choice.delta.content, false); + blockType = "text"; + } + + // Handle LLAMA.cpp reasoning_content + if ( + (choice.delta as any).reasoning_content !== null && + (choice.delta as any).reasoning_content !== undefined + ) { + if (blockType === "text") { + options?.onText?.("", true); + blockType = null; + } + reasoningContent += (choice.delta as any).reasoning_content; + reasoningField = "reasoning_content"; + options?.onThinking?.((choice.delta as any).reasoning_content, false); + blockType = "thinking"; + } + + // Handle Ollama reasoning field + if ((choice.delta as any).reasoning !== null && (choice.delta as any).reasoning !== undefined) { + if (blockType === "text") { + options?.onText?.("", true); + blockType = null; + } + reasoningContent += (choice.delta as any).reasoning; + reasoningField = "reasoning"; + options?.onThinking?.((choice.delta as any).reasoning, false); + blockType = "thinking"; + } + + // Handle tool calls + if (choice?.delta?.tool_calls) { + if (blockType === "text") { + options?.onText?.("", true); + blockType = null; + } + if (blockType === "thinking") { + options?.onThinking?.("", true); + blockType = null; + } + for (const toolCall of choice.delta.tool_calls) { + const index = toolCall.index; + + if (!toolCallsMap.has(index)) { + toolCallsMap.set(index, { + id: toolCall.id || "", + name: toolCall.function?.name || "", + arguments: "", + }); + } + + const existing = toolCallsMap.get(index)!; + if (toolCall.id) existing.id = toolCall.id; + if (toolCall.function?.name) existing.name = toolCall.function.name; + if (toolCall.function?.arguments) { + existing.arguments += toolCall.function.arguments; + } + } + } + } + + // Capture finish reason + if (choice.finish_reason) { + if (blockType === "text") { + options?.onText?.("", true); + blockType = null; + } + if (blockType === "thinking") { + options?.onThinking?.("", true); + blockType = null; + } + finishReason = choice.finish_reason; } } @@ -159,7 +204,8 @@ export class OpenAICompletionsLLM implements LLM { return { role: "assistant", content: content || undefined, - thinking: undefined, // Chat Completions doesn't provide actual thinking content + thinking: reasoningContent || undefined, + thinkingSignature: reasoningField || undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined, model: this.model, usage, @@ -186,7 +232,8 @@ export class OpenAICompletionsLLM implements LLM { // Add system prompt if provided if (systemPrompt) { - const role = this.isReasoningModel() ? "developer" : "system"; + // Cerebras doesn't like the "developer" role + const role = this.isReasoningModel() && !this.client.baseURL?.includes("cerebras.ai") ? "developer" : "system"; params.push({ role: role, content: systemPrompt }); } @@ -203,6 +250,11 @@ export class OpenAICompletionsLLM implements LLM { content: msg.content || null, }; + // LLama.cpp server + gpt-oss + if (msg.thinking && msg.thinkingSignature && msg.thinkingSignature.length > 0) { + (assistantMsg as any)[msg.thinkingSignature] = msg.thinking; + } + if (msg.toolCalls) { assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({ id: tc.id, @@ -255,7 +307,7 @@ export class OpenAICompletionsLLM implements LLM { } private isReasoningModel(): boolean { - // TODO base on models.dev data - return this.model.includes("o1") || this.model.includes("o3"); + // TODO base on models.dev + return true; } } diff --git a/packages/ai/test/examples/cerebras-completions.ts b/packages/ai/test/examples/cerebras-completions.ts new file mode 100644 index 00000000..9c3d03f9 --- /dev/null +++ b/packages/ai/test/examples/cerebras-completions.ts @@ -0,0 +1,65 @@ +import chalk from "chalk"; +import { Context, Tool } from "../../src/types"; +import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions"; + +// Define a simple calculator tool +const tools: Tool[] = [ + { + name: "calculate", + description: "Perform a mathematical calculation", + parameters: { + type: "object" as const, + properties: { + expression: { + type: "string", + description: "The mathematical expression to evaluate" + } + }, + required: ["expression"] + } + } +]; + +const options: OpenAICompletionsLLMOptions = { + onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), + onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), + reasoningEffort: "medium", + toolChoice: "auto" +}; +const ai = new OpenAICompletionsLLM("gpt-oss-120b", process.env.CEREBRAS_API_KEY, "https://api.cerebras.ai/v1"); +const context: Context = { + systemPrompt: "You are a helpful assistant that can use tools to answer questions.", + messages: [ + { + role: "user", + content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool. You must use the tool to answer both math questions.", + } + ], + tools +} + +while (true) { + let msg = await ai.complete(context, options) + context.messages.push(msg); + console.log(); + + for (const toolCall of msg.toolCalls || []) { + if (toolCall.name === "calculate") { + const expression = toolCall.arguments.expression; + const result = eval(expression); + context.messages.push({ + role: "toolResult", + content: `The result of ${expression} is ${result}.`, + toolCallId: toolCall.id, + isError: false + }); + } + } + if (msg.stopReason != "toolUse") break; +} +console.log(); +console.log(chalk.yellow(JSON.stringify(context.messages, null, 2))); + + + + diff --git a/packages/ai/test/examples/groq-completions.ts b/packages/ai/test/examples/groq-completions.ts new file mode 100644 index 00000000..f4e5f29e --- /dev/null +++ b/packages/ai/test/examples/groq-completions.ts @@ -0,0 +1,66 @@ +import chalk from "chalk"; +import { Context, Tool } from "../../src/types"; +import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions"; + +// Define a simple calculator tool +const tools: Tool[] = [ + { + name: "calculate", + description: "Perform a mathematical calculation", + parameters: { + type: "object" as const, + properties: { + expression: { + type: "string", + description: "The mathematical expression to evaluate" + } + }, + required: ["expression"] + } + } +]; + +const options: OpenAICompletionsLLMOptions = { + onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), + onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), + reasoningEffort: "medium", + toolChoice: "auto" +}; +const ai = new OpenAICompletionsLLM("openai/gpt-oss-20b", process.env.GROQ_API_KEY, "https://api.groq.com/openai/v1"); +const context: Context = { + systemPrompt: "You are a helpful assistant that can use tools to answer questions.", + messages: [ + { + role: "user", + content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.", + } + ], + tools +} + +while (true) { + let msg = await ai.complete(context, options) + context.messages.push(msg); + console.log(); + console.log(chalk.yellow(JSON.stringify(msg, null, 2))); + + for (const toolCall of msg.toolCalls || []) { + if (toolCall.name === "calculate") { + const expression = toolCall.arguments.expression; + const result = eval(expression); + context.messages.push({ + role: "toolResult", + content: `The result of ${expression} is ${result}.`, + toolCallId: toolCall.id, + isError: false + }); + } + } + if (msg.stopReason != "toolUse") break; +} +console.log(); +console.log(chalk.yellow(JSON.stringify(context.messages, null, 2))); + + + + diff --git a/packages/ai/test/examples/ollama-completions.ts b/packages/ai/test/examples/ollama-completions.ts new file mode 100644 index 00000000..f58ca356 --- /dev/null +++ b/packages/ai/test/examples/ollama-completions.ts @@ -0,0 +1,66 @@ +import chalk from "chalk"; +import { Context, Tool } from "../../src/types"; +import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions"; + +// Define a simple calculator tool +const tools: Tool[] = [ + { + name: "calculate", + description: "Perform a mathematical calculation", + parameters: { + type: "object" as const, + properties: { + expression: { + type: "string", + description: "The mathematical expression to evaluate" + } + }, + required: ["expression"] + } + } +]; + +const options: OpenAICompletionsLLMOptions = { + onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), + onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), + reasoningEffort: "medium", + toolChoice: "auto" +}; +const ai = new OpenAICompletionsLLM("gpt-oss:20b", "dummy", "http://localhost:11434/v1"); +const context: Context = { + systemPrompt: "You are a helpful assistant that can use tools to answer questions.", + messages: [ + { + role: "user", + content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.", + } + ], + tools +} + +while (true) { + let msg = await ai.complete(context, options) + context.messages.push(msg); + console.log(); + console.log(chalk.yellow(JSON.stringify(msg, null, 2))); + + for (const toolCall of msg.toolCalls || []) { + if (toolCall.name === "calculate") { + const expression = toolCall.arguments.expression; + const result = eval(expression); + context.messages.push({ + role: "toolResult", + content: `The result of ${expression} is ${result}.`, + toolCallId: toolCall.id, + isError: false + }); + } + } + if (msg.stopReason == "stop") break; +} +console.log(); +console.log(chalk.yellow(JSON.stringify(context.messages, null, 2))); + + + + diff --git a/packages/ai/test/examples/openrouter-completions.ts b/packages/ai/test/examples/openrouter-completions.ts new file mode 100644 index 00000000..9def960a --- /dev/null +++ b/packages/ai/test/examples/openrouter-completions.ts @@ -0,0 +1,65 @@ +import chalk from "chalk"; +import { Context, Tool } from "../../src/types"; +import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions"; + +// Define a simple calculator tool +const tools: Tool[] = [ + { + name: "calculate", + description: "Perform a mathematical calculation", + parameters: { + type: "object" as const, + properties: { + expression: { + type: "string", + description: "The mathematical expression to evaluate" + } + }, + required: ["expression"] + } + } +]; + +const options: OpenAICompletionsLLMOptions = { + onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")), + onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))), + reasoningEffort: "medium", + toolChoice: "auto" +}; +const ai = new OpenAICompletionsLLM("z-ai/glm-4.5", process.env.OPENROUTER_API_KEY, "https://openrouter.ai/api/v1"); +const context: Context = { + systemPrompt: "You are a helpful assistant that can use tools to answer questions.", + messages: [ + { + role: "user", + content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.", + } + ], + tools +} + +while (true) { + let msg = await ai.complete(context, options) + context.messages.push(msg); + console.log(); + + for (const toolCall of msg.toolCalls || []) { + if (toolCall.name === "calculate") { + const expression = toolCall.arguments.expression; + const result = eval(expression); + context.messages.push({ + role: "toolResult", + content: `The result of ${expression} is ${result}.`, + toolCallId: toolCall.id, + isError: false + }); + } + } + if (msg.stopReason != "toolUse") break; +} +console.log(); +console.log(chalk.yellow(JSON.stringify(context.messages, null, 2))); + + + +