feat(ai): Add OpenAI-compatible provider examples for multiple services

- Add examples for Cerebras, Groq, Ollama, and OpenRouter
- Update OpenAI Completions provider to handle base URL properly
- Simplify README formatting
- All examples use the same OpenAICompletionsLLM provider with different base URLs
This commit is contained in:
Mario Zechner 2025-08-25 17:41:47 +02:00
parent 6112029076
commit 4bb3a5ad02
6 changed files with 371 additions and 74 deletions

View file

@@ -32,6 +32,7 @@ const streamResponse = await llm.complete({
}, { }, {
onText: (chunk) => process.stdout.write(chunk), onText: (chunk) => process.stdout.write(chunk),
onThinking: (chunk) => process.stderr.write(chunk), onThinking: (chunk) => process.stderr.write(chunk),
// Provider specific config
thinking: { enabled: true } thinking: { enabled: true }
}); });
@@ -60,24 +61,6 @@ if (toolResponse.toolCalls) {
} }
``` ```
## Features
- **Unified Interface**: Same API across OpenAI, Anthropic, and Gemini
- **Streaming**: Real-time text and thinking streams with completion signals
- **Tool Calling**: Consistent function calling with automatic ID generation
- **Thinking Mode**: Access reasoning tokens (o1, Claude, Gemini 2.0)
- **Token Tracking**: Input, output, cache, and thinking token counts
- **Error Handling**: Graceful fallbacks with detailed error messages
## Providers
| Provider | Models | Thinking | Tools | Streaming |
|----------|--------|----------|-------|-----------|
| OpenAI Completions | gpt-4o, gpt-4o-mini | ❌ | ✅ | ✅ |
| OpenAI Responses | o1, o3, gpt-5 | ✅ | ✅ | ✅ |
| Anthropic | claude-3.5-sonnet, claude-3.5-haiku | ✅ | ✅ | ✅ |
| Gemini | gemini-2.0-flash, gemini-2.0-pro | ✅ | ✅ | ✅ |
## Development ## Development
This package is part of the pi monorepo. See the main README for development instructions. This package is part of the pi monorepo. See the main README for development instructions.

View file

@@ -43,9 +43,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
messages, messages,
stream: true, stream: true,
stream_options: { include_usage: true }, stream_options: { include_usage: true },
store: false,
}; };
// Cerebras doesn't like the "store" field
if (!this.client.baseURL?.includes("cerebras.ai")) {
(params as any).store = false;
}
if (options?.maxTokens) { if (options?.maxTokens) {
params.max_completion_tokens = options?.maxTokens; params.max_completion_tokens = options?.maxTokens;
} }
@ -71,6 +75,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
}); });
let content = ""; let content = "";
let reasoningContent = "";
let reasoningField: "reasoning" | "reasoning_content" | null = null;
const toolCallsMap = new Map< const toolCallsMap = new Map<
number, number,
{ {
@ -86,56 +92,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
cacheWrite: 0, cacheWrite: 0,
}; };
let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null; let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null;
let blockType: "text" | "thinking" | null = null;
let inTextBlock = false;
for await (const chunk of stream) { for await (const chunk of stream) {
const choice = chunk.choices[0];
// Handle text content
if (choice?.delta?.content) {
content += choice.delta.content;
options?.onText?.(choice.delta.content, false);
inTextBlock = true;
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
if (inTextBlock) {
// If we were in a text block, signal its end
options?.onText?.("", true);
inTextBlock = false;
}
for (const toolCall of choice.delta.tool_calls) {
const index = toolCall.index;
if (!toolCallsMap.has(index)) {
toolCallsMap.set(index, {
id: toolCall.id || "",
name: toolCall.function?.name || "",
arguments: "",
});
}
const existing = toolCallsMap.get(index)!;
if (toolCall.id) existing.id = toolCall.id;
if (toolCall.function?.name) existing.name = toolCall.function.name;
if (toolCall.function?.arguments) {
existing.arguments += toolCall.function.arguments;
}
}
}
// Capture finish reason
if (choice?.finish_reason) {
if (inTextBlock) {
// If we were in a text block, signal its end
options?.onText?.("", true);
inTextBlock = false;
}
finishReason = choice.finish_reason;
}
// Capture usage
if (chunk.usage) { if (chunk.usage) {
usage = { usage = {
input: chunk.usage.prompt_tokens || 0, input: chunk.usage.prompt_tokens || 0,
@ -143,9 +101,96 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0, cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
cacheWrite: 0, cacheWrite: 0,
}; };
}
// Note: reasoning tokens are in completion_tokens_details?.reasoning_tokens const choice = chunk.choices[0];
// but we don't have actual thinking content from Chat Completions API if (!choice) continue;
if (choice.delta) {
// Handle text content
if (
choice.delta.content !== null &&
choice.delta.content !== undefined &&
choice.delta.content.length > 0
) {
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
content += choice.delta.content;
options?.onText?.(choice.delta.content, false);
blockType = "text";
}
// Handle LLAMA.cpp reasoning_content
if (
(choice.delta as any).reasoning_content !== null &&
(choice.delta as any).reasoning_content !== undefined
) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
reasoningContent += (choice.delta as any).reasoning_content;
reasoningField = "reasoning_content";
options?.onThinking?.((choice.delta as any).reasoning_content, false);
blockType = "thinking";
}
// Handle Ollama reasoning field
if ((choice.delta as any).reasoning !== null && (choice.delta as any).reasoning !== undefined) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
reasoningContent += (choice.delta as any).reasoning;
reasoningField = "reasoning";
options?.onThinking?.((choice.delta as any).reasoning, false);
blockType = "thinking";
}
// Handle tool calls
if (choice?.delta?.tool_calls) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
for (const toolCall of choice.delta.tool_calls) {
const index = toolCall.index;
if (!toolCallsMap.has(index)) {
toolCallsMap.set(index, {
id: toolCall.id || "",
name: toolCall.function?.name || "",
arguments: "",
});
}
const existing = toolCallsMap.get(index)!;
if (toolCall.id) existing.id = toolCall.id;
if (toolCall.function?.name) existing.name = toolCall.function.name;
if (toolCall.function?.arguments) {
existing.arguments += toolCall.function.arguments;
}
}
}
}
// Capture finish reason
if (choice.finish_reason) {
if (blockType === "text") {
options?.onText?.("", true);
blockType = null;
}
if (blockType === "thinking") {
options?.onThinking?.("", true);
blockType = null;
}
finishReason = choice.finish_reason;
} }
} }
@ -159,7 +204,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
return { return {
role: "assistant", role: "assistant",
content: content || undefined, content: content || undefined,
thinking: undefined, // Chat Completions doesn't provide actual thinking content thinking: reasoningContent || undefined,
thinkingSignature: reasoningField || undefined,
toolCalls: toolCalls.length > 0 ? toolCalls : undefined, toolCalls: toolCalls.length > 0 ? toolCalls : undefined,
model: this.model, model: this.model,
usage, usage,
@ -186,7 +232,8 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
// Add system prompt if provided // Add system prompt if provided
if (systemPrompt) { if (systemPrompt) {
const role = this.isReasoningModel() ? "developer" : "system"; // Cerebras doesn't like the "developer" role
const role = this.isReasoningModel() && !this.client.baseURL?.includes("cerebras.ai") ? "developer" : "system";
params.push({ role: role, content: systemPrompt }); params.push({ role: role, content: systemPrompt });
} }
@ -203,6 +250,11 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
content: msg.content || null, content: msg.content || null,
}; };
// LLama.cpp server + gpt-oss
if (msg.thinking && msg.thinkingSignature && msg.thinkingSignature.length > 0) {
(assistantMsg as any)[msg.thinkingSignature] = msg.thinking;
}
if (msg.toolCalls) { if (msg.toolCalls) {
assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({ assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({
id: tc.id, id: tc.id,
@ -255,7 +307,7 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
} }
private isReasoningModel(): boolean { private isReasoningModel(): boolean {
// TODO base on models.dev data // TODO base on models.dev
return this.model.includes("o1") || this.model.includes("o3"); return true;
} }
} }

View file

@@ -0,0 +1,65 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against Cerebras' OpenAI-compatible endpoint.
// Requires CEREBRAS_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("gpt-oss-120b", process.env.CEREBRAS_API_KEY, "https://api.cerebras.ai/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool. You must use the tool to answer both math questions.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,66 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against Groq's OpenAI-compatible endpoint.
// Requires GROQ_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("openai/gpt-oss-20b", process.env.GROQ_API_KEY, "https://api.groq.com/openai/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    // Dump the raw assistant message each turn for debugging.
    console.log(chalk.yellow(JSON.stringify(msg, null, 2)));
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,66 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against a local Ollama server's OpenAI-compatible
// endpoint. Ollama ignores the API key, so a dummy value is passed.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("gpt-oss:20b", "dummy", "http://localhost:11434/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    // Dump the raw assistant message each turn for debugging.
    console.log(chalk.yellow(JSON.stringify(msg, null, 2)));
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    // BUGFIX: previously broke only on stopReason == "stop", which loops
    // forever on any other terminal reason (e.g. "length"). Break whenever the
    // model is not requesting tools, matching the sibling examples.
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));

View file

@@ -0,0 +1,65 @@
import chalk from "chalk";
import { Context, Tool } from "../../src/types";
import { OpenAICompletionsLLM, OpenAICompletionsLLMOptions } from "../../src/providers/openai-completions";

// Example: tool calling against OpenRouter's OpenAI-compatible endpoint.
// Requires OPENROUTER_API_KEY in the environment.

// Define a simple calculator tool the model can invoke.
const tools: Tool[] = [
    {
        name: "calculate",
        description: "Perform a mathematical calculation",
        parameters: {
            type: "object" as const,
            properties: {
                expression: {
                    type: "string",
                    description: "The mathematical expression to evaluate"
                }
            },
            required: ["expression"]
        }
    }
];

// Stream text to stdout; stream reasoning dimmed to stdout as it arrives.
const options: OpenAICompletionsLLMOptions = {
    onText: (t, complete) => process.stdout.write(t + (complete ? "\n" : "")),
    onThinking: (t, complete) => process.stdout.write(chalk.dim(t + (complete ? "\n" : ""))),
    reasoningEffort: "medium",
    toolChoice: "auto"
};

const ai = new OpenAICompletionsLLM("z-ai/glm-4.5", process.env.OPENROUTER_API_KEY, "https://openrouter.ai/api/v1");

const context: Context = {
    systemPrompt: "You are a helpful assistant that can use tools to answer questions.",
    messages: [
        {
            role: "user",
            content: "Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
        }
    ],
    tools
};

// Agent loop: keep completing until the model stops requesting tool calls.
while (true) {
    const msg = await ai.complete(context, options);
    context.messages.push(msg);
    console.log();
    for (const toolCall of msg.toolCalls ?? []) {
        if (toolCall.name === "calculate") {
            const expression = toolCall.arguments.expression;
            // SECURITY: eval() on model-generated input is acceptable only in a
            // local demo — never do this with untrusted input in production.
            const result = eval(expression);
            context.messages.push({
                role: "toolResult",
                content: `The result of ${expression} is ${result}.`,
                toolCallId: toolCall.id,
                isError: false
            });
        }
    }
    if (msg.stopReason !== "toolUse") break;
}
console.log();
console.log(chalk.yellow(JSON.stringify(context.messages, null, 2)));