/**
 * Anthropic-specific options accepted by `AnthropicAI.complete`.
 */
export interface AnthropicOptions {
	/** Extended thinking configuration. */
	thinking?: {
		enabled: boolean;
		/** Thinking token budget; `complete` falls back to 1024 when this is omitted. */
		budgetTokens?: number;
	};
	/** Tool selection strategy. The string forms are expanded to `{ type: <value> }`. */
	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
}

/**
 * `AI` implementation backed by the Anthropic Messages API.
 *
 * Requests are always streamed: text and thinking deltas are forwarded to the
 * `onText` / `onThinking` callbacks of the request, and the final message is
 * converted into the unified `AssistantMessage` shape.
 */
export class AnthropicAI implements AI<AnthropicOptions> {
	private readonly client: Anthropic;
	private readonly model: string;

	/**
	 * @param model - Model id sent with every request.
	 * @param apiKey - API key; when empty, the `ANTHROPIC_API_KEY` environment variable is used.
	 * @param baseUrl - Optional base URL passed to the SDK client.
	 * @throws Error when neither `apiKey` nor `ANTHROPIC_API_KEY` provides a key.
	 */
	constructor(model: string, apiKey?: string, baseUrl?: string) {
		const resolvedKey = apiKey || process.env.ANTHROPIC_API_KEY;
		if (!resolvedKey) {
			throw new Error(
				"Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it as an argument.",
			);
		}
		this.client = new Anthropic({ apiKey: resolvedKey, baseURL: baseUrl });
		this.model = model;
	}

	/**
	 * Runs one streamed completion.
	 *
	 * @param request - Unified request (messages, tools, callbacks, abort signal).
	 * @param options - Anthropic-specific thinking and tool-choice settings.
	 * @returns The assistant message. This method does not reject: any failure is
	 * reported as a message whose stop reason is `"error"` and whose `error`
	 * field carries the failure text.
	 */
	async complete(request: Request, options?: AnthropicOptions): Promise<AssistantMessage> {
		try {
			const params: MessageCreateParamsStreaming = {
				model: this.model,
				messages: this.convertMessages(request.messages),
				// `||` on purpose: 0 is not a usable token limit, so it also falls back.
				max_tokens: request.maxTokens || 4096,
				stream: true,
			};

			if (request.systemPrompt) {
				params.system = request.systemPrompt;
			}
			if (request.temperature !== undefined) {
				params.temperature = request.temperature;
			}
			if (request.tools) {
				params.tools = this.convertTools(request.tools);
			}
			if (options?.thinking?.enabled) {
				params.thinking = {
					type: "enabled",
					budget_tokens: options.thinking.budgetTokens || 1024,
				};
			}
			if (options?.toolChoice) {
				params.tool_choice =
					typeof options.toolChoice === "string" ? { type: options.toolChoice } : options.toolChoice;
			}

			const stream = this.client.messages.stream(params, { signal: request.signal });

			// Forward incremental output to the caller while the stream is consumed.
			for await (const event of stream) {
				if (event.type !== "content_block_delta") continue;
				if (event.delta.type === "text_delta") {
					request.onText?.(event.delta.text);
				} else if (event.delta.type === "thinking_delta") {
					request.onThinking?.(event.delta.thinking);
				}
			}

			const finalMessage = await stream.finalMessage();
			const usage: TokenUsage = {
				input: finalMessage.usage.input_tokens,
				output: finalMessage.usage.output_tokens,
				cacheRead: finalMessage.usage.cache_read_input_tokens || 0,
				cacheWrite: finalMessage.usage.cache_creation_input_tokens || 0,
				// TODO add cost
			};

			return {
				role: "assistant",
				...this.collectContent(finalMessage.content),
				model: this.model,
				usage,
				// "stopResaon" (sic) is the property name declared on AssistantMessage.
				stopResaon: this.mapStopReason(finalMessage.stop_reason),
			};
		} catch (error) {
			return {
				role: "assistant",
				model: this.model,
				usage: {
					input: 0,
					output: 0,
					cacheRead: 0,
					cacheWrite: 0,
				},
				stopResaon: "error",
				error: error instanceof Error ? error.message : String(error),
			};
		}
	}

	/**
	 * Folds the content blocks of a response into the unified message fields in a
	 * single pass. Block types other than thinking, text and tool_use are ignored.
	 */
	private collectContent(blocks: Anthropic.Messages.ContentBlock[]): {
		content: string | undefined;
		thinking: string | undefined;
		thinkingSignature: string | undefined;
		toolCalls: ToolCall[];
	} {
		const thinkingParts: string[] = [];
		const signatureParts: string[] = [];
		const textParts: string[] = [];
		const toolCalls: ToolCall[] = [];

		for (const block of blocks) {
			switch (block.type) {
				case "thinking":
					thinkingParts.push(block.thinking);
					signatureParts.push(block.signature);
					break;
				case "text":
					textParts.push(block.text);
					break;
				case "tool_use":
					toolCalls.push({
						id: block.id,
						name: block.name,
						arguments: block.input as Record<string, any>,
					});
					break;
				default:
					break;
			}
		}

		const joinOrUndefined = (parts: string[]): string | undefined =>
			parts.length > 0 ? parts.join("\n") : undefined;

		return {
			content: joinOrUndefined(textParts),
			thinking: joinOrUndefined(thinkingParts),
			// This is kinda wrong if there is more than one thinking block: joined signatures are
			// not a valid signature. We do not use interleaved thinking though, so we should
			// always have a single thinking block.
			thinkingSignature: joinOrUndefined(signatureParts),
			toolCalls,
		};
	}

	/**
	 * Converts unified messages to Anthropic message params.
	 *
	 * Tool results are sent as user messages containing a `tool_result` block.
	 * Assistant messages that yield no content blocks (for example the error
	 * message returned by `complete`) are skipped, because the Messages API
	 * rejects messages with empty content.
	 */
	private convertMessages(messages: Message[]): MessageParam[] {
		const params: MessageParam[] = [];

		for (const msg of messages) {
			if (msg.role === "user") {
				params.push({ role: "user", content: msg.content });
				continue;
			}

			if (msg.role === "toolResult") {
				params.push({
					role: "user",
					content: [
						{
							type: "tool_result",
							tool_use_id: msg.toolCallId,
							content: msg.content,
							is_error: msg.isError,
						},
					],
				});
				continue;
			}

			if (msg.role === "assistant") {
				const blocks: ContentBlockParam[] = [];

				// A thinking block is only replayed together with its signature.
				if (msg.thinking && msg.thinkingSignature) {
					blocks.push({
						type: "thinking",
						thinking: msg.thinking,
						signature: msg.thinkingSignature,
					});
				}
				if (msg.content) {
					blocks.push({ type: "text", text: msg.content });
				}
				for (const toolCall of msg.toolCalls ?? []) {
					blocks.push({
						type: "tool_use",
						id: toolCall.id,
						name: toolCall.name,
						input: toolCall.arguments,
					});
				}

				if (blocks.length === 0) continue;
				params.push({ role: "assistant", content: blocks });
			}
		}

		return params;
	}

	/** Converts unified tool definitions to Anthropic tool params; `undefined` yields `[]`. */
	private convertTools(tools: Request["tools"]): Tool[] {
		if (!tools) return [];

		return tools.map((tool) => ({
			name: tool.name,
			description: tool.description,
			input_schema: {
				type: "object" as const,
				properties: tool.parameters.properties || {},
				required: tool.parameters.required || [],
			},
		}));
	}

	/** Maps an Anthropic stop reason to the unified `StopReason`; unknown or null values map to "stop". */
	private mapStopReason(reason: Anthropic.Messages.StopReason | null): StopReason {
		switch (reason) {
			case "end_turn":
				return "stop";
			case "max_tokens":
				return "length";
			case "tool_use":
				return "toolUse";
			case "refusal":
				return "safety";
			case "pause_turn": // Stop is good enough -> resubmit
				return "stop";
			case "stop_sequence":
				return "stop"; // We don't supply stop sequences, so this should never happen
			default:
				return "stop";
		}
	}
}
/**
 * Provider-agnostic completion interface.
 *
 * @typeParam T - Provider-specific options type accepted by `complete`. The
 * default keeps un-parameterised `AI` references compiling.
 */
export interface AI<T = unknown> {
	complete(request: Request, options?: T): Promise<AssistantMessage>;
}

/** Static description of a model: capabilities, pricing and limits. */
export interface ModelInfo {
	id: string;
	name: string;
	provider: string;
	capabilities: {
		reasoning: boolean;
		toolCall: boolean;
		vision: boolean;
		audio?: boolean;
	};
	cost: {
		input: number; // per million tokens
		output: number; // per million tokens
		cacheRead?: number;
		cacheWrite?: number;
	};
	limits: {
		context: number;
		output: number;
	};
	knowledge?: string;
}

/** A plain-text message written by the user. */
export interface UserMessage {
	role: "user";
	content: string;
}

/** A model reply, or a failed attempt when the stop reason is `"error"`. */
export interface AssistantMessage {
	role: "assistant";
	thinking?: string;
	thinkingSignature?: string; // Leaky abstraction: needed for Anthropic
	content?: string;
	toolCalls?: ToolCall[];
	model: string;
	usage: TokenUsage;

	// NOTE(review): "stopResaon" is a misspelling of "stopReason". It is kept here because
	// renaming the property would break every producer and consumer of this interface.
	stopResaon: StopReason;
	error?: string | Error;
}

/** The outcome of executing one tool call, linked to it by `toolCallId`. */
export interface ToolResultMessage {
	role: "toolResult";
	content: string;
	toolCallId: string;
	isError: boolean;
}

/** Any entry of a conversation, discriminated by `role`. */
export type Message = UserMessage | AssistantMessage | ToolResultMessage;

/** A tool the model may call. */
export interface Tool {
	name: string;
	description: string;
	// `any` values on purpose: schema fields are read untyped by the providers.
	parameters: Record<string, any>; // JSON Schema
}

/** One completion request, including streaming callbacks and cancellation. */
export interface Request {
	systemPrompt?: string;
	messages: Message[];
	tools?: Tool[];
	temperature?: number;
	maxTokens?: number;
	/** Called with each streamed text delta. */
	onText?: (text: string) => void;
	/** Called with each streamed thinking delta. */
	onThinking?: (thinking: string) => void;
	signal?: AbortSignal;
}

/** Normalized streaming events, discriminated by `type`. */
export type Event =
	| { type: "start"; model: string; provider: string }
	| { type: "text"; content: string; delta: string }
	| { type: "thinking"; content: string; delta: string }
	| { type: "toolCall"; toolCall: ToolCall }
	| { type: "usage"; usage: TokenUsage }
	| { type: "done"; reason: StopReason; message: AssistantMessage }
	| { type: "error"; error: Error };

/** A tool invocation requested by the model. */
export interface ToolCall {
	id: string;
	name: string;
	// `any` values on purpose: arguments are model-produced JSON read untyped by callers.
	arguments: Record<string, any>;
}

/** Token counts of one completion, with an optional cost breakdown. */
export interface TokenUsage {
	input: number;
	output: number;
	cacheRead: number;
	cacheWrite: number;
	cost?: {
		input: number;
		output: number;
		cacheRead: number;
		cacheWrite: number;
		total: number;
	};
}

/** Why a completion ended. */
export type StopReason = "stop" | "length" | "toolUse" | "safety" | "error";
import Anthropic from "@anthropic-ai/sdk";
import { MessageCreateParamsBase } from "@anthropic-ai/sdk/resources/messages.mjs";
import chalk from "chalk";
import { AnthropicAI } from "../../src/providers/anthropic";
import { Request, Message, Tool } from "../../src/types";

// Manual example: one thinking-enabled turn that requests calculator tool calls,
// local execution of those calls, then a second turn with the tool results.

// Define a simple calculator tool
const tools: Tool[] = [
	{
		name: "calculate",
		description: "Perform a mathematical calculation",
		parameters: {
			type: "object" as const,
			properties: {
				expression: {
					type: "string",
					description: "The mathematical expression to evaluate",
				},
			},
			required: ["expression"],
		},
	},
];

/** Only digits, whitespace, decimal points, parentheses and + - * / % may appear in an expression. */
const ARITHMETIC_PATTERN = /^[\d\s+\-*/%.()]+$/;

/**
 * Evaluates a calculator expression produced by the model.
 *
 * @throws Error when the input is not a string, contains characters outside the
 * arithmetic whitelist, or does not evaluate to a finite number.
 */
function evaluateArithmetic(expression: unknown): number {
	if (typeof expression !== "string" || !ARITHMETIC_PATTERN.test(expression)) {
		throw new Error(`Unsupported expression: ${String(expression)}`);
	}
	// NOTE(security): the expression is model-generated, i.e. untrusted input. `eval` is only
	// reached after the whitelist check above; replace it with a real expression parser
	// before reusing this code outside of an example.
	const result: unknown = eval(expression);
	if (typeof result !== "number" || !Number.isFinite(result)) {
		throw new Error(`Expression did not evaluate to a finite number: ${expression}`);
	}
	return result;
}

const ai = new AnthropicAI("claude-sonnet-4-0");
const context: Request = {
	messages: [
		{
			role: "user",
			content:
				"Think about birds briefly. Then give me a list of 10 birds. Finally, calculate 42 * 17 + 123 and 453 + 434 in parallel using the calculator tool.",
		},
	],
	tools,
	onText: (t) => process.stdout.write(t),
	onThinking: (t) => process.stdout.write(chalk.dim(t)),
};

const options = { thinking: { enabled: true } };
let msg = await ai.complete(context, options);
console.log(JSON.stringify(msg, null, 2));

if (msg.stopResaon === "error") {
	// A failed turn carries no content to continue from, so stop instead of replaying it.
	process.exit(1);
}
context.messages.push(msg);

// Every tool call gets exactly one tool result, including failures and unknown tools.
for (const toolCall of msg.toolCalls || []) {
	if (toolCall.name !== "calculate") {
		context.messages.push({
			role: "toolResult",
			content: `Unknown tool: ${toolCall.name}`,
			toolCallId: toolCall.id,
			isError: true,
		});
		continue;
	}

	const expression = toolCall.arguments.expression;
	try {
		const result = evaluateArithmetic(expression);
		context.messages.push({
			role: "toolResult",
			content: `The result of ${expression} is ${result}.`,
			toolCallId: toolCall.id,
			isError: false,
		});
	} catch (error) {
		context.messages.push({
			role: "toolResult",
			content: error instanceof Error ? error.message : String(error),
			toolCallId: toolCall.id,
			isError: true,
		});
	}
}

msg = await ai.complete(context, options);
console.log(JSON.stringify(msg, null, 2));
**Streaming First** - All providers support streaming; non-streaming responses are built by collecting the streamed events +3. **Provider Adapters** - OpenAI, Anthropic, Gemini adapters +4. **Event Normalization** - Consistent event types across providers +5. **Tool/Function Calling** - Unified interface for tools across providers +6. **Thinking/Reasoning** - Support for reasoning models (o1/o3, Claude thinking, Gemini thinking) +7. **Token Tracking** - Usage and cost calculation +8. **Abort Support** - Request cancellation via AbortController +9. **Error Mapping** - Normalized error handling +10. **Caching** - Automatic caching strategies per provider + +### Provider-Specific Handling + +#### OpenAI +- Dual APIs: Chat Completions vs Responses API +- Responses API for o1/o3 reasoning content +- Developer role for o1/o3 system prompts +- Stream options for token usage + +#### Anthropic +- Content blocks always arrays +- Separate system parameter +- Tool results as user messages +- Explicit thinking budget allocation +- Cache control per block + +#### Gemini +- Parts-based content system +- Separate systemInstruction parameter +- Model role instead of assistant +- Thinking via part.thought flag +- Function calls in parts array + +## Implementation Structure + +``` +packages/ai/ +├── src/ +│ ├── index.ts # Main exports +│ ├── types.ts # Unified type definitions +│ ├── client.ts # Main AI client class +│ ├── adapters/ +│ │ ├── base.ts # Base adapter interface +│ │ ├── openai.ts # OpenAI adapter +│ │ ├── anthropic.ts # Anthropic adapter +│ │ └── gemini.ts # Gemini adapter +│ ├── models/ +│ │ ├── models.ts # Model info lookup +│ │ └── models-data.ts # Generated models database +│ ├── errors.ts # Error mapping +│ ├── events.ts # Event stream handling +│ ├── costs.ts # Cost tracking +│ └── utils.ts # Utility functions +├── test/ +│ ├── openai.test.ts +│ ├── anthropic.test.ts +│ └── gemini.test.ts +├── scripts/ +│ └── update-models.ts # Update models database +├── package.json +├── tsconfig.build.json +└── 
README.md +``` + +## Dependencies +- `openai`: ^5.12.2 (for OpenAI SDK) +- `@anthropic-ai/sdk`: Latest +- `@google/genai`: Latest + +## Files to Create/Modify + +### New Files in packages/ai/ +1. `package.json` - Package configuration +2. `tsconfig.build.json` - TypeScript build config +3. `src/index.ts` - Main exports +4. `src/types.ts` - Type definitions +5. `src/client.ts` - Main AI class +6. `src/adapters/base.ts` - Base adapter +7. `src/adapters/openai.ts` - OpenAI implementation +8. `src/adapters/anthropic.ts` - Anthropic implementation +9. `src/adapters/gemini.ts` - Gemini implementation +10. `src/models/models.ts` - Model info +11. `src/errors.ts` - Error handling +12. `src/events.ts` - Event streaming +13. `src/costs.ts` - Cost tracking +14. `README.md` - Package documentation + +### Files to Modify +1. Root `tsconfig.json` - Add path mapping for @mariozechner/pi-ai +2. Root `package.json` - Add to build script order + +## Implementation Strategy + +### Phase 1: Core Structure +- Create package structure and configuration +- Define unified types and interfaces +- Implement base adapter interface + +### Phase 2: Provider Adapters +- Implement OpenAI adapter (both APIs) +- Implement Anthropic adapter +- Implement Gemini adapter + +### Phase 3: Features +- Add streaming support +- Implement tool calling +- Add thinking/reasoning support +- Implement token tracking + +### Phase 4: Polish +- Error mapping and handling +- Cost calculation +- Model information database +- Documentation and examples + +## Testing Approach +- Unit tests for each adapter +- Integration tests with mock responses +- Example scripts for manual testing +- Verify streaming, tools, thinking for each provider \ No newline at end of file diff --git a/todos/work/20250817-202050-ai-implementation-plan/task.md b/todos/work/20250817-202050-ai-implementation-plan/task.md new file mode 100644 index 00000000..d00e9e59 --- /dev/null +++ b/todos/work/20250817-202050-ai-implementation-plan/task.md @@ 
-0,0 +1,40 @@ +# AI Package Implementation Plan +**Status:** InProgress +**Agent PID:** 5114 + +## Original Todo +ai: create an implementation plan based on packages/ai/plan.md and implement it + +## Description +Implement the unified AI API as designed in packages/ai/plan.md. Create a single interface that works with OpenAI, Anthropic, and Gemini SDKs, handling their differences internally while exposing unified streaming events, tool calling, thinking/reasoning, and caching capabilities. + +*Read [plan.md](packages/ai/plan.md) in full for the complete API design and implementation details* + +## Implementation Plan +- [x] Define unified types in src/types.ts based on plan.md interfaces (AIConfig, Message, Request, Event, TokenUsage, ModelInfo) +- [ ] Implement OpenAI provider in src/providers/openai.ts with both Chat Completions and Responses API support +- [x] Implement Anthropic provider in src/providers/anthropic.ts with MessageStream and content blocks handling +- [ ] Implement Gemini provider in src/providers/gemini.ts with parts system and thinking extraction +- [ ] Create main AI class in src/index.ts that selects and uses appropriate adapter +- [ ] Implement models database in src/models.ts with model information and cost data +- [ ] Add cost calculation integrated into each adapter's token tracking +- [ ] Create comprehensive test suite in test/ai.test.ts using Node.js test framework +- [ ] Test: Model database lookup and capabilities detection +- [ ] Test: Basic completion (non-streaming) for all providers (OpenAI, Anthropic, Gemini, OpenRouter, Groq) +- [ ] Test: Streaming responses with event normalization across all providers +- [ ] Test: Thinking/reasoning extraction (o1 via Responses API, Claude thinking, Gemini thinking) +- [ ] Test: Tool calling flow with execution and continuation across providers +- [ ] Test: Automatic caching (Anthropic explicit, OpenAI/Gemini automatic) +- [ ] Test: Message serialization/deserialization with full conversation 
history +- [ ] Test: Cross-provider conversation continuation (start with one provider, continue with another) +- [ ] Test: Abort/cancellation via AbortController +- [ ] Test: Error handling and retry logic for each provider +- [ ] Test: Cost tracking accuracy with known token counts +- [ ] Update root tsconfig.json paths to include @mariozechner/pi-ai +- [ ] Update root package.json build script to include AI package + +## Notes +- Package structure already exists at packages/ai with dependencies installed +- Each adapter handles its own event normalization internally +- Tests use Node.js built-in test framework as per project conventions +- Available API keys: OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, GROQ_API_KEY, OPENROUTER_API_KEY \ No newline at end of file