diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index 5291dcfe..8439891d 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -6,7 +6,17 @@ import type { Tool, } from "@anthropic-ai/sdk/resources/messages.js"; import { calculateCost } from "../models.js"; -import type { AssistantMessage, Context, LLM, LLMOptions, Message, Model, StopReason, Usage } from "../types.js"; +import type { + AssistantMessage, + Context, + LLM, + LLMOptions, + Message, + Model, + StopReason, + ToolCall, + Usage, +} from "../types.js"; export interface AnthropicLLMOptions extends LLMOptions { thinking?: { @@ -119,8 +129,9 @@ export class AnthropicLLM implements LLM { }, ); - let blockType: "text" | "thinking" | "other" = "other"; + let blockType: "text" | "thinking" | "toolUse" | "other" = "other"; let blockContent = ""; + let toolCall: (ToolCall & { partialJson: string }) | null = null; for await (const event of stream) { if (event.type === "content_block_start") { if (event.content_block.type === "text") { @@ -131,6 +142,17 @@ export class AnthropicLLM implements LLM { blockType = "thinking"; blockContent = ""; options?.onEvent?.({ type: "thinking_start" }); + } else if (event.content_block.type === "tool_use") { + // We wait for the full tool use to be streamed to send the event + toolCall = { + type: "toolCall", + id: event.content_block.id, + name: event.content_block.name, + arguments: event.content_block.input as Record, + partialJson: "", + }; + blockType = "toolUse"; + blockContent = ""; } else { blockType = "other"; blockContent = ""; @@ -145,12 +167,24 @@ export class AnthropicLLM implements LLM { options?.onEvent?.({ type: "thinking_delta", content: blockContent, delta: event.delta.thinking }); blockContent += event.delta.thinking; } + if (event.delta.type === "input_json_delta") { + toolCall!.partialJson += event.delta.partial_json; + } } if (event.type === "content_block_stop") { if (blockType === "text") { options?.onEvent?.({ type: "text_end", content: blockContent }); } else if (blockType === "thinking") { options?.onEvent?.({ type: "thinking_end", content: blockContent }); + } else if (blockType === "toolUse") { + const finalToolCall: ToolCall = { + type: "toolCall", + id: toolCall!.id, + name: toolCall!.name, + arguments: toolCall!.partialJson ? JSON.parse(toolCall!.partialJson) : toolCall!.arguments, + }; + toolCall = null; + options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall }); } blockType = "other"; } @@ -194,16 +228,19 @@ export class AnthropicLLM implements LLM { }; calculateCost(this.modelInfo, usage); - return { + const output = { role: "assistant", content: blocks, provider: this.modelInfo.provider, model: this.modelInfo.id, usage, stopReason: this.mapStopReason(msg.stop_reason), - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "done", reason: output.stopReason, message: output }); + + return output; } catch (error) { - return { + const output = { role: "assistant", content: [], provider: this.modelInfo.provider, @@ -216,8 +253,10 @@ export class AnthropicLLM implements LLM { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }, stopReason: "error", - error: error instanceof Error ? error.message : String(error), - }; + error: error instanceof Error ? error.message : JSON.stringify(error), + } satisfies AssistantMessage; + options?.onEvent?.({ type: "error", error: output.error }); + return output; } } diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts index 1d3e0ff6..36f5aef6 100644 --- a/packages/ai/src/providers/google.ts +++ b/packages/ai/src/providers/google.ts @@ -148,7 +148,11 @@ export class GoogleLLM implements LLM { if (currentBlock.type === "thinking") { currentBlock.thinking += part.text; currentBlock.thinkingSignature = part.thoughtSignature; - options?.onEvent?.({type: "thinking_delta", content: currentBlock.thinking, delta: part.text }); + options?.onEvent?.({ + type: "thinking_delta", + content: currentBlock.thinking, + delta: part.text, + }); } else { currentBlock.text += part.text; options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: part.text }); diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index ca908078..4cfc1719 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -15,6 +15,8 @@ import type { Message, Model, StopReason, + TextContent, + ThinkingContent, Tool, ToolCall, Usage, @@ -90,10 +92,8 @@ export class OpenAICompletionsLLM implements LLM { signal: options?.signal, }); - let content = ""; - let reasoningContent = ""; - let reasoningField: "reasoning" | "reasoning_content" | null = null; - const parsedToolCalls: { id: string; name: string; arguments: string }[] = []; + const blocks: AssistantMessage["content"] = []; + let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null; let usage: Usage = { input: 0, output: 0, @@ -102,7 +102,6 @@ export class OpenAICompletionsLLM implements LLM { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null; - let blockType: "text" | "thinking" | null = null; for await (const chunk of stream) { if (chunk.usage) { usage = { @@ -132,13 +131,32 @@ export class OpenAICompletionsLLM implements LLM { choice.delta.content !== undefined && choice.delta.content.length > 0 ) { - if (blockType === "thinking") { - options?.onThinking?.("", true); - blockType = null; + // Check if we need to switch to text block + if (!currentBlock || currentBlock.type !== "text") { + // Save current block if exists + if (currentBlock) { + if (currentBlock.type === "thinking") { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } else if (currentBlock.type === "toolCall") { + currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}"); + delete currentBlock.partialArgs; + options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall }); + } + blocks.push(currentBlock); + } + // Start new text block + currentBlock = { type: "text", text: "" }; + options?.onEvent?.({ type: "text_start" }); + } + // Append to text block + if (currentBlock.type === "text") { + options?.onEvent?.({ + type: "text_delta", + content: currentBlock.text, + delta: choice.delta.content, + }); + currentBlock.text += choice.delta.content; } - content += choice.delta.content; - options?.onText?.(choice.delta.content, false); - blockType = "text"; } // Handle reasoning_content field @@ -146,55 +164,98 @@ export class OpenAICompletionsLLM implements LLM { (choice.delta as any).reasoning_content !== null && (choice.delta as any).reasoning_content !== undefined ) { - if (blockType === "text") { - options?.onText?.("", true); - blockType = null; + // Check if we need to switch to thinking block + if (!currentBlock || currentBlock.type !== "thinking") { + // Save current block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else if (currentBlock.type === "toolCall") { + currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}"); + delete currentBlock.partialArgs; + options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall }); + } + blocks.push(currentBlock); + } + // Start new thinking block + currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning_content" }; + options?.onEvent?.({ type: "thinking_start" }); + } + // Append to thinking block + if (currentBlock.type === "thinking") { + const delta = (choice.delta as any).reasoning_content; + options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta }); + currentBlock.thinking += delta; } - reasoningContent += (choice.delta as any).reasoning_content; - reasoningField = "reasoning_content"; - options?.onThinking?.((choice.delta as any).reasoning_content, false); - blockType = "thinking"; } // Handle reasoning field if ((choice.delta as any).reasoning !== null && (choice.delta as any).reasoning !== undefined) { - if (blockType === "text") { - options?.onText?.("", true); - blockType = null; + // Check if we need to switch to thinking block + if (!currentBlock || currentBlock.type !== "thinking") { + // Save current block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else if (currentBlock.type === "toolCall") { + currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}"); + delete currentBlock.partialArgs; + options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall }); + } + blocks.push(currentBlock); + } + // Start new thinking block + currentBlock = { type: "thinking", thinking: "", thinkingSignature: "reasoning" }; + options?.onEvent?.({ type: "thinking_start" }); + } + // Append to thinking block + if (currentBlock.type === "thinking") { + const delta = (choice.delta as any).reasoning; + options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta }); + currentBlock.thinking += delta; } - reasoningContent += (choice.delta as any).reasoning; - reasoningField = "reasoning"; - options?.onThinking?.((choice.delta as any).reasoning, false); - blockType = "thinking"; } // Handle tool calls if (choice?.delta?.tool_calls) { - if (blockType === "text") { - options?.onText?.("", true); - blockType = null; - } - if (blockType === "thinking") { - options?.onThinking?.("", true); - blockType = null; - } for (const toolCall of choice.delta.tool_calls) { + // Check if we need a new tool call block if ( - parsedToolCalls.length === 0 || - (toolCall.id !== undefined && parsedToolCalls[parsedToolCalls.length - 1].id !== toolCall.id) + !currentBlock || + currentBlock.type !== "toolCall" || + (toolCall.id && currentBlock.id !== toolCall.id) ) { - parsedToolCalls.push({ + // Save current block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else if (currentBlock.type === "thinking") { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } else if (currentBlock.type === "toolCall") { + currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}"); + delete currentBlock.partialArgs; + options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall }); + } + blocks.push(currentBlock); + } + + // Start new tool call block + currentBlock = { + type: "toolCall", id: toolCall.id || "", name: toolCall.function?.name || "", - arguments: "", - }); + arguments: {}, + partialArgs: "", + }; } - const current = parsedToolCalls[parsedToolCalls.length - 1]; - if (toolCall.id) current.id = toolCall.id; - if (toolCall.function?.name) current.name = toolCall.function.name; - if (toolCall.function?.arguments) { - current.arguments += toolCall.function.arguments; + // Accumulate tool call data + if (currentBlock.type === "toolCall") { + if (toolCall.id) currentBlock.id = toolCall.id; + if (toolCall.function?.name) currentBlock.name = toolCall.function.name; + if (toolCall.function?.arguments) { + currentBlock.partialArgs += toolCall.function.arguments; + } } } } @@ -202,42 +263,41 @@ export class OpenAICompletionsLLM implements LLM { // Capture finish reason if (choice.finish_reason) { - if (blockType === "text") { - options?.onText?.("", true); - blockType = null; - } - if (blockType === "thinking") { - options?.onThinking?.("", true); - blockType = null; - } finishReason = choice.finish_reason; } } - // Convert tool calls map to array - const toolCalls: ToolCall[] = parsedToolCalls.map((tc) => ({ - id: tc.id, - name: tc.name, - arguments: JSON.parse(tc.arguments), - })); + // Save final block if exists + if (currentBlock) { + if (currentBlock.type === "text") { + options?.onEvent?.({ type: "text_end", content: currentBlock.text }); + } else if (currentBlock.type === "thinking") { + options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking }); + } else if (currentBlock.type === "toolCall") { + currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}"); + delete currentBlock.partialArgs; + options?.onEvent?.({ type: "toolCall", toolCall: currentBlock as ToolCall }); + } + blocks.push(currentBlock); + } // Calculate cost calculateCost(this.modelInfo, usage); - return { + const output = { role: "assistant", - content: content || undefined, - thinking: reasoningContent || undefined, - thinkingSignature: reasoningField || undefined, - toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + content: blocks, provider: this.modelInfo.provider, model: this.modelInfo.id, usage, stopReason: this.mapStopReason(finishReason), - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "done", reason: output.stopReason, message: output }); + return output; } catch (error) { - return { + const output = { role: "assistant", + content: [], provider: this.modelInfo.provider, model: this.modelInfo.id, usage: { @@ -249,7 +309,9 @@ export class OpenAICompletionsLLM implements LLM { }, stopReason: "error", error: error instanceof Error ? error.message : String(error), - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "error", error: output.error || "Unknown error" }); + return output; } } @@ -302,16 +364,29 @@ export class OpenAICompletionsLLM implements LLM { } else if (msg.role === "assistant") { const assistantMsg: ChatCompletionMessageParam = { role: "assistant", - content: msg.content || null, + content: null, }; - // LLama.cpp server + gpt-oss - if (msg.thinking && msg.thinkingSignature && msg.thinkingSignature.length > 0) { - (assistantMsg as any)[msg.thinkingSignature] = msg.thinking; + // Build content from blocks + const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[]; + if (textBlocks.length > 0) { + assistantMsg.content = textBlocks.map((b) => b.text).join(""); } - if (msg.toolCalls) { - assistantMsg.tool_calls = msg.toolCalls.map((tc) => ({ + // Handle thinking blocks for llama.cpp server + gpt-oss + const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[]; + if (thinkingBlocks.length > 0) { + // Use the signature from the first thinking block if available + const signature = thinkingBlocks[0].thinkingSignature; + if (signature && signature.length > 0) { + (assistantMsg as any)[signature] = thinkingBlocks.map((b) => b.thinking).join(""); + } + } + + // Handle tool calls + const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[]; + if (toolCalls.length > 0) { + assistantMsg.tool_calls = toolCalls.map((tc) => ({ id: tc.id, type: "function" as const, function: { diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 2011da5a..01227c1c 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -69,8 +69,9 @@ export interface AssistantMessage { export interface ToolResultMessage { role: "toolResult"; - content: string; toolCallId: string; + toolName: string; + content: string; isError: boolean; } @@ -97,9 +98,8 @@ export type AssistantMessageEvent = | { type: "thinking_delta"; content: string; delta: string } | { type: "thinking_end"; content: string } | { type: "toolCall"; toolCall: ToolCall } - | { type: "usage"; usage: Usage } | { type: "done"; reason: StopReason; message: AssistantMessage } - | { type: "error"; error: Error }; + | { type: "error"; error: string }; // Model interface for the unified model system export interface Model { diff --git a/packages/ai/test/providers.test.ts b/packages/ai/test/providers.test.ts index 36dd28d7..f2e0f9bd 100644 --- a/packages/ai/test/providers.test.ts +++ b/packages/ai/test/providers.test.ts @@ -47,7 +47,7 @@ async function basicTextGeneration(llm: LLM) { expect(response.usage.input).toBeGreaterThan(0); expect(response.usage.output).toBeGreaterThan(0); expect(response.error).toBeFalsy(); - expect(response.content.map(b => b.type == "text" ? b.text : "").join("\n")).toContain("Hello test successful"); + expect(response.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Hello test successful"); context.messages.push(response); context.messages.push({ role: "user", content: "Now say 'Goodbye test successful'" }); @@ -56,10 +56,10 @@ async function basicTextGeneration(llm: LLM) { expect(secondResponse.role).toBe("assistant"); expect(secondResponse.content).toBeTruthy(); - expect(secondResponse.usage.input).toBeGreaterThan(0); + expect(secondResponse.usage.input + secondResponse.usage.cacheRead).toBeGreaterThan(0); expect(secondResponse.usage.output).toBeGreaterThan(0); expect(secondResponse.error).toBeFalsy(); - expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("\n")).toContain("Goodbye test successful"); + expect(secondResponse.content.map(b => b.type == "text" ? b.text : "").join("")).toContain("Goodbye test successful"); } async function handleToolCall(llm: LLM) { @@ -225,8 +225,9 @@ async function multiTurn(llm: LLM, thinkingOptions: T) // Add tool result to context context.messages.push({ role: "toolResult", - content: `${result}`, toolCallId: block.id, + toolName: block.name, + content: `${result}`, isError: false }); } @@ -275,6 +276,10 @@ describe("AI Providers E2E Tests", () => { it("should handle multi-turn with thinking and tools", async () => { await multiTurn(llm, {thinking: { enabled: true, budgetTokens: 2048 }}); }); + + it("should handle image input", async () => { + await handleImage(llm); + }); }); describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {