diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts index 4cfc1719..0979002e 100644 --- a/packages/ai/src/providers/openai-completions.ts +++ b/packages/ai/src/providers/openai-completions.ts @@ -150,19 +150,20 @@ export class OpenAICompletionsLLM implements LLM { } // Append to text block if (currentBlock.type === "text") { + currentBlock.text += choice.delta.content; options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: choice.delta.content, }); - currentBlock.text += choice.delta.content; } } // Handle reasoning_content field if ( (choice.delta as any).reasoning_content !== null && - (choice.delta as any).reasoning_content !== undefined + (choice.delta as any).reasoning_content !== undefined && + (choice.delta as any).reasoning_content.length > 0 ) { // Check if we need to switch to thinking block if (!currentBlock || currentBlock.type !== "thinking") { @@ -184,13 +185,17 @@ export class OpenAICompletionsLLM implements LLM { // Append to thinking block if (currentBlock.type === "thinking") { const delta = (choice.delta as any).reasoning_content; - options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta }); currentBlock.thinking += delta; + options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta }); } } // Handle reasoning field - if ((choice.delta as any).reasoning !== null && (choice.delta as any).reasoning !== undefined) { + if ( + (choice.delta as any).reasoning !== null && + (choice.delta as any).reasoning !== undefined && + (choice.delta as any).reasoning.length > 0 + ) { // Check if we need to switch to thinking block if (!currentBlock || currentBlock.type !== "thinking") { // Save current block if exists @@ -211,8 +216,8 @@ export class OpenAICompletionsLLM implements LLM { // Append to thinking block if (currentBlock.type === "thinking") { const delta = (choice.delta as any).reasoning; - options?.onEvent?.({ type: 
"thinking_delta", content: currentBlock.thinking, delta }); currentBlock.thinking += delta; + options?.onEvent?.({ type: "thinking_delta", content: currentBlock.thinking, delta }); } } diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts index 79bdd8a2..a0a34991 100644 --- a/packages/ai/src/providers/openai-responses.ts +++ b/packages/ai/src/providers/openai-responses.ts @@ -2,13 +2,14 @@ import OpenAI from "openai"; import type { Tool as OpenAITool, ResponseCreateParamsStreaming, + ResponseFunctionToolCall, ResponseInput, ResponseInputContent, ResponseInputImage, ResponseInputText, + ResponseOutputMessage, ResponseReasoningItem, } from "openai/resources/responses/responses.js"; -import type { ResponseOutputMessage } from "openai/resources/responses/responses.mjs"; import type { AssistantMessage, Context, @@ -17,6 +18,7 @@ import type { Message, Model, StopReason, + TextContent, Tool, ToolCall, Usage, @@ -83,11 +85,9 @@ export class OpenAIResponsesLLM implements LLM { signal: options?.signal, }); - let content = ""; - let contentSignature = ""; - let thinking = ""; - const toolCalls: ToolCall[] = []; - const reasoningItems: ResponseReasoningItem[] = []; + const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = []; // any for function_call items + let currentTextAccum = ""; // For delta accumulation + let currentThinkingAccum = ""; // For delta accumulation let usage: Usage = { input: 0, output: 0, @@ -98,41 +98,61 @@ export class OpenAIResponsesLLM implements LLM { let stopReason: StopReason = "stop"; for await (const event of stream) { - // Handle reasoning summary for models that support it - if (event.type === "response.reasoning_summary_text.delta") { - const delta = event.delta; - thinking += delta; - options?.onThinking?.(delta, false); - } else if (event.type === "response.reasoning_summary_text.done") { - if (event.text) { - thinking = event.text; + // Handle 
output item start + if (event.type === "response.output_item.added") { + const item = event.item; + if (item.type === "reasoning") { + options?.onEvent?.({ type: "thinking_start" }); + currentThinkingAccum = ""; + } else if (item.type === "message") { + options?.onEvent?.({ type: "text_start" }); + currentTextAccum = ""; } - options?.onThinking?.("", true); } - // Handle main text output + // Handle reasoning summary deltas + else if (event.type === "response.reasoning_summary_text.delta") { + const delta = event.delta; + currentThinkingAccum += delta; + options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta }); + } + // Add a new line between summary parts (hack...) + else if (event.type === "response.reasoning_summary_part.done") { + currentThinkingAccum += "\n\n"; + options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta: "\n\n" }); + } + // Handle text output deltas else if (event.type === "response.output_text.delta") { const delta = event.delta; - content += delta; - options?.onText?.(delta, false); - } else if (event.type === "response.output_text.done") { - if (event.text) { - content = event.text; - } - options?.onText?.("", true); - contentSignature = event.item_id; + currentTextAccum += delta; + options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta }); } - // Handle function calls + // Handle refusal output deltas + else if (event.type === "response.refusal.delta") { + const delta = event.delta; + currentTextAccum += delta; + options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta }); + } + // Handle output item completion else if (event.type === "response.output_item.done") { const item = event.item; - if (item?.type === "function_call") { - toolCalls.push({ + + if (item.type === "reasoning") { + const thinkingContent = item.summary?.map((s: any) => s.text).join("\n\n") || ""; + options?.onEvent?.({ type: "thinking_end", content: thinkingContent }); + 
outputItems.push(item); + } else if (item.type === "message") { + const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""); + options?.onEvent?.({ type: "text_end", content: textContent }); + outputItems.push(item); + } else if (item.type === "function_call") { + const toolCall: ToolCall = { + type: "toolCall", id: item.call_id + "|" + item.id, name: item.name, arguments: JSON.parse(item.arguments), - }); - } - if (item.type === "reasoning") { - reasoningItems.push(item); + }; + options?.onEvent?.({ type: "toolCall", toolCall }); + outputItems.push(item); } } // Handle completion @@ -150,38 +170,68 @@ export class OpenAIResponsesLLM implements LLM { // Map status to stop reason stopReason = this.mapStopReason(response?.status); - if (toolCalls.length > 0 && stopReason === "stop") { - stopReason = "toolUse"; - } } // Handle errors else if (event.type === "error") { - return { + const errorOutput = { role: "assistant", + content: [], provider: this.modelInfo.provider, model: this.modelInfo.id, usage, stopReason: "error", error: `Code ${event.code}: ${event.message}` || "Unknown error", - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "error", error: errorOutput.error || "Unknown error" }); + return errorOutput; } } - return { + // Convert output items to blocks + const blocks: AssistantMessage["content"] = []; + + for (const item of outputItems) { + if (item.type === "reasoning") { + blocks.push({ + type: "thinking", + thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "", + thinkingSignature: JSON.stringify(item), // Full item for resubmission + }); + } else if (item.type === "message") { + blocks.push({ + type: "text", + text: item.content.map((c) => (c.type === "output_text" ? 
c.text : c.refusal)).join(""), + textSignature: item.id, // ID for resubmission + }); + } else if (item.type === "function_call") { + blocks.push({ + type: "toolCall", + id: item.call_id + "|" + item.id, + name: item.name, + arguments: JSON.parse(item.arguments), + }); + } + } + + // Check if we have tool calls for stop reason + if (blocks.some((b) => b.type === "toolCall") && stopReason === "stop") { + stopReason = "toolUse"; + } + + const output = { role: "assistant", - content: content || undefined, - contentSignature: contentSignature || undefined, - thinking: thinking || undefined, - thinkingSignature: JSON.stringify(reasoningItems) || undefined, - toolCalls: toolCalls.length > 0 ? toolCalls : undefined, + content: blocks, provider: this.modelInfo.provider, model: this.modelInfo.id, usage, stopReason, - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "done", reason: output.stopReason, message: output }); + return output; } catch (error) { - return { + const output = { role: "assistant", + content: [], provider: this.modelInfo.provider, model: this.modelInfo.id, usage: { @@ -193,7 +243,9 @@ export class OpenAIResponsesLLM implements LLM { }, stopReason: "error", error: error instanceof Error ? 
error.message : String(error), - }; + } satisfies AssistantMessage; + options?.onEvent?.({ type: "error", error: output.error || "Unknown error" }); + return output; } } @@ -241,13 +293,27 @@ export class OpenAIResponsesLLM implements LLM { }); } } else if (msg.role === "assistant") { - // Assistant messages - add both content and tool calls to output + // Process content blocks in order const output: ResponseInput = []; - if (msg.thinkingSignature) { - output.push(...JSON.parse(msg.thinkingSignature)); - } - if (msg.toolCalls) { - for (const toolCall of msg.toolCalls) { + + for (const block of msg.content) { + if (block.type === "thinking") { + // Push the full reasoning item(s) from signature + if (block.thinkingSignature) { + const reasoningItem = JSON.parse(block.thinkingSignature); + output.push(reasoningItem); + } + } else if (block.type === "text") { + const textBlock = block as TextContent; + output.push({ + type: "message", + role: "assistant", + content: [{ type: "output_text", text: textBlock.text, annotations: [] }], + status: "completed", + id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15), + } satisfies ResponseOutputMessage); + } else if (block.type === "toolCall") { + const toolCall = block as ToolCall; output.push({ type: "function_call", id: toolCall.id.split("|")[1], // Extract original ID @@ -257,15 +323,7 @@ export class OpenAIResponsesLLM implements LLM { }); } } - if (msg.content) { - output.push({ - type: "message", - role: "assistant", - content: [{ type: "output_text", text: msg.content, annotations: [] }], - status: "completed", - id: msg.contentSignature || "msg_" + Math.random().toString(36).substring(2, 15), - } satisfies ResponseOutputMessage); - } + // Add all output items to input input.push(...output); } else if (msg.role === "toolResult") { diff --git a/packages/ai/test/providers.test.ts b/packages/ai/test/providers.test.ts index f2e0f9bd..25dd4fb1 100644 --- a/packages/ai/test/providers.test.ts 
+++ b/packages/ai/test/providers.test.ts @@ -203,7 +203,7 @@ async function multiTurn(llm: LLM, thinkingOptions: T) // Process content blocks for (const block of response.content) { if (block.type === "text") { - allTextContent += block.text + " "; + allTextContent += block.text; } else if (block.type === "thinking") { hasSeenThinking = true; } else if (block.type === "toolCall") { @@ -250,7 +250,7 @@ async function multiTurn(llm: LLM, thinkingOptions: T) } describe("AI Providers E2E Tests", () => { - describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider", () => { + describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => { let llm: GoogleLLM; beforeAll(() => { @@ -282,7 +282,7 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => { + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => { let llm: OpenAICompletionsLLM; beforeAll(() => { @@ -306,7 +306,7 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => { + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => { let llm: OpenAIResponsesLLM; beforeAll(() => { @@ -338,7 +338,7 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => { + describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-0)", () => { let llm: AnthropicLLM; beforeAll(() => { @@ -370,7 +370,35 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (via OpenAI Completions)", () => { + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (Haiku 3.5)", () => { + let llm: AnthropicLLM; + + beforeAll(() => { + llm = createLLM("anthropic", "claude-3-5-haiku-latest"); + }); + + it("should 
complete basic text generation", async () => { + await basicTextGeneration(llm); + }); + + it("should handle tool calling", async () => { + await handleToolCall(llm); + }); + + it("should handle streaming", async () => { + await handleStreaming(llm); + }); + + it("should handle multi-turn with thinking and tools", async () => { + await multiTurn(llm, {thinking: {enabled: true}}); + }); + + it("should handle image input", async () => { + await handleImage(llm); + }); + }); + + describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => { let llm: OpenAICompletionsLLM; beforeAll(() => { @@ -398,7 +426,7 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (via OpenAI Completions)", () => { + describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => { let llm: OpenAICompletionsLLM; beforeAll(() => { @@ -426,7 +454,7 @@ describe("AI Providers E2E Tests", () => { }); }); - describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (via OpenAI Completions)", () => { + describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => { let llm: OpenAICompletionsLLM; beforeAll(() => { @@ -454,11 +482,11 @@ }); }); - describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (via OpenAI Completions)", () => { + describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => { let llm: OpenAICompletionsLLM; beforeAll(() => { - llm = new OpenAICompletionsLLM(getModel("openrouter", "z-ai/glm-4.5")!, process.env.OPENROUTER_API_KEY!);; + llm = new OpenAICompletionsLLM(getModel("openrouter", "z-ai/glm-4.5v")!, process.env.OPENROUTER_API_KEY!); }); it("should complete basic text generation", async () => { @@ -480,6 +508,10 @@ describe("AI Providers E2E Tests", () => { it("should
handle multi-turn with thinking and tools", async () => { await multiTurn(llm, {reasoningEffort: "medium"}); }); + + it("should handle image input", async () => { + await handleImage(llm); + }); }); // Check if ollama is installed @@ -491,7 +523,7 @@ describe("AI Providers E2E Tests", () => { ollamaInstalled = false; } - describe.skipIf(!ollamaInstalled)("Ollama Provider (via OpenAI Completions)", () => { + describe.skipIf(!ollamaInstalled)("Ollama Provider (gpt-oss-20b via OpenAI Completions)", () => { let llm: OpenAICompletionsLLM; let ollamaProcess: ChildProcess | null = null; @@ -579,60 +611,4 @@ describe("AI Providers E2E Tests", () => { await multiTurn(llm, {reasoningEffort: "medium"}); }); }); - - describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (GLM 4.5)", () => { - let llm: OpenAICompletionsLLM; - - beforeAll(() => { - llm = createLLM("openrouter", "z-ai/glm-4.5", process.env.OPENROUTER_API_KEY!); - }); - - it("should complete basic text generation", async () => { - await basicTextGeneration(llm); - }); - - it("should handle tool calling", async () => { - await handleToolCall(llm); - }); - - it("should handle streaming", async () => { - await handleStreaming(llm); - }); - - it("should handle thinking mode", async () => { - await handleThinking(llm, {reasoningEffort: "medium"}); - }); - - it("should handle multi-turn with thinking and tools", async () => { - await multiTurn(llm, {reasoningEffort: "medium"}); - }); - }); - - describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (Haiku 3.5)", () => { - let llm: AnthropicLLM; - - beforeAll(() => { - llm = createLLM("anthropic", "claude-3-5-haiku-latest"); - }); - - it("should complete basic text generation", async () => { - await basicTextGeneration(llm); - }); - - it("should handle tool calling", async () => { - await handleToolCall(llm); - }); - - it("should handle streaming", async () => { - await handleStreaming(llm); - }); - - it("should handle multi-turn with thinking 
and tools", async () => { - await multiTurn(llm, {thinking: {enabled: true}}); - }); - - it("should handle image input", async () => { - await handleImage(llm); - }); - }); }); \ No newline at end of file