mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-19 18:04:41 +00:00
Fix streaming for z-ai in anthropic provider, add preliminary support for tool call streaming. Only reporting argument string deltas, not partial JSON objects
This commit is contained in:
parent
2bdb87dfe7
commit
98a876f3a0
21 changed files with 784 additions and 448 deletions
|
|
@ -4,7 +4,7 @@ import type {
|
|||
MessageCreateParamsStreaming,
|
||||
MessageParam,
|
||||
} from "@anthropic-ai/sdk/resources/messages.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { AssistantMessageEventStream } from "../event-stream.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
|
|
@ -12,7 +12,6 @@ import type {
|
|||
Context,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Message,
|
||||
Model,
|
||||
StopReason,
|
||||
|
|
@ -20,8 +19,9 @@ import type {
|
|||
ThinkingContent,
|
||||
Tool,
|
||||
ToolCall,
|
||||
ToolResultMessage,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
import { transformMessages } from "./transorm-messages.js";
|
||||
|
||||
export interface AnthropicOptions extends GenerateOptions {
|
||||
thinkingEnabled?: boolean;
|
||||
|
|
@ -33,8 +33,8 @@ export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
|
|||
model: Model<"anthropic-messages">,
|
||||
context: Context,
|
||||
options?: AnthropicOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
|
|
@ -59,93 +59,114 @@ export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
|
|||
const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
|
||||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
|
||||
type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string })) & { index: number };
|
||||
const blocks = output.content as Block[];
|
||||
|
||||
for await (const event of anthropicStream) {
|
||||
if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
currentBlock = {
|
||||
const block: Block = {
|
||||
type: "text",
|
||||
text: "",
|
||||
index: event.index,
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
output.content.push(block);
|
||||
stream.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output });
|
||||
} else if (event.content_block.type === "thinking") {
|
||||
currentBlock = {
|
||||
const block: Block = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "",
|
||||
index: event.index,
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
output.content.push(block);
|
||||
stream.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output });
|
||||
} else if (event.content_block.type === "tool_use") {
|
||||
// We wait for the full tool use to be streamed
|
||||
currentBlock = {
|
||||
const block: Block = {
|
||||
type: "toolCall",
|
||||
id: event.content_block.id,
|
||||
name: event.content_block.name,
|
||||
arguments: event.content_block.input as Record<string, any>,
|
||||
partialJson: "",
|
||||
index: event.index,
|
||||
};
|
||||
output.content.push(block);
|
||||
stream.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output });
|
||||
}
|
||||
} else if (event.type === "content_block_delta") {
|
||||
if (event.delta.type === "text_delta") {
|
||||
if (currentBlock && currentBlock.type === "text") {
|
||||
currentBlock.text += event.delta.text;
|
||||
const index = blocks.findIndex((b) => b.index === event.index);
|
||||
const block = blocks[index];
|
||||
if (block && block.type === "text") {
|
||||
block.text += event.delta.text;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: index,
|
||||
delta: event.delta.text,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "thinking_delta") {
|
||||
if (currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinking += event.delta.thinking;
|
||||
const index = blocks.findIndex((b) => b.index === event.index);
|
||||
const block = blocks[index];
|
||||
if (block && block.type === "thinking") {
|
||||
block.thinking += event.delta.thinking;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: index,
|
||||
delta: event.delta.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "input_json_delta") {
|
||||
if (currentBlock && currentBlock.type === "toolCall") {
|
||||
currentBlock.partialJson += event.delta.partial_json;
|
||||
const index = blocks.findIndex((b) => b.index === event.index);
|
||||
const block = blocks[index];
|
||||
if (block && block.type === "toolCall") {
|
||||
block.partialJson += event.delta.partial_json;
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: index,
|
||||
delta: event.delta.partial_json,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
} else if (event.delta.type === "signature_delta") {
|
||||
if (currentBlock && currentBlock.type === "thinking") {
|
||||
currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
|
||||
currentBlock.thinkingSignature += event.delta.signature;
|
||||
const index = blocks.findIndex((b) => b.index === event.index);
|
||||
const block = blocks[index];
|
||||
if (block && block.type === "thinking") {
|
||||
block.thinkingSignature = block.thinkingSignature || "";
|
||||
block.thinkingSignature += event.delta.signature;
|
||||
}
|
||||
}
|
||||
} else if (event.type === "content_block_stop") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
const index = blocks.findIndex((b) => b.index === event.index);
|
||||
const block = blocks[index];
|
||||
if (block) {
|
||||
delete (block as any).index;
|
||||
if (block.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
contentIndex: index,
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
} else if (block.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
contentIndex: index,
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
const finalToolCall: ToolCall = {
|
||||
type: "toolCall",
|
||||
id: currentBlock.id,
|
||||
name: currentBlock.name,
|
||||
arguments: JSON.parse(currentBlock.partialJson),
|
||||
};
|
||||
output.content.push(finalToolCall);
|
||||
} else if (block.type === "toolCall") {
|
||||
block.arguments = JSON.parse(block.partialJson);
|
||||
delete (block as any).partialJson;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: finalToolCall,
|
||||
type: "toolcall_end",
|
||||
contentIndex: index,
|
||||
toolCall: block,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = null;
|
||||
}
|
||||
} else if (event.type === "message_delta") {
|
||||
if (event.delta.stop_reason) {
|
||||
|
|
@ -166,6 +187,7 @@ export const streamAnthropic: GenerateFunction<"anthropic-messages"> = (
|
|||
stream.push({ type: "done", reason: output.stopReason, message: output });
|
||||
stream.end();
|
||||
} catch (error) {
|
||||
for (const block of output.content) delete (block as any).index;
|
||||
output.stopReason = "error";
|
||||
output.error = error instanceof Error ? error.message : JSON.stringify(error);
|
||||
stream.push({ type: "error", error: output.error, partial: output });
|
||||
|
|
@ -294,7 +316,9 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
|||
// Transform messages for cross-provider compatibility
|
||||
const transformedMessages = transformMessages(messages, model);
|
||||
|
||||
for (const msg of transformedMessages) {
|
||||
for (let i = 0; i < transformedMessages.length; i++) {
|
||||
const msg = transformedMessages[i];
|
||||
|
||||
if (msg.role === "user") {
|
||||
if (typeof msg.content === "string") {
|
||||
if (msg.content.trim().length > 0) {
|
||||
|
|
@ -366,16 +390,37 @@ function convertMessages(messages: Message[], model: Model<"anthropic-messages">
|
|||
content: blocks,
|
||||
});
|
||||
} else if (msg.role === "toolResult") {
|
||||
// Collect all consecutive toolResult messages
|
||||
const toolResults: ContentBlockParam[] = [];
|
||||
|
||||
// Add the current tool result
|
||||
toolResults.push({
|
||||
type: "tool_result",
|
||||
tool_use_id: sanitizeToolCallId(msg.toolCallId),
|
||||
content: msg.output,
|
||||
is_error: msg.isError,
|
||||
});
|
||||
|
||||
// Look ahead for consecutive toolResult messages
|
||||
let j = i + 1;
|
||||
while (j < transformedMessages.length && transformedMessages[j].role === "toolResult") {
|
||||
const nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult
|
||||
toolResults.push({
|
||||
type: "tool_result",
|
||||
tool_use_id: sanitizeToolCallId(nextMsg.toolCallId),
|
||||
content: nextMsg.output,
|
||||
is_error: nextMsg.isError,
|
||||
});
|
||||
j++;
|
||||
}
|
||||
|
||||
// Skip the messages we've already processed
|
||||
i = j - 1;
|
||||
|
||||
// Add a single user message with all tool results
|
||||
params.push({
|
||||
role: "user",
|
||||
content: [
|
||||
{
|
||||
type: "tool_result",
|
||||
tool_use_id: sanitizeToolCallId(msg.toolCallId),
|
||||
content: msg.content,
|
||||
is_error: msg.isError,
|
||||
},
|
||||
],
|
||||
content: toolResults,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import {
|
|||
GoogleGenAI,
|
||||
type Part,
|
||||
} from "@google/genai";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { AssistantMessageEventStream } from "../event-stream.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
|
|
@ -15,7 +15,6 @@ import type {
|
|||
Context,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
|
|
@ -23,7 +22,7 @@ import type {
|
|||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
import { transformMessages } from "./transorm-messages.js";
|
||||
|
||||
export interface GoogleOptions extends GenerateOptions {
|
||||
toolChoice?: "auto" | "none" | "any";
|
||||
|
|
@ -40,8 +39,8 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
model: Model<"google-generative-ai">,
|
||||
context: Context,
|
||||
options?: GoogleOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
|
|
@ -67,6 +66,8 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
|
||||
stream.push({ type: "start", partial: output });
|
||||
let currentBlock: TextContent | ThinkingContent | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
for await (const chunk of googleStream) {
|
||||
const candidate = chunk.candidates?.[0];
|
||||
if (candidate?.content?.parts) {
|
||||
|
|
@ -82,12 +83,14 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blocks.length - 1,
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -95,10 +98,10 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
}
|
||||
if (isThinking) {
|
||||
currentBlock = { type: "thinking", thinking: "", thinkingSignature: undefined };
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
||||
} else {
|
||||
currentBlock = { type: "text", text: "" };
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
output.content.push(currentBlock);
|
||||
}
|
||||
|
|
@ -107,12 +110,18 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
currentBlock.thinkingSignature = part.thoughtSignature;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
currentBlock.text += part.text;
|
||||
stream.push({ type: "text_delta", delta: part.text, partial: output });
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: part.text,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -121,12 +130,14 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -149,7 +160,14 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
arguments: part.functionCall.args as Record<string, any>,
|
||||
};
|
||||
output.content.push(toolCall);
|
||||
stream.push({ type: "toolCall", toolCall, partial: output });
|
||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: JSON.stringify(toolCall.arguments),
|
||||
partial: output,
|
||||
});
|
||||
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -182,9 +200,19 @@ export const streamGoogle: GenerateFunction<"google-generative-ai"> = (
|
|||
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({ type: "text_end", content: currentBlock.text, partial: output });
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else {
|
||||
stream.push({ type: "thinking_end", content: currentBlock.thinking, partial: output });
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -333,7 +361,7 @@ function convertMessages(model: Model<"google-generative-ai">, context: Context)
|
|||
id: msg.toolCallId,
|
||||
name: msg.toolName,
|
||||
response: {
|
||||
result: msg.content,
|
||||
result: msg.output,
|
||||
isError: msg.isError,
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -7,14 +7,13 @@ import type {
|
|||
ChatCompletionContentPartText,
|
||||
ChatCompletionMessageParam,
|
||||
} from "openai/resources/chat/completions.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { AssistantMessageEventStream } from "../event-stream.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
AssistantMessage,
|
||||
Context,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
|
|
@ -22,7 +21,7 @@ import type {
|
|||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
import { transformMessages } from "./transorm-messages.js";
|
||||
|
||||
export interface OpenAICompletionsOptions extends GenerateOptions {
|
||||
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
|
||||
|
|
@ -33,8 +32,8 @@ export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
|||
model: Model<"openai-completions">,
|
||||
context: Context,
|
||||
options?: OpenAICompletionsOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
(async () => {
|
||||
const output: AssistantMessage = {
|
||||
|
|
@ -60,6 +59,37 @@ export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
|||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentBlock: TextContent | ThinkingContent | (ToolCall & { partialArgs?: string }) | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
const finishCurrentBlock = (block?: typeof currentBlock) => {
|
||||
if (block) {
|
||||
if (block.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (block.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: block.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (block.type === "toolCall") {
|
||||
block.arguments = JSON.parse(block.partialArgs || "{}");
|
||||
delete block.partialArgs;
|
||||
stream.push({
|
||||
type: "toolcall_end",
|
||||
contentIndex: blockIndex(),
|
||||
toolCall: block,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for await (const chunk of openaiStream) {
|
||||
if (chunk.usage) {
|
||||
output.usage = {
|
||||
|
|
@ -94,119 +124,53 @@ export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
|||
choice.delta.content.length > 0
|
||||
) {
|
||||
if (!currentBlock || currentBlock.type !== "text") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
|
||||
if (currentBlock.type === "text") {
|
||||
currentBlock.text += choice.delta.content;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: choice.delta.content,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Some endpoints return reasoning in reasoning_content (llama.cpp)
|
||||
if (
|
||||
(choice.delta as any).reasoning_content !== null &&
|
||||
(choice.delta as any).reasoning_content !== undefined &&
|
||||
(choice.delta as any).reasoning_content.length > 0
|
||||
) {
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
||||
// or reasoning (other openai compatible endpoints)
|
||||
const reasoningFields = ["reasoning_content", "reasoning"];
|
||||
for (const field of reasoningFields) {
|
||||
if (
|
||||
(choice.delta as any)[field] !== null &&
|
||||
(choice.delta as any)[field] !== undefined &&
|
||||
(choice.delta as any)[field].length > 0
|
||||
) {
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: field,
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "reasoning_content",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
}
|
||||
|
||||
if (currentBlock.type === "thinking") {
|
||||
const delta = (choice.delta as any).reasoning_content;
|
||||
currentBlock.thinking += delta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Some endpoints return reasoning in reasining (ollama, xAI, ...)
|
||||
if (
|
||||
(choice.delta as any).reasoning !== null &&
|
||||
(choice.delta as any).reasoning !== undefined &&
|
||||
(choice.delta as any).reasoning.length > 0
|
||||
) {
|
||||
if (!currentBlock || currentBlock.type !== "thinking") {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
if (currentBlock.type === "thinking") {
|
||||
const delta = (choice.delta as any)[field];
|
||||
currentBlock.thinking += delta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
currentBlock = {
|
||||
type: "thinking",
|
||||
thinking: "",
|
||||
thinkingSignature: "reasoning",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
}
|
||||
|
||||
if (currentBlock.type === "thinking") {
|
||||
const delta = (choice.delta as any).reasoning;
|
||||
currentBlock.thinking += delta;
|
||||
stream.push({ type: "thinking_delta", delta, partial: output });
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -217,30 +181,7 @@ export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
|||
currentBlock.type !== "toolCall" ||
|
||||
(toolCall.id && currentBlock.id !== toolCall.id)
|
||||
) {
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
finishCurrentBlock(currentBlock);
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: toolCall.id || "",
|
||||
|
|
@ -249,43 +190,30 @@ export const streamOpenAICompletions: GenerateFunction<"openai-completions"> = (
|
|||
partialArgs: "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
|
||||
if (currentBlock.type === "toolCall") {
|
||||
if (toolCall.id) currentBlock.id = toolCall.id;
|
||||
if (toolCall.function?.name) currentBlock.name = toolCall.function.name;
|
||||
let delta = "";
|
||||
if (toolCall.function?.arguments) {
|
||||
delta = toolCall.function.arguments;
|
||||
currentBlock.partialArgs += toolCall.function.arguments;
|
||||
}
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (currentBlock) {
|
||||
if (currentBlock.type === "text") {
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "thinking") {
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
} else if (currentBlock.type === "toolCall") {
|
||||
currentBlock.arguments = JSON.parse(currentBlock.partialArgs || "{}");
|
||||
delete currentBlock.partialArgs;
|
||||
stream.push({
|
||||
type: "toolCall",
|
||||
toolCall: currentBlock as ToolCall,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
finishCurrentBlock(currentBlock);
|
||||
|
||||
if (options?.signal?.aborted) {
|
||||
throw new Error("Request was aborted");
|
||||
|
|
@ -438,7 +366,7 @@ function convertMessages(model: Model<"openai-completions">, context: Context):
|
|||
} else if (msg.role === "toolResult") {
|
||||
params.push({
|
||||
role: "tool",
|
||||
content: msg.content,
|
||||
content: msg.output,
|
||||
tool_call_id: msg.toolCallId,
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import type {
|
|||
ResponseOutputMessage,
|
||||
ResponseReasoningItem,
|
||||
} from "openai/resources/responses/responses.js";
|
||||
import { QueuedGenerateStream } from "../generate.js";
|
||||
import { AssistantMessageEventStream } from "../event-stream.js";
|
||||
import { calculateCost } from "../models.js";
|
||||
import type {
|
||||
Api,
|
||||
|
|
@ -18,7 +18,6 @@ import type {
|
|||
Context,
|
||||
GenerateFunction,
|
||||
GenerateOptions,
|
||||
GenerateStream,
|
||||
Model,
|
||||
StopReason,
|
||||
TextContent,
|
||||
|
|
@ -26,7 +25,7 @@ import type {
|
|||
Tool,
|
||||
ToolCall,
|
||||
} from "../types.js";
|
||||
import { transformMessages } from "./utils.js";
|
||||
import { transformMessages } from "./transorm-messages.js";
|
||||
|
||||
// OpenAI Responses-specific options
|
||||
export interface OpenAIResponsesOptions extends GenerateOptions {
|
||||
|
|
@ -41,8 +40,8 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
model: Model<"openai-responses">,
|
||||
context: Context,
|
||||
options?: OpenAIResponsesOptions,
|
||||
): GenerateStream => {
|
||||
const stream = new QueuedGenerateStream();
|
||||
): AssistantMessageEventStream => {
|
||||
const stream = new AssistantMessageEventStream();
|
||||
|
||||
// Start async processing
|
||||
(async () => {
|
||||
|
|
@ -70,7 +69,9 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
stream.push({ type: "start", partial: output });
|
||||
|
||||
let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;
|
||||
let currentBlock: ThinkingContent | TextContent | ToolCall | null = null;
|
||||
let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
|
||||
const blocks = output.content;
|
||||
const blockIndex = () => blocks.length - 1;
|
||||
|
||||
for await (const event of openaiStream) {
|
||||
// Handle output item start
|
||||
|
|
@ -80,12 +81,23 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
currentItem = item;
|
||||
currentBlock = { type: "thinking", thinking: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "thinking_start", partial: output });
|
||||
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
||||
} else if (item.type === "message") {
|
||||
currentItem = item;
|
||||
currentBlock = { type: "text", text: "" };
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "text_start", partial: output });
|
||||
stream.push({ type: "text_start", contentIndex: blockIndex(), partial: output });
|
||||
} else if (item.type === "function_call") {
|
||||
currentItem = item;
|
||||
currentBlock = {
|
||||
type: "toolCall",
|
||||
id: item.call_id + "|" + item.id,
|
||||
name: item.name,
|
||||
arguments: {},
|
||||
partialJson: item.arguments || "",
|
||||
};
|
||||
output.content.push(currentBlock);
|
||||
stream.push({ type: "toolcall_start", contentIndex: blockIndex(), partial: output });
|
||||
}
|
||||
}
|
||||
// Handle reasoning summary deltas
|
||||
|
|
@ -108,6 +120,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -129,6 +142,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
lastPart.text += "\n\n";
|
||||
stream.push({
|
||||
type: "thinking_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: "\n\n",
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -149,6 +163,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
lastPart.text += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -162,12 +177,36 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
lastPart.refusal += event.delta;
|
||||
stream.push({
|
||||
type: "text_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
// Handle function call argument deltas
|
||||
else if (event.type === "response.function_call_arguments.delta") {
|
||||
if (
|
||||
currentItem &&
|
||||
currentItem.type === "function_call" &&
|
||||
currentBlock &&
|
||||
currentBlock.type === "toolCall"
|
||||
) {
|
||||
currentBlock.partialJson += event.delta;
|
||||
try {
|
||||
const args = JSON.parse(currentBlock.partialJson);
|
||||
currentBlock.arguments = args;
|
||||
} catch {
|
||||
// Ignore JSON parse errors - the JSON might be incomplete
|
||||
}
|
||||
stream.push({
|
||||
type: "toolcall_delta",
|
||||
contentIndex: blockIndex(),
|
||||
delta: event.delta,
|
||||
partial: output,
|
||||
});
|
||||
}
|
||||
}
|
||||
// Handle output item completion
|
||||
else if (event.type === "response.output_item.done") {
|
||||
const item = event.item;
|
||||
|
|
@ -177,6 +216,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
currentBlock.thinkingSignature = JSON.stringify(item);
|
||||
stream.push({
|
||||
type: "thinking_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.thinking,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -186,6 +226,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
currentBlock.textSignature = item.id;
|
||||
stream.push({
|
||||
type: "text_end",
|
||||
contentIndex: blockIndex(),
|
||||
content: currentBlock.text,
|
||||
partial: output,
|
||||
});
|
||||
|
|
@ -197,8 +238,7 @@ export const streamOpenAIResponses: GenerateFunction<"openai-responses"> = (
|
|||
name: item.name,
|
||||
arguments: JSON.parse(item.arguments),
|
||||
};
|
||||
output.content.push(toolCall);
|
||||
stream.push({ type: "toolCall", toolCall, partial: output });
|
||||
stream.push({ type: "toolcall_end", contentIndex: blockIndex(), toolCall, partial: output });
|
||||
}
|
||||
}
|
||||
// Handle completion
|
||||
|
|
@ -398,7 +438,7 @@ function convertMessages(model: Model<"openai-responses">, context: Context): Re
|
|||
messages.push({
|
||||
type: "function_call_output",
|
||||
call_id: msg.toolCallId.split("|")[0],
|
||||
output: msg.content,
|
||||
output: msg.output,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue