refactor(ai): Update API to support partial results on abort

- Anthropic, Google, and OpenAI Responses providers now return partial results when aborted
- Restructured streaming to accumulate content blocks incrementally
- Prevents submission of thinking/toolCall blocks from aborted completions in multi-turn conversations
- Makes UI development easier by providing partial content even when requests are interrupted (usage sketch below)
Mario Zechner 2025-09-01 01:57:08 +02:00
parent 5d5cd7955b
commit bf1f410c2b
4 changed files with 244 additions and 280 deletions
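
The calling pattern this enables, sketched from the test changes in this commit (the model id, env var, and option shapes are the ones the tests below use; other providers accept the same signal option):

	import { AnthropicLLM } from "../src/providers/anthropic.js";
	import { getModel } from "../src/models.js";
	import type { Context } from "../src/types.js";

	const llm = new AnthropicLLM(getModel("anthropic", "claude-opus-4-1")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
	const controller = new AbortController();
	setTimeout(() => controller.abort(), 5000); // abort mid-stream

	const context: Context = {
		messages: [{ role: "user", content: "What is 15 + 27? Think step by step." }],
	};
	const response = await llm.complete(context, {
		thinking: { enabled: true, budgetTokens: 2048 },
		signal: controller.signal,
	});

	// The abort no longer throws away streamed output: stopReason is "error",
	// but response.content still holds the partially accumulated blocks.
	if (response.stopReason === "error") {
		console.log(`partial blocks: ${response.content.length}`, response.error);
	}

	// The partial message can go straight back into the conversation; providers
	// skip thinking/toolCall blocks from errored completions on resubmission.
	context.messages.push(response);
	context.messages.push({ role: "user", content: "Please continue." });
	const followUp = await llm.complete(context, { thinking: { enabled: true, budgetTokens: 2048 } });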

@@ -14,8 +14,9 @@ import type {
 	Message,
 	Model,
 	StopReason,
+	TextContent,
+	ThinkingContent,
 	ToolCall,
-	Usage,
 } from "../types.js";

 export interface AnthropicLLMOptions extends LLMOptions {
@@ -61,6 +62,21 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
 	}

 	async complete(context: Context, options?: AnthropicLLMOptions): Promise<AssistantMessage> {
+		const output: AssistantMessage = {
+			role: "assistant",
+			content: [],
+			provider: this.modelInfo.provider,
+			model: this.modelInfo.id,
+			usage: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+			},
+			stopReason: "stop",
+		};
 		try {
 			const messages = this.convertMessages(context.messages);
@@ -131,132 +147,94 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
 			options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-			let blockType: "text" | "thinking" | "toolUse" | "other" = "other";
-			let blockContent = "";
-			let toolCall: (ToolCall & { partialJson: string }) | null = null;
+			let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;

 			for await (const event of stream) {
 				if (event.type === "content_block_start") {
 					if (event.content_block.type === "text") {
-						blockType = "text";
-						blockContent = "";
+						currentBlock = {
+							type: "text",
+							text: "",
+						};
+						output.content.push(currentBlock);
 						options?.onEvent?.({ type: "text_start" });
 					} else if (event.content_block.type === "thinking") {
-						blockType = "thinking";
-						blockContent = "";
+						currentBlock = {
+							type: "thinking",
+							thinking: "",
+							thinkingSignature: "",
+						};
+						output.content.push(currentBlock);
 						options?.onEvent?.({ type: "thinking_start" });
 					} else if (event.content_block.type === "tool_use") {
 						// We wait for the full tool use to be streamed to send the event
-						toolCall = {
+						currentBlock = {
 							type: "toolCall",
 							id: event.content_block.id,
 							name: event.content_block.name,
 							arguments: event.content_block.input as Record<string, any>,
 							partialJson: "",
 						};
-						blockType = "toolUse";
-						blockContent = "";
-					} else {
-						blockType = "other";
-						blockContent = "";
 					}
-				}
-
-				if (event.type === "content_block_delta") {
+				} else if (event.type === "content_block_delta") {
 					if (event.delta.type === "text_delta") {
-						options?.onEvent?.({ type: "text_delta", content: blockContent, delta: event.delta.text });
-						blockContent += event.delta.text;
-					}
-					if (event.delta.type === "thinking_delta") {
-						options?.onEvent?.({ type: "thinking_delta", content: blockContent, delta: event.delta.thinking });
-						blockContent += event.delta.thinking;
-					}
-					if (event.delta.type === "input_json_delta") {
-						toolCall!.partialJson += event.delta.partial_json;
+						if (currentBlock && currentBlock.type === "text") {
+							currentBlock.text += event.delta.text;
+							options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: event.delta.text });
+						}
+					} else if (event.delta.type === "thinking_delta") {
+						if (currentBlock && currentBlock.type === "thinking") {
+							currentBlock.thinking += event.delta.thinking;
+							options?.onEvent?.({
+								type: "thinking_delta",
+								content: currentBlock.thinking,
+								delta: event.delta.thinking,
+							});
+						}
+					} else if (event.delta.type === "input_json_delta") {
+						if (currentBlock && currentBlock.type === "toolCall") {
+							currentBlock.partialJson += event.delta.partial_json;
+						}
+					} else if (event.delta.type === "signature_delta") {
+						if (currentBlock && currentBlock.type === "thinking") {
+							currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
+							currentBlock.thinkingSignature += event.delta.signature;
+						}
 					}
-				}
-
-				if (event.type === "content_block_stop") {
-					if (blockType === "text") {
-						options?.onEvent?.({ type: "text_end", content: blockContent });
-					} else if (blockType === "thinking") {
-						options?.onEvent?.({ type: "thinking_end", content: blockContent });
-					} else if (blockType === "toolUse") {
-						const finalToolCall: ToolCall = {
-							type: "toolCall",
-							id: toolCall!.id,
-							name: toolCall!.name,
-							arguments: toolCall!.partialJson ? JSON.parse(toolCall!.partialJson) : toolCall!.arguments,
-						};
-						toolCall = null;
-						options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
+				} else if (event.type === "content_block_stop") {
+					if (currentBlock) {
+						if (currentBlock.type === "text") {
+							options?.onEvent?.({ type: "text_end", content: currentBlock.text });
+						} else if (currentBlock.type === "thinking") {
+							options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
+						} else if (currentBlock.type === "toolCall") {
+							const finalToolCall: ToolCall = {
+								type: "toolCall",
+								id: currentBlock.id,
+								name: currentBlock.name,
+								arguments: JSON.parse(currentBlock.partialJson),
+							};
+							output.content.push(finalToolCall);
+							options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
+						}
+						currentBlock = null;
 					}
-					blockType = "other";
+				} else if (event.type === "message_delta") {
+					if (event.delta.stop_reason) {
+						output.stopReason = this.mapStopReason(event.delta.stop_reason);
+					}
+					output.usage.input += event.usage.input_tokens || 0;
+					output.usage.output += event.usage.output_tokens || 0;
+					output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
+					output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
+					calculateCost(this.modelInfo, output.usage);
 				}
 			}

-			const msg = await stream.finalMessage();
-			const blocks: AssistantMessage["content"] = [];
-			for (const block of msg.content) {
-				if (block.type === "text" && block.text) {
-					blocks.push({
-						type: "text",
-						text: block.text,
-					});
-				} else if (block.type === "thinking" && block.thinking) {
-					blocks.push({
-						type: "thinking",
-						thinking: block.thinking,
-						thinkingSignature: block.signature,
-					});
-				} else if (block.type === "tool_use") {
-					blocks.push({
-						type: "toolCall",
-						id: block.id,
-						name: block.name,
-						arguments: block.input as Record<string, any>,
-					});
-				}
-			}
-
-			const usage: Usage = {
-				input: msg.usage.input_tokens,
-				output: msg.usage.output_tokens,
-				cacheRead: msg.usage.cache_read_input_tokens || 0,
-				cacheWrite: msg.usage.cache_creation_input_tokens || 0,
-				cost: {
-					input: 0,
-					output: 0,
-					cacheRead: 0,
-					cacheWrite: 0,
-					total: 0,
-				},
-			};
-			calculateCost(this.modelInfo, usage);
-
-			const output = {
-				role: "assistant",
-				content: blocks,
-				provider: this.modelInfo.provider,
-				model: this.modelInfo.id,
-				usage,
-				stopReason: this.mapStopReason(msg.stop_reason),
-			} satisfies AssistantMessage;
-
 			options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
 			return output;
 		} catch (error) {
-			const output = {
-				role: "assistant",
-				content: [],
-				provider: this.modelInfo.provider,
-				model: this.modelInfo.id,
-				usage: {
-					input: 0,
-					output: 0,
-					cacheRead: 0,
-					cacheWrite: 0,
-					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-				},
-				stopReason: "error",
-				error: error instanceof Error ? error.message : JSON.stringify(error),
-			} satisfies AssistantMessage;
+			output.stopReason = "error";
+			output.error = error instanceof Error ? error.message : JSON.stringify(error);
 			options?.onEvent?.({ type: "error", error: output.error });
 			return output;
 		}

@@ -20,7 +20,6 @@ import type {
 	ThinkingContent,
 	Tool,
 	ToolCall,
-	Usage,
 } from "../types.js";

 export interface GoogleLLMOptions extends LLMOptions {
@@ -33,7 +32,7 @@ export interface GoogleLLMOptions extends LLMOptions {

 export class GoogleLLM implements LLM<GoogleLLMOptions> {
 	private client: GoogleGenAI;
-	private model: Model;
+	private modelInfo: Model;

 	constructor(model: Model, apiKey?: string) {
 		if (!apiKey) {
@@ -45,14 +44,28 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 			apiKey = process.env.GEMINI_API_KEY;
 		}
 		this.client = new GoogleGenAI({ apiKey });
-		this.model = model;
+		this.modelInfo = model;
 	}

 	getModel(): Model {
-		return this.model;
+		return this.modelInfo;
 	}

 	async complete(context: Context, options?: GoogleLLMOptions): Promise<AssistantMessage> {
+		const output: AssistantMessage = {
+			role: "assistant",
+			content: [],
+			provider: this.modelInfo.provider,
+			model: this.modelInfo.id,
+			usage: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+			},
+			stopReason: "stop",
+		};
 		try {
 			const contents = this.convertMessages(context.messages);
@@ -82,7 +95,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 			}

 			// Add thinking config if enabled and model supports it
-			if (options?.thinking?.enabled && this.model.reasoning) {
+			if (options?.thinking?.enabled && this.modelInfo.reasoning) {
 				config.thinkingConfig = {
 					includeThoughts: true,
 					...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
@@ -99,27 +112,15 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

 			// Build the request parameters
 			const params: GenerateContentParameters = {
-				model: this.model.id,
+				model: this.modelInfo.id,
 				contents,
 				config,
 			};

 			const stream = await this.client.models.generateContentStream(params);
-			options?.onEvent?.({ type: "start", model: this.model.id, provider: this.model.provider });
+			options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-			const blocks: AssistantMessage["content"] = [];
 			let currentBlock: TextContent | ThinkingContent | null = null;
-			let usage: Usage = {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-			};
-			let stopReason: StopReason = "stop";

-			// Process the stream
 			for await (const chunk of stream) {
 				// Extract parts from the chunk
 				const candidate = chunk.candidates?.[0];
@@ -134,14 +135,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 						(isThinking && currentBlock.type !== "thinking") ||
 						(!isThinking && currentBlock.type !== "text")
 					) {
-						// Save and finalize current block
 						if (currentBlock) {
 							if (currentBlock.type === "text") {
 								options?.onEvent?.({ type: "text_end", content: currentBlock.text });
 							} else {
 								options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
 							}
-							blocks.push(currentBlock);
 						}

 						// Start new block
@@ -152,6 +151,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 							currentBlock = { type: "text", text: "" };
 							options?.onEvent?.({ type: "text_start" });
 						}
+						output.content.push(currentBlock);
 					}

 					// Append content to current block
@@ -171,14 +171,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

 					// Handle function calls
 					if (part.functionCall) {
-						// Save current block if exists
 						if (currentBlock) {
 							if (currentBlock.type === "text") {
 								options?.onEvent?.({ type: "text_end", content: currentBlock.text });
 							} else {
 								options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
 							}
-							blocks.push(currentBlock);
 							currentBlock = null;
 						}
@@ -190,7 +188,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 							name: part.functionCall.name || "",
 							arguments: part.functionCall.args as Record<string, any>,
 						};
-						blocks.push(toolCall);
+						output.content.push(toolCall);
 						options?.onEvent?.({ type: "toolCall", toolCall });
 					}
 				}
@@ -198,16 +196,16 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

 				// Map finish reason
 				if (candidate?.finishReason) {
-					stopReason = this.mapStopReason(candidate.finishReason);
+					output.stopReason = this.mapStopReason(candidate.finishReason);
 					// Check if we have tool calls in blocks
-					if (blocks.some((b) => b.type === "toolCall")) {
-						stopReason = "toolUse";
+					if (output.content.some((b) => b.type === "toolCall")) {
+						output.stopReason = "toolUse";
 					}
 				}

 				// Capture usage metadata if available
 				if (chunk.usageMetadata) {
-					usage = {
+					output.usage = {
 						input: chunk.usageMetadata.promptTokenCount || 0,
 						output:
 							(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
@@ -221,47 +219,15 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
 							total: 0,
 						},
 					};
+					calculateCost(this.modelInfo, output.usage);
 				}
 			}

-			// Save final block if exists
-			if (currentBlock) {
-				if (currentBlock.type === "text") {
-					options?.onEvent?.({ type: "text_end", content: currentBlock.text });
-				} else {
-					options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
-				}
-				blocks.push(currentBlock);
-			}
-
-			calculateCost(this.model, usage);
-
-			const output = {
-				role: "assistant",
-				content: blocks,
-				provider: this.model.provider,
-				model: this.model.id,
-				usage,
-				stopReason,
-			} satisfies AssistantMessage;
-
-			options?.onEvent?.({ type: "done", reason: stopReason, message: output });
+			options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
 			return output;
 		} catch (error) {
-			const output = {
-				role: "assistant",
-				content: [],
-				provider: this.model.provider,
-				model: this.model.id,
-				usage: {
-					input: 0,
-					output: 0,
-					cacheRead: 0,
-					cacheWrite: 0,
-					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-				},
-				stopReason: "error",
-				error: error instanceof Error ? error.message : JSON.stringify(error),
-			} satisfies AssistantMessage;
+			output.stopReason = "error";
+			output.error = error instanceof Error ? error.message : JSON.stringify(error);
 			options?.onEvent?.({ type: "error", error: output.error });
 			return output;
 		}

@@ -22,7 +22,6 @@ import type {
 	TextContent,
 	Tool,
 	ToolCall,
-	Usage,
 } from "../types.js";

 export interface OpenAIResponsesLLMOptions extends LLMOptions {
@@ -52,6 +51,20 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 	}

 	async complete(request: Context, options?: OpenAIResponsesLLMOptions): Promise<AssistantMessage> {
+		const output: AssistantMessage = {
+			role: "assistant",
+			content: [],
+			provider: this.modelInfo.provider,
+			model: this.modelInfo.id,
+			usage: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+			},
+			stopReason: "stop",
+		};
 		try {
 			const input = this.convertToInput(request.messages, request.systemPrompt);
@@ -88,17 +101,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {

 			options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-			const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = []; // any for function_call items
-			let currentTextAccum = ""; // For delta accumulation
-			let currentThinkingAccum = ""; // For delta accumulation
-			let usage: Usage = {
-				input: 0,
-				output: 0,
-				cacheRead: 0,
-				cacheWrite: 0,
-				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-			};
-			let stopReason: StopReason = "stop";
+			const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = [];
+			let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;

 			for await (const event of stream) {
 				// Handle output item start
@@ -106,47 +110,91 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 					const item = event.item;
 					if (item.type === "reasoning") {
 						options?.onEvent?.({ type: "thinking_start" });
-						currentThinkingAccum = "";
+						outputItems.push(item);
+						currentItem = item;
 					} else if (item.type === "message") {
 						options?.onEvent?.({ type: "text_start" });
-						currentTextAccum = "";
+						outputItems.push(item);
+						currentItem = item;
 					}
 				}
 				// Handle reasoning summary deltas
-				else if (event.type === "response.reasoning_summary_text.delta") {
-					const delta = event.delta;
-					currentThinkingAccum += delta;
-					options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta });
+				else if (event.type === "response.reasoning_summary_part.added") {
+					if (currentItem && currentItem.type === "reasoning") {
+						currentItem.summary = currentItem.summary || [];
+						currentItem.summary.push(event.part);
+					}
+				} else if (event.type === "response.reasoning_summary_text.delta") {
+					if (currentItem && currentItem.type === "reasoning") {
+						currentItem.summary = currentItem.summary || [];
+						const lastPart = currentItem.summary[currentItem.summary.length - 1];
+						if (lastPart) {
+							lastPart.text += event.delta;
+							options?.onEvent?.({
+								type: "thinking_delta",
+								content: currentItem.summary.join("\n\n"),
+								delta: event.delta,
+							});
+						}
+					}
 				}
 				// Add a new line between summary parts (hack...)
 				else if (event.type === "response.reasoning_summary_part.done") {
-					currentThinkingAccum += "\n\n";
-					options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta: "\n\n" });
+					if (currentItem && currentItem.type === "reasoning") {
+						options?.onEvent?.({
+							type: "thinking_delta",
+							content: currentItem.summary.join("\n\n"),
+							delta: "\n\n",
+						});
+					}
 				}
 				// Handle text output deltas
-				else if (event.type === "response.output_text.delta") {
-					const delta = event.delta;
-					currentTextAccum += delta;
-					options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta });
-				}
-				// Handle refusal output deltas
-				else if (event.type === "response.refusal.delta") {
-					const delta = event.delta;
-					currentTextAccum += delta;
-					options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta });
+				else if (event.type === "response.content_part.added") {
+					if (currentItem && currentItem.type === "message") {
+						currentItem.content = currentItem.content || [];
+						currentItem.content.push(event.part);
+					}
+				} else if (event.type === "response.output_text.delta") {
+					if (currentItem && currentItem.type === "message") {
+						const lastPart = currentItem.content[currentItem.content.length - 1];
+						if (lastPart && lastPart.type === "output_text") {
+							lastPart.text += event.delta;
+							options?.onEvent?.({
+								type: "text_delta",
+								content: currentItem.content
+									.map((c) => (c.type === "output_text" ? c.text : c.refusal))
+									.join(""),
+								delta: event.delta,
+							});
+						}
+					}
+				} else if (event.type === "response.refusal.delta") {
+					if (currentItem && currentItem.type === "message") {
+						const lastPart = currentItem.content[currentItem.content.length - 1];
+						if (lastPart && lastPart.type === "refusal") {
+							lastPart.refusal += event.delta;
+							options?.onEvent?.({
+								type: "text_delta",
+								content: currentItem.content
+									.map((c) => (c.type === "output_text" ? c.text : c.refusal))
+									.join(""),
+								delta: event.delta,
+							});
+						}
+					}
 				}
 				// Handle output item completion
 				else if (event.type === "response.output_item.done") {
 					const item = event.item;
 					if (item.type === "reasoning") {
+						outputItems[outputItems.length - 1] = item; // Update with final item
 						const thinkingContent = item.summary?.map((s: any) => s.text).join("\n\n") || "";
 						options?.onEvent?.({ type: "thinking_end", content: thinkingContent });
-						outputItems.push(item);
 					} else if (item.type === "message") {
+						outputItems[outputItems.length - 1] = item; // Update with final item
 						const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
 						options?.onEvent?.({ type: "text_end", content: textContent });
-						outputItems.push(item);
 					} else if (item.type === "function_call") {
 						const toolCall: ToolCall = {
 							type: "toolCall",
@@ -162,7 +210,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 				else if (event.type === "response.completed") {
 					const response = event.response;
 					if (response?.usage) {
-						usage = {
+						output.usage = {
 							input: response.usage.input_tokens || 0,
 							output: response.usage.output_tokens || 0,
 							cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
@@ -170,60 +218,43 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 							cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 						};
 					}
+					calculateCost(this.modelInfo, output.usage);

 					// Map status to stop reason
-					stopReason = this.mapStopReason(response?.status);
+					output.stopReason = this.mapStopReason(response?.status);
+					if (outputItems.some((b) => b.type === "function_call") && output.stopReason === "stop") {
+						output.stopReason = "toolUse";
+					}
 				}
 				// Handle errors
 				else if (event.type === "error") {
-					const errorOutput = {
-						role: "assistant",
-						content: [],
-						provider: this.modelInfo.provider,
-						model: this.modelInfo.id,
-						usage,
-						stopReason: "error",
-						error: `Code ${event.code}: ${event.message}` || "Unknown error",
-					} satisfies AssistantMessage;
-					options?.onEvent?.({ type: "error", error: errorOutput.error || "Unknown error" });
-					return errorOutput;
+					output.stopReason = "error";
+					output.error = `Code ${event.code}: ${event.message}` || "Unknown error";
+					options?.onEvent?.({ type: "error", error: output.error });
+					return output;
 				} else if (event.type === "response.failed") {
-					const errorOutput = {
-						role: "assistant",
-						content: [],
-						provider: this.modelInfo.provider,
-						model: this.modelInfo.id,
-						usage,
-						stopReason: "error",
-						error: "Unknown error",
-					} satisfies AssistantMessage;
-					options?.onEvent?.({ type: "error", error: errorOutput.error || "Unknown error" });
-					return errorOutput;
+					output.stopReason = "error";
+					output.error = "Unknown error";
+					options?.onEvent?.({ type: "error", error: output.error });
+					return output;
 				}
 			}

-			if (options?.signal?.aborted) {
-				throw new Error("Request was aborted");
-			}
-
 			// Convert output items to blocks
-			const blocks: AssistantMessage["content"] = [];
 			for (const item of outputItems) {
 				if (item.type === "reasoning") {
-					blocks.push({
+					output.content.push({
 						type: "thinking",
 						thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "",
 						thinkingSignature: JSON.stringify(item), // Full item for resubmission
 					});
 				} else if (item.type === "message") {
-					blocks.push({
+					output.content.push({
 						type: "text",
 						text: item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""),
 						textSignature: item.id, // ID for resubmission
 					});
 				} else if (item.type === "function_call") {
-					blocks.push({
+					output.content.push({
 						type: "toolCall",
 						id: item.call_id + "|" + item.id,
 						name: item.name,
@@ -232,40 +263,16 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 				}
 			}

-			// Check if we have tool calls for stop reason
-			if (blocks.some((b) => b.type === "toolCall") && stopReason === "stop") {
-				stopReason = "toolUse";
+			if (options?.signal?.aborted) {
+				throw new Error("Request was aborted");
 			}

-			calculateCost(this.modelInfo, usage);
-
-			const output = {
-				role: "assistant",
-				content: blocks,
-				provider: this.modelInfo.provider,
-				model: this.modelInfo.id,
-				usage,
-				stopReason,
-			} satisfies AssistantMessage;
-
 			options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
 			return output;
 		} catch (error) {
-			const output = {
-				role: "assistant",
-				content: [],
-				provider: this.modelInfo.provider,
-				model: this.modelInfo.id,
-				usage: {
-					input: 0,
-					output: 0,
-					cacheRead: 0,
-					cacheWrite: 0,
-					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-				},
-				stopReason: "error",
-				error: error instanceof Error ? error.message : String(error),
-			} satisfies AssistantMessage;
-			options?.onEvent?.({ type: "error", error: output.error || "Unknown error" });
+			output.stopReason = "error";
+			output.error = error instanceof Error ? error.message : JSON.stringify(error);
+			options?.onEvent?.({ type: "error", error: output.error });
 			return output;
 		}
 	}
@@ -318,7 +325,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 		const output: ResponseInput = [];

 		for (const block of msg.content) {
-			if (block.type === "thinking") {
+			// Do not submit thinking blocks if the completion had an error (i.e. abort)
+			if (block.type === "thinking" && msg.stopReason !== "error") {
 				// Push the full reasoning item(s) from signature
 				if (block.thinkingSignature) {
 					const reasoningItem = JSON.parse(block.thinkingSignature);
@@ -333,7 +341,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
 					status: "completed",
 					id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
 				} satisfies ResponseOutputMessage);
-			} else if (block.type === "toolCall") {
+				// Do not submit tool call blocks if the completion had an error (i.e. abort)
+			} else if (block.type === "toolCall" && msg.stopReason !== "error") {
 				const toolCall = block as ToolCall;
 				output.push({
 					type: "function_call",

@@ -6,28 +6,38 @@ import { AnthropicLLM } from "../src/providers/anthropic.js";
 import type { LLM, LLMOptions, Context } from "../src/types.js";
 import { getModel } from "../src/models.js";

-async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>) {
+async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>, options: T) {
 	const controller = new AbortController();

-	// Abort after 100ms
-	setTimeout(() => controller.abort(), 1000);
+	// Abort after 5 seconds
+	setTimeout(() => controller.abort(), 5000);

 	const context: Context = {
 		messages: [{
 			role: "user",
-			content: "Write a very long story about a dragon that lives in a mountain. Include lots of details about the dragon's appearance, its daily life, the treasures it guards, and its interactions with nearby villages. Make it at least 1000 words long."
+			content: "What is 15 + 27? Think step by step. Then list 100 first names."
 		}]
 	};

 	const response = await llm.complete(context, {
+		...options,
 		signal: controller.signal
-	} as T);
+	});

-	// If we get here without throwing, the abort didn't work
+	// The abort is reported via stopReason "error", with partial content intact
 	expect(response.stopReason).toBe("error");
+	expect(response.content.length).toBeGreaterThan(0);
+
+	context.messages.push(response);
+	context.messages.push({ role: "user", content: "Please continue." });
+
+	// Ensure we can still make requests after abort
+	const followUp = await llm.complete(context, options);
+	expect(followUp.stopReason).toBe("stop");
+	expect(followUp.content.length).toBeGreaterThan(0);
 }

-async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>) {
+async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>, options: T) {
 	const controller = new AbortController();

 	// Abort immediately
@@ -38,8 +48,9 @@ async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>) {
 	};

 	const response = await llm.complete(context, {
+		...options,
 		signal: controller.signal
-	} as T);
+	});

 	expect(response.stopReason).toBe("error");
 }
@@ -52,11 +63,11 @@ describe("AI Providers Abort Tests", () => {
 	});

 	it("should abort mid-stream", async () => {
-		await testAbortSignal(llm);
+		await testAbortSignal(llm, { thinking: { enabled: true } });
 	});

 	it("should handle immediate abort", async () => {
-		await testImmediateAbort(llm);
+		await testImmediateAbort(llm, { thinking: { enabled: true } });
 	});
 });
@@ -64,15 +75,15 @@ describe("AI Providers Abort Tests", () => {
 	let llm: OpenAICompletionsLLM;

 	beforeAll(() => {
-		llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
+		llm = new OpenAICompletionsLLM(getModel("openai", "gpt-5-mini")!, process.env.OPENAI_API_KEY!);
 	});

 	it("should abort mid-stream", async () => {
-		await testAbortSignal(llm);
+		await testAbortSignal(llm, { reasoningEffort: "medium" });
 	});

 	it("should handle immediate abort", async () => {
-		await testImmediateAbort(llm);
+		await testImmediateAbort(llm, { reasoningEffort: "medium" });
 	});
 });
@@ -88,27 +99,27 @@ describe("AI Providers Abort Tests", () => {
 	});

 	it("should abort mid-stream", async () => {
-		await testAbortSignal(llm);
+		await testAbortSignal(llm, {});
 	});

 	it("should handle immediate abort", async () => {
-		await testImmediateAbort(llm);
+		await testImmediateAbort(llm, {});
 	});
 });

-describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Abort", () => {
+describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
 	let llm: AnthropicLLM;

 	beforeAll(() => {
-		llm = new AnthropicLLM(getModel("anthropic", "claude-3-5-haiku-latest")!, process.env.ANTHROPIC_API_KEY!);
+		llm = new AnthropicLLM(getModel("anthropic", "claude-opus-4-1")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
 	});

 	it("should abort mid-stream", async () => {
-		await testAbortSignal(llm);
+		await testAbortSignal(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
 	});

 	it("should handle immediate abort", async () => {
-		await testImmediateAbort(llm);
+		await testImmediateAbort(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
 	});
 });
});