Mirror of https://github.com/getcompanion-ai/co-mono.git, synced 2026-04-20 19:02:10 +00:00
refactor(ai): Update API to support partial results on abort

- Anthropic, Google, and OpenAI Responses providers now return partial results when aborted
- Restructured streaming to accumulate content blocks incrementally
- Prevents submission of thinking/toolCall blocks from aborted completions in multi-turn conversations
- Makes UI development easier by providing partial content even when requests are interrupted
parent 5d5cd7955b
commit bf1f410c2b

4 changed files with 244 additions and 280 deletions
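The shape of the refactor, common to all three providers: complete() now allocates its AssistantMessage up front and mutates it as stream events arrive, so an abort (or any other failure) is caught, marked with stopReason "error", and the partially accumulated message is returned rather than discarded. A minimal sketch of that control flow, using a simplified stand-in type rather than the project's actual AssistantMessage:

// Minimal sketch of "accumulate incrementally, return partial on abort".
// SimpleMessage is a stand-in, not the project's AssistantMessage type.
interface SimpleMessage {
  content: string[];
  stopReason: "stop" | "error";
  error?: string;
}

async function completeStreaming(stream: AsyncIterable<string>): Promise<SimpleMessage> {
  // Allocated before streaming starts, so it survives an early exit.
  const output: SimpleMessage = { content: [], stopReason: "stop" };
  try {
    for await (const delta of stream) {
      output.content.push(delta); // mutate per event, not after the stream ends
    }
  } catch (error) {
    // An abort surfaces here as a thrown error; keep what already arrived.
    output.stopReason = "error";
    output.error = error instanceof Error ? error.message : JSON.stringify(error);
  }
  return output;
}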
Anthropic provider (src/providers/anthropic.ts):

@@ -14,8 +14,9 @@ import type {
   Message,
   Model,
   StopReason,
+  TextContent,
+  ThinkingContent,
   ToolCall,
-  Usage,
 } from "../types.js";

 export interface AnthropicLLMOptions extends LLMOptions {
@@ -61,6 +62,21 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
   }

   async complete(context: Context, options?: AnthropicLLMOptions): Promise<AssistantMessage> {
+    const output: AssistantMessage = {
+      role: "assistant",
+      content: [],
+      provider: this.modelInfo.provider,
+      model: this.modelInfo.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+    };
+
     try {
       const messages = this.convertMessages(context.messages);

@@ -131,132 +147,94 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {

       options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-      let blockType: "text" | "thinking" | "toolUse" | "other" = "other";
-      let blockContent = "";
-      let toolCall: (ToolCall & { partialJson: string }) | null = null;
+      let currentBlock: ThinkingContent | TextContent | (ToolCall & { partialJson: string }) | null = null;
       for await (const event of stream) {
         if (event.type === "content_block_start") {
           if (event.content_block.type === "text") {
-            blockType = "text";
-            blockContent = "";
+            currentBlock = {
+              type: "text",
+              text: "",
+            };
+            output.content.push(currentBlock);
             options?.onEvent?.({ type: "text_start" });
           } else if (event.content_block.type === "thinking") {
-            blockType = "thinking";
-            blockContent = "";
+            currentBlock = {
+              type: "thinking",
+              thinking: "",
+              thinkingSignature: "",
+            };
+            output.content.push(currentBlock);
             options?.onEvent?.({ type: "thinking_start" });
           } else if (event.content_block.type === "tool_use") {
             // We wait for the full tool use to be streamed to send the event
-            toolCall = {
+            currentBlock = {
               type: "toolCall",
               id: event.content_block.id,
               name: event.content_block.name,
               arguments: event.content_block.input as Record<string, any>,
               partialJson: "",
             };
-            blockType = "toolUse";
-            blockContent = "";
-          } else {
-            blockType = "other";
-            blockContent = "";
           }
-        }
-        if (event.type === "content_block_delta") {
+        } else if (event.type === "content_block_delta") {
           if (event.delta.type === "text_delta") {
-            options?.onEvent?.({ type: "text_delta", content: blockContent, delta: event.delta.text });
-            blockContent += event.delta.text;
+            if (currentBlock && currentBlock.type === "text") {
+              currentBlock.text += event.delta.text;
+              options?.onEvent?.({ type: "text_delta", content: currentBlock.text, delta: event.delta.text });
           }
-          if (event.delta.type === "thinking_delta") {
-            options?.onEvent?.({ type: "thinking_delta", content: blockContent, delta: event.delta.thinking });
-            blockContent += event.delta.thinking;
+          } else if (event.delta.type === "thinking_delta") {
+            if (currentBlock && currentBlock.type === "thinking") {
+              currentBlock.thinking += event.delta.thinking;
+              options?.onEvent?.({
+                type: "thinking_delta",
+                content: currentBlock.thinking,
+                delta: event.delta.thinking,
+              });
           }
-          if (event.delta.type === "input_json_delta") {
-            toolCall!.partialJson += event.delta.partial_json;
+          } else if (event.delta.type === "input_json_delta") {
+            if (currentBlock && currentBlock.type === "toolCall") {
+              currentBlock.partialJson += event.delta.partial_json;
+            }
+          } else if (event.delta.type === "signature_delta") {
+            if (currentBlock && currentBlock.type === "thinking") {
+              currentBlock.thinkingSignature = currentBlock.thinkingSignature || "";
+              currentBlock.thinkingSignature += event.delta.signature;
           }
         }
-        if (event.type === "content_block_stop") {
-          if (blockType === "text") {
-            options?.onEvent?.({ type: "text_end", content: blockContent });
-          } else if (blockType === "thinking") {
-            options?.onEvent?.({ type: "thinking_end", content: blockContent });
-          } else if (blockType === "toolUse") {
+        } else if (event.type === "content_block_stop") {
+          if (currentBlock) {
+            if (currentBlock.type === "text") {
+              options?.onEvent?.({ type: "text_end", content: currentBlock.text });
+            } else if (currentBlock.type === "thinking") {
+              options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
+            } else if (currentBlock.type === "toolCall") {
             const finalToolCall: ToolCall = {
               type: "toolCall",
-              id: toolCall!.id,
-              name: toolCall!.name,
-              arguments: toolCall!.partialJson ? JSON.parse(toolCall!.partialJson) : toolCall!.arguments,
+              id: currentBlock.id,
+              name: currentBlock.name,
+              arguments: JSON.parse(currentBlock.partialJson),
             };
-            toolCall = null;
+            output.content.push(finalToolCall);
             options?.onEvent?.({ type: "toolCall", toolCall: finalToolCall });
           }
-          blockType = "other";
+          currentBlock = null;
         }
-      }
-      const msg = await stream.finalMessage();
-      const blocks: AssistantMessage["content"] = [];
-      for (const block of msg.content) {
-        if (block.type === "text" && block.text) {
-          blocks.push({
-            type: "text",
-            text: block.text,
-          });
-        } else if (block.type === "thinking" && block.thinking) {
-          blocks.push({
-            type: "thinking",
-            thinking: block.thinking,
-            thinkingSignature: block.signature,
-          });
-        } else if (block.type === "tool_use") {
-          blocks.push({
-            type: "toolCall",
-            id: block.id,
-            name: block.name,
-            arguments: block.input as Record<string, any>,
-          });
-        }
-      }
+        } else if (event.type === "message_delta") {
+          if (event.delta.stop_reason) {
+            output.stopReason = this.mapStopReason(event.delta.stop_reason);
+          }
+          output.usage.input += event.usage.input_tokens || 0;
+          output.usage.output += event.usage.output_tokens || 0;
+          output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
+          output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
+          calculateCost(this.modelInfo, output.usage);
+        }
+      }

-      const usage: Usage = {
-        input: msg.usage.input_tokens,
-        output: msg.usage.output_tokens,
-        cacheRead: msg.usage.cache_read_input_tokens || 0,
-        cacheWrite: msg.usage.cache_creation_input_tokens || 0,
-        cost: {
-          input: 0,
-          output: 0,
-          cacheRead: 0,
-          cacheWrite: 0,
-          total: 0,
-        },
-      };
-      calculateCost(this.modelInfo, usage);
-
-      const output = {
-        role: "assistant",
-        content: blocks,
-        provider: this.modelInfo.provider,
-        model: this.modelInfo.id,
-        usage,
-        stopReason: this.mapStopReason(msg.stop_reason),
-      } satisfies AssistantMessage;
-
       options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });

       return output;
     } catch (error) {
-      const output = {
-        role: "assistant",
-        content: [],
-        provider: this.modelInfo.provider,
-        model: this.modelInfo.id,
-        usage: {
-          input: 0,
-          output: 0,
-          cacheRead: 0,
-          cacheWrite: 0,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-        },
-        stopReason: "error",
-        error: error instanceof Error ? error.message : JSON.stringify(error),
-      } satisfies AssistantMessage;
+      output.stopReason = "error";
+      output.error = error instanceof Error ? error.message : JSON.stringify(error);
       options?.onEvent?.({ type: "error", error: output.error });
       return output;
     }
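The UI benefit called out in the commit message follows from this structure: blocks are pushed into output.content as soon as they start streaming, and every delta is also surfaced through onEvent. A hypothetical consumer, illustrative wiring rather than code from the repository (LLM, LLMOptions, and Context are the types imported in the tests below; render() is a made-up callback):

import type { LLM, LLMOptions, Context } from "../src/types.js";

// Illustrative UI-side consumer of the streaming events.
async function streamToUI<T extends LLMOptions>(
  llm: LLM<T>,
  context: Context,
  render: (text: string) => void,
) {
  const controller = new AbortController();
  const message = await llm.complete(context, {
    signal: controller.signal,
    onEvent: (event) => {
      // text_delta events carry both the accumulated text and the new delta.
      if (event.type === "text_delta") render(event.content);
    },
  } as T);
  // After an abort, message.stopReason is "error" but message.content still
  // holds whatever blocks streamed before the interruption.
  return message;
}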
Google provider (GoogleLLM):

@@ -20,7 +20,6 @@ import type {
   ThinkingContent,
   Tool,
   ToolCall,
-  Usage,
 } from "../types.js";

 export interface GoogleLLMOptions extends LLMOptions {
@@ -33,7 +32,7 @@ export interface GoogleLLMOptions extends LLMOptions {

 export class GoogleLLM implements LLM<GoogleLLMOptions> {
   private client: GoogleGenAI;
-  private model: Model;
+  private modelInfo: Model;

   constructor(model: Model, apiKey?: string) {
     if (!apiKey) {
@@ -45,14 +44,28 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
       apiKey = process.env.GEMINI_API_KEY;
     }
     this.client = new GoogleGenAI({ apiKey });
-    this.model = model;
+    this.modelInfo = model;
   }

   getModel(): Model {
-    return this.model;
+    return this.modelInfo;
   }

   async complete(context: Context, options?: GoogleLLMOptions): Promise<AssistantMessage> {
+    const output: AssistantMessage = {
+      role: "assistant",
+      content: [],
+      provider: this.modelInfo.provider,
+      model: this.modelInfo.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+    };
     try {
       const contents = this.convertMessages(context.messages);

@@ -82,7 +95,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
       }

       // Add thinking config if enabled and model supports it
-      if (options?.thinking?.enabled && this.model.reasoning) {
+      if (options?.thinking?.enabled && this.modelInfo.reasoning) {
         config.thinkingConfig = {
           includeThoughts: true,
           ...(options.thinking.budgetTokens !== undefined && { thinkingBudget: options.thinking.budgetTokens }),
@@ -99,27 +112,15 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

       // Build the request parameters
       const params: GenerateContentParameters = {
-        model: this.model.id,
+        model: this.modelInfo.id,
         contents,
         config,
       };

       const stream = await this.client.models.generateContentStream(params);

-      options?.onEvent?.({ type: "start", model: this.model.id, provider: this.model.provider });
+      options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-      const blocks: AssistantMessage["content"] = [];
       let currentBlock: TextContent | ThinkingContent | null = null;
-      let usage: Usage = {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-      };
-      let stopReason: StopReason = "stop";
-
-      // Process the stream
       for await (const chunk of stream) {
         // Extract parts from the chunk
         const candidate = chunk.candidates?.[0];
@@ -134,14 +135,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
             (isThinking && currentBlock.type !== "thinking") ||
             (!isThinking && currentBlock.type !== "text")
           ) {
-            // Save and finalize current block
             if (currentBlock) {
               if (currentBlock.type === "text") {
                 options?.onEvent?.({ type: "text_end", content: currentBlock.text });
               } else {
                 options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
               }
-              blocks.push(currentBlock);
             }

             // Start new block
@@ -152,6 +151,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
               currentBlock = { type: "text", text: "" };
               options?.onEvent?.({ type: "text_start" });
             }
+            output.content.push(currentBlock);
           }

           // Append content to current block
@@ -171,14 +171,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

           // Handle function calls
           if (part.functionCall) {
-            // Save current block if exists
             if (currentBlock) {
               if (currentBlock.type === "text") {
                 options?.onEvent?.({ type: "text_end", content: currentBlock.text });
               } else {
                 options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
               }
-              blocks.push(currentBlock);
               currentBlock = null;
             }

@@ -190,7 +188,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
               name: part.functionCall.name || "",
               arguments: part.functionCall.args as Record<string, any>,
             };
-            blocks.push(toolCall);
+            output.content.push(toolCall);
             options?.onEvent?.({ type: "toolCall", toolCall });
           }
         }
@@ -198,16 +196,16 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {

         // Map finish reason
         if (candidate?.finishReason) {
-          stopReason = this.mapStopReason(candidate.finishReason);
+          output.stopReason = this.mapStopReason(candidate.finishReason);
           // Check if we have tool calls in blocks
-          if (blocks.some((b) => b.type === "toolCall")) {
-            stopReason = "toolUse";
+          if (output.content.some((b) => b.type === "toolCall")) {
+            output.stopReason = "toolUse";
           }
         }

         // Capture usage metadata if available
         if (chunk.usageMetadata) {
-          usage = {
+          output.usage = {
             input: chunk.usageMetadata.promptTokenCount || 0,
             output:
               (chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
@@ -221,47 +219,15 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
               total: 0,
             },
           };
+          calculateCost(this.modelInfo, output.usage);
         }
       }

-      // Save final block if exists
-      if (currentBlock) {
-        if (currentBlock.type === "text") {
-          options?.onEvent?.({ type: "text_end", content: currentBlock.text });
-        } else {
-          options?.onEvent?.({ type: "thinking_end", content: currentBlock.thinking });
-        }
-        blocks.push(currentBlock);
-      }
-
-      calculateCost(this.model, usage);
-
-      const output = {
-        role: "assistant",
-        content: blocks,
-        provider: this.model.provider,
-        model: this.model.id,
-        usage,
-        stopReason,
-      } satisfies AssistantMessage;
-
-      options?.onEvent?.({ type: "done", reason: stopReason, message: output });
-
+      options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
       return output;
     } catch (error) {
-      const output = {
-        role: "assistant",
-        content: [],
-        provider: this.model.provider,
-        model: this.model.id,
-        usage: {
-          input: 0,
-          output: 0,
-          cacheRead: 0,
-          cacheWrite: 0,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-        },
-        stopReason: "error",
-        error: error instanceof Error ? error.message : JSON.stringify(error),
-      } satisfies AssistantMessage;
+      output.stopReason = "error";
+      output.error = error instanceof Error ? error.message : JSON.stringify(error);
       options?.onEvent?.({ type: "error", error: output.error });
       return output;
     }
OpenAI Responses provider (OpenAIResponsesLLM):

@@ -22,7 +22,6 @@ import type {
   TextContent,
   Tool,
   ToolCall,
-  Usage,
 } from "../types.js";

 export interface OpenAIResponsesLLMOptions extends LLMOptions {
@@ -52,6 +51,20 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
   }

   async complete(request: Context, options?: OpenAIResponsesLLMOptions): Promise<AssistantMessage> {
+    const output: AssistantMessage = {
+      role: "assistant",
+      content: [],
+      provider: this.modelInfo.provider,
+      model: this.modelInfo.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+    };
     try {
       const input = this.convertToInput(request.messages, request.systemPrompt);

@@ -88,17 +101,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {

       options?.onEvent?.({ type: "start", model: this.modelInfo.id, provider: this.modelInfo.provider });

-      const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = []; // any for function_call items
-      let currentTextAccum = ""; // For delta accumulation
-      let currentThinkingAccum = ""; // For delta accumulation
-      let usage: Usage = {
-        input: 0,
-        output: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-      };
-      let stopReason: StopReason = "stop";
+      const outputItems: (ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall)[] = [];
+      let currentItem: ResponseReasoningItem | ResponseOutputMessage | ResponseFunctionToolCall | null = null;

       for await (const event of stream) {
         // Handle output item start
@@ -106,47 +110,91 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
           const item = event.item;
           if (item.type === "reasoning") {
             options?.onEvent?.({ type: "thinking_start" });
-            currentThinkingAccum = "";
+            outputItems.push(item);
+            currentItem = item;
           } else if (item.type === "message") {
             options?.onEvent?.({ type: "text_start" });
-            currentTextAccum = "";
+            outputItems.push(item);
+            currentItem = item;
           }
         }
         // Handle reasoning summary deltas
-        else if (event.type === "response.reasoning_summary_text.delta") {
-          const delta = event.delta;
-          currentThinkingAccum += delta;
-          options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta });
+        else if (event.type === "response.reasoning_summary_part.added") {
+          if (currentItem && currentItem.type === "reasoning") {
+            currentItem.summary = currentItem.summary || [];
+            currentItem.summary.push(event.part);
+          }
+        } else if (event.type === "response.reasoning_summary_text.delta") {
+          if (currentItem && currentItem.type === "reasoning") {
+            currentItem.summary = currentItem.summary || [];
+            const lastPart = currentItem.summary[currentItem.summary.length - 1];
+            if (lastPart) {
+              lastPart.text += event.delta;
+              options?.onEvent?.({
+                type: "thinking_delta",
+                content: currentItem.summary.join("\n\n"),
+                delta: event.delta,
+              });
+            }
+          }
         }
         // Add a new line between summary parts (hack...)
         else if (event.type === "response.reasoning_summary_part.done") {
-          currentThinkingAccum += "\n\n";
-          options?.onEvent?.({ type: "thinking_delta", content: currentThinkingAccum, delta: "\n\n" });
+          if (currentItem && currentItem.type === "reasoning") {
+            options?.onEvent?.({
+              type: "thinking_delta",
+              content: currentItem.summary.join("\n\n"),
+              delta: "\n\n",
+            });
+          }
         }
         // Handle text output deltas
-        else if (event.type === "response.output_text.delta") {
-          const delta = event.delta;
-          currentTextAccum += delta;
-          options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta });
-        }
-        // Handle refusal output deltas
-        else if (event.type === "response.refusal.delta") {
-          const delta = event.delta;
-          currentTextAccum += delta;
-          options?.onEvent?.({ type: "text_delta", content: currentTextAccum, delta });
+        else if (event.type === "response.content_part.added") {
+          if (currentItem && currentItem.type === "message") {
+            currentItem.content = currentItem.content || [];
+            currentItem.content.push(event.part);
+          }
+        } else if (event.type === "response.output_text.delta") {
+          if (currentItem && currentItem.type === "message") {
+            const lastPart = currentItem.content[currentItem.content.length - 1];
+            if (lastPart && lastPart.type === "output_text") {
+              lastPart.text += event.delta;
+              options?.onEvent?.({
+                type: "text_delta",
+                content: currentItem.content
+                  .map((c) => (c.type === "output_text" ? c.text : c.refusal))
+                  .join(""),
+                delta: event.delta,
+              });
+            }
+          }
+        } else if (event.type === "response.refusal.delta") {
+          if (currentItem && currentItem.type === "message") {
+            const lastPart = currentItem.content[currentItem.content.length - 1];
+            if (lastPart && lastPart.type === "refusal") {
+              lastPart.refusal += event.delta;
+              options?.onEvent?.({
+                type: "text_delta",
+                content: currentItem.content
+                  .map((c) => (c.type === "output_text" ? c.text : c.refusal))
+                  .join(""),
+                delta: event.delta,
+              });
+            }
+          }
         }
         // Handle output item completion
         else if (event.type === "response.output_item.done") {
           const item = event.item;

           if (item.type === "reasoning") {
+            outputItems[outputItems.length - 1] = item; // Update with final item
             const thinkingContent = item.summary?.map((s: any) => s.text).join("\n\n") || "";
             options?.onEvent?.({ type: "thinking_end", content: thinkingContent });
-            outputItems.push(item);
           } else if (item.type === "message") {
+            outputItems[outputItems.length - 1] = item; // Update with final item
             const textContent = item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join("");
             options?.onEvent?.({ type: "text_end", content: textContent });
-            outputItems.push(item);
           } else if (item.type === "function_call") {
             const toolCall: ToolCall = {
               type: "toolCall",
@@ -162,7 +210,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
         else if (event.type === "response.completed") {
           const response = event.response;
           if (response?.usage) {
-            usage = {
+            output.usage = {
               input: response.usage.input_tokens || 0,
               output: response.usage.output_tokens || 0,
               cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
@@ -170,60 +218,43 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
               cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             };
           }
+          calculateCost(this.modelInfo, output.usage);
           // Map status to stop reason
-          stopReason = this.mapStopReason(response?.status);
+          output.stopReason = this.mapStopReason(response?.status);
+          if (outputItems.some((b) => b.type === "function_call") && output.stopReason === "stop") {
+            output.stopReason = "toolUse";
+          }
         }
         // Handle errors
         else if (event.type === "error") {
-          const errorOutput = {
-            role: "assistant",
-            content: [],
-            provider: this.modelInfo.provider,
-            model: this.modelInfo.id,
-            usage,
-            stopReason: "error",
-            error: `Code ${event.code}: ${event.message}` || "Unknown error",
-          } satisfies AssistantMessage;
-          options?.onEvent?.({ type: "error", error: errorOutput.error || "Unknown error" });
-          return errorOutput;
+          output.stopReason = "error";
+          output.error = `Code ${event.code}: ${event.message}` || "Unknown error";
+          options?.onEvent?.({ type: "error", error: output.error });
+          return output;
         } else if (event.type === "response.failed") {
-          const errorOutput = {
-            role: "assistant",
-            content: [],
-            provider: this.modelInfo.provider,
-            model: this.modelInfo.id,
-            usage,
-            stopReason: "error",
-            error: "Unknown error",
-          } satisfies AssistantMessage;
-          options?.onEvent?.({ type: "error", error: errorOutput.error || "Unknown error" });
-          return errorOutput;
+          output.stopReason = "error";
+          output.error = "Unknown error";
+          options?.onEvent?.({ type: "error", error: output.error });
+          return output;
         }
       }

-      if (options?.signal?.aborted) {
-        throw new Error("Request was aborted");
-      }
-
       // Convert output items to blocks
-      const blocks: AssistantMessage["content"] = [];
-
       for (const item of outputItems) {
         if (item.type === "reasoning") {
-          blocks.push({
+          output.content.push({
             type: "thinking",
             thinking: item.summary?.map((s: any) => s.text).join("\n\n") || "",
             thinkingSignature: JSON.stringify(item), // Full item for resubmission
           });
         } else if (item.type === "message") {
-          blocks.push({
+          output.content.push({
             type: "text",
             text: item.content.map((c) => (c.type === "output_text" ? c.text : c.refusal)).join(""),
             textSignature: item.id, // ID for resubmission
           });
         } else if (item.type === "function_call") {
-          blocks.push({
+          output.content.push({
             type: "toolCall",
             id: item.call_id + "|" + item.id,
             name: item.name,
@@ -232,40 +263,16 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
         }
       }

-      // Check if we have tool calls for stop reason
-      if (blocks.some((b) => b.type === "toolCall") && stopReason === "stop") {
-        stopReason = "toolUse";
-      }
-
-      calculateCost(this.modelInfo, usage);
-
-      const output = {
-        role: "assistant",
-        content: blocks,
-        provider: this.modelInfo.provider,
-        model: this.modelInfo.id,
-        usage,
-        stopReason,
-      } satisfies AssistantMessage;
-
+      if (options?.signal?.aborted) {
+        throw new Error("Request was aborted");
+      }
       options?.onEvent?.({ type: "done", reason: output.stopReason, message: output });
       return output;
     } catch (error) {
-      const output = {
-        role: "assistant",
-        content: [],
-        provider: this.modelInfo.provider,
-        model: this.modelInfo.id,
-        usage: {
-          input: 0,
-          output: 0,
-          cacheRead: 0,
-          cacheWrite: 0,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-        },
-        stopReason: "error",
-        error: error instanceof Error ? error.message : String(error),
-      } satisfies AssistantMessage;
-      options?.onEvent?.({ type: "error", error: output.error || "Unknown error" });
+      output.stopReason = "error";
+      output.error = error instanceof Error ? error.message : JSON.stringify(error);
      options?.onEvent?.({ type: "error", error: output.error });
       return output;
     }
   }
@@ -318,7 +325,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
     const output: ResponseInput = [];

     for (const block of msg.content) {
-      if (block.type === "thinking") {
+      // Do not submit thinking blocks if the completion had an error (i.e. abort)
+      if (block.type === "thinking" && msg.stopReason !== "error") {
         // Push the full reasoning item(s) from signature
         if (block.thinkingSignature) {
           const reasoningItem = JSON.parse(block.thinkingSignature);
@@ -333,7 +341,8 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
           status: "completed",
           id: textBlock.textSignature || "msg_" + Math.random().toString(36).substring(2, 15),
         } satisfies ResponseOutputMessage);
-      } else if (block.type === "toolCall") {
+      // Do not submit thinking blocks if the completion had an error (i.e. abort)
+      } else if (block.type === "toolCall" && msg.stopReason !== "error") {
         const toolCall = block as ToolCall;
         output.push({
           type: "function_call",
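The two convertToInput hunks above are the multi-turn half of the change: an aborted completion is stored in the conversation with stopReason "error", and when that conversation is resubmitted its thinking and toolCall blocks are skipped while text blocks are kept, since a partial reasoning item or a tool call with no matching result may not be valid input. A simplified sketch of that filter, using stand-in types rather than the repository's AssistantMessage blocks:

// Stand-in block type; the real code walks AssistantMessage content.
type Block =
  | { type: "text"; text: string }
  | { type: "thinking"; thinking: string }
  | { type: "toolCall"; id: string; name: string };

function resubmittableBlocks(blocks: Block[], stopReason: string): Block[] {
  // Thinking and tool-call blocks from an aborted ("error") completion are
  // dropped; text blocks are always resubmitted.
  return blocks.filter((block) => block.type === "text" || stopReason !== "error");
}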
Abort tests:

@@ -6,28 +6,38 @@ import { AnthropicLLM } from "../src/providers/anthropic.js";
 import type { LLM, LLMOptions, Context } from "../src/types.js";
 import { getModel } from "../src/models.js";

-async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>) {
+async function testAbortSignal<T extends LLMOptions>(llm: LLM<T>, options: T) {
   const controller = new AbortController();

   // Abort after 100ms
-  setTimeout(() => controller.abort(), 1000);
+  setTimeout(() => controller.abort(), 5000);

   const context: Context = {
     messages: [{
       role: "user",
-      content: "Write a very long story about a dragon that lives in a mountain. Include lots of details about the dragon's appearance, its daily life, the treasures it guards, and its interactions with nearby villages. Make it at least 1000 words long."
+      content: "What is 15 + 27? Think step by step. Then list 100 first names."
     }]
   };

   const response = await llm.complete(context, {
+    ...options,
     signal: controller.signal
-  } as T);
+  });

   // If we get here without throwing, the abort didn't work
   expect(response.stopReason).toBe("error");
+  expect(response.content.length).toBeGreaterThan(0);
+
+  context.messages.push(response);
+  context.messages.push({ role: "user", content: "Please continue." });
+
+  // Ensure we can still make requests after abort
+  const followUp = await llm.complete(context, options);
+  expect(followUp.stopReason).toBe("stop");
+  expect(followUp.content.length).toBeGreaterThan(0);
 }

-async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>) {
+async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>, options: T) {
   const controller = new AbortController();

   // Abort immediately
@@ -38,8 +48,9 @@ async function testImmediateAbort<T extends LLMOptions>(llm: LLM<T>) {
   };

   const response = await llm.complete(context, {
+    ...options,
     signal: controller.signal
-  } as T);
+  });
   expect(response.stopReason).toBe("error");
 }

@@ -52,11 +63,11 @@ describe("AI Providers Abort Tests", () => {
   });

   it("should abort mid-stream", async () => {
-    await testAbortSignal(llm);
+    await testAbortSignal(llm, { thinking: { enabled: true } });
   });

   it("should handle immediate abort", async () => {
-    await testImmediateAbort(llm);
+    await testImmediateAbort(llm, { thinking: { enabled: true } });
   });
 });

@@ -64,15 +75,15 @@ describe("AI Providers Abort Tests", () => {
   let llm: OpenAICompletionsLLM;

   beforeAll(() => {
-    llm = new OpenAICompletionsLLM(getModel("openai", "gpt-4o-mini")!, process.env.OPENAI_API_KEY!);
+    llm = new OpenAICompletionsLLM(getModel("openai", "gpt-5-mini")!, process.env.OPENAI_API_KEY!);
   });

   it("should abort mid-stream", async () => {
-    await testAbortSignal(llm);
+    await testAbortSignal(llm, { reasoningEffort: "medium"});
   });

   it("should handle immediate abort", async () => {
-    await testImmediateAbort(llm);
+    await testImmediateAbort(llm, { reasoningEffort: "medium" });
   });
 });

@@ -88,27 +99,27 @@ describe("AI Providers Abort Tests", () => {
   });

   it("should abort mid-stream", async () => {
-    await testAbortSignal(llm);
+    await testAbortSignal(llm, {});
   });

   it("should handle immediate abort", async () => {
-    await testImmediateAbort(llm);
+    await testImmediateAbort(llm, {});
   });
 });

-describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Abort", () => {
+describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Abort", () => {
   let llm: AnthropicLLM;

   beforeAll(() => {
-    llm = new AnthropicLLM(getModel("anthropic", "claude-3-5-haiku-latest")!, process.env.ANTHROPIC_API_KEY!);
+    llm = new AnthropicLLM(getModel("anthropic", "claude-opus-4-1")!, process.env.ANTHROPIC_OAUTH_TOKEN!);
   });

   it("should abort mid-stream", async () => {
-    await testAbortSignal(llm);
+    await testAbortSignal(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
   });

   it("should handle immediate abort", async () => {
-    await testImmediateAbort(llm);
+    await testImmediateAbort(llm, {thinking: { enabled: true, budgetTokens: 2048 }});
   });
 });
});