Fix token statistics on abort for Anthropic provider

- Add handling for the message_start event to capture initial token usage
- Fix message_delta to use assignment (=) instead of addition (+=), since Anthropic sends cumulative token counts, not incremental ones
- Add comprehensive tests for all providers (Google, OpenAI Completions, OpenAI Responses, Anthropic)
- Document OpenAI limitation: token stats are only available at stream end

Fixes an issue where aborted streams had zero token counts despite Anthropic sending input tokens in the initial message_start event.
2026-04-15 06:04:40 +00:00 · 2025-10-26 21:22:24 +01:00 · 2025-10-26 21:22:24 +01:00 · bc8d994a7b
commit bc8d994a7b
parent 23be934a9a
3 changed files with 161 additions and 73 deletions
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@ -3810,23 +3810,6 @@ export const MODELS = {
 			contextWindow: 32768,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"cohere/command-r-08-2024": {
-			id: "cohere/command-r-08-2024",
-			name: "Cohere: Command R (08-2024)",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.15,
-				output: 0.6,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 128000,
-			maxTokens: 4000,
-		} satisfies Model<"openai-completions">,
 		"cohere/command-r-plus-08-2024": {
 			id: "cohere/command-r-plus-08-2024",
 			name: "Cohere: Command R+ (08-2024)",
@ -3844,6 +3827,23 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4000,
 		} satisfies Model<"openai-completions">,
+		"cohere/command-r-08-2024": {
+			id: "cohere/command-r-08-2024",
+			name: "Cohere: Command R (08-2024)",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.15,
+				output: 0.6,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 128000,
+			maxTokens: 4000,
+		} satisfies Model<"openai-completions">,
 		"sao10k/l3.1-euryale-70b": {
 			id: "sao10k/l3.1-euryale-70b",
 			name: "Sao10K: Llama 3.1 Euryale 70B v2.2",
@ -3912,23 +3912,6 @@ export const MODELS = {
 			contextWindow: 16384,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3.1-70b-instruct": {
-			id: "meta-llama/llama-3.1-70b-instruct",
-			name: "Meta: Llama 3.1 70B Instruct",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.39999999999999997,
-				output: 0.39999999999999997,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 131072,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3.1-405b-instruct": {
 			id: "meta-llama/llama-3.1-405b-instruct",
 			name: "Meta: Llama 3.1 405B Instruct",
@ -3946,6 +3929,23 @@ export const MODELS = {
 			contextWindow: 32768,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3.1-70b-instruct": {
+			id: "meta-llama/llama-3.1-70b-instruct",
+			name: "Meta: Llama 3.1 70B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.39999999999999997,
+				output: 0.39999999999999997,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 131072,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"mistralai/mistral-nemo": {
 			id: "mistralai/mistral-nemo",
 			name: "Mistral: Mistral Nemo",
@ -4065,23 +4065,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"meta-llama/llama-3-70b-instruct": {
-			id: "meta-llama/llama-3-70b-instruct",
-			name: "Meta: Llama 3 70B Instruct",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.3,
-				output: 0.39999999999999997,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 8192,
-			maxTokens: 16384,
-		} satisfies Model<"openai-completions">,
 		"meta-llama/llama-3-8b-instruct": {
 			id: "meta-llama/llama-3-8b-instruct",
 			name: "Meta: Llama 3 8B Instruct",
@ -4099,6 +4082,23 @@ export const MODELS = {
 			contextWindow: 8192,
 			maxTokens: 16384,
 		} satisfies Model<"openai-completions">,
+		"meta-llama/llama-3-70b-instruct": {
+			id: "meta-llama/llama-3-70b-instruct",
+			name: "Meta: Llama 3 70B Instruct",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.3,
+				output: 0.39999999999999997,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 8192,
+			maxTokens: 16384,
+		} satisfies Model<"openai-completions">,
 		"mistralai/mixtral-8x22b-instruct": {
 			id: "mistralai/mixtral-8x22b-instruct",
 			name: "Mistral: Mixtral 8x22B Instruct",
@ -4133,23 +4133,6 @@ export const MODELS = {
 			contextWindow: 128000,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
-		"mistralai/mistral-tiny": {
-			id: "mistralai/mistral-tiny",
-			name: "Mistral Tiny",
-			api: "openai-completions",
-			provider: "openrouter",
-			baseUrl: "https://openrouter.ai/api/v1",
-			reasoning: false,
-			input: ["text"],
-			cost: {
-				input: 0.25,
-				output: 0.25,
-				cacheRead: 0,
-				cacheWrite: 0,
-			},
-			contextWindow: 32768,
-			maxTokens: 4096,
-		} satisfies Model<"openai-completions">,
 		"mistralai/mistral-small": {
 			id: "mistralai/mistral-small",
 			name: "Mistral Small",
@ -4167,6 +4150,23 @@ export const MODELS = {
 			contextWindow: 32768,
 			maxTokens: 4096,
 		} satisfies Model<"openai-completions">,
+		"mistralai/mistral-tiny": {
+			id: "mistralai/mistral-tiny",
+			name: "Mistral Tiny",
+			api: "openai-completions",
+			provider: "openrouter",
+			baseUrl: "https://openrouter.ai/api/v1",
+			reasoning: false,
+			input: ["text"],
+			cost: {
+				input: 0.25,
+				output: 0.25,
+				cacheRead: 0,
+				cacheWrite: 0,
+			},
+			contextWindow: 32768,
+			maxTokens: 4096,
+		} satisfies Model<"openai-completions">,
 		"mistralai/mixtral-8x7b-instruct": {
 			id: "mistralai/mixtral-8x7b-instruct",
 			name: "Mistral: Mixtral 8x7B Instruct",
--- a/packages/ai/src/providers/anthropic.ts
+++ b/packages/ai/src/providers/anthropic.ts
@ -67,7 +67,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			const blocks = output.content as Block[];

 			for await (const event of anthropicStream) {
-				if (event.type === "content_block_start") {
+				if (event.type === "message_start") {
+					// Capture initial token usage from message_start event
+					// This ensures we have input token counts even if the stream is aborted early
+					output.usage.input = event.message.usage.input_tokens || 0;
+					output.usage.output = event.message.usage.output_tokens || 0;
+					output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
+					output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
+					calculateCost(model, output.usage);
+				} else if (event.type === "content_block_start") {
 					if (event.content_block.type === "text") {
 						const block: Block = {
 							type: "text",
@ -186,10 +194,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 					if (event.delta.stop_reason) {
 						output.stopReason = mapStopReason(event.delta.stop_reason);
 					}
-					output.usage.input += event.usage.input_tokens || 0;
-					output.usage.output += event.usage.output_tokens || 0;
-					output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
-					output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
+					output.usage.input = event.usage.input_tokens || 0;
+					output.usage.output = event.usage.output_tokens || 0;
+					output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
+					output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
 					calculateCost(model, output.usage);
 				}
 			}
--- a/packages/ai/test/tokens.test.ts
+++ b/packages/ai/test/tokens.test.ts
@ -0,0 +1,80 @@
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { stream } from "../src/stream.js";
+import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
+
+async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
+	const context: Context = {
+		messages: [
+			{
+				role: "user",
+				content: "Write a long poem with 10 stanzas about the beauty of nature.",
+				timestamp: Date.now(),
+			},
+		],
+	};
+
+	const controller = new AbortController();
+	const response = stream(llm, context, { ...options, signal: controller.signal });
+
+	let abortFired = false;
+	for await (const event of response) {
+		if (!abortFired && (event.type === "text_delta" || event.type === "thinking_delta")) {
+			abortFired = true;
+			setTimeout(() => controller.abort(), 3000);
+		}
+	}
+
+	const msg = await response.result();
+
+	expect(msg.stopReason).toBe("aborted");
+
+	// OpenAI providers only send usage in the final chunk, so when aborted they have no token stats
+	// Anthropic and Google send usage information early in the stream
+	if (llm.api === "openai-completions" || llm.api === "openai-responses") {
+		expect(msg.usage.input).toBe(0);
+		expect(msg.usage.output).toBe(0);
+	} else {
+		expect(msg.usage.input).toBeGreaterThan(0);
+		expect(msg.usage.output).toBeGreaterThan(0);
+		expect(msg.usage.cost.input).toBeGreaterThan(0);
+		expect(msg.usage.cost.total).toBeGreaterThan(0);
+	}
+}
+
+describe("Token Statistics on Abort", () => {
+	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
+		const llm = getModel("google", "gemini-2.5-flash");
+
+		it("should include token stats when aborted mid-stream", async () => {
+			await testTokensOnAbort(llm, { thinking: { enabled: true } });
+		}, 10000);
+	});
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
+		const llm: Model<"openai-completions"> = {
+			...getModel("openai", "gpt-4o-mini")!,
+			api: "openai-completions",
+		};
+
+		it("should include token stats when aborted mid-stream", async () => {
+			await testTokensOnAbort(llm);
+		}, 10000);
+	});
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
+		const llm = getModel("openai", "gpt-5-mini");
+
+		it("should include token stats when aborted mid-stream", async () => {
+			await testTokensOnAbort(llm);
+		}, 20000);
+	});
+
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => {
+		const llm = getModel("anthropic", "claude-opus-4-1-20250805");
+
+		it("should include token stats when aborted mid-stream", async () => {
+			await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
+		}, 10000);
+	});
+});