diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index aa979734..083b4b0b 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -3810,23 +3810,6 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, - "cohere/command-r-08-2024": { - id: "cohere/command-r-08-2024", - name: "Cohere: Command R (08-2024)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.15, - output: 0.6, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4000, - } satisfies Model<"openai-completions">, "cohere/command-r-plus-08-2024": { id: "cohere/command-r-plus-08-2024", name: "Cohere: Command R+ (08-2024)", @@ -3844,6 +3827,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4000, } satisfies Model<"openai-completions">, + "cohere/command-r-08-2024": { + id: "cohere/command-r-08-2024", + name: "Cohere: Command R (08-2024)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.15, + output: 0.6, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4000, + } satisfies Model<"openai-completions">, "sao10k/l3.1-euryale-70b": { id: "sao10k/l3.1-euryale-70b", name: "Sao10K: Llama 3.1 Euryale 70B v2.2", @@ -3912,23 +3912,6 @@ export const MODELS = { contextWindow: 16384, maxTokens: 16384, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-70b-instruct": { - id: "meta-llama/llama-3.1-70b-instruct", - name: "Meta: Llama 3.1 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - 
contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-405b-instruct": { id: "meta-llama/llama-3.1-405b-instruct", name: "Meta: Llama 3.1 405B Instruct", @@ -3946,6 +3929,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-70b-instruct": { + id: "meta-llama/llama-3.1-70b-instruct", + name: "Meta: Llama 3.1 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", @@ -4065,23 +4065,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3-70b-instruct": { - id: "meta-llama/llama-3-70b-instruct", - name: "Meta: Llama 3 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.3, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 8192, - maxTokens: 16384, - } satisfies Model<"openai-completions">, "meta-llama/llama-3-8b-instruct": { id: "meta-llama/llama-3-8b-instruct", name: "Meta: Llama 3 8B Instruct", @@ -4099,6 +4082,23 @@ export const MODELS = { contextWindow: 8192, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3-70b-instruct": { + id: "meta-llama/llama-3-70b-instruct", + name: "Meta: Llama 3 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.3, + output: 0.39999999999999997, + 
cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 8192, + maxTokens: 16384, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x22b-instruct": { id: "mistralai/mixtral-8x22b-instruct", name: "Mistral: Mixtral 8x22B Instruct", @@ -4133,23 +4133,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "mistralai/mistral-tiny": { - id: "mistralai/mistral-tiny", - name: "Mistral Tiny", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.25, - output: 0.25, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 32768, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-small": { id: "mistralai/mistral-small", name: "Mistral Small", @@ -4167,6 +4150,23 @@ export const MODELS = { contextWindow: 32768, maxTokens: 4096, } satisfies Model<"openai-completions">, + "mistralai/mistral-tiny": { + id: "mistralai/mistral-tiny", + name: "Mistral Tiny", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.25, + output: 0.25, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 32768, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mixtral-8x7b-instruct": { id: "mistralai/mixtral-8x7b-instruct", name: "Mistral: Mixtral 8x7B Instruct", diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts index c2d20769..b7fedd7a 100644 --- a/packages/ai/src/providers/anthropic.ts +++ b/packages/ai/src/providers/anthropic.ts @@ -67,7 +67,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = ( const blocks = output.content as Block[]; for await (const event of anthropicStream) { - if (event.type === "content_block_start") { + if (event.type === "message_start") { + // Capture initial token usage from 
message_start event + // This ensures we have input token counts even if the stream is aborted early + output.usage.input = event.message.usage.input_tokens || 0; + output.usage.output = event.message.usage.output_tokens || 0; + output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0; + output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0; + calculateCost(model, output.usage); + } else if (event.type === "content_block_start") { if (event.content_block.type === "text") { const block: Block = { type: "text", @@ -186,10 +194,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = ( if (event.delta.stop_reason) { output.stopReason = mapStopReason(event.delta.stop_reason); } - output.usage.input += event.usage.input_tokens || 0; - output.usage.output += event.usage.output_tokens || 0; - output.usage.cacheRead += event.usage.cache_read_input_tokens || 0; - output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0; + output.usage.input = event.usage.input_tokens ?? output.usage.input; + output.usage.output = event.usage.output_tokens ?? output.usage.output; + output.usage.cacheRead = event.usage.cache_read_input_tokens ?? output.usage.cacheRead; + output.usage.cacheWrite = event.usage.cache_creation_input_tokens ?? output.usage.cacheWrite; calculateCost(model, output.usage); } } diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts new file mode 100644 index 00000000..322ebe9e --- /dev/null +++ b/packages/ai/test/tokens.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it } from "vitest"; +import { getModel } from "../src/models.js"; +import { stream } from "../src/stream.js"; +import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; + +async function testTokensOnAbort(llm: Model, options: OptionsForApi = {}) { + const context: Context = { + messages: [ + { + role: "user", + content: "Write a long poem with 10 stanzas about the beauty of nature.", + timestamp: Date.now(), + }, + ], + }; + + const controller = new 
AbortController(); + const response = stream(llm, context, { ...options, signal: controller.signal }); + + let abortFired = false; + for await (const event of response) { + if (!abortFired && (event.type === "text_delta" || event.type === "thinking_delta")) { + abortFired = true; + setTimeout(() => controller.abort(), 3000); + } + } + + const msg = await response.result(); + + expect(msg.stopReason).toBe("aborted"); + + // OpenAI providers only send usage in the final chunk, so when aborted they have no token stats + // Anthropic and Google send usage information early in the stream + if (llm.api === "openai-completions" || llm.api === "openai-responses") { + expect(msg.usage.input).toBe(0); + expect(msg.usage.output).toBe(0); + } else { + expect(msg.usage.input).toBeGreaterThan(0); + expect(msg.usage.output).toBeGreaterThan(0); + expect(msg.usage.cost.input).toBeGreaterThan(0); + expect(msg.usage.cost.total).toBeGreaterThan(0); + } +} + +describe("Token Statistics on Abort", () => { + describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => { + const llm = getModel("google", "gemini-2.5-flash"); + + it("should include token stats when aborted mid-stream", async () => { + await testTokensOnAbort(llm, { thinking: { enabled: true } }); + }, 10000); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => { + const llm: Model<"openai-completions"> = { + ...getModel("openai", "gpt-4o-mini")!, + api: "openai-completions", + }; + + it("should include token stats when aborted mid-stream", async () => { + await testTokensOnAbort(llm); + }, 10000); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => { + const llm = getModel("openai", "gpt-5-mini"); + + it("should include token stats when aborted mid-stream", async () => { + await testTokensOnAbort(llm); + }, 20000); + }); + + describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => { 
+ const llm = getModel("anthropic", "claude-opus-4-1-20250805"); + + it("should include token stats when aborted mid-stream", async () => { + await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); + }, 10000); + }); +});