diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts
index 08791bc6..9897984d 100644
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@@ -335,6 +335,7 @@ export class Agent {
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: this.abortController?.signal.aborted ? "aborted" : "error",
diff --git a/packages/agent/src/transports/AppTransport.ts b/packages/agent/src/transports/AppTransport.ts
index 9ef1e8ce..5beb9dc6 100644
--- a/packages/agent/src/transports/AppTransport.ts
+++ b/packages/agent/src/transports/AppTransport.ts
@@ -44,6 +44,7 @@ function streamSimpleProxy(
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		timestamp: Date.now(),
diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index 61da782e..2c59840d 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Breaking Changes
+
+- **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). For the OpenAI Responses API and Google, this uses native API values (`total_tokens`, `totalTokenCount`). For OpenAI Completions (and other OpenAI-compatible providers), it is computed as `input + output + cacheRead`, since reasoning tokens are folded into `output` and some providers (e.g., Groq) omit them from `total_tokens`. For Anthropic, it's computed as `input + output + cacheRead + cacheWrite`.
+
 ## [0.12.10] - 2025-12-04
 
 ### Added
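For call sites migrating across this breaking change, a minimal sketch of the new shape (import path assumed; the zero-initialized literal mirrors the ones added throughout this diff):

```typescript
import type { Usage } from "@mariozechner/pi-ai"; // import path assumed

// totalTokens is now required alongside the other token counters.
const emptyUsage: Usage = {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 0, // new required field
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
```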
diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts
index 4e8b0b84..c8a738cd 100644
--- a/packages/ai/src/models.generated.ts
+++ b/packages/ai/src/models.generated.ts
@@ -5255,23 +5255,6 @@ export const MODELS = {
 		contextWindow: 131072,
 		maxTokens: 16384,
 	} satisfies Model<"openai-completions">,
-	"meta-llama/llama-3.1-405b-instruct": {
-		id: "meta-llama/llama-3.1-405b-instruct",
-		name: "Meta: Llama 3.1 405B Instruct",
-		api: "openai-completions",
-		provider: "openrouter",
-		baseUrl: "https://openrouter.ai/api/v1",
-		reasoning: false,
-		input: ["text"],
-		cost: {
-			input: 3.5,
-			output: 3.5,
-			cacheRead: 0,
-			cacheWrite: 0,
-		},
-		contextWindow: 130815,
-		maxTokens: 4096,
-	} satisfies Model<"openai-completions">,
 	"meta-llama/llama-3.1-70b-instruct": {
 		id: "meta-llama/llama-3.1-70b-instruct",
 		name: "Meta: Llama 3.1 70B Instruct",
@@ -5289,6 +5272,23 @@ export const MODELS = {
 		contextWindow: 131072,
 		maxTokens: 4096,
 	} satisfies Model<"openai-completions">,
+	"meta-llama/llama-3.1-405b-instruct": {
+		id: "meta-llama/llama-3.1-405b-instruct",
+		name: "Meta: Llama 3.1 405B Instruct",
+		api: "openai-completions",
+		provider: "openrouter",
+		baseUrl: "https://openrouter.ai/api/v1",
+		reasoning: false,
+		input: ["text"],
+		cost: {
+			input: 3.5,
+			output: 3.5,
+			cacheRead: 0,
+			cacheWrite: 0,
+		},
+		contextWindow: 130815,
+		maxTokens: 4096,
+	} satisfies Model<"openai-completions">,
 	"mistralai/mistral-nemo": {
 		id: "mistralai/mistral-nemo",
 		name: "Mistral: Mistral Nemo",
@@ -5306,9 +5306,9 @@ export const MODELS = {
 		contextWindow: 131072,
 		maxTokens: 16384,
 	} satisfies Model<"openai-completions">,
-	"openai/gpt-4o-mini-2024-07-18": {
-		id: "openai/gpt-4o-mini-2024-07-18",
-		name: "OpenAI: GPT-4o-mini (2024-07-18)",
+	"openai/gpt-4o-mini": {
+		id: "openai/gpt-4o-mini",
+		name: "OpenAI: GPT-4o-mini",
 		api: "openai-completions",
 		provider: "openrouter",
 		baseUrl: "https://openrouter.ai/api/v1",
 		reasoning: false,
 		input: ["text", "image"],
 		cost: {
 			input: 0.15,
 			output: 0.6,
 			cacheRead: 0.075,
 			cacheWrite: 0,
 		},
 		contextWindow: 128000,
 		maxTokens: 16384,
 	} satisfies Model<"openai-completions">,
-	"openai/gpt-4o-mini": {
-		id: "openai/gpt-4o-mini",
-		name: "OpenAI: GPT-4o-mini",
+	"openai/gpt-4o-mini-2024-07-18": {
+		id: "openai/gpt-4o-mini-2024-07-18",
+		name: "OpenAI: GPT-4o-mini (2024-07-18)",
 		api: "openai-completions",
 		provider: "openrouter",
 		baseUrl: "https://openrouter.ai/api/v1",
@@ -5425,23 +5425,6 @@ export const MODELS = {
 		contextWindow: 128000,
 		maxTokens: 4096,
 	} satisfies Model<"openai-completions">,
-	"openai/gpt-4o-2024-05-13": {
-		id: "openai/gpt-4o-2024-05-13",
-		name: "OpenAI: GPT-4o (2024-05-13)",
-		api: "openai-completions",
-		provider: "openrouter",
-		baseUrl: "https://openrouter.ai/api/v1",
-		reasoning: false,
-		input: ["text", "image"],
-		cost: {
-			input: 5,
-			output: 15,
-			cacheRead: 0,
-			cacheWrite: 0,
-		},
-		contextWindow: 128000,
-		maxTokens: 4096,
-	} satisfies Model<"openai-completions">,
 	"openai/gpt-4o": {
 		id: "openai/gpt-4o",
 		name: "OpenAI: GPT-4o",
@@ -5476,22 +5459,22 @@ export const MODELS = {
 		contextWindow: 128000,
 		maxTokens: 64000,
 	} satisfies Model<"openai-completions">,
-	"meta-llama/llama-3-70b-instruct": {
-		id: "meta-llama/llama-3-70b-instruct",
-		name: "Meta: Llama 3 70B Instruct",
+	"openai/gpt-4o-2024-05-13": {
+		id: "openai/gpt-4o-2024-05-13",
+		name: "OpenAI: GPT-4o (2024-05-13)",
 		api: "openai-completions",
 		provider: "openrouter",
 		baseUrl: "https://openrouter.ai/api/v1",
 		reasoning: false,
-		input: ["text"],
+		input: ["text", "image"],
 		cost: {
-			input: 0.3,
-			output: 0.39999999999999997,
+			input: 5,
+			output: 15,
 			cacheRead: 0,
 			cacheWrite: 0,
 		},
-		contextWindow: 8192,
-		maxTokens: 16384,
+		contextWindow: 128000,
+		maxTokens: 4096,
 	} satisfies Model<"openai-completions">,
 	"meta-llama/llama-3-8b-instruct": {
 		id: "meta-llama/llama-3-8b-instruct",
@@ -5510,6 +5493,23 @@ export const MODELS = {
 		contextWindow: 8192,
 		maxTokens: 16384,
 	} satisfies Model<"openai-completions">,
+	"meta-llama/llama-3-70b-instruct": {
+		id: "meta-llama/llama-3-70b-instruct",
+		name: "Meta: Llama 3 70B Instruct",
+		api: "openai-completions",
+		provider: "openrouter",
+		baseUrl: "https://openrouter.ai/api/v1",
+		reasoning: false,
+		input: ["text"],
+		cost: {
+			input: 0.3,
+			output: 0.39999999999999997,
+			cacheRead: 0,
+			cacheWrite: 0,
+		},
+		contextWindow: 8192,
+		maxTokens: 16384,
+	} satisfies Model<"openai-completions">,
 	"mistralai/mixtral-8x22b-instruct": {
 		id: "mistralai/mixtral-8x22b-instruct",
 		name: "Mistral: Mixtral 8x22B Instruct",
@@ -5595,23 +5595,6 @@ export const MODELS = {
 		contextWindow: 128000,
 		maxTokens: 4096,
 	} satisfies Model<"openai-completions">,
-	"openai/gpt-3.5-turbo-0613": {
-		id: "openai/gpt-3.5-turbo-0613",
-		name: "OpenAI: GPT-3.5 Turbo (older v0613)",
-		api: "openai-completions",
-		provider: "openrouter",
-		baseUrl: "https://openrouter.ai/api/v1",
-		reasoning: false,
-		input: ["text"],
-		cost: {
-			input: 1,
-			output: 2,
-			cacheRead: 0,
-			cacheWrite: 0,
-		},
-		contextWindow: 4095,
-		maxTokens: 4096,
-	} satisfies Model<"openai-completions">,
 	"openai/gpt-4-turbo-preview": {
 		id: "openai/gpt-4-turbo-preview",
 		name: "OpenAI: GPT-4 Turbo Preview",
@@ -5629,6 +5612,23 @@ export const MODELS = {
 		contextWindow: 128000,
 		maxTokens: 4096,
 	} satisfies Model<"openai-completions">,
+	"openai/gpt-3.5-turbo-0613": {
+		id: "openai/gpt-3.5-turbo-0613",
+		name: "OpenAI: GPT-3.5 Turbo (older v0613)",
+		api: "openai-completions",
+		provider: "openrouter",
+		baseUrl: "https://openrouter.ai/api/v1",
+		reasoning: false,
+		input: ["text"],
+		cost: {
+			input: 1,
+			output: 2,
+			cacheRead: 0,
+			cacheWrite: 0,
+		},
+		contextWindow: 4095,
+		maxTokens: 4096,
+	} satisfies Model<"openai-completions">,
 	"mistralai/mistral-tiny": {
 		id: "mistralai/mistral-tiny",
 		name: "Mistral Tiny",
diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts
index 39523886..e2e91be2 100644
--- a/packages/ai/src/providers/anthropic.ts
+++ b/packages/ai/src/providers/anthropic.ts
@@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		stopReason: "stop",
@@ -129,6 +130,9 @@
 				output.usage.output = event.message.usage.output_tokens || 0;
 				output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
 				output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
+				// Anthropic doesn't provide total_tokens, compute from components
+				output.usage.totalTokens =
+					output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
 				calculateCost(model, output.usage);
 			} else if (event.type === "content_block_start") {
 				if (event.content_block.type === "text") {
@@ -253,6 +257,9 @@
 				output.usage.output = event.usage.output_tokens || 0;
 				output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
 				output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
+				// Anthropic doesn't provide total_tokens, compute from components
+				output.usage.totalTokens =
+					output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
 				calculateCost(model, output.usage);
 			}
 		}
diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts
index 078bac7b..9d3ade4f 100644
--- a/packages/ai/src/providers/google.ts
+++ b/packages/ai/src/providers/google.ts
@@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		stopReason: "stop",
@@ -200,6 +201,7 @@
 						(chunk.usageMetadata.candidatesTokenCount || 0) +
 						(chunk.usageMetadata.thoughtsTokenCount || 0),
 					cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
 					cacheWrite: 0,
+					totalTokens: chunk.usageMetadata.totalTokenCount || 0,
 					cost: {
 						input: 0,
 						output: 0,
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 22f57503..a3c0a17e 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -50,6 +50,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		stopReason: "stop",
@@ -106,14 +107,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 		for await (const chunk of openaiStream) {
 			if (chunk.usage) {
 				const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
+				const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
+				const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
+				const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
 				output.usage = {
 					// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
-					input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
-					output:
-						(chunk.usage.completion_tokens || 0) +
-						(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
+					input,
+					output: outputTokens,
 					cacheRead: cachedTokens,
 					cacheWrite: 0,
+					// Compute totalTokens ourselves since we add reasoning_tokens to output
+					// and some providers (e.g., Groq) don't include them in total_tokens
+					totalTokens: input + outputTokens + cachedTokens,
 					cost: {
 						input: 0,
 						output: 0,
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index 45569b38..76a582be 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		stopReason: "stop",
@@ -260,6 +261,7 @@
 					output: response.usage.output_tokens || 0,
 					cacheRead: cachedTokens,
 					cacheWrite: 0,
+					totalTokens: response.usage.total_tokens || 0,
 					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 				};
 			}
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
index f53b4366..a7269bc8 100644
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@@ -82,6 +82,7 @@ export interface Usage {
 	output: number;
 	cacheRead: number;
 	cacheWrite: number;
+	totalTokens: number;
 	cost: {
 		input: number;
 		output: number;
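The arithmetic in the openai-completions.ts hunk above is easiest to see with concrete numbers. A small sketch (all values invented for illustration; field names follow the chat-completions usage payload used in that hunk):

```typescript
// Hypothetical usage chunk mirroring the mapping in openai-completions.ts.
const usage = {
	prompt_tokens: 100, // includes cached tokens
	completion_tokens: 40,
	prompt_tokens_details: { cached_tokens: 20 },
	completion_tokens_details: { reasoning_tokens: 10 },
};

const cacheRead = usage.prompt_tokens_details.cached_tokens; // 20
const input = usage.prompt_tokens - cacheRead; // 80 (non-cached input)
const output = usage.completion_tokens + usage.completion_tokens_details.reasoning_tokens; // 50
const totalTokens = input + output + cacheRead; // 150

// A provider that omits reasoning_tokens from its native total_tokens would
// report 140 here, which is why the stream computes totalTokens itself.
console.log({ input, output, cacheRead, totalTokens });
```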
diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts
index 0d0a8a54..cff10612 100644
--- a/packages/ai/test/empty.test.ts
+++ b/packages/ai/test/empty.test.ts
@@ -92,6 +92,7 @@ async function testEmptyAssistantMessage(llm: Model, opt
 		output: 0,
 		cacheRead: 0,
 		cacheWrite: 0,
+		totalTokens: 10,
 		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 	},
 	stopReason: "stop",
diff --git a/packages/ai/test/handoff.test.ts b/packages/ai/test/handoff.test.ts
index fad942c6..5504b71c 100644
--- a/packages/ai/test/handoff.test.ts
+++ b/packages/ai/test/handoff.test.ts
@@ -46,6 +46,7 @@ const providerContexts = {
 				output: 50,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 150,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "toolUse",
@@ -97,6 +98,7 @@ const providerContexts = {
 				output: 60,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 180,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "toolUse",
@@ -147,6 +149,7 @@ const providerContexts = {
 				output: 55,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 165,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "toolUse",
@@ -199,6 +202,7 @@ const providerContexts = {
 				output: 58,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 173,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "toolUse",
@@ -243,6 +247,7 @@ const providerContexts = {
 				output: 25,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 75,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "error",
diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts
new file mode 100644
index 00000000..8dc18971
--- /dev/null
+++ b/packages/ai/test/total-tokens.test.ts
@@ -0,0 +1,331 @@
+/**
+ * Test totalTokens field across all providers.
+ *
+ * totalTokens represents the total number of tokens processed by the LLM,
+ * including input (with cache) and output (with thinking). This is the
+ * base for calculating context size for the next request.
+ *
+ * - OpenAI Completions: Computed as input + output + cacheRead (reasoning
+ *   tokens are folded into output, and some providers omit them from the
+ *   native total_tokens field)
+ * - OpenAI Responses: Uses native total_tokens field
+ * - Google: Uses native totalTokenCount field
+ * - Anthropic: Computed as input + output + cacheRead + cacheWrite
+ * - Other OpenAI-compatible providers: Same computation as OpenAI Completions
+ */
+
+import { describe, expect, it } from "vitest";
+import { getModel } from "../src/models.js";
+import { complete } from "../src/stream.js";
+import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
+
+// Generate a long system prompt to trigger caching (>2k bytes for most providers)
+const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
+
+Here is some additional context that makes this system prompt long enough to trigger caching:
+
+${Array(50)
+	.fill(
+		"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.",
+	)
+	.join("\n\n")}
+
+Remember: Always be helpful and concise.`;
+
+async function testTotalTokensWithCache(
+	llm: Model,
+	options: OptionsForApi = {} as OptionsForApi,
+): Promise<{ first: Usage; second: Usage }> {
+	// First request - no cache
+	const context1: Context = {
+		systemPrompt: LONG_SYSTEM_PROMPT,
+		messages: [
+			{
+				role: "user",
+				content: "What is 2 + 2? Reply with just the number.",
+				timestamp: Date.now(),
+			},
+		],
+	};
+
+	const response1 = await complete(llm, context1, options);
+	expect(response1.stopReason).toBe("stop");
+
+	// Second request - should trigger cache read (same system prompt, add conversation)
+	const context2: Context = {
+		systemPrompt: LONG_SYSTEM_PROMPT,
+		messages: [
+			...context1.messages,
+			response1, // Include previous assistant response
+			{
+				role: "user",
+				content: "What is 3 + 3? Reply with just the number.",
+				timestamp: Date.now(),
+			},
+		],
+	};
+
+	const response2 = await complete(llm, context2, options);
+	expect(response2.stopReason).toBe("stop");
+
+	return { first: response1.usage, second: response2.usage };
+}
+
+function logUsage(label: string, usage: Usage) {
+	const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
+	console.log(`  ${label}:`);
+	console.log(
+		`    input: ${usage.input}, output: ${usage.output}, cacheRead: ${usage.cacheRead}, cacheWrite: ${usage.cacheWrite}`,
+	);
+	console.log(`    totalTokens: ${usage.totalTokens}, computed: ${computed}`);
+}
+
+function assertTotalTokensEqualsComponents(usage: Usage) {
+	const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
+	expect(usage.totalTokens).toBe(computed);
+}
+
+describe("totalTokens field", () => {
+	// =========================================================================
+	// Anthropic
+	// =========================================================================
+
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
+		it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
+
+			console.log(`\nAnthropic / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+
+			// Anthropic should have cache activity
+			const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
+			expect(hasCache).toBe(true);
+		}, 60000);
+	});
+
+	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
+		it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("anthropic", "claude-sonnet-4-20250514");
+
+			console.log(`\nAnthropic OAuth / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+
+			// Anthropic should have cache activity
+			const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
+			expect(hasCache).toBe(true);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// OpenAI
+	// =========================================================================
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
+		it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
+			const llm: Model<"openai-completions"> = {
+				...getModel("openai", "gpt-4o-mini")!,
+				api: "openai-completions",
+			};
+
+			console.log(`\nOpenAI Completions / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm);
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
+		it("gpt-4o - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openai", "gpt-4o");
+
+			console.log(`\nOpenAI Responses / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm);
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// Google
+	// =========================================================================
+
+	describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
+		it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("google", "gemini-2.0-flash");
+
+			console.log(`\nGoogle / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm);
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// xAI
+	// =========================================================================
+
+	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
+		it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("xai", "grok-3-fast");
+
+			console.log(`\nxAI / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// Groq
+	// =========================================================================
+
+	describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
+		it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("groq", "openai/gpt-oss-120b");
+
+			console.log(`\nGroq / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// Cerebras
+	// =========================================================================
+
+	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
+		it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("cerebras", "gpt-oss-120b");
+
+			console.log(`\nCerebras / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// z.ai
+	// =========================================================================
+
+	describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
+		it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("zai", "glm-4.5-flash");
+
+			console.log(`\nz.ai / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+
+	// =========================================================================
+	// OpenRouter - Multiple backend providers
+	// =========================================================================
+
+	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
+		it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
+
+			console.log(`\nOpenRouter / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+
+		it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openrouter", "deepseek/deepseek-chat");
+
+			console.log(`\nOpenRouter / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+
+		it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
+
+			console.log(`\nOpenRouter / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+
+		it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
+
+			console.log(`\nOpenRouter / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+
+		it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
+			const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
+
+			console.log(`\nOpenRouter / ${llm.id}:`);
+			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+
+			logUsage("First request", first);
+			logUsage("Second request", second);
+
+			assertTotalTokensEqualsComponents(first);
+			assertTotalTokensEqualsComponents(second);
+		}, 60000);
+	});
+});
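A note on running the suite above: every describe block is gated on its provider's API key via describe.skipIf, so a single-provider run looks something like `ANTHROPIC_API_KEY=... npx vitest run packages/ai/test/total-tokens.test.ts` (exact command shape assumed; suites whose key is absent are skipped rather than failed).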
diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts
index c52a311a..d77a2623 100644
--- a/packages/ai/test/unicode-surrogate.test.ts
+++ b/packages/ai/test/unicode-surrogate.test.ts
@@ -42,6 +42,7 @@ async function testEmojiInToolResults(llm: Model, option
 		output: 0,
 		cacheRead: 0,
 		cacheWrite: 0,
+		totalTokens: 0,
 		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 	},
 	stopReason: "toolUse",
@@ -126,6 +127,7 @@ async function testRealWorldLinkedInData(llm: Model, opt
 		output: 0,
 		cacheRead: 0,
 		cacheWrite: 0,
+		totalTokens: 0,
 		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 	},
 	stopReason: "toolUse",
@@ -213,6 +215,7 @@ async function testUnpairedHighSurrogate(llm: Model, opt
 		output: 0,
 		cacheRead: 0,
 		cacheWrite: 0,
+		totalTokens: 0,
 		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 	},
 	stopReason: "toolUse",
diff --git a/packages/coding-agent/src/compaction.ts b/packages/coding-agent/src/compaction.ts
index 3756718d..34ec7f1f 100644
--- a/packages/coding-agent/src/compaction.ts
+++ b/packages/coding-agent/src/compaction.ts
@@ -32,9 +32,10 @@ export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
 
 /**
  * Calculate total context tokens from usage.
+ * Uses the native totalTokens field when available (non-zero), falling back to the sum of the components.
  */
 export function calculateContextTokens(usage: Usage): number {
-	return usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
+	return usage.totalTokens || usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
 }
 
 /**
diff --git a/packages/coding-agent/test/compaction.test.ts b/packages/coding-agent/test/compaction.test.ts
index 1153bd29..fea8649f 100644
--- a/packages/coding-agent/test/compaction.test.ts
+++ b/packages/coding-agent/test/compaction.test.ts
@@ -38,6 +38,7 @@ function createMockUsage(input: number, output: number, cacheRead = 0, cacheWrit
 	output,
 	cacheRead,
 	cacheWrite,
+	totalTokens: input + output + cacheRead + cacheWrite,
 	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 };
 }
diff --git a/packages/web-ui/example/src/main.ts b/packages/web-ui/example/src/main.ts
index 5d51352b..7c7f4d9f 100644
--- a/packages/web-ui/example/src/main.ts
+++ b/packages/web-ui/example/src/main.ts
@@ -131,6 +131,7 @@ const saveSession = async () => {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
diff --git a/packages/web-ui/src/agent/agent.ts b/packages/web-ui/src/agent/agent.ts
index cdcebe42..893354ca 100644
--- a/packages/web-ui/src/agent/agent.ts
+++ b/packages/web-ui/src/agent/agent.ts
@@ -308,6 +308,7 @@ export class Agent {
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: this.abortController?.signal.aborted ? "aborted" : "error",
diff --git a/packages/web-ui/src/agent/transports/AppTransport.ts b/packages/web-ui/src/agent/transports/AppTransport.ts
index 810f78c1..0d5135a8 100644
--- a/packages/web-ui/src/agent/transports/AppTransport.ts
+++ b/packages/web-ui/src/agent/transports/AppTransport.ts
@@ -46,6 +46,7 @@ function streamSimpleProxy(
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		timestamp: Date.now(),
diff --git a/packages/web-ui/src/components/AgentInterface.ts b/packages/web-ui/src/components/AgentInterface.ts
index 5b964d8e..3d44faa3 100644
--- a/packages/web-ui/src/components/AgentInterface.ts
+++ b/packages/web-ui/src/components/AgentInterface.ts
@@ -266,6 +266,7 @@ export class AgentInterface extends LitElement {
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			} satisfies Usage,
 		);
diff --git a/packages/web-ui/src/storage/stores/sessions-store.ts b/packages/web-ui/src/storage/stores/sessions-store.ts
index aed3dbef..40a34edb 100644
--- a/packages/web-ui/src/storage/stores/sessions-store.ts
+++ b/packages/web-ui/src/storage/stores/sessions-store.ts
@@ -101,6 +101,7 @@ export class SessionsStore extends Store {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		},
 		thinkingLevel: state.thinkingLevel || "off",
diff --git a/packages/web-ui/src/storage/types.ts b/packages/web-ui/src/storage/types.ts
index e6bdd628..038f9657 100644
--- a/packages/web-ui/src/storage/types.ts
+++ b/packages/web-ui/src/storage/types.ts
@@ -118,6 +118,8 @@ export interface SessionMetadata {
 	cacheRead: number;
 	/** Total cache write tokens */
 	cacheWrite: number;
+	/** Total tokens processed */
+	totalTokens: number;
 	/** Total cost breakdown */
 	cost: {
 		input: number;
diff --git a/packages/web-ui/src/utils/test-sessions.ts b/packages/web-ui/src/utils/test-sessions.ts
index 37154b99..5d54c093 100644
--- a/packages/web-ui/src/utils/test-sessions.ts
+++ b/packages/web-ui/src/utils/test-sessions.ts
@@ -56,11 +56,13 @@ export const simpleHtml = {
 			output: 375,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0030632000000000003,
 				output: 0.0015,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.0045632,
 			},
 		},
@@ -89,11 +91,13 @@ export const simpleHtml = {
 			output: 162,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.003376,
 				output: 0.0006479999999999999,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.004024,
 			},
 		},
@@ -159,11 +163,13 @@ export const longSession = {
 			output: 455,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0030632000000000003,
 				output: 0.00182,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.004883200000000001,
 			},
 		},
@@ -192,11 +198,13 @@ export const longSession = {
 			output: 147,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0034384000000000003,
 				output: 0.000588,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.0040264,
 			},
 		},
@@ -235,11 +243,13 @@ export const longSession = {
 			output: 96,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0035656000000000004,
 				output: 0.000384,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.0039496,
 			},
 		},
@@ -267,11 +277,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -312,11 +324,13 @@ export const longSession = {
 			output: 115,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0049456000000000005,
 				output: 0.00045999999999999996,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.005405600000000001,
 			},
 		},
@@ -348,11 +362,13 @@ export const longSession = {
 			output: 86,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0050696000000000005,
 				output: 0.00034399999999999996,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.0054136,
 			},
 		},
@@ -391,11 +407,13 @@ export const longSession = {
 			output: 294,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.005151200000000001,
 				output: 0.001176,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.006327200000000001,
 			},
 		},
@@ -428,11 +446,13 @@ export const longSession = {
 			output: 159,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.0054152,
 				output: 0.000636,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.0060512000000000005,
 			},
 		},
@@ -471,11 +491,13 @@ export const longSession = {
 			output: 379,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.005566400000000001,
 				output: 0.001516,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.007082400000000001,
 			},
 		},
@@ -516,11 +538,13 @@ export const longSession = {
 			output: 537,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.005900000000000001,
 				output: 0.0021479999999999997,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.008048,
 			},
 		},
@@ -547,11 +571,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -583,11 +609,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -627,11 +655,13 @@ export const longSession = {
 			output: 492,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.024597,
 				output: 0.00738,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.031977,
 			},
 		},
@@ -672,11 +702,13 @@ export const longSession = {
 			output: 213,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.026211,
 				output: 0.003195,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.029406,
 			},
 		},
@@ -709,11 +741,13 @@ export const longSession = {
 			output: 134,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.026958,
 				output: 0.00201,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.028968,
 			},
 		},
@@ -752,11 +786,13 @@ export const longSession = {
 			output: 331,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.02739,
 				output: 0.004965,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.032355,
 			},
 		},
@@ -788,11 +824,13 @@ export const longSession = {
 			output: 53,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.028443,
 				output: 0.000795,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.029238,
 			},
 		},
@@ -831,11 +869,13 @@ export const longSession = {
 			output: 329,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.028623,
 				output: 0.004935,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.033558,
 			},
 		},
@@ -867,11 +907,13 @@ export const longSession = {
 			output: 46,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.029670000000000002,
 				output: 0.00069,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.03036,
 			},
 		},
@@ -897,11 +939,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -937,11 +981,13 @@ export const longSession = {
 			output: 285,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.029856,
 				output: 0.004275,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.034131,
 			},
 		},
@@ -974,11 +1020,13 @@ export const longSession = {
 			output: 39,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.030831,
 				output: 0.000585,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.031416,
 			},
 		},
@@ -1017,11 +1065,13 @@ export const longSession = {
 			output: 473,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.030993,
 				output: 0.007095000000000001,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.038088,
 			},
 		},
@@ -1048,11 +1098,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -1088,11 +1140,13 @@ export const longSession = {
 			output: 348,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.032556,
 				output: 0.00522,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.037776000000000004,
 			},
 		},
@@ -1133,11 +1187,13 @@ export const longSession = {
 			output: 310,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.033942,
 				output: 0.0046500000000000005,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.038592,
 			},
 		},
@@ -1170,11 +1226,13 @@ export const longSession = {
 			output: 53,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.034977,
 				output: 0.000795,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.035772,
 			},
 		},
@@ -1213,11 +1271,13 @@ export const longSession = {
 			output: 423,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.035160000000000004,
 				output: 0.006345,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.041505,
 			},
 		},
@@ -1258,11 +1318,13 @@ export const longSession = {
 			output: 193,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.036651,
 				output: 0.002895,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.039546000000000005,
 			},
 		},
@@ -1295,11 +1357,13 @@ export const longSession = {
 			output: 104,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.037557,
 				output: 0.00156,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.039117,
 			},
 		},
@@ -1334,11 +1398,13 @@ export const longSession = {
 			output: 146,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.037911,
 				output: 0.00219,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.040101,
 			},
 		},
@@ -1371,11 +1437,13 @@ export const longSession = {
 			output: 63,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.038535,
 				output: 0.000945,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.03948,
 			},
 		},
@@ -1401,11 +1469,13 @@ export const longSession = {
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0,
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0,
 			},
 		},
@@ -1445,11 +1515,13 @@ export const longSession = {
 			output: 324,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.038823,
 				output: 0.00486,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.043683,
 			},
 		},
@@ -1490,11 +1562,13 @@ export const longSession = {
 			output: 385,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.040605,
 				output: 0.005775,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.046380000000000005,
 			},
 		},
@@ -1531,11 +1605,13 @@ export const longSession = {
 			output: 436,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.043749,
 				output: 0.00654,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.050289,
 			},
 		},
@@ -1571,11 +1647,13 @@ export const longSession = {
 			output: 685,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.045105,
 				output: 0.010275,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.05538,
 			},
 		},
@@ -1615,11 +1693,13 @@ export const longSession = {
 			output: 683,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.047214,
 				output: 0.010245,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.057458999999999996,
 			},
 		},
@@ -1664,11 +1744,13 @@ export const longSession = {
 			output: 3462,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.058758000000000005,
 				output: 0.051930000000000004,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.11068800000000001,
 			},
 		},
@@ -1697,11 +1779,13 @@ export const longSession = {
 			output: 223,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.069195,
 				output: 0.003345,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.07254000000000001,
 			},
 		},
@@ -1740,11 +1824,13 @@ export const longSession = {
 			output: 335,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.06991800000000001,
 				output: 0.005025,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.07494300000000001,
 			},
 		},
@@ -1785,11 +1871,13 @@ export const longSession = {
 			output: 499,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.075036,
 				output: 0.007485,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.08252100000000001,
 			},
 		},
@@ -1830,11 +1918,13 @@ export const longSession = {
 			output: 462,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.078387,
 				output: 0.00693,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.085317,
 			},
 		},
@@ -1875,11 +1965,13 @@ export const longSession = {
 			output: 431,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.079914,
 				output: 0.006465,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.086379,
 			},
 		},
@@ -1920,11 +2012,13 @@ export const longSession = {
 			output: 335,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.083382,
 				output: 0.005025,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.088407,
 			},
 		},
@@ -1969,11 +2063,13 @@ export const longSession = {
 			output: 1209,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.08655600000000001,
 				output: 0.018135000000000002,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.104691,
 			},
 		},
@@ -2002,11 +2098,13 @@ export const longSession = {
 			output: 249,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.09024,
 				output: 0.003735,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.093975,
 			},
 		},
@@ -2045,11 +2143,13 @@ export const longSession = {
 			output: 279,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.091008,
 				output: 0.004185,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.095193,
 			},
 		},
@@ -2078,11 +2178,13 @@ export const longSession = {
 			output: 54,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.091893,
 				output: 0.0008100000000000001,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.09270300000000001,
 			},
 		},
@@ -2121,11 +2223,13 @@ export const longSession = {
 			output: 162,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.092097,
 				output: 0.00243,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.094527,
 			},
 		},
@@ -2155,11 +2259,13 @@ export const longSession = {
 			output: 67,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.09271800000000001,
 				output: 0.001005,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.09372300000000001,
 			},
 		},
@@ -2199,11 +2305,13 @@ export const longSession = {
 			output: 182,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.092937,
 				output: 0.0027300000000000002,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.095667,
 			},
 		},
@@ -2233,11 +2341,13 @@ export const longSession = {
 			output: 33,
 			cacheRead: 0,
 			cacheWrite: 0,
+			totalTokens: 0,
 			cost: {
 				input: 0.093642,
 				output: 0.000495,
 				cacheRead: 0,
 				cacheWrite: 0,
+				totalTokens: 0,
 				total: 0.094137,
 			},
 		},
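Several call sites in this diff roll per-message usage up into session totals (the reduce in AgentInterface.ts, the counters on SessionMetadata). A hedged sketch of such an aggregation with the new field included (types inlined here; the real code uses the Usage type from packages/ai):

```typescript
type TokenCounts = { input: number; output: number; cacheRead: number; cacheWrite: number; totalTokens: number };

// Sums per-message token counts into a session-level total, totalTokens included.
function sumUsage(usages: TokenCounts[]): TokenCounts {
	return usages.reduce(
		(acc, u) => ({
			input: acc.input + u.input,
			output: acc.output + u.output,
			cacheRead: acc.cacheRead + u.cacheRead,
			cacheWrite: acc.cacheWrite + u.cacheWrite,
			totalTokens: acc.totalTokens + u.totalTokens,
		}),
		{ input: 0, output: 0, cacheRead: 0, cacheWrite: 0, totalTokens: 0 },
	);
}
```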