Add totalTokens field to Usage type

- Added totalTokens field to Usage interface in pi-ai
- Anthropic: computed as input + output + cacheRead + cacheWrite
- OpenAI/Google: uses native total_tokens/totalTokenCount
- Fixed openai-completions to compute totalTokens when reasoning tokens present
- Updated calculateContextTokens() to use totalTokens field
- Added comprehensive test covering 13 providers

fixes #130
This commit is contained in:
Mario Zechner 2025-12-06 22:46:02 +01:00
parent 52f1a8cb31
commit 86e5a70ec4
22 changed files with 552 additions and 70 deletions

View file

@ -2,6 +2,10 @@
## [Unreleased]
### Breaking Changes
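- **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). For OpenAI Responses and Google, this uses native API values (`total_tokens`, `totalTokenCount`). For OpenAI Completions, it's computed as `input + output + cacheRead`, because reasoning tokens are added to `output` and some providers (e.g., Groq) don't include them in `total_tokens`. For Anthropic, it's computed as `input + output + cacheRead + cacheWrite`.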
## [0.12.10] - 2025-12-04
### Added

View file

@ -5255,23 +5255,6 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 3.5,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-70b-instruct": {
id: "meta-llama/llama-3.1-70b-instruct",
name: "Meta: Llama 3.1 70B Instruct",
@ -5289,6 +5272,23 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3.1-405b-instruct": {
id: "meta-llama/llama-3.1-405b-instruct",
name: "Meta: Llama 3.1 405B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 3.5,
output: 3.5,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 130815,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-nemo": {
id: "mistralai/mistral-nemo",
name: "Mistral: Mistral Nemo",
@ -5306,9 +5306,9 @@ export const MODELS = {
contextWindow: 131072,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -5323,9 +5323,9 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-mini": {
id: "openai/gpt-4o-mini",
name: "OpenAI: GPT-4o-mini",
"openai/gpt-4o-mini-2024-07-18": {
id: "openai/gpt-4o-mini-2024-07-18",
name: "OpenAI: GPT-4o-mini (2024-07-18)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
@ -5425,23 +5425,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text", "image"],
cost: {
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4o": {
id: "openai/gpt-4o",
name: "OpenAI: GPT-4o",
@ -5476,22 +5459,22 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 64000,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
"openai/gpt-4o-2024-05-13": {
id: "openai/gpt-4o-2024-05-13",
name: "OpenAI: GPT-4o (2024-05-13)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
input: ["text", "image"],
cost: {
input: 0.3,
output: 0.39999999999999997,
input: 5,
output: 15,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-8b-instruct": {
id: "meta-llama/llama-3-8b-instruct",
@ -5510,6 +5493,23 @@ export const MODELS = {
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"meta-llama/llama-3-70b-instruct": {
id: "meta-llama/llama-3-70b-instruct",
name: "Meta: Llama 3 70B Instruct",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 0.3,
output: 0.39999999999999997,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 8192,
maxTokens: 16384,
} satisfies Model<"openai-completions">,
"mistralai/mixtral-8x22b-instruct": {
id: "mistralai/mixtral-8x22b-instruct",
name: "Mistral: Mixtral 8x22B Instruct",
@ -5595,23 +5595,6 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-4-turbo-preview": {
id: "openai/gpt-4-turbo-preview",
name: "OpenAI: GPT-4 Turbo Preview",
@ -5629,6 +5612,23 @@ export const MODELS = {
contextWindow: 128000,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"openai/gpt-3.5-turbo-0613": {
id: "openai/gpt-3.5-turbo-0613",
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
api: "openai-completions",
provider: "openrouter",
baseUrl: "https://openrouter.ai/api/v1",
reasoning: false,
input: ["text"],
cost: {
input: 1,
output: 2,
cacheRead: 0,
cacheWrite: 0,
},
contextWindow: 4095,
maxTokens: 4096,
} satisfies Model<"openai-completions">,
"mistralai/mistral-tiny": {
id: "mistralai/mistral-tiny",
name: "Mistral Tiny",

View file

@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -129,6 +130,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
@ -253,6 +257,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.usage.output_tokens || 0;
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
}
}

View file

@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -200,6 +201,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
cacheWrite: 0,
totalTokens: chunk.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,

View file

@ -50,6 +50,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -106,14 +107,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
for await (const chunk of openaiStream) {
if (chunk.usage) {
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
output.usage = {
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
output:
(chunk.usage.completion_tokens || 0) +
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
input,
output: outputTokens,
cacheRead: cachedTokens,
cacheWrite: 0,
// Compute totalTokens ourselves since we add reasoning_tokens to output
// and some providers (e.g., Groq) don't include them in total_tokens
totalTokens: input + outputTokens + cachedTokens,
cost: {
input: 0,
output: 0,

View file

@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -260,6 +261,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}

View file

@ -82,6 +82,7 @@ export interface Usage {
output: number;
cacheRead: number;
cacheWrite: number;
totalTokens: number;
cost: {
input: number;
output: number;

View file

@ -92,6 +92,7 @@ async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 10,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",

View file

@ -46,6 +46,7 @@ const providerContexts = {
output: 50,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 150,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -97,6 +98,7 @@ const providerContexts = {
output: 60,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 180,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -147,6 +149,7 @@ const providerContexts = {
output: 55,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 165,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -199,6 +202,7 @@ const providerContexts = {
output: 58,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 173,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -243,6 +247,7 @@ const providerContexts = {
output: 25,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 75,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "error",

View file

@ -0,0 +1,331 @@
/**
* Test totalTokens field across all providers.
*
* totalTokens represents the total number of tokens processed by the LLM,
* including input (with cache) and output (with thinking). This is the
* base for calculating context size for the next request.
*
* - OpenAI Completions: Computed as input + output + cacheRead (reasoning tokens are added to output)
* - OpenAI Responses: Uses native total_tokens field
* - Google: Uses native totalTokenCount field
* - Anthropic: Computed as input + output + cacheRead + cacheWrite
* - Other OpenAI-compatible providers served through openai-completions: Computed the same way as OpenAI Completions
*/
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
// Generate a long system prompt to trigger caching (>2k bytes for most providers).
// The prompt is a fixed opening instruction, then one filler sentence repeated
// 50 times (copies separated by a blank line), then a fixed closing reminder.
// Every request in this file sends the same prompt text.
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
Here is some additional context that makes this system prompt long enough to trigger caching:
${Array(50)
.fill(
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.",
)
.join("\n\n")}
Remember: Always be helpful and concise.`;
/**
 * Sends two back-to-back completions that share the same long system prompt
 * and returns the usage reported for each.
 *
 * The follow-up request replays the opening user message and the assistant's
 * first reply before asking a new question, which is intended to give the
 * provider a chance to serve the shared prefix from its prompt cache.
 *
 * Both responses are asserted to finish with stopReason "stop".
 *
 * @param llm Model to exercise.
 * @param options Provider options forwarded unchanged to both requests.
 * @returns Usage of the first and of the second request.
 */
async function testTotalTokensWithCache<TApi extends Api>(
	llm: Model<TApi>,
	options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
): Promise<{ first: Usage; second: Usage }> {
	// Opening turn: nothing has been sent yet, so no cache hit is expected.
	const openingTurn: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [
			{
				role: "user",
				content: "What is 2 + 2? Reply with just the number.",
				timestamp: Date.now(),
			},
		],
	};
	const openingReply = await complete(llm, openingTurn, options);
	expect(openingReply.stopReason).toBe("stop");

	// Follow-up turn: same system prompt, earlier conversation replayed in order,
	// then a fresh question appended at the end.
	const followUpTurn: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [
			...openingTurn.messages,
			openingReply,
			{
				role: "user",
				content: "What is 3 + 3? Reply with just the number.",
				timestamp: Date.now(),
			},
		],
	};
	const followUpReply = await complete(llm, followUpTurn, options);
	expect(followUpReply.stopReason).toBe("stop");

	return { first: openingReply.usage, second: followUpReply.usage };
}
/**
 * Prints one usage record to the console as three lines: the label, the four
 * token components, and the reported totalTokens next to the sum of the
 * components (so a mismatch is visible in the test output).
 *
 * @param label Heading printed on the first line.
 * @param usage Usage record to print.
 */
function logUsage(label: string, usage: Usage) {
	const componentSum = [usage.input, usage.output, usage.cacheRead, usage.cacheWrite].reduce(
		(running, part) => running + part,
		0,
	);
	const report = [
		` ${label}:`,
		` input: ${usage.input}, output: ${usage.output}, cacheRead: ${usage.cacheRead}, cacheWrite: ${usage.cacheWrite}`,
		` totalTokens: ${usage.totalTokens}, computed: ${componentSum}`,
	];
	for (const line of report) {
		console.log(line);
	}
}
/**
 * Fails the current test unless usage.totalTokens is exactly
 * input + output + cacheRead + cacheWrite.
 *
 * @param usage Usage record to validate.
 */
function assertTotalTokensEqualsComponents(usage: Usage) {
	const { input, output, cacheRead, cacheWrite } = usage;
	expect(usage.totalTokens).toBe(input + output + cacheRead + cacheWrite);
}
/**
 * Shared body of every provider case below: runs the two-request scenario,
 * logs both usage records, and asserts that each record's totalTokens equals
 * the sum of its components.
 *
 * @param llm Model under test.
 * @param options Provider options; when omitted the scenario falls back to its own default.
 * @returns Both usage records, so callers can add provider-specific assertions.
 */
async function runTotalTokensScenario<TApi extends Api>(
	llm: Model<TApi>,
	options?: OptionsForApi<TApi>,
): Promise<{ first: Usage; second: Usage }> {
	const usages = await testTotalTokensWithCache(llm, options);
	logUsage("First request", usages.first);
	logUsage("Second request", usages.second);
	assertTotalTokensEqualsComponents(usages.first);
	assertTotalTokensEqualsComponents(usages.second);
	return usages;
}

// Each provider group is skipped when its API key / token env var is not set.
describe("totalTokens field", () => {
	// ---------------------------------------------------------------------
	// Anthropic
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
		it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
			console.log(`\nAnthropic / ${llm.id}:`);
			const { first, second } = await runTotalTokensScenario(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
			// Anthropic is expected to report a cache write or read somewhere across the two requests.
			const hasCache = [second.cacheRead, second.cacheWrite, first.cacheWrite].some((count) => count > 0);
			expect(hasCache).toBe(true);
		}, 60000);
	});

	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
		it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("anthropic", "claude-sonnet-4-20250514");
			console.log(`\nAnthropic OAuth / ${llm.id}:`);
			// The OAuth token is passed through the apiKey option.
			const { first, second } = await runTotalTokensScenario(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
			// Anthropic is expected to report a cache write or read somewhere across the two requests.
			const hasCache = [second.cacheRead, second.cacheWrite, first.cacheWrite].some((count) => count > 0);
			expect(hasCache).toBe(true);
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// OpenAI
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
		it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
			// Override the registry entry's api so the completions code path is used.
			const llm: Model<"openai-completions"> = {
				...getModel("openai", "gpt-4o-mini")!,
				api: "openai-completions",
			};
			console.log(`\nOpenAI Completions / ${llm.id}:`);
			await runTotalTokensScenario(llm);
		}, 60000);
	});

	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
		it("gpt-4o - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openai", "gpt-4o");
			console.log(`\nOpenAI Responses / ${llm.id}:`);
			await runTotalTokensScenario(llm);
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// Google
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
		it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("google", "gemini-2.0-flash");
			console.log(`\nGoogle / ${llm.id}:`);
			await runTotalTokensScenario(llm);
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// xAI
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
		it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("xai", "grok-3-fast");
			console.log(`\nxAI / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.XAI_API_KEY });
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// Groq
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
		it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("groq", "openai/gpt-oss-120b");
			console.log(`\nGroq / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.GROQ_API_KEY });
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// Cerebras
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
		it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("cerebras", "gpt-oss-120b");
			console.log(`\nCerebras / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.CEREBRAS_API_KEY });
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// z.ai
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
		it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("zai", "glm-4.5-flash");
			console.log(`\nz.ai / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.ZAI_API_KEY });
		}, 60000);
	});

	// ---------------------------------------------------------------------
	// OpenRouter - Multiple backend providers
	// ---------------------------------------------------------------------
	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
		it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
			console.log(`\nOpenRouter / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.OPENROUTER_API_KEY });
		}, 60000);

		it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "deepseek/deepseek-chat");
			console.log(`\nOpenRouter / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.OPENROUTER_API_KEY });
		}, 60000);

		it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
			console.log(`\nOpenRouter / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.OPENROUTER_API_KEY });
		}, 60000);

		it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
			console.log(`\nOpenRouter / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.OPENROUTER_API_KEY });
		}, 60000);

		it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
			console.log(`\nOpenRouter / ${llm.id}:`);
			await runTotalTokensScenario(llm, { apiKey: process.env.OPENROUTER_API_KEY });
		}, 60000);
	});
});

View file

@ -42,6 +42,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -126,6 +127,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",
@ -213,6 +215,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "toolUse",