Mirror of https://github.com/getcompanion-ai/co-mono.git (synced 2026-04-17 07:03:25 +00:00)
Add totalTokens field to Usage type
- Added totalTokens field to Usage interface in pi-ai
- Anthropic: computed as input + output + cacheRead + cacheWrite
- OpenAI/Google: uses native total_tokens/totalTokenCount
- Fixed openai-completions to compute totalTokens when reasoning tokens present
- Updated calculateContextTokens() to use totalTokens field
- Added comprehensive test covering 13 providers

Fixes #130
parent 52f1a8cb31 · commit 86e5a70ec4
22 changed files with 552 additions and 70 deletions
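For orientation, here is a minimal sketch of the normalization the commit message describes. The helper names and raw-usage shapes below are illustrative assumptions; only the Usage field names, total_tokens, and totalTokenCount come from the commit itself, and the actual pi-ai implementation may differ.

// Illustrative sketch only - helper names and raw-usage shapes are assumed,
// not taken from the pi-ai source.
interface UsageTotals {
	input: number;
	output: number;
	cacheRead: number;
	cacheWrite: number;
	totalTokens: number;
}

// Anthropic reports no grand total, so it is derived from the components.
function totalFromAnthropic(u: Omit<UsageTotals, "totalTokens">): number {
	return u.input + u.output + u.cacheRead + u.cacheWrite;
}

// OpenAI-compatible responses carry a native total_tokens field.
function totalFromOpenAI(raw: { total_tokens: number }): number {
	return raw.total_tokens;
}

// Google usage metadata carries totalTokenCount.
function totalFromGoogle(raw: { totalTokenCount: number }): number {
	return raw.totalTokenCount;
}

// With the field populated, context-size tracking can read it directly,
// which is what the updated calculateContextTokens() is described as doing
// (hypothetical helper name here).
function contextTokensFor(usage: UsageTotals): number {
	return usage.totalTokens;
}

The new test file added by this commit asserts exactly this invariant for every provider: usage.totalTokens must equal input + output + cacheRead + cacheWrite.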
@@ -92,6 +92,7 @@ async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, opt
 output: 0,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 10,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "stop",

@@ -46,6 +46,7 @@ const providerContexts = {
 output: 50,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 150,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -97,6 +98,7 @@ const providerContexts = {
 output: 60,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 180,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -147,6 +149,7 @@ const providerContexts = {
 output: 55,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 165,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -199,6 +202,7 @@ const providerContexts = {
 output: 58,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 173,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -243,6 +247,7 @@ const providerContexts = {
 output: 25,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 75,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "error",

packages/ai/test/total-tokens.test.ts (new file, 331 additions)
@@ -0,0 +1,331 @@
/**
 * Test totalTokens field across all providers.
 *
 * totalTokens represents the total number of tokens processed by the LLM,
 * including input (with cache) and output (with thinking). This is the
 * base for calculating context size for the next request.
 *
 * - OpenAI Completions: Uses native total_tokens field
 * - OpenAI Responses: Uses native total_tokens field
 * - Google: Uses native totalTokenCount field
 * - Anthropic: Computed as input + output + cacheRead + cacheWrite
 * - Other OpenAI-compatible providers: Uses native total_tokens field
 */

import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";

// Generate a long system prompt to trigger caching (>2k bytes for most providers)
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.

Here is some additional context that makes this system prompt long enough to trigger caching:

${Array(50)
	.fill(
		"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.",
	)
	.join("\n\n")}

Remember: Always be helpful and concise.`;

async function testTotalTokensWithCache<TApi extends Api>(
	llm: Model<TApi>,
	options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
): Promise<{ first: Usage; second: Usage }> {
	// First request - no cache
	const context1: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [
			{
				role: "user",
				content: "What is 2 + 2? Reply with just the number.",
				timestamp: Date.now(),
			},
		],
	};

	const response1 = await complete(llm, context1, options);
	expect(response1.stopReason).toBe("stop");

	// Second request - should trigger cache read (same system prompt, add conversation)
	const context2: Context = {
		systemPrompt: LONG_SYSTEM_PROMPT,
		messages: [
			...context1.messages,
			response1, // Include previous assistant response
			{
				role: "user",
				content: "What is 3 + 3? Reply with just the number.",
				timestamp: Date.now(),
			},
		],
	};

	const response2 = await complete(llm, context2, options);
	expect(response2.stopReason).toBe("stop");

	return { first: response1.usage, second: response2.usage };
}

function logUsage(label: string, usage: Usage) {
	const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
	console.log(` ${label}:`);
	console.log(
		` input: ${usage.input}, output: ${usage.output}, cacheRead: ${usage.cacheRead}, cacheWrite: ${usage.cacheWrite}`,
	);
	console.log(` totalTokens: ${usage.totalTokens}, computed: ${computed}`);
}

function assertTotalTokensEqualsComponents(usage: Usage) {
	const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
	expect(usage.totalTokens).toBe(computed);
}

describe("totalTokens field", () => {
|
||||
// =========================================================================
|
||||
// Anthropic
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
|
||||
it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
console.log(`\nAnthropic / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
|
||||
// Anthropic should have cache activity
|
||||
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
|
||||
expect(hasCache).toBe(true);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
|
||||
it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
|
||||
console.log(`\nAnthropic OAuth / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
|
||||
// Anthropic should have cache activity
|
||||
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
|
||||
expect(hasCache).toBe(true);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// OpenAI
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
|
||||
it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
|
||||
const llm: Model<"openai-completions"> = {
|
||||
...getModel("openai", "gpt-4o-mini")!,
|
||||
api: "openai-completions",
|
||||
};
|
||||
|
||||
console.log(`\nOpenAI Completions / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
|
||||
it("gpt-4o - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openai", "gpt-4o");
|
||||
|
||||
console.log(`\nOpenAI Responses / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// Google
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
|
||||
it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("google", "gemini-2.0-flash");
|
||||
|
||||
console.log(`\nGoogle / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
	// =========================================================================
	// xAI
	// =========================================================================

	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
		it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("xai", "grok-3-fast");

			console.log(`\nxAI / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	});

	// =========================================================================
	// Groq
	// =========================================================================

	describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
		it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("groq", "openai/gpt-oss-120b");

			console.log(`\nGroq / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	});

	// =========================================================================
	// Cerebras
	// =========================================================================

	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
		it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("cerebras", "gpt-oss-120b");

			console.log(`\nCerebras / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	});

	// =========================================================================
	// z.ai
	// =========================================================================

	describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
		it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("zai", "glm-4.5-flash");

			console.log(`\nz.ai / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	});

	// =========================================================================
	// OpenRouter - Multiple backend providers
	// =========================================================================

	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
		it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "anthropic/claude-sonnet-4");

			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);

		it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "deepseek/deepseek-chat");

			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);

		it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");

			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);

		it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "google/gemini-2.0-flash-001");

			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);

		it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
			const llm = getModel("openrouter", "meta-llama/llama-4-maverick");

			console.log(`\nOpenRouter / ${llm.id}:`);
			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });

			logUsage("First request", first);
			logUsage("Second request", second);

			assertTotalTokensEqualsComponents(first);
			assertTotalTokensEqualsComponents(second);
		}, 60000);
	});
});

@@ -42,6 +42,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
 output: 0,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 0,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -126,6 +127,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
 output: 0,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 0,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",

@@ -213,6 +215,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
 output: 0,
 cacheRead: 0,
 cacheWrite: 0,
+totalTokens: 0,
 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 },
 stopReason: "toolUse",