Add totalTokens field to Usage type

- Added totalTokens field to Usage interface in pi-ai
- Anthropic: computed as input + output + cacheRead + cacheWrite
- OpenAI/Google: use native total_tokens/totalTokenCount
- Fixed openai-completions to compute totalTokens when reasoning tokens present
- Updated calculateContextTokens() to use totalTokens field
- Added comprehensive test covering 13 providers

fixes #130
This commit is contained in:
Mario Zechner 2025-12-06 22:46:02 +01:00
parent 52f1a8cb31
commit 86e5a70ec4
22 changed files with 552 additions and 70 deletions

View file

@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -129,6 +130,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
@ -253,6 +257,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
output.usage.output = event.usage.output_tokens || 0;
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
// Anthropic doesn't provide total_tokens, compute from components
output.usage.totalTokens =
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
calculateCost(model, output.usage);
}
}

View file

@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -200,6 +201,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
cacheWrite: 0,
totalTokens: chunk.usageMetadata.totalTokenCount || 0,
cost: {
input: 0,
output: 0,

View file

@ -50,6 +50,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -106,14 +107,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
for await (const chunk of openaiStream) {
if (chunk.usage) {
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
output.usage = {
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
output:
(chunk.usage.completion_tokens || 0) +
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
input,
output: outputTokens,
cacheRead: cachedTokens,
cacheWrite: 0,
// Compute totalTokens ourselves since we add reasoning_tokens to output
// and some providers (e.g., Groq) don't include them in total_tokens
totalTokens: input + outputTokens + cachedTokens,
cost: {
input: 0,
output: 0,

View file

@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop",
@ -260,6 +261,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
output: response.usage.output_tokens || 0,
cacheRead: cachedTokens,
cacheWrite: 0,
totalTokens: response.usage.total_tokens || 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
}