mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-21 13:00:33 +00:00
Add totalTokens field to Usage type
- Added totalTokens field to Usage interface in pi-ai
- Anthropic: computed as input + output + cacheRead + cacheWrite
- OpenAI/Google: uses native total_tokens/totalTokenCount
- Fixed openai-completions to compute totalTokens when reasoning tokens present
- Updated calculateContextTokens() to use totalTokens field
- Added comprehensive test covering 13 providers

Fixes #130
This commit is contained in:
parent
52f1a8cb31
commit
86e5a70ec4
22 changed files with 552 additions and 70 deletions
|
|
@@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@@ -129,6 +130,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output.usage.output = event.message.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
|
||||
// Anthropic doesn't provide total_tokens, compute from components
|
||||
output.usage.totalTokens =
|
||||
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
||||
calculateCost(model, output.usage);
|
||||
} else if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
|
|
@@ -253,6 +257,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output.usage.output = event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
|
||||
// Anthropic doesn't provide total_tokens, compute from components
|
||||
output.usage.totalTokens =
|
||||
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@@ -200,6 +201,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|||
(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
|
||||
cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: chunk.usageMetadata.totalTokenCount || 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@@ -50,6 +50,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@@ -106,14 +107,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|||
for await (const chunk of openaiStream) {
|
||||
if (chunk.usage) {
|
||||
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
|
||||
const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
|
||||
const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
|
||||
const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
|
||||
output.usage = {
|
||||
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
|
||||
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
|
||||
output:
|
||||
(chunk.usage.completion_tokens || 0) +
|
||||
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
|
||||
input,
|
||||
output: outputTokens,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
// Compute totalTokens ourselves since we add reasoning_tokens to output
|
||||
// and some providers (e.g., Groq) don't include them in total_tokens
|
||||
totalTokens: input + outputTokens + cachedTokens,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@@ -260,6 +261,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|||
output: response.usage.output_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
totalTokens: response.usage.total_tokens || 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue