Fix token statistics on abort for Anthropic provider

- Add handling for message_start event to capture initial token usage
- Fix message_delta to use assignment (=) instead of addition (+=)
  since Anthropic sends cumulative token counts, not incremental
- Add comprehensive tests for all providers (Google, OpenAI Completions,
  OpenAI Responses, Anthropic)
- Document OpenAI limitation: token stats only available at stream end

Fixes issue where aborted streams had zero token counts despite
Anthropic sending input tokens in the initial message_start event.
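
For reference, a sketch of the two Anthropic stream events this change relies on (the shapes follow Anthropic's Messages streaming API; the token values are illustrative):

    // message_start arrives first and already includes input token usage:
    //   { type: "message_start", message: { usage: { input_tokens: 421, output_tokens: 1 } } }
    // message_delta later carries a cumulative usage snapshot for the message:
    //   { type: "message_delta", delta: { stop_reason: "end_turn" }, usage: { output_tokens: 58 } }
    // Because these counts are running totals, assignment (=) records the
    // latest snapshot correctly, while addition (+=) would stack the
    // message_delta totals on top of the values captured from message_start.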
Author: Mario Zechner, 2025-10-26 21:22:24 +01:00
Parent: 23be934a9a
Commit: bc8d994a7b
3 changed files with 161 additions and 73 deletions

@@ -67,7 +67,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
     const blocks = output.content as Block[];
     for await (const event of anthropicStream) {
-        if (event.type === "content_block_start") {
+        if (event.type === "message_start") {
+            // Capture initial token usage from message_start event
+            // This ensures we have input token counts even if the stream is aborted early
+            output.usage.input = event.message.usage.input_tokens || 0;
+            output.usage.output = event.message.usage.output_tokens || 0;
+            output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
+            output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
+            calculateCost(model, output.usage);
+        } else if (event.type === "content_block_start") {
             if (event.content_block.type === "text") {
                 const block: Block = {
                     type: "text",
@@ -186,10 +194,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
             if (event.delta.stop_reason) {
                 output.stopReason = mapStopReason(event.delta.stop_reason);
             }
-            output.usage.input += event.usage.input_tokens || 0;
-            output.usage.output += event.usage.output_tokens || 0;
-            output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
-            output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
+            output.usage.input = event.usage.input_tokens || 0;
+            output.usage.output = event.usage.output_tokens || 0;
+            output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
+            output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
             calculateCost(model, output.usage);
         }
     }
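
A minimal sketch of the abort-path test this commit describes, for the Anthropic case (the streaming interface, the result() accessor, and the event names here are assumptions for illustration, not the repository's actual test code):

    // Hypothetical abort-path test; the API shape is assumed, not taken from the repo.
    // Runs inside an async test body.
    const controller = new AbortController();
    const stream = streamAnthropic(model, context, { signal: controller.signal });
    for await (const event of stream) {
        if (event.type === "text_delta") {
            controller.abort(); // cut the stream after the first visible token
            break;
        }
    }
    const output = await stream.result(); // hypothetical accessor for the final message
    // Before this fix, aborted streams reported zero tokens; with message_start
    // handled, the input count survives the abort.
    expect(output.usage.input).toBeGreaterThan(0);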