mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 06:04:40 +00:00
Fix token statistics on abort for Anthropic provider
- Add handling for the message_start event to capture initial token usage.
- Fix message_delta to use assignment (=) instead of addition (+=), since Anthropic sends cumulative token counts, not incremental ones.
- Add comprehensive tests for all providers (Google, OpenAI Completions, OpenAI Responses, Anthropic).
- Document the OpenAI limitation: token stats are only available at stream end.

Fixes an issue where aborted streams had zero token counts despite Anthropic sending input tokens in the initial message_start event.
This commit is contained in:
parent
23be934a9a
commit
bc8d994a7b
3 changed files with 161 additions and 73 deletions
|
|
@ -3810,23 +3810,6 @@ export const MODELS = {
|
|||
contextWindow: 32768,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"cohere/command-r-08-2024": {
|
||||
id: "cohere/command-r-08-2024",
|
||||
name: "Cohere: Command R (08-2024)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.15,
|
||||
output: 0.6,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"cohere/command-r-plus-08-2024": {
|
||||
id: "cohere/command-r-plus-08-2024",
|
||||
name: "Cohere: Command R+ (08-2024)",
|
||||
|
|
@ -3844,6 +3827,23 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"cohere/command-r-08-2024": {
|
||||
id: "cohere/command-r-08-2024",
|
||||
name: "Cohere: Command R (08-2024)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.15,
|
||||
output: 0.6,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"sao10k/l3.1-euryale-70b": {
|
||||
id: "sao10k/l3.1-euryale-70b",
|
||||
name: "Sao10K: Llama 3.1 Euryale 70B v2.2",
|
||||
|
|
@ -3912,23 +3912,6 @@ export const MODELS = {
|
|||
contextWindow: 16384,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-70b-instruct": {
|
||||
id: "meta-llama/llama-3.1-70b-instruct",
|
||||
name: "Meta: Llama 3.1 70B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.39999999999999997,
|
||||
output: 0.39999999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-405b-instruct": {
|
||||
id: "meta-llama/llama-3.1-405b-instruct",
|
||||
name: "Meta: Llama 3.1 405B Instruct",
|
||||
|
|
@ -3946,6 +3929,23 @@ export const MODELS = {
|
|||
contextWindow: 32768,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-70b-instruct": {
|
||||
id: "meta-llama/llama-3.1-70b-instruct",
|
||||
name: "Meta: Llama 3.1 70B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.39999999999999997,
|
||||
output: 0.39999999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-nemo": {
|
||||
id: "mistralai/mistral-nemo",
|
||||
name: "Mistral: Mistral Nemo",
|
||||
|
|
@ -4065,23 +4065,6 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-70b-instruct": {
|
||||
id: "meta-llama/llama-3-70b-instruct",
|
||||
name: "Meta: Llama 3 70B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 0.39999999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-8b-instruct": {
|
||||
id: "meta-llama/llama-3-8b-instruct",
|
||||
name: "Meta: Llama 3 8B Instruct",
|
||||
|
|
@ -4099,6 +4082,23 @@ export const MODELS = {
|
|||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-70b-instruct": {
|
||||
id: "meta-llama/llama-3-70b-instruct",
|
||||
name: "Meta: Llama 3 70B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 0.39999999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mixtral-8x22b-instruct": {
|
||||
id: "mistralai/mixtral-8x22b-instruct",
|
||||
name: "Mistral: Mixtral 8x22B Instruct",
|
||||
|
|
@ -4133,23 +4133,6 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-tiny": {
|
||||
id: "mistralai/mistral-tiny",
|
||||
name: "Mistral Tiny",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.25,
|
||||
output: 0.25,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 32768,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-small": {
|
||||
id: "mistralai/mistral-small",
|
||||
name: "Mistral Small",
|
||||
|
|
@ -4167,6 +4150,23 @@ export const MODELS = {
|
|||
contextWindow: 32768,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-tiny": {
|
||||
id: "mistralai/mistral-tiny",
|
||||
name: "Mistral Tiny",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.25,
|
||||
output: 0.25,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 32768,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mixtral-8x7b-instruct": {
|
||||
id: "mistralai/mixtral-8x7b-instruct",
|
||||
name: "Mistral: Mixtral 8x7B Instruct",
|
||||
|
|
|
|||
|
|
@ -67,7 +67,15 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
const blocks = output.content as Block[];
|
||||
|
||||
for await (const event of anthropicStream) {
|
||||
if (event.type === "content_block_start") {
|
||||
if (event.type === "message_start") {
|
||||
// Capture initial token usage from message_start event
|
||||
// This ensures we have input token counts even if the stream is aborted early
|
||||
output.usage.input = event.message.usage.input_tokens || 0;
|
||||
output.usage.output = event.message.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
|
||||
calculateCost(model, output.usage);
|
||||
} else if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
const block: Block = {
|
||||
type: "text",
|
||||
|
|
@ -186,10 +194,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
if (event.delta.stop_reason) {
|
||||
output.stopReason = mapStopReason(event.delta.stop_reason);
|
||||
}
|
||||
output.usage.input += event.usage.input_tokens || 0;
|
||||
output.usage.output += event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead += event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite += event.usage.cache_creation_input_tokens || 0;
|
||||
output.usage.input = event.usage.input_tokens || 0;
|
||||
output.usage.output = event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
80
packages/ai/test/tokens.test.ts
Normal file
80
packages/ai/test/tokens.test.ts
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { stream } from "../src/stream.js";
|
||||
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
|
||||
|
||||
/**
 * Streams a fixed "long poem" prompt through `llm`, schedules an abort once the first
 * text or thinking delta arrives, and then asserts on the stop reason and token usage
 * of the resulting message.
 *
 * @param llm - Model to stream against; its `api` field selects which usage expectations apply.
 * @param options - Provider options spread into the `stream` call. Any `signal` they carry is
 *   replaced by this helper's own abort signal.
 */
async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
|
||||
// Single user message asking for a long answer, so the response is still streaming when the abort fires.
const context: Context = {
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "Write a long poem with 10 stanzas about the beauty of nature.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const controller = new AbortController();
|
||||
// `signal` is listed after the spread so it always wins over any signal in `options`.
const response = stream(llm, context, { ...options, signal: controller.signal });
|
||||
|
||||
// Despite the name, this flag records that the abort has been *scheduled*, not that it has fired.
let abortFired = false;
|
||||
for await (const event of response) {
|
||||
// Only the first text/thinking delta schedules the abort; later deltas are just drained.
if (!abortFired && (event.type === "text_delta" || event.type === "thinking_delta")) {
|
||||
abortFired = true;
|
||||
// Abort 3000 ms after the first delta rather than immediately.
// NOTE(review): the timer is never cleared; if the stream finishes within 3000 ms the
// stopReason assertion below would presumably fail — confirm the prompt is long enough.
setTimeout(() => controller.abort(), 3000);
|
||||
}
|
||||
}
|
||||
|
||||
// Message produced by the stream; its stopReason and usage are checked below.
const msg = await response.result();
|
||||
|
||||
expect(msg.stopReason).toBe("aborted");
|
||||
|
||||
// OpenAI providers only send usage in the final chunk, so when aborted they have no token stats
|
||||
// Anthropic and Google send usage information early in the stream
|
||||
if (llm.api === "openai-completions" || llm.api === "openai-responses") {
|
||||
expect(msg.usage.input).toBe(0);
|
||||
expect(msg.usage.output).toBe(0);
|
||||
} else {
|
||||
// Every other api must report non-zero token counts and non-zero cost on the aborted message.
expect(msg.usage.input).toBeGreaterThan(0);
|
||||
expect(msg.usage.output).toBeGreaterThan(0);
|
||||
expect(msg.usage.cost.input).toBeGreaterThan(0);
|
||||
expect(msg.usage.cost.total).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs testTokensOnAbort once per provider. Each provider suite is skipped when the
// environment variable(s) named in its skipIf condition are not set.
describe("Token Statistics on Abort", () => {
|
||||
// Skipped when GEMINI_API_KEY is unset.
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
|
||||
const llm = getModel("google", "gemini-2.5-flash");
|
||||
|
||||
it("should include token stats when aborted mid-stream", async () => {
|
||||
// Thinking is enabled, so a thinking_delta can also be the event that schedules the abort.
await testTokensOnAbort(llm, { thinking: { enabled: true } });
|
||||
// 10000 ms test timeout; the helper itself waits 3000 ms after the first delta before aborting.
}, 10000);
|
||||
});
|
||||
|
||||
// Skipped when OPENAI_API_KEY is unset.
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
|
||||
// Copy the gpt-4o-mini entry and force its api to "openai-completions".
// NOTE(review): presumably the registry entry uses a different api by default — confirm.
const llm: Model<"openai-completions"> = {
|
||||
...getModel("openai", "gpt-4o-mini")!,
|
||||
api: "openai-completions",
|
||||
};
|
||||
|
||||
// NOTE(review): despite the test title, testTokensOnAbort expects input/output usage
// to be 0 for the openai-completions and openai-responses apis.
it("should include token stats when aborted mid-stream", async () => {
|
||||
await testTokensOnAbort(llm);
|
||||
}, 10000);
|
||||
});
|
||||
|
||||
// Skipped when OPENAI_API_KEY is unset.
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
|
||||
const llm = getModel("openai", "gpt-5-mini");
|
||||
|
||||
// Same title caveat as the Completions suite: zero usage is what the helper asserts here.
it("should include token stats when aborted mid-stream", async () => {
|
||||
await testTokensOnAbort(llm);
|
||||
// This suite uses a 20000 ms timeout, twice that of the other suites.
}, 20000);
|
||||
});
|
||||
|
||||
// Skipped only when both ANTHROPIC_API_KEY and ANTHROPIC_OAUTH_TOKEN are unset.
describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => {
|
||||
const llm = getModel("anthropic", "claude-opus-4-1-20250805");
|
||||
|
||||
it("should include token stats when aborted mid-stream", async () => {
|
||||
// Thinking is enabled with a 2048-token budget, so a thinking_delta can schedule the abort.
await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
|
||||
}, 10000);
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue