mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 08:03:39 +00:00
Add totalTokens field to Usage type
- Added totalTokens field to Usage interface in pi-ai
- Anthropic: computed as input + output + cacheRead + cacheWrite
- OpenAI/Google: uses native total_tokens/totalTokenCount
- Fixed openai-completions to compute totalTokens when reasoning tokens present
- Updated calculateContextTokens() to use totalTokens field
- Added comprehensive test covering 13 providers

Fixes #130
This commit is contained in:
parent
52f1a8cb31
commit
86e5a70ec4
22 changed files with 552 additions and 70 deletions
|
|
@ -335,6 +335,7 @@ export class Agent {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: this.abortController?.signal.aborted ? "aborted" : "error",
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ function streamSimpleProxy(
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
## [Unreleased]
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
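- **Added `totalTokens` field to `Usage` type**: All code that constructs `Usage` objects must now include the `totalTokens` field. This field represents the total tokens processed by the LLM (input + output + cache). The OpenAI Responses and Google providers use the native API values (`total_tokens`, `totalTokenCount`). The Anthropic provider computes it as `input + output + cacheRead + cacheWrite`. The OpenAI Completions provider computes it as `input + output + cacheRead`, because reasoning tokens are added to `output` and some providers (e.g., Groq) don't include them in `total_tokens`.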
|
||||
|
||||
## [0.12.10] - 2025-12-04
|
||||
|
||||
### Added
|
||||
|
|
|
|||
|
|
@ -5255,23 +5255,6 @@ export const MODELS = {
|
|||
contextWindow: 131072,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-405b-instruct": {
|
||||
id: "meta-llama/llama-3.1-405b-instruct",
|
||||
name: "Meta: Llama 3.1 405B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 3.5,
|
||||
output: 3.5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 130815,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-70b-instruct": {
|
||||
id: "meta-llama/llama-3.1-70b-instruct",
|
||||
name: "Meta: Llama 3.1 70B Instruct",
|
||||
|
|
@ -5289,6 +5272,23 @@ export const MODELS = {
|
|||
contextWindow: 131072,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3.1-405b-instruct": {
|
||||
id: "meta-llama/llama-3.1-405b-instruct",
|
||||
name: "Meta: Llama 3.1 405B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 3.5,
|
||||
output: 3.5,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 130815,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-nemo": {
|
||||
id: "mistralai/mistral-nemo",
|
||||
name: "Mistral: Mistral Nemo",
|
||||
|
|
@ -5306,9 +5306,9 @@ export const MODELS = {
|
|||
contextWindow: 131072,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-4o-mini-2024-07-18": {
|
||||
id: "openai/gpt-4o-mini-2024-07-18",
|
||||
name: "OpenAI: GPT-4o-mini (2024-07-18)",
|
||||
"openai/gpt-4o-mini": {
|
||||
id: "openai/gpt-4o-mini",
|
||||
name: "OpenAI: GPT-4o-mini",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
|
|
@ -5323,9 +5323,9 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-4o-mini": {
|
||||
id: "openai/gpt-4o-mini",
|
||||
name: "OpenAI: GPT-4o-mini",
|
||||
"openai/gpt-4o-mini-2024-07-18": {
|
||||
id: "openai/gpt-4o-mini-2024-07-18",
|
||||
name: "OpenAI: GPT-4o-mini (2024-07-18)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
|
|
@ -5425,23 +5425,6 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-4o-2024-05-13": {
|
||||
id: "openai/gpt-4o-2024-05-13",
|
||||
name: "OpenAI: GPT-4o (2024-05-13)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 5,
|
||||
output: 15,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-4o": {
|
||||
id: "openai/gpt-4o",
|
||||
name: "OpenAI: GPT-4o",
|
||||
|
|
@ -5476,22 +5459,22 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 64000,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-70b-instruct": {
|
||||
id: "meta-llama/llama-3-70b-instruct",
|
||||
name: "Meta: Llama 3 70B Instruct",
|
||||
"openai/gpt-4o-2024-05-13": {
|
||||
id: "openai/gpt-4o-2024-05-13",
|
||||
name: "OpenAI: GPT-4o (2024-05-13)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
input: ["text", "image"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 0.39999999999999997,
|
||||
input: 5,
|
||||
output: 15,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-8b-instruct": {
|
||||
id: "meta-llama/llama-3-8b-instruct",
|
||||
|
|
@ -5510,6 +5493,23 @@ export const MODELS = {
|
|||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"meta-llama/llama-3-70b-instruct": {
|
||||
id: "meta-llama/llama-3-70b-instruct",
|
||||
name: "Meta: Llama 3 70B Instruct",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 0.3,
|
||||
output: 0.39999999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 8192,
|
||||
maxTokens: 16384,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mixtral-8x22b-instruct": {
|
||||
id: "mistralai/mixtral-8x22b-instruct",
|
||||
name: "Mistral: Mixtral 8x22B Instruct",
|
||||
|
|
@ -5595,23 +5595,6 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-3.5-turbo-0613": {
|
||||
id: "openai/gpt-3.5-turbo-0613",
|
||||
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 1,
|
||||
output: 2,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 4095,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-4-turbo-preview": {
|
||||
id: "openai/gpt-4-turbo-preview",
|
||||
name: "OpenAI: GPT-4 Turbo Preview",
|
||||
|
|
@ -5629,6 +5612,23 @@ export const MODELS = {
|
|||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"openai/gpt-3.5-turbo-0613": {
|
||||
id: "openai/gpt-3.5-turbo-0613",
|
||||
name: "OpenAI: GPT-3.5 Turbo (older v0613)",
|
||||
api: "openai-completions",
|
||||
provider: "openrouter",
|
||||
baseUrl: "https://openrouter.ai/api/v1",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
cost: {
|
||||
input: 1,
|
||||
output: 2,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
},
|
||||
contextWindow: 4095,
|
||||
maxTokens: 4096,
|
||||
} satisfies Model<"openai-completions">,
|
||||
"mistralai/mistral-tiny": {
|
||||
id: "mistralai/mistral-tiny",
|
||||
name: "Mistral Tiny",
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@ -129,6 +130,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output.usage.output = event.message.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
|
||||
// Anthropic doesn't provide total_tokens, compute from components
|
||||
output.usage.totalTokens =
|
||||
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
||||
calculateCost(model, output.usage);
|
||||
} else if (event.type === "content_block_start") {
|
||||
if (event.content_block.type === "text") {
|
||||
|
|
@ -253,6 +257,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|||
output.usage.output = event.usage.output_tokens || 0;
|
||||
output.usage.cacheRead = event.usage.cache_read_input_tokens || 0;
|
||||
output.usage.cacheWrite = event.usage.cache_creation_input_tokens || 0;
|
||||
// Anthropic doesn't provide total_tokens, compute from components
|
||||
output.usage.totalTokens =
|
||||
output.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;
|
||||
calculateCost(model, output.usage);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@ -200,6 +201,7 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|||
(chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
|
||||
cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: chunk.usageMetadata.totalTokenCount || 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@ -106,14 +107,18 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|||
for await (const chunk of openaiStream) {
|
||||
if (chunk.usage) {
|
||||
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
|
||||
const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
|
||||
const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
|
||||
const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
|
||||
output.usage = {
|
||||
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
|
||||
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
|
||||
output:
|
||||
(chunk.usage.completion_tokens || 0) +
|
||||
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
|
||||
input,
|
||||
output: outputTokens,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
// Compute totalTokens ourselves since we add reasoning_tokens to output
|
||||
// and some providers (e.g., Groq) don't include them in total_tokens
|
||||
totalTokens: input + outputTokens + cachedTokens,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
@ -260,6 +261,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|||
output: response.usage.output_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
totalTokens: response.usage.total_tokens || 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ export interface Usage {
|
|||
output: number;
|
||||
cacheRead: number;
|
||||
cacheWrite: number;
|
||||
totalTokens: number;
|
||||
cost: {
|
||||
input: number;
|
||||
output: number;
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ async function testEmptyAssistantMessage<TApi extends Api>(llm: Model<TApi>, opt
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 10,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "stop",
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ const providerContexts = {
|
|||
output: 50,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 150,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -97,6 +98,7 @@ const providerContexts = {
|
|||
output: 60,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 180,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -147,6 +149,7 @@ const providerContexts = {
|
|||
output: 55,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 165,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -199,6 +202,7 @@ const providerContexts = {
|
|||
output: 58,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 173,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -243,6 +247,7 @@ const providerContexts = {
|
|||
output: 25,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 75,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "error",
|
||||
|
|
|
|||
331
packages/ai/test/total-tokens.test.ts
Normal file
331
packages/ai/test/total-tokens.test.ts
Normal file
|
|
@ -0,0 +1,331 @@
|
|||
/**
|
||||
* Test totalTokens field across all providers.
|
||||
*
|
||||
* totalTokens represents the total number of tokens processed by the LLM,
|
||||
* including input (with cache) and output (with thinking). This is the
|
||||
* base for calculating context size for the next request.
|
||||
*
|
||||
* - OpenAI Completions: Computed as input + output + cacheRead (reasoning tokens are folded into output)
|
||||
* - OpenAI Responses: Uses native total_tokens field
|
||||
* - Google: Uses native totalTokenCount field
|
||||
* - Anthropic: Computed as input + output + cacheRead + cacheWrite
|
||||
* - Other OpenAI-compatible providers served through the openai-completions API (e.g. OpenRouter): Computed as input + output + cacheRead
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { getModel } from "../src/models.js";
|
||||
import { complete } from "../src/stream.js";
|
||||
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
|
||||
|
||||
// Generate a long system prompt to trigger caching (>2k bytes for most providers)
|
||||
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
|
||||
|
||||
Here is some additional context that makes this system prompt long enough to trigger caching:
|
||||
|
||||
${Array(50)
|
||||
.fill(
|
||||
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris.",
|
||||
)
|
||||
.join("\n\n")}
|
||||
|
||||
Remember: Always be helpful and concise.`;
|
||||
|
||||
async function testTotalTokensWithCache<TApi extends Api>(
|
||||
llm: Model<TApi>,
|
||||
options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
|
||||
): Promise<{ first: Usage; second: Usage }> {
|
||||
// First request - no cache
|
||||
const context1: Context = {
|
||||
systemPrompt: LONG_SYSTEM_PROMPT,
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: "What is 2 + 2? Reply with just the number.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response1 = await complete(llm, context1, options);
|
||||
expect(response1.stopReason).toBe("stop");
|
||||
|
||||
// Second request - should trigger cache read (same system prompt, add conversation)
|
||||
const context2: Context = {
|
||||
systemPrompt: LONG_SYSTEM_PROMPT,
|
||||
messages: [
|
||||
...context1.messages,
|
||||
response1, // Include previous assistant response
|
||||
{
|
||||
role: "user",
|
||||
content: "What is 3 + 3? Reply with just the number.",
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const response2 = await complete(llm, context2, options);
|
||||
expect(response2.stopReason).toBe("stop");
|
||||
|
||||
return { first: response1.usage, second: response2.usage };
|
||||
}
|
||||
|
||||
function logUsage(label: string, usage: Usage) {
|
||||
const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
|
||||
console.log(` ${label}:`);
|
||||
console.log(
|
||||
` input: ${usage.input}, output: ${usage.output}, cacheRead: ${usage.cacheRead}, cacheWrite: ${usage.cacheWrite}`,
|
||||
);
|
||||
console.log(` totalTokens: ${usage.totalTokens}, computed: ${computed}`);
|
||||
}
|
||||
|
||||
function assertTotalTokensEqualsComponents(usage: Usage) {
|
||||
const computed = usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
|
||||
expect(usage.totalTokens).toBe(computed);
|
||||
}
|
||||
|
||||
describe("totalTokens field", () => {
|
||||
// =========================================================================
|
||||
// Anthropic
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
|
||||
it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
|
||||
|
||||
console.log(`\nAnthropic / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
|
||||
// Anthropic should have cache activity
|
||||
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
|
||||
expect(hasCache).toBe(true);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
|
||||
it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
|
||||
|
||||
console.log(`\nAnthropic OAuth / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
|
||||
// Anthropic should have cache activity
|
||||
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
|
||||
expect(hasCache).toBe(true);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// OpenAI
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
|
||||
it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
|
||||
const llm: Model<"openai-completions"> = {
|
||||
...getModel("openai", "gpt-4o-mini")!,
|
||||
api: "openai-completions",
|
||||
};
|
||||
|
||||
console.log(`\nOpenAI Completions / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
|
||||
it("gpt-4o - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openai", "gpt-4o");
|
||||
|
||||
console.log(`\nOpenAI Responses / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// Google
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
|
||||
it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("google", "gemini-2.0-flash");
|
||||
|
||||
console.log(`\nGoogle / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm);
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// xAI
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
|
||||
it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("xai", "grok-3-fast");
|
||||
|
||||
console.log(`\nxAI / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// Groq
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
|
||||
it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("groq", "openai/gpt-oss-120b");
|
||||
|
||||
console.log(`\nGroq / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// Cerebras
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
|
||||
it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("cerebras", "gpt-oss-120b");
|
||||
|
||||
console.log(`\nCerebras / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// z.ai
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
|
||||
it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("zai", "glm-4.5-flash");
|
||||
|
||||
console.log(`\nz.ai / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
|
||||
// =========================================================================
|
||||
// OpenRouter - Multiple backend providers
|
||||
// =========================================================================
|
||||
|
||||
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
|
||||
it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
|
||||
|
||||
console.log(`\nOpenRouter / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
|
||||
it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openrouter", "deepseek/deepseek-chat");
|
||||
|
||||
console.log(`\nOpenRouter / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
|
||||
it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
|
||||
|
||||
console.log(`\nOpenRouter / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
|
||||
it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
|
||||
|
||||
console.log(`\nOpenRouter / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
|
||||
it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
|
||||
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
|
||||
|
||||
console.log(`\nOpenRouter / ${llm.id}:`);
|
||||
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
|
||||
|
||||
logUsage("First request", first);
|
||||
logUsage("Second request", second);
|
||||
|
||||
assertTotalTokensEqualsComponents(first);
|
||||
assertTotalTokensEqualsComponents(second);
|
||||
}, 60000);
|
||||
});
|
||||
});
|
||||
|
|
@ -42,6 +42,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -126,6 +127,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
@ -213,6 +215,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: "toolUse",
|
||||
|
|
|
|||
|
|
@ -32,9 +32,10 @@ export const DEFAULT_COMPACTION_SETTINGS: CompactionSettings = {
|
|||
|
||||
/**
|
||||
* Calculate total context tokens from usage.
|
||||
* Uses the totalTokens field when it is non-zero; otherwise falls back to input + output + cacheRead + cacheWrite.
|
||||
*/
|
||||
export function calculateContextTokens(usage: Usage): number {
|
||||
return usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
|
||||
return usage.totalTokens || usage.input + usage.output + usage.cacheRead + usage.cacheWrite;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ function createMockUsage(input: number, output: number, cacheRead = 0, cacheWrit
|
|||
output,
|
||||
cacheRead,
|
||||
cacheWrite,
|
||||
totalTokens: input + output + cacheRead + cacheWrite,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ const saveSession = async () => {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
|
|
|
|||
|
|
@ -308,6 +308,7 @@ export class Agent {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
stopReason: this.abortController?.signal.aborted ? "aborted" : "error",
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ function streamSimpleProxy(
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
|
|
|
|||
|
|
@ -266,6 +266,7 @@ export class AgentInterface extends LitElement {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
} satisfies Usage,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -101,6 +101,7 @@ export class SessionsStore extends Store {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
},
|
||||
thinkingLevel: state.thinkingLevel || "off",
|
||||
|
|
|
|||
|
|
@ -118,6 +118,8 @@ export interface SessionMetadata {
|
|||
cacheRead: number;
|
||||
/** Total cache write tokens */
|
||||
cacheWrite: number;
|
||||
/** Total tokens processed */
|
||||
totalTokens: number;
|
||||
/** Total cost breakdown */
|
||||
cost: {
|
||||
input: number;
|
||||
|
|
|
|||
|
|
@ -56,11 +56,13 @@ export const simpleHtml = {
|
|||
output: 375,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0030632000000000003,
|
||||
output: 0.0015,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.0045632,
|
||||
},
|
||||
},
|
||||
|
|
@ -89,11 +91,13 @@ export const simpleHtml = {
|
|||
output: 162,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.003376,
|
||||
output: 0.0006479999999999999,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.004024,
|
||||
},
|
||||
},
|
||||
|
|
@ -159,11 +163,13 @@ export const longSession = {
|
|||
output: 455,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0030632000000000003,
|
||||
output: 0.00182,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.004883200000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -192,11 +198,13 @@ export const longSession = {
|
|||
output: 147,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0034384000000000003,
|
||||
output: 0.000588,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.0040264,
|
||||
},
|
||||
},
|
||||
|
|
@ -235,11 +243,13 @@ export const longSession = {
|
|||
output: 96,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0035656000000000004,
|
||||
output: 0.000384,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.0039496,
|
||||
},
|
||||
},
|
||||
|
|
@ -267,11 +277,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -312,11 +324,13 @@ export const longSession = {
|
|||
output: 115,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0049456000000000005,
|
||||
output: 0.00045999999999999996,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.005405600000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -348,11 +362,13 @@ export const longSession = {
|
|||
output: 86,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0050696000000000005,
|
||||
output: 0.00034399999999999996,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.0054136,
|
||||
},
|
||||
},
|
||||
|
|
@ -391,11 +407,13 @@ export const longSession = {
|
|||
output: 294,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.005151200000000001,
|
||||
output: 0.001176,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.006327200000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -428,11 +446,13 @@ export const longSession = {
|
|||
output: 159,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.0054152,
|
||||
output: 0.000636,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.0060512000000000005,
|
||||
},
|
||||
},
|
||||
|
|
@ -471,11 +491,13 @@ export const longSession = {
|
|||
output: 379,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.005566400000000001,
|
||||
output: 0.001516,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.007082400000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -516,11 +538,13 @@ export const longSession = {
|
|||
output: 537,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.005900000000000001,
|
||||
output: 0.0021479999999999997,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.008048,
|
||||
},
|
||||
},
|
||||
|
|
@ -547,11 +571,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -583,11 +609,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -627,11 +655,13 @@ export const longSession = {
|
|||
output: 492,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.024597,
|
||||
output: 0.00738,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.031977,
|
||||
},
|
||||
},
|
||||
|
|
@ -672,11 +702,13 @@ export const longSession = {
|
|||
output: 213,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.026211,
|
||||
output: 0.003195,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.029406,
|
||||
},
|
||||
},
|
||||
|
|
@ -709,11 +741,13 @@ export const longSession = {
|
|||
output: 134,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.026958,
|
||||
output: 0.00201,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.028968,
|
||||
},
|
||||
},
|
||||
|
|
@ -752,11 +786,13 @@ export const longSession = {
|
|||
output: 331,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.02739,
|
||||
output: 0.004965,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.032355,
|
||||
},
|
||||
},
|
||||
|
|
@ -788,11 +824,13 @@ export const longSession = {
|
|||
output: 53,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.028443,
|
||||
output: 0.000795,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.029238,
|
||||
},
|
||||
},
|
||||
|
|
@ -831,11 +869,13 @@ export const longSession = {
|
|||
output: 329,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.028623,
|
||||
output: 0.004935,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.033558,
|
||||
},
|
||||
},
|
||||
|
|
@ -867,11 +907,13 @@ export const longSession = {
|
|||
output: 46,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.029670000000000002,
|
||||
output: 0.00069,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.03036,
|
||||
},
|
||||
},
|
||||
|
|
@ -897,11 +939,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -937,11 +981,13 @@ export const longSession = {
|
|||
output: 285,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.029856,
|
||||
output: 0.004275,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.034131,
|
||||
},
|
||||
},
|
||||
|
|
@ -974,11 +1020,13 @@ export const longSession = {
|
|||
output: 39,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.030831,
|
||||
output: 0.000585,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.031416,
|
||||
},
|
||||
},
|
||||
|
|
@ -1017,11 +1065,13 @@ export const longSession = {
|
|||
output: 473,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.030993,
|
||||
output: 0.007095000000000001,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.038088,
|
||||
},
|
||||
},
|
||||
|
|
@ -1048,11 +1098,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -1088,11 +1140,13 @@ export const longSession = {
|
|||
output: 348,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.032556,
|
||||
output: 0.00522,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.037776000000000004,
|
||||
},
|
||||
},
|
||||
|
|
@ -1133,11 +1187,13 @@ export const longSession = {
|
|||
output: 310,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.033942,
|
||||
output: 0.0046500000000000005,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.038592,
|
||||
},
|
||||
},
|
||||
|
|
@ -1170,11 +1226,13 @@ export const longSession = {
|
|||
output: 53,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.034977,
|
||||
output: 0.000795,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.035772,
|
||||
},
|
||||
},
|
||||
|
|
@ -1213,11 +1271,13 @@ export const longSession = {
|
|||
output: 423,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.035160000000000004,
|
||||
output: 0.006345,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.041505,
|
||||
},
|
||||
},
|
||||
|
|
@ -1258,11 +1318,13 @@ export const longSession = {
|
|||
output: 193,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.036651,
|
||||
output: 0.002895,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.039546000000000005,
|
||||
},
|
||||
},
|
||||
|
|
@ -1295,11 +1357,13 @@ export const longSession = {
|
|||
output: 104,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.037557,
|
||||
output: 0.00156,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.039117,
|
||||
},
|
||||
},
|
||||
|
|
@ -1334,11 +1398,13 @@ export const longSession = {
|
|||
output: 146,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.037911,
|
||||
output: 0.00219,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.040101,
|
||||
},
|
||||
},
|
||||
|
|
@ -1371,11 +1437,13 @@ export const longSession = {
|
|||
output: 63,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.038535,
|
||||
output: 0.000945,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.03948,
|
||||
},
|
||||
},
|
||||
|
|
@ -1401,11 +1469,13 @@ export const longSession = {
|
|||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
output: 0,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0,
|
||||
},
|
||||
},
|
||||
|
|
@ -1445,11 +1515,13 @@ export const longSession = {
|
|||
output: 324,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.038823,
|
||||
output: 0.00486,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.043683,
|
||||
},
|
||||
},
|
||||
|
|
@ -1490,11 +1562,13 @@ export const longSession = {
|
|||
output: 385,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.040605,
|
||||
output: 0.005775,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.046380000000000005,
|
||||
},
|
||||
},
|
||||
|
|
@ -1531,11 +1605,13 @@ export const longSession = {
|
|||
output: 436,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.043749,
|
||||
output: 0.00654,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.050289,
|
||||
},
|
||||
},
|
||||
|
|
@ -1571,11 +1647,13 @@ export const longSession = {
|
|||
output: 685,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.045105,
|
||||
output: 0.010275,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.05538,
|
||||
},
|
||||
},
|
||||
|
|
@ -1615,11 +1693,13 @@ export const longSession = {
|
|||
output: 683,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.047214,
|
||||
output: 0.010245,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.057458999999999996,
|
||||
},
|
||||
},
|
||||
|
|
@ -1664,11 +1744,13 @@ export const longSession = {
|
|||
output: 3462,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.058758000000000005,
|
||||
output: 0.051930000000000004,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.11068800000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -1697,11 +1779,13 @@ export const longSession = {
|
|||
output: 223,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.069195,
|
||||
output: 0.003345,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.07254000000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -1740,11 +1824,13 @@ export const longSession = {
|
|||
output: 335,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.06991800000000001,
|
||||
output: 0.005025,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.07494300000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -1785,11 +1871,13 @@ export const longSession = {
|
|||
output: 499,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.075036,
|
||||
output: 0.007485,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.08252100000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -1830,11 +1918,13 @@ export const longSession = {
|
|||
output: 462,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.078387,
|
||||
output: 0.00693,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.085317,
|
||||
},
|
||||
},
|
||||
|
|
@ -1875,11 +1965,13 @@ export const longSession = {
|
|||
output: 431,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.079914,
|
||||
output: 0.006465,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.086379,
|
||||
},
|
||||
},
|
||||
|
|
@ -1920,11 +2012,13 @@ export const longSession = {
|
|||
output: 335,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.083382,
|
||||
output: 0.005025,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.088407,
|
||||
},
|
||||
},
|
||||
|
|
@ -1969,11 +2063,13 @@ export const longSession = {
|
|||
output: 1209,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.08655600000000001,
|
||||
output: 0.018135000000000002,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.104691,
|
||||
},
|
||||
},
|
||||
|
|
@ -2002,11 +2098,13 @@ export const longSession = {
|
|||
output: 249,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.09024,
|
||||
output: 0.003735,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.093975,
|
||||
},
|
||||
},
|
||||
|
|
@ -2045,11 +2143,13 @@ export const longSession = {
|
|||
output: 279,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.091008,
|
||||
output: 0.004185,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.095193,
|
||||
},
|
||||
},
|
||||
|
|
@ -2078,11 +2178,13 @@ export const longSession = {
|
|||
output: 54,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.091893,
|
||||
output: 0.0008100000000000001,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.09270300000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -2121,11 +2223,13 @@ export const longSession = {
|
|||
output: 162,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.092097,
|
||||
output: 0.00243,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.094527,
|
||||
},
|
||||
},
|
||||
|
|
@ -2155,11 +2259,13 @@ export const longSession = {
|
|||
output: 67,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.09271800000000001,
|
||||
output: 0.001005,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.09372300000000001,
|
||||
},
|
||||
},
|
||||
|
|
@ -2199,11 +2305,13 @@ export const longSession = {
|
|||
output: 182,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.092937,
|
||||
output: 0.0027300000000000002,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.095667,
|
||||
},
|
||||
},
|
||||
|
|
@ -2233,11 +2341,13 @@ export const longSession = {
|
|||
output: 33,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
cost: {
|
||||
input: 0.093642,
|
||||
output: 0.000495,
|
||||
cacheRead: 0,
|
||||
cacheWrite: 0,
|
||||
totalTokens: 0,
|
||||
total: 0.094137,
|
||||
},
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue