diff --git a/packages/ai/src/models.ts b/packages/ai/src/models.ts
index 057795e3..c033d399 100644
--- a/packages/ai/src/models.ts
+++ b/packages/ai/src/models.ts
@@ -3,7 +3,7 @@ import { AnthropicLLM } from "./providers/anthropic.js";
 import { GoogleLLM } from "./providers/google.js";
 import { OpenAICompletionsLLM } from "./providers/openai-completions.js";
 import { OpenAIResponsesLLM } from "./providers/openai-responses.js";
-import type { Model } from "./types.js";
+import type { Model, Usage } from "./types.js";
 
 // Provider configuration with factory functions
 export const PROVIDER_CONFIG = {
@@ -102,5 +102,14 @@ export function getModel(
 	return models[modelId as string];
 }
 
+export function calculateCost(model: Model, usage: Usage) {
+	usage.cost.input = (model.cost.input / 1000000) * usage.input;
+	usage.cost.output = (model.cost.output / 1000000) * usage.output;
+	usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;
+	usage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;
+	usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
+	return usage.cost;
+}
+
 // Re-export Model type for convenience
 export type { Model };
diff --git a/packages/ai/src/providers/anthropic.ts b/packages/ai/src/providers/anthropic.ts
index f0f05611..c70598c6 100644
--- a/packages/ai/src/providers/anthropic.ts
+++ b/packages/ai/src/providers/anthropic.ts
@@ -5,6 +5,7 @@ import type {
 	MessageParam,
 	Tool,
 } from "@anthropic-ai/sdk/resources/messages.js";
+import { calculateCost } from "../models.js";
 import type {
 	AssistantMessage,
 	Context,
@@ -13,8 +14,8 @@ import type {
 	Message,
 	Model,
 	StopReason,
-	TokenUsage,
 	ToolCall,
+	Usage,
 } from "../types.js";
 
 export interface AnthropicLLMOptions extends LLMOptions {
@@ -186,13 +187,20 @@
 				name: block.name,
 				arguments: block.input as Record<string, any>,
 			}));
-		const usage: TokenUsage = {
+		const usage: Usage = {
 			input: msg.usage.input_tokens,
 			output: msg.usage.output_tokens,
 			cacheRead: msg.usage.cache_read_input_tokens || 0,
 			cacheWrite: msg.usage.cache_creation_input_tokens || 0,
-			// TODO add cost
+			cost: {
+				input: 0,
+				output: 0,
+				cacheRead: 0,
+				cacheWrite: 0,
+				total: 0,
+			},
 		};
+		calculateCost(this.modelInfo, usage);
 
 		return {
 			role: "assistant",
@@ -215,6 +223,7 @@ export class AnthropicLLM implements LLM {
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "error",
 			error: error instanceof Error ? error.message : String(error),
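The calculateCost helper in models.ts reads Model.cost as a price in USD per million tokens, which is what the division by 1000000 implies. A quick illustrative sketch of the arithmetic — the prices and token counts below are made up, not real model pricing, and the cast is abbreviated for brevity:

	// Hypothetical pricing (USD per million tokens); a real Model has more fields.
	const model = { cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 } } as unknown as Model;
	const usage: Usage = {
		input: 10_000,
		output: 2_000,
		cacheRead: 50_000,
		cacheWrite: 0,
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
	};
	calculateCost(model, usage);
	// usage.cost.input     = (3   / 1_000_000) * 10_000 = 0.03
	// usage.cost.output    = (15  / 1_000_000) * 2_000  = 0.03
	// usage.cost.cacheRead = (0.3 / 1_000_000) * 50_000 = 0.015
	// usage.cost.total     = 0.075

Note that calculateCost mutates usage.cost in place and returns the same object, so callers can use either the argument or the return value; the providers below ignore the return value.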
diff --git a/packages/ai/src/providers/google.ts b/packages/ai/src/providers/google.ts
index 16a9a8c5..60d71cb2 100644
--- a/packages/ai/src/providers/google.ts
+++ b/packages/ai/src/providers/google.ts
@@ -5,6 +5,7 @@ import {
 	type GenerateContentParameters,
 	GoogleGenAI,
 } from "@google/genai";
+import { calculateCost } from "../models.js";
 import type {
 	AssistantMessage,
 	Context,
@@ -13,9 +14,9 @@ import type {
 	Message,
 	Model,
 	StopReason,
-	TokenUsage,
 	Tool,
 	ToolCall,
+	Usage,
 } from "../types.js";
 
 export interface GoogleLLMOptions extends LLMOptions {
@@ -97,11 +98,12 @@ export class GoogleLLM implements LLM {
 		let thinking = "";
 		let thoughtSignature: string | undefined;
 		const toolCalls: ToolCall[] = [];
-		let usage: TokenUsage = {
+		let usage: Usage = {
 			input: 0,
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		};
 		let stopReason: StopReason = "stop";
 		let inTextBlock = false;
@@ -179,6 +181,13 @@
 						(chunk.usageMetadata.candidatesTokenCount || 0) +
 						(chunk.usageMetadata.thoughtsTokenCount || 0),
 					cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
 					cacheWrite: 0,
+					cost: {
+						input: 0,
+						output: 0,
+						cacheRead: 0,
+						cacheWrite: 0,
+						total: 0,
+					},
 				};
 			}
 		}
@@ -203,6 +212,9 @@
 			thoughtSignature = Buffer.from(signature).toString("base64");
 		}
 
+		// Calculate cost
+		calculateCost(this.model, usage);
+
 		// Usage metadata is in the last chunk
 		// Already captured during streaming
 
@@ -227,6 +239,7 @@
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "error",
 			error: error instanceof Error ? error.message : String(error),
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 1b54890d..4a0b1ef5 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -1,5 +1,6 @@
 import OpenAI from "openai";
 import type { ChatCompletionChunk, ChatCompletionMessageParam } from "openai/resources/chat/completions.js";
+import { calculateCost } from "../models.js";
 import type {
 	AssistantMessage,
 	Context,
@@ -8,9 +9,9 @@ import type {
 	Message,
 	Model,
 	StopReason,
-	TokenUsage,
 	Tool,
 	ToolCall,
+	Usage,
 } from "../types.js";
 
 export interface OpenAICompletionsLLMOptions extends LLMOptions {
@@ -87,11 +88,12 @@ export class OpenAICompletionsLLM implements LLM {
 		let reasoningContent = "";
 		let reasoningField: "reasoning" | "reasoning_content" | null = null;
 		const parsedToolCalls: { id: string; name: string; arguments: string }[] = [];
-		let usage: TokenUsage = {
+		let usage: Usage = {
 			input: 0,
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		};
 		let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null;
 		let blockType: "text" | "thinking" | null = null;
@@ -104,6 +106,13 @@
 					(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
 				cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
 				cacheWrite: 0,
+				cost: {
+					input: 0,
+					output: 0,
+					cacheRead: 0,
+					cacheWrite: 0,
+					total: 0,
+				},
 			};
 		}
 
@@ -206,6 +215,9 @@
 			arguments: JSON.parse(tc.arguments),
 		}));
 
+		// Calculate cost
+		calculateCost(this.modelInfo, usage);
+
 		return {
 			role: "assistant",
 			content: content || undefined,
@@ -227,6 +239,7 @@
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "error",
 			error: error instanceof Error ? error.message : String(error),
diff --git a/packages/ai/src/providers/openai-responses.ts b/packages/ai/src/providers/openai-responses.ts
index 7175e884..4b3113e5 100644
--- a/packages/ai/src/providers/openai-responses.ts
+++ b/packages/ai/src/providers/openai-responses.ts
@@ -13,9 +13,9 @@ import type {
 	Message,
 	Model,
 	StopReason,
-	TokenUsage,
 	Tool,
 	ToolCall,
+	Usage,
 } from "../types.js";
 
 export interface OpenAIResponsesLLMOptions extends LLMOptions {
@@ -83,11 +83,12 @@ export class OpenAIResponsesLLM implements LLM {
 		let thinking = "";
 		const toolCalls: ToolCall[] = [];
 		const reasoningItems: ResponseReasoningItem[] = [];
-		let usage: TokenUsage = {
+		let usage: Usage = {
 			input: 0,
 			output: 0,
 			cacheRead: 0,
 			cacheWrite: 0,
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 		};
 		let stopReason: StopReason = "stop";
 
@@ -137,6 +138,7 @@
 				output: response.usage.output_tokens || 0,
 				cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
 				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			};
 		}
 
@@ -180,6 +182,7 @@
 				output: 0,
 				cacheRead: 0,
 				cacheWrite: 0,
+				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
 			},
 			stopReason: "error",
 			error: error instanceof Error ? error.message : String(error),
diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts
index c195dbec..e96c8809 100644
--- a/packages/ai/src/types.ts
+++ b/packages/ai/src/types.ts
@@ -28,7 +28,7 @@ export interface AssistantMessage {
 	}[];
 	provider: string;
 	model: string;
-	usage: TokenUsage;
+	usage: Usage;
 	stopReason: StopReason;
 	error?: string | Error;
 
@@ -60,7 +60,7 @@ export type Event =
 	| { type: "text"; content: string; delta: string }
 	| { type: "thinking"; content: string; delta: string }
 	| { type: "toolCall"; toolCall: ToolCall }
-	| { type: "usage"; usage: TokenUsage }
+	| { type: "usage"; usage: Usage }
 	| { type: "done"; reason: StopReason; message: AssistantMessage }
 	| { type: "error"; error: Error };
 
@@ -70,12 +70,12 @@ export interface ToolCall {
 	id: string;
 	name: string;
 	arguments: Record<string, any>;
 }
 
-export interface TokenUsage {
+export interface Usage {
 	input: number;
 	output: number;
 	cacheRead: number;
 	cacheWrite: number;
-	cost?: {
+	cost: {
 		input: number;
 		output: number;
 		cacheRead: number;
 		cacheWrite: number;
 		total: number;
 	};
 }
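With cost now required on Usage rather than optional, every provider zero-initializes it before streaming and in its error path. The repeated literal could be centralized; a small sketch of a hypothetical helper, not part of this diff:

	// Hypothetical helper, not in the diff: one place for the zeroed Usage
	// literal that each provider currently spells out inline.
	function emptyUsage(): Usage {
		return {
			input: 0,
			output: 0,
			cacheRead: 0,
			cacheWrite: 0,
			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
		};
	}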