feat(ai): Add cost tracking to LLM implementations

- Track input/output token costs for all providers
- Calculate costs based on Model pricing information
- Include cost information in AssistantMessage responses
- Add Usage interface with detailed cost breakdown
- Implement calculateCost utility that converts token usage into dollar costs
Mario Zechner 2025-08-30 00:45:08 +02:00
parent f9d688d577
commit 550da5e47c
6 changed files with 61 additions and 14 deletions


@@ -3,7 +3,7 @@ import { AnthropicLLM } from "./providers/anthropic.js";
 import { GoogleLLM } from "./providers/google.js";
 import { OpenAICompletionsLLM } from "./providers/openai-completions.js";
 import { OpenAIResponsesLLM } from "./providers/openai-responses.js";
-import type { Model } from "./types.js";
+import type { Model, Usage } from "./types.js";
 
 // Provider configuration with factory functions
 export const PROVIDER_CONFIG = {
@@ -102,5 +102,14 @@ export function getModel<P extends keyof typeof PROVIDERS>(
     return models[modelId as string];
 }
 
+export function calculateCost(model: Model, usage: Usage) {
+    usage.cost.input = (model.cost.input / 1000000) * usage.input;
+    usage.cost.output = (model.cost.output / 1000000) * usage.output;
+    usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;
+    usage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;
+    usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
+    return usage.cost;
+}
+
 // Re-export Model type for convenience
 export type { Model };
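calculateCost converts per-million-token pricing on the Model into dollar amounts, mutating usage.cost in place and also returning it. A minimal worked sketch (the pricing and token counts below are hypothetical, and treating Model.cost as USD per million tokens is an assumption implied by the 1000000 divisor):

import { calculateCost } from "./models.js";
import type { Model, Usage } from "./types.js";

// Hypothetical pricing: $3/M input, $15/M output, $0.30/M cache read, $3.75/M cache write.
const model = { cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 } } as Model;

const usage: Usage = {
    input: 1000,
    output: 500,
    cacheRead: 0,
    cacheWrite: 0,
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};

calculateCost(model, usage);
// usage.cost.input  === (3 / 1000000) * 1000 === 0.003
// usage.cost.output === (15 / 1000000) * 500 === 0.0075
// usage.cost.total  === 0.0105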


@@ -5,6 +5,7 @@ import type {
     MessageParam,
     Tool,
 } from "@anthropic-ai/sdk/resources/messages.js";
+import { calculateCost } from "../models.js";
 import type {
     AssistantMessage,
     Context,
@@ -13,8 +14,8 @@ import type {
     Message,
     Model,
     StopReason,
-    TokenUsage,
     ToolCall,
+    Usage,
 } from "../types.js";
 
 export interface AnthropicLLMOptions extends LLMOptions {
@@ -186,13 +187,20 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
                 name: block.name,
                 arguments: block.input as Record<string, any>,
             }));
-            const usage: TokenUsage = {
+            const usage: Usage = {
                 input: msg.usage.input_tokens,
                 output: msg.usage.output_tokens,
                 cacheRead: msg.usage.cache_read_input_tokens || 0,
                 cacheWrite: msg.usage.cache_creation_input_tokens || 0,
-                // TODO add cost
+                cost: {
+                    input: 0,
+                    output: 0,
+                    cacheRead: 0,
+                    cacheWrite: 0,
+                    total: 0,
+                },
             };
+            calculateCost(this.modelInfo, usage);
 
             return {
                 role: "assistant",
@@ -215,6 +223,7 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
                 output: 0,
                 cacheRead: 0,
                 cacheWrite: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             },
             stopReason: "error",
             error: error instanceof Error ? error.message : String(error),
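Each provider follows the same pattern as the Anthropic change above: build a Usage with a zeroed cost object, then let calculateCost fill it in before the message is returned. A hedged sketch of reading the breakdown off a resulting AssistantMessage (formatCost is a hypothetical helper, not part of this commit):

import type { AssistantMessage } from "./types.js";

function formatCost(msg: AssistantMessage): string {
    // `usage.cost` is always populated after this commit; values are assumed to be USD.
    const { input, output, cacheRead, cacheWrite, total } = msg.usage.cost;
    return `in=$${input.toFixed(6)} out=$${output.toFixed(6)} cache=$${(cacheRead + cacheWrite).toFixed(6)} total=$${total.toFixed(6)}`;
}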


@@ -5,6 +5,7 @@ import {
     type GenerateContentParameters,
     GoogleGenAI,
 } from "@google/genai";
+import { calculateCost } from "../models.js";
 import type {
     AssistantMessage,
     Context,
@@ -13,9 +14,9 @@ import type {
     Message,
     Model,
     StopReason,
-    TokenUsage,
     Tool,
     ToolCall,
+    Usage,
 } from "../types.js";
 
 export interface GoogleLLMOptions extends LLMOptions {
@@ -97,11 +98,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
         let thinking = "";
         let thoughtSignature: string | undefined;
         const toolCalls: ToolCall[] = [];
-        let usage: TokenUsage = {
+        let usage: Usage = {
             input: 0,
             output: 0,
             cacheRead: 0,
             cacheWrite: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
         };
         let stopReason: StopReason = "stop";
         let inTextBlock = false;
@@ -179,6 +181,13 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
                         (chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
                     cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
                     cacheWrite: 0,
+                    cost: {
+                        input: 0,
+                        output: 0,
+                        cacheRead: 0,
+                        cacheWrite: 0,
+                        total: 0,
+                    },
                 };
             }
         }
@@ -203,6 +212,9 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
             thoughtSignature = Buffer.from(signature).toString("base64");
         }
 
+        // Calculate cost
+        calculateCost(this.model, usage);
+
         // Usage metadata is in the last chunk
         // Already captured during streaming
@@ -227,6 +239,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
                 output: 0,
                 cacheRead: 0,
                 cacheWrite: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             },
             stopReason: "error",
             error: error instanceof Error ? error.message : String(error),


@@ -1,5 +1,6 @@
 import OpenAI from "openai";
 import type { ChatCompletionChunk, ChatCompletionMessageParam } from "openai/resources/chat/completions.js";
+import { calculateCost } from "../models.js";
 import type {
     AssistantMessage,
     Context,
@@ -8,9 +9,9 @@ import type {
     Message,
     Model,
     StopReason,
-    TokenUsage,
     Tool,
     ToolCall,
+    Usage,
 } from "../types.js";
 
 export interface OpenAICompletionsLLMOptions extends LLMOptions {
@@ -87,11 +88,12 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
         let reasoningContent = "";
         let reasoningField: "reasoning" | "reasoning_content" | null = null;
         const parsedToolCalls: { id: string; name: string; arguments: string }[] = [];
-        let usage: TokenUsage = {
+        let usage: Usage = {
             input: 0,
             output: 0,
             cacheRead: 0,
             cacheWrite: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
         };
         let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null;
         let blockType: "text" | "thinking" | null = null;
@@ -104,6 +106,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
                         (chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
                     cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
                     cacheWrite: 0,
+                    cost: {
+                        input: 0,
+                        output: 0,
+                        cacheRead: 0,
+                        cacheWrite: 0,
+                        total: 0,
+                    },
                 };
             }
@@ -206,6 +215,9 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
                 arguments: JSON.parse(tc.arguments),
             }));
 
+        // Calculate cost
+        calculateCost(this.modelInfo, usage);
+
         return {
             role: "assistant",
             content: content || undefined,
@@ -227,6 +239,7 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
                 output: 0,
                 cacheRead: 0,
                 cacheWrite: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             },
             stopReason: "error",
             error: error instanceof Error ? error.message : String(error),


@@ -13,9 +13,9 @@ import type {
     Message,
     Model,
     StopReason,
-    TokenUsage,
     Tool,
     ToolCall,
+    Usage,
 } from "../types.js";
 
 export interface OpenAIResponsesLLMOptions extends LLMOptions {
@@ -83,11 +83,12 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
         let thinking = "";
         const toolCalls: ToolCall[] = [];
         const reasoningItems: ResponseReasoningItem[] = [];
-        let usage: TokenUsage = {
+        let usage: Usage = {
             input: 0,
             output: 0,
             cacheRead: 0,
             cacheWrite: 0,
+            cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
         };
 
         let stopReason: StopReason = "stop";
@@ -137,6 +138,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
                 output: response.usage.output_tokens || 0,
                 cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
                 cacheWrite: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             };
         }
@@ -180,6 +182,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
                 output: 0,
                 cacheRead: 0,
                 cacheWrite: 0,
+                cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             },
             stopReason: "error",
             error: error instanceof Error ? error.message : String(error),
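Every error path now returns a fully zeroed cost object, and the types change below makes cost required rather than optional, so callers can aggregate without guards. A minimal sketch, assuming the caller has collected AssistantMessage values into an array:

import type { AssistantMessage } from "./types.js";

// Sum conversation cost; no null checks needed because `usage.cost`
// is always present, zeroed on error responses.
function totalCost(messages: AssistantMessage[]): number {
    return messages.reduce((sum, m) => sum + m.usage.cost.total, 0);
}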


@@ -28,7 +28,7 @@ export interface AssistantMessage {
     }[];
     provider: string;
     model: string;
-    usage: TokenUsage;
+    usage: Usage;
     stopReason: StopReason;
     error?: string | Error;
@@ -60,7 +60,7 @@ export type Event =
     | { type: "text"; content: string; delta: string }
     | { type: "thinking"; content: string; delta: string }
     | { type: "toolCall"; toolCall: ToolCall }
-    | { type: "usage"; usage: TokenUsage }
+    | { type: "usage"; usage: Usage }
     | { type: "done"; reason: StopReason; message: AssistantMessage }
     | { type: "error"; error: Error };
@@ -70,12 +70,12 @@ export interface ToolCall {
     arguments: Record<string, any>;
 }
 
-export interface TokenUsage {
+export interface Usage {
     input: number;
     output: number;
     cacheRead: number;
     cacheWrite: number;
-    cost?: {
+    cost: {
         input: number;
         output: number;
         cacheRead: number;