Mirror of https://github.com/getcompanion-ai/co-mono.git
Synced 2026-04-21 15:01:26 +00:00
feat(ai): Add cost tracking to LLM implementations
- Track input/output token costs for all providers
- Calculate costs based on Model pricing information
- Include cost information in AssistantMessage responses
- Add Usage interface with detailed cost breakdown
- Implement calculateCost utility function for cost calculations (see the sketch below)
parent f9d688d577
commit 550da5e47c
6 changed files with 61 additions and 14 deletions
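The heart of the change is the calculateCost helper in the first file below: model pricing is stored in dollars per million tokens, so each cost component is (price / 1000000) × tokens. A minimal sketch of the intended call pattern, with made-up pricing numbers purely for illustration (the Model stub is partial; only its cost block is read by calculateCost):

import { calculateCost } from "./models.js";
import type { Model, Usage } from "./types.js";

// Hypothetical pricing in dollars per million tokens (illustrative only).
const model = {
  cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
} as unknown as Model; // partial stub; the real Model has more fields

const usage: Usage = {
  input: 10_000,
  output: 2_000,
  cacheRead: 0,
  cacheWrite: 0,
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};

calculateCost(model, usage);
// usage.cost.input  = (3 / 1_000_000) * 10_000 = 0.03
// usage.cost.output = (15 / 1_000_000) * 2_000 = 0.03
// usage.cost.total  = 0.06 (dollars)

calculateCost mutates usage.cost in place and also returns it, which is why the provider implementations below can call it as a plain statement once token counts are filled in.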
@@ -3,7 +3,7 @@ import { AnthropicLLM } from "./providers/anthropic.js";
 import { GoogleLLM } from "./providers/google.js";
 import { OpenAICompletionsLLM } from "./providers/openai-completions.js";
 import { OpenAIResponsesLLM } from "./providers/openai-responses.js";
-import type { Model } from "./types.js";
+import type { Model, Usage } from "./types.js";

 // Provider configuration with factory functions
 export const PROVIDER_CONFIG = {

@@ -102,5 +102,14 @@ export function getModel<P extends keyof typeof PROVIDERS>(
   return models[modelId as string];
 }

+export function calculateCost(model: Model, usage: Usage) {
+  usage.cost.input = (model.cost.input / 1000000) * usage.input;
+  usage.cost.output = (model.cost.output / 1000000) * usage.output;
+  usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;
+  usage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;
+  usage.cost.total = usage.cost.input + usage.cost.output + usage.cost.cacheRead + usage.cost.cacheWrite;
+  return usage.cost;
+}
+
 // Re-export Model type for convenience
 export type { Model };
@@ -5,6 +5,7 @@ import type {
   MessageParam,
   Tool,
 } from "@anthropic-ai/sdk/resources/messages.js";
+import { calculateCost } from "../models.js";
 import type {
   AssistantMessage,
   Context,

@@ -13,8 +14,8 @@ import type {
   Message,
   Model,
   StopReason,
-  TokenUsage,
   ToolCall,
+  Usage,
 } from "../types.js";

 export interface AnthropicLLMOptions extends LLMOptions {

@@ -186,13 +187,20 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
           name: block.name,
           arguments: block.input as Record<string, any>,
         }));
-        const usage: TokenUsage = {
+        const usage: Usage = {
           input: msg.usage.input_tokens,
           output: msg.usage.output_tokens,
           cacheRead: msg.usage.cache_read_input_tokens || 0,
           cacheWrite: msg.usage.cache_creation_input_tokens || 0,
-          // TODO add cost
+          cost: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            total: 0,
+          },
         };
+        calculateCost(this.modelInfo, usage);

         return {
           role: "assistant",

@@ -215,6 +223,7 @@ export class AnthropicLLM implements LLM<AnthropicLLMOptions> {
           output: 0,
           cacheRead: 0,
           cacheWrite: 0,
+          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
         },
         stopReason: "error",
         error: error instanceof Error ? error.message : String(error),
@@ -5,6 +5,7 @@ import {
   type GenerateContentParameters,
   GoogleGenAI,
 } from "@google/genai";
+import { calculateCost } from "../models.js";
 import type {
   AssistantMessage,
   Context,

@@ -13,9 +14,9 @@ import type {
   Message,
   Model,
   StopReason,
-  TokenUsage,
   Tool,
   ToolCall,
+  Usage,
 } from "../types.js";

 export interface GoogleLLMOptions extends LLMOptions {

@@ -97,11 +98,12 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
     let thinking = "";
     let thoughtSignature: string | undefined;
     const toolCalls: ToolCall[] = [];
-    let usage: TokenUsage = {
+    let usage: Usage = {
       input: 0,
       output: 0,
       cacheRead: 0,
       cacheWrite: 0,
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
     };
     let stopReason: StopReason = "stop";
     let inTextBlock = false;

@@ -179,6 +181,13 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
             (chunk.usageMetadata.candidatesTokenCount || 0) + (chunk.usageMetadata.thoughtsTokenCount || 0),
           cacheRead: chunk.usageMetadata.cachedContentTokenCount || 0,
           cacheWrite: 0,
+          cost: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            total: 0,
+          },
         };
       }
     }

@@ -203,6 +212,9 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
       thoughtSignature = Buffer.from(signature).toString("base64");
     }

+    // Calculate cost
+    calculateCost(this.model, usage);
+
     // Usage metadata is in the last chunk
     // Already captured during streaming

@@ -227,6 +239,7 @@ export class GoogleLLM implements LLM<GoogleLLMOptions> {
           output: 0,
           cacheRead: 0,
           cacheWrite: 0,
+          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
         },
         stopReason: "error",
         error: error instanceof Error ? error.message : String(error),
@@ -1,5 +1,6 @@
 import OpenAI from "openai";
 import type { ChatCompletionChunk, ChatCompletionMessageParam } from "openai/resources/chat/completions.js";
+import { calculateCost } from "../models.js";
 import type {
   AssistantMessage,
   Context,

@@ -8,9 +9,9 @@ import type {
   Message,
   Model,
   StopReason,
-  TokenUsage,
   Tool,
   ToolCall,
+  Usage,
 } from "../types.js";

 export interface OpenAICompletionsLLMOptions extends LLMOptions {

@@ -87,11 +88,12 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
     let reasoningContent = "";
     let reasoningField: "reasoning" | "reasoning_content" | null = null;
     const parsedToolCalls: { id: string; name: string; arguments: string }[] = [];
-    let usage: TokenUsage = {
+    let usage: Usage = {
       input: 0,
       output: 0,
       cacheRead: 0,
       cacheWrite: 0,
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
     };
     let finishReason: ChatCompletionChunk.Choice["finish_reason"] | null = null;
     let blockType: "text" | "thinking" | null = null;

@@ -104,6 +106,13 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
             (chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
           cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
           cacheWrite: 0,
+          cost: {
+            input: 0,
+            output: 0,
+            cacheRead: 0,
+            cacheWrite: 0,
+            total: 0,
+          },
         };
       }

@@ -206,6 +215,9 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
         arguments: JSON.parse(tc.arguments),
       }));

+      // Calculate cost
+      calculateCost(this.modelInfo, usage);
+
       return {
         role: "assistant",
         content: content || undefined,

@@ -227,6 +239,7 @@ export class OpenAICompletionsLLM implements LLM<OpenAICompletionsLLMOptions> {
         output: 0,
         cacheRead: 0,
         cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
       },
       stopReason: "error",
       error: error instanceof Error ? error.message : String(error),
@@ -13,9 +13,9 @@ import type {
   Message,
   Model,
   StopReason,
-  TokenUsage,
   Tool,
   ToolCall,
+  Usage,
 } from "../types.js";

 export interface OpenAIResponsesLLMOptions extends LLMOptions {

@@ -83,11 +83,12 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
     let thinking = "";
     const toolCalls: ToolCall[] = [];
     const reasoningItems: ResponseReasoningItem[] = [];
-    let usage: TokenUsage = {
+    let usage: Usage = {
       input: 0,
       output: 0,
       cacheRead: 0,
       cacheWrite: 0,
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
     };
     let stopReason: StopReason = "stop";

@@ -137,6 +138,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
         output: response.usage.output_tokens || 0,
         cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
         cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
       };
     }

@@ -180,6 +182,7 @@ export class OpenAIResponsesLLM implements LLM<OpenAIResponsesLLMOptions> {
         output: 0,
         cacheRead: 0,
         cacheWrite: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
       },
       stopReason: "error",
       error: error instanceof Error ? error.message : String(error),
@@ -28,7 +28,7 @@ export interface AssistantMessage {
   }[];
   provider: string;
   model: string;
-  usage: TokenUsage;
+  usage: Usage;

   stopReason: StopReason;
   error?: string | Error;

@@ -60,7 +60,7 @@ export type Event =
   | { type: "text"; content: string; delta: string }
   | { type: "thinking"; content: string; delta: string }
   | { type: "toolCall"; toolCall: ToolCall }
-  | { type: "usage"; usage: TokenUsage }
+  | { type: "usage"; usage: Usage }
   | { type: "done"; reason: StopReason; message: AssistantMessage }
   | { type: "error"; error: Error };

@@ -70,12 +70,12 @@ export interface ToolCall {
   arguments: Record<string, any>;
 }

-export interface TokenUsage {
+export interface Usage {
   input: number;
   output: number;
   cacheRead: number;
   cacheWrite: number;
-  cost?: {
+  cost: {
     input: number;
     output: number;
     cacheRead: number;
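Since cost is now a required field on Usage rather than optional, downstream consumers can read spend straight off the event stream without guarding for undefined. A hypothetical consumer sketch (the onEvent handler and spend counter are assumptions for illustration, not part of this commit):

import type { Event } from "./types.js";

// Hypothetical: accumulate dollar spend across completed turns.
let totalSpend = 0;

function onEvent(event: Event) {
  if (event.type === "done") {
    totalSpend += event.message.usage.cost.total;
  }
}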