mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-16 20:01:24 +00:00
fix: normalize OpenAI token counting, add branch source tracking
pi-ai:
- Fixed usage.input to exclude cached tokens for OpenAI providers.
- Previously, input included cached tokens, causing double-counting.
- Now input + output + cacheRead + cacheWrite correctly gives the total context.

coding-agent:
- Session header now includes a branchedFrom field for branched sessions.
- Updated compaction.md with the refined implementation plan.
- Updated session.md with branchedFrom documentation.
This commit is contained in:
parent
6cd8bcdeb3
commit
989af79752
7 changed files with 65 additions and 25 deletions
|
|
@@ -105,12 +105,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|||
|
||||
for await (const chunk of openaiStream) {
|
||||
if (chunk.usage) {
|
||||
const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
|
||||
output.usage = {
|
||||
input: chunk.usage.prompt_tokens || 0,
|
||||
// OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
|
||||
input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
|
||||
output:
|
||||
(chunk.usage.completion_tokens || 0) +
|
||||
(chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
|
||||
cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
cost: {
|
||||
input: 0,
|
||||
|
|
|
|||
|
|
@@ -253,10 +253,12 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|||
else if (event.type === "response.completed") {
|
||||
const response = event.response;
|
||||
if (response?.usage) {
|
||||
const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
|
||||
output.usage = {
|
||||
input: response.usage.input_tokens || 0,
|
||||
// OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
|
||||
input: (response.usage.input_tokens || 0) - cachedTokens,
|
||||
output: response.usage.output_tokens || 0,
|
||||
cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
|
||||
cacheRead: cachedTokens,
|
||||
cacheWrite: 0,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue