fix: normalize OpenAI token counting, add branch source tracking

pi-ai:
- Fixed usage.input to exclude cached tokens for OpenAI providers
- Previously, input also contained the cached tokens reported in cacheRead, double-counting them
- Now input + output + cacheRead + cacheWrite correctly gives the total context size (see the sketch below)
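
To make the accounting concrete, a minimal TypeScript sketch follows; the Usage shape is inferred from the diff below and is not the full pi-ai API:

    // Sketch only: field meanings inferred from this commit's diff.
    interface Usage {
        input: number; // non-cached prompt tokens (cached tokens subtracted)
        output: number; // completion tokens, including reasoning tokens
        cacheRead: number; // cached prompt tokens read back from the provider cache
        cacheWrite: number; // tokens written to the cache (0 on these OpenAI paths)
    }

    // After the fix the four fields partition the context, so a plain sum
    // yields the total context size without double-counting cached tokens.
    function totalContextTokens(u: Usage): number {
        return u.input + u.output + u.cacheRead + u.cacheWrite;
    }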

coding-agent:
- Session header now includes a branchedFrom field for branched sessions (sketched after this list)
- Updated compaction.md with refined implementation plan
- Updated session.md with branchedFrom documentation
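
For illustration, a hypothetical session header carrying the new field; every name here except branchedFrom is an assumption rather than the documented schema (session.md has the real one):

    // Hypothetical shape: only branchedFrom is confirmed by this commit.
    interface SessionHeader {
        id: string; // assumed: this session's own identifier
        branchedFrom?: string; // id of the source session; absent when not branched
    }

    const branched: SessionHeader = {
        id: "session-b",
        branchedFrom: "session-a", // records which session this one was branched from
    };
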
Mario Zechner 2025-12-03 17:11:22 +01:00
parent 6cd8bcdeb3
commit 989af79752
7 changed files with 65 additions and 25 deletions

@@ -105,12 +105,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
     for await (const chunk of openaiStream) {
         if (chunk.usage) {
+            const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
             output.usage = {
-                input: chunk.usage.prompt_tokens || 0,
+                // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
+                input: (chunk.usage.prompt_tokens || 0) - cachedTokens,
                 output:
                     (chunk.usage.completion_tokens || 0) +
                     (chunk.usage.completion_tokens_details?.reasoning_tokens || 0),
-                cacheRead: chunk.usage.prompt_tokens_details?.cached_tokens || 0,
+                cacheRead: cachedTokens,
                 cacheWrite: 0,
                 cost: {
                     input: 0,

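Worked example (illustrative numbers, not from the commit): if OpenAI reports prompt_tokens = 1200 with cached_tokens = 1000, the old code set input = 1200 and cacheRead = 1000, counting the cached tokens twice; the new code sets input = 200 and cacheRead = 1000, so input + cacheRead = 1200 recovers prompt_tokens exactly.
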
@@ -253,10 +253,12 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
     else if (event.type === "response.completed") {
         const response = event.response;
         if (response?.usage) {
+            const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
             output.usage = {
-                input: response.usage.input_tokens || 0,
+                // OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
+                input: (response.usage.input_tokens || 0) - cachedTokens,
                 output: response.usage.output_tokens || 0,
-                cacheRead: response.usage.input_tokens_details?.cached_tokens || 0,
+                cacheRead: cachedTokens,
                 cacheWrite: 0,
                 cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
             };