diff --git a/packages/ai/src/providers/google-gemini-cli.ts b/packages/ai/src/providers/google-gemini-cli.ts index c95c7673..59ee332e 100644 --- a/packages/ai/src/providers/google-gemini-cli.ts +++ b/packages/ai/src/providers/google-gemini-cli.ts @@ -335,12 +335,15 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = ( } if (responseData.usageMetadata) { + // promptTokenCount includes cachedContentTokenCount, so subtract to get fresh input + const promptTokens = responseData.usageMetadata.promptTokenCount || 0; + const cacheReadTokens = responseData.usageMetadata.cachedContentTokenCount || 0; output.usage = { - input: responseData.usageMetadata.promptTokenCount || 0, + input: promptTokens - cacheReadTokens, output: (responseData.usageMetadata.candidatesTokenCount || 0) + (responseData.usageMetadata.thoughtsTokenCount || 0), - cacheRead: responseData.usageMetadata.cachedContentTokenCount || 0, + cacheRead: cacheReadTokens, cacheWrite: 0, totalTokens: responseData.usageMetadata.totalTokenCount || 0, cost: {