feat(ai): add PI_CACHE_RETENTION env var for extended prompt caching

Adds support for extended cache retention via PI_CACHE_RETENTION=long:
- Anthropic: 5m -> 1h TTL
- OpenAI: in-memory -> 24h retention

Only applies to direct API calls (api.anthropic.com, api.openai.com).
Proxies and other providers are unaffected.

fixes #967
This commit is contained in:
Mario Zechner 2026-01-29 02:22:06 +01:00
parent 605f6f494b
commit 1b6a147579
6 changed files with 256 additions and 1 deletions

View file

@ -30,6 +30,22 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
/**
 * Get cache TTL based on the PI_CACHE_RETENTION env var.
 * Only applies to direct Anthropic API calls (host api.anthropic.com).
 *
 * @param baseUrl - The provider base URL the request will be sent to.
 * @returns "1h" when long retention is enabled and the URL targets the
 *          Anthropic API directly; undefined for the default (5m) TTL.
 */
function getCacheTtl(baseUrl: string): "1h" | undefined {
    // Guard for non-Node environments where `process` does not exist.
    if (typeof process === "undefined" || process.env.PI_CACHE_RETENTION !== "long") {
        return undefined;
    }
    // Compare the parsed hostname rather than doing a substring match, so a
    // proxy URL that merely embeds "api.anthropic.com" in its path or in a
    // lookalike subdomain does not opt in to extended retention — the feature
    // is documented as direct-API only.
    try {
        return new URL(baseUrl).hostname === "api.anthropic.com" ? "1h" : undefined;
    } catch {
        // baseUrl is not a parseable URL — fall back to the original loose check.
        return baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
    }
}
// Stealth mode: Mimic Claude Code's tool naming exactly
const claudeCodeVersion = "2.1.2";
@ -452,6 +468,7 @@ function buildParams(
};
// For OAuth tokens, we MUST include Claude Code identity
const cacheTtl = getCacheTtl(model.baseUrl);
if (isOAuthToken) {
params.system = [
{
@ -459,6 +476,7 @@ function buildParams(
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
},
];
@ -468,6 +486,7 @@ function buildParams(
text: sanitizeSurrogates(context.systemPrompt),
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
});
}
@ -479,6 +498,7 @@ function buildParams(
text: sanitizeSurrogates(context.systemPrompt),
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
},
];
@ -655,7 +675,8 @@ function convertMessages(
lastBlock &&
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
) {
(lastBlock as any).cache_control = { type: "ephemeral" };
const cacheTtl = getCacheTtl(model.baseUrl);
(lastBlock as any).cache_control = { type: "ephemeral", ...(cacheTtl && { ttl: cacheTtl }) };
}
}
}

View file

@ -18,6 +18,22 @@ import { buildBaseOptions, clampReasoning } from "./simple-options.js";
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
/**
 * Get prompt cache retention based on the PI_CACHE_RETENTION env var.
 * Only applies to direct OpenAI API calls (host api.openai.com).
 *
 * @param baseUrl - The provider base URL the request will be sent to.
 * @returns "24h" when long retention is enabled and the URL targets the
 *          OpenAI API directly; undefined for the default (in-memory) retention.
 */
function getPromptCacheRetention(baseUrl: string): "24h" | undefined {
    // Guard for non-Node environments where `process` does not exist.
    if (typeof process === "undefined" || process.env.PI_CACHE_RETENTION !== "long") {
        return undefined;
    }
    // Compare the parsed hostname rather than doing a substring match, so a
    // proxy URL that merely embeds "api.openai.com" in its path or in a
    // lookalike subdomain does not opt in to extended retention — the feature
    // is documented as direct-API only.
    try {
        return new URL(baseUrl).hostname === "api.openai.com" ? "24h" : undefined;
    } catch {
        // baseUrl is not a parseable URL — fall back to the original loose check.
        return baseUrl.includes("api.openai.com") ? "24h" : undefined;
    }
}
// OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@ -175,6 +191,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
input: messages,
stream: true,
prompt_cache_key: options?.sessionId,
prompt_cache_retention: getPromptCacheRetention(model.baseUrl),
};
if (options?.maxTokens) {