feat(ai): add PI_CACHE_RETENTION env var for extended prompt caching

Adds support for extended cache retention via PI_CACHE_RETENTION=long:
- Anthropic: 5m -> 1h TTL
- OpenAI: in-memory -> 24h retention

Only applies to direct API calls (api.anthropic.com, api.openai.com).
Proxies and other providers are unaffected.

fixes #967
This commit is contained in:
Mario Zechner 2026-01-29 02:22:06 +01:00
parent 605f6f494b
commit 1b6a147579
6 changed files with 256 additions and 1 deletions

View file

@ -30,6 +30,22 @@ import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
import { transformMessages } from "./transform-messages.js";
/**
 * Get cache TTL based on the PI_CACHE_RETENTION env var.
 * Only applies to direct Anthropic API calls (host api.anthropic.com).
 *
 * @param baseUrl - The provider base URL the request will be sent to.
 * @returns "1h" when long retention is enabled and the URL targets the
 *          Anthropic API directly; undefined for the default (5m) TTL.
 */
function getCacheTtl(baseUrl: string): "1h" | undefined {
    // Guard for non-Node environments where `process` does not exist.
    if (typeof process === "undefined" || process.env.PI_CACHE_RETENTION !== "long") {
        return undefined;
    }
    // Compare the parsed hostname rather than doing a substring match, so a
    // proxy URL that merely embeds "api.anthropic.com" in its path or in a
    // lookalike subdomain does not opt in to extended retention — the feature
    // is documented as direct-API only.
    try {
        return new URL(baseUrl).hostname === "api.anthropic.com" ? "1h" : undefined;
    } catch {
        // baseUrl is not a parseable URL — fall back to the original loose check.
        return baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
    }
}
// Stealth mode: Mimic Claude Code's tool naming exactly
const claudeCodeVersion = "2.1.2";
@ -452,6 +468,7 @@ function buildParams(
};
// For OAuth tokens, we MUST include Claude Code identity
const cacheTtl = getCacheTtl(model.baseUrl);
if (isOAuthToken) {
params.system = [
{
@ -459,6 +476,7 @@ function buildParams(
text: "You are Claude Code, Anthropic's official CLI for Claude.",
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
},
];
@ -468,6 +486,7 @@ function buildParams(
text: sanitizeSurrogates(context.systemPrompt),
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
});
}
@ -479,6 +498,7 @@ function buildParams(
text: sanitizeSurrogates(context.systemPrompt),
cache_control: {
type: "ephemeral",
...(cacheTtl && { ttl: cacheTtl }),
},
},
];
@ -655,7 +675,8 @@ function convertMessages(
lastBlock &&
(lastBlock.type === "text" || lastBlock.type === "image" || lastBlock.type === "tool_result")
) {
(lastBlock as any).cache_control = { type: "ephemeral" };
const cacheTtl = getCacheTtl(model.baseUrl);
(lastBlock as any).cache_control = { type: "ephemeral", ...(cacheTtl && { ttl: cacheTtl }) };
}
}
}

View file

@ -18,6 +18,22 @@ import { buildBaseOptions, clampReasoning } from "./simple-options.js";
const OPENAI_TOOL_CALL_PROVIDERS = new Set(["openai", "openai-codex", "opencode"]);
/**
 * Get prompt cache retention based on the PI_CACHE_RETENTION env var.
 * Only applies to direct OpenAI API calls (host api.openai.com).
 *
 * @param baseUrl - The provider base URL the request will be sent to.
 * @returns "24h" when long retention is enabled and the URL targets the
 *          OpenAI API directly; undefined for the default (in-memory) retention.
 */
function getPromptCacheRetention(baseUrl: string): "24h" | undefined {
    // Guard for non-Node environments where `process` does not exist.
    if (typeof process === "undefined" || process.env.PI_CACHE_RETENTION !== "long") {
        return undefined;
    }
    // Compare the parsed hostname rather than doing a substring match, so a
    // proxy URL that merely embeds "api.openai.com" in its path or in a
    // lookalike subdomain does not opt in to extended retention — the feature
    // is documented as direct-API only.
    try {
        return new URL(baseUrl).hostname === "api.openai.com" ? "24h" : undefined;
    } catch {
        // baseUrl is not a parseable URL — fall back to the original loose check.
        return baseUrl.includes("api.openai.com") ? "24h" : undefined;
    }
}
// OpenAI Responses-specific options
export interface OpenAIResponsesOptions extends StreamOptions {
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@ -175,6 +191,7 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
input: messages,
stream: true,
prompt_cache_key: options?.sessionId,
prompt_cache_retention: getPromptCacheRetention(model.baseUrl),
};
if (options?.maxTokens) {