From 7b2c62707958ee3aad64615ac26242761ecb8669 Mon Sep 17 00:00:00 2001
From: nathyong
Date: Tue, 13 Jan 2026 09:29:33 +1100
Subject: [PATCH] Insert cache point on openrouter+anthropic completions (#584)

Co-authored-by: nathyong
---
 packages/ai/CHANGELOG.md                   |  1 +
 .../ai/src/providers/openai-completions.ts | 34 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/packages/ai/CHANGELOG.md b/packages/ai/CHANGELOG.md
index 2e827d25..e2b4b33d 100644
--- a/packages/ai/CHANGELOG.md
+++ b/packages/ai/CHANGELOG.md
@@ -5,6 +5,7 @@
 ### Added
 
 - Added `serviceTier` option for OpenAI Responses requests ([#672](https://github.com/badlogic/pi-mono/pull/672) by [@markusylisiurunen](https://github.com/markusylisiurunen))
+- **Anthropic caching on OpenRouter**: Interactions with Anthropic models via OpenRouter now set a 5-minute cache point using Anthropic-style `cache_control` breakpoints on the last assistant or user message. ([#584](https://github.com/badlogic/pi-mono/pull/584) by [@nathyong](https://github.com/nathyong))
 
 ## [0.44.0] - 2026-01-12
 
diff --git a/packages/ai/src/providers/openai-completions.ts b/packages/ai/src/providers/openai-completions.ts
index 37c2dd13..0ff4f1cd 100644
--- a/packages/ai/src/providers/openai-completions.ts
+++ b/packages/ai/src/providers/openai-completions.ts
@@ -365,6 +365,7 @@ function createClient(model: Model<"openai-completions">, context: Context, apiK
 function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
 	const compat = getCompat(model);
 	const messages = convertMessages(model, context, compat);
+	maybeAddOpenRouterAnthropicCacheControl(model, messages);
 
 	const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 		model: model.id,
@@ -410,6 +411,39 @@
 	return params;
 }
 
+function maybeAddOpenRouterAnthropicCacheControl(
+	model: Model<"openai-completions">,
+	messages: ChatCompletionMessageParam[],
+): void {
+	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;
+
+	// Anthropic-style caching requires cache_control on a text part. Add a breakpoint
+	// on the last user/assistant message (walking backwards until we find text content).
+	for (let i = messages.length - 1; i >= 0; i--) {
+		const msg = messages[i];
+		if (msg.role !== "user" && msg.role !== "assistant") continue;
+
+		const content = msg.content;
+		if (typeof content === "string") {
+			msg.content = [
+				Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
+			];
+			return;
+		}
+
+		if (!Array.isArray(content)) continue;
+
+		// Find last text part and add cache_control
+		for (let j = content.length - 1; j >= 0; j--) {
+			const part = content[j];
+			if (part?.type === "text") {
+				Object.assign(part, { cache_control: { type: "ephemeral" } });
+				return;
+			}
+		}
+	}
+}
+
 function convertMessages(
 	model: Model<"openai-completions">,
 	context: Context,