mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-22 02:03:42 +00:00
Insert cache point on openrouter+anthropic completions (#584)
Co-authored-by: nathyong <nathyong@noreply.github.com>
This commit is contained in:
parent
8cb9a42067
commit
7b2c627079
2 changed files with 35 additions and 0 deletions
|
|
@ -5,6 +5,7 @@
|
||||||
### Added
|
### Added
|
||||||
|
|
||||||
- Added `serviceTier` option for OpenAI Responses requests ([#672](https://github.com/badlogic/pi-mono/pull/672) by [@markusylisiurunen](https://github.com/markusylisiurunen))
|
- Added `serviceTier` option for OpenAI Responses requests ([#672](https://github.com/badlogic/pi-mono/pull/672) by [@markusylisiurunen](https://github.com/markusylisiurunen))
|
||||||
|
- **Anthropic caching on OpenRouter**: Requests to Anthropic models via OpenRouter now set a 5-minute cache point by adding an Anthropic-style `cache_control` breakpoint to the last text part of the most recent user or assistant message that contains text. ([#584](https://github.com/badlogic/pi-mono/pull/584) by [@nathyong](https://github.com/nathyong))
|
||||||
|
|
||||||
## [0.44.0] - 2026-01-12
|
## [0.44.0] - 2026-01-12
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -365,6 +365,7 @@ function createClient(model: Model<"openai-completions">, context: Context, apiK
|
||||||
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
|
function buildParams(model: Model<"openai-completions">, context: Context, options?: OpenAICompletionsOptions) {
|
||||||
const compat = getCompat(model);
|
const compat = getCompat(model);
|
||||||
const messages = convertMessages(model, context, compat);
|
const messages = convertMessages(model, context, compat);
|
||||||
|
maybeAddOpenRouterAnthropicCacheControl(model, messages);
|
||||||
|
|
||||||
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
|
||||||
model: model.id,
|
model: model.id,
|
||||||
|
|
@ -410,6 +411,39 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Adds an Anthropic-style `cache_control` breakpoint to the outgoing messages when the
 * request targets an Anthropic model through OpenRouter.
 *
 * The breakpoint is placed on the last non-empty text part of the most recent user or
 * assistant message. Messages are scanned from newest to oldest; messages with other
 * roles, with no text content, or with only empty text are skipped. At most one
 * breakpoint is added.
 *
 * The `messages` array is mutated in place: a string `content` is replaced by a
 * single-element text-part array carrying the breakpoint, and an existing text part
 * gains a `cache_control` property.
 *
 * @param model - Model the request is built for; only `provider` and `id` are inspected.
 * @param messages - Already-converted chat completion messages, modified in place.
 */
function maybeAddOpenRouterAnthropicCacheControl(
	model: Model<"openai-completions">,
	messages: ChatCompletionMessageParam[],
): void {
	if (model.provider !== "openrouter" || !model.id.startsWith("anthropic/")) return;

	// Anthropic-style caching requires cache_control on a text part, so walk backwards
	// until a user/assistant message with usable text is found.
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i];
		if (msg.role !== "user" && msg.role !== "assistant") continue;

		const content = msg.content;
		if (typeof content === "string") {
			// An empty string (e.g. an assistant turn that only carries tool calls) would
			// become an empty text block, which Anthropic documents as not cacheable.
			// Keep looking at earlier messages instead of spending the breakpoint here.
			if (content.length === 0) continue;

			// Object.assign is used because cache_control is not part of the OpenAI
			// text-part type; it is an extension understood by the upstream provider.
			msg.content = [
				Object.assign({ type: "text" as const, text: content }, { cache_control: { type: "ephemeral" } }),
			];
			return;
		}

		// null/undefined content (nothing to annotate): try the previous message.
		if (!Array.isArray(content)) continue;

		// Find the last non-empty text part and add cache_control to it.
		for (let j = content.length - 1; j >= 0; j--) {
			const part = content[j];
			if (part?.type === "text" && typeof part.text === "string" && part.text.length > 0) {
				Object.assign(part, { cache_control: { type: "ephemeral" } });
				return;
			}
		}
	}
}
|
||||||
|
|
||||||
function convertMessages(
|
function convertMessages(
|
||||||
model: Model<"openai-completions">,
|
model: Model<"openai-completions">,
|
||||||
context: Context,
|
context: Context,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue