mirror of
https://github.com/getcompanion-ai/co-mono.git
synced 2026-04-15 09:01:14 +00:00
fix(ai): handle bedrock opus 4.6 adaptive thinking and interleaved beta
This commit is contained in:
parent
d1fce2ba1d
commit
d3d3ef4155
2 changed files with 99 additions and 17 deletions
|
|
@ -200,6 +200,14 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
|
|||
}
|
||||
|
||||
if (model.id.includes("anthropic.claude") || model.id.includes("anthropic/claude")) {
|
||||
if (supportsAdaptiveThinking(model.id)) {
|
||||
return streamBedrock(model, context, {
|
||||
...base,
|
||||
reasoning: options.reasoning,
|
||||
thinkingBudgets: options.thinkingBudgets,
|
||||
} satisfies BedrockOptions);
|
||||
}
|
||||
|
||||
const adjusted = adjustMaxTokensForThinking(
|
||||
base.maxTokens || 0,
|
||||
model.maxTokens,
|
||||
|
|
@ -347,6 +355,29 @@ function handleContentBlockStop(
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Check if the model supports adaptive thinking (Opus 4.6).
 *
 * The check is a plain, case-sensitive substring match on the two spellings of
 * the version marker, "opus-4-6" and "opus-4.6". Only these two markers are
 * recognized; any other model id yields `false`.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the id contains either Opus 4.6 marker.
 */
function supportsAdaptiveThinking(modelId: string): boolean {
  return modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
}
|
||||
|
||||
/**
 * Translate a reasoning level into one of the effort values
 * "low" | "medium" | "high" | "max".
 *
 * - "minimal" and "low" both map to "low".
 * - "medium" and "high" map to themselves.
 * - "xhigh" maps to "max".
 * - Any other value (including `undefined`) falls back to "high".
 *
 * @param level - Requested reasoning level.
 * @returns The effort string corresponding to `level`.
 */
function mapThinkingLevelToEffort(level: SimpleStreamOptions["reasoning"]): "low" | "medium" | "high" | "max" {
  switch (level) {
    // There is no effort value below "low", so "minimal" shares it.
    case "minimal":
    case "low":
      return "low";
    case "medium":
      return "medium";
    case "high":
      return "high";
    case "xhigh":
      return "max";
    // Unset or unrecognized level: fall back to "high".
    default:
      return "high";
  }
}
|
||||
|
||||
/**
|
||||
* Check if the model supports prompt caching.
|
||||
* Supported: Claude 3.5 Haiku, Claude 3.7 Sonnet, Claude 4.x models
|
||||
|
|
@ -589,26 +620,33 @@ function buildAdditionalModelRequestFields(
|
|||
}
|
||||
|
||||
if (model.id.includes("anthropic.claude")) {
|
||||
const defaultBudgets: Record<ThinkingLevel, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
|
||||
};
|
||||
const result: Record<string, any> = supportsAdaptiveThinking(model.id)
|
||||
? {
|
||||
thinking: { type: "adaptive" },
|
||||
output_config: { effort: mapThinkingLevelToEffort(options.reasoning) },
|
||||
}
|
||||
: (() => {
|
||||
const defaultBudgets: Record<ThinkingLevel, number> = {
|
||||
minimal: 1024,
|
||||
low: 2048,
|
||||
medium: 8192,
|
||||
high: 16384,
|
||||
xhigh: 16384, // Claude doesn't support xhigh, clamp to high
|
||||
};
|
||||
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
||||
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];
|
||||
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
||||
const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
||||
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];
|
||||
|
||||
const result: Record<string, any> = {
|
||||
thinking: {
|
||||
type: "enabled",
|
||||
budget_tokens: budget,
|
||||
},
|
||||
};
|
||||
return {
|
||||
thinking: {
|
||||
type: "enabled",
|
||||
budget_tokens: budget,
|
||||
},
|
||||
};
|
||||
})();
|
||||
|
||||
if (options.interleavedThinking) {
|
||||
if (options.interleavedThinking && !supportsAdaptiveThinking(model.id)) {
|
||||
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1190,6 +1190,50 @@ describe("Generate E2E Tests", () => {
|
|||
});
|
||||
});
|
||||
|
||||
// E2E check for Opus 4.6 on Amazon Bedrock: with reasoning "xhigh" and
// interleavedThinking enabled, the captured request payload must carry
// adaptive thinking with effort "max" and must not carry anthropic_beta.
// The suite is skipped when hasBedrockCredentials() returns false.
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-opus-4-6 interleaved thinking)", () => {
  const llm = getModel("amazon-bedrock", "global.anthropic.claude-opus-4-6-v1");

  it("should use adaptive thinking without anthropic_beta", { retry: 3 }, async () => {
    // Filled in by the onPayload hook below so the request can be inspected.
    let capturedPayload: unknown;
    const response = await complete(
      llm,
      {
        systemPrompt: "You are a helpful assistant that uses tools when asked.",
        messages: [
          {
            role: "user",
            content: "Think first, then calculate 15 + 27 using the calculator tool.",
            timestamp: Date.now(),
          },
        ],
        tools: [calculatorTool],
      },
      {
        reasoning: "xhigh",
        interleavedThinking: true,
        onPayload: (payload) => {
          capturedPayload = payload;
        },
      },
    );

    expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
    expect(capturedPayload).toBeTruthy();

    // Narrow the opaque payload to just the fields asserted on below.
    const payload = capturedPayload as {
      additionalModelRequestFields?: {
        thinking?: { type?: string };
        output_config?: { effort?: string };
        anthropic_beta?: string[];
      };
    };

    expect(payload.additionalModelRequestFields?.thinking).toEqual({ type: "adaptive" });
    expect(payload.additionalModelRequestFields?.output_config).toEqual({ effort: "max" });
    expect(payload.additionalModelRequestFields?.anthropic_beta).toBeUndefined();
  });
});
|
||||
|
||||
// Check if ollama is installed and local LLM tests are enabled
|
||||
let ollamaInstalled = false;
|
||||
if (!process.env.PI_NO_LOCAL_LLM) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue