feat(ai): Add Amazon Bedrock provider (#494)

Adds support for Amazon Bedrock with Claude models, including:
- Full streaming support via Converse API
- Reasoning/thinking support for Claude models
- Cross-region inference model ID handling
- Multiple AWS credential sources (profile, IAM keys, API keys)
- Image support in messages and tool results
- Unicode surrogate sanitization

Also adds 'Adding a New Provider' documentation to AGENTS.md and README.

Co-authored-by: nickchan2 <nickchan2@users.noreply.github.com>
Mario Zechner 2026-01-13 00:32:59 +01:00
parent 4f216d318f
commit fd268479a4
31 changed files with 3550 additions and 2593 deletions
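The test diffs below show how the new provider is driven; a condensed usage sketch assembled from those tests (import paths are the test-local ones used in this repo, and credentials are picked up from the environment as documented in test/bedrock-utils.ts further down):

```ts
// Paths as used in the test files of this commit.
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context } from "../src/types.js";

// Credentials come from the environment: AWS_PROFILE, AWS_ACCESS_KEY_ID +
// AWS_SECRET_ACCESS_KEY, or AWS_BEARER_TOKEN_BEDROCK (see test/bedrock-utils.ts below).
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");

const context: Context = {
  systemPrompt: "You are a helpful assistant. Be extremely concise.",
  messages: [{ role: "user", content: "Reply with exactly: 'OK'", timestamp: Date.now() }],
};

// `reasoning: "medium"` enables Claude extended thinking on Bedrock, as in the generate tests.
const response = await complete(model, context, { reasoning: "medium" });
console.log(response.stopReason, response.usage.output);
```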

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -66,6 +67,35 @@ async function testImmediateAbort<TApi extends Api>(llm: Model<TApi>, options: O
expect(response.stopReason).toBe("aborted");
}
async function testAbortThenNewMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
// First request: abort immediately before any response content arrives
const controller = new AbortController();
controller.abort();
const context: Context = {
messages: [{ role: "user", content: "Hello, how are you?", timestamp: Date.now() }],
};
const abortedResponse = await complete(llm, context, { ...options, signal: controller.signal });
expect(abortedResponse.stopReason).toBe("aborted");
// The aborted message has empty content since we aborted before anything arrived
expect(abortedResponse.content.length).toBe(0);
// Add the aborted assistant message to context (this is what happens in the real coding agent)
context.messages.push(abortedResponse);
// Second request: send a new message - this should work even with the aborted message in context
context.messages.push({
role: "user",
content: "What is 2 + 2?",
timestamp: Date.now(),
});
const followUp = await complete(llm, context, options);
expect(followUp.stopReason).toBe("stop");
expect(followUp.content.length).toBeGreaterThan(0);
}
describe("AI Providers Abort Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
const llm = getModel("google", "gemini-2.5-flash");
@@ -154,4 +184,20 @@ describe("AI Providers Abort Tests", () => {
await testImmediateAbort(llm, { apiKey: openaiCodexToken });
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Abort", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should abort mid-stream", { retry: 3 }, async () => {
await testAbortSignal(llm, { reasoning: "medium" });
});
it("should handle immediate abort", { retry: 3 }, async () => {
await testImmediateAbort(llm);
});
it("should handle abort then new message", { retry: 3 }, async () => {
await testAbortThenNewMessage(llm);
});
});
});

@@ -0,0 +1,66 @@
/**
* A test suite to ensure all configured Amazon Bedrock models are usable.
*
* This is here to make sure we have correct model identifiers from models.dev and other sources.
* Because Amazon Bedrock requires cross-region inference for some models, plain model identifiers
* are not always usable; they must be tweaked to use cross-region inference profiles.
* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system for more details.
*
* This test suite is disabled by default; it only runs when AWS credentials and the `BEDROCK_EXTENSIVE_MODEL_TEST` environment variable are set.
* It takes ~2 minutes to run. Because not all models are available in all regions,
* it's recommended to use the `us-west-2` region for the best coverage when running it.
*
* You can run this test suite with:
* ```bash
* $ AWS_REGION=us-west-2 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=... npm test -- ./test/bedrock-models.test.ts
* ```
*/
import { describe, expect, it } from "vitest";
import { getModels } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
describe("Amazon Bedrock Models", () => {
const models = getModels("amazon-bedrock");
it("should get all available Bedrock models", () => {
expect(models.length).toBeGreaterThan(0);
console.log(`Found ${models.length} Bedrock models`);
});
if (hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST) {
for (const model of models) {
it(`should make a simple request with ${model.id}`, { timeout: 10_000 }, async () => {
const context: Context = {
systemPrompt: "You are a helpful assistant. Be extremely concise.",
messages: [
{
role: "user",
content: "Reply with exactly: 'OK'",
timestamp: Date.now(),
},
],
};
const response = await complete(model, context);
expect(response.role).toBe("assistant");
expect(response.content).toBeTruthy();
expect(response.content.length).toBeGreaterThan(0);
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
expect(response.usage.output).toBeGreaterThan(0);
expect(response.errorMessage).toBeFalsy();
const textContent = response.content
.filter((b) => b.type === "text")
.map((b) => (b.type === "text" ? b.text : ""))
.join("")
.trim();
expect(textContent).toBeTruthy();
console.log(`${model.id}: ${textContent.substring(0, 100)}`);
});
}
}
});

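The model IDs used throughout these tests (e.g. `global.anthropic.claude-sonnet-4-5-20250929-v1:0`) are cross-region inference profile IDs rather than plain Bedrock model IDs. A minimal sketch of the kind of prefixing involved; the helper name and the region-to-prefix mapping are illustrative assumptions, not the provider's actual code:

```ts
// Illustration only: derive a cross-region inference profile ID from a plain
// model ID. The real provider's mapping may differ (see the AWS docs linked above).
function toInferenceProfileId(region: string, modelId: string): string {
  if (region.startsWith("us-")) return `us.${modelId}`;
  if (region.startsWith("eu-")) return `eu.${modelId}`;
  if (region.startsWith("ap-")) return `apac.${modelId}`;
  return modelId; // fall back to the plain model ID
}

// toInferenceProfileId("us-west-2", "anthropic.claude-sonnet-4-5-20250929-v1:0")
//   => "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
```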
@@ -0,0 +1,18 @@
/**
* Utility functions for Amazon Bedrock tests
*/
/**
* Check if any valid AWS credentials are configured for Bedrock.
* Returns true if any of the following are set:
* - AWS_PROFILE (named profile from ~/.aws/credentials)
* - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
* - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
*/
export function hasBedrockCredentials(): boolean {
return !!(
process.env.AWS_PROFILE ||
(process.env.AWS_ACCESS_KEY_ID && process.env.AWS_SECRET_ACCESS_KEY) ||
process.env.AWS_BEARER_TOKEN_BEDROCK
);
}

@@ -18,6 +18,7 @@ import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -284,6 +285,22 @@ describe("Context overflow error handling", () => {
);
});
// =============================================================================
// Amazon Bedrock
// Expected pattern: "Input is too long for requested model"
// =============================================================================
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
it("claude-sonnet-4-5 - should detect overflow via isContextOverflow", async () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
const result = await testContextOverflow(model, "");
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
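The expected error text for Bedrock overflow is noted in the comment above; an illustrative check of the sort `isContextOverflow` presumably performs for this provider (its real implementation is imported from ../src/utils/overflow.js and is not shown in this diff):

```ts
// Illustration only: match the Bedrock overflow message quoted in the comment above.
function looksLikeBedrockOverflow(errorMessage: string): boolean {
  return /input is too long for requested model/i.test(errorMessage);
}
```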
// =============================================================================
// xAI
// Expected pattern: "maximum prompt length is X but the request contains Y"

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -321,6 +322,26 @@ describe("AI Providers Empty Message Tests", () => {
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Empty Messages", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

@@ -75,6 +75,7 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@@ -840,6 +841,79 @@ describe("Image Limits E2E Tests", () => {
});
});
// -------------------------------------------------------------------------
// Amazon Bedrock (claude-sonnet-4-5)
// Limits: 100 images (Anthropic), 5MB per image, 8000px max dimension
// -------------------------------------------------------------------------
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock (claude-sonnet-4-5)", () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should accept a small number of images (5)", async () => {
const result = await testImageCount(model, 5, smallImage);
expect(result.success, result.error).toBe(true);
});
it("should find maximum image count limit", { timeout: 600000 }, async () => {
// Anthropic limit: 100 images
const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 20, 120, 20);
console.log(`\n Bedrock max images: ~${limit} (last error: ${lastError})`);
expect(limit).toBeGreaterThanOrEqual(80);
expect(limit).toBeLessThanOrEqual(100);
});
it("should find maximum image size limit", { timeout: 600000 }, async () => {
const MB = 1024 * 1024;
// Anthropic limit: 5MB per image
const sizes = [1, 2, 3, 4, 5, 6];
let lastSuccess = 0;
let lastError: string | undefined;
for (const sizeMB of sizes) {
console.log(` Testing size: ${sizeMB}MB...`);
const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`);
const result = await testImageSize(model, imageBase64);
if (result.success) {
lastSuccess = sizeMB;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 100)}`);
break;
}
}
console.log(`\n Bedrock max image size: ~${lastSuccess}MB (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(1);
});
it("should find maximum image dimension limit", { timeout: 600000 }, async () => {
// Anthropic limit: 8000px
const dimensions = [1000, 2000, 4000, 6000, 8000, 10000];
let lastSuccess = 0;
let lastError: string | undefined;
for (const dim of dimensions) {
console.log(` Testing dimension: ${dim}x${dim}...`);
const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`);
const result = await testImageDimensions(model, imageBase64);
if (result.success) {
lastSuccess = dim;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 100)}`);
break;
}
}
console.log(`\n Bedrock max dimension: ~${lastSuccess}px (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(6000);
expect(lastSuccess).toBeLessThanOrEqual(8000);
});
});
// =========================================================================
// MAX SIZE IMAGES TEST
// =========================================================================
@@ -898,6 +972,38 @@ describe("Image Limits E2E Tests", () => {
},
);
// Amazon Bedrock (Claude) - 5MB per image limit, same as Anthropic direct
// Using 3MB to stay under 5MB limit
it.skipIf(!hasBedrockCredentials())(
"Bedrock: max ~3MB images before rejection",
{ timeout: 900000 },
async () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
const image3mb = getImageAtSize(3);
// Similar to Anthropic, test progressively
const counts = [1, 2, 4, 6, 8, 10, 12];
let lastSuccess = 0;
let lastError: string | undefined;
for (const count of counts) {
console.log(` Testing ${count} x ~3MB images...`);
const result = await testImageCount(model, count, image3mb);
if (result.success) {
lastSuccess = count;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 150)}`);
break;
}
}
console.log(`\n Bedrock max ~3MB images: ${lastSuccess} (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(1);
},
);
// OpenAI - 20MB per image documented, we found ≥25MB works
// Test with 15MB images to stay safely under limit
it.skipIf(!process.env.OPENAI_API_KEY)(

@@ -5,6 +5,7 @@ import { describe, expect, it } from "vitest";
import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js";
import { complete, getModel } from "../src/index.js";
import type { OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -273,6 +274,18 @@ describe("Tool Results with Images", () => {
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

@@ -8,6 +8,7 @@ import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
const __filename = fileURLToPath(import.meta.url);
@@ -356,7 +357,7 @@ describe("Generate E2E Tests", () => {
await handleStreaming(llm);
});
it("should handle ", { retry: 3 }, async () => {
it("should handle thinking", { retry: 3 }, async () => {
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
});
@@ -907,6 +908,34 @@ describe("Generate E2E Tests", () => {
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking", { retry: 3 }, async () => {
await handleThinking(llm, { reasoning: "medium" });
});
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoning: "high" });
});
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
// Check if ollama is installed and local LLM tests are enabled
let ollamaInstalled = false;
if (!process.env.PI_NO_LOCAL_LLM) {

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -44,7 +45,7 @@ async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: Op
expect(msg.stopReason).toBe("aborted");
// OpenAI providers, OpenAI Codex, Gemini CLI, zai, and the GPT-OSS model on Antigravity only send usage in the final chunk,
// OpenAI providers, OpenAI Codex, Gemini CLI, zai, Amazon Bedrock, and the GPT-OSS model on Antigravity only send usage in the final chunk,
// so when aborted they have no token stats. Anthropic and Google send usage information early in the stream
if (
llm.api === "openai-completions" ||
@@ -52,6 +53,7 @@ async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: Op
llm.api === "openai-codex-responses" ||
llm.provider === "google-gemini-cli" ||
llm.provider === "zai" ||
llm.provider === "amazon-bedrock" ||
(llm.provider === "google-antigravity" && llm.id.includes("gpt-oss"))
) {
expect(msg.usage.input).toBe(0);
@@ -230,4 +232,12 @@ describe("Token Statistics on Abort", () => {
},
);
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
});

@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -170,6 +171,14 @@ describe("Tool Call Without Result Tests", () => {
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

@@ -16,6 +16,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@@ -535,6 +536,25 @@ describe("totalTokens field", () => {
);
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
it(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
console.log(`\nAmazon Bedrock / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// OpenAI Codex (OAuth)
// =========================================================================

@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist
@@ -617,6 +618,22 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Unicode Handling", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
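These unpaired-surrogate tests exercise the "Unicode surrogate sanitization" item from the commit message. The provider-side implementation is not part of the excerpt shown here; a minimal sketch of the idea, assuming unpaired surrogate code units are replaced with U+FFFD before the request body is serialized:

```ts
// Sketch only: replace unpaired UTF-16 surrogates so the payload serializes
// to valid UTF-8 for the Converse API. The provider's actual code may differ.
function sanitizeSurrogates(text: string): string {
  let out = "";
  for (let i = 0; i < text.length; i++) {
    const code = text.charCodeAt(i);
    if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: keep only when a low surrogate follows.
      const next = i + 1 < text.length ? text.charCodeAt(i + 1) : 0;
      if (next >= 0xdc00 && next <= 0xdfff) {
        out += text[i] + text[i + 1];
        i++;
      } else {
        out += "\uFFFD";
      }
    } else if (code >= 0xdc00 && code <= 0xdfff) {
      out += "\uFFFD"; // unpaired low surrogate
    } else {
      out += text[i];
    }
  }
  return out;
}
```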
describe("OpenAI Codex Provider Unicode Handling", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should handle emoji in tool results",