/** * A test suite to ensure Amazon Bedrock models work correctly with the agent loop. * * Some Bedrock models don't support all features (e.g., reasoning signatures). * This test suite verifies that the agent loop works with various Bedrock models. * * This test suite is not enabled by default unless AWS credentials and * `BEDROCK_EXTENSIVE_MODEL_TEST` environment variables are set. * * You can run this test suite with: * ```bash * $ AWS_REGION=us-east-1 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=clanker npm test -- ./test/bedrock-models.test.ts * ``` * * ## Known Issues by Category * * 1. **Inference Profile Required**: Some models require an inference profile ARN instead of on-demand. * 2. **Invalid Model ID**: Model identifiers that don't exist in the current region. * 3. **Max Tokens Exceeded**: Model's maxTokens in our config exceeds the actual limit. * 4. **No Reasoning in User Messages**: Model rejects reasoning content when replayed in conversation. * 5. **Invalid Signature Format**: Model validates signature format (Anthropic newer models). */ import type { AssistantMessage } from "@mariozechner/clanker-ai"; import { getModels } from "@mariozechner/clanker-ai"; import { describe, expect, it } from "vitest"; import { Agent } from "../src/index.js"; import { hasBedrockCredentials } from "./bedrock-utils.js"; // ============================================================================= // Known Issue Categories // ============================================================================= /** Models that require inference profile ARN (not available on-demand in us-east-1) */ const REQUIRES_INFERENCE_PROFILE = new Set([ "anthropic.claude-3-5-haiku-20241022-v1:0", "anthropic.claude-3-5-sonnet-20241022-v2:0", "anthropic.claude-3-opus-20240229-v1:0", "meta.llama3-1-70b-instruct-v1:0", "meta.llama3-1-8b-instruct-v1:0", ]); /** Models with invalid identifiers (not available in us-east-1 or don't exist) */ const INVALID_MODEL_ID = new Set([ "deepseek.v3-v1:0", "eu.anthropic.claude-haiku-4-5-20251001-v1:0", "eu.anthropic.claude-opus-4-5-20251101-v1:0", "eu.anthropic.claude-sonnet-4-5-20250929-v1:0", "qwen.qwen3-235b-a22b-2507-v1:0", "qwen.qwen3-coder-480b-a35b-v1:0", ]); /** Models where our maxTokens config exceeds the model's actual limit */ const MAX_TOKENS_EXCEEDED = new Set([ "us.meta.llama4-maverick-17b-instruct-v1:0", "us.meta.llama4-scout-17b-instruct-v1:0", ]); /** * Models that reject reasoning content in user messages (when replaying conversation). * These work for multi-turn but fail when synthetic thinking is injected. */ const NO_REASONING_IN_USER_MESSAGES = new Set([ // Mistral models "mistral.ministral-3-14b-instruct", "mistral.ministral-3-8b-instruct", "mistral.mistral-large-2402-v1:0", "mistral.voxtral-mini-3b-2507", "mistral.voxtral-small-24b-2507", // Nvidia models "nvidia.nemotron-nano-12b-v2", "nvidia.nemotron-nano-9b-v2", // Qwen models "qwen.qwen3-coder-30b-a3b-v1:0", // Amazon Nova models "us.amazon.nova-lite-v1:0", "us.amazon.nova-micro-v1:0", "us.amazon.nova-premier-v1:0", "us.amazon.nova-pro-v1:0", // Meta Llama models "us.meta.llama3-2-11b-instruct-v1:0", "us.meta.llama3-2-1b-instruct-v1:0", "us.meta.llama3-2-3b-instruct-v1:0", "us.meta.llama3-2-90b-instruct-v1:0", "us.meta.llama3-3-70b-instruct-v1:0", // DeepSeek "us.deepseek.r1-v1:0", // Older Anthropic models "anthropic.claude-3-5-sonnet-20240620-v1:0", "anthropic.claude-3-haiku-20240307-v1:0", "anthropic.claude-3-sonnet-20240229-v1:0", // Cohere models "cohere.command-r-plus-v1:0", "cohere.command-r-v1:0", // Google models "google.gemma-3-27b-it", "google.gemma-3-4b-it", // Non-Anthropic models that don't support signatures (now handled by omitting signature) // but still reject reasoning content in user messages "global.amazon.nova-2-lite-v1:0", "minimax.minimax-m2", "moonshot.kimi-k2-thinking", "openai.gpt-oss-120b-1:0", "openai.gpt-oss-20b-1:0", "openai.gpt-oss-safeguard-120b", "openai.gpt-oss-safeguard-20b", "qwen.qwen3-32b-v1:0", "qwen.qwen3-next-80b-a3b", "qwen.qwen3-vl-235b-a22b", ]); /** * Models that validate signature format (Anthropic newer models). * These work for multi-turn but fail when synthetic/invalid signature is injected. */ const VALIDATES_SIGNATURE_FORMAT = new Set([ "global.anthropic.claude-haiku-4-5-20251001-v1:0", "global.anthropic.claude-opus-4-5-20251101-v1:0", "global.anthropic.claude-sonnet-4-20250514-v1:0", "global.anthropic.claude-sonnet-4-5-20250929-v1:0", "us.anthropic.claude-3-7-sonnet-20250219-v1:0", "us.anthropic.claude-opus-4-1-20250805-v1:0", "us.anthropic.claude-opus-4-20250514-v1:0", ]); /** * DeepSeek R1 fails multi-turn because it rejects reasoning in the replayed assistant message. */ const REJECTS_REASONING_ON_REPLAY = new Set(["us.deepseek.r1-v1:0"]); // ============================================================================= // Helper Functions // ============================================================================= function isModelUnavailable(modelId: string): boolean { return ( REQUIRES_INFERENCE_PROFILE.has(modelId) || INVALID_MODEL_ID.has(modelId) || MAX_TOKENS_EXCEEDED.has(modelId) ); } function failsMultiTurnWithThinking(modelId: string): boolean { return REJECTS_REASONING_ON_REPLAY.has(modelId); } function failsSyntheticSignature(modelId: string): boolean { return ( NO_REASONING_IN_USER_MESSAGES.has(modelId) || VALIDATES_SIGNATURE_FORMAT.has(modelId) ); } // ============================================================================= // Tests // ============================================================================= describe("Amazon Bedrock Models - Agent Loop", () => { const shouldRunExtensiveTests = hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST; // Get all Amazon Bedrock models const allBedrockModels = getModels("amazon-bedrock"); if (shouldRunExtensiveTests) { for (const model of allBedrockModels) { const modelId = model.id; describe(`Model: ${modelId}`, () => { // Skip entirely unavailable models const unavailable = isModelUnavailable(modelId); it.skipIf(unavailable)( "should handle basic text prompt", { timeout: 60_000 }, async () => { const agent = new Agent({ initialState: { systemPrompt: "You are a helpful assistant. Be extremely concise.", model, thinkingLevel: "off", tools: [], }, }); await agent.prompt("Reply with exactly: 'OK'"); if (agent.state.error) { throw new Error(`Basic prompt error: ${agent.state.error}`); } expect(agent.state.isStreaming).toBe(false); expect(agent.state.messages.length).toBe(2); const assistantMessage = agent.state.messages[1]; if (assistantMessage.role !== "assistant") throw new Error("Expected assistant message"); console.log(`${modelId}: OK`); }, ); // Skip if model is unavailable or known to fail multi-turn with thinking const skipMultiTurn = unavailable || failsMultiTurnWithThinking(modelId); it.skipIf(skipMultiTurn)( "should handle multi-turn conversation with thinking content in history", { timeout: 120_000 }, async () => { const agent = new Agent({ initialState: { systemPrompt: "You are a helpful assistant. Be extremely concise.", model, thinkingLevel: "medium", tools: [], }, }); // First turn await agent.prompt("My name is Alice."); if (agent.state.error) { throw new Error(`First turn error: ${agent.state.error}`); } // Second turn - this should replay the first assistant message which may contain thinking await agent.prompt("What is my name?"); if (agent.state.error) { throw new Error(`Second turn error: ${agent.state.error}`); } expect(agent.state.messages.length).toBe(4); console.log(`${modelId}: multi-turn OK`); }, ); // Skip if model is unavailable or known to fail synthetic signature const skipSynthetic = unavailable || failsSyntheticSignature(modelId); it.skipIf(skipSynthetic)( "should handle conversation with synthetic thinking signature in history", { timeout: 60_000 }, async () => { const agent = new Agent({ initialState: { systemPrompt: "You are a helpful assistant. Be extremely concise.", model, thinkingLevel: "off", tools: [], }, }); // Inject a message with a thinking block that has a signature const syntheticAssistantMessage: AssistantMessage = { role: "assistant", content: [ { type: "thinking", thinking: "I need to remember the user's name.", thinkingSignature: "synthetic-signature-123", }, { type: "text", text: "Nice to meet you, Alice!" }, ], api: "bedrock-converse-stream", provider: "amazon-bedrock", model: modelId, usage: { input: 10, output: 20, cacheRead: 0, cacheWrite: 0, totalTokens: 30, cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0, }, }, stopReason: "stop", timestamp: Date.now(), }; agent.replaceMessages([ { role: "user", content: "My name is Alice.", timestamp: Date.now(), }, syntheticAssistantMessage, ]); await agent.prompt("What is my name?"); if (agent.state.error) { throw new Error( `Synthetic signature error: ${agent.state.error}`, ); } expect(agent.state.messages.length).toBe(4); console.log(`${modelId}: synthetic signature OK`); }, ); }); } } else { it.skip("skipped - set AWS credentials and BEDROCK_EXTENSIVE_MODEL_TEST=1 to run", () => {}); } });