Merge branch 'main' into feat/tui-overlay-options

This commit is contained in:
Mario Zechner 2026-01-13 22:06:02 +01:00
commit 7d45e434de
90 changed files with 10277 additions and 1700 deletions

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -66,6 +67,35 @@ async function testImmediateAbort<TApi extends Api>(llm: Model<TApi>, options: O
expect(response.stopReason).toBe("aborted");
}
/**
 * Verifies that a conversation can continue after an aborted turn.
 *
 * Aborts a first completion before any content arrives, appends the empty
 * aborted assistant message to the context (mirroring what the real coding
 * agent does), then sends a follow-up user message and expects a normal reply.
 */
async function testAbortThenNewMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
	const abortController = new AbortController();
	// Abort up front so the first request never receives any content.
	abortController.abort();
	const context: Context = {
		messages: [{ role: "user", content: "Hello, how are you?", timestamp: Date.now() }],
	};
	const aborted = await complete(llm, context, { ...options, signal: abortController.signal });
	expect(aborted.stopReason).toBe("aborted");
	// Nothing arrived before the abort, so the assistant message is empty.
	expect(aborted.content.length).toBe(0);
	// Keep the aborted assistant turn in the history, then ask a new question --
	// providers must tolerate the empty assistant message in context.
	context.messages.push(aborted);
	context.messages.push({
		role: "user",
		content: "What is 2 + 2?",
		timestamp: Date.now(),
	});
	const second = await complete(llm, context, options);
	expect(second.stopReason).toBe("stop");
	expect(second.content.length).toBeGreaterThan(0);
}
describe("AI Providers Abort Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Abort", () => {
const llm = getModel("google", "gemini-2.5-flash");
@ -130,6 +160,30 @@ describe("AI Providers Abort Tests", () => {
});
});
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Abort", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it("should abort mid-stream", { retry: 3 }, async () => {
await testAbortSignal(llm);
});
it("should handle immediate abort", { retry: 3 }, async () => {
await testImmediateAbort(llm);
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Abort", () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should abort mid-stream", { retry: 3 }, async () => {
await testAbortSignal(llm);
});
it("should handle immediate abort", { retry: 3 }, async () => {
await testImmediateAbort(llm);
});
});
// Google Gemini CLI / Antigravity share the same provider, so one test covers both
describe("Google Gemini CLI Provider Abort", () => {
it.skipIf(!geminiCliToken)("should abort mid-stream", { retry: 3 }, async () => {
@ -154,4 +208,20 @@ describe("AI Providers Abort Tests", () => {
await testImmediateAbort(llm, { apiKey: openaiCodexToken });
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Abort", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should abort mid-stream", { retry: 3 }, async () => {
await testAbortSignal(llm, { reasoning: "medium" });
});
it("should handle immediate abort", { retry: 3 }, async () => {
await testImmediateAbort(llm);
});
it("should handle abort then new message", { retry: 3 }, async () => {
await testAbortThenNewMessage(llm);
});
});
});

View file

@ -0,0 +1,66 @@
/**
* A test suite to ensure all configured Amazon Bedrock models are usable.
*
* This is here to make sure we got correct model identifiers from models.dev and other sources.
 * Because some Amazon Bedrock models require cross-region inference,
 * plain model identifiers are not always usable; they must be tweaked into inference-profile identifiers to use cross-region inference.
* See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles-support.html#inference-profiles-support-system for more details.
*
* This test suite is not enabled by default unless AWS credentials and `BEDROCK_EXTENSIVE_MODEL_TEST` environment variables are set.
* This test suite takes ~2 minutes to run. Because not all models are available in all regions,
* it's recommended to use `us-west-2` region for best coverage for running this test suite.
*
* You can run this test suite with:
* ```bash
* $ AWS_REGION=us-west-2 BEDROCK_EXTENSIVE_MODEL_TEST=1 AWS_PROFILE=... npm test -- ./test/bedrock-models.test.ts
* ```
*/
import { describe, expect, it } from "vitest";
import { getModels } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
// Validates that every configured Bedrock model identifier is actually usable.
describe("Amazon Bedrock Models", () => {
// All Bedrock models known to the registry (including cross-region inference ids).
const models = getModels("amazon-bedrock");
it("should get all available Bedrock models", () => {
expect(models.length).toBeGreaterThan(0);
console.log(`Found ${models.length} Bedrock models`);
});
// Per-model live requests are expensive and need AWS credentials, so they are
// gated behind BEDROCK_EXTENSIVE_MODEL_TEST in addition to credential checks.
if (hasBedrockCredentials() && process.env.BEDROCK_EXTENSIVE_MODEL_TEST) {
// Generate one smoke test per model so a bad identifier pinpoints itself.
for (const model of models) {
it(`should make a simple request with ${model.id}`, { timeout: 10_000 }, async () => {
const context: Context = {
systemPrompt: "You are a helpful assistant. Be extremely concise.",
messages: [
{
role: "user",
content: "Reply with exactly: 'OK'",
timestamp: Date.now(),
},
],
};
const response = await complete(model, context);
expect(response.role).toBe("assistant");
expect(response.content).toBeTruthy();
expect(response.content.length).toBeGreaterThan(0);
// Input tokens may be reported as fresh input or as cache reads; either counts.
expect(response.usage.input + response.usage.cacheRead).toBeGreaterThan(0);
expect(response.usage.output).toBeGreaterThan(0);
expect(response.errorMessage).toBeFalsy();
// Collapse all text blocks into one string to confirm a non-empty reply.
const textContent = response.content
.filter((b) => b.type === "text")
.map((b) => (b.type === "text" ? b.text : ""))
.join("")
.trim();
expect(textContent).toBeTruthy();
console.log(`${model.id}: ${textContent.substring(0, 100)}`);
});
}
}
});

View file

@ -0,0 +1,18 @@
/**
* Utility functions for Amazon Bedrock tests
*/
/**
* Check if any valid AWS credentials are configured for Bedrock.
* Returns true if any of the following are set:
* - AWS_PROFILE (named profile from ~/.aws/credentials)
* - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY (IAM keys)
* - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
*/
/**
 * Check whether any valid AWS credentials are configured for Bedrock.
 *
 * Any one of the following is sufficient:
 * - AWS_PROFILE (named profile from ~/.aws/credentials)
 * - AWS_ACCESS_KEY_ID together with AWS_SECRET_ACCESS_KEY (IAM keys)
 * - AWS_BEARER_TOKEN_BEDROCK (Bedrock API key)
 */
export function hasBedrockCredentials(): boolean {
	const env = process.env;
	const namedProfile = Boolean(env.AWS_PROFILE);
	// IAM keys only count when both halves of the key pair are present.
	const iamKeyPair = Boolean(env.AWS_ACCESS_KEY_ID) && Boolean(env.AWS_SECRET_ACCESS_KEY);
	const bearerToken = Boolean(env.AWS_BEARER_TOKEN_BEDROCK);
	return namedProfile || iamKeyPair || bearerToken;
}

View file

@ -18,6 +18,7 @@ import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -284,6 +285,22 @@ describe("Context overflow error handling", () => {
);
});
// =============================================================================
// Amazon Bedrock
// Expected pattern: "Input is too long for requested model"
// =============================================================================
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
it("claude-sonnet-4-5 - should detect overflow via isContextOverflow", async () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
const result = await testContextOverflow(model, "");
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// xAI
// Expected pattern: "maximum prompt length is X but the request contains Y"
@ -379,6 +396,37 @@ describe("Context overflow error handling", () => {
}, 120000);
});
// =============================================================================
// MiniMax
// Expected pattern: TBD - need to test actual error message
// =============================================================================
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
it("MiniMax-M2.1 - should detect overflow via isContextOverflow", async () => {
const model = getModel("minimax", "MiniMax-M2.1");
const result = await testContextOverflow(model, process.env.MINIMAX_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// Vercel AI Gateway - Unified API for multiple providers
// =============================================================================
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
it("google/gemini-2.5-flash via AI Gateway - should detect overflow via isContextOverflow", async () => {
const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
const result = await testContextOverflow(model, process.env.AI_GATEWAY_API_KEY!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
}, 120000);
});
// =============================================================================
// OpenRouter - Multiple backend providers
// Expected pattern: "maximum context length is X tokens"

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -321,6 +322,66 @@ describe("AI Providers Empty Message Tests", () => {
});
});
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Empty Messages", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Empty Messages", () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Empty Messages", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

View file

@ -0,0 +1,103 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const originalFetch = global.fetch;
const apiKey = JSON.stringify({ token: "token", projectId: "project" });
/**
 * Builds a minimal SSE Response containing a single Gemini CLI chunk:
 * one "Hello" text part with a STOP finish reason.
 */
const createSseResponse = () => {
	const payload = JSON.stringify({
		response: {
			candidates: [
				{
					content: { role: "model", parts: [{ text: "Hello" }] },
					finishReason: "STOP",
				},
			],
		},
	});
	// SSE frames are "data: <json>" terminated by a blank line.
	const bytes = new TextEncoder().encode(`data: ${payload}\n\n`);
	const body = new ReadableStream<Uint8Array>({
		start(controller) {
			controller.enqueue(bytes);
			controller.close();
		},
	});
	return new Response(body, {
		status: 200,
		headers: { "content-type": "text/event-stream" },
	});
};
afterEach(() => {
global.fetch = originalFetch;
vi.restoreAllMocks();
});
describe("google-gemini-cli Claude thinking header", () => {
	const context: Context = {
		messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
	};

	// Shared scaffolding for a minimal model record; only identity fields vary.
	const makeModel = (id: string, name: string, provider: string, reasoning: boolean): Model<"google-gemini-cli"> => ({
		id,
		name,
		api: "google-gemini-cli",
		provider,
		baseUrl: "https://cloudcode-pa.googleapis.com",
		reasoning,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 8192,
	});

	// Consumes every stream event and awaits the final result so the mocked
	// fetch (and its header expectations) definitely runs.
	const drain = async (s: ReturnType<typeof streamGoogleGeminiCli>) => {
		for await (const _event of s) {
			// exhaust stream
		}
		await s.result();
	};

	it("adds anthropic-beta for Claude thinking models", async () => {
		const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
			const headers = new Headers(init?.headers);
			expect(headers.get("anthropic-beta")).toBe("interleaved-thinking-2025-05-14");
			return createSseResponse();
		});
		global.fetch = fetchMock as typeof fetch;
		const model = makeModel("claude-opus-4-5-thinking", "Claude Opus 4.5 Thinking", "google-antigravity", true);
		await drain(streamGoogleGeminiCli(model, context, { apiKey }));
	});

	it("does not add anthropic-beta for Gemini models", async () => {
		const fetchMock = vi.fn(async (_input: string | URL, init?: RequestInit) => {
			const headers = new Headers(init?.headers);
			expect(headers.has("anthropic-beta")).toBe(false);
			return createSseResponse();
		});
		global.fetch = fetchMock as typeof fetch;
		const model = makeModel("gemini-2.5-flash", "Gemini 2.5 Flash", "google-gemini-cli", false);
		await drain(streamGoogleGeminiCli(model, context, { apiKey }));
	});
});

View file

@ -0,0 +1,108 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { streamGoogleGeminiCli } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const originalFetch = global.fetch;
afterEach(() => {
global.fetch = originalFetch;
vi.restoreAllMocks();
});
describe("google-gemini-cli empty stream retry", () => {
it("retries empty SSE responses without duplicate start", async () => {
// SSE body that closes immediately without emitting any events --
// simulates the provider occasionally returning an empty stream.
const emptyStream = new ReadableStream<Uint8Array>({
start(controller) {
controller.close();
},
});
// Well-formed SSE chunk: one "Hello" text part, STOP finish reason, and
// usage metadata so the final result carries token counts.
const sse = `${[
`data: ${JSON.stringify({
response: {
candidates: [
{
content: { role: "model", parts: [{ text: "Hello" }] },
finishReason: "STOP",
},
],
usageMetadata: {
promptTokenCount: 1,
candidatesTokenCount: 1,
totalTokenCount: 2,
},
},
})}`,
].join("\n\n")}\n\n`;
const encoder = new TextEncoder();
const dataStream = new ReadableStream<Uint8Array>({
start(controller) {
controller.enqueue(encoder.encode(sse));
controller.close();
},
});
// First fetch returns the empty stream, second returns real data, so a
// single retry should succeed. NOTE(review): both streams are one-shot;
// a third call would receive an already-consumed stream.
let callCount = 0;
const fetchMock = vi.fn(async () => {
callCount += 1;
if (callCount === 1) {
return new Response(emptyStream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
}
return new Response(dataStream, {
status: 200,
headers: { "content-type": "text/event-stream" },
});
});
global.fetch = fetchMock as typeof fetch;
const model: Model<"google-gemini-cli"> = {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
const context: Context = {
messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
};
const stream = streamGoogleGeminiCli(model, context, {
apiKey: JSON.stringify({ token: "token", projectId: "project" }),
});
// Count lifecycle events: the retry must not replay start/done to consumers.
let startCount = 0;
let doneCount = 0;
let text = "";
for await (const event of stream) {
if (event.type === "start") {
startCount += 1;
}
if (event.type === "done") {
doneCount += 1;
}
if (event.type === "text_delta") {
text += event.delta;
}
}
const result = await stream.result();
expect(text).toBe("Hello");
expect(result.stopReason).toBe("stop");
// Exactly one start/done pair despite two underlying fetches.
expect(startCount).toBe(1);
expect(doneCount).toBe(1);
expect(fetchMock).toHaveBeenCalledTimes(2);
});
});

View file

@ -0,0 +1,53 @@
import { afterEach, describe, expect, it, vi } from "vitest";
import { extractRetryDelay } from "../src/providers/google-gemini-cli.js";
describe("extractRetryDelay header parsing", () => {
	afterEach(() => {
		vi.useRealTimers();
	});

	// Freezes the clock at a fixed instant so date-based headers are deterministic,
	// and returns that instant for building relative timestamps.
	const freezeClock = (): Date => {
		vi.useFakeTimers();
		const instant = new Date("2025-01-01T00:00:00Z");
		vi.setSystemTime(instant);
		return instant;
	};

	// Expected delays are the header value plus one extra second (per the
	// assertions below: 5s -> 6000ms, 12s -> 13000ms, etc.).
	it("prefers Retry-After seconds header", () => {
		freezeClock();
		const response = new Response("", { headers: { "Retry-After": "5" } });
		// The header wins over the "retry in 1s" hint embedded in the message.
		const delay = extractRetryDelay("Please retry in 1s", response);
		expect(delay).toBe(6000);
	});

	it("parses Retry-After HTTP date header", () => {
		const now = freezeClock();
		const retryAt = new Date(now.getTime() + 12000).toUTCString();
		const response = new Response("", { headers: { "Retry-After": retryAt } });
		expect(extractRetryDelay("", response)).toBe(13000);
	});

	it("parses x-ratelimit-reset header", () => {
		const now = freezeClock();
		// This header carries an absolute epoch timestamp in seconds.
		const resetSeconds = Math.floor((now.getTime() + 20000) / 1000).toString();
		const response = new Response("", { headers: { "x-ratelimit-reset": resetSeconds } });
		expect(extractRetryDelay("", response)).toBe(21000);
	});

	it("parses x-ratelimit-reset-after header", () => {
		freezeClock();
		// This header carries a relative delay in seconds.
		const response = new Response("", { headers: { "x-ratelimit-reset-after": "30" } });
		expect(extractRetryDelay("", response)).toBe(31000);
	});
});

View file

@ -0,0 +1,50 @@
import { createHash } from "node:crypto";
import { describe, expect, it } from "vitest";
import { buildRequest } from "../src/providers/google-gemini-cli.js";
import type { Context, Model } from "../src/types.js";
const model: Model<"google-gemini-cli"> = {
id: "gemini-2.5-flash",
name: "Gemini 2.5 Flash",
api: "google-gemini-cli",
provider: "google-gemini-cli",
baseUrl: "https://cloudcode-pa.googleapis.com",
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 8192,
};
describe("buildRequest sessionId", () => {
	// Mirrors the provider's derivation: sha256 of the text, hex, first 32 chars.
	const expectedSessionId = (text: string): string => createHash("sha256").update(text).digest("hex").slice(0, 32);

	it("derives sessionId from the first user message", () => {
		const context: Context = {
			messages: [
				{ role: "user", content: "First message", timestamp: Date.now() },
				{ role: "user", content: "Second message", timestamp: Date.now() },
			],
		};
		const { request } = buildRequest(model, context, "project-id");
		// Only the FIRST user message feeds the hash; later messages are ignored.
		expect(request.sessionId).toBe(expectedSessionId("First message"));
	});

	it("omits sessionId when the first user message has no text", () => {
		const context: Context = {
			messages: [
				{
					// Image-only first message: no text available to hash.
					role: "user",
					content: [{ type: "image", data: "Zm9v", mimeType: "image/png" }],
					timestamp: Date.now(),
				},
				{ role: "user", content: "Later text", timestamp: Date.now() },
			],
		};
		const { request } = buildRequest(model, context, "project-id");
		expect(request.sessionId).toBeUndefined();
	});
});

View file

@ -75,6 +75,7 @@ import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, UserMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
@ -840,6 +841,122 @@ describe("Image Limits E2E Tests", () => {
});
});
// -------------------------------------------------------------------------
// Vercel AI Gateway (google/gemini-2.5-flash)
// -------------------------------------------------------------------------
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway (google/gemini-2.5-flash)", () => {
const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should accept a small number of images (5)", async () => {
const result = await testImageCount(model, 5, smallImage);
expect(result.success, result.error).toBe(true);
});
it("should find maximum image count limit", { timeout: 600000 }, async () => {
const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 10, 100, 10);
console.log(`\n Vercel AI Gateway max images: ~${limit} (last error: ${lastError})`);
expect(limit).toBeGreaterThanOrEqual(5);
});
it("should find maximum image size limit", { timeout: 600000 }, async () => {
const MB = 1024 * 1024;
const sizes = [5, 10, 15, 20];
let lastSuccess = 0;
let lastError: string | undefined;
for (const sizeMB of sizes) {
console.log(` Testing size: ${sizeMB}MB...`);
const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`);
const result = await testImageSize(model, imageBase64);
if (result.success) {
lastSuccess = sizeMB;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 100)}`);
break;
}
}
console.log(`\n Vercel AI Gateway max image size: ~${lastSuccess}MB (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(5);
});
});
// -------------------------------------------------------------------------
// Amazon Bedrock (claude-sonnet-4-5)
// Limits: 100 images (Anthropic), 5MB per image, 8000px max dimension
// -------------------------------------------------------------------------
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock (claude-sonnet-4-5)", () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should accept a small number of images (5)", async () => {
const result = await testImageCount(model, 5, smallImage);
expect(result.success, result.error).toBe(true);
});
it("should find maximum image count limit", { timeout: 600000 }, async () => {
// Anthropic limit: 100 images
const { limit, lastError } = await findLimit((count) => testImageCount(model, count, smallImage), 20, 120, 20);
console.log(`\n Bedrock max images: ~${limit} (last error: ${lastError})`);
expect(limit).toBeGreaterThanOrEqual(80);
expect(limit).toBeLessThanOrEqual(100);
});
it("should find maximum image size limit", { timeout: 600000 }, async () => {
const MB = 1024 * 1024;
// Anthropic limit: 5MB per image
const sizes = [1, 2, 3, 4, 5, 6];
let lastSuccess = 0;
let lastError: string | undefined;
for (const sizeMB of sizes) {
console.log(` Testing size: ${sizeMB}MB...`);
const imageBase64 = generateImageWithSize(sizeMB * MB, `size-${sizeMB}mb.png`);
const result = await testImageSize(model, imageBase64);
if (result.success) {
lastSuccess = sizeMB;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 100)}`);
break;
}
}
console.log(`\n Bedrock max image size: ~${lastSuccess}MB (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(1);
});
it("should find maximum image dimension limit", { timeout: 600000 }, async () => {
// Anthropic limit: 8000px
const dimensions = [1000, 2000, 4000, 6000, 8000, 10000];
let lastSuccess = 0;
let lastError: string | undefined;
for (const dim of dimensions) {
console.log(` Testing dimension: ${dim}x${dim}...`);
const imageBase64 = generateImage(dim, dim, `dim-${dim}.png`);
const result = await testImageDimensions(model, imageBase64);
if (result.success) {
lastSuccess = dim;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 100)}`);
break;
}
}
console.log(`\n Bedrock max dimension: ~${lastSuccess}px (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(6000);
expect(lastSuccess).toBeLessThanOrEqual(8000);
});
});
// =========================================================================
// MAX SIZE IMAGES TEST
// =========================================================================
@ -898,6 +1015,38 @@ describe("Image Limits E2E Tests", () => {
},
);
// Amazon Bedrock (Claude) - 5MB per image limit, same as Anthropic direct
// Using 3MB to stay under 5MB limit
it.skipIf(!hasBedrockCredentials())(
"Bedrock: max ~3MB images before rejection",
{ timeout: 900000 },
async () => {
const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
const image3mb = getImageAtSize(3);
// Similar to Anthropic, test progressively
const counts = [1, 2, 4, 6, 8, 10, 12];
let lastSuccess = 0;
let lastError: string | undefined;
for (const count of counts) {
console.log(` Testing ${count} x ~3MB images...`);
const result = await testImageCount(model, count, image3mb);
if (result.success) {
lastSuccess = count;
console.log(` SUCCESS`);
} else {
lastError = result.error;
console.log(` FAILED: ${result.error?.substring(0, 150)}`);
break;
}
}
console.log(`\n Bedrock max ~3MB images: ${lastSuccess} (last error: ${lastError})`);
expect(lastSuccess).toBeGreaterThanOrEqual(1);
},
);
// OpenAI - 20MB per image documented, we found ≥25MB works
// Test with 15MB images to stay safely under limit
it.skipIf(!process.env.OPENAI_API_KEY)(

View file

@ -5,6 +5,7 @@ import { describe, expect, it } from "vitest";
import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js";
import { complete, getModel } from "../src/index.js";
import type { OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -273,6 +274,30 @@ describe("Tool Results with Images", () => {
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider (google/gemini-2.5-flash)", () => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

View file

@ -8,6 +8,7 @@ import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
const __filename = fileURLToPath(import.meta.url);
@ -356,7 +357,7 @@ describe("Generate E2E Tests", () => {
await handleStreaming(llm);
});
it("should handle ", { retry: 3 }, async () => {
it("should handle thinking", { retry: 3 }, async () => {
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
});
@ -597,6 +598,87 @@ describe("Generate E2E Tests", () => {
});
});
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
"Vercel AI Gateway Provider (google/gemini-2.5-flash via Anthropic Messages)",
() => {
const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
it("should handle multi-turn with tools", { retry: 3 }, async () => {
await multiTurn(llm);
});
},
);
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
"Vercel AI Gateway Provider (anthropic/claude-opus-4.5 via Anthropic Messages)",
() => {
const llm = getModel("vercel-ai-gateway", "anthropic/claude-opus-4.5");
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
it("should handle multi-turn with tools", { retry: 3 }, async () => {
await multiTurn(llm);
});
},
);
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)(
"Vercel AI Gateway Provider (openai/gpt-5.1-codex-max via Anthropic Messages)",
() => {
const llm = getModel("vercel-ai-gateway", "openai/gpt-5.1-codex-max");
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
it("should handle multi-turn with tools", { retry: 3 }, async () => {
await multiTurn(llm);
});
},
);
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via OpenAI Completions)", () => {
const llm = getModel("zai", "glm-4.5-air");
@ -698,6 +780,30 @@ describe("Generate E2E Tests", () => {
});
});
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider (MiniMax-M2.1 via Anthropic Messages)", () => {
const llm = getModel("minimax", "MiniMax-M2.1");
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// Tokens are resolved at module level (see oauthTokens above)
@ -907,6 +1013,34 @@ describe("Generate E2E Tests", () => {
});
});
// E2E coverage for the Amazon Bedrock provider (Claude Sonnet 4.5 global inference profile).
// Skipped unless AWS credentials are resolvable (see hasBedrockCredentials).
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider (claude-sonnet-4-5)", () => {
	const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	// [test title, scenario] pairs; registered in order, each with 3 retries.
	const scenarios: Array<[string, () => Promise<void>]> = [
		["should complete basic text generation", () => basicTextGeneration(model)],
		["should handle tool calling", () => handleToolCall(model)],
		["should handle streaming", () => handleStreaming(model)],
		["should handle thinking", () => handleThinking(model, { reasoning: "medium" })],
		["should handle multi-turn with thinking and tools", () => multiTurn(model, { reasoning: "high" })],
		["should handle image input", () => handleImage(model)],
	];
	for (const [title, run] of scenarios) {
		it(title, { retry: 3 }, run);
	}
});
// Check if ollama is installed and local LLM tests are enabled
let ollamaInstalled = false;
if (!process.env.PI_NO_LOCAL_LLM) {

View file

@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -44,18 +45,25 @@ async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: Op
expect(msg.stopReason).toBe("aborted");
// OpenAI providers, OpenAI Codex, Gemini CLI, zai, and the GPT-OSS model on Antigravity only send usage in the final chunk,
// so when aborted they have no token stats Anthropic and Google send usage information early in the stream
// OpenAI providers, OpenAI Codex, Gemini CLI, zai, Amazon Bedrock, and the GPT-OSS model on Antigravity only send usage in the final chunk,
// so when aborted they have no token stats. Anthropic and Google send usage information early in the stream.
// MiniMax reports input tokens but not output tokens when aborted.
if (
llm.api === "openai-completions" ||
llm.api === "openai-responses" ||
llm.api === "openai-codex-responses" ||
llm.provider === "google-gemini-cli" ||
llm.provider === "zai" ||
llm.provider === "amazon-bedrock" ||
llm.provider === "vercel-ai-gateway" ||
(llm.provider === "google-antigravity" && llm.id.includes("gpt-oss"))
) {
expect(msg.usage.input).toBe(0);
expect(msg.usage.output).toBe(0);
} else if (llm.provider === "minimax") {
// MiniMax reports input tokens early but output tokens only in final chunk
expect(msg.usage.input).toBeGreaterThan(0);
expect(msg.usage.output).toBe(0);
} else {
expect(msg.usage.input).toBeGreaterThan(0);
expect(msg.usage.output).toBeGreaterThan(0);
@ -144,6 +152,22 @@ describe("Token Statistics on Abort", () => {
});
});
// Token accounting when a MiniMax stream is aborted mid-flight (needs MINIMAX_API_KEY).
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
	const model = getModel("minimax", "MiniMax-M2.1");
	it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, () =>
		testTokensOnAbort(model),
	);
});
// Token accounting when a Vercel AI Gateway stream is aborted mid-flight (needs AI_GATEWAY_API_KEY).
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider", () => {
	const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, () =>
		testTokensOnAbort(model),
	);
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
@ -230,4 +254,12 @@ describe("Token Statistics on Abort", () => {
},
);
});
// Token accounting when a Bedrock stream is aborted mid-flight (needs AWS credentials).
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
	const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, () =>
		testTokensOnAbort(model),
	);
});
});

View file

@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -170,6 +171,30 @@ describe("Tool Call Without Result Tests", () => {
});
});
// MiniMax: dangling tool calls (no matching result) must be filtered before the request.
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider", () => {
	const llm = getModel("minimax", "MiniMax-M2.1");
	it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, () =>
		testToolCallWithoutResult(llm),
	);
});
// Vercel AI Gateway: dangling tool calls (no matching result) must be filtered before the request.
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider", () => {
	const llm = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, () =>
		testToolCallWithoutResult(llm),
	);
});
// Bedrock: dangling tool calls (no matching result) must be filtered before the request.
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider", () => {
	const llm = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, () =>
		testToolCallWithoutResult(llm),
	);
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================

View file

@ -16,6 +16,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Resolve OAuth tokens at module level (async, runs before tests)
@ -324,6 +325,52 @@ describe("totalTokens field", () => {
);
});
// =========================================================================
// MiniMax
// =========================================================================
// MiniMax: usage.totalTokens must equal the sum of its component counters,
// on both the cold (first) and cache-warm (second) request.
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax", () => {
	it(
		"MiniMax-M2.1 - should return totalTokens equal to sum of components",
		{ retry: 3, timeout: 60000 },
		async () => {
			const model = getModel("minimax", "MiniMax-M2.1");
			console.log(`\nMiniMax / ${model.id}:`);
			const usage = await testTotalTokensWithCache(model, { apiKey: process.env.MINIMAX_API_KEY });
			logUsage("First request", usage.first);
			logUsage("Second request", usage.second);
			for (const u of [usage.first, usage.second]) {
				assertTotalTokensEqualsComponents(u);
			}
		},
	);
});
// =========================================================================
// Vercel AI Gateway
// =========================================================================
// Vercel AI Gateway: usage.totalTokens must equal the sum of its component counters,
// on both the cold (first) and cache-warm (second) request.
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway", () => {
	it(
		"google/gemini-2.5-flash - should return totalTokens equal to sum of components",
		{ retry: 3, timeout: 60000 },
		async () => {
			const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
			console.log(`\nVercel AI Gateway / ${model.id}:`);
			const usage = await testTotalTokensWithCache(model, { apiKey: process.env.AI_GATEWAY_API_KEY });
			logUsage("First request", usage.first);
			logUsage("Second request", usage.second);
			for (const u of [usage.first, usage.second]) {
				assertTotalTokensEqualsComponents(u);
			}
		},
	);
});
// =========================================================================
// OpenRouter - Multiple backend providers
// =========================================================================
@ -535,6 +582,25 @@ describe("totalTokens field", () => {
);
});
// Bedrock: usage.totalTokens must equal the sum of its component counters,
// on both the cold (first) and cache-warm (second) request.
// No apiKey option: Bedrock auth comes from ambient AWS credentials.
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock", () => {
	it(
		"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
		{ retry: 3, timeout: 60000 },
		async () => {
			const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
			console.log(`\nAmazon Bedrock / ${model.id}:`);
			const usage = await testTotalTokensWithCache(model);
			logUsage("First request", usage.first);
			logUsage("Second request", usage.second);
			for (const u of [usage.first, usage.second]) {
				assertTotalTokensEqualsComponents(u);
			}
		},
	);
});
// =========================================================================
// OpenAI Codex (OAuth)
// =========================================================================

View file

@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
import { hasBedrockCredentials } from "./bedrock-utils.js";
import { resolveApiKey } from "./oauth.js";
// Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist
@ -617,6 +618,54 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
});
});
// Unicode/surrogate-pair robustness for MiniMax tool results (needs MINIMAX_API_KEY).
describe.skipIf(!process.env.MINIMAX_API_KEY)("MiniMax Provider Unicode Handling", () => {
	const model = getModel("minimax", "MiniMax-M2.1");
	const scenarios: Array<[string, () => Promise<void>]> = [
		["should handle emoji in tool results", () => testEmojiInToolResults(model)],
		["should handle real-world LinkedIn comment data with emoji", () => testRealWorldLinkedInData(model)],
		["should handle unpaired high surrogate (0xD83D) in tool results", () => testUnpairedHighSurrogate(model)],
	];
	for (const [title, run] of scenarios) {
		it(title, { retry: 3, timeout: 30000 }, run);
	}
});
// Unicode/surrogate-pair robustness for Vercel AI Gateway tool results (needs AI_GATEWAY_API_KEY).
describe.skipIf(!process.env.AI_GATEWAY_API_KEY)("Vercel AI Gateway Provider Unicode Handling", () => {
	const model = getModel("vercel-ai-gateway", "google/gemini-2.5-flash");
	const scenarios: Array<[string, () => Promise<void>]> = [
		["should handle emoji in tool results", () => testEmojiInToolResults(model)],
		["should handle real-world LinkedIn comment data with emoji", () => testRealWorldLinkedInData(model)],
		["should handle unpaired high surrogate (0xD83D) in tool results", () => testUnpairedHighSurrogate(model)],
	];
	for (const [title, run] of scenarios) {
		it(title, { retry: 3, timeout: 30000 }, run);
	}
});
// Unicode/surrogate-pair robustness for Bedrock tool results (needs AWS credentials).
describe.skipIf(!hasBedrockCredentials())("Amazon Bedrock Provider Unicode Handling", () => {
	const model = getModel("amazon-bedrock", "global.anthropic.claude-sonnet-4-5-20250929-v1:0");
	const scenarios: Array<[string, () => Promise<void>]> = [
		["should handle emoji in tool results", () => testEmojiInToolResults(model)],
		["should handle real-world LinkedIn comment data with emoji", () => testRealWorldLinkedInData(model)],
		["should handle unpaired high surrogate (0xD83D) in tool results", () => testUnpairedHighSurrogate(model)],
	];
	for (const [title, run] of scenarios) {
		it(title, { retry: 3, timeout: 30000 }, run);
	}
});
describe("OpenAI Codex Provider Unicode Handling", () => {
it.skipIf(!openaiCodexToken)(
"gpt-5.2-codex - should handle emoji in tool results",