Broader testing, more providers.

This commit is contained in:
Mario Zechner 2025-12-20 19:38:38 +01:00
parent 2a0283ecfd
commit 95fcda5887
7 changed files with 1400 additions and 355 deletions

View file

@ -1,8 +1,17 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
async function testEmptyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
// Test with completely empty content array
const emptyMessage: UserMessage = {
@ -133,19 +142,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
const llm = getModel("google", "gemini-2.5-flash");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -153,19 +162,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
const llm = getModel("openai", "gpt-4o-mini");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -173,39 +182,39 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
const llm = getModel("openai", "gpt-5-mini");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Empty Messages", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -213,19 +222,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
const llm = getModel("xai", "grok-3");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -233,19 +242,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
const llm = getModel("groq", "openai/gpt-oss-20b");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -253,19 +262,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -273,19 +282,19 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => {
const llm = getModel("zai", "glm-4.5-air");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
@ -293,20 +302,274 @@ describe("AI Providers Empty Message Tests", () => {
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Empty Messages", () => {
const llm = getModel("mistral", "devstral-medium-latest");
it("should handle empty content array", async () => {
it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm);
});
it("should handle empty string content", async () => {
it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm);
});
it("should handle whitespace-only content", async () => {
it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
await testWhitespaceOnlyMessage(llm);
});
it("should handle empty assistant message in conversation", async () => {
it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
await testEmptyAssistantMessage(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider Empty Messages", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
await testEmptyMessage(llm, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
await testEmptyStringMessage(llm, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)(
"should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
await testWhitespaceOnlyMessage(llm, { apiKey: anthropicOAuthToken });
},
);
it.skipIf(!anthropicOAuthToken)(
"should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
await testEmptyAssistantMessage(llm, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider Empty Messages", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testEmptyMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testEmptyStringMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testEmptyMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testEmptyStringMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider Empty Messages", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testEmptyMessage(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testEmptyStringMessage(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testWhitespaceOnlyMessage(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testEmptyAssistantMessage(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider Empty Messages", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testEmptyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testEmptyStringMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testEmptyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testEmptyStringMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should handle empty content array",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testEmptyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should handle empty string content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testEmptyStringMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should handle whitespace-only content",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should handle empty assistant message in conversation",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
},
);
});
});

View file

@ -3,9 +3,18 @@ import { join } from "node:path";
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js";
import { complete, getModel } from "../src/index.js";
import { complete, getModel, resolveApiKey } from "../src/index.js";
import type { OptionsForApi } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
/**
* Test that tool results containing only images work correctly across all providers.
* This verifies that:
@ -193,11 +202,11 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider (gemini-2.5-flash)", () => {
const llm = getModel("google", "gemini-2.5-flash");
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
@ -205,11 +214,11 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
@ -217,11 +226,11 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
const llm = getModel("openai", "gpt-5-mini");
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
@ -229,23 +238,11 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-haiku-4-5)", () => {
const model = getModel("anthropic", "claude-haiku-4-5");
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(model);
});
it("should handle tool result with text and image", async () => {
await handleToolWithTextAndImageResult(model);
});
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-5)", () => {
const model = getModel("anthropic", "claude-sonnet-4-5");
it("should handle tool result with only image", async () => {
await handleToolWithImageResult(model);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(model);
});
});
@ -253,11 +250,11 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v)", () => {
const llm = getModel("openrouter", "z-ai/glm-4.5v");
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
@ -265,12 +262,134 @@ describe("Tool Results with Images", () => {
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b)", () => {
const llm = getModel("mistral", "pixtral-12b");
it("should handle tool result with only image", async () => {
it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithImageResult(llm);
});
it("should handle tool result with text and image", async () => {
it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
await handleToolWithTextAndImageResult(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider (claude-sonnet-4-5)", () => {
const model = getModel("anthropic", "claude-sonnet-4-5");
it.skipIf(!anthropicOAuthToken)(
"should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithImageResult(model, { apiKey: anthropicOAuthToken });
},
);
it.skipIf(!anthropicOAuthToken)(
"should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
await handleToolWithTextAndImageResult(model, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await handleToolWithImageResult(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await handleToolWithTextAndImageResult(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await handleToolWithImageResult(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle tool result with only image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await handleToolWithImageResult(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle tool result with text and image",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken });
},
);
// Note: gpt-oss-120b-medium does not support images, so not tested here
});
});

View file

@ -342,27 +342,27 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
const llm = getModel("google", "gemini-2.5-flash");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle ", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
@ -370,19 +370,19 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
@ -390,15 +390,15 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
const llm = getModel("openai", "gpt-5-mini");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
@ -406,11 +406,11 @@ describe("Generate E2E Tests", () => {
await handleThinking(llm, { reasoningEffort: "high" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoningEffort: "high" });
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
@ -418,19 +418,19 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(model, { thinkingEnabled: true });
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(model);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(model);
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(model);
});
});
@ -438,19 +438,19 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
const model = getModel("openai", "gpt-5-mini");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(model);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(model);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(model);
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(model);
});
});
@ -458,23 +458,23 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
const llm = getModel("xai", "grok-code-fast-1");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { reasoningEffort: "medium" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoningEffort: "medium" });
});
});
@ -482,23 +482,23 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
const llm = getModel("groq", "openai/gpt-oss-20b");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { reasoningEffort: "medium" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoningEffort: "medium" });
});
});
@ -506,23 +506,23 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { reasoningEffort: "medium" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoningEffort: "medium" });
});
});
@ -530,19 +530,19 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
const llm = getModel("openrouter", "z-ai/glm-4.5v");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { reasoningEffort: "medium" });
});
@ -550,7 +550,7 @@ describe("Generate E2E Tests", () => {
await multiTurn(llm, { reasoningEffort: "medium" });
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
@ -558,24 +558,24 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => {
const llm = getModel("zai", "glm-4.5-air");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking", async () => {
it("should handle thinking", { retry: 3 }, async () => {
// Prompt doesn't trigger thinking
// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
});
@ -583,28 +583,28 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => {
const llm = getModel("zai", "glm-4.5v");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking", async () => {
it("should handle thinking", { retry: 3 }, async () => {
// Prompt doesn't trigger thinking
// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
// Can't see image for some reason?
// await handleImage(llm);
});
@ -615,25 +615,25 @@ describe("Generate E2E Tests", () => {
() => {
const llm = getModel("mistral", "devstral-medium-latest");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
// FIXME Skip for now, getting a 422 status code, need to test with official SDK
// const llm = getModel("mistral", "magistral-medium-latest");
// await handleThinking(llm, { reasoningEffort: "medium" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { reasoningEffort: "medium" });
});
},
@ -642,19 +642,19 @@ describe("Generate E2E Tests", () => {
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b with image support)", () => {
const llm = getModel("mistral", "pixtral-12b");
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm);
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm);
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm);
});
it("should handle image input", async () => {
it("should handle image input", { retry: 3 }, async () => {
await handleImage(llm);
});
});
@ -667,27 +667,27 @@ describe("Generate E2E Tests", () => {
describe("Anthropic OAuth Provider (claude-sonnet-4-20250514)", () => {
const model = getModel("anthropic", "claude-sonnet-4-20250514");
it.skipIf(!anthropicOAuthToken)("should complete basic text generation", async () => {
it.skipIf(!anthropicOAuthToken)("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle tool calling", async () => {
it.skipIf(!anthropicOAuthToken)("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle streaming", async () => {
it.skipIf(!anthropicOAuthToken)("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle thinking", async () => {
it.skipIf(!anthropicOAuthToken)("should handle thinking", { retry: 3 }, async () => {
await handleThinking(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
});
it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", async () => {
it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
});
it.skipIf(!anthropicOAuthToken)("should handle image input", async () => {
it.skipIf(!anthropicOAuthToken)("should handle image input", { retry: 3 }, async () => {
await handleImage(model, { apiKey: anthropicOAuthToken });
});
});
@ -695,15 +695,15 @@ describe("Generate E2E Tests", () => {
describe("GitHub Copilot Provider (gpt-4o via OpenAI Completions)", () => {
const llm = getModel("github-copilot", "gpt-4o");
it.skipIf(!githubCopilotToken)("should complete basic text generation", async () => {
it.skipIf(!githubCopilotToken)("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm, { apiKey: githubCopilotToken });
});
it.skipIf(!githubCopilotToken)("should handle tool calling", async () => {
it.skipIf(!githubCopilotToken)("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm, { apiKey: githubCopilotToken });
});
it.skipIf(!githubCopilotToken)("should handle streaming", async () => {
it.skipIf(!githubCopilotToken)("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm, { apiKey: githubCopilotToken });
});
@ -712,12 +712,12 @@ describe("Generate E2E Tests", () => {
await handleThinking(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
});
it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", async () => {
it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
const thinkingModel = getModel("github-copilot", "gpt-5-mini");
await multiTurn(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
});
it.skipIf(!githubCopilotToken)("should handle image input", async () => {
it.skipIf(!githubCopilotToken)("should handle image input", { retry: 3 }, async () => {
await handleImage(llm, { apiKey: githubCopilotToken });
});
});
@ -725,27 +725,27 @@ describe("Generate E2E Tests", () => {
describe("Google Gemini CLI Provider (gemini-2.5-flash)", () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
it.skipIf(!geminiCliToken)("should complete basic text generation", async () => {
it.skipIf(!geminiCliToken)("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle tool calling", async () => {
it.skipIf(!geminiCliToken)("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle streaming", async () => {
it.skipIf(!geminiCliToken)("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle thinking", async () => {
it.skipIf(!geminiCliToken)("should handle thinking", { retry: 3 }, async () => {
await handleThinking(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 1024 } });
});
it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", async () => {
it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 2048 } });
});
it.skipIf(!geminiCliToken)("should handle image input", async () => {
it.skipIf(!geminiCliToken)("should handle image input", { retry: 3 }, async () => {
await handleImage(llm, { apiKey: geminiCliToken });
});
});
@ -753,19 +753,19 @@ describe("Generate E2E Tests", () => {
describe("Google Antigravity Provider (gemini-3-flash)", () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle tool calling", async () => {
it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle streaming", async () => {
it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle thinking", async () => {
it.skipIf(!antigravityToken)("should handle thinking", { retry: 3 }, async () => {
// gemini-3-flash has reasoning: false, use gemini-3-pro-high for thinking
const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
await handleThinking(thinkingModel, {
@ -774,12 +774,12 @@ describe("Generate E2E Tests", () => {
});
});
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 2048 } });
});
it.skipIf(!antigravityToken)("should handle image input", async () => {
it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => {
await handleImage(llm, { apiKey: antigravityToken });
});
});
@ -787,19 +787,19 @@ describe("Generate E2E Tests", () => {
describe("Google Antigravity Provider (claude-sonnet-4-5)", () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle tool calling", async () => {
it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle streaming", async () => {
it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle thinking", async () => {
it.skipIf(!antigravityToken)("should handle thinking", { retry: 3 }, async () => {
// claude-sonnet-4-5 has reasoning: false, use claude-sonnet-4-5-thinking
const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
await handleThinking(thinkingModel, {
@ -808,12 +808,12 @@ describe("Generate E2E Tests", () => {
});
});
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 4096 } });
});
it.skipIf(!antigravityToken)("should handle image input", async () => {
it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => {
await handleImage(llm, { apiKey: antigravityToken });
});
});
@ -895,23 +895,23 @@ describe("Generate E2E Tests", () => {
}
});
it("should complete basic text generation", async () => {
it("should complete basic text generation", { retry: 3 }, async () => {
await basicTextGeneration(llm, { apiKey: "test" });
});
it("should handle tool calling", async () => {
it("should handle tool calling", { retry: 3 }, async () => {
await handleToolCall(llm, { apiKey: "test" });
});
it("should handle streaming", async () => {
it("should handle streaming", { retry: 3 }, async () => {
await handleStreaming(llm, { apiKey: "test" });
});
it("should handle thinking mode", async () => {
it("should handle thinking mode", { retry: 3 }, async () => {
await handleThinking(llm, { apiKey: "test", reasoningEffort: "medium" });
});
it("should handle multi-turn with thinking and tools", async () => {
it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
await multiTurn(llm, { apiKey: "test", reasoningEffort: "medium" });
});
});

View file

@ -1,8 +1,17 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { stream } from "../src/stream.js";
import { resolveApiKey, stream } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
const context: Context = {
messages: [
@ -46,9 +55,9 @@ describe("Token Statistics on Abort", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
const llm = getModel("google", "gemini-2.5-flash");
it("should include token stats when aborted mid-stream", async () => {
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm, { thinking: { enabled: true } });
}, 10000);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
@ -57,32 +66,140 @@ describe("Token Statistics on Abort", () => {
api: "openai-completions",
};
it("should include token stats when aborted mid-stream", async () => {
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
}, 10000);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
const llm = getModel("openai", "gpt-5-mini");
it("should include token stats when aborted mid-stream", async () => {
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
}, 20000);
});
});
describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => {
const llm = getModel("anthropic", "claude-opus-4-1-20250805");
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should include token stats when aborted mid-stream", async () => {
await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
}, 10000);
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const llm = getModel("xai", "grok-3-fast");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
const llm = getModel("groq", "openai/gpt-oss-20b");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
const llm = getModel("zai", "glm-4.5-flash");
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
});
});
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
const llm = getModel("mistral", "devstral-medium-latest");
it("should include token stats when aborted mid-stream", async () => {
it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
await testTokensOnAbort(llm);
}, 10000);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)(
"should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
await testTokensOnAbort(llm, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testTokensOnAbort(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should include token stats when aborted mid-stream",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
await testTokensOnAbort(llm, { apiKey: antigravityToken });
},
);
});
});

View file

@ -1,8 +1,17 @@
import { type Static, Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import type { Context, Tool } from "../src/types.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
// Simple calculate tool
const calculateSchema = Type.Object({
@ -17,7 +26,10 @@ const calculateTool: Tool = {
parameters: calculateSchema,
};
async function testToolCallWithoutResult(model: any, options: any = {}) {
async function testToolCallWithoutResult<TApi extends Api>(
model: Model<TApi>,
options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
) {
// Step 1: Create context with the calculate tool
const context: Context = {
systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.",
@ -70,7 +82,8 @@ async function testToolCallWithoutResult(model: any, options: any = {}) {
.filter((block) => block.type === "text")
.map((block) => (block.type === "text" ? block.text : ""))
.join(" ");
expect(textContent.length).toBeGreaterThan(0);
const toolCalls = secondResponse.content.filter((block) => block.type === "toolCall").length;
expect(toolCalls || textContent.length).toBeGreaterThan(0);
console.log("Answer:", textContent);
// Verify the stop reason is either "stop" or "toolUse" (new tool call)
@ -78,19 +91,158 @@ async function testToolCallWithoutResult(model: any, options: any = {}) {
}
describe("Tool Call Without Result Tests", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider - Missing Tool Result", () => {
// =========================================================================
// API Key-based providers
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
const model = getModel("google", "gemini-2.5-flash");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
const model: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
const model = getModel("openai", "gpt-5-mini");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should filter out tool calls without corresponding tool results", async () => {
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
}, 30000);
});
});
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider - Missing Tool Result", () => {
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
const model = getModel("xai", "grok-3-fast");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
const model = getModel("groq", "openai/gpt-oss-20b");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
const model = getModel("cerebras", "gpt-oss-120b");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
const model = getModel("zai", "glm-4.5-flash");
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
});
});
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
const model = getModel("mistral", "devstral-medium-latest");
it("should filter out tool calls without corresponding tool results", async () => {
it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
await testToolCallWithoutResult(model);
}, 30000);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider", () => {
const model = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)(
"should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
await testToolCallWithoutResult(model, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("github-copilot", "gpt-4o");
await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("github-copilot", "claude-sonnet-4");
await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-gemini-cli", "gemini-2.5-flash");
await testToolCallWithoutResult(model, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "gemini-3-flash");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "claude-sonnet-4-5");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should filter out tool calls without corresponding tool results",
{ retry: 3, timeout: 30000 },
async () => {
const model = getModel("google-antigravity", "gpt-oss-120b-medium");
await testToolCallWithoutResult(model, { apiKey: antigravityToken });
},
);
});
});

View file

@ -14,9 +14,18 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
// Generate a long system prompt to trigger caching (>2k bytes for most providers)
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
@ -89,41 +98,49 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it(
"claude-3-5-haiku - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
console.log(`\nAnthropic / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
console.log(`\nAnthropic / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
}, 60000);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
describe("Anthropic (OAuth)", () => {
it.skipIf(!anthropicOAuthToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
console.log(`\nAnthropic OAuth / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
console.log(`\nAnthropic OAuth / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: anthropicOAuthToken });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
}, 60000);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
// =========================================================================
@ -131,25 +148,29 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
const llm: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
it(
"gpt-4o-mini - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
console.log(`\nOpenAI Completions / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
console.log(`\nOpenAI Completions / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
it("gpt-4o - should return totalTokens equal to sum of components", async () => {
it("gpt-4o - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
const llm = getModel("openai", "gpt-4o");
console.log(`\nOpenAI Responses / ${llm.id}:`);
@ -160,7 +181,7 @@ describe("totalTokens field", () => {
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
});
});
// =========================================================================
@ -168,18 +189,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
const llm = getModel("google", "gemini-2.0-flash");
it(
"gemini-2.0-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google", "gemini-2.0-flash");
console.log(`\nGoogle / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
console.log(`\nGoogle / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -187,18 +212,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
const llm = getModel("xai", "grok-3-fast");
it(
"grok-3-fast - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("xai", "grok-3-fast");
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -206,18 +235,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
const llm = getModel("groq", "openai/gpt-oss-120b");
it(
"openai/gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("groq", "openai/gpt-oss-120b");
console.log(`\nGroq / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
console.log(`\nGroq / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -225,18 +258,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it(
"gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("cerebras", "gpt-oss-120b");
console.log(`\nCerebras / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
console.log(`\nCerebras / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -244,18 +281,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
const llm = getModel("zai", "glm-4.5-flash");
it(
"glm-4.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("zai", "glm-4.5-flash");
console.log(`\nz.ai / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
console.log(`\nz.ai / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -263,18 +304,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => {
it("devstral-medium-latest - should return totalTokens equal to sum of components", async () => {
const llm = getModel("mistral", "devstral-medium-latest");
it(
"devstral-medium-latest - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("mistral", "devstral-medium-latest");
console.log(`\nMistral / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
console.log(`\nMistral / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -282,69 +327,209 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
it(
"anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "deepseek/deepseek-chat");
it(
"deepseek/deepseek-chat - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "deepseek/deepseek-chat");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
it(
"mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
it(
"google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
it(
"meta-llama/llama-4-maverick - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// GitHub Copilot (OAuth)
// =========================================================================
describe("GitHub Copilot (OAuth)", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Gemini CLI (OAuth)
// =========================================================================
describe("Google Gemini CLI (OAuth)", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
console.log(`\nGoogle Gemini CLI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: geminiCliToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Antigravity (OAuth)
// =========================================================================
describe("Google Antigravity (OAuth)", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
});

View file

@ -1,8 +1,21 @@
import { Type } from "@sinclair/typebox";
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
// Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist
const emptySchema = Type.Object({});
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
/**
* Test for Unicode surrogate pair handling in tool results.
*
@ -53,7 +66,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
{
name: "test_tool",
description: "A test tool",
parameters: {} as any,
parameters: emptySchema,
},
],
};
@ -138,7 +151,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
{
name: "linkedin_skill",
description: "Get LinkedIn comments",
parameters: {} as any,
parameters: emptySchema,
},
],
};
@ -226,7 +239,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
{
name: "test_tool",
description: "A test tool",
parameters: {} as any,
parameters: emptySchema,
},
],
};
@ -265,15 +278,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Unicode Handling", () => {
const llm = getModel("google", "gemini-2.5-flash");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -281,15 +294,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Unicode Handling", () => {
const llm = getModel("openai", "gpt-4o-mini");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -297,47 +310,243 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Unicode Handling", () => {
const llm = getModel("openai", "gpt-5-mini");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Unicode Handling", () => {
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Unicode Handling", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// =========================================================================
describe("Anthropic OAuth Provider Unicode Handling", () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it.skipIf(!anthropicOAuthToken)("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)(
"should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
await testRealWorldLinkedInData(llm, { apiKey: anthropicOAuthToken });
},
);
it.skipIf(!anthropicOAuthToken)(
"should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
await testUnpairedHighSurrogate(llm, { apiKey: anthropicOAuthToken });
},
);
});
describe("GitHub Copilot Provider Unicode Handling", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle emoji in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testEmojiInToolResults(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testRealWorldLinkedInData(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"gpt-4o - should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
await testUnpairedHighSurrogate(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle emoji in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testEmojiInToolResults(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testRealWorldLinkedInData(llm, { apiKey: githubCopilotToken });
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
await testUnpairedHighSurrogate(llm, { apiKey: githubCopilotToken });
},
);
});
describe("Google Gemini CLI Provider Unicode Handling", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle emoji in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testEmojiInToolResults(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testRealWorldLinkedInData(llm, { apiKey: geminiCliToken });
},
);
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
await testUnpairedHighSurrogate(llm, { apiKey: geminiCliToken });
},
);
});
describe("Google Antigravity Provider Unicode Handling", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle emoji in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testEmojiInToolResults(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testRealWorldLinkedInData(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"gemini-3-flash - should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
await testUnpairedHighSurrogate(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle emoji in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testEmojiInToolResults(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle real-world LinkedIn comment data with emoji",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testRealWorldLinkedInData(llm, { apiKey: antigravityToken });
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should handle unpaired high surrogate (0xD83D) in tool results",
{ retry: 3, timeout: 30000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
await testUnpairedHighSurrogate(llm, { apiKey: antigravityToken });
},
);
// Antigravity / gpt-oss-120b-medium: same Unicode tool-result suite as the
// other Antigravity-hosted models; skipped without a resolved OAuth token.
it.skipIf(!antigravityToken)(
    "gpt-oss-120b-medium - should handle emoji in tool results",
    { retry: 3, timeout: 30000 },
    async () =>
        testEmojiInToolResults(getModel("google-antigravity", "gpt-oss-120b-medium"), {
            apiKey: antigravityToken,
        }),
);
it.skipIf(!antigravityToken)(
    "gpt-oss-120b-medium - should handle real-world LinkedIn comment data with emoji",
    { retry: 3, timeout: 30000 },
    async () =>
        testRealWorldLinkedInData(getModel("google-antigravity", "gpt-oss-120b-medium"), {
            apiKey: antigravityToken,
        }),
);
it.skipIf(!antigravityToken)(
    "gpt-oss-120b-medium - should handle unpaired high surrogate (0xD83D) in tool results",
    { retry: 3, timeout: 30000 },
    async () =>
        testUnpairedHighSurrogate(getModel("google-antigravity", "gpt-oss-120b-medium"), {
            apiKey: antigravityToken,
        }),
);
});
describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => {
const llm = getModel("xai", "grok-3");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -345,15 +554,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Unicode Handling", () => {
const llm = getModel("groq", "openai/gpt-oss-20b");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -361,15 +570,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Unicode Handling", () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -377,15 +586,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => {
const llm = getModel("zai", "glm-4.5-air");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});
@ -393,15 +602,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Unicode Handling", () => {
const llm = getModel("mistral", "devstral-medium-latest");
it("should handle emoji in tool results", async () => {
it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
await testEmojiInToolResults(llm);
});
it("should handle real-world LinkedIn comment data with emoji", async () => {
it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
await testRealWorldLinkedInData(llm);
});
it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
await testUnpairedHighSurrogate(llm);
});
});