From 95fcda588721f1af00a32729b0b1eeb65e3d8fc1 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Sat, 20 Dec 2025 19:38:38 +0100 Subject: [PATCH] Broader testing, more providers. --- packages/ai/test/empty.test.ts | 339 +++++++++++-- packages/ai/test/image-tool-result.test.ts | 169 ++++++- packages/ai/test/stream.test.ts | 194 ++++---- packages/ai/test/tokens.test.ts | 145 +++++- .../ai/test/tool-call-without-result.test.ts | 172 ++++++- packages/ai/test/total-tokens.test.ts | 463 ++++++++++++------ packages/ai/test/unicode-surrogate.test.ts | 273 +++++++++-- 7 files changed, 1400 insertions(+), 355 deletions(-) diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts index 0a8982d3..b204e06a 100644 --- a/packages/ai/test/empty.test.ts +++ b/packages/ai/test/empty.test.ts @@ -1,8 +1,17 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; -import { complete } from "../src/stream.js"; +import { complete, resolveApiKey } from "../src/stream.js"; import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js"; +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; + async function testEmptyMessage(llm: Model, options: OptionsForApi = {}) { // Test with completely empty content array const emptyMessage: UserMessage = { @@ -133,19 +142,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => { const llm = getModel("google", "gemini-2.5-flash"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await 
testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -153,19 +162,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => { const llm = getModel("openai", "gpt-4o-mini"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -173,39 +182,39 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => { const llm = getModel("openai", "gpt-5-mini"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await 
testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); - describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => { + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Empty Messages", () => { const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -213,19 +222,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => { const llm = getModel("xai", "grok-3"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () 
=> { await testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -233,19 +242,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => { const llm = getModel("groq", "openai/gpt-oss-20b"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -253,19 +262,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => { const llm = getModel("cerebras", "gpt-oss-120b"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await 
testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -273,19 +282,19 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => { const llm = getModel("zai", "glm-4.5-air"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); @@ -293,20 +302,274 @@ describe("AI Providers Empty Message Tests", () => { describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Empty Messages", () => { const llm = getModel("mistral", "devstral-medium-latest"); - it("should handle empty content array", async () => { + it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { await 
testEmptyMessage(llm); }); - it("should handle empty string content", async () => { + it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { await testEmptyStringMessage(llm); }); - it("should handle whitespace-only content", async () => { + it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => { await testWhitespaceOnlyMessage(llm); }); - it("should handle empty assistant message in conversation", async () => { + it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => { await testEmptyAssistantMessage(llm); }); }); + + // ========================================================================= + // OAuth-based providers (credentials from ~/.pi/agent/oauth.json) + // ========================================================================= + + describe("Anthropic OAuth Provider Empty Messages", () => { + const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); + + it.skipIf(!anthropicOAuthToken)("should handle empty content array", { retry: 3, timeout: 30000 }, async () => { + await testEmptyMessage(llm, { apiKey: anthropicOAuthToken }); + }); + + it.skipIf(!anthropicOAuthToken)("should handle empty string content", { retry: 3, timeout: 30000 }, async () => { + await testEmptyStringMessage(llm, { apiKey: anthropicOAuthToken }); + }); + + it.skipIf(!anthropicOAuthToken)( + "should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + await testWhitespaceOnlyMessage(llm, { apiKey: anthropicOAuthToken }); + }, + ); + + it.skipIf(!anthropicOAuthToken)( + "should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + await testEmptyAssistantMessage(llm, { apiKey: anthropicOAuthToken }); + }, + ); + }); + + describe("GitHub Copilot Provider Empty Messages", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle empty content array", + { retry: 3, timeout: 30000 }, + async 
() => { + const llm = getModel("github-copilot", "gpt-4o"); + await testEmptyMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testEmptyStringMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle empty content array", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testEmptyMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testEmptyStringMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", 
"claude-sonnet-4"); + await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken }); + }, + ); + }); + + describe("Google Gemini CLI Provider Empty Messages", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle empty content array", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testEmptyMessage(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testEmptyStringMessage(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testWhitespaceOnlyMessage(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testEmptyAssistantMessage(llm, { apiKey: geminiCliToken }); + }, + ); + }); + + describe("Google Antigravity Provider Empty Messages", () => { + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle empty content array", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testEmptyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testEmptyStringMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should 
handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testEmptyAssistantMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle empty content array", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testEmptyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testEmptyStringMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testEmptyAssistantMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle empty content array", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testEmptyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + 
it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle empty string content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testEmptyStringMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle whitespace-only content", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle empty assistant message in conversation", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testEmptyAssistantMessage(llm, { apiKey: antigravityToken }); + }, + ); + }); }); diff --git a/packages/ai/test/image-tool-result.test.ts b/packages/ai/test/image-tool-result.test.ts index cadfd856..9f4ef518 100644 --- a/packages/ai/test/image-tool-result.test.ts +++ b/packages/ai/test/image-tool-result.test.ts @@ -3,9 +3,18 @@ import { join } from "node:path"; import { Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js"; -import { complete, getModel } from "../src/index.js"; +import { complete, getModel, resolveApiKey } from "../src/index.js"; import type { OptionsForApi } from "../src/types.js"; +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; + /** * Test that tool results containing only images work correctly across all providers. 
* This verifies that: @@ -193,11 +202,11 @@ describe("Tool Results with Images", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider (gemini-2.5-flash)", () => { const llm = getModel("google", "gemini-2.5-flash"); - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithImageResult(llm); }); - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(llm); }); }); @@ -205,11 +214,11 @@ describe("Tool Results with Images", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => { const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" }; - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithImageResult(llm); }); - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(llm); }); }); @@ -217,11 +226,11 @@ describe("Tool Results with Images", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => { const llm = getModel("openai", "gpt-5-mini"); - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithImageResult(llm); }); - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(llm); }); }); @@ -229,23 +238,11 @@ 
describe("Tool Results with Images", () => { describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-haiku-4-5)", () => { const model = getModel("anthropic", "claude-haiku-4-5"); - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithImageResult(model); }); - it("should handle tool result with text and image", async () => { - await handleToolWithTextAndImageResult(model); - }); - }); - - describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-5)", () => { - const model = getModel("anthropic", "claude-sonnet-4-5"); - - it("should handle tool result with only image", async () => { - await handleToolWithImageResult(model); - }); - - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(model); }); }); @@ -253,11 +250,11 @@ describe("Tool Results with Images", () => { describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v)", () => { const llm = getModel("openrouter", "z-ai/glm-4.5v"); - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithImageResult(llm); }); - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(llm); }); }); @@ -265,12 +262,134 @@ describe("Tool Results with Images", () => { describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b)", () => { const llm = getModel("mistral", "pixtral-12b"); - it("should handle tool result with only image", async () => { + it("should handle tool result with only image", { retry: 3, 
timeout: 30000 }, async () => { await handleToolWithImageResult(llm); }); - it("should handle tool result with text and image", async () => { + it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => { await handleToolWithTextAndImageResult(llm); }); }); + + // ========================================================================= + // OAuth-based providers (credentials from ~/.pi/agent/oauth.json) + // ========================================================================= + + describe("Anthropic OAuth Provider (claude-sonnet-4-5)", () => { + const model = getModel("anthropic", "claude-sonnet-4-5"); + + it.skipIf(!anthropicOAuthToken)( + "should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + await handleToolWithImageResult(model, { apiKey: anthropicOAuthToken }); + }, + ); + + it.skipIf(!anthropicOAuthToken)( + "should handle tool result with text and image", + { retry: 3, timeout: 30000 }, + async () => { + await handleToolWithTextAndImageResult(model, { apiKey: anthropicOAuthToken }); + }, + ); + }); + + describe("GitHub Copilot Provider", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await handleToolWithImageResult(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle tool result with text and image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await handleToolWithImageResult(llm, { apiKey: githubCopilotToken }); + }, + ); 
+ + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle tool result with text and image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken }); + }, + ); + }); + + describe("Google Gemini CLI Provider", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await handleToolWithImageResult(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle tool result with text and image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await handleToolWithTextAndImageResult(llm, { apiKey: geminiCliToken }); + }, + ); + }); + + describe("Google Antigravity Provider", () => { + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await handleToolWithImageResult(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle tool result with text and image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle tool result with only image", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await handleToolWithImageResult(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle tool result with text and image", + { 
retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken }); + }, + ); + + // Note: gpt-oss-120b-medium does not support images, so not tested here + }); }); diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts index 4d08b7b9..af1acc98 100644 --- a/packages/ai/test/stream.test.ts +++ b/packages/ai/test/stream.test.ts @@ -342,27 +342,27 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => { const llm = getModel("google", "gemini-2.5-flash"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle ", async () => { + it("should handle ", { retry: 3 }, async () => { await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { thinking: { enabled: true, budgetTokens: 2048 } }); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(llm); }); }); @@ -370,19 +370,19 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => { const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" }; - it("should complete basic text generation", async () 
=> { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(llm); }); }); @@ -390,15 +390,15 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => { const llm = getModel("openai", "gpt-5-mini"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); @@ -406,11 +406,11 @@ describe("Generate E2E Tests", () => { await handleThinking(llm, { reasoningEffort: "high" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { reasoningEffort: "high" }); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(llm); }); }); @@ -418,19 +418,19 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => { const model = getModel("anthropic", "claude-3-5-haiku-20241022"); - it("should complete basic text generation", async () => { + it("should complete basic text 
generation", { retry: 3 }, async () => { await basicTextGeneration(model, { thinkingEnabled: true }); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(model); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(model); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(model); }); }); @@ -438,19 +438,19 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => { const model = getModel("openai", "gpt-5-mini"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(model); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(model); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(model); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(model); }); }); @@ -458,23 +458,23 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => { const llm = getModel("xai", "grok-code-fast-1"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await 
handleStreaming(llm); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { await handleThinking(llm, { reasoningEffort: "medium" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { reasoningEffort: "medium" }); }); }); @@ -482,23 +482,23 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => { const llm = getModel("groq", "openai/gpt-oss-20b"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { await handleThinking(llm, { reasoningEffort: "medium" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { reasoningEffort: "medium" }); }); }); @@ -506,23 +506,23 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => { const llm = getModel("cerebras", "gpt-oss-120b"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await 
handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { await handleThinking(llm, { reasoningEffort: "medium" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { reasoningEffort: "medium" }); }); }); @@ -530,19 +530,19 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => { const llm = getModel("openrouter", "z-ai/glm-4.5v"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { await handleThinking(llm, { reasoningEffort: "medium" }); }); @@ -550,7 +550,7 @@ describe("Generate E2E Tests", () => { await multiTurn(llm, { reasoningEffort: "medium" }); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await handleImage(llm); }); }); @@ -558,24 +558,24 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => { const llm = getModel("zai", "glm-4.5-air"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, 
async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking", async () => { + it("should handle thinking", { retry: 3 }, async () => { // Prompt doesn't trigger thinking // await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); }); @@ -583,28 +583,28 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => { const llm = getModel("zai", "glm-4.5v"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking", async () => { + it("should handle thinking", { retry: 3 }, async () => { // Prompt doesn't trigger thinking // await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); }); - it("should handle image input", async () => { + it("should handle image input", 
{ retry: 3 }, async () => { // Can't see image for some reason? // await handleImage(llm); }); @@ -615,25 +615,25 @@ describe("Generate E2E Tests", () => { () => { const llm = getModel("mistral", "devstral-medium-latest"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { // FIXME Skip for now, getting a 422 stauts code, need to test with official SDK // const llm = getModel("mistral", "magistral-medium-latest"); // await handleThinking(llm, { reasoningEffort: "medium" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { reasoningEffort: "medium" }); }); }, @@ -642,19 +642,19 @@ describe("Generate E2E Tests", () => { describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b with image support)", () => { const llm = getModel("mistral", "pixtral-12b"); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm); }); - it("should handle image input", async () => { + it("should handle image input", { retry: 3 }, async () => { await 
handleImage(llm); }); }); @@ -667,27 +667,27 @@ describe("Generate E2E Tests", () => { describe("Anthropic OAuth Provider (claude-sonnet-4-20250514)", () => { const model = getModel("anthropic", "claude-sonnet-4-20250514"); - it.skipIf(!anthropicOAuthToken)("should complete basic text generation", async () => { + it.skipIf(!anthropicOAuthToken)("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(model, { apiKey: anthropicOAuthToken }); }); - it.skipIf(!anthropicOAuthToken)("should handle tool calling", async () => { + it.skipIf(!anthropicOAuthToken)("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(model, { apiKey: anthropicOAuthToken }); }); - it.skipIf(!anthropicOAuthToken)("should handle streaming", async () => { + it.skipIf(!anthropicOAuthToken)("should handle streaming", { retry: 3 }, async () => { await handleStreaming(model, { apiKey: anthropicOAuthToken }); }); - it.skipIf(!anthropicOAuthToken)("should handle thinking", async () => { + it.skipIf(!anthropicOAuthToken)("should handle thinking", { retry: 3 }, async () => { await handleThinking(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true }); }); - it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", async () => { + it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true }); }); - it.skipIf(!anthropicOAuthToken)("should handle image input", async () => { + it.skipIf(!anthropicOAuthToken)("should handle image input", { retry: 3 }, async () => { await handleImage(model, { apiKey: anthropicOAuthToken }); }); }); @@ -695,15 +695,15 @@ describe("Generate E2E Tests", () => { describe("GitHub Copilot Provider (gpt-4o via OpenAI Completions)", () => { const llm = getModel("github-copilot", "gpt-4o"); - it.skipIf(!githubCopilotToken)("should complete basic text generation", 
async () => { + it.skipIf(!githubCopilotToken)("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm, { apiKey: githubCopilotToken }); }); - it.skipIf(!githubCopilotToken)("should handle tool calling", async () => { + it.skipIf(!githubCopilotToken)("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm, { apiKey: githubCopilotToken }); }); - it.skipIf(!githubCopilotToken)("should handle streaming", async () => { + it.skipIf(!githubCopilotToken)("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm, { apiKey: githubCopilotToken }); }); @@ -712,12 +712,12 @@ describe("Generate E2E Tests", () => { await handleThinking(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" }); }); - it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", async () => { + it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { const thinkingModel = getModel("github-copilot", "gpt-5-mini"); await multiTurn(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" }); }); - it.skipIf(!githubCopilotToken)("should handle image input", async () => { + it.skipIf(!githubCopilotToken)("should handle image input", { retry: 3 }, async () => { await handleImage(llm, { apiKey: githubCopilotToken }); }); }); @@ -725,27 +725,27 @@ describe("Generate E2E Tests", () => { describe("Google Gemini CLI Provider (gemini-2.5-flash)", () => { const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); - it.skipIf(!geminiCliToken)("should complete basic text generation", async () => { + it.skipIf(!geminiCliToken)("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm, { apiKey: geminiCliToken }); }); - it.skipIf(!geminiCliToken)("should handle tool calling", async () => { + it.skipIf(!geminiCliToken)("should handle tool calling", { retry: 3 }, async () => { 
await handleToolCall(llm, { apiKey: geminiCliToken }); }); - it.skipIf(!geminiCliToken)("should handle streaming", async () => { + it.skipIf(!geminiCliToken)("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm, { apiKey: geminiCliToken }); }); - it.skipIf(!geminiCliToken)("should handle thinking", async () => { + it.skipIf(!geminiCliToken)("should handle thinking", { retry: 3 }, async () => { await handleThinking(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 1024 } }); }); - it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", async () => { + it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 2048 } }); }); - it.skipIf(!geminiCliToken)("should handle image input", async () => { + it.skipIf(!geminiCliToken)("should handle image input", { retry: 3 }, async () => { await handleImage(llm, { apiKey: geminiCliToken }); }); }); @@ -753,19 +753,19 @@ describe("Generate E2E Tests", () => { describe("Google Antigravity Provider (gemini-3-flash)", () => { const llm = getModel("google-antigravity", "gemini-3-flash"); - it.skipIf(!antigravityToken)("should complete basic text generation", async () => { + it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle tool calling", async () => { + it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle streaming", async () => { + it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle thinking", 
async () => { + it.skipIf(!antigravityToken)("should handle thinking", { retry: 3 }, async () => { // gemini-3-flash has reasoning: false, use gemini-3-pro-high for thinking const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high"); await handleThinking(thinkingModel, { @@ -774,12 +774,12 @@ describe("Generate E2E Tests", () => { }); }); - it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => { + it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high"); await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 2048 } }); }); - it.skipIf(!antigravityToken)("should handle image input", async () => { + it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => { await handleImage(llm, { apiKey: antigravityToken }); }); }); @@ -787,19 +787,19 @@ describe("Generate E2E Tests", () => { describe("Google Antigravity Provider (claude-sonnet-4-5)", () => { const llm = getModel("google-antigravity", "claude-sonnet-4-5"); - it.skipIf(!antigravityToken)("should complete basic text generation", async () => { + it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle tool calling", async () => { + it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle streaming", async () => { + it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm, { apiKey: antigravityToken }); }); - it.skipIf(!antigravityToken)("should handle thinking", async () => { + it.skipIf(!antigravityToken)("should handle thinking", { 
retry: 3 }, async () => { // claude-sonnet-4-5 has reasoning: false, use claude-sonnet-4-5-thinking const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking"); await handleThinking(thinkingModel, { @@ -808,12 +808,12 @@ describe("Generate E2E Tests", () => { }); }); - it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => { + it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking"); await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 4096 } }); }); - it.skipIf(!antigravityToken)("should handle image input", async () => { + it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => { await handleImage(llm, { apiKey: antigravityToken }); }); }); @@ -895,23 +895,23 @@ describe("Generate E2E Tests", () => { } }); - it("should complete basic text generation", async () => { + it("should complete basic text generation", { retry: 3 }, async () => { await basicTextGeneration(llm, { apiKey: "test" }); }); - it("should handle tool calling", async () => { + it("should handle tool calling", { retry: 3 }, async () => { await handleToolCall(llm, { apiKey: "test" }); }); - it("should handle streaming", async () => { + it("should handle streaming", { retry: 3 }, async () => { await handleStreaming(llm, { apiKey: "test" }); }); - it("should handle thinking mode", async () => { + it("should handle thinking mode", { retry: 3 }, async () => { await handleThinking(llm, { apiKey: "test", reasoningEffort: "medium" }); }); - it("should handle multi-turn with thinking and tools", async () => { + it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => { await multiTurn(llm, { apiKey: "test", reasoningEffort: "medium" }); }); }); diff --git a/packages/ai/test/tokens.test.ts 
b/packages/ai/test/tokens.test.ts index c76c2654..489e09d4 100644 --- a/packages/ai/test/tokens.test.ts +++ b/packages/ai/test/tokens.test.ts @@ -1,8 +1,17 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; -import { stream } from "../src/stream.js"; +import { resolveApiKey, stream } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi } from "../src/types.js"; +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; + async function testTokensOnAbort(llm: Model, options: OptionsForApi = {}) { const context: Context = { messages: [ @@ -46,9 +55,9 @@ describe("Token Statistics on Abort", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => { const llm = getModel("google", "gemini-2.5-flash"); - it("should include token stats when aborted mid-stream", async () => { + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { await testTokensOnAbort(llm, { thinking: { enabled: true } }); - }, 10000); + }); }); describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => { @@ -57,32 +66,140 @@ describe("Token Statistics on Abort", () => { api: "openai-completions", }; - it("should include token stats when aborted mid-stream", async () => { + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { await testTokensOnAbort(llm); - }, 10000); + }); }); describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => { const llm = getModel("openai", "gpt-5-mini"); - it("should include token stats when aborted mid-stream", async () => { + it("should include token stats when 
aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { await testTokensOnAbort(llm); - }, 20000); + }); }); - describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => { - const llm = getModel("anthropic", "claude-opus-4-1-20250805"); + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => { + const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); - it("should include token stats when aborted mid-stream", async () => { - await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 }); - }, 10000); + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); + }); + + describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => { + const llm = getModel("xai", "grok-3-fast"); + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); + }); + + describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => { + const llm = getModel("groq", "openai/gpt-oss-20b"); + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); + }); + + describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => { + const llm = getModel("cerebras", "gpt-oss-120b"); + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); + }); + + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => { + const llm = getModel("zai", "glm-4.5-flash"); + + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { + await testTokensOnAbort(llm); + }); }); describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => { const llm = getModel("mistral", "devstral-medium-latest"); - it("should 
include token stats when aborted mid-stream", async () => { + it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => { await testTokensOnAbort(llm); - }, 10000); + }); + }); + + // ========================================================================= + // OAuth-based providers (credentials from ~/.pi/agent/oauth.json) + // ========================================================================= + + describe("Anthropic OAuth Provider", () => { + const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); + + it.skipIf(!anthropicOAuthToken)( + "should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + await testTokensOnAbort(llm, { apiKey: anthropicOAuthToken }); + }, + ); + }); + + describe("GitHub Copilot Provider", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testTokensOnAbort(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testTokensOnAbort(llm, { apiKey: githubCopilotToken }); + }, + ); + }); + + describe("Google Gemini CLI Provider", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testTokensOnAbort(llm, { apiKey: geminiCliToken }); + }, + ); + }); + + describe("Google Antigravity Provider", () => { + it.skipIf(!antigravityToken)( + "gemini-3-flash - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", 
"gemini-3-flash"); + await testTokensOnAbort(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testTokensOnAbort(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should include token stats when aborted mid-stream", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testTokensOnAbort(llm, { apiKey: antigravityToken }); + }, + ); }); }); diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts index 24e2aa98..cde53b94 100644 --- a/packages/ai/test/tool-call-without-result.test.ts +++ b/packages/ai/test/tool-call-without-result.test.ts @@ -1,8 +1,17 @@ import { type Static, Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; -import { complete } from "../src/stream.js"; -import type { Context, Tool } from "../src/types.js"; +import { complete, resolveApiKey } from "../src/stream.js"; +import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js"; + +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; // Simple calculate tool const calculateSchema = Type.Object({ @@ -17,7 +26,10 @@ const calculateTool: Tool = { parameters: calculateSchema, }; -async function testToolCallWithoutResult(model: any, options: any = {}) { +async function testToolCallWithoutResult( + model: Model, + 
options: OptionsForApi = {} as OptionsForApi, +) { // Step 1: Create context with the calculate tool const context: Context = { systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.", @@ -70,7 +82,8 @@ async function testToolCallWithoutResult(model: any, options: any = {}) { .filter((block) => block.type === "text") .map((block) => (block.type === "text" ? block.text : "")) .join(" "); - expect(textContent.length).toBeGreaterThan(0); + const toolCalls = secondResponse.content.filter((block) => block.type === "toolCall").length; + expect(toolCalls || textContent.length).toBeGreaterThan(0); console.log("Answer:", textContent); // Verify the stop reason is either "stop" or "toolUse" (new tool call) @@ -78,19 +91,158 @@ async function testToolCallWithoutResult(model: any, options: any = {}) { } describe("Tool Call Without Result Tests", () => { - describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider - Missing Tool Result", () => { + // ========================================================================= + // API Key-based providers + // ========================================================================= + + describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => { + const model = getModel("google", "gemini-2.5-flash"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => { + const model: Model<"openai-completions"> = { + ...getModel("openai", "gpt-4o-mini")!, + api: "openai-completions", + }; + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => { + const model = getModel("openai", 
"gpt-5-mini"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => { const model = getModel("anthropic", "claude-3-5-haiku-20241022"); - it("should filter out tool calls without corresponding tool results", async () => { + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { await testToolCallWithoutResult(model); - }, 30000); + }); }); - describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider - Missing Tool Result", () => { + describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => { + const model = getModel("xai", "grok-3-fast"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => { + const model = getModel("groq", "openai/gpt-oss-20b"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => { + const model = getModel("cerebras", "gpt-oss-120b"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => { + const model = getModel("zai", "glm-4.5-flash"); + + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { + await testToolCallWithoutResult(model); + }); + }); + + describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => { const model = getModel("mistral", 
"devstral-medium-latest"); - it("should filter out tool calls without corresponding tool results", async () => { + it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => { await testToolCallWithoutResult(model); - }, 30000); + }); + }); + + // ========================================================================= + // OAuth-based providers (credentials from ~/.pi/agent/oauth.json) + // ========================================================================= + + describe("Anthropic OAuth Provider", () => { + const model = getModel("anthropic", "claude-3-5-haiku-20241022"); + + it.skipIf(!anthropicOAuthToken)( + "should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + await testToolCallWithoutResult(model, { apiKey: anthropicOAuthToken }); + }, + ); + }); + + describe("GitHub Copilot Provider", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("github-copilot", "gpt-4o"); + await testToolCallWithoutResult(model, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("github-copilot", "claude-sonnet-4"); + await testToolCallWithoutResult(model, { apiKey: githubCopilotToken }); + }, + ); + }); + + describe("Google Gemini CLI Provider", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testToolCallWithoutResult(model, { apiKey: geminiCliToken }); + }, + ); + }); + + describe("Google Antigravity Provider", () => { + 
it.skipIf(!antigravityToken)( + "gemini-3-flash - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("google-antigravity", "gemini-3-flash"); + await testToolCallWithoutResult(model, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("google-antigravity", "claude-sonnet-4-5"); + await testToolCallWithoutResult(model, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should filter out tool calls without corresponding tool results", + { retry: 3, timeout: 30000 }, + async () => { + const model = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testToolCallWithoutResult(model, { apiKey: antigravityToken }); + }, + ); }); }); diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts index caeb136a..2fbb47f5 100644 --- a/packages/ai/test/total-tokens.test.ts +++ b/packages/ai/test/total-tokens.test.ts @@ -14,9 +14,18 @@ import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; -import { complete } from "../src/stream.js"; +import { complete, resolveApiKey } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js"; +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; + // Generate a long system prompt to trigger caching (>2k bytes for most providers) const LONG_SYSTEM_PROMPT = `You are a helpful assistant. 
Be concise in your responses. @@ -89,41 +98,49 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => { - it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => { - const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); + it( + "claude-3-5-haiku - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); - console.log(`\nAnthropic / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY }); + console.log(`\nAnthropic / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); - // Anthropic should have cache activity - const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0; - expect(hasCache).toBe(true); - }, 60000); + // Anthropic should have cache activity + const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0; + expect(hasCache).toBe(true); + }, + ); }); - describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => { - it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => { - const llm = getModel("anthropic", "claude-sonnet-4-20250514"); + describe("Anthropic (OAuth)", () => { + it.skipIf(!anthropicOAuthToken)( + "claude-sonnet-4 - should return totalTokens equal to sum of components", + { retry: 3, timeout: 
60000 }, + async () => { + const llm = getModel("anthropic", "claude-sonnet-4-20250514"); - console.log(`\nAnthropic OAuth / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN }); + console.log(`\nAnthropic OAuth / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: anthropicOAuthToken }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); - // Anthropic should have cache activity - const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0; - expect(hasCache).toBe(true); - }, 60000); + // Anthropic should have cache activity + const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0; + expect(hasCache).toBe(true); + }, + ); }); // ========================================================================= @@ -131,25 +148,29 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => { - it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => { - const llm: Model<"openai-completions"> = { - ...getModel("openai", "gpt-4o-mini")!, - api: "openai-completions", - }; + it( + "gpt-4o-mini - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm: Model<"openai-completions"> = { + ...getModel("openai", "gpt-4o-mini")!, + api: "openai-completions", + }; - console.log(`\nOpenAI Completions / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm); + console.log(`\nOpenAI Completions / ${llm.id}:`); + const { 
first, second } = await testTotalTokensWithCache(llm); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => { - it("gpt-4o - should return totalTokens equal to sum of components", async () => { + it("gpt-4o - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => { const llm = getModel("openai", "gpt-4o"); console.log(`\nOpenAI Responses / ${llm.id}:`); @@ -160,7 +181,7 @@ describe("totalTokens field", () => { assertTotalTokensEqualsComponents(first); assertTotalTokensEqualsComponents(second); - }, 60000); + }); }); // ========================================================================= @@ -168,18 +189,22 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => { - it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => { - const llm = getModel("google", "gemini-2.0-flash"); + it( + "gemini-2.0-flash - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("google", "gemini-2.0-flash"); - console.log(`\nGoogle / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm); + console.log(`\nGoogle / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 
60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -187,18 +212,22 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => { - it("grok-3-fast - should return totalTokens equal to sum of components", async () => { - const llm = getModel("xai", "grok-3-fast"); + it( + "grok-3-fast - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("xai", "grok-3-fast"); - console.log(`\nxAI / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY }); + console.log(`\nxAI / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -206,18 +235,22 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => { - it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => { - const llm = getModel("groq", "openai/gpt-oss-120b"); + it( + "openai/gpt-oss-120b - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("groq", "openai/gpt-oss-120b"); - console.log(`\nGroq / ${llm.id}:`); - const { first, second } = 
await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY }); + console.log(`\nGroq / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -225,18 +258,22 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => { - it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => { - const llm = getModel("cerebras", "gpt-oss-120b"); + it( + "gpt-oss-120b - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("cerebras", "gpt-oss-120b"); - console.log(`\nCerebras / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY }); + console.log(`\nCerebras / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -244,18 +281,22 @@ describe("totalTokens field", () => { // 
========================================================================= describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => { - it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => { - const llm = getModel("zai", "glm-4.5-flash"); + it( + "glm-4.5-flash - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("zai", "glm-4.5-flash"); - console.log(`\nz.ai / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY }); + console.log(`\nz.ai / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -263,18 +304,22 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => { - it("devstral-medium-latest - should return totalTokens equal to sum of components", async () => { - const llm = getModel("mistral", "devstral-medium-latest"); + it( + "devstral-medium-latest - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("mistral", "devstral-medium-latest"); - console.log(`\nMistral / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY }); + console.log(`\nMistral / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: 
process.env.MISTRAL_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); // ========================================================================= @@ -282,69 +327,209 @@ describe("totalTokens field", () => { // ========================================================================= describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => { - it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => { - const llm = getModel("openrouter", "anthropic/claude-sonnet-4"); + it( + "anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("openrouter", "anthropic/claude-sonnet-4"); - console.log(`\nOpenRouter / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); + console.log(`\nOpenRouter / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); - it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => { - const llm = getModel("openrouter", "deepseek/deepseek-chat"); + it( + "deepseek/deepseek-chat - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () 
=> { + const llm = getModel("openrouter", "deepseek/deepseek-chat"); - console.log(`\nOpenRouter / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); + console.log(`\nOpenRouter / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); - it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => { - const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct"); + it( + "mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct"); - console.log(`\nOpenRouter / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); + console.log(`\nOpenRouter / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); - it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => { - const llm = getModel("openrouter", "google/gemini-2.0-flash-001"); + it( + 
"google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("openrouter", "google/gemini-2.0-flash-001"); - console.log(`\nOpenRouter / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); + console.log(`\nOpenRouter / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); - it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => { - const llm = getModel("openrouter", "meta-llama/llama-4-maverick"); + it( + "meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("openrouter", "meta-llama/llama-4-maverick"); - console.log(`\nOpenRouter / ${llm.id}:`); - const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); + console.log(`\nOpenRouter / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY }); - logUsage("First request", first); - logUsage("Second request", second); + logUsage("First request", first); + logUsage("Second request", second); - assertTotalTokensEqualsComponents(first); - assertTotalTokensEqualsComponents(second); - }, 60000); + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + }); + + // ========================================================================= + // GitHub 
Copilot (OAuth) + // ========================================================================= + + describe("GitHub Copilot (OAuth)", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + + console.log(`\nGitHub Copilot / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + + console.log(`\nGitHub Copilot / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + }); + + // ========================================================================= + // Google Gemini CLI (OAuth) + // ========================================================================= + + describe("Google Gemini CLI (OAuth)", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + + console.log(`\nGoogle Gemini CLI / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: geminiCliToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + 
); + }); + + // ========================================================================= + // Google Antigravity (OAuth) + // ========================================================================= + + describe("Google Antigravity (OAuth)", () => { + it.skipIf(!antigravityToken)( + "gemini-3-flash - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + + console.log(`\nGoogle Antigravity / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + + console.log(`\nGoogle Antigravity / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should return totalTokens equal to sum of components", + { retry: 3, timeout: 60000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + + console.log(`\nGoogle Antigravity / ${llm.id}:`); + const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken }); + + logUsage("First request", first); + logUsage("Second request", second); + + assertTotalTokensEqualsComponents(first); + assertTotalTokensEqualsComponents(second); + }, + ); }); }); diff --git a/packages/ai/test/unicode-surrogate.test.ts 
b/packages/ai/test/unicode-surrogate.test.ts index 19159bab..f45df41f 100644 --- a/packages/ai/test/unicode-surrogate.test.ts +++ b/packages/ai/test/unicode-surrogate.test.ts @@ -1,8 +1,21 @@ +import { Type } from "@sinclair/typebox"; import { describe, expect, it } from "vitest"; import { getModel } from "../src/models.js"; -import { complete } from "../src/stream.js"; +import { complete, resolveApiKey } from "../src/stream.js"; import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js"; +// Empty schema for test tools - must be proper OBJECT type for Cloud Code Assist +const emptySchema = Type.Object({}); + +// Resolve OAuth tokens at module level (async, runs before tests) +const oauthTokens = await Promise.all([ + resolveApiKey("anthropic"), + resolveApiKey("github-copilot"), + resolveApiKey("google-gemini-cli"), + resolveApiKey("google-antigravity"), +]); +const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens; + /** * Test for Unicode surrogate pair handling in tool results. 
* @@ -53,7 +66,7 @@ async function testEmojiInToolResults(llm: Model, option { name: "test_tool", description: "A test tool", - parameters: {} as any, + parameters: emptySchema, }, ], }; @@ -138,7 +151,7 @@ async function testRealWorldLinkedInData(llm: Model, opt { name: "linkedin_skill", description: "Get LinkedIn comments", - parameters: {} as any, + parameters: emptySchema, }, ], }; @@ -226,7 +239,7 @@ async function testUnpairedHighSurrogate(llm: Model, opt { name: "test_tool", description: "A test tool", - parameters: {} as any, + parameters: emptySchema, }, ], }; @@ -265,15 +278,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Unicode Handling", () => { const llm = getModel("google", "gemini-2.5-flash"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -281,15 +294,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Unicode Handling", () => { const llm = getModel("openai", "gpt-4o-mini"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world 
LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -297,47 +310,243 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Unicode Handling", () => { const llm = getModel("openai", "gpt-5-mini"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); - describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Unicode Handling", () => { + describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Unicode Handling", () => { const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool 
results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); + // ========================================================================= + // OAuth-based providers (credentials from ~/.pi/agent/oauth.json) + // ========================================================================= + + describe("Anthropic OAuth Provider Unicode Handling", () => { + const llm = getModel("anthropic", "claude-3-5-haiku-20241022"); + + it.skipIf(!anthropicOAuthToken)("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { + await testEmojiInToolResults(llm, { apiKey: anthropicOAuthToken }); + }); + + it.skipIf(!anthropicOAuthToken)( + "should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + await testRealWorldLinkedInData(llm, { apiKey: anthropicOAuthToken }); + }, + ); + + it.skipIf(!anthropicOAuthToken)( + "should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + await testUnpairedHighSurrogate(llm, { apiKey: anthropicOAuthToken }); + }, + ); + }); + + describe("GitHub Copilot Provider Unicode Handling", () => { + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle emoji in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testEmojiInToolResults(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "gpt-4o"); + await testRealWorldLinkedInData(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "gpt-4o - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const 
llm = getModel("github-copilot", "gpt-4o"); + await testUnpairedHighSurrogate(llm, { apiKey: githubCopilotToken }); + }, + ); + + /* claude-sonnet-4 through github-copilot: three tool-result checks (emoji, LinkedIn data, unpaired high surrogate), each skipped when githubCopilotToken is falsy. */ it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle emoji in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testEmojiInToolResults(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testRealWorldLinkedInData(llm, { apiKey: githubCopilotToken }); + }, + ); + + it.skipIf(!githubCopilotToken)( + "claude-sonnet-4 - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("github-copilot", "claude-sonnet-4"); + await testUnpairedHighSurrogate(llm, { apiKey: githubCopilotToken }); + }, + ); + }); + + /* google-gemini-cli / gemini-2.5-flash: the suite itself is not gated; each test is skipped individually when geminiCliToken is falsy, and that token is passed as apiKey. */ describe("Google Gemini CLI Provider Unicode Handling", () => { + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle emoji in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testEmojiInToolResults(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testRealWorldLinkedInData(llm, { apiKey: geminiCliToken }); + }, + ); + + it.skipIf(!geminiCliToken)( + "gemini-2.5-flash - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-gemini-cli", "gemini-2.5-flash"); + await testUnpairedHighSurrogate(llm, { apiKey: geminiCliToken }); + }, + ); +
}); + + /* google-antigravity: runs the three tool-result checks against gemini-3-flash, claude-sonnet-4-5 and gpt-oss-120b-medium; every test is skipped when antigravityToken is falsy and passes that token as apiKey. */ describe("Google Antigravity Provider Unicode Handling", () => { + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle emoji in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testEmojiInToolResults(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testRealWorldLinkedInData(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gemini-3-flash - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gemini-3-flash"); + await testUnpairedHighSurrogate(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle emoji in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testEmojiInToolResults(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testRealWorldLinkedInData(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "claude-sonnet-4-5 - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "claude-sonnet-4-5"); + await testUnpairedHighSurrogate(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle emoji in tool results", + { retry: 3,
timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testEmojiInToolResults(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle real-world LinkedIn comment data with emoji", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testRealWorldLinkedInData(llm, { apiKey: antigravityToken }); + }, + ); + + it.skipIf(!antigravityToken)( + "gpt-oss-120b-medium - should handle unpaired high surrogate (0xD83D) in tool results", + { retry: 3, timeout: 30000 }, + async () => { + const llm = getModel("google-antigravity", "gpt-oss-120b-medium"); + await testUnpairedHighSurrogate(llm, { apiKey: antigravityToken }); + }, + ); + }); + describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => { const llm = getModel("xai", "grok-3"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -345,15 +554,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Unicode Handling", () => { const llm = getModel("groq", "openai/gpt-oss-20b"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await
testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -361,15 +570,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Unicode Handling", () => { const llm = getModel("cerebras", "gpt-oss-120b"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -377,15 +586,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => { const llm = getModel("zai", "glm-4.5-air"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await
testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); }); @@ -393,15 +602,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => { describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Unicode Handling", () => { const llm = getModel("mistral", "devstral-medium-latest"); - it("should handle emoji in tool results", async () => { + it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => { await testEmojiInToolResults(llm); }); - it("should handle real-world LinkedIn comment data with emoji", async () => { + it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => { await testRealWorldLinkedInData(llm); }); - it("should handle unpaired high surrogate (0xD83D) in tool results", async () => { + it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => { await testUnpairedHighSurrogate(llm); }); });