From 95fcda588721f1af00a32729b0b1eeb65e3d8fc1 Mon Sep 17 00:00:00 2001
From: Mario Zechner <badlogicgames@gmail.com>
Date: Sat, 20 Dec 2025 19:38:38 +0100
Subject: [PATCH] Broader testing, more providers.

---
 packages/ai/test/empty.test.ts                | 339 +++++++++++--
 packages/ai/test/image-tool-result.test.ts    | 169 ++++++-
 packages/ai/test/stream.test.ts               | 194 ++++----
 packages/ai/test/tokens.test.ts               | 145 +++++-
 .../ai/test/tool-call-without-result.test.ts  | 172 ++++++-
 packages/ai/test/total-tokens.test.ts         | 463 ++++++++++++------
 packages/ai/test/unicode-surrogate.test.ts    | 273 +++++++++--
 7 files changed, 1400 insertions(+), 355 deletions(-)
diff --git a/packages/ai/test/empty.test.ts b/packages/ai/test/empty.test.ts
index 0a8982d3..b204e06a 100644
--- a/packages/ai/test/empty.test.ts
+++ b/packages/ai/test/empty.test.ts
@@ -1,8 +1,17 @@
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
-import { complete } from "../src/stream.js";
+import { complete, resolveApiKey } from "../src/stream.js";
 import type { Api, AssistantMessage, Context, Model, OptionsForApi, UserMessage } from "../src/types.js";
 
+// Resolve OAuth tokens once via top-level await, so they are set before any describe/it.skipIf(...) guard below is registered
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
+
 async function testEmptyMessage<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
 	// Test with completely empty content array
 	const emptyMessage: UserMessage = {
@@ -133,19 +142,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Empty Messages", () => {
 		const llm = getModel("google", "gemini-2.5-flash");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -153,19 +162,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Empty Messages", () => {
 		const llm = getModel("openai", "gpt-4o-mini");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -173,39 +182,39 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Empty Messages", () => {
 		const llm = getModel("openai", "gpt-5-mini");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
 
-	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Empty Messages", () => {
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Empty Messages", () => {
 		const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -213,19 +222,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Empty Messages", () => {
 		const llm = getModel("xai", "grok-3");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -233,19 +242,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Empty Messages", () => {
 		const llm = getModel("groq", "openai/gpt-oss-20b");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -253,19 +262,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Empty Messages", () => {
 		const llm = getModel("cerebras", "gpt-oss-120b");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -273,19 +282,19 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Empty Messages", () => {
 		const llm = getModel("zai", "glm-4.5-air");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
@@ -293,20 +302,274 @@ describe("AI Providers Empty Message Tests", () => {
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Empty Messages", () => {
 		const llm = getModel("mistral", "devstral-medium-latest");
 
-		it("should handle empty content array", async () => {
+		it("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyMessage(llm);
 		});
 
-		it("should handle empty string content", async () => {
+		it("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyStringMessage(llm);
 		});
 
-		it("should handle whitespace-only content", async () => {
+		it("should handle whitespace-only content", { retry: 3, timeout: 30000 }, async () => {
 			await testWhitespaceOnlyMessage(llm);
 		});
 
-		it("should handle empty assistant message in conversation", async () => {
+		it("should handle empty assistant message in conversation", { retry: 3, timeout: 30000 }, async () => {
 			await testEmptyAssistantMessage(llm);
 		});
 	});
+
+	// =========================================================================
+	// OAuth-based providers (credentials from ~/.pi/agent/oauth.json); each test is skipped when its token is missing
+	// =========================================================================
+
+	describe("Anthropic OAuth Provider Empty Messages", () => {
+		const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
+
+		it.skipIf(!anthropicOAuthToken)("should handle empty content array", { retry: 3, timeout: 30000 }, async () => {
+			await testEmptyMessage(llm, { apiKey: anthropicOAuthToken });
+		});
+
+		it.skipIf(!anthropicOAuthToken)("should handle empty string content", { retry: 3, timeout: 30000 }, async () => {
+			await testEmptyStringMessage(llm, { apiKey: anthropicOAuthToken });
+		});
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await testWhitespaceOnlyMessage(llm, { apiKey: anthropicOAuthToken });
+			},
+		);
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await testEmptyAssistantMessage(llm, { apiKey: anthropicOAuthToken });
+			},
+		);
+	});
+
+	describe("GitHub Copilot Provider Empty Messages", () => {
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await testEmptyMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await testEmptyStringMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await testEmptyMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await testEmptyStringMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await testWhitespaceOnlyMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await testEmptyAssistantMessage(llm, { apiKey: githubCopilotToken });
+			},
+		);
+	});
+
+	describe("Google Gemini CLI Provider Empty Messages", () => {
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testEmptyMessage(llm, { apiKey: geminiCliToken });
+			},
+		);
+
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testEmptyStringMessage(llm, { apiKey: geminiCliToken });
+			},
+		);
+
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testWhitespaceOnlyMessage(llm, { apiKey: geminiCliToken });
+			},
+		);
+
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testEmptyAssistantMessage(llm, { apiKey: geminiCliToken });
+			},
+		);
+	});
+
+	describe("Google Antigravity Provider Empty Messages", () => {
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await testEmptyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await testEmptyStringMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testEmptyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testEmptyStringMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should handle empty content array",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testEmptyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should handle empty string content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testEmptyStringMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should handle whitespace-only content",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testWhitespaceOnlyMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should handle empty assistant message in conversation",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testEmptyAssistantMessage(llm, { apiKey: antigravityToken });
+			},
+		);
+	});
 });
diff --git a/packages/ai/test/image-tool-result.test.ts b/packages/ai/test/image-tool-result.test.ts
index cadfd856..9f4ef518 100644
--- a/packages/ai/test/image-tool-result.test.ts
+++ b/packages/ai/test/image-tool-result.test.ts
@@ -3,9 +3,18 @@ import { join } from "node:path";
 import { Type } from "@sinclair/typebox";
 import { describe, expect, it } from "vitest";
 import type { Api, Context, Model, Tool, ToolResultMessage } from "../src/index.js";
-import { complete, getModel } from "../src/index.js";
+import { complete, getModel, resolveApiKey } from "../src/index.js";
 import type { OptionsForApi } from "../src/types.js";
 
+// Resolve OAuth tokens once via top-level await, so they are set before any describe/it.skipIf(...) guard below is registered
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
+
 /**
  * Test that tool results containing only images work correctly across all providers.
  * This verifies that:
@@ -193,11 +202,11 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider (gemini-2.5-flash)", () => {
 		const llm = getModel("google", "gemini-2.5-flash");
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(llm);
 		});
 
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(llm);
 		});
 	});
@@ -205,11 +214,11 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
 		const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(llm);
 		});
 
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(llm);
 		});
 	});
@@ -217,11 +226,11 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
 		const llm = getModel("openai", "gpt-5-mini");
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(llm);
 		});
 
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(llm);
 		});
 	});
@@ -229,23 +238,11 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-haiku-4-5)", () => {
 		const model = getModel("anthropic", "claude-haiku-4-5");
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(model);
 		});
 
-		it("should handle tool result with text and image", async () => {
-			await handleToolWithTextAndImageResult(model);
-		});
-	});
-
-	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-5)", () => {
-		const model = getModel("anthropic", "claude-sonnet-4-5");
-
-		it("should handle tool result with only image", async () => {
-			await handleToolWithImageResult(model);
-		});
-
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(model);
 		});
 	});
@@ -253,11 +250,11 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v)", () => {
 		const llm = getModel("openrouter", "z-ai/glm-4.5v");
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(llm);
 		});
 
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(llm);
 		});
 	});
@@ -265,12 +262,134 @@ describe("Tool Results with Images", () => {
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b)", () => {
 		const llm = getModel("mistral", "pixtral-12b");
 
-		it("should handle tool result with only image", async () => {
+		it("should handle tool result with only image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithImageResult(llm);
 		});
 
-		it("should handle tool result with text and image", async () => {
+		it("should handle tool result with text and image", { retry: 3, timeout: 30000 }, async () => {
 			await handleToolWithTextAndImageResult(llm);
 		});
 	});
+
+	// =========================================================================
+	// OAuth-based providers (credentials from ~/.pi/agent/oauth.json); each test is skipped when its token is missing
+	// =========================================================================
+
+	describe("Anthropic OAuth Provider (claude-sonnet-4-5)", () => {
+		const model = getModel("anthropic", "claude-sonnet-4-5");
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await handleToolWithImageResult(model, { apiKey: anthropicOAuthToken });
+			},
+		);
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await handleToolWithTextAndImageResult(model, { apiKey: anthropicOAuthToken });
+			},
+		);
+	});
+
+	describe("GitHub Copilot Provider", () => {
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await handleToolWithImageResult(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await handleToolWithTextAndImageResult(llm, { apiKey: githubCopilotToken });
+			},
+		);
+	});
+
+	describe("Google Gemini CLI Provider", () => {
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await handleToolWithImageResult(llm, { apiKey: geminiCliToken });
+			},
+		);
+
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await handleToolWithTextAndImageResult(llm, { apiKey: geminiCliToken });
+			},
+		);
+	});
+
+	describe("Google Antigravity Provider", () => {
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await handleToolWithImageResult(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle tool result with only image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await handleToolWithImageResult(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should handle tool result with text and image",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await handleToolWithTextAndImageResult(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		// Note: gpt-oss-120b-medium does not support images, so not tested here
+	});
 });
diff --git a/packages/ai/test/stream.test.ts b/packages/ai/test/stream.test.ts
index 4d08b7b9..af1acc98 100644
--- a/packages/ai/test/stream.test.ts
+++ b/packages/ai/test/stream.test.ts
@@ -342,27 +342,27 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Gemini Provider (gemini-2.5-flash)", () => {
 		const llm = getModel("google", "gemini-2.5-flash");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle ", async () => {
+		it("should handle thinking", { retry: 3 }, async () => {
 			await handleThinking(llm, { thinking: { enabled: true, budgetTokens: 1024 } });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { thinking: { enabled: true, budgetTokens: 2048 } });
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm);
 		});
 	});
@@ -370,19 +370,19 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider (gpt-4o-mini)", () => {
 		const llm: Model<"openai-completions"> = { ...getModel("openai", "gpt-4o-mini"), api: "openai-completions" };
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm);
 		});
 	});
@@ -390,15 +390,15 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
 		const llm = getModel("openai", "gpt-5-mini");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
@@ -406,11 +406,11 @@ describe("Generate E2E Tests", () => {
 			await handleThinking(llm, { reasoningEffort: "high" });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { reasoningEffort: "high" });
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm);
 		});
 	});
@@ -418,19 +418,19 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider (claude-3-5-haiku-20241022)", () => {
 		const model = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(model, { thinkingEnabled: true });
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(model);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(model);
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(model);
 		});
 	});
@@ -438,19 +438,19 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
 		const model = getModel("openai", "gpt-5-mini");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(model);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(model);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(model);
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(model);
 		});
 	});
@@ -458,23 +458,23 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider (grok-code-fast-1 via OpenAI Completions)", () => {
 		const llm = getModel("xai", "grok-code-fast-1");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking mode", async () => {
+		it("should handle thinking mode", { retry: 3 }, async () => {
 			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 	});
@@ -482,23 +482,23 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider (gpt-oss-20b via OpenAI Completions)", () => {
 		const llm = getModel("groq", "openai/gpt-oss-20b");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking mode", async () => {
+		it("should handle thinking mode", { retry: 3 }, async () => {
 			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 	});
@@ -506,23 +506,23 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider (gpt-oss-120b via OpenAI Completions)", () => {
 		const llm = getModel("cerebras", "gpt-oss-120b");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking mode", async () => {
+		it("should handle thinking mode", { retry: 3 }, async () => {
 			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 	});
@@ -530,19 +530,19 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter Provider (glm-4.5v via OpenAI Completions)", () => {
 		const llm = getModel("openrouter", "z-ai/glm-4.5v");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking mode", async () => {
+		it("should handle thinking mode", { retry: 3 }, async () => {
 			await handleThinking(llm, { reasoningEffort: "medium" });
 		});
 
@@ -550,7 +550,7 @@ describe("Generate E2E Tests", () => {
 			await multiTurn(llm, { reasoningEffort: "medium" });
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm);
 		});
 	});
@@ -558,24 +558,24 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5-air via Anthropic Messages)", () => {
 		const llm = getModel("zai", "glm-4.5-air");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking", async () => {
+		it("should handle thinking", { retry: 3 }, async () => {
 			// Prompt doesn't trigger thinking
 			// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 	});
@@ -583,28 +583,28 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider (glm-4.5v via Anthropic Messages)", () => {
 		const llm = getModel("zai", "glm-4.5v");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle thinking", async () => {
+		it("should handle thinking", { retry: 3 }, async () => {
 			// Prompt doesn't trigger thinking
 			// await handleThinking(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			// Can't see image for some reason?
 			// await handleImage(llm);
 		});
@@ -615,25 +615,25 @@ describe("Generate E2E Tests", () => {
 		() => {
 			const llm = getModel("mistral", "devstral-medium-latest");
 
-			it("should complete basic text generation", async () => {
+			it("should complete basic text generation", { retry: 3 }, async () => {
 				await basicTextGeneration(llm);
 			});
 
-			it("should handle tool calling", async () => {
+			it("should handle tool calling", { retry: 3 }, async () => {
 				await handleToolCall(llm);
 			});
 
-			it("should handle streaming", async () => {
+			it("should handle streaming", { retry: 3 }, async () => {
 				await handleStreaming(llm);
 			});
 
-			it("should handle thinking mode", async () => {
+			it("should handle thinking mode", { retry: 3 }, async () => {
 				// FIXME Skip for now, getting a 422 stauts code, need to test with official SDK
 				// const llm = getModel("mistral", "magistral-medium-latest");
 				// await handleThinking(llm, { reasoningEffort: "medium" });
 			});
 
-			it("should handle multi-turn with thinking and tools", async () => {
+			it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 				await multiTurn(llm, { reasoningEffort: "medium" });
 			});
 		},
@@ -642,19 +642,19 @@ describe("Generate E2E Tests", () => {
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider (pixtral-12b with image support)", () => {
 		const llm = getModel("mistral", "pixtral-12b");
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm);
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm);
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm);
 		});
 
-		it("should handle image input", async () => {
+		it("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm);
 		});
 	});
@@ -667,27 +667,27 @@ describe("Generate E2E Tests", () => {
 	describe("Anthropic OAuth Provider (claude-sonnet-4-20250514)", () => {
 		const model = getModel("anthropic", "claude-sonnet-4-20250514");
 
-		it.skipIf(!anthropicOAuthToken)("should complete basic text generation", async () => {
+		it.skipIf(!anthropicOAuthToken)("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(model, { apiKey: anthropicOAuthToken });
 		});
 
-		it.skipIf(!anthropicOAuthToken)("should handle tool calling", async () => {
+		it.skipIf(!anthropicOAuthToken)("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(model, { apiKey: anthropicOAuthToken });
 		});
 
-		it.skipIf(!anthropicOAuthToken)("should handle streaming", async () => {
+		it.skipIf(!anthropicOAuthToken)("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(model, { apiKey: anthropicOAuthToken });
 		});
 
-		it.skipIf(!anthropicOAuthToken)("should handle thinking", async () => {
+		it.skipIf(!anthropicOAuthToken)("should handle thinking", { retry: 3 }, async () => {
 			await handleThinking(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
 		});
 
-		it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", async () => {
+		it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
 		});
 
-		it.skipIf(!anthropicOAuthToken)("should handle image input", async () => {
+		it.skipIf(!anthropicOAuthToken)("should handle image input", { retry: 3 }, async () => {
 			await handleImage(model, { apiKey: anthropicOAuthToken });
 		});
 	});
@@ -695,15 +695,15 @@ describe("Generate E2E Tests", () => {
 	describe("GitHub Copilot Provider (gpt-4o via OpenAI Completions)", () => {
 		const llm = getModel("github-copilot", "gpt-4o");
 
-		it.skipIf(!githubCopilotToken)("should complete basic text generation", async () => {
+		it.skipIf(!githubCopilotToken)("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm, { apiKey: githubCopilotToken });
 		});
 
-		it.skipIf(!githubCopilotToken)("should handle tool calling", async () => {
+		it.skipIf(!githubCopilotToken)("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm, { apiKey: githubCopilotToken });
 		});
 
-		it.skipIf(!githubCopilotToken)("should handle streaming", async () => {
+		it.skipIf(!githubCopilotToken)("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm, { apiKey: githubCopilotToken });
 		});
 
@@ -712,12 +712,12 @@ describe("Generate E2E Tests", () => {
 			await handleThinking(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
 		});
 
-		it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", async () => {
+		it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			const thinkingModel = getModel("github-copilot", "gpt-5-mini");
 			await multiTurn(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
 		});
 
-		it.skipIf(!githubCopilotToken)("should handle image input", async () => {
+		it.skipIf(!githubCopilotToken)("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm, { apiKey: githubCopilotToken });
 		});
 	});
@@ -725,27 +725,27 @@ describe("Generate E2E Tests", () => {
 	describe("Google Gemini CLI Provider (gemini-2.5-flash)", () => {
 		const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
 
-		it.skipIf(!geminiCliToken)("should complete basic text generation", async () => {
+		it.skipIf(!geminiCliToken)("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm, { apiKey: geminiCliToken });
 		});
 
-		it.skipIf(!geminiCliToken)("should handle tool calling", async () => {
+		it.skipIf(!geminiCliToken)("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm, { apiKey: geminiCliToken });
 		});
 
-		it.skipIf(!geminiCliToken)("should handle streaming", async () => {
+		it.skipIf(!geminiCliToken)("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm, { apiKey: geminiCliToken });
 		});
 
-		it.skipIf(!geminiCliToken)("should handle thinking", async () => {
+		it.skipIf(!geminiCliToken)("should handle thinking", { retry: 3 }, async () => {
 			await handleThinking(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 1024 } });
 		});
 
-		it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", async () => {
+		it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 2048 } });
 		});
 
-		it.skipIf(!geminiCliToken)("should handle image input", async () => {
+		it.skipIf(!geminiCliToken)("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm, { apiKey: geminiCliToken });
 		});
 	});
@@ -753,19 +753,19 @@ describe("Generate E2E Tests", () => {
 	describe("Google Antigravity Provider (gemini-3-flash)", () => {
 		const llm = getModel("google-antigravity", "gemini-3-flash");
 
-		it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
+		it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle tool calling", async () => {
+		it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle streaming", async () => {
+		it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle thinking", async () => {
+		it.skipIf(!antigravityToken)("should handle thinking", { retry: 3 }, async () => {
 			// gemini-3-flash has reasoning: false, use gemini-3-pro-high for thinking
 			const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
 			await handleThinking(thinkingModel, {
@@ -774,12 +774,12 @@ describe("Generate E2E Tests", () => {
 			});
 		});
 
-		it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
+		it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
 			await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 2048 } });
 		});
 
-		it.skipIf(!antigravityToken)("should handle image input", async () => {
+		it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm, { apiKey: antigravityToken });
 		});
 	});
@@ -787,19 +787,19 @@ describe("Generate E2E Tests", () => {
 	describe("Google Antigravity Provider (claude-sonnet-4-5)", () => {
 		const llm = getModel("google-antigravity", "claude-sonnet-4-5");
 
-		it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
+		it.skipIf(!antigravityToken)("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle tool calling", async () => {
+		it.skipIf(!antigravityToken)("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle streaming", async () => {
+		it.skipIf(!antigravityToken)("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm, { apiKey: antigravityToken });
 		});
 
-		it.skipIf(!antigravityToken)("should handle thinking", async () => {
+		it.skipIf(!antigravityToken)("should handle thinking", { retry: 3 }, async () => {
 			// claude-sonnet-4-5 has reasoning: false, use claude-sonnet-4-5-thinking
 			const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
 			await handleThinking(thinkingModel, {
@@ -808,12 +808,12 @@ describe("Generate E2E Tests", () => {
 			});
 		});
 
-		it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
+		it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
 			await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 4096 } });
 		});
 
-		it.skipIf(!antigravityToken)("should handle image input", async () => {
+		it.skipIf(!antigravityToken)("should handle image input", { retry: 3 }, async () => {
 			await handleImage(llm, { apiKey: antigravityToken });
 		});
 	});
@@ -895,23 +895,23 @@ describe("Generate E2E Tests", () => {
 			}
 		});
 
-		it("should complete basic text generation", async () => {
+		it("should complete basic text generation", { retry: 3 }, async () => {
 			await basicTextGeneration(llm, { apiKey: "test" });
 		});
 
-		it("should handle tool calling", async () => {
+		it("should handle tool calling", { retry: 3 }, async () => {
 			await handleToolCall(llm, { apiKey: "test" });
 		});
 
-		it("should handle streaming", async () => {
+		it("should handle streaming", { retry: 3 }, async () => {
 			await handleStreaming(llm, { apiKey: "test" });
 		});
 
-		it("should handle thinking mode", async () => {
+		it("should handle thinking mode", { retry: 3 }, async () => {
 			await handleThinking(llm, { apiKey: "test", reasoningEffort: "medium" });
 		});
 
-		it("should handle multi-turn with thinking and tools", async () => {
+		it("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
 			await multiTurn(llm, { apiKey: "test", reasoningEffort: "medium" });
 		});
 	});
diff --git a/packages/ai/test/tokens.test.ts b/packages/ai/test/tokens.test.ts
index c76c2654..489e09d4 100644
--- a/packages/ai/test/tokens.test.ts
+++ b/packages/ai/test/tokens.test.ts
@@ -1,8 +1,17 @@
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
-import { stream } from "../src/stream.js";
+import { resolveApiKey, stream } from "../src/stream.js";
 import type { Api, Context, Model, OptionsForApi } from "../src/types.js";
 
+// Resolve OAuth tokens at module level (async, runs before tests)
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
+
 async function testTokensOnAbort<TApi extends Api>(llm: Model<TApi>, options: OptionsForApi<TApi> = {}) {
 	const context: Context = {
 		messages: [
@@ -46,9 +55,9 @@ describe("Token Statistics on Abort", () => {
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
 		const llm = getModel("google", "gemini-2.5-flash");
 
-		it("should include token stats when aborted mid-stream", async () => {
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
 			await testTokensOnAbort(llm, { thinking: { enabled: true } });
-		}, 10000);
+		});
 	});
 
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
@@ -57,32 +66,140 @@ describe("Token Statistics on Abort", () => {
 			api: "openai-completions",
 		};
 
-		it("should include token stats when aborted mid-stream", async () => {
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
 			await testTokensOnAbort(llm);
-		}, 10000);
+		});
 	});
 
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
 		const llm = getModel("openai", "gpt-5-mini");
 
-		it("should include token stats when aborted mid-stream", async () => {
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
 			await testTokensOnAbort(llm);
-		}, 20000);
+		});
 	});
 
-	describe.skipIf(!process.env.ANTHROPIC_API_KEY && !process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider", () => {
-		const llm = getModel("anthropic", "claude-opus-4-1-20250805");
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
+		const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-		it("should include token stats when aborted mid-stream", async () => {
-			await testTokensOnAbort(llm, { thinkingEnabled: true, thinkingBudgetTokens: 2048 });
-		}, 10000);
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
+			await testTokensOnAbort(llm);
+		});
+	});
+
+	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
+		const llm = getModel("xai", "grok-3-fast");
+
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
+			await testTokensOnAbort(llm);
+		});
+	});
+
+	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
+		const llm = getModel("groq", "openai/gpt-oss-20b");
+
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
+			await testTokensOnAbort(llm);
+		});
+	});
+
+	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
+		const llm = getModel("cerebras", "gpt-oss-120b");
+
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
+			await testTokensOnAbort(llm);
+		});
+	});
+
+	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
+		const llm = getModel("zai", "glm-4.5-flash");
+
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
+			await testTokensOnAbort(llm);
+		});
 	});
 
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
 		const llm = getModel("mistral", "devstral-medium-latest");
 
-		it("should include token stats when aborted mid-stream", async () => {
+		it("should include token stats when aborted mid-stream", { retry: 3, timeout: 30000 }, async () => {
 			await testTokensOnAbort(llm);
-		}, 10000);
+		});
+	});
+
+	// =========================================================================
+	// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
+	// =========================================================================
+
+	describe("Anthropic OAuth Provider", () => {
+		const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await testTokensOnAbort(llm, { apiKey: anthropicOAuthToken });
+			},
+		);
+	});
+
+	describe("GitHub Copilot Provider", () => {
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+				await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+				await testTokensOnAbort(llm, { apiKey: githubCopilotToken });
+			},
+		);
+	});
+
+	describe("Google Gemini CLI Provider", () => {
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testTokensOnAbort(llm, { apiKey: geminiCliToken });
+			},
+		);
+	});
+
+	describe("Google Antigravity Provider", () => {
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+				await testTokensOnAbort(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testTokensOnAbort(llm, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should include token stats when aborted mid-stream",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testTokensOnAbort(llm, { apiKey: antigravityToken });
+			},
+		);
 	});
 });
diff --git a/packages/ai/test/tool-call-without-result.test.ts b/packages/ai/test/tool-call-without-result.test.ts
index 24e2aa98..cde53b94 100644
--- a/packages/ai/test/tool-call-without-result.test.ts
+++ b/packages/ai/test/tool-call-without-result.test.ts
@@ -1,8 +1,17 @@
 import { type Static, Type } from "@sinclair/typebox";
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
-import { complete } from "../src/stream.js";
-import type { Context, Tool } from "../src/types.js";
+import { complete, resolveApiKey } from "../src/stream.js";
+import type { Api, Context, Model, OptionsForApi, Tool } from "../src/types.js";
+
+// Resolve OAuth tokens at module level (async, runs before tests)
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
 
 // Simple calculate tool
 const calculateSchema = Type.Object({
@@ -17,7 +26,10 @@ const calculateTool: Tool = {
 	parameters: calculateSchema,
 };
 
-async function testToolCallWithoutResult(model: any, options: any = {}) {
+async function testToolCallWithoutResult<TApi extends Api>(
+	model: Model<TApi>,
+	options: OptionsForApi<TApi> = {} as OptionsForApi<TApi>,
+) {
 	// Step 1: Create context with the calculate tool
 	const context: Context = {
 		systemPrompt: "You are a helpful assistant. Use the calculate tool when asked to perform calculations.",
@@ -70,7 +82,8 @@ async function testToolCallWithoutResult(model: any, options: any = {}) {
 		.filter((block) => block.type === "text")
 		.map((block) => (block.type === "text" ? block.text : ""))
 		.join(" ");
-	expect(textContent.length).toBeGreaterThan(0);
+	const toolCalls = secondResponse.content.filter((block) => block.type === "toolCall").length;
+	expect(toolCalls || textContent.length).toBeGreaterThan(0);
 	console.log("Answer:", textContent);
 
 	// Verify the stop reason is either "stop" or "toolUse" (new tool call)
@@ -78,19 +91,158 @@ async function testToolCallWithoutResult(model: any, options: any = {}) {
 }
 
 describe("Tool Call Without Result Tests", () => {
-	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider - Missing Tool Result", () => {
+	// =========================================================================
+	// API Key-based providers
+	// =========================================================================
+
+	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider", () => {
+		const model = getModel("google", "gemini-2.5-flash");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider", () => {
+		const model: Model<"openai-completions"> = {
+			...getModel("openai", "gpt-4o-mini")!,
+			api: "openai-completions",
+		};
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider", () => {
+		const model = getModel("openai", "gpt-5-mini");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider", () => {
 		const model = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-		it("should filter out tool calls without corresponding tool results", async () => {
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testToolCallWithoutResult(model);
-		}, 30000);
+		});
 	});
 
-	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider - Missing Tool Result", () => {
+	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider", () => {
+		const model = getModel("xai", "grok-3-fast");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider", () => {
+		const model = getModel("groq", "openai/gpt-oss-20b");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider", () => {
+		const model = getModel("cerebras", "gpt-oss-120b");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider", () => {
+		const model = getModel("zai", "glm-4.5-flash");
+
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testToolCallWithoutResult(model);
+		});
+	});
+
+	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider", () => {
 		const model = getModel("mistral", "devstral-medium-latest");
 
-		it("should filter out tool calls without corresponding tool results", async () => {
+		it("should filter out tool calls without corresponding tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testToolCallWithoutResult(model);
-		}, 30000);
+		});
+	});
+
+	// =========================================================================
+	// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
+	// =========================================================================
+
+	describe("Anthropic OAuth Provider", () => {
+		const model = getModel("anthropic", "claude-3-5-haiku-20241022");
+
+		it.skipIf(!anthropicOAuthToken)(
+			"should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await testToolCallWithoutResult(model, { apiKey: anthropicOAuthToken });
+			},
+		);
+	});
+
+	describe("GitHub Copilot Provider", () => {
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "gpt-4o");
+				await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "claude-sonnet-4");
+				await testToolCallWithoutResult(model, { apiKey: githubCopilotToken });
+			},
+		);
+	});
+
+	describe("Google Gemini CLI Provider", () => {
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testToolCallWithoutResult(model, { apiKey: geminiCliToken });
+			},
+		);
+	});
+
+	describe("Google Antigravity Provider", () => {
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gemini-3-flash");
+				await testToolCallWithoutResult(model, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testToolCallWithoutResult(model, { apiKey: antigravityToken });
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should filter out tool calls without corresponding tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testToolCallWithoutResult(model, { apiKey: antigravityToken });
+			},
+		);
 	});
 });
diff --git a/packages/ai/test/total-tokens.test.ts b/packages/ai/test/total-tokens.test.ts
index caeb136a..2fbb47f5 100644
--- a/packages/ai/test/total-tokens.test.ts
+++ b/packages/ai/test/total-tokens.test.ts
@@ -14,9 +14,18 @@
 
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
-import { complete } from "../src/stream.js";
+import { complete, resolveApiKey } from "../src/stream.js";
 import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
 
+// Resolve OAuth tokens at module level (async, runs before tests)
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
+
 // Generate a long system prompt to trigger caching (>2k bytes for most providers)
 const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
 
@@ -89,41 +98,49 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
-		it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
+		it(
+			"claude-3-5-haiku - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-			console.log(`\nAnthropic / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
+				console.log(`\nAnthropic / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
 
-			// Anthropic should have cache activity
-			const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
-			expect(hasCache).toBe(true);
-		}, 60000);
+				// Anthropic should have cache activity
+				const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
+				expect(hasCache).toBe(true);
+			},
+		);
 	});
 
-	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
-		it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("anthropic", "claude-sonnet-4-20250514");
+	describe("Anthropic (OAuth)", () => {
+		it.skipIf(!anthropicOAuthToken)(
+			"claude-sonnet-4 - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("anthropic", "claude-sonnet-4-20250514");
 
-			console.log(`\nAnthropic OAuth / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
+				console.log(`\nAnthropic OAuth / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: anthropicOAuthToken });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
 
-			// Anthropic should have cache activity
-			const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
-			expect(hasCache).toBe(true);
-		}, 60000);
+				// Anthropic should have cache activity
+				const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
+				expect(hasCache).toBe(true);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -131,25 +148,29 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
-		it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
-			const llm: Model<"openai-completions"> = {
-				...getModel("openai", "gpt-4o-mini")!,
-				api: "openai-completions",
-			};
+		it(
+			"gpt-4o-mini - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm: Model<"openai-completions"> = {
+					...getModel("openai", "gpt-4o-mini")!,
+					api: "openai-completions",
+				};
 
-			console.log(`\nOpenAI Completions / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm);
+				console.log(`\nOpenAI Completions / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm);
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
-		it("gpt-4o - should return totalTokens equal to sum of components", async () => {
+		it("gpt-4o - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
 			const llm = getModel("openai", "gpt-4o");
 
 			console.log(`\nOpenAI Responses / ${llm.id}:`);
@@ -160,7 +181,7 @@ describe("totalTokens field", () => {
 
 			assertTotalTokensEqualsComponents(first);
 			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+		});
 	});
 
 	// =========================================================================
@@ -168,18 +189,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
-		it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("google", "gemini-2.0-flash");
+		it(
+			"gemini-2.0-flash - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("google", "gemini-2.0-flash");
 
-			console.log(`\nGoogle / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm);
+				console.log(`\nGoogle / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm);
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -187,18 +212,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
-		it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("xai", "grok-3-fast");
+		it(
+			"grok-3-fast - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("xai", "grok-3-fast");
 
-			console.log(`\nxAI / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
+				console.log(`\nxAI / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -206,18 +235,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
-		it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("groq", "openai/gpt-oss-120b");
+		it(
+			"openai/gpt-oss-120b - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("groq", "openai/gpt-oss-120b");
 
-			console.log(`\nGroq / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
+				console.log(`\nGroq / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -225,18 +258,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
-		it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("cerebras", "gpt-oss-120b");
+		it(
+			"gpt-oss-120b - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("cerebras", "gpt-oss-120b");
 
-			console.log(`\nCerebras / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
+				console.log(`\nCerebras / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -244,18 +281,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
-		it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("zai", "glm-4.5-flash");
+		it(
+			"glm-4.5-flash - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("zai", "glm-4.5-flash");
 
-			console.log(`\nz.ai / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
+				console.log(`\nz.ai / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -263,18 +304,22 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => {
-		it("devstral-medium-latest - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("mistral", "devstral-medium-latest");
+		it(
+			"devstral-medium-latest - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("mistral", "devstral-medium-latest");
 
-			console.log(`\nMistral / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
+				console.log(`\nMistral / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 
 	// =========================================================================
@@ -282,69 +327,209 @@ describe("totalTokens field", () => {
 	// =========================================================================
 
 	describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
-		it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
+		it(
+			"anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
 
-			console.log(`\nOpenRouter / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+				console.log(`\nOpenRouter / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 
-		it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("openrouter", "deepseek/deepseek-chat");
+		it(
+			"deepseek/deepseek-chat - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("openrouter", "deepseek/deepseek-chat");
 
-			console.log(`\nOpenRouter / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+				console.log(`\nOpenRouter / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 
-		it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
+		it(
+			"mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
 
-			console.log(`\nOpenRouter / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+				console.log(`\nOpenRouter / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 
-		it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
+		it(
+			"google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
 
-			console.log(`\nOpenRouter / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+				console.log(`\nOpenRouter / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 
-		it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
-			const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
+		it(
+			"meta-llama/llama-4-maverick - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
 
-			console.log(`\nOpenRouter / ${llm.id}:`);
-			const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
+				console.log(`\nOpenRouter / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
 
-			logUsage("First request", first);
-			logUsage("Second request", second);
+				logUsage("First request", first);
+				logUsage("Second request", second);
 
-			assertTotalTokensEqualsComponents(first);
-			assertTotalTokensEqualsComponents(second);
-		}, 60000);
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+	});
+
+	// =========================================================================
+	// GitHub Copilot (OAuth)
+	// =========================================================================
+
+	describe("GitHub Copilot (OAuth)", () => {
+		it.skipIf(!githubCopilotToken)(
+			"gpt-4o - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("github-copilot", "gpt-4o");
+
+				console.log(`\nGitHub Copilot / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+
+		it.skipIf(!githubCopilotToken)(
+			"claude-sonnet-4 - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("github-copilot", "claude-sonnet-4");
+
+				console.log(`\nGitHub Copilot / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+	});
+
+	// =========================================================================
+	// Google Gemini CLI (OAuth)
+	// =========================================================================
+
+	describe("Google Gemini CLI (OAuth)", () => {
+		it.skipIf(!geminiCliToken)(
+			"gemini-2.5-flash - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
+
+				console.log(`\nGoogle Gemini CLI / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: geminiCliToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+	});
+
+	// =========================================================================
+	// Google Antigravity (OAuth)
+	// =========================================================================
+
+	describe("Google Antigravity (OAuth)", () => {
+		it.skipIf(!antigravityToken)(
+			"gemini-3-flash - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gemini-3-flash");
+
+				console.log(`\nGoogle Antigravity / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("google-antigravity", "claude-sonnet-4-5");
+
+				console.log(`\nGoogle Antigravity / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
+
+		it.skipIf(!antigravityToken)(
+			"gpt-oss-120b-medium - should return totalTokens equal to sum of components",
+			{ retry: 3, timeout: 60000 },
+			async () => {
+				const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
+
+				console.log(`\nGoogle Antigravity / ${llm.id}:`);
+				const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
+
+				logUsage("First request", first);
+				logUsage("Second request", second);
+
+				assertTotalTokensEqualsComponents(first);
+				assertTotalTokensEqualsComponents(second);
+			},
+		);
 	});
 });
diff --git a/packages/ai/test/unicode-surrogate.test.ts b/packages/ai/test/unicode-surrogate.test.ts
index 19159bab..f45df41f 100644
--- a/packages/ai/test/unicode-surrogate.test.ts
+++ b/packages/ai/test/unicode-surrogate.test.ts
@@ -1,8 +1,21 @@
+import { Type } from "@sinclair/typebox";
 import { describe, expect, it } from "vitest";
 import { getModel } from "../src/models.js";
-import { complete } from "../src/stream.js";
+import { complete, resolveApiKey } from "../src/stream.js";
 import type { Api, Context, Model, OptionsForApi, ToolResultMessage } from "../src/types.js";
 
+// Empty schema for test tools - must be a proper OBJECT type for Cloud Code Assist
+const emptySchema = Type.Object({});
+
+// Resolve OAuth tokens at module level (async, runs before tests)
+const oauthTokens = await Promise.all([
+	resolveApiKey("anthropic"),
+	resolveApiKey("github-copilot"),
+	resolveApiKey("google-gemini-cli"),
+	resolveApiKey("google-antigravity"),
+]);
+const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
+
 /**
  * Test for Unicode surrogate pair handling in tool results.
  *
@@ -53,7 +66,7 @@ async function testEmojiInToolResults<TApi extends Api>(llm: Model<TApi>, option
 			{
 				name: "test_tool",
 				description: "A test tool",
-				parameters: {} as any,
+				parameters: emptySchema,
 			},
 		],
 	};
@@ -138,7 +151,7 @@ async function testRealWorldLinkedInData<TApi extends Api>(llm: Model<TApi>, opt
 			{
 				name: "linkedin_skill",
 				description: "Get LinkedIn comments",
-				parameters: {} as any,
+				parameters: emptySchema,
 			},
 		],
 	};
@@ -226,7 +239,7 @@ async function testUnpairedHighSurrogate<TApi extends Api>(llm: Model<TApi>, opt
 			{
 				name: "test_tool",
 				description: "A test tool",
-				parameters: {} as any,
+				parameters: emptySchema,
 			},
 		],
 	};
@@ -265,15 +278,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.GEMINI_API_KEY)("Google Provider Unicode Handling", () => {
 		const llm = getModel("google", "gemini-2.5-flash");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -281,15 +294,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions Provider Unicode Handling", () => {
 		const llm = getModel("openai", "gpt-4o-mini");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -297,47 +310,243 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider Unicode Handling", () => {
 		const llm = getModel("openai", "gpt-5-mini");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
 
-	describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider Unicode Handling", () => {
+	describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic Provider Unicode Handling", () => {
 		const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
 
+	// =========================================================================
+	// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
+	// =========================================================================
+
+	// OAuth variant of the Anthropic suite: the same three scenarios, but the credential obtained
+	// from resolveApiKey("anthropic") is passed explicitly as apiKey.
+	describe("Anthropic OAuth Provider Unicode Handling", () => {
+		const haiku = getModel("anthropic", "claude-3-5-haiku-20241022");
+		// Skip guard shared by all three cases: they are skipped when no credential was resolved.
+		const oauthIt = it.skipIf(!anthropicOAuthToken);
+
+		oauthIt("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			await testEmojiInToolResults(haiku, { apiKey: anthropicOAuthToken });
+		});
+
+		oauthIt("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
+			await testRealWorldLinkedInData(haiku, { apiKey: anthropicOAuthToken });
+		});
+
+		oauthIt(
+			"should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				await testUnpairedHighSurrogate(haiku, { apiKey: anthropicOAuthToken });
+			},
+		);
+	});
+
+	// GitHub Copilot: the three scenarios are repeated for two model ids (gpt-4o and
+	// claude-sonnet-4), both requested with the same resolved Copilot credential.
+	describe("GitHub Copilot Provider Unicode Handling", () => {
+		// Skip guard shared by every case in this suite: cases are skipped when
+		// resolveApiKey("github-copilot") did not produce a credential.
+		const oauthIt = it.skipIf(!githubCopilotToken);
+
+		// --- gpt-4o ---
+		oauthIt("gpt-4o - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("github-copilot", "gpt-4o");
+			await testEmojiInToolResults(model, { apiKey: githubCopilotToken });
+		});
+
+		oauthIt(
+			"gpt-4o - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "gpt-4o");
+				await testRealWorldLinkedInData(model, { apiKey: githubCopilotToken });
+			},
+		);
+
+		oauthIt(
+			"gpt-4o - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "gpt-4o");
+				await testUnpairedHighSurrogate(model, { apiKey: githubCopilotToken });
+			},
+		);
+
+		// --- claude-sonnet-4 ---
+		oauthIt("claude-sonnet-4 - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("github-copilot", "claude-sonnet-4");
+			await testEmojiInToolResults(model, { apiKey: githubCopilotToken });
+		});
+
+		oauthIt(
+			"claude-sonnet-4 - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "claude-sonnet-4");
+				await testRealWorldLinkedInData(model, { apiKey: githubCopilotToken });
+			},
+		);
+
+		oauthIt(
+			"claude-sonnet-4 - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("github-copilot", "claude-sonnet-4");
+				await testUnpairedHighSurrogate(model, { apiKey: githubCopilotToken });
+			},
+		);
+	});
+
+	describe("Google Gemini CLI Provider Unicode Handling", () => {
+		// Skip guard shared by all three cases: they only run when resolveApiKey("google-gemini-cli")
+		// produced a credential, which is then passed explicitly as apiKey.
+		const oauthIt = it.skipIf(!geminiCliToken);
+
+		oauthIt("gemini-2.5-flash - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("google-gemini-cli", "gemini-2.5-flash");
+			await testEmojiInToolResults(model, { apiKey: geminiCliToken });
+		});
+
+		oauthIt(
+			"gemini-2.5-flash - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testRealWorldLinkedInData(model, { apiKey: geminiCliToken });
+			},
+		);
+
+		oauthIt(
+			"gemini-2.5-flash - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-gemini-cli", "gemini-2.5-flash");
+				await testUnpairedHighSurrogate(model, { apiKey: geminiCliToken });
+			},
+		);
+	});
+
+	// Google Antigravity: the same three scenarios (emoji in a tool result, real-world LinkedIn
+	// comment data, unpaired high surrogate 0xD83D) are repeated for three model ids that are all
+	// requested through the "google-antigravity" provider with one shared credential:
+	// gemini-3-flash, claude-sonnet-4-5 and gpt-oss-120b-medium. Every case is registered with
+	// retry: 3 and a 30000 ms timeout.
+	describe("Google Antigravity Provider Unicode Handling", () => {
+		// Skip guard shared by every case in this suite: cases are skipped when
+		// resolveApiKey("google-antigravity") did not produce a credential.
+		const oauthIt = it.skipIf(!antigravityToken);
+
+		// --- gemini-3-flash ---
+		oauthIt("gemini-3-flash - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("google-antigravity", "gemini-3-flash");
+			await testEmojiInToolResults(model, { apiKey: antigravityToken });
+		});
+
+		oauthIt(
+			"gemini-3-flash - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gemini-3-flash");
+				await testRealWorldLinkedInData(model, { apiKey: antigravityToken });
+			},
+		);
+
+		oauthIt(
+			"gemini-3-flash - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gemini-3-flash");
+				await testUnpairedHighSurrogate(model, { apiKey: antigravityToken });
+			},
+		);
+
+		// --- claude-sonnet-4-5 ---
+		oauthIt("claude-sonnet-4-5 - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("google-antigravity", "claude-sonnet-4-5");
+			await testEmojiInToolResults(model, { apiKey: antigravityToken });
+		});
+
+		oauthIt(
+			"claude-sonnet-4-5 - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testRealWorldLinkedInData(model, { apiKey: antigravityToken });
+			},
+		);
+
+		oauthIt(
+			"claude-sonnet-4-5 - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "claude-sonnet-4-5");
+				await testUnpairedHighSurrogate(model, { apiKey: antigravityToken });
+			},
+		);
+
+		// --- gpt-oss-120b-medium ---
+		oauthIt("gpt-oss-120b-medium - should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
+			const model = getModel("google-antigravity", "gpt-oss-120b-medium");
+			await testEmojiInToolResults(model, { apiKey: antigravityToken });
+		});
+
+		oauthIt(
+			"gpt-oss-120b-medium - should handle real-world LinkedIn comment data with emoji",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testRealWorldLinkedInData(model, { apiKey: antigravityToken });
+			},
+		);
+
+		oauthIt(
+			"gpt-oss-120b-medium - should handle unpaired high surrogate (0xD83D) in tool results",
+			{ retry: 3, timeout: 30000 },
+			async () => {
+				const model = getModel("google-antigravity", "gpt-oss-120b-medium");
+				await testUnpairedHighSurrogate(model, { apiKey: antigravityToken });
+			},
+		);
+	});
+
 	describe.skipIf(!process.env.XAI_API_KEY)("xAI Provider Unicode Handling", () => {
 		const llm = getModel("xai", "grok-3");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -345,15 +554,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.GROQ_API_KEY)("Groq Provider Unicode Handling", () => {
 		const llm = getModel("groq", "openai/gpt-oss-20b");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -361,15 +570,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras Provider Unicode Handling", () => {
 		const llm = getModel("cerebras", "gpt-oss-120b");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -377,15 +586,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.ZAI_API_KEY)("zAI Provider Unicode Handling", () => {
 		const llm = getModel("zai", "glm-4.5-air");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});
@@ -393,15 +602,15 @@ describe("AI Providers Unicode Surrogate Pair Tests", () => {
 	describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral Provider Unicode Handling", () => {
 		const llm = getModel("mistral", "devstral-medium-latest");
 
-		it("should handle emoji in tool results", async () => {
+		it("should handle emoji in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testEmojiInToolResults(llm);
 		});
 
-		it("should handle real-world LinkedIn comment data with emoji", async () => {
+		it("should handle real-world LinkedIn comment data with emoji", { retry: 3, timeout: 30000 }, async () => {
 			await testRealWorldLinkedInData(llm);
 		});
 
-		it("should handle unpaired high surrogate (0xD83D) in tool results", async () => {
+		it("should handle unpaired high surrogate (0xD83D) in tool results", { retry: 3, timeout: 30000 }, async () => {
 			await testUnpairedHighSurrogate(llm);
 		});
 	});