Add Google Gemini CLI and Antigravity OAuth providers

- Add google-gemini-cli provider: free Gemini 2.0/2.5 via Cloud Code Assist
- Add google-antigravity provider: free Gemini 3, Claude, GPT-OSS via sandbox
- Move OAuth infrastructure from coding-agent to ai package
- Fix thinking signature handling for cross-model handoff
- Fix OpenAI message ID length limit (max 64 chars)
- Add GitHub Copilot overflow pattern detection
- Add OAuth provider tests for context overflow and streaming
This commit is contained in:
Author: Mario Zechner — 2025-12-20 18:21:32 +01:00
Parent commit: 3266cac0f1
Commit: c359023c3f
25 changed files with 1392 additions and 413 deletions

View file

@ -15,10 +15,18 @@ import type { ChildProcess } from "child_process";
import { execSync, spawn } from "child_process";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { AssistantMessage, Context, Model, Usage } from "../src/types.js";
import { isContextOverflow } from "../src/utils/overflow.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
// Lorem ipsum paragraph for realistic token estimation
const LOREM_IPSUM = `Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. `;
@ -111,6 +119,43 @@ describe("Context overflow error handling", () => {
}, 120000);
});
// =============================================================================
// GitHub Copilot (OAuth)
// Tests both OpenAI and Anthropic models via Copilot
// =============================================================================
describe("GitHub Copilot (OAuth)", () => {
// OpenAI model via Copilot
it.skipIf(!githubCopilotToken)(
"gpt-4o - should detect overflow via isContextOverflow",
async () => {
const model = getModel("github-copilot", "gpt-4o");
const result = await testContextOverflow(model, githubCopilotToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the limit of \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
// Anthropic model via Copilot
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should detect overflow via isContextOverflow",
async () => {
const model = getModel("github-copilot", "claude-sonnet-4");
const result = await testContextOverflow(model, githubCopilotToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/exceeds the limit of \d+/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
// =============================================================================
// OpenAI
// Expected pattern: "exceeds the context window"
@ -158,6 +203,65 @@ describe("Context overflow error handling", () => {
}, 120000);
});
// =============================================================================
// Google Gemini CLI (OAuth)
// Uses same API as Google, expects same error pattern
// =============================================================================
describe("Google Gemini CLI (OAuth)", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should detect overflow via isContextOverflow",
async () => {
const model = getModel("google-gemini-cli", "gemini-2.5-flash");
const result = await testContextOverflow(model, geminiCliToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
// =============================================================================
// Google Antigravity (OAuth)
// Tests both Gemini and Anthropic models via Antigravity
// =============================================================================
describe("Google Antigravity (OAuth)", () => {
// Gemini model
it.skipIf(!antigravityToken)(
"gemini-3-flash - should detect overflow via isContextOverflow",
async () => {
const model = getModel("google-antigravity", "gemini-3-flash");
const result = await testContextOverflow(model, antigravityToken!);
logResult(result);
expect(result.stopReason).toBe("error");
expect(result.errorMessage).toMatch(/input token count.*exceeds the maximum/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
// Anthropic model via Antigravity
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should detect overflow via isContextOverflow",
async () => {
const model = getModel("google-antigravity", "claude-sonnet-4-5");
const result = await testContextOverflow(model, antigravityToken!);
logResult(result);
expect(result.stopReason).toBe("error");
// Anthropic models return "prompt is too long" pattern
expect(result.errorMessage).toMatch(/prompt is too long/i);
expect(isContextOverflow(result.response, model.contextWindow)).toBe(true);
},
120000,
);
});
// =============================================================================
// xAI
// Expected pattern: "maximum prompt length is X but the request contains Y"

View file

@ -5,13 +5,22 @@ import { dirname, join } from "path";
import { fileURLToPath } from "url";
import { afterAll, beforeAll, describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete, stream } from "../src/stream.js";
import { complete, resolveApiKey, stream } from "../src/stream.js";
import type { Api, Context, ImageContent, Model, OptionsForApi, Tool, ToolResultMessage } from "../src/types.js";
import { StringEnum } from "../src/utils/typebox-helpers.js";
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
// Calculator tool definition (same as examples)
// Note: Using StringEnum helper because Google's API doesn't support anyOf/const patterns
// that Type.Enum generates. Google requires { type: "string", enum: [...] } format.
@ -314,7 +323,7 @@ async function multiTurn<TApi extends Api>(model: Model<TApi>, options?: Options
context.messages.push(...results);
// If we got a stop response with text content, we're likely done
expect(response.stopReason).not.toBe("error");
expect(response.stopReason, `Error: ${response.errorMessage}`).not.toBe("error");
if (response.stopReason === "stop") {
break;
}
@ -426,34 +435,6 @@ describe("Generate E2E Tests", () => {
});
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic Provider (claude-sonnet-4-20250514)", () => {
const model = getModel("anthropic", "claude-sonnet-4-20250514");
it("should complete basic text generation", async () => {
await basicTextGeneration(model, { thinkingEnabled: true });
});
it("should handle tool calling", async () => {
await handleToolCall(model);
});
it("should handle streaming", async () => {
await handleStreaming(model);
});
it("should handle thinking", async () => {
await handleThinking(model, { thinkingEnabled: true });
});
it("should handle multi-turn with thinking and tools", async () => {
await multiTurn(model, { thinkingEnabled: true });
});
it("should handle image input", async () => {
await handleImage(model);
});
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses Provider (gpt-5-mini)", () => {
const model = getModel("openai", "gpt-5-mini");
@ -678,30 +659,163 @@ describe("Generate E2E Tests", () => {
});
});
// Read GitHub Copilot token from ~/.pi/agent/oauth.json if available
let githubCopilotToken: string | undefined;
try {
const oauthPath = join(process.env.HOME || "", ".pi/agent/oauth.json");
const oauthData = JSON.parse(readFileSync(oauthPath, "utf-8"));
githubCopilotToken = oauthData["github-copilot"]?.access;
} catch {
// oauth.json doesn't exist or is invalid
}
// =========================================================================
// OAuth-based providers (credentials from ~/.pi/agent/oauth.json)
// Tokens are resolved at module level (see oauthTokens above)
// =========================================================================
describe.skipIf(!githubCopilotToken)("GitHub Copilot Provider (gpt-4o via OpenAI Completions)", () => {
describe("Anthropic OAuth Provider (claude-sonnet-4-20250514)", () => {
const model = getModel("anthropic", "claude-sonnet-4-20250514");
it.skipIf(!anthropicOAuthToken)("should complete basic text generation", async () => {
await basicTextGeneration(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle tool calling", async () => {
await handleToolCall(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle streaming", async () => {
await handleStreaming(model, { apiKey: anthropicOAuthToken });
});
it.skipIf(!anthropicOAuthToken)("should handle thinking", async () => {
await handleThinking(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
});
it.skipIf(!anthropicOAuthToken)("should handle multi-turn with thinking and tools", async () => {
await multiTurn(model, { apiKey: anthropicOAuthToken, thinkingEnabled: true });
});
it.skipIf(!anthropicOAuthToken)("should handle image input", async () => {
await handleImage(model, { apiKey: anthropicOAuthToken });
});
});
describe("GitHub Copilot Provider (gpt-4o via OpenAI Completions)", () => {
const llm = getModel("github-copilot", "gpt-4o");
it("should complete basic text generation", async () => {
it.skipIf(!githubCopilotToken)("should complete basic text generation", async () => {
await basicTextGeneration(llm, { apiKey: githubCopilotToken });
});
it("should handle tool calling", async () => {
it.skipIf(!githubCopilotToken)("should handle tool calling", async () => {
await handleToolCall(llm, { apiKey: githubCopilotToken });
});
it("should handle streaming", async () => {
it.skipIf(!githubCopilotToken)("should handle streaming", async () => {
await handleStreaming(llm, { apiKey: githubCopilotToken });
});
it.skipIf(!githubCopilotToken)("should handle thinking", { retry: 2 }, async () => {
const thinkingModel = getModel("github-copilot", "gpt-5-mini");
await handleThinking(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
});
it.skipIf(!githubCopilotToken)("should handle multi-turn with thinking and tools", async () => {
const thinkingModel = getModel("github-copilot", "gpt-5-mini");
await multiTurn(thinkingModel, { apiKey: githubCopilotToken, reasoningEffort: "high" });
});
it.skipIf(!githubCopilotToken)("should handle image input", async () => {
await handleImage(llm, { apiKey: githubCopilotToken });
});
});
describe("Google Gemini CLI Provider (gemini-2.5-flash)", () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
it.skipIf(!geminiCliToken)("should complete basic text generation", async () => {
await basicTextGeneration(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle tool calling", async () => {
await handleToolCall(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle streaming", async () => {
await handleStreaming(llm, { apiKey: geminiCliToken });
});
it.skipIf(!geminiCliToken)("should handle thinking", async () => {
await handleThinking(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 1024 } });
});
it.skipIf(!geminiCliToken)("should handle multi-turn with thinking and tools", async () => {
await multiTurn(llm, { apiKey: geminiCliToken, thinking: { enabled: true, budgetTokens: 2048 } });
});
it.skipIf(!geminiCliToken)("should handle image input", async () => {
await handleImage(llm, { apiKey: geminiCliToken });
});
});
describe("Google Antigravity Provider (gemini-3-flash)", () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
await basicTextGeneration(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle tool calling", async () => {
await handleToolCall(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle streaming", async () => {
await handleStreaming(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle thinking", async () => {
// gemini-3-flash has reasoning: false, use gemini-3-pro-high for thinking
const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
await handleThinking(thinkingModel, {
apiKey: antigravityToken,
thinking: { enabled: true, budgetTokens: 1024 },
});
});
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
const thinkingModel = getModel("google-antigravity", "gemini-3-pro-high");
await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 2048 } });
});
it.skipIf(!antigravityToken)("should handle image input", async () => {
await handleImage(llm, { apiKey: antigravityToken });
});
});
describe("Google Antigravity Provider (claude-sonnet-4-5)", () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
it.skipIf(!antigravityToken)("should complete basic text generation", async () => {
await basicTextGeneration(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle tool calling", async () => {
await handleToolCall(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle streaming", async () => {
await handleStreaming(llm, { apiKey: antigravityToken });
});
it.skipIf(!antigravityToken)("should handle thinking", async () => {
// claude-sonnet-4-5 has reasoning: false, use claude-sonnet-4-5-thinking
const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
await handleThinking(thinkingModel, {
apiKey: antigravityToken,
thinking: { enabled: true, budgetTokens: 4096 },
});
});
it.skipIf(!antigravityToken)("should handle multi-turn with thinking and tools", async () => {
const thinkingModel = getModel("google-antigravity", "claude-sonnet-4-5-thinking");
await multiTurn(thinkingModel, { apiKey: antigravityToken, thinking: { enabled: true, budgetTokens: 4096 } });
});
it.skipIf(!antigravityToken)("should handle image input", async () => {
await handleImage(llm, { apiKey: antigravityToken });
});
});
// Check if ollama is installed