Broader testing, more providers.

This commit is contained in:
Mario Zechner 2025-12-20 19:38:38 +01:00
parent 2a0283ecfd
commit 95fcda5887
7 changed files with 1400 additions and 355 deletions

View file

@ -14,9 +14,18 @@
import { describe, expect, it } from "vitest";
import { getModel } from "../src/models.js";
import { complete } from "../src/stream.js";
import { complete, resolveApiKey } from "../src/stream.js";
import type { Api, Context, Model, OptionsForApi, Usage } from "../src/types.js";
// Resolve OAuth tokens at module level (async, runs before tests)
const oauthTokens = await Promise.all([
resolveApiKey("anthropic"),
resolveApiKey("github-copilot"),
resolveApiKey("google-gemini-cli"),
resolveApiKey("google-antigravity"),
]);
const [anthropicOAuthToken, githubCopilotToken, geminiCliToken, antigravityToken] = oauthTokens;
// Generate a long system prompt to trigger caching (>2k bytes for most providers)
const LONG_SYSTEM_PROMPT = `You are a helpful assistant. Be concise in your responses.
@ -89,41 +98,49 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.ANTHROPIC_API_KEY)("Anthropic (API Key)", () => {
it("claude-3-5-haiku - should return totalTokens equal to sum of components", async () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
it(
"claude-3-5-haiku - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-3-5-haiku-20241022");
console.log(`\nAnthropic / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
console.log(`\nAnthropic / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
}, 60000);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
describe.skipIf(!process.env.ANTHROPIC_OAUTH_TOKEN)("Anthropic (OAuth)", () => {
it("claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
describe("Anthropic (OAuth)", () => {
it.skipIf(!anthropicOAuthToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("anthropic", "claude-sonnet-4-20250514");
console.log(`\nAnthropic OAuth / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ANTHROPIC_OAUTH_TOKEN });
console.log(`\nAnthropic OAuth / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: anthropicOAuthToken });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
}, 60000);
// Anthropic should have cache activity
const hasCache = second.cacheRead > 0 || second.cacheWrite > 0 || first.cacheWrite > 0;
expect(hasCache).toBe(true);
},
);
});
// =========================================================================
@ -131,25 +148,29 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Completions", () => {
it("gpt-4o-mini - should return totalTokens equal to sum of components", async () => {
const llm: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
it(
"gpt-4o-mini - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm: Model<"openai-completions"> = {
...getModel("openai", "gpt-4o-mini")!,
api: "openai-completions",
};
console.log(`\nOpenAI Completions / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
console.log(`\nOpenAI Completions / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
describe.skipIf(!process.env.OPENAI_API_KEY)("OpenAI Responses", () => {
it("gpt-4o - should return totalTokens equal to sum of components", async () => {
it("gpt-4o - should return totalTokens equal to sum of components", { retry: 3, timeout: 60000 }, async () => {
const llm = getModel("openai", "gpt-4o");
console.log(`\nOpenAI Responses / ${llm.id}:`);
@ -160,7 +181,7 @@ describe("totalTokens field", () => {
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
});
});
// =========================================================================
@ -168,18 +189,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.GEMINI_API_KEY)("Google", () => {
it("gemini-2.0-flash - should return totalTokens equal to sum of components", async () => {
const llm = getModel("google", "gemini-2.0-flash");
it(
"gemini-2.0-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google", "gemini-2.0-flash");
console.log(`\nGoogle / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
console.log(`\nGoogle / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm);
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -187,18 +212,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.XAI_API_KEY)("xAI", () => {
it("grok-3-fast - should return totalTokens equal to sum of components", async () => {
const llm = getModel("xai", "grok-3-fast");
it(
"grok-3-fast - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("xai", "grok-3-fast");
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
console.log(`\nxAI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.XAI_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -206,18 +235,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.GROQ_API_KEY)("Groq", () => {
it("openai/gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
const llm = getModel("groq", "openai/gpt-oss-120b");
it(
"openai/gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("groq", "openai/gpt-oss-120b");
console.log(`\nGroq / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
console.log(`\nGroq / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.GROQ_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -225,18 +258,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.CEREBRAS_API_KEY)("Cerebras", () => {
it("gpt-oss-120b - should return totalTokens equal to sum of components", async () => {
const llm = getModel("cerebras", "gpt-oss-120b");
it(
"gpt-oss-120b - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("cerebras", "gpt-oss-120b");
console.log(`\nCerebras / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
console.log(`\nCerebras / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.CEREBRAS_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -244,18 +281,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.ZAI_API_KEY)("z.ai", () => {
it("glm-4.5-flash - should return totalTokens equal to sum of components", async () => {
const llm = getModel("zai", "glm-4.5-flash");
it(
"glm-4.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("zai", "glm-4.5-flash");
console.log(`\nz.ai / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
console.log(`\nz.ai / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.ZAI_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -263,18 +304,22 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.MISTRAL_API_KEY)("Mistral", () => {
it("devstral-medium-latest - should return totalTokens equal to sum of components", async () => {
const llm = getModel("mistral", "devstral-medium-latest");
it(
"devstral-medium-latest - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("mistral", "devstral-medium-latest");
console.log(`\nMistral / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
console.log(`\nMistral / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.MISTRAL_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
@ -282,69 +327,209 @@ describe("totalTokens field", () => {
// =========================================================================
describe.skipIf(!process.env.OPENROUTER_API_KEY)("OpenRouter", () => {
it("anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
it(
"anthropic/claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "anthropic/claude-sonnet-4");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("deepseek/deepseek-chat - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "deepseek/deepseek-chat");
it(
"deepseek/deepseek-chat - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "deepseek/deepseek-chat");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
it(
"mistralai/mistral-small-3.1-24b-instruct - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "mistralai/mistral-small-3.1-24b-instruct");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
it(
"google/gemini-2.0-flash-001 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "google/gemini-2.0-flash-001");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it("meta-llama/llama-4-maverick - should return totalTokens equal to sum of components", async () => {
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
it(
"meta-llama/llama-4-maverick - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("openrouter", "meta-llama/llama-4-maverick");
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
console.log(`\nOpenRouter / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: process.env.OPENROUTER_API_KEY });
logUsage("First request", first);
logUsage("Second request", second);
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
}, 60000);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// GitHub Copilot (OAuth)
// =========================================================================
describe("GitHub Copilot (OAuth)", () => {
it.skipIf(!githubCopilotToken)(
"gpt-4o - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "gpt-4o");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!githubCopilotToken)(
"claude-sonnet-4 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("github-copilot", "claude-sonnet-4");
console.log(`\nGitHub Copilot / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: githubCopilotToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Gemini CLI (OAuth)
// =========================================================================
describe("Google Gemini CLI (OAuth)", () => {
it.skipIf(!geminiCliToken)(
"gemini-2.5-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-gemini-cli", "gemini-2.5-flash");
console.log(`\nGoogle Gemini CLI / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: geminiCliToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
// =========================================================================
// Google Antigravity (OAuth)
// =========================================================================
describe("Google Antigravity (OAuth)", () => {
it.skipIf(!antigravityToken)(
"gemini-3-flash - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gemini-3-flash");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"claude-sonnet-4-5 - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "claude-sonnet-4-5");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
it.skipIf(!antigravityToken)(
"gpt-oss-120b-medium - should return totalTokens equal to sum of components",
{ retry: 3, timeout: 60000 },
async () => {
const llm = getModel("google-antigravity", "gpt-oss-120b-medium");
console.log(`\nGoogle Antigravity / ${llm.id}:`);
const { first, second } = await testTotalTokensWithCache(llm, { apiKey: antigravityToken });
logUsage("First request", first);
logUsage("Second request", second);
assertTotalTokensEqualsComponents(first);
assertTotalTokensEqualsComponents(second);
},
);
});
});