From 251fea752c46364e17a81aa425b3e23c8133af8d Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Fri, 26 Dec 2025 00:05:02 +0100 Subject: [PATCH] Fix API key priority and compaction bugs - getEnvApiKey: ANTHROPIC_OAUTH_TOKEN now takes precedence over ANTHROPIC_API_KEY - findCutPoint: Stop scan-backwards loop at session header (was decrementing past it causing null preparation) - generateSummary/generateTurnPrefixSummary: Throw on stopReason=error instead of returning empty string - Test files: Fix API key priority order, use keepRecentTokens=1 for small test conversations --- packages/ai/src/models.generated.ts | 102 +++++++++--------- packages/ai/src/stream.ts | 6 +- .../examples/hooks/custom-compaction.ts | 11 +- packages/coding-agent/src/core/compaction.ts | 14 ++- .../test/agent-session-branching.test.ts | 2 +- .../test/agent-session-compaction.test.ts | 71 ++++-------- .../test/compaction-hooks.test.ts | 2 +- 7 files changed, 98 insertions(+), 110 deletions(-) diff --git a/packages/ai/src/models.generated.ts b/packages/ai/src/models.generated.ts index 2df269d3..7bc2f673 100644 --- a/packages/ai/src/models.generated.ts +++ b/packages/ai/src/models.generated.ts @@ -6359,6 +6359,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 16384, } satisfies Model<"openai-completions">, + "meta-llama/llama-3.1-70b-instruct": { + id: "meta-llama/llama-3.1-70b-instruct", + name: "Meta: Llama 3.1 70B Instruct", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 0.39999999999999997, + output: 0.39999999999999997, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 131072, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "meta-llama/llama-3.1-8b-instruct": { id: "meta-llama/llama-3.1-8b-instruct", name: "Meta: Llama 3.1 8B Instruct", @@ -6393,23 +6410,6 @@ export const MODELS = { contextWindow: 10000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "meta-llama/llama-3.1-70b-instruct": { - id: "meta-llama/llama-3.1-70b-instruct", - name: "Meta: Llama 3.1 70B Instruct", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 0.39999999999999997, - output: 0.39999999999999997, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 131072, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "mistralai/mistral-nemo": { id: "mistralai/mistral-nemo", name: "Mistral: Mistral Nemo", @@ -6546,23 +6546,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-4o-2024-05-13": { - id: "openai/gpt-4o-2024-05-13", - name: "OpenAI: GPT-4o (2024-05-13)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text", "image"], - cost: { - input: 5, - output: 15, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 128000, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-4o": { id: "openai/gpt-4o", name: "OpenAI: GPT-4o", @@ -6597,6 +6580,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 64000, } satisfies Model<"openai-completions">, + "openai/gpt-4o-2024-05-13": { + id: "openai/gpt-4o-2024-05-13", + name: "OpenAI: GPT-4o (2024-05-13)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text", "image"], + cost: { + input: 5, + output: 15, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 128000, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "meta-llama/llama-3-70b-instruct": { id: "meta-llama/llama-3-70b-instruct", name: "Meta: Llama 3 70B Instruct", @@ -6716,23 +6716,6 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, - "openai/gpt-3.5-turbo-0613": { - id: "openai/gpt-3.5-turbo-0613", - name: "OpenAI: GPT-3.5 Turbo (older v0613)", - api: "openai-completions", - provider: "openrouter", - baseUrl: "https://openrouter.ai/api/v1", - reasoning: false, - input: ["text"], - cost: { - input: 1, - output: 2, - cacheRead: 0, - cacheWrite: 0, - }, - contextWindow: 4095, - maxTokens: 4096, - } satisfies Model<"openai-completions">, "openai/gpt-4-turbo-preview": { id: "openai/gpt-4-turbo-preview", name: "OpenAI: GPT-4 Turbo Preview", @@ -6750,6 +6733,23 @@ export const MODELS = { contextWindow: 128000, maxTokens: 4096, } satisfies Model<"openai-completions">, + "openai/gpt-3.5-turbo-0613": { + id: "openai/gpt-3.5-turbo-0613", + name: "OpenAI: GPT-3.5 Turbo (older v0613)", + api: "openai-completions", + provider: "openrouter", + baseUrl: "https://openrouter.ai/api/v1", + reasoning: false, + input: ["text"], + cost: { + input: 1, + output: 2, + cacheRead: 0, + cacheWrite: 0, + }, + contextWindow: 4095, + maxTokens: 4096, + } satisfies Model<"openai-completions">, "mistralai/mistral-tiny": { id: "mistralai/mistral-tiny", name: "Mistral Tiny", diff --git a/packages/ai/src/stream.ts b/packages/ai/src/stream.ts index a71656e9..fb8f5ca3 100644 --- a/packages/ai/src/stream.ts +++ b/packages/ai/src/stream.ts @@ -30,9 +30,13 @@ export function getEnvApiKey(provider: any): string | undefined { return process.env.COPILOT_GITHUB_TOKEN || process.env.GH_TOKEN || process.env.GITHUB_TOKEN; } + // ANTHROPIC_OAUTH_TOKEN takes precedence over ANTHROPIC_API_KEY + if (provider === "anthropic") { + return process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; + } + const envMap: Record = { openai: "OPENAI_API_KEY", - anthropic: "ANTHROPIC_API_KEY", google: "GEMINI_API_KEY", groq: "GROQ_API_KEY", cerebras: "CEREBRAS_API_KEY", diff --git a/packages/coding-agent/examples/hooks/custom-compaction.ts b/packages/coding-agent/examples/hooks/custom-compaction.ts index 1781e7e3..1eabaf57 100644 --- a/packages/coding-agent/examples/hooks/custom-compaction.ts +++ b/packages/coding-agent/examples/hooks/custom-compaction.ts @@ -23,8 +23,15 @@ export default function (pi: HookAPI) { ctx.ui.notify("Custom compaction hook triggered", "info"); - const { messagesToSummarize, messagesToKeep, previousSummary, tokensBefore, resolveApiKey, entries, signal } = - event; + const { + messagesToSummarize, + messagesToKeep, + previousSummary, + tokensBefore, + resolveApiKey, + entries: _, + signal, + } = event; // Use Gemini Flash for summarization (cheaper/faster than most conversation models) const model = getModel("google", "gemini-2.5-flash"); diff --git a/packages/coding-agent/src/core/compaction.ts b/packages/coding-agent/src/core/compaction.ts index 8abfef74..6a56a3ac 100644 --- a/packages/coding-agent/src/core/compaction.ts +++ b/packages/coding-agent/src/core/compaction.ts @@ -224,7 +224,7 @@ export function findCutPoint( // Walk backwards from newest, accumulating estimated message sizes let accumulatedTokens = 0; - let cutIndex = startIndex; // Default: keep everything in range + let cutIndex = cutPoints[0]; // Default: keep from first message (not header) for (let i = endIndex - 1; i >= startIndex; i--) { const entry = entries[i]; @@ -250,8 +250,8 @@ export function findCutPoint( // Scan backwards from cutIndex to include any non-message entries (bash, settings, etc.) while (cutIndex > startIndex) { const prevEntry = entries[cutIndex - 1]; - // Stop at compaction boundaries - if (prevEntry.type === "compaction") { + // Stop at session header or compaction boundaries + if (prevEntry.type === "session" || prevEntry.type === "compaction") { break; } if (prevEntry.type === "message") { @@ -320,6 +320,10 @@ export async function generateSummary( const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey }); + if (response.stopReason === "error") { + throw new Error(`Summarization failed: ${response.errorMessage || "Unknown error"}`); + } + const textContent = response.content .filter((c): c is { type: "text"; text: string } => c.type === "text") .map((c) => c.text) @@ -550,6 +554,10 @@ async function generateTurnPrefixSummary( const response = await complete(model, { messages: summarizationMessages }, { maxTokens, signal, apiKey }); + if (response.stopReason === "error") { + throw new Error(`Turn prefix summarization failed: ${response.errorMessage || "Unknown error"}`); + } + return response.content .filter((c): c is { type: "text"; text: string } => c.type === "text") .map((c) => c.text) diff --git a/packages/coding-agent/test/agent-session-branching.test.ts b/packages/coding-agent/test/agent-session-branching.test.ts index d6f2db1c..de6e6e20 100644 --- a/packages/coding-agent/test/agent-session-branching.test.ts +++ b/packages/coding-agent/test/agent-session-branching.test.ts @@ -20,7 +20,7 @@ import { SessionManager } from "../src/core/session-manager.js"; import { SettingsManager } from "../src/core/settings-manager.js"; import { codingTools } from "../src/core/tools/index.js"; -const API_KEY = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_OAUTH_TOKEN; +const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; describe.skipIf(!API_KEY)("AgentSession branching", () => { let session: AgentSession; diff --git a/packages/coding-agent/test/agent-session-compaction.test.ts b/packages/coding-agent/test/agent-session-compaction.test.ts index 2b7f5e71..580e4fc9 100644 --- a/packages/coding-agent/test/agent-session-compaction.test.ts +++ b/packages/coding-agent/test/agent-session-compaction.test.ts @@ -20,7 +20,7 @@ import { SessionManager } from "../src/core/session-manager.js"; import { SettingsManager } from "../src/core/settings-manager.js"; import { codingTools } from "../src/core/tools/index.js"; -const API_KEY = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_OAUTH_TOKEN; +const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { let session: AgentSession; @@ -46,7 +46,7 @@ describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { } }); - function createSession() { + function createSession(inMemory = false) { const model = getModel("anthropic", "claude-sonnet-4-5")!; const transport = new ProviderTransport({ @@ -62,8 +62,10 @@ describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { }, }); - sessionManager = SessionManager.create(tempDir); + sessionManager = inMemory ? SessionManager.inMemory() : SessionManager.create(tempDir); const settingsManager = SettingsManager.create(tempDir, tempDir); + // Use minimal keepRecentTokens so small test conversations have something to summarize + settingsManager.applyOverrides({ compaction: { keepRecentTokens: 1 } }); const authStorage = new AuthStorage(join(tempDir, "auth.json")); const modelRegistry = new ModelRegistry(authStorage); @@ -156,64 +158,31 @@ describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { expect(compaction.type).toBe("compaction"); if (compaction.type === "compaction") { expect(compaction.summary.length).toBeGreaterThan(0); - // firstKeptEntryId can be 0 if all messages fit within keepRecentTokens - // (which is the case for small conversations) - expect(compaction.firstKeptEntryId).toBeGreaterThanOrEqual(0); + expect(typeof compaction.firstKeptEntryId).toBe("string"); expect(compaction.tokensBefore).toBeGreaterThan(0); } }, 120000); it("should work with --no-session mode (in-memory only)", async () => { - const model = getModel("anthropic", "claude-sonnet-4-5")!; + createSession(true); // in-memory mode - const transport = new ProviderTransport({ - getApiKey: () => API_KEY, - }); + // Send prompts + await session.prompt("What is 2+2? Reply with just the number."); + await session.agent.waitForIdle(); - const agent = new Agent({ - transport, - initialState: { - model, - systemPrompt: "You are a helpful assistant. Be concise.", - tools: codingTools, - }, - }); + await session.prompt("What is 3+3? Reply with just the number."); + await session.agent.waitForIdle(); - // Create in-memory session manager - const noSessionManager = SessionManager.inMemory(); + // Compact should work even without file persistence + const result = await session.compact(); - const settingsManager = SettingsManager.create(tempDir, tempDir); - const authStorage = new AuthStorage(join(tempDir, "auth.json")); - const modelRegistry = new ModelRegistry(authStorage); + expect(result.summary).toBeDefined(); + expect(result.summary.length).toBeGreaterThan(0); - const noSessionSession = new AgentSession({ - agent, - sessionManager: noSessionManager, - settingsManager, - modelRegistry, - }); - - try { - // Send prompts - await noSessionSession.prompt("What is 2+2? Reply with just the number."); - await noSessionSession.agent.waitForIdle(); - - await noSessionSession.prompt("What is 3+3? Reply with just the number."); - await noSessionSession.agent.waitForIdle(); - - // Compact should work even without file persistence - const result = await noSessionSession.compact(); - - expect(result.summary).toBeDefined(); - expect(result.summary.length).toBeGreaterThan(0); - - // In-memory entries should have the compaction - const entries = noSessionManager.getEntries(); - const compactionEntries = entries.filter((e) => e.type === "compaction"); - expect(compactionEntries.length).toBe(1); - } finally { - noSessionSession.dispose(); - } + // In-memory entries should have the compaction + const entries = sessionManager.getEntries(); + const compactionEntries = entries.filter((e) => e.type === "compaction"); + expect(compactionEntries.length).toBe(1); }, 120000); it("should emit correct events during auto-compaction", async () => { diff --git a/packages/coding-agent/test/compaction-hooks.test.ts b/packages/coding-agent/test/compaction-hooks.test.ts index e3773742..76d227bf 100644 --- a/packages/coding-agent/test/compaction-hooks.test.ts +++ b/packages/coding-agent/test/compaction-hooks.test.ts @@ -16,7 +16,7 @@ import { SessionManager } from "../src/core/session-manager.js"; import { SettingsManager } from "../src/core/settings-manager.js"; import { codingTools } from "../src/core/tools/index.js"; -const API_KEY = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_OAUTH_TOKEN; +const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; describe.skipIf(!API_KEY)("Compaction hooks", () => { let session: AgentSession;