From 31c5cd38d1f37ec92283362f1ac6426902f275f5 Mon Sep 17 00:00:00 2001 From: Mario Zechner Date: Mon, 29 Dec 2025 20:30:13 +0100 Subject: [PATCH] Add tree navigation tests and shared test utilities - Add test/utilities.ts with shared helpers (API_KEY, userMsg, assistantMsg, createTestSession) - Add agent-session-tree-navigation.test.ts with e2e tests for tree navigation - Add getChildren() method to SessionManager - Add summaryEntry to navigateTree return type - Update existing tests to use shared utilities --- .../coding-agent/src/core/agent-session.ts | 4 +- .../coding-agent/src/core/session-manager.ts | 13 + .../test/agent-session-branching.test.ts | 3 +- .../test/agent-session-compaction.test.ts | 3 +- .../agent-session-tree-navigation.test.ts | 318 ++++++++++++++++++ .../session-manager/tree-traversal.test.ts | 25 +- packages/coding-agent/test/utilities.ts | 158 +++++++++ 7 files changed, 494 insertions(+), 30 deletions(-) create mode 100644 packages/coding-agent/test/agent-session-tree-navigation.test.ts create mode 100644 packages/coding-agent/test/utilities.ts diff --git a/packages/coding-agent/src/core/agent-session.ts b/packages/coding-agent/src/core/agent-session.ts index f42ee625..f335a707 100644 --- a/packages/coding-agent/src/core/agent-session.ts +++ b/packages/coding-agent/src/core/agent-session.ts @@ -1582,7 +1582,7 @@ export class AgentSession { async navigateTree( targetId: string, options: { summarize?: boolean; customInstructions?: string } = {}, - ): Promise<{ editorText?: string; cancelled: boolean; aborted?: boolean }> { + ): Promise<{ editorText?: string; cancelled: boolean; aborted?: boolean; summaryEntry?: BranchSummaryEntry }> { const oldLeafId = this.sessionManager.getLeafId(); // No-op if already at target @@ -1735,7 +1735,7 @@ export class AgentSession { await this._emitToolSessionEvent("tree", this.sessionFile); this._branchSummaryAbortController = undefined; - return { editorText, cancelled: false }; + return { editorText, cancelled: false, summaryEntry }; } /** diff --git a/packages/coding-agent/src/core/session-manager.ts b/packages/coding-agent/src/core/session-manager.ts index ef33f15d..ccf6e4ea 100644 --- a/packages/coding-agent/src/core/session-manager.ts +++ b/packages/coding-agent/src/core/session-manager.ts @@ -696,6 +696,19 @@ export class SessionManager { return this.byId.get(id); } + /** + * Get all direct children of an entry. + */ + getChildren(parentId: string): SessionEntry[] { + const children: SessionEntry[] = []; + for (const entry of this.byId.values()) { + if (entry.parentId === parentId) { + children.push(entry); + } + } + return children; + } + /** * Get the label for an entry, if any. */ diff --git a/packages/coding-agent/test/agent-session-branching.test.ts b/packages/coding-agent/test/agent-session-branching.test.ts index 51b3aa59..71b78ab7 100644 --- a/packages/coding-agent/test/agent-session-branching.test.ts +++ b/packages/coding-agent/test/agent-session-branching.test.ts @@ -19,8 +19,7 @@ import { ModelRegistry } from "../src/core/model-registry.js"; import { SessionManager } from "../src/core/session-manager.js"; import { SettingsManager } from "../src/core/settings-manager.js"; import { codingTools } from "../src/core/tools/index.js"; - -const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; +import { API_KEY } from "./utilities.js"; describe.skipIf(!API_KEY)("AgentSession branching", () => { let session: AgentSession; diff --git a/packages/coding-agent/test/agent-session-compaction.test.ts b/packages/coding-agent/test/agent-session-compaction.test.ts index 529961c4..14a664bb 100644 --- a/packages/coding-agent/test/agent-session-compaction.test.ts +++ b/packages/coding-agent/test/agent-session-compaction.test.ts @@ -19,8 +19,7 @@ import { ModelRegistry } from "../src/core/model-registry.js"; import { SessionManager } from "../src/core/session-manager.js"; import { SettingsManager } from "../src/core/settings-manager.js"; import { codingTools } from "../src/core/tools/index.js"; - -const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; +import { API_KEY } from "./utilities.js"; describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => { let session: AgentSession; diff --git a/packages/coding-agent/test/agent-session-tree-navigation.test.ts b/packages/coding-agent/test/agent-session-tree-navigation.test.ts new file mode 100644 index 00000000..82e82439 --- /dev/null +++ b/packages/coding-agent/test/agent-session-tree-navigation.test.ts @@ -0,0 +1,318 @@ +/** + * E2E tests for AgentSession tree navigation with branch summarization. + * + * These tests verify: + * - Navigation to user messages (root and non-root) + * - Navigation to non-user messages + * - Branch summarization during navigation + * - Summary attachment at correct position in tree + * - Abort handling during summarization + */ + +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { API_KEY, createTestSession, type TestSessionContext } from "./utilities.js"; + +describe.skipIf(!API_KEY)("AgentSession tree navigation e2e", () => { + let ctx: TestSessionContext; + + beforeEach(() => { + ctx = createTestSession({ + systemPrompt: "You are a helpful assistant. Reply with just a few words.", + settingsOverrides: { compaction: { keepRecentTokens: 1 } }, + }); + }); + + afterEach(() => { + ctx.cleanup(); + }); + + it("should navigate to user message and put text in editor", async () => { + const { session } = ctx; + + // Build conversation: u1 -> a1 -> u2 -> a2 + await session.prompt("First message"); + await session.agent.waitForIdle(); + await session.prompt("Second message"); + await session.agent.waitForIdle(); + + // Get tree entries + const tree = session.sessionManager.getTree(); + expect(tree.length).toBe(1); + + // Find the first user entry (u1) + const rootNode = tree[0]; + expect(rootNode.entry.type).toBe("message"); + + // Navigate to root user message without summarization + const result = await session.navigateTree(rootNode.entry.id, { summarize: false }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBe("First message"); + + // After navigating to root user message, leaf should be null (empty conversation) + expect(session.sessionManager.getLeafId()).toBeNull(); + }, 60000); + + it("should navigate to non-user message without editor text", async () => { + const { session, sessionManager } = ctx; + + // Build conversation + await session.prompt("Hello"); + await session.agent.waitForIdle(); + + // Get the assistant message + const entries = sessionManager.getEntries(); + const assistantEntry = entries.find((e) => e.type === "message" && e.message.role === "assistant"); + expect(assistantEntry).toBeDefined(); + + // Navigate to assistant message + const result = await session.navigateTree(assistantEntry!.id, { summarize: false }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBeUndefined(); + + // Leaf should be the assistant entry + expect(sessionManager.getLeafId()).toBe(assistantEntry!.id); + }, 60000); + + it("should create branch summary when navigating with summarize=true", async () => { + const { session, sessionManager } = ctx; + + // Build conversation: u1 -> a1 -> u2 -> a2 + await session.prompt("What is 2+2?"); + await session.agent.waitForIdle(); + await session.prompt("What is 3+3?"); + await session.agent.waitForIdle(); + + // Get tree and find first user message + const tree = sessionManager.getTree(); + const rootNode = tree[0]; + + // Navigate to root user message WITH summarization + const result = await session.navigateTree(rootNode.entry.id, { summarize: true }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBe("What is 2+2?"); + expect(result.summaryEntry).toBeDefined(); + expect(result.summaryEntry?.type).toBe("branch_summary"); + expect(result.summaryEntry?.summary).toBeTruthy(); + expect(result.summaryEntry?.summary.length).toBeGreaterThan(0); + + // Summary should be a root entry (parentId = null) since we navigated to root user + expect(result.summaryEntry?.parentId).toBeNull(); + + // Leaf should be the summary entry + expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id); + }, 120000); + + it("should attach summary to correct parent when navigating to nested user message", async () => { + const { session, sessionManager } = ctx; + + // Build conversation: u1 -> a1 -> u2 -> a2 -> u3 -> a3 + await session.prompt("Message one"); + await session.agent.waitForIdle(); + await session.prompt("Message two"); + await session.agent.waitForIdle(); + await session.prompt("Message three"); + await session.agent.waitForIdle(); + + // Get the second user message (u2) + const entries = sessionManager.getEntries(); + const userEntries = entries.filter((e) => e.type === "message" && e.message.role === "user"); + expect(userEntries.length).toBe(3); + + const u2 = userEntries[1]; + const a1 = entries.find((e) => e.id === u2.parentId); // a1 is parent of u2 + + // Navigate to u2 with summarization + const result = await session.navigateTree(u2.id, { summarize: true }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBe("Message two"); + expect(result.summaryEntry).toBeDefined(); + + // Summary should be attached to a1 (parent of u2) + // So a1 now has two children: u2 and the summary + expect(result.summaryEntry?.parentId).toBe(a1?.id); + + // Verify tree structure + const children = sessionManager.getChildren(a1!.id); + expect(children.length).toBe(2); + + const childTypes = children.map((c) => c.type).sort(); + expect(childTypes).toContain("branch_summary"); + expect(childTypes).toContain("message"); + }, 120000); + + it("should attach summary to selected node when navigating to assistant message", async () => { + const { session, sessionManager } = ctx; + + // Build conversation: u1 -> a1 -> u2 -> a2 + await session.prompt("Hello"); + await session.agent.waitForIdle(); + await session.prompt("Goodbye"); + await session.agent.waitForIdle(); + + // Get the first assistant message (a1) + const entries = sessionManager.getEntries(); + const assistantEntries = entries.filter((e) => e.type === "message" && e.message.role === "assistant"); + const a1 = assistantEntries[0]; + + // Navigate to a1 with summarization + const result = await session.navigateTree(a1.id, { summarize: true }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBeUndefined(); // No editor text for assistant messages + expect(result.summaryEntry).toBeDefined(); + + // Summary should be attached to a1 (the selected node) + expect(result.summaryEntry?.parentId).toBe(a1.id); + + // Leaf should be the summary entry + expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id); + }, 120000); + + it("should handle abort during summarization", async () => { + const { session, sessionManager } = ctx; + + // Build conversation + await session.prompt("Tell me about something"); + await session.agent.waitForIdle(); + await session.prompt("Continue"); + await session.agent.waitForIdle(); + + const entriesBefore = sessionManager.getEntries(); + const leafBefore = sessionManager.getLeafId(); + + // Get root user message + const tree = sessionManager.getTree(); + const rootNode = tree[0]; + + // Start navigation with summarization but abort immediately + const navigationPromise = session.navigateTree(rootNode.entry.id, { summarize: true }); + + // Abort after a short delay (let the LLM call start) + await new Promise((resolve) => setTimeout(resolve, 100)); + session.abortBranchSummary(); + + const result = await navigationPromise; + + expect(result.cancelled).toBe(true); + expect(result.aborted).toBe(true); + expect(result.summaryEntry).toBeUndefined(); + + // Session should be unchanged + const entriesAfter = sessionManager.getEntries(); + expect(entriesAfter.length).toBe(entriesBefore.length); + expect(sessionManager.getLeafId()).toBe(leafBefore); + }, 60000); + + it("should not create summary when navigating without summarize option", async () => { + const { session, sessionManager } = ctx; + + // Build conversation + await session.prompt("First"); + await session.agent.waitForIdle(); + await session.prompt("Second"); + await session.agent.waitForIdle(); + + const entriesBefore = sessionManager.getEntries().length; + + // Navigate without summarization + const tree = sessionManager.getTree(); + await session.navigateTree(tree[0].entry.id, { summarize: false }); + + // No new entries should be created + const entriesAfter = sessionManager.getEntries().length; + expect(entriesAfter).toBe(entriesBefore); + + // No branch_summary entries + const summaries = sessionManager.getEntries().filter((e) => e.type === "branch_summary"); + expect(summaries.length).toBe(0); + }, 60000); + + it("should handle navigation to same position (no-op)", async () => { + const { session, sessionManager } = ctx; + + // Build conversation + await session.prompt("Hello"); + await session.agent.waitForIdle(); + + const leafBefore = sessionManager.getLeafId(); + expect(leafBefore).toBeTruthy(); + const entriesBefore = sessionManager.getEntries().length; + + // Navigate to current leaf + const result = await session.navigateTree(leafBefore!, { summarize: false }); + + expect(result.cancelled).toBe(false); + expect(sessionManager.getLeafId()).toBe(leafBefore); + expect(sessionManager.getEntries().length).toBe(entriesBefore); + }, 60000); + + it("should support custom summarization instructions", async () => { + const { session, sessionManager } = ctx; + + // Build conversation + await session.prompt("What is TypeScript?"); + await session.agent.waitForIdle(); + + // Navigate with custom instructions + const tree = sessionManager.getTree(); + const result = await session.navigateTree(tree[0].entry.id, { + summarize: true, + customInstructions: "Summarize in exactly 3 words.", + }); + + expect(result.summaryEntry).toBeDefined(); + expect(result.summaryEntry?.summary).toBeTruthy(); + // Can't reliably test 3 words exactly, but summary should be short + expect(result.summaryEntry?.summary.split(/\s+/).length).toBeLessThan(20); + }, 120000); +}); + +describe.skipIf(!API_KEY)("AgentSession tree navigation - branch scenarios", () => { + let ctx: TestSessionContext; + + beforeEach(() => { + ctx = createTestSession({ + systemPrompt: "You are a helpful assistant. Reply with just a few words.", + }); + }); + + afterEach(() => { + ctx.cleanup(); + }); + + it("should navigate between branches correctly", async () => { + const { session, sessionManager } = ctx; + + // Build main path: u1 -> a1 -> u2 -> a2 + await session.prompt("Main branch start"); + await session.agent.waitForIdle(); + await session.prompt("Main branch continue"); + await session.agent.waitForIdle(); + + // Get a1 id for branching + const entries = sessionManager.getEntries(); + const a1 = entries.find((e) => e.type === "message" && e.message.role === "assistant"); + + // Create a branch from a1: a1 -> u3 -> a3 + sessionManager.branch(a1!.id); + await session.prompt("Branch path"); + await session.agent.waitForIdle(); + + // Now navigate back to u2 (on main branch) with summarization + const userEntries = entries.filter((e) => e.type === "message" && e.message.role === "user"); + const u2 = userEntries[1]; // "Main branch continue" + + const result = await session.navigateTree(u2.id, { summarize: true }); + + expect(result.cancelled).toBe(false); + expect(result.editorText).toBe("Main branch continue"); + expect(result.summaryEntry).toBeDefined(); + + // Summary captures the branch we're leaving (the "Branch path" conversation) + expect(result.summaryEntry?.summary.length).toBeGreaterThan(0); + }, 180000); +}); diff --git a/packages/coding-agent/test/session-manager/tree-traversal.test.ts b/packages/coding-agent/test/session-manager/tree-traversal.test.ts index 93715ed6..65193479 100644 --- a/packages/coding-agent/test/session-manager/tree-traversal.test.ts +++ b/packages/coding-agent/test/session-manager/tree-traversal.test.ts @@ -1,29 +1,6 @@ import { describe, expect, it } from "vitest"; import { type CustomEntry, SessionManager } from "../../src/core/session-manager.js"; - -function userMsg(text: string) { - return { role: "user" as const, content: text, timestamp: Date.now() }; -} - -function assistantMsg(text: string) { - return { - role: "assistant" as const, - content: [{ type: "text" as const, text }], - api: "anthropic-messages" as const, - provider: "anthropic", - model: "test", - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: "stop" as const, - timestamp: Date.now(), - }; -} +import { assistantMsg, userMsg } from "../utilities.js"; describe("SessionManager append and tree traversal", () => { describe("append operations", () => { diff --git a/packages/coding-agent/test/utilities.ts b/packages/coding-agent/test/utilities.ts new file mode 100644 index 00000000..3fe5d4cc --- /dev/null +++ b/packages/coding-agent/test/utilities.ts @@ -0,0 +1,158 @@ +/** + * Shared test utilities for coding-agent tests. + */ + +import { existsSync, mkdirSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Agent } from "@mariozechner/pi-agent-core"; +import { getModel } from "@mariozechner/pi-ai"; +import { AgentSession } from "../src/core/agent-session.js"; +import { AuthStorage } from "../src/core/auth-storage.js"; +import { ModelRegistry } from "../src/core/model-registry.js"; +import { SessionManager } from "../src/core/session-manager.js"; +import { SettingsManager } from "../src/core/settings-manager.js"; +import { codingTools } from "../src/core/tools/index.js"; + +/** + * API key for authenticated tests. Tests using this should be wrapped in + * describe.skipIf(!API_KEY) + */ +export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY; + +/** + * Create a minimal user message for testing. + */ +export function userMsg(text: string) { + return { role: "user" as const, content: text, timestamp: Date.now() }; +} + +/** + * Create a minimal assistant message for testing. + */ +export function assistantMsg(text: string) { + return { + role: "assistant" as const, + content: [{ type: "text" as const, text }], + api: "anthropic-messages" as const, + provider: "anthropic", + model: "test", + usage: { + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop" as const, + timestamp: Date.now(), + }; +} + +/** + * Options for creating a test session. + */ +export interface TestSessionOptions { + /** Use in-memory session (no file persistence) */ + inMemory?: boolean; + /** Custom system prompt */ + systemPrompt?: string; + /** Custom settings overrides */ + settingsOverrides?: Record; +} + +/** + * Resources returned by createTestSession that need cleanup. + */ +export interface TestSessionContext { + session: AgentSession; + sessionManager: SessionManager; + tempDir: string; + cleanup: () => void; +} + +/** + * Create an AgentSession for testing with proper setup and cleanup. + * Use this for e2e tests that need real LLM calls. + */ +export function createTestSession(options: TestSessionOptions = {}): TestSessionContext { + const tempDir = join(tmpdir(), `pi-test-${Date.now()}-${Math.random().toString(36).slice(2)}`); + mkdirSync(tempDir, { recursive: true }); + + const model = getModel("anthropic", "claude-sonnet-4-5")!; + const agent = new Agent({ + getApiKey: () => API_KEY, + initialState: { + model, + systemPrompt: options.systemPrompt ?? "You are a helpful assistant. Be extremely concise.", + tools: codingTools, + }, + }); + + const sessionManager = options.inMemory ? SessionManager.inMemory() : SessionManager.create(tempDir); + const settingsManager = SettingsManager.create(tempDir, tempDir); + + if (options.settingsOverrides) { + settingsManager.applyOverrides(options.settingsOverrides); + } + + const authStorage = new AuthStorage(join(tempDir, "auth.json")); + const modelRegistry = new ModelRegistry(authStorage, tempDir); + + const session = new AgentSession({ + agent, + sessionManager, + settingsManager, + modelRegistry, + }); + + // Must subscribe to enable session persistence + session.subscribe(() => {}); + + const cleanup = () => { + session.dispose(); + if (tempDir && existsSync(tempDir)) { + rmSync(tempDir, { recursive: true }); + } + }; + + return { session, sessionManager, tempDir, cleanup }; +} + +/** + * Build a session tree for testing using SessionManager. + * Returns the IDs of all created entries. + * + * Example tree structure: + * ``` + * u1 -> a1 -> u2 -> a2 + * -> u3 -> a3 (branch from a1) + * u4 -> a4 (another root) + * ``` + */ +export function buildTestTree( + session: SessionManager, + structure: { + messages: Array<{ role: "user" | "assistant"; text: string; branchFrom?: string }>; + }, +): Map { + const ids = new Map(); + + for (const msg of structure.messages) { + if (msg.branchFrom) { + const branchFromId = ids.get(msg.branchFrom); + if (!branchFromId) { + throw new Error(`Cannot branch from unknown entry: ${msg.branchFrom}`); + } + session.branch(branchFromId); + } + + const id = + msg.role === "user" ? session.appendMessage(userMsg(msg.text)) : session.appendMessage(assistantMsg(msg.text)); + + ids.set(msg.text, id); + } + + return ids; +}