Add tree navigation tests and shared test utilities

- Add test/utilities.ts with shared helpers (API_KEY, userMsg, assistantMsg, createTestSession)
- Add agent-session-tree-navigation.test.ts with e2e tests for tree navigation
- Add getChildren() method to SessionManager
- Add summaryEntry to navigateTree return type
- Update existing tests to use shared utilities
This commit is contained in:
Mario Zechner 2025-12-29 20:30:13 +01:00
parent 01dae9ebcc
commit 31c5cd38d1
7 changed files with 494 additions and 30 deletions

View file

@ -19,8 +19,7 @@ import { ModelRegistry } from "../src/core/model-registry.js";
import { SessionManager } from "../src/core/session-manager.js";
import { SettingsManager } from "../src/core/settings-manager.js";
import { codingTools } from "../src/core/tools/index.js";
const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
import { API_KEY } from "./utilities.js";
describe.skipIf(!API_KEY)("AgentSession branching", () => {
let session: AgentSession;

View file

@ -19,8 +19,7 @@ import { ModelRegistry } from "../src/core/model-registry.js";
import { SessionManager } from "../src/core/session-manager.js";
import { SettingsManager } from "../src/core/settings-manager.js";
import { codingTools } from "../src/core/tools/index.js";
const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
import { API_KEY } from "./utilities.js";
describe.skipIf(!API_KEY)("AgentSession compaction e2e", () => {
let session: AgentSession;

View file

@ -0,0 +1,318 @@
/**
* E2E tests for AgentSession tree navigation with branch summarization.
*
* These tests verify:
* - Navigation to user messages (root and non-root)
* - Navigation to non-user messages
* - Branch summarization during navigation
* - Summary attachment at correct position in tree
* - Abort handling during summarization
*/
import { afterEach, beforeEach, describe, expect, it } from "vitest";
import { API_KEY, createTestSession, type TestSessionContext } from "./utilities.js";
describe.skipIf(!API_KEY)("AgentSession tree navigation e2e", () => {
let ctx: TestSessionContext;
beforeEach(() => {
ctx = createTestSession({
systemPrompt: "You are a helpful assistant. Reply with just a few words.",
settingsOverrides: { compaction: { keepRecentTokens: 1 } },
});
});
afterEach(() => {
ctx.cleanup();
});
it("should navigate to user message and put text in editor", async () => {
const { session } = ctx;
// Build conversation: u1 -> a1 -> u2 -> a2
await session.prompt("First message");
await session.agent.waitForIdle();
await session.prompt("Second message");
await session.agent.waitForIdle();
// Get tree entries
const tree = session.sessionManager.getTree();
expect(tree.length).toBe(1);
// Find the first user entry (u1)
const rootNode = tree[0];
expect(rootNode.entry.type).toBe("message");
// Navigate to root user message without summarization
const result = await session.navigateTree(rootNode.entry.id, { summarize: false });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBe("First message");
// After navigating to root user message, leaf should be null (empty conversation)
expect(session.sessionManager.getLeafId()).toBeNull();
}, 60000);
it("should navigate to non-user message without editor text", async () => {
const { session, sessionManager } = ctx;
// Build conversation
await session.prompt("Hello");
await session.agent.waitForIdle();
// Get the assistant message
const entries = sessionManager.getEntries();
const assistantEntry = entries.find((e) => e.type === "message" && e.message.role === "assistant");
expect(assistantEntry).toBeDefined();
// Navigate to assistant message
const result = await session.navigateTree(assistantEntry!.id, { summarize: false });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBeUndefined();
// Leaf should be the assistant entry
expect(sessionManager.getLeafId()).toBe(assistantEntry!.id);
}, 60000);
it("should create branch summary when navigating with summarize=true", async () => {
const { session, sessionManager } = ctx;
// Build conversation: u1 -> a1 -> u2 -> a2
await session.prompt("What is 2+2?");
await session.agent.waitForIdle();
await session.prompt("What is 3+3?");
await session.agent.waitForIdle();
// Get tree and find first user message
const tree = sessionManager.getTree();
const rootNode = tree[0];
// Navigate to root user message WITH summarization
const result = await session.navigateTree(rootNode.entry.id, { summarize: true });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBe("What is 2+2?");
expect(result.summaryEntry).toBeDefined();
expect(result.summaryEntry?.type).toBe("branch_summary");
expect(result.summaryEntry?.summary).toBeTruthy();
expect(result.summaryEntry?.summary.length).toBeGreaterThan(0);
// Summary should be a root entry (parentId = null) since we navigated to root user
expect(result.summaryEntry?.parentId).toBeNull();
// Leaf should be the summary entry
expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id);
}, 120000);
it("should attach summary to correct parent when navigating to nested user message", async () => {
const { session, sessionManager } = ctx;
// Build conversation: u1 -> a1 -> u2 -> a2 -> u3 -> a3
await session.prompt("Message one");
await session.agent.waitForIdle();
await session.prompt("Message two");
await session.agent.waitForIdle();
await session.prompt("Message three");
await session.agent.waitForIdle();
// Get the second user message (u2)
const entries = sessionManager.getEntries();
const userEntries = entries.filter((e) => e.type === "message" && e.message.role === "user");
expect(userEntries.length).toBe(3);
const u2 = userEntries[1];
const a1 = entries.find((e) => e.id === u2.parentId); // a1 is parent of u2
// Navigate to u2 with summarization
const result = await session.navigateTree(u2.id, { summarize: true });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBe("Message two");
expect(result.summaryEntry).toBeDefined();
// Summary should be attached to a1 (parent of u2)
// So a1 now has two children: u2 and the summary
expect(result.summaryEntry?.parentId).toBe(a1?.id);
// Verify tree structure
const children = sessionManager.getChildren(a1!.id);
expect(children.length).toBe(2);
const childTypes = children.map((c) => c.type).sort();
expect(childTypes).toContain("branch_summary");
expect(childTypes).toContain("message");
}, 120000);
it("should attach summary to selected node when navigating to assistant message", async () => {
const { session, sessionManager } = ctx;
// Build conversation: u1 -> a1 -> u2 -> a2
await session.prompt("Hello");
await session.agent.waitForIdle();
await session.prompt("Goodbye");
await session.agent.waitForIdle();
// Get the first assistant message (a1)
const entries = sessionManager.getEntries();
const assistantEntries = entries.filter((e) => e.type === "message" && e.message.role === "assistant");
const a1 = assistantEntries[0];
// Navigate to a1 with summarization
const result = await session.navigateTree(a1.id, { summarize: true });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBeUndefined(); // No editor text for assistant messages
expect(result.summaryEntry).toBeDefined();
// Summary should be attached to a1 (the selected node)
expect(result.summaryEntry?.parentId).toBe(a1.id);
// Leaf should be the summary entry
expect(sessionManager.getLeafId()).toBe(result.summaryEntry?.id);
}, 120000);
it("should handle abort during summarization", async () => {
const { session, sessionManager } = ctx;
// Build conversation
await session.prompt("Tell me about something");
await session.agent.waitForIdle();
await session.prompt("Continue");
await session.agent.waitForIdle();
const entriesBefore = sessionManager.getEntries();
const leafBefore = sessionManager.getLeafId();
// Get root user message
const tree = sessionManager.getTree();
const rootNode = tree[0];
// Start navigation with summarization but abort immediately
const navigationPromise = session.navigateTree(rootNode.entry.id, { summarize: true });
// Abort after a short delay (let the LLM call start)
await new Promise((resolve) => setTimeout(resolve, 100));
session.abortBranchSummary();
const result = await navigationPromise;
expect(result.cancelled).toBe(true);
expect(result.aborted).toBe(true);
expect(result.summaryEntry).toBeUndefined();
// Session should be unchanged
const entriesAfter = sessionManager.getEntries();
expect(entriesAfter.length).toBe(entriesBefore.length);
expect(sessionManager.getLeafId()).toBe(leafBefore);
}, 60000);
it("should not create summary when navigating without summarize option", async () => {
const { session, sessionManager } = ctx;
// Build conversation
await session.prompt("First");
await session.agent.waitForIdle();
await session.prompt("Second");
await session.agent.waitForIdle();
const entriesBefore = sessionManager.getEntries().length;
// Navigate without summarization
const tree = sessionManager.getTree();
await session.navigateTree(tree[0].entry.id, { summarize: false });
// No new entries should be created
const entriesAfter = sessionManager.getEntries().length;
expect(entriesAfter).toBe(entriesBefore);
// No branch_summary entries
const summaries = sessionManager.getEntries().filter((e) => e.type === "branch_summary");
expect(summaries.length).toBe(0);
}, 60000);
it("should handle navigation to same position (no-op)", async () => {
const { session, sessionManager } = ctx;
// Build conversation
await session.prompt("Hello");
await session.agent.waitForIdle();
const leafBefore = sessionManager.getLeafId();
expect(leafBefore).toBeTruthy();
const entriesBefore = sessionManager.getEntries().length;
// Navigate to current leaf
const result = await session.navigateTree(leafBefore!, { summarize: false });
expect(result.cancelled).toBe(false);
expect(sessionManager.getLeafId()).toBe(leafBefore);
expect(sessionManager.getEntries().length).toBe(entriesBefore);
}, 60000);
it("should support custom summarization instructions", async () => {
const { session, sessionManager } = ctx;
// Build conversation
await session.prompt("What is TypeScript?");
await session.agent.waitForIdle();
// Navigate with custom instructions
const tree = sessionManager.getTree();
const result = await session.navigateTree(tree[0].entry.id, {
summarize: true,
customInstructions: "Summarize in exactly 3 words.",
});
expect(result.summaryEntry).toBeDefined();
expect(result.summaryEntry?.summary).toBeTruthy();
// Can't reliably test 3 words exactly, but summary should be short
expect(result.summaryEntry?.summary.split(/\s+/).length).toBeLessThan(20);
}, 120000);
});
describe.skipIf(!API_KEY)("AgentSession tree navigation - branch scenarios", () => {
let ctx: TestSessionContext;
beforeEach(() => {
ctx = createTestSession({
systemPrompt: "You are a helpful assistant. Reply with just a few words.",
});
});
afterEach(() => {
ctx.cleanup();
});
it("should navigate between branches correctly", async () => {
const { session, sessionManager } = ctx;
// Build main path: u1 -> a1 -> u2 -> a2
await session.prompt("Main branch start");
await session.agent.waitForIdle();
await session.prompt("Main branch continue");
await session.agent.waitForIdle();
// Get a1 id for branching
const entries = sessionManager.getEntries();
const a1 = entries.find((e) => e.type === "message" && e.message.role === "assistant");
// Create a branch from a1: a1 -> u3 -> a3
sessionManager.branch(a1!.id);
await session.prompt("Branch path");
await session.agent.waitForIdle();
// Now navigate back to u2 (on main branch) with summarization
const userEntries = entries.filter((e) => e.type === "message" && e.message.role === "user");
const u2 = userEntries[1]; // "Main branch continue"
const result = await session.navigateTree(u2.id, { summarize: true });
expect(result.cancelled).toBe(false);
expect(result.editorText).toBe("Main branch continue");
expect(result.summaryEntry).toBeDefined();
// Summary captures the branch we're leaving (the "Branch path" conversation)
expect(result.summaryEntry?.summary.length).toBeGreaterThan(0);
}, 180000);
});

View file

@ -1,29 +1,6 @@
import { describe, expect, it } from "vitest";
import { type CustomEntry, SessionManager } from "../../src/core/session-manager.js";
function userMsg(text: string) {
return { role: "user" as const, content: text, timestamp: Date.now() };
}
function assistantMsg(text: string) {
return {
role: "assistant" as const,
content: [{ type: "text" as const, text }],
api: "anthropic-messages" as const,
provider: "anthropic",
model: "test",
usage: {
input: 1,
output: 1,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 2,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop" as const,
timestamp: Date.now(),
};
}
import { assistantMsg, userMsg } from "../utilities.js";
describe("SessionManager append and tree traversal", () => {
describe("append operations", () => {

View file

@ -0,0 +1,158 @@
/**
* Shared test utilities for coding-agent tests.
*/
import { existsSync, mkdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { Agent } from "@mariozechner/pi-agent-core";
import { getModel } from "@mariozechner/pi-ai";
import { AgentSession } from "../src/core/agent-session.js";
import { AuthStorage } from "../src/core/auth-storage.js";
import { ModelRegistry } from "../src/core/model-registry.js";
import { SessionManager } from "../src/core/session-manager.js";
import { SettingsManager } from "../src/core/settings-manager.js";
import { codingTools } from "../src/core/tools/index.js";
/**
* API key for authenticated tests. Tests using this should be wrapped in
* describe.skipIf(!API_KEY)
*/
export const API_KEY = process.env.ANTHROPIC_OAUTH_TOKEN || process.env.ANTHROPIC_API_KEY;
/**
* Create a minimal user message for testing.
*/
export function userMsg(text: string) {
return { role: "user" as const, content: text, timestamp: Date.now() };
}
/**
* Create a minimal assistant message for testing.
*/
export function assistantMsg(text: string) {
return {
role: "assistant" as const,
content: [{ type: "text" as const, text }],
api: "anthropic-messages" as const,
provider: "anthropic",
model: "test",
usage: {
input: 1,
output: 1,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 2,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
},
stopReason: "stop" as const,
timestamp: Date.now(),
};
}
/**
* Options for creating a test session.
*/
export interface TestSessionOptions {
/** Use in-memory session (no file persistence) */
inMemory?: boolean;
/** Custom system prompt */
systemPrompt?: string;
/** Custom settings overrides */
settingsOverrides?: Record<string, unknown>;
}
/**
* Resources returned by createTestSession that need cleanup.
*/
export interface TestSessionContext {
session: AgentSession;
sessionManager: SessionManager;
tempDir: string;
cleanup: () => void;
}
/**
* Create an AgentSession for testing with proper setup and cleanup.
* Use this for e2e tests that need real LLM calls.
*/
export function createTestSession(options: TestSessionOptions = {}): TestSessionContext {
const tempDir = join(tmpdir(), `pi-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
mkdirSync(tempDir, { recursive: true });
const model = getModel("anthropic", "claude-sonnet-4-5")!;
const agent = new Agent({
getApiKey: () => API_KEY,
initialState: {
model,
systemPrompt: options.systemPrompt ?? "You are a helpful assistant. Be extremely concise.",
tools: codingTools,
},
});
const sessionManager = options.inMemory ? SessionManager.inMemory() : SessionManager.create(tempDir);
const settingsManager = SettingsManager.create(tempDir, tempDir);
if (options.settingsOverrides) {
settingsManager.applyOverrides(options.settingsOverrides);
}
const authStorage = new AuthStorage(join(tempDir, "auth.json"));
const modelRegistry = new ModelRegistry(authStorage, tempDir);
const session = new AgentSession({
agent,
sessionManager,
settingsManager,
modelRegistry,
});
// Must subscribe to enable session persistence
session.subscribe(() => {});
const cleanup = () => {
session.dispose();
if (tempDir && existsSync(tempDir)) {
rmSync(tempDir, { recursive: true });
}
};
return { session, sessionManager, tempDir, cleanup };
}
/**
* Build a session tree for testing using SessionManager.
* Returns the IDs of all created entries.
*
* Example tree structure:
* ```
* u1 -> a1 -> u2 -> a2
* -> u3 -> a3 (branch from a1)
* u4 -> a4 (another root)
* ```
*/
export function buildTestTree(
session: SessionManager,
structure: {
messages: Array<{ role: "user" | "assistant"; text: string; branchFrom?: string }>;
},
): Map<string, string> {
const ids = new Map<string, string>();
for (const msg of structure.messages) {
if (msg.branchFrom) {
const branchFromId = ids.get(msg.branchFrom);
if (!branchFromId) {
throw new Error(`Cannot branch from unknown entry: ${msg.branchFrom}`);
}
session.branch(branchFromId);
}
const id =
msg.role === "user" ? session.appendMessage(userMsg(msg.text)) : session.appendMessage(assistantMsg(msg.text));
ids.set(msg.text, id);
}
return ids;
}