Integrate OpenHandoff factory workspace (#212)

This commit is contained in:
Nathan Flurry 2026-03-09 14:00:20 -07:00 committed by GitHub
parent 3d9476ed0b
commit bf282199b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
251 changed files with 42824 additions and 692 deletions

View file

@ -0,0 +1,197 @@
import { randomUUID } from "node:crypto";
import { describe, expect, it } from "vitest";
import type { HistoryEvent, RepoOverview } from "@openhandoff/shared";
import { createBackendClient } from "../../src/backend-client.js";
const RUN_FULL_E2E = process.env.HF_ENABLE_DAEMON_FULL_E2E === "1";
function requiredEnv(name: string): string {
const value = process.env[name]?.trim();
if (!value) {
throw new Error(`Missing required env var: ${name}`);
}
return value;
}
function parseGithubRepo(input: string): { fullName: string } {
const trimmed = input.trim();
const shorthand = trimmed.match(/^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/);
if (shorthand) {
return { fullName: `${shorthand[1]}/${shorthand[2]}` };
}
const url = new URL(trimmed.startsWith("http") ? trimmed : `https://${trimmed}`);
const parts = url.pathname.replace(/^\/+/, "").split("/").filter(Boolean);
if (url.hostname.toLowerCase().includes("github.com") && parts.length >= 2) {
return { fullName: `${parts[0]}/${(parts[1] ?? "").replace(/\.git$/, "")}` };
}
throw new Error(`Unable to parse GitHub repo from: ${input}`);
}
async function sleep(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
}
async function poll<T>(
label: string,
timeoutMs: number,
intervalMs: number,
fn: () => Promise<T>,
isDone: (value: T) => boolean
): Promise<T> {
const start = Date.now();
let last: T;
for (;;) {
last = await fn();
if (isDone(last)) {
return last;
}
if (Date.now() - start > timeoutMs) {
throw new Error(`timed out waiting for ${label}`);
}
await sleep(intervalMs);
}
}
function parseHistoryPayload(event: HistoryEvent): Record<string, unknown> {
try {
return JSON.parse(event.payloadJson) as Record<string, unknown>;
} catch {
return {};
}
}
async function githubApi(token: string, path: string, init?: RequestInit): Promise<Response> {
const url = `https://api.github.com/${path.replace(/^\/+/, "")}`;
return await fetch(url, {
...init,
headers: {
Accept: "application/vnd.github+json",
Authorization: `Bearer ${token}`,
"X-GitHub-Api-Version": "2022-11-28",
...(init?.headers ?? {}),
},
});
}
async function ensureRemoteBranchExists(
token: string,
fullName: string,
branchName: string
): Promise<void> {
const repoRes = await githubApi(token, `repos/${fullName}`, { method: "GET" });
if (!repoRes.ok) {
throw new Error(`GitHub repo lookup failed: ${repoRes.status} ${await repoRes.text()}`);
}
const repo = (await repoRes.json()) as { default_branch?: string };
const defaultBranch = repo.default_branch;
if (!defaultBranch) {
throw new Error(`GitHub repo default branch is missing for ${fullName}`);
}
const defaultRefRes = await githubApi(
token,
`repos/${fullName}/git/ref/heads/${encodeURIComponent(defaultBranch)}`,
{ method: "GET" }
);
if (!defaultRefRes.ok) {
throw new Error(`GitHub default ref lookup failed: ${defaultRefRes.status} ${await defaultRefRes.text()}`);
}
const defaultRef = (await defaultRefRes.json()) as { object?: { sha?: string } };
const sha = defaultRef.object?.sha;
if (!sha) {
throw new Error(`GitHub default ref sha missing for ${fullName}:${defaultBranch}`);
}
const createRefRes = await githubApi(token, `repos/${fullName}/git/refs`, {
method: "POST",
body: JSON.stringify({
ref: `refs/heads/${branchName}`,
sha,
}),
headers: { "Content-Type": "application/json" },
});
if (createRefRes.ok || createRefRes.status === 422) {
return;
}
throw new Error(`GitHub create ref failed: ${createRefRes.status} ${await createRefRes.text()}`);
}
describe("e2e(client): full integration stack workflow", () => {
it.skipIf(!RUN_FULL_E2E)(
"adds repo, loads branch graph, and executes a stack restack action",
{ timeout: 8 * 60_000 },
async () => {
const endpoint =
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const githubToken = requiredEnv("GITHUB_TOKEN");
const { fullName } = parseGithubRepo(repoRemote);
const normalizedRepoRemote = `https://github.com/${fullName}.git`;
const seededBranch = `e2e/full-seed-${Date.now().toString(36)}-${randomUUID().slice(0, 8)}`;
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
try {
await ensureRemoteBranchExists(githubToken, fullName, seededBranch);
const repo = await client.addRepo(workspaceId, repoRemote);
expect(repo.remoteUrl).toBe(normalizedRepoRemote);
const overview = await poll<RepoOverview>(
"repo overview includes seeded branch",
90_000,
1_000,
async () => client.getRepoOverview(workspaceId, repo.repoId),
(value) => value.branches.some((row) => row.branchName === seededBranch)
);
if (!overview.stackAvailable) {
throw new Error(
"git-spice is unavailable for this repo during full integration e2e; set HF_GIT_SPICE_BIN or install git-spice in the backend container"
);
}
const stackResult = await client.runRepoStackAction({
workspaceId,
repoId: repo.repoId,
action: "restack_repo",
});
expect(stackResult.executed).toBe(true);
expect(stackResult.action).toBe("restack_repo");
await poll<HistoryEvent[]>(
"repo stack action history event",
60_000,
1_000,
async () => client.listHistory({ workspaceId, limit: 200 }),
(events) =>
events.some((event) => {
if (event.kind !== "repo.stack_action") {
return false;
}
const payload = parseHistoryPayload(event);
return payload.action === "restack_repo";
})
);
const postActionOverview = await client.getRepoOverview(workspaceId, repo.repoId);
const seededRow = postActionOverview.branches.find((row) => row.branchName === seededBranch);
expect(Boolean(seededRow)).toBe(true);
expect(postActionOverview.fetchedAt).toBeGreaterThan(overview.fetchedAt);
} finally {
await githubApi(
githubToken,
`repos/${fullName}/git/refs/heads/${encodeURIComponent(seededBranch)}`,
{ method: "DELETE" }
).catch(() => {});
}
}
);
});

View file

@ -0,0 +1,351 @@
import { describe, expect, it } from "vitest";
import type { HandoffRecord, HistoryEvent } from "@openhandoff/shared";
import { createBackendClient } from "../../src/backend-client.js";
const RUN_E2E = process.env.HF_ENABLE_DAEMON_E2E === "1";
function requiredEnv(name: string): string {
const value = process.env[name]?.trim();
if (!value) {
throw new Error(`Missing required env var: ${name}`);
}
return value;
}
function parseGithubRepo(input: string): { owner: string; repo: string; fullName: string } {
const trimmed = input.trim();
if (!trimmed) {
throw new Error("HF_E2E_GITHUB_REPO is empty");
}
// owner/repo shorthand
const shorthand = trimmed.match(/^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/);
if (shorthand) {
const owner = shorthand[1]!;
const repo = shorthand[2]!;
return { owner, repo, fullName: `${owner}/${repo}` };
}
// https://github.com/owner/repo(.git)?(/...)?
try {
const url = new URL(trimmed.startsWith("http") ? trimmed : `https://${trimmed}`);
const parts = url.pathname.replace(/^\/+/, "").split("/").filter(Boolean);
if (url.hostname.toLowerCase().includes("github.com") && parts.length >= 2) {
const owner = parts[0]!;
const repo = (parts[1] ?? "").replace(/\.git$/, "");
if (owner && repo) {
return { owner, repo, fullName: `${owner}/${repo}` };
}
}
} catch {
// fall through
}
throw new Error(`Unable to parse GitHub repo from: ${input}`);
}
async function sleep(ms: number): Promise<void> {
await new Promise((r) => setTimeout(r, ms));
}
async function poll<T>(
label: string,
timeoutMs: number,
intervalMs: number,
fn: () => Promise<T>,
isDone: (value: T) => boolean,
onTick?: (value: T) => void
): Promise<T> {
const start = Date.now();
let last: T;
for (;;) {
last = await fn();
onTick?.(last);
if (isDone(last)) {
return last;
}
if (Date.now() - start > timeoutMs) {
throw new Error(`timed out waiting for ${label}`);
}
await sleep(intervalMs);
}
}
function parseHistoryPayload(event: HistoryEvent): Record<string, unknown> {
try {
return JSON.parse(event.payloadJson) as Record<string, unknown>;
} catch {
return {};
}
}
async function debugDump(client: ReturnType<typeof createBackendClient>, workspaceId: string, handoffId: string): Promise<string> {
try {
const handoff = await client.getHandoff(workspaceId, handoffId);
const history = await client.listHistory({ workspaceId, handoffId, limit: 80 }).catch(() => []);
const historySummary = history
.slice(0, 20)
.map((e) => `${new Date(e.createdAt).toISOString()} ${e.kind}`)
.join("\n");
let sessionEventsSummary = "";
if (handoff.activeSandboxId && handoff.activeSessionId) {
const events = await client
.listSandboxSessionEvents(workspaceId, handoff.providerId, handoff.activeSandboxId, {
sessionId: handoff.activeSessionId,
limit: 50,
})
.then((r) => r.items)
.catch(() => []);
sessionEventsSummary = events
.slice(-12)
.map((e) => `${new Date(e.createdAt).toISOString()} ${e.sender}`)
.join("\n");
}
return [
"=== handoff ===",
JSON.stringify(
{
status: handoff.status,
statusMessage: handoff.statusMessage,
title: handoff.title,
branchName: handoff.branchName,
activeSandboxId: handoff.activeSandboxId,
activeSessionId: handoff.activeSessionId,
prUrl: handoff.prUrl,
prSubmitted: handoff.prSubmitted,
},
null,
2
),
"=== history (most recent first) ===",
historySummary || "(none)",
"=== session events (tail) ===",
sessionEventsSummary || "(none)",
].join("\n");
} catch (err) {
return `debug dump failed: ${err instanceof Error ? err.message : String(err)}`;
}
}
async function githubApi(token: string, path: string, init?: RequestInit): Promise<Response> {
const url = `https://api.github.com/${path.replace(/^\/+/, "")}`;
return await fetch(url, {
...init,
headers: {
Accept: "application/vnd.github+json",
Authorization: `Bearer ${token}`,
"X-GitHub-Api-Version": "2022-11-28",
...(init?.headers ?? {}),
},
});
}
describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
it.skipIf(!RUN_E2E)(
"creates a handoff, waits for agent to implement, and opens a PR",
{ timeout: 15 * 60_000 },
async () => {
const endpoint =
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const githubToken = requiredEnv("GITHUB_TOKEN");
const { fullName } = parseGithubRepo(repoRemote);
const runId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
const expectedFile = `e2e/${runId}.txt`;
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const created = await client.createHandoff({
workspaceId,
repoId: repo.repoId,
task: [
"E2E test task:",
`1. Create a new file at ${expectedFile} containing the single line: ${runId}`,
"2. git add the file",
`3. git commit -m \"test(e2e): ${runId}\"`,
"4. git push the branch to origin",
"5. Stop when done (agent should go idle).",
].join("\n"),
providerId: "daytona",
explicitTitle: `test(e2e): ${runId}`,
explicitBranchName: `e2e/${runId}`,
});
let prNumber: number | null = null;
let branchName: string | null = null;
let sandboxId: string | null = null;
let sessionId: string | null = null;
let lastStatus: string | null = null;
try {
const namedAndProvisioned = await poll<HandoffRecord>(
"handoff naming + sandbox provisioning",
// Cold Daytona snapshot/image preparation can exceed 5 minutes on first run.
8 * 60_000,
1_000,
async () => client.getHandoff(workspaceId, created.handoffId),
(h) => Boolean(h.title && h.branchName && h.activeSandboxId),
(h) => {
if (h.status !== lastStatus) {
lastStatus = h.status;
}
if (h.status === "error") {
throw new Error("handoff entered error state during provisioning");
}
}
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
branchName = namedAndProvisioned.branchName!;
sandboxId = namedAndProvisioned.activeSandboxId!;
const withSession = await poll<HandoffRecord>(
"handoff to create active session",
3 * 60_000,
1_500,
async () => client.getHandoff(workspaceId, created.handoffId),
(h) => Boolean(h.activeSessionId),
(h) => {
if (h.status === "error") {
throw new Error("handoff entered error state while waiting for active session");
}
}
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
sessionId = withSession.activeSessionId!;
await poll<{ id: string }[]>(
"session transcript bootstrap events",
2 * 60_000,
2_000,
async () =>
(
await client.listSandboxSessionEvents(workspaceId, withSession.providerId, sandboxId!, {
sessionId: sessionId!,
limit: 40,
})
).items,
(events) => events.length > 0
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
await poll<HandoffRecord>(
"handoff to reach idle state",
8 * 60_000,
2_000,
async () => client.getHandoff(workspaceId, created.handoffId),
(h) => h.status === "idle",
(h) => {
if (h.status === "error") {
throw new Error("handoff entered error state while waiting for idle");
}
}
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
const prCreatedEvent = await poll<HistoryEvent[]>(
"PR creation history event",
3 * 60_000,
2_000,
async () => client.listHistory({ workspaceId, handoffId: created.handoffId, limit: 200 }),
(events) => events.some((e) => e.kind === "handoff.pr_created")
)
.catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
})
.then((events) => events.find((e) => e.kind === "handoff.pr_created")!);
const payload = parseHistoryPayload(prCreatedEvent);
prNumber = Number(payload.prNumber);
const prUrl = String(payload.prUrl ?? "");
expect(prNumber).toBeGreaterThan(0);
expect(prUrl).toContain("/pull/");
const prFilesRes = await githubApi(
githubToken,
`repos/${fullName}/pulls/${prNumber}/files?per_page=100`,
{ method: "GET" }
);
if (!prFilesRes.ok) {
const body = await prFilesRes.text();
throw new Error(`GitHub PR files request failed: ${prFilesRes.status} ${body}`);
}
const prFiles = (await prFilesRes.json()) as Array<{ filename: string }>;
expect(prFiles.some((f) => f.filename === expectedFile)).toBe(true);
// Close the handoff and assert the sandbox is released (stopped).
await client.runAction(workspaceId, created.handoffId, "archive");
await poll<HandoffRecord>(
"handoff to become archived (session released)",
60_000,
1_000,
async () => client.getHandoff(workspaceId, created.handoffId),
(h) => h.status === "archived" && h.activeSessionId === null
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
if (sandboxId) {
await poll<{ providerId: string; sandboxId: string; state: string; at: number }>(
"daytona sandbox to stop",
2 * 60_000,
2_000,
async () => client.sandboxProviderState(workspaceId, "daytona", sandboxId!),
(s) => {
const st = String(s.state).toLowerCase();
return st.includes("stopped") || st.includes("suspended") || st.includes("paused");
}
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.handoffId);
const state = await client
.sandboxProviderState(workspaceId, "daytona", sandboxId!)
.catch(() => null);
throw new Error(
`${err instanceof Error ? err.message : String(err)}\n` +
`sandbox state: ${state ? state.state : "unknown"}\n` +
`${dump}`
);
});
}
} finally {
if (prNumber && Number.isFinite(prNumber)) {
await githubApi(githubToken, `repos/${fullName}/pulls/${prNumber}`, {
method: "PATCH",
body: JSON.stringify({ state: "closed" }),
headers: { "Content-Type": "application/json" },
}).catch(() => {});
}
if (branchName) {
await githubApi(
githubToken,
`repos/${fullName}/git/refs/heads/${encodeURIComponent(branchName)}`,
{ method: "DELETE" }
).catch(() => {});
}
}
}
);
});

View file

@ -0,0 +1,334 @@
import { execFile } from "node:child_process";
import { promisify } from "node:util";
import { describe, expect, it } from "vitest";
import type {
HandoffWorkbenchSnapshot,
WorkbenchAgentTab,
WorkbenchHandoff,
WorkbenchModelId,
WorkbenchTranscriptEvent,
} from "@openhandoff/shared";
import { createBackendClient } from "../../src/backend-client.js";
const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
const execFileAsync = promisify(execFile);
function requiredEnv(name: string): string {
const value = process.env[name]?.trim();
if (!value) {
throw new Error(`Missing required env var: ${name}`);
}
return value;
}
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
const value = process.env[name]?.trim();
switch (value) {
case "claude-sonnet-4":
case "claude-opus-4":
case "gpt-4o":
case "o3":
return value;
default:
return fallback;
}
}
async function sleep(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
}
async function seedSandboxFile(workspaceId: string, handoffId: string, filePath: string, content: string): Promise<void> {
const repoPath = `/root/.local/share/openhandoff/local-sandboxes/${workspaceId}/${handoffId}/repo`;
const script = [
`cd ${JSON.stringify(repoPath)}`,
`mkdir -p ${JSON.stringify(filePath.includes("/") ? filePath.slice(0, filePath.lastIndexOf("/")) : ".")}`,
`printf '%s\\n' ${JSON.stringify(content)} > ${JSON.stringify(filePath)}`,
].join(" && ");
await execFileAsync("docker", ["exec", "openhandoff-backend-1", "bash", "-lc", script]);
}
async function poll<T>(
label: string,
timeoutMs: number,
intervalMs: number,
fn: () => Promise<T>,
isDone: (value: T) => boolean,
): Promise<T> {
const startedAt = Date.now();
let lastValue: T;
for (;;) {
lastValue = await fn();
if (isDone(lastValue)) {
return lastValue;
}
if (Date.now() - startedAt > timeoutMs) {
throw new Error(`timed out waiting for ${label}`);
}
await sleep(intervalMs);
}
}
function findHandoff(snapshot: HandoffWorkbenchSnapshot, handoffId: string): WorkbenchHandoff {
const handoff = snapshot.handoffs.find((candidate) => candidate.id === handoffId);
if (!handoff) {
throw new Error(`handoff ${handoffId} missing from snapshot`);
}
return handoff;
}
function findTab(handoff: WorkbenchHandoff, tabId: string): WorkbenchAgentTab {
const tab = handoff.tabs.find((candidate) => candidate.id === tabId);
if (!tab) {
throw new Error(`tab ${tabId} missing from handoff ${handoff.id}`);
}
return tab;
}
function extractEventText(event: WorkbenchTranscriptEvent): string {
const payload = event.payload;
if (!payload || typeof payload !== "object") {
return String(payload ?? "");
}
const envelope = payload as {
method?: unknown;
params?: unknown;
result?: unknown;
error?: unknown;
};
const params = envelope.params;
if (params && typeof params === "object") {
const update = (params as { update?: unknown }).update;
if (update && typeof update === "object") {
const content = (update as { content?: unknown }).content;
if (content && typeof content === "object") {
const chunkText = (content as { text?: unknown }).text;
if (typeof chunkText === "string") {
return chunkText;
}
}
}
const text = (params as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
const prompt = (params as { prompt?: Array<{ text?: unknown }> }).prompt;
if (Array.isArray(prompt)) {
const value = prompt
.map((item) => (typeof item?.text === "string" ? item.text.trim() : ""))
.filter(Boolean)
.join("\n");
if (value) {
return value;
}
}
}
const result = envelope.result;
if (result && typeof result === "object") {
const text = (result as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
}
if (envelope.error) {
return JSON.stringify(envelope.error);
}
if (typeof envelope.method === "string") {
return envelope.method;
}
return JSON.stringify(payload);
}
function transcriptIncludesAgentText(
transcript: WorkbenchTranscriptEvent[],
expectedText: string,
): boolean {
return transcript
.filter((event) => event.sender === "agent")
.map((event) => extractEventText(event))
.join("")
.includes(expectedText);
}
describe("e2e(client): workbench flows", () => {
it.skipIf(!RUN_WORKBENCH_E2E)(
"creates a handoff, adds sessions, exchanges messages, and manages workbench state",
{ timeout: 20 * 60_000 },
async () => {
const endpoint =
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
const runId = `wb-${Date.now().toString(36)}`;
const expectedFile = `${runId}.txt`;
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
const expectedReply = `WORKBENCH_ACK_${runId}`;
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const created = await client.createWorkbenchHandoff(workspaceId, {
repoId: repo.repoId,
title: `Workbench E2E ${runId}`,
branch: `e2e/${runId}`,
model,
task: `Reply with exactly: ${expectedInitialReply}`,
});
const provisioned = await poll(
"handoff provisioning",
12 * 60_000,
2_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => handoff.branch === `e2e/${runId}` && handoff.tabs.length > 0,
);
const primaryTab = provisioned.tabs[0]!;
const initialCompleted = await poll(
"initial agent response",
12 * 60_000,
2_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = findTab(handoff, primaryTab.id);
return (
handoff.status === "idle" &&
tab.status === "idle" &&
transcriptIncludesAgentText(tab.transcript, expectedInitialReply)
);
},
);
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
await seedSandboxFile(workspaceId, created.handoffId, expectedFile, runId);
const fileSeeded = await poll(
"seeded sandbox file reflected in workbench",
30_000,
1_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => handoff.fileChanges.some((file) => file.path === expectedFile),
);
expect(fileSeeded.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
await client.renameWorkbenchHandoff(workspaceId, {
handoffId: created.handoffId,
value: `Workbench E2E ${runId} Renamed`,
});
await client.renameWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
tabId: primaryTab.id,
title: "Primary Session",
});
const secondTab = await client.createWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
model,
});
await client.renameWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
tabId: secondTab.tabId,
title: "Follow-up Session",
});
await client.updateWorkbenchDraft(workspaceId, {
handoffId: created.handoffId,
tabId: secondTab.tabId,
text: `Reply with exactly: ${expectedReply}`,
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
},
],
});
const drafted = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
expect(findTab(drafted, secondTab.tabId).draft.text).toContain(expectedReply);
expect(findTab(drafted, secondTab.tabId).draft.attachments).toHaveLength(1);
await client.sendWorkbenchMessage(workspaceId, {
handoffId: created.handoffId,
tabId: secondTab.tabId,
text: `Reply with exactly: ${expectedReply}`,
attachments: [],
});
const withSecondReply = await poll(
"follow-up session response",
10 * 60_000,
2_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = findTab(handoff, secondTab.tabId);
return (
tab.status === "idle" &&
transcriptIncludesAgentText(tab.transcript, expectedReply)
);
},
);
const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
await client.setWorkbenchSessionUnread(workspaceId, {
handoffId: created.handoffId,
tabId: secondTab.tabId,
unread: false,
});
await client.markWorkbenchUnread(workspaceId, { handoffId: created.handoffId });
const unreadSnapshot = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
expect(unreadSnapshot.tabs.some((tab) => tab.unread)).toBe(true);
await client.closeWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
tabId: secondTab.tabId,
});
const closedSnapshot = await poll(
"secondary session closed",
30_000,
1_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => !handoff.tabs.some((tab) => tab.id === secondTab.tabId),
);
expect(closedSnapshot.tabs).toHaveLength(1);
await client.revertWorkbenchFile(workspaceId, {
handoffId: created.handoffId,
path: expectedFile,
});
const revertedSnapshot = await poll(
"file revert reflected in workbench",
30_000,
1_000,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => !handoff.fileChanges.some((file) => file.path === expectedFile),
);
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
},
);
});

View file

@ -0,0 +1,331 @@
import { describe, expect, it } from "vitest";
import type {
HandoffWorkbenchSnapshot,
WorkbenchAgentTab,
WorkbenchHandoff,
WorkbenchModelId,
WorkbenchTranscriptEvent,
} from "@openhandoff/shared";
import { createBackendClient } from "../../src/backend-client.js";
const RUN_WORKBENCH_LOAD_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E === "1";
function requiredEnv(name: string): string {
const value = process.env[name]?.trim();
if (!value) {
throw new Error(`Missing required env var: ${name}`);
}
return value;
}
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
const value = process.env[name]?.trim();
switch (value) {
case "claude-sonnet-4":
case "claude-opus-4":
case "gpt-4o":
case "o3":
return value;
default:
return fallback;
}
}
function intEnv(name: string, fallback: number): number {
const raw = process.env[name]?.trim();
if (!raw) {
return fallback;
}
const value = Number.parseInt(raw, 10);
return Number.isFinite(value) && value > 0 ? value : fallback;
}
async function sleep(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
}
async function poll<T>(
label: string,
timeoutMs: number,
intervalMs: number,
fn: () => Promise<T>,
isDone: (value: T) => boolean,
): Promise<T> {
const startedAt = Date.now();
let lastValue: T;
for (;;) {
lastValue = await fn();
if (isDone(lastValue)) {
return lastValue;
}
if (Date.now() - startedAt > timeoutMs) {
throw new Error(`timed out waiting for ${label}`);
}
await sleep(intervalMs);
}
}
function findHandoff(snapshot: HandoffWorkbenchSnapshot, handoffId: string): WorkbenchHandoff {
const handoff = snapshot.handoffs.find((candidate) => candidate.id === handoffId);
if (!handoff) {
throw new Error(`handoff ${handoffId} missing from snapshot`);
}
return handoff;
}
function findTab(handoff: WorkbenchHandoff, tabId: string): WorkbenchAgentTab {
const tab = handoff.tabs.find((candidate) => candidate.id === tabId);
if (!tab) {
throw new Error(`tab ${tabId} missing from handoff ${handoff.id}`);
}
return tab;
}
function extractEventText(event: WorkbenchTranscriptEvent): string {
const payload = event.payload;
if (!payload || typeof payload !== "object") {
return String(payload ?? "");
}
const envelope = payload as {
method?: unknown;
params?: unknown;
result?: unknown;
};
const params = envelope.params;
if (params && typeof params === "object") {
const update = (params as { update?: unknown }).update;
if (update && typeof update === "object") {
const content = (update as { content?: unknown }).content;
if (content && typeof content === "object") {
const chunkText = (content as { text?: unknown }).text;
if (typeof chunkText === "string") {
return chunkText;
}
}
}
const text = (params as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
const prompt = (params as { prompt?: Array<{ text?: unknown }> }).prompt;
if (Array.isArray(prompt)) {
return prompt
.map((item) => (typeof item?.text === "string" ? item.text.trim() : ""))
.filter(Boolean)
.join("\n");
}
}
const result = envelope.result;
if (result && typeof result === "object") {
const text = (result as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
}
return typeof envelope.method === "string" ? envelope.method : JSON.stringify(payload);
}
function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], expectedText: string): boolean {
return transcript
.filter((event) => event.sender === "agent")
.map((event) => extractEventText(event))
.join("")
.includes(expectedText);
}
function average(values: number[]): number {
return values.reduce((sum, value) => sum + value, 0) / Math.max(values.length, 1);
}
async function measureWorkbenchSnapshot(
client: ReturnType<typeof createBackendClient>,
workspaceId: string,
iterations: number,
): Promise<{
avgMs: number;
maxMs: number;
payloadBytes: number;
handoffCount: number;
tabCount: number;
transcriptEventCount: number;
}> {
const durations: number[] = [];
let snapshot: HandoffWorkbenchSnapshot | null = null;
for (let index = 0; index < iterations; index += 1) {
const startedAt = performance.now();
snapshot = await client.getWorkbench(workspaceId);
durations.push(performance.now() - startedAt);
}
const finalSnapshot = snapshot ?? {
workspaceId,
repos: [],
projects: [],
handoffs: [],
};
const payloadBytes = Buffer.byteLength(JSON.stringify(finalSnapshot), "utf8");
const tabCount = finalSnapshot.handoffs.reduce((sum, handoff) => sum + handoff.tabs.length, 0);
const transcriptEventCount = finalSnapshot.handoffs.reduce(
(sum, handoff) =>
sum + handoff.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
0,
);
return {
avgMs: Math.round(average(durations)),
maxMs: Math.round(Math.max(...durations, 0)),
payloadBytes,
handoffCount: finalSnapshot.handoffs.length,
tabCount,
transcriptEventCount,
};
}
describe("e2e(client): workbench load", () => {
it.skipIf(!RUN_WORKBENCH_LOAD_E2E)(
"runs a simple sequential load profile against the real backend",
{ timeout: 30 * 60_000 },
async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const createHandoffLatencies: number[] = [];
const provisionLatencies: number[] = [];
const createSessionLatencies: number[] = [];
const messageRoundTripLatencies: number[] = [];
const snapshotSeries: Array<{
handoffCount: number;
avgMs: number;
maxMs: number;
payloadBytes: number;
tabCount: number;
transcriptEventCount: number;
}> = [];
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
for (let handoffIndex = 0; handoffIndex < handoffCount; handoffIndex += 1) {
const runId = `load-${handoffIndex}-${Date.now().toString(36)}`;
const initialReply = `LOAD_INIT_${runId}`;
const createStartedAt = performance.now();
const created = await client.createWorkbenchHandoff(workspaceId, {
repoId: repo.repoId,
title: `Workbench Load ${runId}`,
branch: `load/${runId}`,
model,
task: `Reply with exactly: ${initialReply}`,
});
createHandoffLatencies.push(performance.now() - createStartedAt);
const provisionStartedAt = performance.now();
const provisioned = await poll(
`handoff ${runId} provisioning`,
12 * 60_000,
pollIntervalMs,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = handoff.tabs[0];
return Boolean(
tab &&
handoff.status === "idle" &&
tab.status === "idle" &&
transcriptIncludesAgentText(tab.transcript, initialReply),
);
},
);
provisionLatencies.push(performance.now() - provisionStartedAt);
expect(provisioned.tabs.length).toBeGreaterThan(0);
const primaryTab = provisioned.tabs[0]!;
expect(transcriptIncludesAgentText(primaryTab.transcript, initialReply)).toBe(true);
for (let sessionIndex = 0; sessionIndex < extraSessionCount; sessionIndex += 1) {
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
const createSessionStartedAt = performance.now();
const createdSession = await client.createWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
model,
});
createSessionLatencies.push(performance.now() - createSessionStartedAt);
await client.sendWorkbenchMessage(workspaceId, {
handoffId: created.handoffId,
tabId: createdSession.tabId,
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
attachments: [],
});
const messageStartedAt = performance.now();
const withReply = await poll(
`handoff ${runId} session ${sessionIndex} reply`,
10 * 60_000,
pollIntervalMs,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = findTab(handoff, createdSession.tabId);
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
},
);
messageRoundTripLatencies.push(performance.now() - messageStartedAt);
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.tabId).transcript, expectedReply)).toBe(true);
}
const snapshotMetrics = await measureWorkbenchSnapshot(client, workspaceId, 3);
snapshotSeries.push(snapshotMetrics);
console.info(
"[workbench-load-snapshot]",
JSON.stringify({
handoffIndex: handoffIndex + 1,
...snapshotMetrics,
}),
);
}
const firstSnapshot = snapshotSeries[0]!;
const lastSnapshot = snapshotSeries[snapshotSeries.length - 1]!;
const summary = {
handoffCount,
extraSessionCount,
createHandoffAvgMs: Math.round(average(createHandoffLatencies)),
provisionAvgMs: Math.round(average(provisionLatencies)),
createSessionAvgMs: Math.round(average(createSessionLatencies)),
messageRoundTripAvgMs: Math.round(average(messageRoundTripLatencies)),
snapshotReadBaselineAvgMs: firstSnapshot.avgMs,
snapshotReadFinalAvgMs: lastSnapshot.avgMs,
snapshotReadFinalMaxMs: lastSnapshot.maxMs,
snapshotPayloadBaselineBytes: firstSnapshot.payloadBytes,
snapshotPayloadFinalBytes: lastSnapshot.payloadBytes,
snapshotTabFinalCount: lastSnapshot.tabCount,
snapshotTranscriptFinalCount: lastSnapshot.transcriptEventCount,
};
console.info("[workbench-load-summary]", JSON.stringify(summary));
expect(createHandoffLatencies.length).toBe(handoffCount);
expect(provisionLatencies.length).toBe(handoffCount);
expect(createSessionLatencies.length).toBe(handoffCount * extraSessionCount);
expect(messageRoundTripLatencies.length).toBe(handoffCount * extraSessionCount);
},
);
});

View file

@ -0,0 +1,31 @@
import { describe, expect, it } from "vitest";
import {
handoffKey,
handoffStatusSyncKey,
historyKey,
projectBranchSyncKey,
projectKey,
projectPrSyncKey,
sandboxInstanceKey,
workspaceKey
} from "../src/keys.js";
describe("actor keys", () => {
it("prefixes every key with workspace namespace", () => {
const keys = [
workspaceKey("default"),
projectKey("default", "repo"),
handoffKey("default", "repo", "handoff"),
sandboxInstanceKey("default", "daytona", "sbx"),
historyKey("default", "repo"),
projectPrSyncKey("default", "repo"),
projectBranchSyncKey("default", "repo"),
handoffStatusSyncKey("default", "repo", "handoff", "sandbox-1", "session-1")
];
for (const key of keys) {
expect(key[0]).toBe("ws");
expect(key[1]).toBe("default");
}
});
});

View file

@ -0,0 +1,92 @@
import { describe, expect, it } from "vitest";
import type { HandoffRecord } from "@openhandoff/shared";
import {
filterHandoffs,
formatRelativeAge,
fuzzyMatch,
summarizeHandoffs
} from "../src/view-model.js";
const sample: HandoffRecord = {
workspaceId: "default",
repoId: "repo-a",
repoRemote: "https://example.com/repo-a.git",
handoffId: "handoff-1",
branchName: "feature/test",
title: "Test Title",
task: "Do test",
providerId: "daytona",
status: "running",
statusMessage: null,
activeSandboxId: "sandbox-1",
activeSessionId: "session-1",
sandboxes: [
{
sandboxId: "sandbox-1",
providerId: "daytona",
sandboxActorId: null,
switchTarget: "daytona://sandbox-1",
cwd: null,
createdAt: 1,
updatedAt: 1
}
],
agentType: null,
prSubmitted: false,
diffStat: null,
prUrl: null,
prAuthor: null,
ciStatus: null,
reviewStatus: null,
reviewer: null,
conflictsWithMain: null,
hasUnpushed: null,
parentBranch: null,
createdAt: 1,
updatedAt: 1
};
describe("search helpers", () => {
it("supports ordered fuzzy matching", () => {
expect(fuzzyMatch("feature/test-branch", "ftb")).toBe(true);
expect(fuzzyMatch("feature/test-branch", "fbt")).toBe(false);
});
it("filters rows across branch and title", () => {
const rows: HandoffRecord[] = [
sample,
{
...sample,
handoffId: "handoff-2",
branchName: "docs/update-intro",
title: "Docs Intro Refresh",
status: "idle"
}
];
expect(filterHandoffs(rows, "doc")).toHaveLength(1);
expect(filterHandoffs(rows, "h2")).toHaveLength(1);
expect(filterHandoffs(rows, "test")).toHaveLength(2);
});
});
describe("summary helpers", () => {
it("formats relative age", () => {
expect(formatRelativeAge(9_000, 10_000)).toBe("1s");
expect(formatRelativeAge(0, 120_000)).toBe("2m");
});
it("summarizes by status and provider", () => {
const rows: HandoffRecord[] = [
sample,
{ ...sample, handoffId: "handoff-2", status: "idle", providerId: "daytona" },
{ ...sample, handoffId: "handoff-3", status: "error", providerId: "daytona" }
];
const summary = summarizeHandoffs(rows);
expect(summary.total).toBe(3);
expect(summary.byStatus.running).toBe(1);
expect(summary.byStatus.idle).toBe(1);
expect(summary.byStatus.error).toBe(1);
expect(summary.byProvider.daytona).toBe(3);
});
});