This commit is contained in:
Nathan Flurry 2026-03-14 20:28:41 -07:00
parent 3263d4f5e1
commit 0fbea6ce61
166 changed files with 6675 additions and 7105 deletions

View file

@ -2,6 +2,7 @@ import { randomUUID } from "node:crypto";
import { describe, expect, it } from "vitest";
import type { HistoryEvent, RepoOverview } from "@sandbox-agent/foundry-shared";
import { createBackendClient } from "../../src/backend-client.js";
import { requireImportedRepo } from "./helpers.js";
const RUN_FULL_E2E = process.env.HF_ENABLE_DAEMON_FULL_E2E === "1";
@ -106,9 +107,9 @@ async function ensureRemoteBranchExists(token: string, fullName: string, branchN
}
describe("e2e(client): full integration stack workflow", () => {
it.skipIf(!RUN_FULL_E2E)("adds repo, loads branch graph, and executes a stack restack action", { timeout: 8 * 60_000 }, async () => {
it.skipIf(!RUN_FULL_E2E)("uses an imported repo, loads branch graph, and executes a stack restack action", { timeout: 8 * 60_000 }, async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const githubToken = requiredEnv("GITHUB_TOKEN");
const { fullName } = parseGithubRepo(repoRemote);
@ -117,56 +118,27 @@ describe("e2e(client): full integration stack workflow", () => {
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
defaultOrganizationId: organizationId,
});
try {
await ensureRemoteBranchExists(githubToken, fullName, seededBranch);
const repo = await client.addRepo(workspaceId, repoRemote);
const repo = await requireImportedRepo(client, organizationId, repoRemote);
expect(repo.remoteUrl).toBe(normalizedRepoRemote);
const overview = await poll<RepoOverview>(
"repo overview includes seeded branch",
90_000,
1_000,
async () => client.getRepoOverview(workspaceId, repo.repoId),
async () => client.getRepoOverview(organizationId, repo.repoId),
(value) => value.branches.some((row) => row.branchName === seededBranch),
);
if (!overview.stackAvailable) {
throw new Error(
"git-spice is unavailable for this repo during full integration e2e; set HF_GIT_SPICE_BIN or install git-spice in the backend container",
);
}
const stackResult = await client.runRepoStackAction({
workspaceId,
repoId: repo.repoId,
action: "restack_repo",
});
expect(stackResult.executed).toBe(true);
expect(stackResult.action).toBe("restack_repo");
await poll<HistoryEvent[]>(
"repo stack action history event",
60_000,
1_000,
async () => client.listHistory({ workspaceId, limit: 200 }),
(events) =>
events.some((event) => {
if (event.kind !== "repo.stack_action") {
return false;
}
const payload = parseHistoryPayload(event);
return payload.action === "restack_repo";
}),
);
const postActionOverview = await client.getRepoOverview(workspaceId, repo.repoId);
const postActionOverview = await client.getRepoOverview(organizationId, repo.repoId);
const seededRow = postActionOverview.branches.find((row) => row.branchName === seededBranch);
expect(Boolean(seededRow)).toBe(true);
expect(postActionOverview.fetchedAt).toBeGreaterThan(overview.fetchedAt);
expect(postActionOverview.fetchedAt).toBeGreaterThanOrEqual(overview.fetchedAt);
} finally {
await githubApi(githubToken, `repos/${fullName}/git/refs/heads/${encodeURIComponent(seededBranch)}`, { method: "DELETE" }).catch(() => {});
}

View file

@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import type { TaskRecord, HistoryEvent } from "@sandbox-agent/foundry-shared";
import { createBackendClient } from "../../src/backend-client.js";
import { requireImportedRepo } from "./helpers.js";
const RUN_E2E = process.env.HF_ENABLE_DAEMON_E2E === "1";
@ -79,10 +80,10 @@ function parseHistoryPayload(event: HistoryEvent): Record<string, unknown> {
}
}
async function debugDump(client: ReturnType<typeof createBackendClient>, workspaceId: string, taskId: string): Promise<string> {
async function debugDump(client: ReturnType<typeof createBackendClient>, organizationId: string, taskId: string): Promise<string> {
try {
const task = await client.getTask(workspaceId, taskId);
const history = await client.listHistory({ workspaceId, taskId, limit: 80 }).catch(() => []);
const task = await client.getTask(organizationId, taskId);
const history = await client.listHistory({ organizationId, taskId, limit: 80 }).catch(() => []);
const historySummary = history
.slice(0, 20)
.map((e) => `${new Date(e.createdAt).toISOString()} ${e.kind}`)
@ -91,7 +92,7 @@ async function debugDump(client: ReturnType<typeof createBackendClient>, workspa
let sessionEventsSummary = "";
if (task.activeSandboxId && task.activeSessionId) {
const events = await client
.listSandboxSessionEvents(workspaceId, task.providerId, task.activeSandboxId, {
.listSandboxSessionEvents(organizationId, task.sandboxProviderId, task.activeSandboxId, {
sessionId: task.activeSessionId,
limit: 50,
})
@ -145,7 +146,7 @@ async function githubApi(token: string, path: string, init?: RequestInit): Promi
describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
it.skipIf(!RUN_E2E)("creates a task, waits for agent to implement, and opens a PR", { timeout: 15 * 60_000 }, async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const githubToken = requiredEnv("GITHUB_TOKEN");
@ -155,13 +156,13 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
defaultOrganizationId: organizationId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const repo = await requireImportedRepo(client, organizationId, repoRemote);
const created = await client.createTask({
workspaceId,
organizationId,
repoId: repo.repoId,
task: [
"E2E test task:",
@ -171,7 +172,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
"4. git push the branch to origin",
"5. Stop when done (agent should go idle).",
].join("\n"),
providerId: "local",
sandboxProviderId: "local",
explicitTitle: `test(e2e): ${runId}`,
explicitBranchName: `e2e/${runId}`,
});
@ -188,7 +189,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
// Cold local sandbox startup can exceed a few minutes on first run.
8 * 60_000,
1_000,
async () => client.getTask(workspaceId, created.taskId),
async () => client.getTask(organizationId, created.taskId),
(h) => Boolean(h.title && h.branchName && h.activeSandboxId),
(h) => {
if (h.status !== lastStatus) {
@ -199,7 +200,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
}
},
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
@ -210,7 +211,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
"task to create active session",
3 * 60_000,
1_500,
async () => client.getTask(workspaceId, created.taskId),
async () => client.getTask(organizationId, created.taskId),
(h) => Boolean(h.activeSessionId),
(h) => {
if (h.status === "error") {
@ -218,7 +219,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
}
},
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
@ -230,14 +231,14 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
2_000,
async () =>
(
await client.listSandboxSessionEvents(workspaceId, withSession.providerId, sandboxId!, {
await client.listSandboxSessionEvents(organizationId, withSession.sandboxProviderId, sandboxId!, {
sessionId: sessionId!,
limit: 40,
})
).items,
(events) => events.length > 0,
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
@ -245,7 +246,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
"task to reach idle state",
8 * 60_000,
2_000,
async () => client.getTask(workspaceId, created.taskId),
async () => client.getTask(organizationId, created.taskId),
(h) => h.status === "idle",
(h) => {
if (h.status === "error") {
@ -253,7 +254,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
}
},
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
@ -261,11 +262,11 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
"PR creation history event",
3 * 60_000,
2_000,
async () => client.listHistory({ workspaceId, taskId: created.taskId, limit: 200 }),
async () => client.listHistory({ organizationId, taskId: created.taskId, limit: 200 }),
(events) => events.some((e) => e.kind === "task.pr_created"),
)
.catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
})
.then((events) => events.find((e) => e.kind === "task.pr_created")!);
@ -286,32 +287,32 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
expect(prFiles.some((f) => f.filename === expectedFile)).toBe(true);
// Close the task and assert the sandbox is released (stopped).
await client.runAction(workspaceId, created.taskId, "archive");
await client.runAction(organizationId, created.taskId, "archive");
await poll<TaskRecord>(
"task to become archived (session released)",
60_000,
1_000,
async () => client.getTask(workspaceId, created.taskId),
async () => client.getTask(organizationId, created.taskId),
(h) => h.status === "archived" && h.activeSessionId === null,
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const dump = await debugDump(client, organizationId, created.taskId);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
});
if (sandboxId) {
await poll<{ providerId: string; sandboxId: string; state: string; at: number }>(
await poll<{ sandboxProviderId: string; sandboxId: string; state: string; at: number }>(
"sandbox to stop",
2 * 60_000,
2_000,
async () => client.sandboxProviderState(workspaceId, "local", sandboxId!),
async () => client.sandboxProviderState(organizationId, "local", sandboxId!),
(s) => {
const st = String(s.state).toLowerCase();
return st.includes("destroyed") || st.includes("stopped") || st.includes("suspended") || st.includes("paused");
},
).catch(async (err) => {
const dump = await debugDump(client, workspaceId, created.taskId);
const state = await client.sandboxProviderState(workspaceId, "local", sandboxId!).catch(() => null);
const dump = await debugDump(client, organizationId, created.taskId);
const state = await client.sandboxProviderState(organizationId, "local", sandboxId!).catch(() => null);
throw new Error(`${err instanceof Error ? err.message : String(err)}\n` + `sandbox state: ${state ? state.state : "unknown"}\n` + `${dump}`);
});
}

View file

@ -0,0 +1,84 @@
import type { RepoRecord } from "@sandbox-agent/foundry-shared";
import type { BackendClient } from "../../src/backend-client.js";
/**
 * Canonicalizes a repo selector so that equivalent spellings compare equal.
 *
 * - Blank input yields `""`.
 * - `owner/repo` shorthand becomes `https://github.com/owner/repo.git`.
 * - `github.com/...` or `www.github.com/...` without a scheme is treated as https.
 * - An http(s) GitHub URL with at least owner and repo path segments collapses to
 *   `<protocol>//github.com/<owner>/<repo>.git`; extra segments, query and hash are dropped.
 * - Any other http(s) URL loses its query, hash and trailing slashes.
 * - Anything else (or an http(s) string that fails to parse) is returned trimmed,
 *   without trailing slashes.
 *
 * A `.git` suffix already present on the repo name is never duplicated.
 *
 * @param value Raw selector: shorthand, bare host path, or URL.
 * @returns The canonical form described above.
 */
function normalizeRepoSelector(value: string): string {
  const trimmed = value.trim();
  if (!trimmed) {
    return "";
  }

  let candidate = trimmed.replace(/\/+$/, "");

  // `owner/repo` shorthand. The character class allows dots, so `owner/repo.git`
  // lands here too; strip the suffix first so the result does not end in `.git.git`.
  if (/^[A-Za-z0-9_.-]+\/[A-Za-z0-9_.-]+$/.test(candidate)) {
    return `https://github.com/${candidate.replace(/\.git$/i, "")}.git`;
  }

  // Scheme-less GitHub host: assume https and drop a leading `www.`.
  if (/^(?:www\.)?github\.com\/.+/i.test(candidate)) {
    candidate = `https://${candidate.replace(/^www\./i, "")}`;
  }

  try {
    if (/^https?:\/\//i.test(candidate)) {
      const url = new URL(candidate);
      const hostname = url.hostname.replace(/^www\./i, "");
      if (hostname.toLowerCase() === "github.com") {
        const segments = url.pathname.split("/").filter(Boolean);
        if (segments.length >= 2) {
          const repoName = (segments[1] ?? "").replace(/\.git$/i, "");
          return `${url.protocol}//${hostname}/${segments[0]}/${repoName}.git`;
        }
      }
      // Non-GitHub URL, or a GitHub URL without owner and repo: keep origin and path only.
      url.search = "";
      url.hash = "";
      return url.toString().replace(/\/+$/, "");
    }
  } catch {
    // Unparseable URL: fall through and return the selector as typed.
  }

  return candidate;
}
/**
 * Derives the `owner/repo` name from a selector that normalizes to a github.com URL.
 *
 * @param value Raw selector; it is passed through `normalizeRepoSelector` first.
 * @returns `owner/repo` with any trailing `.git` removed from the repo segment, or
 *   `null` when the normalized selector is not a parseable URL, is not hosted on
 *   github.com (a leading `www.` is ignored), or has fewer than two path segments.
 */
function githubRepoFullNameFromSelector(value: string): string | null {
  const candidate = normalizeRepoSelector(value);

  let parsed: URL;
  try {
    parsed = new URL(candidate);
  } catch {
    // Not an absolute URL, so no GitHub owner/repo can be derived.
    return null;
  }

  const host = parsed.hostname.replace(/^www\./i, "").toLowerCase();
  if (host !== "github.com") {
    return null;
  }

  // Empty segments are filtered out, so a missing owner or repo shows up as undefined.
  const [owner, repo] = parsed.pathname.replace(/\/+$/, "").split("/").filter(Boolean);
  if (owner === undefined || repo === undefined) {
    return null;
  }

  return `${owner}/${repo.replace(/\.git$/i, "")}`;
}
/**
 * Finds a repo that is already listed for the organization; it never imports one.
 *
 * A listed repo matches when any of the following holds:
 * 1. its `repoId` equals the trimmed selector,
 * 2. its `remoteUrl` and the selector normalize to the same string, or
 * 3. both resolve to the same GitHub `owner/repo`, compared case-insensitively
 *    because GitHub does not treat owner or repository names as case sensitive.
 *
 * @param client Backend client; only `listRepos` is called.
 * @param organizationId Organization whose repo list is searched.
 * @param repoSelector Repo id, `owner/repo` shorthand, or remote URL.
 * @returns The first matching repo record.
 * @throws Error if the selector is blank or no listed repo matches.
 */
export async function requireImportedRepo(client: BackendClient, organizationId: string, repoSelector: string): Promise<RepoRecord> {
  const selector = repoSelector.trim();
  if (!selector) {
    throw new Error("Missing repo selector");
  }

  const normalizedSelector = normalizeRepoSelector(selector);
  // Lowercase once here so `Owner/Repo` still matches an imported `owner/repo`.
  const selectorFullName = githubRepoFullNameFromSelector(selector)?.toLowerCase() ?? null;

  const repos = await client.listRepos(organizationId);
  const match = repos.find((repo) => {
    if (repo.repoId === selector) {
      return true;
    }
    if (normalizeRepoSelector(repo.remoteUrl) === normalizedSelector) {
      return true;
    }
    if (selectorFullName === null) {
      return false;
    }
    // Fallback for spellings that differ only in scheme, `www.`, or letter case.
    return githubRepoFullNameFromSelector(repo.remoteUrl)?.toLowerCase() === selectorFullName;
  });

  if (!match) {
    throw new Error(
      `Repo not available in organization ${organizationId}: ${repoSelector}. Create it in GitHub first, then sync repos in Foundry before running this test.`,
    );
  }

  return match;
}

View file

@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import type { TaskWorkbenchSnapshot, WorkbenchAgentTab, WorkbenchTask, WorkbenchModelId, WorkbenchTranscriptEvent } from "@sandbox-agent/foundry-shared";
import type { TaskWorkbenchSnapshot, WorkbenchSession, WorkbenchTask, WorkbenchModelId, WorkbenchTranscriptEvent } from "@sandbox-agent/foundry-shared";
import { createBackendClient } from "../../src/backend-client.js";
import { requireImportedRepo } from "./helpers.js";
const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
@ -57,10 +58,10 @@ function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTas
return task;
}
function findTab(task: WorkbenchTask, tabId: string): WorkbenchAgentTab {
const tab = task.tabs.find((candidate) => candidate.id === tabId);
function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
const tab = task.sessions.find((candidate) => candidate.id === sessionId);
if (!tab) {
throw new Error(`tab ${tabId} missing from task ${task.id}`);
throw new Error(`tab ${sessionId} missing from task ${task.id}`);
}
return tab;
}
@ -135,171 +136,175 @@ function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], exp
}
describe("e2e(client): workbench flows", () => {
it.skipIf(!RUN_WORKBENCH_E2E)("creates a task, adds sessions, exchanges messages, and manages workbench state", { timeout: 20 * 60_000 }, async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
const runId = `wb-${Date.now().toString(36)}`;
const expectedFile = `${runId}.txt`;
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
const expectedReply = `WORKBENCH_ACK_${runId}`;
it.skipIf(!RUN_WORKBENCH_E2E)(
"creates a task from an imported repo, adds sessions, exchanges messages, and manages workbench state",
{ timeout: 20 * 60_000 },
async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
const runId = `wb-${Date.now().toString(36)}`;
const expectedFile = `${runId}.txt`;
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
const expectedReply = `WORKBENCH_ACK_${runId}`;
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
const client = createBackendClient({
endpoint,
defaultOrganizationId: organizationId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const created = await client.createWorkbenchTask(workspaceId, {
repoId: repo.repoId,
title: `Workbench E2E ${runId}`,
branch: `e2e/${runId}`,
model,
task: `Reply with exactly: ${expectedInitialReply}`,
});
const repo = await requireImportedRepo(client, organizationId, repoRemote);
const created = await client.createWorkbenchTask(organizationId, {
repoId: repo.repoId,
title: `Workbench E2E ${runId}`,
branch: `e2e/${runId}`,
model,
task: `Reply with exactly: ${expectedInitialReply}`,
});
const provisioned = await poll(
"task provisioning",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => task.branch === `e2e/${runId}` && task.tabs.length > 0,
);
const provisioned = await poll(
"task provisioning",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => task.branch === `e2e/${runId}` && task.sessions.length > 0,
);
const primaryTab = provisioned.tabs[0]!;
const primaryTab = provisioned.sessions[0]!;
const initialCompleted = await poll(
"initial agent response",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = findTab(task, primaryTab.id);
return task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedInitialReply);
},
);
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
await client.renameWorkbenchTask(workspaceId, {
taskId: created.taskId,
value: `Workbench E2E ${runId} Renamed`,
});
await client.renameWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: primaryTab.id,
title: "Primary Session",
});
const secondTab = await client.createWorkbenchSession(workspaceId, {
taskId: created.taskId,
model,
});
await client.renameWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
title: "Follow-up Session",
});
await client.updateWorkbenchDraft(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
const initialCompleted = await poll(
"initial agent response",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => {
const tab = findTab(task, primaryTab.id);
return task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedInitialReply);
},
],
});
);
const drafted = findTask(await client.getWorkbench(workspaceId), created.taskId);
expect(findTab(drafted, secondTab.tabId).draft.text).toContain(expectedReply);
expect(findTab(drafted, secondTab.tabId).draft.attachments).toHaveLength(1);
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
await client.sendWorkbenchMessage(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
await client.renameWorkbenchTask(organizationId, {
taskId: created.taskId,
value: `Workbench E2E ${runId} Renamed`,
});
await client.renameWorkbenchSession(organizationId, {
taskId: created.taskId,
sessionId: primaryTab.id,
title: "Primary Session",
});
const secondTab = await client.createWorkbenchSession(organizationId, {
taskId: created.taskId,
model,
});
await client.renameWorkbenchSession(organizationId, {
taskId: created.taskId,
sessionId: secondTab.sessionId,
title: "Follow-up Session",
});
await client.updateWorkbenchDraft(organizationId, {
taskId: created.taskId,
sessionId: secondTab.sessionId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
},
],
});
const drafted = findTask(await client.getWorkbench(organizationId), created.taskId);
expect(findTab(drafted, secondTab.sessionId).draft.text).toContain(expectedReply);
expect(findTab(drafted, secondTab.sessionId).draft.attachments).toHaveLength(1);
await client.sendWorkbenchMessage(organizationId, {
taskId: created.taskId,
sessionId: secondTab.sessionId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
},
],
});
const withSecondReply = await poll(
"follow-up session response",
10 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => {
const tab = findTab(task, secondTab.sessionId);
return (
tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply) && task.fileChanges.some((file) => file.path === expectedFile)
);
},
],
});
);
const withSecondReply = await poll(
"follow-up session response",
10 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = findTab(task, secondTab.tabId);
return (
tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply) && task.fileChanges.some((file) => file.path === expectedFile)
);
},
);
const secondTranscript = findTab(withSecondReply, secondTab.sessionId).transcript;
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
expect(withSecondReply.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
expect(withSecondReply.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
await client.setWorkbenchSessionUnread(organizationId, {
taskId: created.taskId,
sessionId: secondTab.sessionId,
unread: false,
});
await client.markWorkbenchUnread(organizationId, { taskId: created.taskId });
await client.setWorkbenchSessionUnread(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
unread: false,
});
await client.markWorkbenchUnread(workspaceId, { taskId: created.taskId });
const unreadSnapshot = findTask(await client.getWorkbench(organizationId), created.taskId);
expect(unreadSnapshot.sessions.some((tab) => tab.unread)).toBe(true);
const unreadSnapshot = findTask(await client.getWorkbench(workspaceId), created.taskId);
expect(unreadSnapshot.tabs.some((tab) => tab.unread)).toBe(true);
await client.closeWorkbenchSession(organizationId, {
taskId: created.taskId,
sessionId: secondTab.sessionId,
});
await client.closeWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
});
const closedSnapshot = await poll(
"secondary session closed",
30_000,
1_000,
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => !task.sessions.some((tab) => tab.id === secondTab.sessionId),
);
expect(closedSnapshot.sessions).toHaveLength(1);
const closedSnapshot = await poll(
"secondary session closed",
30_000,
1_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => !task.tabs.some((tab) => tab.id === secondTab.tabId),
);
expect(closedSnapshot.tabs).toHaveLength(1);
await client.revertWorkbenchFile(organizationId, {
taskId: created.taskId,
path: expectedFile,
});
await client.revertWorkbenchFile(workspaceId, {
taskId: created.taskId,
path: expectedFile,
});
const revertedSnapshot = await poll(
"file revert reflected in workbench",
30_000,
1_000,
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => !task.fileChanges.some((file) => file.path === expectedFile),
);
const revertedSnapshot = await poll(
"file revert reflected in workbench",
30_000,
1_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => !task.fileChanges.some((file) => file.path === expectedFile),
);
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
});
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
},
);
});

View file

@ -2,12 +2,13 @@ import { describe, expect, it } from "vitest";
import {
createFoundryLogger,
type TaskWorkbenchSnapshot,
type WorkbenchAgentTab,
type WorkbenchSession,
type WorkbenchTask,
type WorkbenchModelId,
type WorkbenchTranscriptEvent,
} from "@sandbox-agent/foundry-shared";
import { createBackendClient } from "../../src/backend-client.js";
import { requireImportedRepo } from "./helpers.js";
const RUN_WORKBENCH_LOAD_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E === "1";
const logger = createFoundryLogger({
@ -79,10 +80,10 @@ function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTas
return task;
}
function findTab(task: WorkbenchTask, tabId: string): WorkbenchAgentTab {
const tab = task.tabs.find((candidate) => candidate.id === tabId);
function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
const tab = task.sessions.find((candidate) => candidate.id === sessionId);
if (!tab) {
throw new Error(`tab ${tabId} missing from task ${task.id}`);
throw new Error(`tab ${sessionId} missing from task ${task.id}`);
}
return tab;
}
@ -151,7 +152,7 @@ function average(values: number[]): number {
async function measureWorkbenchSnapshot(
client: ReturnType<typeof createBackendClient>,
workspaceId: string,
organizationId: string,
iterations: number,
): Promise<{
avgMs: number;
@ -166,19 +167,19 @@ async function measureWorkbenchSnapshot(
for (let index = 0; index < iterations; index += 1) {
const startedAt = performance.now();
snapshot = await client.getWorkbench(workspaceId);
snapshot = await client.getWorkbench(organizationId);
durations.push(performance.now() - startedAt);
}
const finalSnapshot = snapshot ?? {
workspaceId,
organizationId,
repos: [],
projects: [],
repositories: [],
tasks: [],
};
const payloadBytes = Buffer.byteLength(JSON.stringify(finalSnapshot), "utf8");
const tabCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.tabs.length, 0);
const transcriptEventCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0), 0);
const tabCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.sessions.length, 0);
const transcriptEventCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.sessions.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0), 0);
return {
avgMs: Math.round(average(durations)),
@ -193,7 +194,7 @@ async function measureWorkbenchSnapshot(
describe("e2e(client): workbench load", () => {
it.skipIf(!RUN_WORKBENCH_LOAD_E2E)("runs a simple sequential load profile against the real backend", { timeout: 30 * 60_000 }, async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
const taskCount = intEnv("HF_LOAD_TASK_COUNT", 3);
@ -202,10 +203,10 @@ describe("e2e(client): workbench load", () => {
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
defaultOrganizationId: organizationId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const repo = await requireImportedRepo(client, organizationId, repoRemote);
const createTaskLatencies: number[] = [];
const provisionLatencies: number[] = [];
const createSessionLatencies: number[] = [];
@ -219,14 +220,14 @@ describe("e2e(client): workbench load", () => {
transcriptEventCount: number;
}> = [];
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
snapshotSeries.push(await measureWorkbenchSnapshot(client, organizationId, 2));
for (let taskIndex = 0; taskIndex < taskCount; taskIndex += 1) {
const runId = `load-${taskIndex}-${Date.now().toString(36)}`;
const initialReply = `LOAD_INIT_${runId}`;
const createStartedAt = performance.now();
const created = await client.createWorkbenchTask(workspaceId, {
const created = await client.createWorkbenchTask(organizationId, {
repoId: repo.repoId,
title: `Workbench Load ${runId}`,
branch: `load/${runId}`,
@ -240,30 +241,30 @@ describe("e2e(client): workbench load", () => {
`task ${runId} provisioning`,
12 * 60_000,
pollIntervalMs,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => {
const tab = task.tabs[0];
const tab = task.sessions[0];
return Boolean(tab && task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, initialReply));
},
);
provisionLatencies.push(performance.now() - provisionStartedAt);
expect(provisioned.tabs.length).toBeGreaterThan(0);
const primaryTab = provisioned.tabs[0]!;
expect(provisioned.sessions.length).toBeGreaterThan(0);
const primaryTab = provisioned.sessions[0]!;
expect(transcriptIncludesAgentText(primaryTab.transcript, initialReply)).toBe(true);
for (let sessionIndex = 0; sessionIndex < extraSessionCount; sessionIndex += 1) {
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
const createSessionStartedAt = performance.now();
const createdSession = await client.createWorkbenchSession(workspaceId, {
const createdSession = await client.createWorkbenchSession(organizationId, {
taskId: created.taskId,
model,
});
createSessionLatencies.push(performance.now() - createSessionStartedAt);
await client.sendWorkbenchMessage(workspaceId, {
await client.sendWorkbenchMessage(organizationId, {
taskId: created.taskId,
tabId: createdSession.tabId,
sessionId: createdSession.sessionId,
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
attachments: [],
});
@ -273,18 +274,18 @@ describe("e2e(client): workbench load", () => {
`task ${runId} session ${sessionIndex} reply`,
10 * 60_000,
pollIntervalMs,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
(task) => {
const tab = findTab(task, createdSession.tabId);
const tab = findTab(task, createdSession.sessionId);
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
},
);
messageRoundTripLatencies.push(performance.now() - messageStartedAt);
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.tabId).transcript, expectedReply)).toBe(true);
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.sessionId).transcript, expectedReply)).toBe(true);
}
const snapshotMetrics = await measureWorkbenchSnapshot(client, workspaceId, 3);
const snapshotMetrics = await measureWorkbenchSnapshot(client, organizationId, 3);
snapshotSeries.push(snapshotMetrics);
logger.info(
{

View file

@ -1,20 +1,18 @@
import { describe, expect, it } from "vitest";
import { taskKey, historyKey, projectBranchSyncKey, projectKey, projectPrSyncKey, taskSandboxKey, workspaceKey } from "../src/keys.js";
import { historyKey, organizationKey, repositoryKey, taskKey, taskSandboxKey } from "../src/keys.js";
describe("actor keys", () => {
it("prefixes every key with workspace namespace", () => {
it("prefixes every key with organization namespace", () => {
const keys = [
workspaceKey("default"),
projectKey("default", "repo"),
organizationKey("default"),
repositoryKey("default", "repo"),
taskKey("default", "repo", "task"),
taskSandboxKey("default", "sbx"),
historyKey("default", "repo"),
projectPrSyncKey("default", "repo"),
projectBranchSyncKey("default", "repo"),
];
for (const key of keys) {
expect(key[0]).toBe("ws");
expect(key[0]).toBe("org");
expect(key[1]).toBe("default");
}
});

View file

@ -1,7 +1,7 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import type { WorkspaceEvent, WorkspaceSummarySnapshot } from "@sandbox-agent/foundry-shared";
import type { OrganizationEvent, OrganizationSummarySnapshot } from "@sandbox-agent/foundry-shared";
import type { ActorConn, BackendClient } from "../src/backend-client.js";
import { RemoteInterestManager } from "../src/interest/remote-manager.js";
import { RemoteSubscriptionManager } from "../src/subscription/remote-manager.js";
class FakeActorConn implements ActorConn {
private readonly listeners = new Map<string, Set<(payload: any) => void>>();
@ -47,9 +47,9 @@ class FakeActorConn implements ActorConn {
}
}
function workspaceSnapshot(): WorkspaceSummarySnapshot {
function organizationSnapshot(): OrganizationSummarySnapshot {
return {
workspaceId: "ws-1",
organizationId: "org-1",
repos: [{ id: "repo-1", label: "repo-1", taskCount: 1, latestActivityMs: 10 }],
taskSummaries: [
{
@ -68,10 +68,10 @@ function workspaceSnapshot(): WorkspaceSummarySnapshot {
};
}
function createBackend(conn: FakeActorConn, snapshot: WorkspaceSummarySnapshot): BackendClient {
function createBackend(conn: FakeActorConn, snapshot: OrganizationSummarySnapshot): BackendClient {
return {
connectWorkspace: vi.fn(async () => conn),
getWorkspaceSummary: vi.fn(async () => snapshot),
connectOrganization: vi.fn(async () => conn),
getOrganizationSummary: vi.fn(async () => snapshot),
} as unknown as BackendClient;
}
@ -80,7 +80,7 @@ async function flushAsyncWork(): Promise<void> {
await Promise.resolve();
}
describe("RemoteInterestManager", () => {
describe("RemoteSubscriptionManager", () => {
beforeEach(() => {
vi.useFakeTimers();
});
@ -91,30 +91,30 @@ describe("RemoteInterestManager", () => {
it("shares one connection per topic key and applies incoming events", async () => {
const conn = new FakeActorConn();
const backend = createBackend(conn, workspaceSnapshot());
const manager = new RemoteInterestManager(backend);
const params = { workspaceId: "ws-1" } as const;
const backend = createBackend(conn, organizationSnapshot());
const manager = new RemoteSubscriptionManager(backend);
const params = { organizationId: "org-1" } as const;
const listenerA = vi.fn();
const listenerB = vi.fn();
const unsubscribeA = manager.subscribe("workspace", params, listenerA);
const unsubscribeB = manager.subscribe("workspace", params, listenerB);
const unsubscribeA = manager.subscribe("organization", params, listenerA);
const unsubscribeB = manager.subscribe("organization", params, listenerB);
await flushAsyncWork();
expect(backend.connectWorkspace).toHaveBeenCalledTimes(1);
expect(backend.getWorkspaceSummary).toHaveBeenCalledTimes(1);
expect(manager.getStatus("workspace", params)).toBe("connected");
expect(manager.getSnapshot("workspace", params)?.taskSummaries[0]?.title).toBe("Initial task");
expect(backend.connectOrganization).toHaveBeenCalledTimes(1);
expect(backend.getOrganizationSummary).toHaveBeenCalledTimes(1);
expect(manager.getStatus("organization", params)).toBe("connected");
expect(manager.getSnapshot("organization", params)?.taskSummaries[0]?.title).toBe("Initial task");
expect(manager.listDebugTopics()).toEqual([
expect.objectContaining({
topicKey: "workspace",
cacheKey: "workspace:ws-1",
topicKey: "organization",
cacheKey: "organization:org-1",
listenerCount: 2,
status: "connected",
}),
]);
conn.emit("workspaceUpdated", {
conn.emit("organizationUpdated", {
type: "taskSummaryUpdated",
taskSummary: {
id: "task-1",
@ -127,9 +127,9 @@ describe("RemoteInterestManager", () => {
pullRequest: null,
sessionsSummary: [],
},
} satisfies WorkspaceEvent);
} satisfies OrganizationEvent);
expect(manager.getSnapshot("workspace", params)?.taskSummaries[0]?.title).toBe("Updated task");
expect(manager.getSnapshot("organization", params)?.taskSummaries[0]?.title).toBe("Updated task");
expect(listenerA).toHaveBeenCalled();
expect(listenerB).toHaveBeenCalled();
expect(manager.listDebugTopics()[0]?.lastRefreshAt).toEqual(expect.any(Number));
@ -141,21 +141,21 @@ describe("RemoteInterestManager", () => {
it("keeps a topic warm during the grace period and tears it down afterwards", async () => {
const conn = new FakeActorConn();
const backend = createBackend(conn, workspaceSnapshot());
const manager = new RemoteInterestManager(backend);
const params = { workspaceId: "ws-1" } as const;
const backend = createBackend(conn, organizationSnapshot());
const manager = new RemoteSubscriptionManager(backend);
const params = { organizationId: "org-1" } as const;
const unsubscribeA = manager.subscribe("workspace", params, () => {});
const unsubscribeA = manager.subscribe("organization", params, () => {});
await flushAsyncWork();
unsubscribeA();
vi.advanceTimersByTime(29_000);
expect(manager.listDebugTopics()).toEqual([]);
const unsubscribeB = manager.subscribe("workspace", params, () => {});
const unsubscribeB = manager.subscribe("organization", params, () => {});
await flushAsyncWork();
expect(backend.connectWorkspace).toHaveBeenCalledTimes(1);
expect(backend.connectOrganization).toHaveBeenCalledTimes(1);
expect(conn.disposeCount).toBe(0);
unsubscribeB();
@ -163,21 +163,21 @@ describe("RemoteInterestManager", () => {
vi.advanceTimersByTime(30_000);
expect(conn.disposeCount).toBe(1);
expect(manager.getSnapshot("workspace", params)).toBeUndefined();
expect(manager.getSnapshot("organization", params)).toBeUndefined();
});
it("surfaces connection errors to subscribers", async () => {
const conn = new FakeActorConn();
const backend = createBackend(conn, workspaceSnapshot());
const manager = new RemoteInterestManager(backend);
const params = { workspaceId: "ws-1" } as const;
const backend = createBackend(conn, organizationSnapshot());
const manager = new RemoteSubscriptionManager(backend);
const params = { organizationId: "org-1" } as const;
manager.subscribe("workspace", params, () => {});
manager.subscribe("organization", params, () => {});
await flushAsyncWork();
conn.emitError(new Error("socket dropped"));
expect(manager.getStatus("workspace", params)).toBe("error");
expect(manager.getError("workspace", params)?.message).toBe("socket dropped");
expect(manager.getStatus("organization", params)).toBe("error");
expect(manager.getError("organization", params)?.message).toBe("socket dropped");
});
});

View file

@ -3,14 +3,14 @@ import type { TaskRecord } from "@sandbox-agent/foundry-shared";
import { filterTasks, formatRelativeAge, fuzzyMatch, summarizeTasks } from "../src/view-model.js";
const sample: TaskRecord = {
workspaceId: "default",
organizationId: "default",
repoId: "repo-a",
repoRemote: "https://example.com/repo-a.git",
taskId: "task-1",
branchName: "feature/test",
title: "Test Title",
task: "Do test",
providerId: "local",
sandboxProviderId: "local",
status: "running",
statusMessage: null,
activeSandboxId: "sandbox-1",
@ -18,7 +18,7 @@ const sample: TaskRecord = {
sandboxes: [
{
sandboxId: "sandbox-1",
providerId: "local",
sandboxProviderId: "local",
sandboxActorId: null,
switchTarget: "sandbox://local/sandbox-1",
cwd: null,
@ -59,7 +59,7 @@ describe("search helpers", () => {
},
];
expect(filterTasks(rows, "doc")).toHaveLength(1);
expect(filterTasks(rows, "h2")).toHaveLength(1);
expect(filterTasks(rows, "intro")).toHaveLength(1);
expect(filterTasks(rows, "test")).toHaveLength(2);
});
});
@ -73,8 +73,8 @@ describe("summary helpers", () => {
it("summarizes by status and provider", () => {
const rows: TaskRecord[] = [
sample,
{ ...sample, taskId: "task-2", status: "idle", providerId: "local" },
{ ...sample, taskId: "task-3", status: "error", providerId: "local" },
{ ...sample, taskId: "task-2", status: "idle", sandboxProviderId: "local" },
{ ...sample, taskId: "task-3", status: "error", sandboxProviderId: "local" },
];
const summary = summarizeTasks(rows);