sandbox-agent/foundry/packages/client/test/e2e/workbench-e2e.test.ts
Nathan Flurry 70d31f819c
chore(foundry): improve sandbox impl + status pill (#252)
* Improve Daytona sandbox provisioning and frontend UI

Refactor git clone script in Daytona provider to use cleaner shell logic for GitHub token authentication and branch checkout. Add support for private repository clones with token-based auth. Improve Daytona provider error handling and git configuration setup.

Frontend improvements include enhanced dev panel, workspace dashboard, sidebar navigation, and UI components for better task/session management. Update interest manager and backend client to support improved session state handling.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

* Add header status pill showing task/session/sandbox state

Surface aggregate status (error, provisioning, running, ready, no sandbox)
as a colored pill in the transcript panel header. Integrates task runtime
status, session status, and sandbox availability via the sandboxProcesses
interest topic so the pill accurately reflects unreachable sandboxes.

Includes mock tasks demonstrating error, provisioning, and running states,
unit tests for deriveHeaderStatus, and workspace-dashboard integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-14 12:14:06 -07:00

305 lines
9.8 KiB
TypeScript

import { describe, expect, it } from "vitest";
import type { TaskWorkbenchSnapshot, WorkbenchAgentTab, WorkbenchTask, WorkbenchModelId, WorkbenchTranscriptEvent } from "@sandbox-agent/foundry-shared";
import { createBackendClient } from "../../src/backend-client.js";
const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
function requiredEnv(name: string): string {
const value = process.env[name]?.trim();
if (!value) {
throw new Error(`Missing required env var: ${name}`);
}
return value;
}
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
const value = process.env[name]?.trim();
switch (value) {
case "claude-sonnet-4":
case "claude-opus-4":
case "gpt-5.3-codex":
case "gpt-5.4":
case "gpt-5.2-codex":
case "gpt-5.1-codex-max":
case "gpt-5.2":
case "gpt-5.1-codex-mini":
return value;
default:
return fallback;
}
}
async function sleep(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
}
async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn: () => Promise<T>, isDone: (value: T) => boolean): Promise<T> {
const startedAt = Date.now();
let lastValue: T;
for (;;) {
lastValue = await fn();
if (isDone(lastValue)) {
return lastValue;
}
if (Date.now() - startedAt > timeoutMs) {
throw new Error(`timed out waiting for ${label}`);
}
await sleep(intervalMs);
}
}
function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTask {
const task = snapshot.tasks.find((candidate) => candidate.id === taskId);
if (!task) {
throw new Error(`task ${taskId} missing from snapshot`);
}
return task;
}
function findTab(task: WorkbenchTask, tabId: string): WorkbenchAgentTab {
const tab = task.tabs.find((candidate) => candidate.id === tabId);
if (!tab) {
throw new Error(`tab ${tabId} missing from task ${task.id}`);
}
return tab;
}
function extractEventText(event: WorkbenchTranscriptEvent): string {
const payload = event.payload;
if (!payload || typeof payload !== "object") {
return String(payload ?? "");
}
const envelope = payload as {
method?: unknown;
params?: unknown;
result?: unknown;
error?: unknown;
};
const params = envelope.params;
if (params && typeof params === "object") {
const update = (params as { update?: unknown }).update;
if (update && typeof update === "object") {
const content = (update as { content?: unknown }).content;
if (content && typeof content === "object") {
const chunkText = (content as { text?: unknown }).text;
if (typeof chunkText === "string") {
return chunkText;
}
}
}
const text = (params as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
const prompt = (params as { prompt?: Array<{ text?: unknown }> }).prompt;
if (Array.isArray(prompt)) {
const value = prompt
.map((item) => (typeof item?.text === "string" ? item.text.trim() : ""))
.filter(Boolean)
.join("\n");
if (value) {
return value;
}
}
}
const result = envelope.result;
if (result && typeof result === "object") {
const text = (result as { text?: unknown }).text;
if (typeof text === "string" && text.trim()) {
return text.trim();
}
}
if (envelope.error) {
return JSON.stringify(envelope.error);
}
if (typeof envelope.method === "string") {
return envelope.method;
}
return JSON.stringify(payload);
}
function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], expectedText: string): boolean {
return transcript
.filter((event) => event.sender === "agent")
.map((event) => extractEventText(event))
.join("")
.includes(expectedText);
}
describe("e2e(client): workbench flows", () => {
it.skipIf(!RUN_WORKBENCH_E2E)("creates a task, adds sessions, exchanges messages, and manages workbench state", { timeout: 20 * 60_000 }, async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
const runId = `wb-${Date.now().toString(36)}`;
const expectedFile = `${runId}.txt`;
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
const expectedReply = `WORKBENCH_ACK_${runId}`;
const client = createBackendClient({
endpoint,
defaultWorkspaceId: workspaceId,
});
const repo = await client.addRepo(workspaceId, repoRemote);
const created = await client.createWorkbenchTask(workspaceId, {
repoId: repo.repoId,
title: `Workbench E2E ${runId}`,
branch: `e2e/${runId}`,
model,
task: `Reply with exactly: ${expectedInitialReply}`,
});
const provisioned = await poll(
"task provisioning",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => task.branch === `e2e/${runId}` && task.tabs.length > 0,
);
const primaryTab = provisioned.tabs[0]!;
const initialCompleted = await poll(
"initial agent response",
12 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = findTab(task, primaryTab.id);
return task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedInitialReply);
},
);
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
await client.renameWorkbenchTask(workspaceId, {
taskId: created.taskId,
value: `Workbench E2E ${runId} Renamed`,
});
await client.renameWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: primaryTab.id,
title: "Primary Session",
});
const secondTab = await client.createWorkbenchSession(workspaceId, {
taskId: created.taskId,
model,
});
await client.renameWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
title: "Follow-up Session",
});
await client.updateWorkbenchDraft(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
},
],
});
const drafted = findTask(await client.getWorkbench(workspaceId), created.taskId);
expect(findTab(drafted, secondTab.tabId).draft.text).toContain(expectedReply);
expect(findTab(drafted, secondTab.tabId).draft.attachments).toHaveLength(1);
await client.sendWorkbenchMessage(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
text: [
`Create a file named ${expectedFile} in the repo root.`,
`Write exactly this single line into the file: ${runId}`,
`Then reply with exactly: ${expectedReply}`,
].join("\n"),
attachments: [
{
id: `${expectedFile}:1`,
filePath: expectedFile,
lineNumber: 1,
lineContent: runId,
},
],
});
const withSecondReply = await poll(
"follow-up session response",
10 * 60_000,
2_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = findTab(task, secondTab.tabId);
return (
tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply) && task.fileChanges.some((file) => file.path === expectedFile)
);
},
);
const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
expect(withSecondReply.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
await client.setWorkbenchSessionUnread(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
unread: false,
});
await client.markWorkbenchUnread(workspaceId, { taskId: created.taskId });
const unreadSnapshot = findTask(await client.getWorkbench(workspaceId), created.taskId);
expect(unreadSnapshot.tabs.some((tab) => tab.unread)).toBe(true);
await client.closeWorkbenchSession(workspaceId, {
taskId: created.taskId,
tabId: secondTab.tabId,
});
const closedSnapshot = await poll(
"secondary session closed",
30_000,
1_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => !task.tabs.some((tab) => tab.id === secondTab.tabId),
);
expect(closedSnapshot.tabs).toHaveLength(1);
await client.revertWorkbenchFile(workspaceId, {
taskId: created.taskId,
path: expectedFile,
});
const revertedSnapshot = await poll(
"file revert reflected in workbench",
30_000,
1_000,
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => !task.fileChanges.some((file) => file.path === expectedFile),
);
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
});
});