mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-17 02:04:13 +00:00
chore(foundry): migrate to actions (#262)
* feat(foundry): checkpoint actor and workspace refactor
* docs(foundry): add agent handoff context
* wip(foundry): continue actor refactor
* wip(foundry): capture remaining local changes
* Complete Foundry refactor checklist
* Fix Foundry validation fallout
* wip
* wip: convert all actors from workflow to plain run handlers
Workaround for RivetKit bug where c.queue.iter() never yields messages
for actors created via getOrCreate from another actor's context. The
queue accepts messages (visible in inspector) but the iterator hangs.
Sleep/wake fixes it, but actors with active connections never sleep.
Converted organization, github-data, task, and user actors from
run: workflow(...) to plain run: async (c) => { for await ... }.
Also fixes:
- Missing auth tables in org migration (auth_verification, etc.)
- default_model NOT NULL constraint on org profile upsert
- Nested workflow step in github-data (HistoryDivergedError)
- Removed --force from frontend Dockerfile pnpm install
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
* Convert all actors from queues/workflows to direct actions; add lazy task creation
Major refactor replacing all queue-based workflow communication with direct
RivetKit action calls across all actors. This works around a RivetKit bug
where c.queue.iter() deadlocks for actors created from another actor's context.
Key changes:
- All actors (organization, task, user, audit-log, github-data) converted
from run: workflow(...) to actions-only (no run handler, no queues)
- PR sync creates virtual task entries in org local DB instead of spawning
task actors — prevents OOM from 200+ actors created simultaneously
- Task actors are created lazily on first user interaction via getOrCreate and
self-initialize from the org's getTaskIndexEntry data
- Removed requireRepoExists cross-actor call (caused 500s), replaced with
local resolveTaskRepoId from org's taskIndex table
- Fixed getOrganizationContext to thread overrides through all sync phases
- Fixed sandbox repo path (/home/user/repo for E2B compatibility)
- Fixed buildSessionDetail to skip transcript fetch for pending sessions
- Added process crash protection (uncaughtException/unhandledRejection)
- Fixed React infinite render loop in mock-layout useEffect dependencies
- Added sandbox listProcesses error handling for expired E2B sandboxes
- Set E2B sandbox timeout to 1 hour (was 5 min default)
- Updated CLAUDE.md with lazy task creation rules, no-silent-catch policy,
React hook dependency safety rules
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
* Fix E2B sandbox timeout comment and frontend stability; improve create-flow
- Add TEMPORARY comment on E2B timeoutMs with pointer to rivetkit sandbox
resilience proposal for when autoPause lands
- Fix React useEffect dependency stability in mock-layout and
organization-dashboard to prevent infinite re-render loops
- Fix terminal-pane ref handling
- Improve create-flow service and tests
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
32f3c6c3bc
commit
f45a467484
139 changed files with 9768 additions and 7204 deletions
|
|
@ -1,6 +1,6 @@
|
|||
import { randomUUID } from "node:crypto";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type { HistoryEvent, RepoOverview } from "@sandbox-agent/foundry-shared";
|
||||
import type { AuditLogEvent as HistoryEvent, RepoOverview } from "@sandbox-agent/foundry-shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
import { requireImportedRepo } from "./helpers.js";
|
||||
|
||||
|
|
@ -132,11 +132,11 @@ describe("e2e(client): full integration stack workflow", () => {
|
|||
90_000,
|
||||
1_000,
|
||||
async () => client.getRepoOverview(organizationId, repo.repoId),
|
||||
(value) => value.branches.some((row) => row.branchName === seededBranch),
|
||||
(value) => value.branches.some((row: RepoOverview["branches"][number]) => row.branchName === seededBranch),
|
||||
);
|
||||
|
||||
const postActionOverview = await client.getRepoOverview(organizationId, repo.repoId);
|
||||
const seededRow = postActionOverview.branches.find((row) => row.branchName === seededBranch);
|
||||
const seededRow = postActionOverview.branches.find((row: RepoOverview["branches"][number]) => row.branchName === seededBranch);
|
||||
expect(Boolean(seededRow)).toBe(true);
|
||||
expect(postActionOverview.fetchedAt).toBeGreaterThanOrEqual(overview.fetchedAt);
|
||||
} finally {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { TaskRecord, HistoryEvent } from "@sandbox-agent/foundry-shared";
|
||||
import type { AuditLogEvent as HistoryEvent, TaskRecord } from "@sandbox-agent/foundry-shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
import { requireImportedRepo } from "./helpers.js";
|
||||
|
||||
|
|
@ -80,9 +80,10 @@ function parseHistoryPayload(event: HistoryEvent): Record<string, unknown> {
|
|||
}
|
||||
}
|
||||
|
||||
async function debugDump(client: ReturnType<typeof createBackendClient>, organizationId: string, taskId: string): Promise<string> {
|
||||
async function debugDump(client: ReturnType<typeof createBackendClient>, organizationId: string, repoId: string, taskId: string): Promise<string> {
|
||||
try {
|
||||
const task = await client.getTask(organizationId, taskId);
|
||||
const task = await client.getTask(organizationId, repoId, taskId);
|
||||
const detail = await client.getTaskDetail(organizationId, repoId, taskId).catch(() => null);
|
||||
const history = await client.listHistory({ organizationId, taskId, limit: 80 }).catch(() => []);
|
||||
const historySummary = history
|
||||
.slice(0, 20)
|
||||
|
|
@ -90,10 +91,11 @@ async function debugDump(client: ReturnType<typeof createBackendClient>, organiz
|
|||
.join("\n");
|
||||
|
||||
let sessionEventsSummary = "";
|
||||
if (task.activeSandboxId && task.activeSessionId) {
|
||||
const activeSessionId = detail?.activeSessionId ?? null;
|
||||
if (task.activeSandboxId && activeSessionId) {
|
||||
const events = await client
|
||||
.listSandboxSessionEvents(organizationId, task.sandboxProviderId, task.activeSandboxId, {
|
||||
sessionId: task.activeSessionId,
|
||||
sessionId: activeSessionId,
|
||||
limit: 50,
|
||||
})
|
||||
.then((r) => r.items)
|
||||
|
|
@ -109,13 +111,11 @@ async function debugDump(client: ReturnType<typeof createBackendClient>, organiz
|
|||
JSON.stringify(
|
||||
{
|
||||
status: task.status,
|
||||
statusMessage: task.statusMessage,
|
||||
title: task.title,
|
||||
branchName: task.branchName,
|
||||
activeSandboxId: task.activeSandboxId,
|
||||
activeSessionId: task.activeSessionId,
|
||||
prUrl: task.prUrl,
|
||||
prSubmitted: task.prSubmitted,
|
||||
activeSessionId,
|
||||
pullRequestUrl: detail?.pullRequest?.url ?? null,
|
||||
},
|
||||
null,
|
||||
2,
|
||||
|
|
@ -189,7 +189,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
// Cold local sandbox startup can exceed a few minutes on first run.
|
||||
8 * 60_000,
|
||||
1_000,
|
||||
async () => client.getTask(organizationId, created.taskId),
|
||||
async () => client.getTask(organizationId, repo.repoId, created.taskId),
|
||||
(h) => Boolean(h.title && h.branchName && h.activeSandboxId),
|
||||
(h) => {
|
||||
if (h.status !== lastStatus) {
|
||||
|
|
@ -200,18 +200,18 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
}
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
branchName = namedAndProvisioned.branchName!;
|
||||
sandboxId = namedAndProvisioned.activeSandboxId!;
|
||||
|
||||
const withSession = await poll<TaskRecord>(
|
||||
const withSession = await poll<Awaited<ReturnType<typeof client.getTaskDetail>>>(
|
||||
"task to create active session",
|
||||
3 * 60_000,
|
||||
1_500,
|
||||
async () => client.getTask(organizationId, created.taskId),
|
||||
async () => client.getTaskDetail(organizationId, repo.repoId, created.taskId),
|
||||
(h) => Boolean(h.activeSessionId),
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
|
|
@ -219,7 +219,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
}
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
|
|
@ -231,14 +231,14 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
2_000,
|
||||
async () =>
|
||||
(
|
||||
await client.listSandboxSessionEvents(organizationId, withSession.sandboxProviderId, sandboxId!, {
|
||||
await client.listSandboxSessionEvents(organizationId, namedAndProvisioned.sandboxProviderId, sandboxId!, {
|
||||
sessionId: sessionId!,
|
||||
limit: 40,
|
||||
})
|
||||
).items,
|
||||
(events) => events.length > 0,
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
|
|
@ -246,7 +246,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
"task to reach idle state",
|
||||
8 * 60_000,
|
||||
2_000,
|
||||
async () => client.getTask(organizationId, created.taskId),
|
||||
async () => client.getTask(organizationId, repo.repoId, created.taskId),
|
||||
(h) => h.status === "idle",
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
|
|
@ -254,7 +254,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
}
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
|
|
@ -266,7 +266,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
(events) => events.some((e) => e.kind === "task.pr_created"),
|
||||
)
|
||||
.catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
})
|
||||
.then((events) => events.find((e) => e.kind === "task.pr_created")!);
|
||||
|
|
@ -287,16 +287,16 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
expect(prFiles.some((f) => f.filename === expectedFile)).toBe(true);
|
||||
|
||||
// Close the task and assert the sandbox is released (stopped).
|
||||
await client.runAction(organizationId, created.taskId, "archive");
|
||||
await client.runAction(organizationId, repo.repoId, created.taskId, "archive");
|
||||
|
||||
await poll<TaskRecord>(
|
||||
await poll<Awaited<ReturnType<typeof client.getTaskDetail>>>(
|
||||
"task to become archived (session released)",
|
||||
60_000,
|
||||
1_000,
|
||||
async () => client.getTask(organizationId, created.taskId),
|
||||
async () => client.getTaskDetail(organizationId, repo.repoId, created.taskId),
|
||||
(h) => h.status === "archived" && h.activeSessionId === null,
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
|
|
@ -311,7 +311,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
|||
return st.includes("destroyed") || st.includes("stopped") || st.includes("suspended") || st.includes("paused");
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, organizationId, created.taskId);
|
||||
const dump = await debugDump(client, organizationId, repo.repoId, created.taskId);
|
||||
const state = await client.sandboxProviderState(organizationId, "local", sandboxId!).catch(() => null);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n` + `sandbox state: ${state ? state.state : "unknown"}\n` + `${dump}`);
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { TaskWorkbenchSnapshot, WorkbenchSession, WorkbenchTask, WorkbenchModelId, WorkbenchTranscriptEvent } from "@sandbox-agent/foundry-shared";
|
||||
import type { TaskWorkspaceSnapshot, WorkspaceSession, WorkspaceTask, WorkspaceModelId, WorkspaceTranscriptEvent } from "@sandbox-agent/foundry-shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
import { requireImportedRepo } from "./helpers.js";
|
||||
|
||||
|
|
@ -13,21 +13,9 @@ function requiredEnv(name: string): string {
|
|||
return value;
|
||||
}
|
||||
|
||||
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
|
||||
function workspaceModelEnv(name: string, fallback: WorkspaceModelId): WorkspaceModelId {
|
||||
const value = process.env[name]?.trim();
|
||||
switch (value) {
|
||||
case "claude-sonnet-4":
|
||||
case "claude-opus-4":
|
||||
case "gpt-5.3-codex":
|
||||
case "gpt-5.4":
|
||||
case "gpt-5.2-codex":
|
||||
case "gpt-5.1-codex-max":
|
||||
case "gpt-5.2":
|
||||
case "gpt-5.1-codex-mini":
|
||||
return value;
|
||||
default:
|
||||
return fallback;
|
||||
}
|
||||
return value && value.length > 0 ? value : fallback;
|
||||
}
|
||||
|
||||
async function sleep(ms: number): Promise<void> {
|
||||
|
|
@ -50,7 +38,7 @@ async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn:
|
|||
}
|
||||
}
|
||||
|
||||
function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTask {
|
||||
function findTask(snapshot: TaskWorkspaceSnapshot, taskId: string): WorkspaceTask {
|
||||
const task = snapshot.tasks.find((candidate) => candidate.id === taskId);
|
||||
if (!task) {
|
||||
throw new Error(`task ${taskId} missing from snapshot`);
|
||||
|
|
@ -58,7 +46,7 @@ function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTas
|
|||
return task;
|
||||
}
|
||||
|
||||
function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
|
||||
function findTab(task: WorkspaceTask, sessionId: string): WorkspaceSession {
|
||||
const tab = task.sessions.find((candidate) => candidate.id === sessionId);
|
||||
if (!tab) {
|
||||
throw new Error(`tab ${sessionId} missing from task ${task.id}`);
|
||||
|
|
@ -66,7 +54,7 @@ function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
|
|||
return tab;
|
||||
}
|
||||
|
||||
function extractEventText(event: WorkbenchTranscriptEvent): string {
|
||||
function extractEventText(event: WorkspaceTranscriptEvent): string {
|
||||
const payload = event.payload;
|
||||
if (!payload || typeof payload !== "object") {
|
||||
return String(payload ?? "");
|
||||
|
|
@ -127,7 +115,7 @@ function extractEventText(event: WorkbenchTranscriptEvent): string {
|
|||
return JSON.stringify(payload);
|
||||
}
|
||||
|
||||
function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], expectedText: string): boolean {
|
||||
function transcriptIncludesAgentText(transcript: WorkspaceTranscriptEvent[], expectedText: string): boolean {
|
||||
return transcript
|
||||
.filter((event) => event.sender === "agent")
|
||||
.map((event) => extractEventText(event))
|
||||
|
|
@ -135,15 +123,15 @@ function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], exp
|
|||
.includes(expectedText);
|
||||
}
|
||||
|
||||
describe("e2e(client): workbench flows", () => {
|
||||
describe("e2e(client): workspace flows", () => {
|
||||
it.skipIf(!RUN_WORKBENCH_E2E)(
|
||||
"creates a task from an imported repo, adds sessions, exchanges messages, and manages workbench state",
|
||||
"creates a task from an imported repo, adds sessions, exchanges messages, and manages workspace state",
|
||||
{ timeout: 20 * 60_000 },
|
||||
async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
|
||||
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
|
||||
const model = workspaceModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
|
||||
const runId = `wb-${Date.now().toString(36)}`;
|
||||
const expectedFile = `${runId}.txt`;
|
||||
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
|
||||
|
|
@ -155,9 +143,9 @@ describe("e2e(client): workbench flows", () => {
|
|||
});
|
||||
|
||||
const repo = await requireImportedRepo(client, organizationId, repoRemote);
|
||||
const created = await client.createWorkbenchTask(organizationId, {
|
||||
const created = await client.createWorkspaceTask(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench E2E ${runId}`,
|
||||
title: `Workspace E2E ${runId}`,
|
||||
branch: `e2e/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${expectedInitialReply}`,
|
||||
|
|
@ -167,7 +155,7 @@ describe("e2e(client): workbench flows", () => {
|
|||
"task provisioning",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => task.branch === `e2e/${runId}` && task.sessions.length > 0,
|
||||
);
|
||||
|
||||
|
|
@ -177,7 +165,7 @@ describe("e2e(client): workbench flows", () => {
|
|||
"initial agent response",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => {
|
||||
const tab = findTab(task, primaryTab.id);
|
||||
return task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedInitialReply);
|
||||
|
|
@ -187,28 +175,33 @@ describe("e2e(client): workbench flows", () => {
|
|||
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
|
||||
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
|
||||
|
||||
await client.renameWorkbenchTask(organizationId, {
|
||||
await client.renameWorkspaceTask(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
value: `Workbench E2E ${runId} Renamed`,
|
||||
value: `Workspace E2E ${runId} Renamed`,
|
||||
});
|
||||
await client.renameWorkbenchSession(organizationId, {
|
||||
await client.renameWorkspaceSession(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: primaryTab.id,
|
||||
title: "Primary Session",
|
||||
});
|
||||
|
||||
const secondTab = await client.createWorkbenchSession(organizationId, {
|
||||
const secondTab = await client.createWorkspaceSession(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
model,
|
||||
});
|
||||
|
||||
await client.renameWorkbenchSession(organizationId, {
|
||||
await client.renameWorkspaceSession(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: secondTab.sessionId,
|
||||
title: "Follow-up Session",
|
||||
});
|
||||
|
||||
await client.updateWorkbenchDraft(organizationId, {
|
||||
await client.updateWorkspaceDraft(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: secondTab.sessionId,
|
||||
text: [
|
||||
|
|
@ -226,11 +219,12 @@ describe("e2e(client): workbench flows", () => {
|
|||
],
|
||||
});
|
||||
|
||||
const drafted = findTask(await client.getWorkbench(organizationId), created.taskId);
|
||||
const drafted = findTask(await client.getWorkspace(organizationId), created.taskId);
|
||||
expect(findTab(drafted, secondTab.sessionId).draft.text).toContain(expectedReply);
|
||||
expect(findTab(drafted, secondTab.sessionId).draft.attachments).toHaveLength(1);
|
||||
|
||||
await client.sendWorkbenchMessage(organizationId, {
|
||||
await client.sendWorkspaceMessage(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: secondTab.sessionId,
|
||||
text: [
|
||||
|
|
@ -252,7 +246,7 @@ describe("e2e(client): workbench flows", () => {
|
|||
"follow-up session response",
|
||||
10 * 60_000,
|
||||
2_000,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => {
|
||||
const tab = findTab(task, secondTab.sessionId);
|
||||
return (
|
||||
|
|
@ -265,17 +259,19 @@ describe("e2e(client): workbench flows", () => {
|
|||
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
|
||||
expect(withSecondReply.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
|
||||
|
||||
await client.setWorkbenchSessionUnread(organizationId, {
|
||||
await client.setWorkspaceSessionUnread(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: secondTab.sessionId,
|
||||
unread: false,
|
||||
});
|
||||
await client.markWorkbenchUnread(organizationId, { taskId: created.taskId });
|
||||
await client.markWorkspaceUnread(organizationId, { repoId: repo.repoId, taskId: created.taskId });
|
||||
|
||||
const unreadSnapshot = findTask(await client.getWorkbench(organizationId), created.taskId);
|
||||
const unreadSnapshot = findTask(await client.getWorkspace(organizationId), created.taskId);
|
||||
expect(unreadSnapshot.sessions.some((tab) => tab.unread)).toBe(true);
|
||||
|
||||
await client.closeWorkbenchSession(organizationId, {
|
||||
await client.closeWorkspaceSession(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: secondTab.sessionId,
|
||||
});
|
||||
|
|
@ -284,26 +280,27 @@ describe("e2e(client): workbench flows", () => {
|
|||
"secondary session closed",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => !task.sessions.some((tab) => tab.id === secondTab.sessionId),
|
||||
);
|
||||
expect(closedSnapshot.sessions).toHaveLength(1);
|
||||
|
||||
await client.revertWorkbenchFile(organizationId, {
|
||||
await client.revertWorkspaceFile(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
path: expectedFile,
|
||||
});
|
||||
|
||||
const revertedSnapshot = await poll(
|
||||
"file revert reflected in workbench",
|
||||
"file revert reflected in workspace",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => !task.fileChanges.some((file) => file.path === expectedFile),
|
||||
);
|
||||
|
||||
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
|
||||
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
|
||||
expect(revertedSnapshot.title).toBe(`Workspace E2E ${runId} Renamed`);
|
||||
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
|
||||
},
|
||||
);
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
createFoundryLogger,
|
||||
type TaskWorkbenchSnapshot,
|
||||
type WorkbenchSession,
|
||||
type WorkbenchTask,
|
||||
type WorkbenchModelId,
|
||||
type WorkbenchTranscriptEvent,
|
||||
type TaskWorkspaceSnapshot,
|
||||
type WorkspaceSession,
|
||||
type WorkspaceTask,
|
||||
type WorkspaceModelId,
|
||||
type WorkspaceTranscriptEvent,
|
||||
} from "@sandbox-agent/foundry-shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
import { requireImportedRepo } from "./helpers.js";
|
||||
|
|
@ -14,7 +14,7 @@ const RUN_WORKBENCH_LOAD_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E =
|
|||
const logger = createFoundryLogger({
|
||||
service: "foundry-client-e2e",
|
||||
bindings: {
|
||||
suite: "workbench-load",
|
||||
suite: "workspace-load",
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -26,21 +26,9 @@ function requiredEnv(name: string): string {
|
|||
return value;
|
||||
}
|
||||
|
||||
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
|
||||
function workspaceModelEnv(name: string, fallback: WorkspaceModelId): WorkspaceModelId {
|
||||
const value = process.env[name]?.trim();
|
||||
switch (value) {
|
||||
case "claude-sonnet-4":
|
||||
case "claude-opus-4":
|
||||
case "gpt-5.3-codex":
|
||||
case "gpt-5.4":
|
||||
case "gpt-5.2-codex":
|
||||
case "gpt-5.1-codex-max":
|
||||
case "gpt-5.2":
|
||||
case "gpt-5.1-codex-mini":
|
||||
return value;
|
||||
default:
|
||||
return fallback;
|
||||
}
|
||||
return value && value.length > 0 ? value : fallback;
|
||||
}
|
||||
|
||||
function intEnv(name: string, fallback: number): number {
|
||||
|
|
@ -72,7 +60,7 @@ async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn:
|
|||
}
|
||||
}
|
||||
|
||||
function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTask {
|
||||
function findTask(snapshot: TaskWorkspaceSnapshot, taskId: string): WorkspaceTask {
|
||||
const task = snapshot.tasks.find((candidate) => candidate.id === taskId);
|
||||
if (!task) {
|
||||
throw new Error(`task ${taskId} missing from snapshot`);
|
||||
|
|
@ -80,7 +68,7 @@ function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTas
|
|||
return task;
|
||||
}
|
||||
|
||||
function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
|
||||
function findTab(task: WorkspaceTask, sessionId: string): WorkspaceSession {
|
||||
const tab = task.sessions.find((candidate) => candidate.id === sessionId);
|
||||
if (!tab) {
|
||||
throw new Error(`tab ${sessionId} missing from task ${task.id}`);
|
||||
|
|
@ -88,7 +76,7 @@ function findTab(task: WorkbenchTask, sessionId: string): WorkbenchSession {
|
|||
return tab;
|
||||
}
|
||||
|
||||
function extractEventText(event: WorkbenchTranscriptEvent): string {
|
||||
function extractEventText(event: WorkspaceTranscriptEvent): string {
|
||||
const payload = event.payload;
|
||||
if (!payload || typeof payload !== "object") {
|
||||
return String(payload ?? "");
|
||||
|
|
@ -138,7 +126,7 @@ function extractEventText(event: WorkbenchTranscriptEvent): string {
|
|||
return typeof envelope.method === "string" ? envelope.method : JSON.stringify(payload);
|
||||
}
|
||||
|
||||
function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], expectedText: string): boolean {
|
||||
function transcriptIncludesAgentText(transcript: WorkspaceTranscriptEvent[], expectedText: string): boolean {
|
||||
return transcript
|
||||
.filter((event) => event.sender === "agent")
|
||||
.map((event) => extractEventText(event))
|
||||
|
|
@ -150,7 +138,7 @@ function average(values: number[]): number {
|
|||
return values.reduce((sum, value) => sum + value, 0) / Math.max(values.length, 1);
|
||||
}
|
||||
|
||||
async function measureWorkbenchSnapshot(
|
||||
async function measureWorkspaceSnapshot(
|
||||
client: ReturnType<typeof createBackendClient>,
|
||||
organizationId: string,
|
||||
iterations: number,
|
||||
|
|
@ -163,11 +151,11 @@ async function measureWorkbenchSnapshot(
|
|||
transcriptEventCount: number;
|
||||
}> {
|
||||
const durations: number[] = [];
|
||||
let snapshot: TaskWorkbenchSnapshot | null = null;
|
||||
let snapshot: TaskWorkspaceSnapshot | null = null;
|
||||
|
||||
for (let index = 0; index < iterations; index += 1) {
|
||||
const startedAt = performance.now();
|
||||
snapshot = await client.getWorkbench(organizationId);
|
||||
snapshot = await client.getWorkspace(organizationId);
|
||||
durations.push(performance.now() - startedAt);
|
||||
}
|
||||
|
||||
|
|
@ -191,12 +179,12 @@ async function measureWorkbenchSnapshot(
|
|||
};
|
||||
}
|
||||
|
||||
describe("e2e(client): workbench load", () => {
|
||||
describe("e2e(client): workspace load", () => {
|
||||
it.skipIf(!RUN_WORKBENCH_LOAD_E2E)("runs a simple sequential load profile against the real backend", { timeout: 30 * 60_000 }, async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
|
||||
const organizationId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
|
||||
const model = workspaceModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
|
||||
const taskCount = intEnv("HF_LOAD_TASK_COUNT", 3);
|
||||
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
|
||||
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
|
||||
|
|
@ -220,16 +208,16 @@ describe("e2e(client): workbench load", () => {
|
|||
transcriptEventCount: number;
|
||||
}> = [];
|
||||
|
||||
snapshotSeries.push(await measureWorkbenchSnapshot(client, organizationId, 2));
|
||||
snapshotSeries.push(await measureWorkspaceSnapshot(client, organizationId, 2));
|
||||
|
||||
for (let taskIndex = 0; taskIndex < taskCount; taskIndex += 1) {
|
||||
const runId = `load-${taskIndex}-${Date.now().toString(36)}`;
|
||||
const initialReply = `LOAD_INIT_${runId}`;
|
||||
|
||||
const createStartedAt = performance.now();
|
||||
const created = await client.createWorkbenchTask(organizationId, {
|
||||
const created = await client.createWorkspaceTask(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench Load ${runId}`,
|
||||
title: `Workspace Load ${runId}`,
|
||||
branch: `load/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${initialReply}`,
|
||||
|
|
@ -241,7 +229,7 @@ describe("e2e(client): workbench load", () => {
|
|||
`task ${runId} provisioning`,
|
||||
12 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => {
|
||||
const tab = task.sessions[0];
|
||||
return Boolean(tab && task.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, initialReply));
|
||||
|
|
@ -256,13 +244,15 @@ describe("e2e(client): workbench load", () => {
|
|||
for (let sessionIndex = 0; sessionIndex < extraSessionCount; sessionIndex += 1) {
|
||||
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
|
||||
const createSessionStartedAt = performance.now();
|
||||
const createdSession = await client.createWorkbenchSession(organizationId, {
|
||||
const createdSession = await client.createWorkspaceSession(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
model,
|
||||
});
|
||||
createSessionLatencies.push(performance.now() - createSessionStartedAt);
|
||||
|
||||
await client.sendWorkbenchMessage(organizationId, {
|
||||
await client.sendWorkspaceMessage(organizationId, {
|
||||
repoId: repo.repoId,
|
||||
taskId: created.taskId,
|
||||
sessionId: createdSession.sessionId,
|
||||
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
|
||||
|
|
@ -274,7 +264,7 @@ describe("e2e(client): workbench load", () => {
|
|||
`task ${runId} session ${sessionIndex} reply`,
|
||||
10 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findTask(await client.getWorkbench(organizationId), created.taskId),
|
||||
async () => findTask(await client.getWorkspace(organizationId), created.taskId),
|
||||
(task) => {
|
||||
const tab = findTab(task, createdSession.sessionId);
|
||||
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
|
||||
|
|
@ -285,14 +275,14 @@ describe("e2e(client): workbench load", () => {
|
|||
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.sessionId).transcript, expectedReply)).toBe(true);
|
||||
}
|
||||
|
||||
const snapshotMetrics = await measureWorkbenchSnapshot(client, organizationId, 3);
|
||||
const snapshotMetrics = await measureWorkspaceSnapshot(client, organizationId, 3);
|
||||
snapshotSeries.push(snapshotMetrics);
|
||||
logger.info(
|
||||
{
|
||||
taskIndex: taskIndex + 1,
|
||||
...snapshotMetrics,
|
||||
},
|
||||
"workbench_load_snapshot",
|
||||
"workspace_load_snapshot",
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -314,7 +304,7 @@ describe("e2e(client): workbench load", () => {
|
|||
snapshotTranscriptFinalCount: lastSnapshot.transcriptEventCount,
|
||||
};
|
||||
|
||||
logger.info(summary, "workbench_load_summary");
|
||||
logger.info(summary, "workspace_load_summary");
|
||||
|
||||
expect(createTaskLatencies.length).toBe(taskCount);
|
||||
expect(provisionLatencies.length).toBe(taskCount);
|
||||
Loading…
Add table
Add a link
Reference in a new issue