factory: rename project and handoff actors

This commit is contained in:
Nathan Flurry 2026-03-10 21:55:30 -07:00
parent 3022bce2ad
commit ea7c36a8e7
147 changed files with 6313 additions and 14364 deletions

View file

@ -1,8 +1,8 @@
import { describe, expect, it } from "vitest";
import type {
HandoffWorkbenchSnapshot,
TaskWorkbenchSnapshot,
WorkbenchAgentTab,
WorkbenchHandoff,
WorkbenchTask,
WorkbenchModelId,
WorkbenchTranscriptEvent,
} from "@sandbox-agent/factory-shared";
@ -70,18 +70,18 @@ async function poll<T>(
}
}
function findHandoff(snapshot: HandoffWorkbenchSnapshot, handoffId: string): WorkbenchHandoff {
const handoff = snapshot.handoffs.find((candidate) => candidate.id === handoffId);
if (!handoff) {
throw new Error(`handoff ${handoffId} missing from snapshot`);
function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTask {
const task = snapshot.tasks.find((candidate) => candidate.id === taskId);
if (!task) {
throw new Error(`task ${taskId} missing from snapshot`);
}
return handoff;
return task;
}
function findTab(handoff: WorkbenchHandoff, tabId: string): WorkbenchAgentTab {
const tab = handoff.tabs.find((candidate) => candidate.id === tabId);
function findTab(task: WorkbenchTask, tabId: string): WorkbenchAgentTab {
const tab = task.tabs.find((candidate) => candidate.id === tabId);
if (!tab) {
throw new Error(`tab ${tabId} missing from handoff ${handoff.id}`);
throw new Error(`tab ${tabId} missing from task ${task.id}`);
}
return tab;
}
@ -156,12 +156,12 @@ async function measureWorkbenchSnapshot(
avgMs: number;
maxMs: number;
payloadBytes: number;
handoffCount: number;
taskCount: number;
tabCount: number;
transcriptEventCount: number;
}> {
const durations: number[] = [];
let snapshot: HandoffWorkbenchSnapshot | null = null;
let snapshot: TaskWorkbenchSnapshot | null = null;
for (let index = 0; index < iterations; index += 1) {
const startedAt = performance.now();
@ -173,13 +173,13 @@ async function measureWorkbenchSnapshot(
workspaceId,
repos: [],
projects: [],
handoffs: [],
tasks: [],
};
const payloadBytes = Buffer.byteLength(JSON.stringify(finalSnapshot), "utf8");
const tabCount = finalSnapshot.handoffs.reduce((sum, handoff) => sum + handoff.tabs.length, 0);
const transcriptEventCount = finalSnapshot.handoffs.reduce(
(sum, handoff) =>
sum + handoff.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
const tabCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.tabs.length, 0);
const transcriptEventCount = finalSnapshot.tasks.reduce(
(sum, task) =>
sum + task.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
0,
);
@ -187,7 +187,7 @@ async function measureWorkbenchSnapshot(
avgMs: Math.round(average(durations)),
maxMs: Math.round(Math.max(...durations, 0)),
payloadBytes,
handoffCount: finalSnapshot.handoffs.length,
taskCount: finalSnapshot.tasks.length,
tabCount,
transcriptEventCount,
};
@ -202,7 +202,7 @@ describe("e2e(client): workbench load", () => {
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredRepoRemote();
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
const taskCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
@ -212,12 +212,12 @@ describe("e2e(client): workbench load", () => {
});
const repo = await client.addRepo(workspaceId, repoRemote);
const createHandoffLatencies: number[] = [];
const createTaskLatencies: number[] = [];
const provisionLatencies: number[] = [];
const createSessionLatencies: number[] = [];
const messageRoundTripLatencies: number[] = [];
const snapshotSeries: Array<{
handoffCount: number;
taskCount: number;
avgMs: number;
maxMs: number;
payloadBytes: number;
@ -227,31 +227,31 @@ describe("e2e(client): workbench load", () => {
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
for (let handoffIndex = 0; handoffIndex < handoffCount; handoffIndex += 1) {
const runId = `load-${handoffIndex}-${Date.now().toString(36)}`;
for (let taskIndex = 0; taskIndex < taskCount; taskIndex += 1) {
const runId = `load-${taskIndex}-${Date.now().toString(36)}`;
const initialReply = `LOAD_INIT_${runId}`;
const createStartedAt = performance.now();
const created = await client.createWorkbenchHandoff(workspaceId, {
const created = await client.createWorkbenchTask(workspaceId, {
repoId: repo.repoId,
title: `Workbench Load ${runId}`,
branch: `load/${runId}`,
model,
task: `Reply with exactly: ${initialReply}`,
});
createHandoffLatencies.push(performance.now() - createStartedAt);
createTaskLatencies.push(performance.now() - createStartedAt);
const provisionStartedAt = performance.now();
const provisioned = await poll(
`handoff ${runId} provisioning`,
`task ${runId} provisioning`,
12 * 60_000,
pollIntervalMs,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = handoff.tabs[0];
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = task.tabs[0];
return Boolean(
tab &&
handoff.status === "idle" &&
task.status === "idle" &&
tab.status === "idle" &&
transcriptIncludesAgentText(tab.transcript, initialReply),
);
@ -267,13 +267,13 @@ describe("e2e(client): workbench load", () => {
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
const createSessionStartedAt = performance.now();
const createdSession = await client.createWorkbenchSession(workspaceId, {
handoffId: created.handoffId,
taskId: created.taskId,
model,
});
createSessionLatencies.push(performance.now() - createSessionStartedAt);
await client.sendWorkbenchMessage(workspaceId, {
handoffId: created.handoffId,
taskId: created.taskId,
tabId: createdSession.tabId,
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
attachments: [],
@ -281,12 +281,12 @@ describe("e2e(client): workbench load", () => {
const messageStartedAt = performance.now();
const withReply = await poll(
`handoff ${runId} session ${sessionIndex} reply`,
`task ${runId} session ${sessionIndex} reply`,
10 * 60_000,
pollIntervalMs,
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
(handoff) => {
const tab = findTab(handoff, createdSession.tabId);
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
(task) => {
const tab = findTab(task, createdSession.tabId);
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
},
);
@ -300,7 +300,7 @@ describe("e2e(client): workbench load", () => {
console.info(
"[workbench-load-snapshot]",
JSON.stringify({
handoffIndex: handoffIndex + 1,
taskIndex: taskIndex + 1,
...snapshotMetrics,
}),
);
@ -309,9 +309,9 @@ describe("e2e(client): workbench load", () => {
const firstSnapshot = snapshotSeries[0]!;
const lastSnapshot = snapshotSeries[snapshotSeries.length - 1]!;
const summary = {
handoffCount,
taskCount,
extraSessionCount,
createHandoffAvgMs: Math.round(average(createHandoffLatencies)),
createTaskAvgMs: Math.round(average(createTaskLatencies)),
provisionAvgMs: Math.round(average(provisionLatencies)),
createSessionAvgMs: Math.round(average(createSessionLatencies)),
messageRoundTripAvgMs: Math.round(average(messageRoundTripLatencies)),
@ -326,10 +326,10 @@ describe("e2e(client): workbench load", () => {
console.info("[workbench-load-summary]", JSON.stringify(summary));
expect(createHandoffLatencies.length).toBe(handoffCount);
expect(provisionLatencies.length).toBe(handoffCount);
expect(createSessionLatencies.length).toBe(handoffCount * extraSessionCount);
expect(messageRoundTripLatencies.length).toBe(handoffCount * extraSessionCount);
expect(createTaskLatencies.length).toBe(taskCount);
expect(provisionLatencies.length).toBe(taskCount);
expect(createSessionLatencies.length).toBe(taskCount * extraSessionCount);
expect(messageRoundTripLatencies.length).toBe(taskCount * extraSessionCount);
},
);
});