mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-16 12:03:27 +00:00
factory: rename project and handoff actors
This commit is contained in:
parent
3022bce2ad
commit
ea7c36a8e7
147 changed files with 6313 additions and 14364 deletions
|
|
@ -1,8 +1,8 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type {
|
||||
HandoffWorkbenchSnapshot,
|
||||
TaskWorkbenchSnapshot,
|
||||
WorkbenchAgentTab,
|
||||
WorkbenchHandoff,
|
||||
WorkbenchTask,
|
||||
WorkbenchModelId,
|
||||
WorkbenchTranscriptEvent,
|
||||
} from "@sandbox-agent/factory-shared";
|
||||
|
|
@ -70,18 +70,18 @@ async function poll<T>(
|
|||
}
|
||||
}
|
||||
|
||||
function findHandoff(snapshot: HandoffWorkbenchSnapshot, handoffId: string): WorkbenchHandoff {
|
||||
const handoff = snapshot.handoffs.find((candidate) => candidate.id === handoffId);
|
||||
if (!handoff) {
|
||||
throw new Error(`handoff ${handoffId} missing from snapshot`);
|
||||
function findTask(snapshot: TaskWorkbenchSnapshot, taskId: string): WorkbenchTask {
|
||||
const task = snapshot.tasks.find((candidate) => candidate.id === taskId);
|
||||
if (!task) {
|
||||
throw new Error(`task ${taskId} missing from snapshot`);
|
||||
}
|
||||
return handoff;
|
||||
return task;
|
||||
}
|
||||
|
||||
function findTab(handoff: WorkbenchHandoff, tabId: string): WorkbenchAgentTab {
|
||||
const tab = handoff.tabs.find((candidate) => candidate.id === tabId);
|
||||
function findTab(task: WorkbenchTask, tabId: string): WorkbenchAgentTab {
|
||||
const tab = task.tabs.find((candidate) => candidate.id === tabId);
|
||||
if (!tab) {
|
||||
throw new Error(`tab ${tabId} missing from handoff ${handoff.id}`);
|
||||
throw new Error(`tab ${tabId} missing from task ${task.id}`);
|
||||
}
|
||||
return tab;
|
||||
}
|
||||
|
|
@ -156,12 +156,12 @@ async function measureWorkbenchSnapshot(
|
|||
avgMs: number;
|
||||
maxMs: number;
|
||||
payloadBytes: number;
|
||||
handoffCount: number;
|
||||
taskCount: number;
|
||||
tabCount: number;
|
||||
transcriptEventCount: number;
|
||||
}> {
|
||||
const durations: number[] = [];
|
||||
let snapshot: HandoffWorkbenchSnapshot | null = null;
|
||||
let snapshot: TaskWorkbenchSnapshot | null = null;
|
||||
|
||||
for (let index = 0; index < iterations; index += 1) {
|
||||
const startedAt = performance.now();
|
||||
|
|
@ -173,13 +173,13 @@ async function measureWorkbenchSnapshot(
|
|||
workspaceId,
|
||||
repos: [],
|
||||
projects: [],
|
||||
handoffs: [],
|
||||
tasks: [],
|
||||
};
|
||||
const payloadBytes = Buffer.byteLength(JSON.stringify(finalSnapshot), "utf8");
|
||||
const tabCount = finalSnapshot.handoffs.reduce((sum, handoff) => sum + handoff.tabs.length, 0);
|
||||
const transcriptEventCount = finalSnapshot.handoffs.reduce(
|
||||
(sum, handoff) =>
|
||||
sum + handoff.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
|
||||
const tabCount = finalSnapshot.tasks.reduce((sum, task) => sum + task.tabs.length, 0);
|
||||
const transcriptEventCount = finalSnapshot.tasks.reduce(
|
||||
(sum, task) =>
|
||||
sum + task.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
|
||||
0,
|
||||
);
|
||||
|
||||
|
|
@ -187,7 +187,7 @@ async function measureWorkbenchSnapshot(
|
|||
avgMs: Math.round(average(durations)),
|
||||
maxMs: Math.round(Math.max(...durations, 0)),
|
||||
payloadBytes,
|
||||
handoffCount: finalSnapshot.handoffs.length,
|
||||
taskCount: finalSnapshot.tasks.length,
|
||||
tabCount,
|
||||
transcriptEventCount,
|
||||
};
|
||||
|
|
@ -202,7 +202,7 @@ describe("e2e(client): workbench load", () => {
|
|||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredRepoRemote();
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
|
||||
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
|
||||
const taskCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
|
||||
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
|
||||
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
|
||||
|
||||
|
|
@ -212,12 +212,12 @@ describe("e2e(client): workbench load", () => {
|
|||
});
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
const createHandoffLatencies: number[] = [];
|
||||
const createTaskLatencies: number[] = [];
|
||||
const provisionLatencies: number[] = [];
|
||||
const createSessionLatencies: number[] = [];
|
||||
const messageRoundTripLatencies: number[] = [];
|
||||
const snapshotSeries: Array<{
|
||||
handoffCount: number;
|
||||
taskCount: number;
|
||||
avgMs: number;
|
||||
maxMs: number;
|
||||
payloadBytes: number;
|
||||
|
|
@ -227,31 +227,31 @@ describe("e2e(client): workbench load", () => {
|
|||
|
||||
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
|
||||
|
||||
for (let handoffIndex = 0; handoffIndex < handoffCount; handoffIndex += 1) {
|
||||
const runId = `load-${handoffIndex}-${Date.now().toString(36)}`;
|
||||
for (let taskIndex = 0; taskIndex < taskCount; taskIndex += 1) {
|
||||
const runId = `load-${taskIndex}-${Date.now().toString(36)}`;
|
||||
const initialReply = `LOAD_INIT_${runId}`;
|
||||
|
||||
const createStartedAt = performance.now();
|
||||
const created = await client.createWorkbenchHandoff(workspaceId, {
|
||||
const created = await client.createWorkbenchTask(workspaceId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench Load ${runId}`,
|
||||
branch: `load/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${initialReply}`,
|
||||
});
|
||||
createHandoffLatencies.push(performance.now() - createStartedAt);
|
||||
createTaskLatencies.push(performance.now() - createStartedAt);
|
||||
|
||||
const provisionStartedAt = performance.now();
|
||||
const provisioned = await poll(
|
||||
`handoff ${runId} provisioning`,
|
||||
`task ${runId} provisioning`,
|
||||
12 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = handoff.tabs[0];
|
||||
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
|
||||
(task) => {
|
||||
const tab = task.tabs[0];
|
||||
return Boolean(
|
||||
tab &&
|
||||
handoff.status === "idle" &&
|
||||
task.status === "idle" &&
|
||||
tab.status === "idle" &&
|
||||
transcriptIncludesAgentText(tab.transcript, initialReply),
|
||||
);
|
||||
|
|
@ -267,13 +267,13 @@ describe("e2e(client): workbench load", () => {
|
|||
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
|
||||
const createSessionStartedAt = performance.now();
|
||||
const createdSession = await client.createWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
taskId: created.taskId,
|
||||
model,
|
||||
});
|
||||
createSessionLatencies.push(performance.now() - createSessionStartedAt);
|
||||
|
||||
await client.sendWorkbenchMessage(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
taskId: created.taskId,
|
||||
tabId: createdSession.tabId,
|
||||
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
|
||||
attachments: [],
|
||||
|
|
@ -281,12 +281,12 @@ describe("e2e(client): workbench load", () => {
|
|||
|
||||
const messageStartedAt = performance.now();
|
||||
const withReply = await poll(
|
||||
`handoff ${runId} session ${sessionIndex} reply`,
|
||||
`task ${runId} session ${sessionIndex} reply`,
|
||||
10 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, createdSession.tabId);
|
||||
async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
|
||||
(task) => {
|
||||
const tab = findTab(task, createdSession.tabId);
|
||||
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
|
||||
},
|
||||
);
|
||||
|
|
@ -300,7 +300,7 @@ describe("e2e(client): workbench load", () => {
|
|||
console.info(
|
||||
"[workbench-load-snapshot]",
|
||||
JSON.stringify({
|
||||
handoffIndex: handoffIndex + 1,
|
||||
taskIndex: taskIndex + 1,
|
||||
...snapshotMetrics,
|
||||
}),
|
||||
);
|
||||
|
|
@ -309,9 +309,9 @@ describe("e2e(client): workbench load", () => {
|
|||
const firstSnapshot = snapshotSeries[0]!;
|
||||
const lastSnapshot = snapshotSeries[snapshotSeries.length - 1]!;
|
||||
const summary = {
|
||||
handoffCount,
|
||||
taskCount,
|
||||
extraSessionCount,
|
||||
createHandoffAvgMs: Math.round(average(createHandoffLatencies)),
|
||||
createTaskAvgMs: Math.round(average(createTaskLatencies)),
|
||||
provisionAvgMs: Math.round(average(provisionLatencies)),
|
||||
createSessionAvgMs: Math.round(average(createSessionLatencies)),
|
||||
messageRoundTripAvgMs: Math.round(average(messageRoundTripLatencies)),
|
||||
|
|
@ -326,10 +326,10 @@ describe("e2e(client): workbench load", () => {
|
|||
|
||||
console.info("[workbench-load-summary]", JSON.stringify(summary));
|
||||
|
||||
expect(createHandoffLatencies.length).toBe(handoffCount);
|
||||
expect(provisionLatencies.length).toBe(handoffCount);
|
||||
expect(createSessionLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
expect(messageRoundTripLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
expect(createTaskLatencies.length).toBe(taskCount);
|
||||
expect(provisionLatencies.length).toBe(taskCount);
|
||||
expect(createSessionLatencies.length).toBe(taskCount * extraSessionCount);
|
||||
expect(messageRoundTripLatencies.length).toBe(taskCount * extraSessionCount);
|
||||
},
|
||||
);
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue