mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-17 02:04:13 +00:00
Stabilize SDK mode integration test
This commit is contained in:
parent
24e99ac5e7
commit
ec8b6afea9
274 changed files with 5412 additions and 7893 deletions
|
|
@ -33,13 +33,7 @@ async function sleep(ms: number): Promise<void> {
|
|||
await new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function poll<T>(
|
||||
label: string,
|
||||
timeoutMs: number,
|
||||
intervalMs: number,
|
||||
fn: () => Promise<T>,
|
||||
isDone: (value: T) => boolean
|
||||
): Promise<T> {
|
||||
async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn: () => Promise<T>, isDone: (value: T) => boolean): Promise<T> {
|
||||
const start = Date.now();
|
||||
let last: T;
|
||||
for (;;) {
|
||||
|
|
@ -75,11 +69,7 @@ async function githubApi(token: string, path: string, init?: RequestInit): Promi
|
|||
});
|
||||
}
|
||||
|
||||
async function ensureRemoteBranchExists(
|
||||
token: string,
|
||||
fullName: string,
|
||||
branchName: string
|
||||
): Promise<void> {
|
||||
async function ensureRemoteBranchExists(token: string, fullName: string, branchName: string): Promise<void> {
|
||||
const repoRes = await githubApi(token, `repos/${fullName}`, { method: "GET" });
|
||||
if (!repoRes.ok) {
|
||||
throw new Error(`GitHub repo lookup failed: ${repoRes.status} ${await repoRes.text()}`);
|
||||
|
|
@ -90,11 +80,7 @@ async function ensureRemoteBranchExists(
|
|||
throw new Error(`GitHub repo default branch is missing for ${fullName}`);
|
||||
}
|
||||
|
||||
const defaultRefRes = await githubApi(
|
||||
token,
|
||||
`repos/${fullName}/git/ref/heads/${encodeURIComponent(defaultBranch)}`,
|
||||
{ method: "GET" }
|
||||
);
|
||||
const defaultRefRes = await githubApi(token, `repos/${fullName}/git/ref/heads/${encodeURIComponent(defaultBranch)}`, { method: "GET" });
|
||||
if (!defaultRefRes.ok) {
|
||||
throw new Error(`GitHub default ref lookup failed: ${defaultRefRes.status} ${await defaultRefRes.text()}`);
|
||||
}
|
||||
|
|
@ -120,78 +106,69 @@ async function ensureRemoteBranchExists(
|
|||
}
|
||||
|
||||
describe("e2e(client): full integration stack workflow", () => {
|
||||
it.skipIf(!RUN_FULL_E2E)(
|
||||
"adds repo, loads branch graph, and executes a stack restack action",
|
||||
{ timeout: 8 * 60_000 },
|
||||
async () => {
|
||||
const endpoint =
|
||||
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const githubToken = requiredEnv("GITHUB_TOKEN");
|
||||
const { fullName } = parseGithubRepo(repoRemote);
|
||||
const normalizedRepoRemote = `https://github.com/${fullName}.git`;
|
||||
const seededBranch = `e2e/full-seed-${Date.now().toString(36)}-${randomUUID().slice(0, 8)}`;
|
||||
it.skipIf(!RUN_FULL_E2E)("adds repo, loads branch graph, and executes a stack restack action", { timeout: 8 * 60_000 }, async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const githubToken = requiredEnv("GITHUB_TOKEN");
|
||||
const { fullName } = parseGithubRepo(repoRemote);
|
||||
const normalizedRepoRemote = `https://github.com/${fullName}.git`;
|
||||
const seededBranch = `e2e/full-seed-${Date.now().toString(36)}-${randomUUID().slice(0, 8)}`;
|
||||
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
|
||||
try {
|
||||
await ensureRemoteBranchExists(githubToken, fullName, seededBranch);
|
||||
try {
|
||||
await ensureRemoteBranchExists(githubToken, fullName, seededBranch);
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
expect(repo.remoteUrl).toBe(normalizedRepoRemote);
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
expect(repo.remoteUrl).toBe(normalizedRepoRemote);
|
||||
|
||||
const overview = await poll<RepoOverview>(
|
||||
"repo overview includes seeded branch",
|
||||
90_000,
|
||||
1_000,
|
||||
async () => client.getRepoOverview(workspaceId, repo.repoId),
|
||||
(value) => value.branches.some((row) => row.branchName === seededBranch)
|
||||
const overview = await poll<RepoOverview>(
|
||||
"repo overview includes seeded branch",
|
||||
90_000,
|
||||
1_000,
|
||||
async () => client.getRepoOverview(workspaceId, repo.repoId),
|
||||
(value) => value.branches.some((row) => row.branchName === seededBranch),
|
||||
);
|
||||
|
||||
if (!overview.stackAvailable) {
|
||||
throw new Error(
|
||||
"git-spice is unavailable for this repo during full integration e2e; set HF_GIT_SPICE_BIN or install git-spice in the backend container",
|
||||
);
|
||||
|
||||
if (!overview.stackAvailable) {
|
||||
throw new Error(
|
||||
"git-spice is unavailable for this repo during full integration e2e; set HF_GIT_SPICE_BIN or install git-spice in the backend container"
|
||||
);
|
||||
}
|
||||
|
||||
const stackResult = await client.runRepoStackAction({
|
||||
workspaceId,
|
||||
repoId: repo.repoId,
|
||||
action: "restack_repo",
|
||||
});
|
||||
expect(stackResult.executed).toBe(true);
|
||||
expect(stackResult.action).toBe("restack_repo");
|
||||
|
||||
await poll<HistoryEvent[]>(
|
||||
"repo stack action history event",
|
||||
60_000,
|
||||
1_000,
|
||||
async () => client.listHistory({ workspaceId, limit: 200 }),
|
||||
(events) =>
|
||||
events.some((event) => {
|
||||
if (event.kind !== "repo.stack_action") {
|
||||
return false;
|
||||
}
|
||||
const payload = parseHistoryPayload(event);
|
||||
return payload.action === "restack_repo";
|
||||
})
|
||||
);
|
||||
|
||||
const postActionOverview = await client.getRepoOverview(workspaceId, repo.repoId);
|
||||
const seededRow = postActionOverview.branches.find((row) => row.branchName === seededBranch);
|
||||
expect(Boolean(seededRow)).toBe(true);
|
||||
expect(postActionOverview.fetchedAt).toBeGreaterThan(overview.fetchedAt);
|
||||
} finally {
|
||||
await githubApi(
|
||||
githubToken,
|
||||
`repos/${fullName}/git/refs/heads/${encodeURIComponent(seededBranch)}`,
|
||||
{ method: "DELETE" }
|
||||
).catch(() => {});
|
||||
}
|
||||
|
||||
const stackResult = await client.runRepoStackAction({
|
||||
workspaceId,
|
||||
repoId: repo.repoId,
|
||||
action: "restack_repo",
|
||||
});
|
||||
expect(stackResult.executed).toBe(true);
|
||||
expect(stackResult.action).toBe("restack_repo");
|
||||
|
||||
await poll<HistoryEvent[]>(
|
||||
"repo stack action history event",
|
||||
60_000,
|
||||
1_000,
|
||||
async () => client.listHistory({ workspaceId, limit: 200 }),
|
||||
(events) =>
|
||||
events.some((event) => {
|
||||
if (event.kind !== "repo.stack_action") {
|
||||
return false;
|
||||
}
|
||||
const payload = parseHistoryPayload(event);
|
||||
return payload.action === "restack_repo";
|
||||
}),
|
||||
);
|
||||
|
||||
const postActionOverview = await client.getRepoOverview(workspaceId, repo.repoId);
|
||||
const seededRow = postActionOverview.branches.find((row) => row.branchName === seededBranch);
|
||||
expect(Boolean(seededRow)).toBe(true);
|
||||
expect(postActionOverview.fetchedAt).toBeGreaterThan(overview.fetchedAt);
|
||||
} finally {
|
||||
await githubApi(githubToken, `repos/${fullName}/git/refs/heads/${encodeURIComponent(seededBranch)}`, { method: "DELETE" }).catch(() => {});
|
||||
}
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ async function poll<T>(
|
|||
intervalMs: number,
|
||||
fn: () => Promise<T>,
|
||||
isDone: (value: T) => boolean,
|
||||
onTick?: (value: T) => void
|
||||
onTick?: (value: T) => void,
|
||||
): Promise<T> {
|
||||
const start = Date.now();
|
||||
let last: T;
|
||||
|
|
@ -117,7 +117,7 @@ async function debugDump(client: ReturnType<typeof createBackendClient>, workspa
|
|||
prSubmitted: handoff.prSubmitted,
|
||||
},
|
||||
null,
|
||||
2
|
||||
2,
|
||||
),
|
||||
"=== history (most recent first) ===",
|
||||
historySummary || "(none)",
|
||||
|
|
@ -143,209 +143,190 @@ async function githubApi(token: string, path: string, init?: RequestInit): Promi
|
|||
}
|
||||
|
||||
describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
|
||||
it.skipIf(!RUN_E2E)(
|
||||
"creates a handoff, waits for agent to implement, and opens a PR",
|
||||
{ timeout: 15 * 60_000 },
|
||||
async () => {
|
||||
const endpoint =
|
||||
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const githubToken = requiredEnv("GITHUB_TOKEN");
|
||||
it.skipIf(!RUN_E2E)("creates a handoff, waits for agent to implement, and opens a PR", { timeout: 15 * 60_000 }, async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const githubToken = requiredEnv("GITHUB_TOKEN");
|
||||
|
||||
const { fullName } = parseGithubRepo(repoRemote);
|
||||
const runId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const expectedFile = `e2e/${runId}.txt`;
|
||||
const { fullName } = parseGithubRepo(repoRemote);
|
||||
const runId = `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`;
|
||||
const expectedFile = `e2e/${runId}.txt`;
|
||||
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
|
||||
const created = await client.createHandoff({
|
||||
workspaceId,
|
||||
repoId: repo.repoId,
|
||||
task: [
|
||||
"E2E test task:",
|
||||
`1. Create a new file at ${expectedFile} containing the single line: ${runId}`,
|
||||
"2. git add the file",
|
||||
`3. git commit -m \"test(e2e): ${runId}\"`,
|
||||
"4. git push the branch to origin",
|
||||
"5. Stop when done (agent should go idle).",
|
||||
].join("\n"),
|
||||
providerId: "daytona",
|
||||
explicitTitle: `test(e2e): ${runId}`,
|
||||
explicitBranchName: `e2e/${runId}`,
|
||||
});
|
||||
|
||||
let prNumber: number | null = null;
|
||||
let branchName: string | null = null;
|
||||
let sandboxId: string | null = null;
|
||||
let sessionId: string | null = null;
|
||||
let lastStatus: string | null = null;
|
||||
|
||||
try {
|
||||
const namedAndProvisioned = await poll<HandoffRecord>(
|
||||
"handoff naming + sandbox provisioning",
|
||||
// Cold Daytona snapshot/image preparation can exceed 5 minutes on first run.
|
||||
8 * 60_000,
|
||||
1_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => Boolean(h.title && h.branchName && h.activeSandboxId),
|
||||
(h) => {
|
||||
if (h.status !== lastStatus) {
|
||||
lastStatus = h.status;
|
||||
}
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state during provisioning");
|
||||
}
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
branchName = namedAndProvisioned.branchName!;
|
||||
sandboxId = namedAndProvisioned.activeSandboxId!;
|
||||
|
||||
const created = await client.createHandoff({
|
||||
workspaceId,
|
||||
repoId: repo.repoId,
|
||||
task: [
|
||||
"E2E test task:",
|
||||
`1. Create a new file at ${expectedFile} containing the single line: ${runId}`,
|
||||
"2. git add the file",
|
||||
`3. git commit -m \"test(e2e): ${runId}\"`,
|
||||
"4. git push the branch to origin",
|
||||
"5. Stop when done (agent should go idle).",
|
||||
].join("\n"),
|
||||
providerId: "daytona",
|
||||
explicitTitle: `test(e2e): ${runId}`,
|
||||
explicitBranchName: `e2e/${runId}`,
|
||||
const withSession = await poll<HandoffRecord>(
|
||||
"handoff to create active session",
|
||||
3 * 60_000,
|
||||
1_500,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => Boolean(h.activeSessionId),
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state while waiting for active session");
|
||||
}
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
let prNumber: number | null = null;
|
||||
let branchName: string | null = null;
|
||||
let sandboxId: string | null = null;
|
||||
let sessionId: string | null = null;
|
||||
let lastStatus: string | null = null;
|
||||
sessionId = withSession.activeSessionId!;
|
||||
|
||||
try {
|
||||
const namedAndProvisioned = await poll<HandoffRecord>(
|
||||
"handoff naming + sandbox provisioning",
|
||||
// Cold Daytona snapshot/image preparation can exceed 5 minutes on first run.
|
||||
8 * 60_000,
|
||||
1_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => Boolean(h.title && h.branchName && h.activeSandboxId),
|
||||
(h) => {
|
||||
if (h.status !== lastStatus) {
|
||||
lastStatus = h.status;
|
||||
}
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state during provisioning");
|
||||
}
|
||||
await poll<{ id: string }[]>(
|
||||
"session transcript bootstrap events",
|
||||
2 * 60_000,
|
||||
2_000,
|
||||
async () =>
|
||||
(
|
||||
await client.listSandboxSessionEvents(workspaceId, withSession.providerId, sandboxId!, {
|
||||
sessionId: sessionId!,
|
||||
limit: 40,
|
||||
})
|
||||
).items,
|
||||
(events) => events.length > 0,
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
await poll<HandoffRecord>(
|
||||
"handoff to reach idle state",
|
||||
8 * 60_000,
|
||||
2_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => h.status === "idle",
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state while waiting for idle");
|
||||
}
|
||||
).catch(async (err) => {
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
const prCreatedEvent = await poll<HistoryEvent[]>(
|
||||
"PR creation history event",
|
||||
3 * 60_000,
|
||||
2_000,
|
||||
async () => client.listHistory({ workspaceId, handoffId: created.handoffId, limit: 200 }),
|
||||
(events) => events.some((e) => e.kind === "handoff.pr_created"),
|
||||
)
|
||||
.catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
})
|
||||
.then((events) => events.find((e) => e.kind === "handoff.pr_created")!);
|
||||
|
||||
branchName = namedAndProvisioned.branchName!;
|
||||
sandboxId = namedAndProvisioned.activeSandboxId!;
|
||||
const payload = parseHistoryPayload(prCreatedEvent);
|
||||
prNumber = Number(payload.prNumber);
|
||||
const prUrl = String(payload.prUrl ?? "");
|
||||
|
||||
const withSession = await poll<HandoffRecord>(
|
||||
"handoff to create active session",
|
||||
3 * 60_000,
|
||||
1_500,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => Boolean(h.activeSessionId),
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state while waiting for active session");
|
||||
}
|
||||
}
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
expect(prNumber).toBeGreaterThan(0);
|
||||
expect(prUrl).toContain("/pull/");
|
||||
|
||||
sessionId = withSession.activeSessionId!;
|
||||
const prFilesRes = await githubApi(githubToken, `repos/${fullName}/pulls/${prNumber}/files?per_page=100`, { method: "GET" });
|
||||
if (!prFilesRes.ok) {
|
||||
const body = await prFilesRes.text();
|
||||
throw new Error(`GitHub PR files request failed: ${prFilesRes.status} ${body}`);
|
||||
}
|
||||
const prFiles = (await prFilesRes.json()) as Array<{ filename: string }>;
|
||||
expect(prFiles.some((f) => f.filename === expectedFile)).toBe(true);
|
||||
|
||||
await poll<{ id: string }[]>(
|
||||
"session transcript bootstrap events",
|
||||
// Close the handoff and assert the sandbox is released (stopped).
|
||||
await client.runAction(workspaceId, created.handoffId, "archive");
|
||||
|
||||
await poll<HandoffRecord>(
|
||||
"handoff to become archived (session released)",
|
||||
60_000,
|
||||
1_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => h.status === "archived" && h.activeSessionId === null,
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
if (sandboxId) {
|
||||
await poll<{ providerId: string; sandboxId: string; state: string; at: number }>(
|
||||
"daytona sandbox to stop",
|
||||
2 * 60_000,
|
||||
2_000,
|
||||
async () =>
|
||||
(
|
||||
await client.listSandboxSessionEvents(workspaceId, withSession.providerId, sandboxId!, {
|
||||
sessionId: sessionId!,
|
||||
limit: 40,
|
||||
})
|
||||
).items,
|
||||
(events) => events.length > 0
|
||||
async () => client.sandboxProviderState(workspaceId, "daytona", sandboxId!),
|
||||
(s) => {
|
||||
const st = String(s.state).toLowerCase();
|
||||
return st.includes("stopped") || st.includes("suspended") || st.includes("paused");
|
||||
},
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
const state = await client.sandboxProviderState(workspaceId, "daytona", sandboxId!).catch(() => null);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n` + `sandbox state: ${state ? state.state : "unknown"}\n` + `${dump}`);
|
||||
});
|
||||
}
|
||||
} finally {
|
||||
if (prNumber && Number.isFinite(prNumber)) {
|
||||
await githubApi(githubToken, `repos/${fullName}/pulls/${prNumber}`, {
|
||||
method: "PATCH",
|
||||
body: JSON.stringify({ state: "closed" }),
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
await poll<HandoffRecord>(
|
||||
"handoff to reach idle state",
|
||||
8 * 60_000,
|
||||
2_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => h.status === "idle",
|
||||
(h) => {
|
||||
if (h.status === "error") {
|
||||
throw new Error("handoff entered error state while waiting for idle");
|
||||
}
|
||||
}
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
const prCreatedEvent = await poll<HistoryEvent[]>(
|
||||
"PR creation history event",
|
||||
3 * 60_000,
|
||||
2_000,
|
||||
async () => client.listHistory({ workspaceId, handoffId: created.handoffId, limit: 200 }),
|
||||
(events) => events.some((e) => e.kind === "handoff.pr_created")
|
||||
)
|
||||
.catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
})
|
||||
.then((events) => events.find((e) => e.kind === "handoff.pr_created")!);
|
||||
|
||||
const payload = parseHistoryPayload(prCreatedEvent);
|
||||
prNumber = Number(payload.prNumber);
|
||||
const prUrl = String(payload.prUrl ?? "");
|
||||
|
||||
expect(prNumber).toBeGreaterThan(0);
|
||||
expect(prUrl).toContain("/pull/");
|
||||
|
||||
const prFilesRes = await githubApi(
|
||||
githubToken,
|
||||
`repos/${fullName}/pulls/${prNumber}/files?per_page=100`,
|
||||
{ method: "GET" }
|
||||
);
|
||||
if (!prFilesRes.ok) {
|
||||
const body = await prFilesRes.text();
|
||||
throw new Error(`GitHub PR files request failed: ${prFilesRes.status} ${body}`);
|
||||
}
|
||||
const prFiles = (await prFilesRes.json()) as Array<{ filename: string }>;
|
||||
expect(prFiles.some((f) => f.filename === expectedFile)).toBe(true);
|
||||
|
||||
// Close the handoff and assert the sandbox is released (stopped).
|
||||
await client.runAction(workspaceId, created.handoffId, "archive");
|
||||
|
||||
await poll<HandoffRecord>(
|
||||
"handoff to become archived (session released)",
|
||||
60_000,
|
||||
1_000,
|
||||
async () => client.getHandoff(workspaceId, created.handoffId),
|
||||
(h) => h.status === "archived" && h.activeSessionId === null
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
throw new Error(`${err instanceof Error ? err.message : String(err)}\n${dump}`);
|
||||
});
|
||||
|
||||
if (sandboxId) {
|
||||
await poll<{ providerId: string; sandboxId: string; state: string; at: number }>(
|
||||
"daytona sandbox to stop",
|
||||
2 * 60_000,
|
||||
2_000,
|
||||
async () => client.sandboxProviderState(workspaceId, "daytona", sandboxId!),
|
||||
(s) => {
|
||||
const st = String(s.state).toLowerCase();
|
||||
return st.includes("stopped") || st.includes("suspended") || st.includes("paused");
|
||||
}
|
||||
).catch(async (err) => {
|
||||
const dump = await debugDump(client, workspaceId, created.handoffId);
|
||||
const state = await client
|
||||
.sandboxProviderState(workspaceId, "daytona", sandboxId!)
|
||||
.catch(() => null);
|
||||
throw new Error(
|
||||
`${err instanceof Error ? err.message : String(err)}\n` +
|
||||
`sandbox state: ${state ? state.state : "unknown"}\n` +
|
||||
`${dump}`
|
||||
);
|
||||
});
|
||||
}
|
||||
} finally {
|
||||
if (prNumber && Number.isFinite(prNumber)) {
|
||||
await githubApi(githubToken, `repos/${fullName}/pulls/${prNumber}`, {
|
||||
method: "PATCH",
|
||||
body: JSON.stringify({ state: "closed" }),
|
||||
headers: { "Content-Type": "application/json" },
|
||||
}).catch(() => {});
|
||||
}
|
||||
|
||||
if (branchName) {
|
||||
await githubApi(
|
||||
githubToken,
|
||||
`repos/${fullName}/git/refs/heads/${encodeURIComponent(branchName)}`,
|
||||
{ method: "DELETE" }
|
||||
).catch(() => {});
|
||||
}
|
||||
if (branchName) {
|
||||
await githubApi(githubToken, `repos/${fullName}/git/refs/heads/${encodeURIComponent(branchName)}`, { method: "DELETE" }).catch(() => {});
|
||||
}
|
||||
}
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,13 +1,7 @@
|
|||
import { execFile } from "node:child_process";
|
||||
import { promisify } from "node:util";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import type {
|
||||
HandoffWorkbenchSnapshot,
|
||||
WorkbenchAgentTab,
|
||||
WorkbenchHandoff,
|
||||
WorkbenchModelId,
|
||||
WorkbenchTranscriptEvent,
|
||||
} from "@openhandoff/shared";
|
||||
import type { HandoffWorkbenchSnapshot, WorkbenchAgentTab, WorkbenchHandoff, WorkbenchModelId, WorkbenchTranscriptEvent } from "@openhandoff/shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
|
||||
const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
|
||||
|
|
@ -48,13 +42,7 @@ async function seedSandboxFile(workspaceId: string, handoffId: string, filePath:
|
|||
await execFileAsync("docker", ["exec", "openhandoff-backend-1", "bash", "-lc", script]);
|
||||
}
|
||||
|
||||
async function poll<T>(
|
||||
label: string,
|
||||
timeoutMs: number,
|
||||
intervalMs: number,
|
||||
fn: () => Promise<T>,
|
||||
isDone: (value: T) => boolean,
|
||||
): Promise<T> {
|
||||
async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn: () => Promise<T>, isDone: (value: T) => boolean): Promise<T> {
|
||||
const startedAt = Date.now();
|
||||
let lastValue: T;
|
||||
|
||||
|
|
@ -147,10 +135,7 @@ function extractEventText(event: WorkbenchTranscriptEvent): string {
|
|||
return JSON.stringify(payload);
|
||||
}
|
||||
|
||||
function transcriptIncludesAgentText(
|
||||
transcript: WorkbenchTranscriptEvent[],
|
||||
expectedText: string,
|
||||
): boolean {
|
||||
function transcriptIncludesAgentText(transcript: WorkbenchTranscriptEvent[], expectedText: string): boolean {
|
||||
return transcript
|
||||
.filter((event) => event.sender === "agent")
|
||||
.map((event) => extractEventText(event))
|
||||
|
|
@ -159,176 +144,164 @@ function transcriptIncludesAgentText(
|
|||
}
|
||||
|
||||
describe("e2e(client): workbench flows", () => {
|
||||
it.skipIf(!RUN_WORKBENCH_E2E)(
|
||||
"creates a handoff, adds sessions, exchanges messages, and manages workbench state",
|
||||
{ timeout: 20 * 60_000 },
|
||||
async () => {
|
||||
const endpoint =
|
||||
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
|
||||
const runId = `wb-${Date.now().toString(36)}`;
|
||||
const expectedFile = `${runId}.txt`;
|
||||
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
|
||||
const expectedReply = `WORKBENCH_ACK_${runId}`;
|
||||
it.skipIf(!RUN_WORKBENCH_E2E)("creates a handoff, adds sessions, exchanges messages, and manages workbench state", { timeout: 20 * 60_000 }, async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
|
||||
const runId = `wb-${Date.now().toString(36)}`;
|
||||
const expectedFile = `${runId}.txt`;
|
||||
const expectedInitialReply = `WORKBENCH_READY_${runId}`;
|
||||
const expectedReply = `WORKBENCH_ACK_${runId}`;
|
||||
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
const created = await client.createWorkbenchHandoff(workspaceId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench E2E ${runId}`,
|
||||
branch: `e2e/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${expectedInitialReply}`,
|
||||
});
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
const created = await client.createWorkbenchHandoff(workspaceId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench E2E ${runId}`,
|
||||
branch: `e2e/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${expectedInitialReply}`,
|
||||
});
|
||||
|
||||
const provisioned = await poll(
|
||||
"handoff provisioning",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => handoff.branch === `e2e/${runId}` && handoff.tabs.length > 0,
|
||||
);
|
||||
const provisioned = await poll(
|
||||
"handoff provisioning",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => handoff.branch === `e2e/${runId}` && handoff.tabs.length > 0,
|
||||
);
|
||||
|
||||
const primaryTab = provisioned.tabs[0]!;
|
||||
const primaryTab = provisioned.tabs[0]!;
|
||||
|
||||
const initialCompleted = await poll(
|
||||
"initial agent response",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, primaryTab.id);
|
||||
return (
|
||||
handoff.status === "idle" &&
|
||||
tab.status === "idle" &&
|
||||
transcriptIncludesAgentText(tab.transcript, expectedInitialReply)
|
||||
);
|
||||
const initialCompleted = await poll(
|
||||
"initial agent response",
|
||||
12 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, primaryTab.id);
|
||||
return handoff.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedInitialReply);
|
||||
},
|
||||
);
|
||||
|
||||
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
|
||||
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
|
||||
|
||||
await seedSandboxFile(workspaceId, created.handoffId, expectedFile, runId);
|
||||
|
||||
const fileSeeded = await poll(
|
||||
"seeded sandbox file reflected in workbench",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => handoff.fileChanges.some((file) => file.path === expectedFile),
|
||||
);
|
||||
expect(fileSeeded.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
|
||||
|
||||
await client.renameWorkbenchHandoff(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
value: `Workbench E2E ${runId} Renamed`,
|
||||
});
|
||||
await client.renameWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: primaryTab.id,
|
||||
title: "Primary Session",
|
||||
});
|
||||
|
||||
const secondTab = await client.createWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
model,
|
||||
});
|
||||
|
||||
await client.renameWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
title: "Follow-up Session",
|
||||
});
|
||||
|
||||
await client.updateWorkbenchDraft(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
text: `Reply with exactly: ${expectedReply}`,
|
||||
attachments: [
|
||||
{
|
||||
id: `${expectedFile}:1`,
|
||||
filePath: expectedFile,
|
||||
lineNumber: 1,
|
||||
lineContent: runId,
|
||||
},
|
||||
);
|
||||
],
|
||||
});
|
||||
|
||||
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
|
||||
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
|
||||
const drafted = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
|
||||
expect(findTab(drafted, secondTab.tabId).draft.text).toContain(expectedReply);
|
||||
expect(findTab(drafted, secondTab.tabId).draft.attachments).toHaveLength(1);
|
||||
|
||||
await seedSandboxFile(workspaceId, created.handoffId, expectedFile, runId);
|
||||
await client.sendWorkbenchMessage(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
text: `Reply with exactly: ${expectedReply}`,
|
||||
attachments: [],
|
||||
});
|
||||
|
||||
const fileSeeded = await poll(
|
||||
"seeded sandbox file reflected in workbench",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => handoff.fileChanges.some((file) => file.path === expectedFile),
|
||||
);
|
||||
expect(fileSeeded.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
|
||||
const withSecondReply = await poll(
|
||||
"follow-up session response",
|
||||
10 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, secondTab.tabId);
|
||||
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
|
||||
},
|
||||
);
|
||||
|
||||
await client.renameWorkbenchHandoff(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
value: `Workbench E2E ${runId} Renamed`,
|
||||
});
|
||||
await client.renameWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: primaryTab.id,
|
||||
title: "Primary Session",
|
||||
});
|
||||
const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
|
||||
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
|
||||
|
||||
const secondTab = await client.createWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
model,
|
||||
});
|
||||
await client.setWorkbenchSessionUnread(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
unread: false,
|
||||
});
|
||||
await client.markWorkbenchUnread(workspaceId, { handoffId: created.handoffId });
|
||||
|
||||
await client.renameWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
title: "Follow-up Session",
|
||||
});
|
||||
const unreadSnapshot = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
|
||||
expect(unreadSnapshot.tabs.some((tab) => tab.unread)).toBe(true);
|
||||
|
||||
await client.updateWorkbenchDraft(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
text: `Reply with exactly: ${expectedReply}`,
|
||||
attachments: [
|
||||
{
|
||||
id: `${expectedFile}:1`,
|
||||
filePath: expectedFile,
|
||||
lineNumber: 1,
|
||||
lineContent: runId,
|
||||
},
|
||||
],
|
||||
});
|
||||
await client.closeWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
});
|
||||
|
||||
const drafted = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
|
||||
expect(findTab(drafted, secondTab.tabId).draft.text).toContain(expectedReply);
|
||||
expect(findTab(drafted, secondTab.tabId).draft.attachments).toHaveLength(1);
|
||||
const closedSnapshot = await poll(
|
||||
"secondary session closed",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => !handoff.tabs.some((tab) => tab.id === secondTab.tabId),
|
||||
);
|
||||
expect(closedSnapshot.tabs).toHaveLength(1);
|
||||
|
||||
await client.sendWorkbenchMessage(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
text: `Reply with exactly: ${expectedReply}`,
|
||||
attachments: [],
|
||||
});
|
||||
await client.revertWorkbenchFile(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
path: expectedFile,
|
||||
});
|
||||
|
||||
const withSecondReply = await poll(
|
||||
"follow-up session response",
|
||||
10 * 60_000,
|
||||
2_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, secondTab.tabId);
|
||||
return (
|
||||
tab.status === "idle" &&
|
||||
transcriptIncludesAgentText(tab.transcript, expectedReply)
|
||||
);
|
||||
},
|
||||
);
|
||||
const revertedSnapshot = await poll(
|
||||
"file revert reflected in workbench",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => !handoff.fileChanges.some((file) => file.path === expectedFile),
|
||||
);
|
||||
|
||||
const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
|
||||
expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
|
||||
|
||||
await client.setWorkbenchSessionUnread(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
unread: false,
|
||||
});
|
||||
await client.markWorkbenchUnread(workspaceId, { handoffId: created.handoffId });
|
||||
|
||||
const unreadSnapshot = findHandoff(await client.getWorkbench(workspaceId), created.handoffId);
|
||||
expect(unreadSnapshot.tabs.some((tab) => tab.unread)).toBe(true);
|
||||
|
||||
await client.closeWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: secondTab.tabId,
|
||||
});
|
||||
|
||||
const closedSnapshot = await poll(
|
||||
"secondary session closed",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => !handoff.tabs.some((tab) => tab.id === secondTab.tabId),
|
||||
);
|
||||
expect(closedSnapshot.tabs).toHaveLength(1);
|
||||
|
||||
await client.revertWorkbenchFile(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
path: expectedFile,
|
||||
});
|
||||
|
||||
const revertedSnapshot = await poll(
|
||||
"file revert reflected in workbench",
|
||||
30_000,
|
||||
1_000,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => !handoff.fileChanges.some((file) => file.path === expectedFile),
|
||||
);
|
||||
|
||||
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
|
||||
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
|
||||
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
|
||||
},
|
||||
);
|
||||
expect(revertedSnapshot.fileChanges.some((file) => file.path === expectedFile)).toBe(false);
|
||||
expect(revertedSnapshot.title).toBe(`Workbench E2E ${runId} Renamed`);
|
||||
expect(findTab(revertedSnapshot, primaryTab.id).sessionName).toBe("Primary Session");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,11 +1,5 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type {
|
||||
HandoffWorkbenchSnapshot,
|
||||
WorkbenchAgentTab,
|
||||
WorkbenchHandoff,
|
||||
WorkbenchModelId,
|
||||
WorkbenchTranscriptEvent,
|
||||
} from "@openhandoff/shared";
|
||||
import type { HandoffWorkbenchSnapshot, WorkbenchAgentTab, WorkbenchHandoff, WorkbenchModelId, WorkbenchTranscriptEvent } from "@openhandoff/shared";
|
||||
import { createBackendClient } from "../../src/backend-client.js";
|
||||
|
||||
const RUN_WORKBENCH_LOAD_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E === "1";
|
||||
|
|
@ -44,13 +38,7 @@ async function sleep(ms: number): Promise<void> {
|
|||
await new Promise((resolve) => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
async function poll<T>(
|
||||
label: string,
|
||||
timeoutMs: number,
|
||||
intervalMs: number,
|
||||
fn: () => Promise<T>,
|
||||
isDone: (value: T) => boolean,
|
||||
): Promise<T> {
|
||||
async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn: () => Promise<T>, isDone: (value: T) => boolean): Promise<T> {
|
||||
const startedAt = Date.now();
|
||||
let lastValue: T;
|
||||
|
||||
|
|
@ -174,8 +162,7 @@ async function measureWorkbenchSnapshot(
|
|||
const payloadBytes = Buffer.byteLength(JSON.stringify(finalSnapshot), "utf8");
|
||||
const tabCount = finalSnapshot.handoffs.reduce((sum, handoff) => sum + handoff.tabs.length, 0);
|
||||
const transcriptEventCount = finalSnapshot.handoffs.reduce(
|
||||
(sum, handoff) =>
|
||||
sum + handoff.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
|
||||
(sum, handoff) => sum + handoff.tabs.reduce((tabSum, tab) => tabSum + tab.transcript.length, 0),
|
||||
0,
|
||||
);
|
||||
|
||||
|
|
@ -190,142 +177,133 @@ async function measureWorkbenchSnapshot(
|
|||
}
|
||||
|
||||
describe("e2e(client): workbench load", () => {
|
||||
it.skipIf(!RUN_WORKBENCH_LOAD_E2E)(
|
||||
"runs a simple sequential load profile against the real backend",
|
||||
{ timeout: 30 * 60_000 },
|
||||
async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
|
||||
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
|
||||
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
|
||||
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
|
||||
it.skipIf(!RUN_WORKBENCH_LOAD_E2E)("runs a simple sequential load profile against the real backend", { timeout: 30 * 60_000 }, async () => {
|
||||
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
|
||||
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
|
||||
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
|
||||
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
|
||||
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
|
||||
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
|
||||
const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
|
||||
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
const client = createBackendClient({
|
||||
endpoint,
|
||||
defaultWorkspaceId: workspaceId,
|
||||
});
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
const createHandoffLatencies: number[] = [];
|
||||
const provisionLatencies: number[] = [];
|
||||
const createSessionLatencies: number[] = [];
|
||||
const messageRoundTripLatencies: number[] = [];
|
||||
const snapshotSeries: Array<{
|
||||
handoffCount: number;
|
||||
avgMs: number;
|
||||
maxMs: number;
|
||||
payloadBytes: number;
|
||||
tabCount: number;
|
||||
transcriptEventCount: number;
|
||||
}> = [];
|
||||
|
||||
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
|
||||
|
||||
for (let handoffIndex = 0; handoffIndex < handoffCount; handoffIndex += 1) {
|
||||
const runId = `load-${handoffIndex}-${Date.now().toString(36)}`;
|
||||
const initialReply = `LOAD_INIT_${runId}`;
|
||||
|
||||
const createStartedAt = performance.now();
|
||||
const created = await client.createWorkbenchHandoff(workspaceId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench Load ${runId}`,
|
||||
branch: `load/${runId}`,
|
||||
model,
|
||||
task: `Reply with exactly: ${initialReply}`,
|
||||
});
|
||||
createHandoffLatencies.push(performance.now() - createStartedAt);
|
||||
|
||||
const repo = await client.addRepo(workspaceId, repoRemote);
|
||||
const createHandoffLatencies: number[] = [];
|
||||
const provisionLatencies: number[] = [];
|
||||
const createSessionLatencies: number[] = [];
|
||||
const messageRoundTripLatencies: number[] = [];
|
||||
const snapshotSeries: Array<{
|
||||
handoffCount: number;
|
||||
avgMs: number;
|
||||
maxMs: number;
|
||||
payloadBytes: number;
|
||||
tabCount: number;
|
||||
transcriptEventCount: number;
|
||||
}> = [];
|
||||
const provisionStartedAt = performance.now();
|
||||
const provisioned = await poll(
|
||||
`handoff ${runId} provisioning`,
|
||||
12 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = handoff.tabs[0];
|
||||
return Boolean(tab && handoff.status === "idle" && tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, initialReply));
|
||||
},
|
||||
);
|
||||
provisionLatencies.push(performance.now() - provisionStartedAt);
|
||||
|
||||
snapshotSeries.push(await measureWorkbenchSnapshot(client, workspaceId, 2));
|
||||
expect(provisioned.tabs.length).toBeGreaterThan(0);
|
||||
const primaryTab = provisioned.tabs[0]!;
|
||||
expect(transcriptIncludesAgentText(primaryTab.transcript, initialReply)).toBe(true);
|
||||
|
||||
for (let handoffIndex = 0; handoffIndex < handoffCount; handoffIndex += 1) {
|
||||
const runId = `load-${handoffIndex}-${Date.now().toString(36)}`;
|
||||
const initialReply = `LOAD_INIT_${runId}`;
|
||||
|
||||
const createStartedAt = performance.now();
|
||||
const created = await client.createWorkbenchHandoff(workspaceId, {
|
||||
repoId: repo.repoId,
|
||||
title: `Workbench Load ${runId}`,
|
||||
branch: `load/${runId}`,
|
||||
for (let sessionIndex = 0; sessionIndex < extraSessionCount; sessionIndex += 1) {
|
||||
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
|
||||
const createSessionStartedAt = performance.now();
|
||||
const createdSession = await client.createWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
model,
|
||||
task: `Reply with exactly: ${initialReply}`,
|
||||
});
|
||||
createHandoffLatencies.push(performance.now() - createStartedAt);
|
||||
createSessionLatencies.push(performance.now() - createSessionStartedAt);
|
||||
|
||||
const provisionStartedAt = performance.now();
|
||||
const provisioned = await poll(
|
||||
`handoff ${runId} provisioning`,
|
||||
12 * 60_000,
|
||||
await client.sendWorkbenchMessage(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: createdSession.tabId,
|
||||
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
|
||||
attachments: [],
|
||||
});
|
||||
|
||||
const messageStartedAt = performance.now();
|
||||
const withReply = await poll(
|
||||
`handoff ${runId} session ${sessionIndex} reply`,
|
||||
10 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = handoff.tabs[0];
|
||||
return Boolean(
|
||||
tab &&
|
||||
handoff.status === "idle" &&
|
||||
tab.status === "idle" &&
|
||||
transcriptIncludesAgentText(tab.transcript, initialReply),
|
||||
);
|
||||
const tab = findTab(handoff, createdSession.tabId);
|
||||
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
|
||||
},
|
||||
);
|
||||
provisionLatencies.push(performance.now() - provisionStartedAt);
|
||||
messageRoundTripLatencies.push(performance.now() - messageStartedAt);
|
||||
|
||||
expect(provisioned.tabs.length).toBeGreaterThan(0);
|
||||
const primaryTab = provisioned.tabs[0]!;
|
||||
expect(transcriptIncludesAgentText(primaryTab.transcript, initialReply)).toBe(true);
|
||||
|
||||
for (let sessionIndex = 0; sessionIndex < extraSessionCount; sessionIndex += 1) {
|
||||
const expectedReply = `LOAD_REPLY_${runId}_${sessionIndex}`;
|
||||
const createSessionStartedAt = performance.now();
|
||||
const createdSession = await client.createWorkbenchSession(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
model,
|
||||
});
|
||||
createSessionLatencies.push(performance.now() - createSessionStartedAt);
|
||||
|
||||
await client.sendWorkbenchMessage(workspaceId, {
|
||||
handoffId: created.handoffId,
|
||||
tabId: createdSession.tabId,
|
||||
text: `Run pwd in the repo, then reply with exactly: ${expectedReply}`,
|
||||
attachments: [],
|
||||
});
|
||||
|
||||
const messageStartedAt = performance.now();
|
||||
const withReply = await poll(
|
||||
`handoff ${runId} session ${sessionIndex} reply`,
|
||||
10 * 60_000,
|
||||
pollIntervalMs,
|
||||
async () => findHandoff(await client.getWorkbench(workspaceId), created.handoffId),
|
||||
(handoff) => {
|
||||
const tab = findTab(handoff, createdSession.tabId);
|
||||
return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
|
||||
},
|
||||
);
|
||||
messageRoundTripLatencies.push(performance.now() - messageStartedAt);
|
||||
|
||||
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.tabId).transcript, expectedReply)).toBe(true);
|
||||
}
|
||||
|
||||
const snapshotMetrics = await measureWorkbenchSnapshot(client, workspaceId, 3);
|
||||
snapshotSeries.push(snapshotMetrics);
|
||||
console.info(
|
||||
"[workbench-load-snapshot]",
|
||||
JSON.stringify({
|
||||
handoffIndex: handoffIndex + 1,
|
||||
...snapshotMetrics,
|
||||
}),
|
||||
);
|
||||
expect(transcriptIncludesAgentText(findTab(withReply, createdSession.tabId).transcript, expectedReply)).toBe(true);
|
||||
}
|
||||
|
||||
const firstSnapshot = snapshotSeries[0]!;
|
||||
const lastSnapshot = snapshotSeries[snapshotSeries.length - 1]!;
|
||||
const summary = {
|
||||
handoffCount,
|
||||
extraSessionCount,
|
||||
createHandoffAvgMs: Math.round(average(createHandoffLatencies)),
|
||||
provisionAvgMs: Math.round(average(provisionLatencies)),
|
||||
createSessionAvgMs: Math.round(average(createSessionLatencies)),
|
||||
messageRoundTripAvgMs: Math.round(average(messageRoundTripLatencies)),
|
||||
snapshotReadBaselineAvgMs: firstSnapshot.avgMs,
|
||||
snapshotReadFinalAvgMs: lastSnapshot.avgMs,
|
||||
snapshotReadFinalMaxMs: lastSnapshot.maxMs,
|
||||
snapshotPayloadBaselineBytes: firstSnapshot.payloadBytes,
|
||||
snapshotPayloadFinalBytes: lastSnapshot.payloadBytes,
|
||||
snapshotTabFinalCount: lastSnapshot.tabCount,
|
||||
snapshotTranscriptFinalCount: lastSnapshot.transcriptEventCount,
|
||||
};
|
||||
const snapshotMetrics = await measureWorkbenchSnapshot(client, workspaceId, 3);
|
||||
snapshotSeries.push(snapshotMetrics);
|
||||
console.info(
|
||||
"[workbench-load-snapshot]",
|
||||
JSON.stringify({
|
||||
handoffIndex: handoffIndex + 1,
|
||||
...snapshotMetrics,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
console.info("[workbench-load-summary]", JSON.stringify(summary));
|
||||
const firstSnapshot = snapshotSeries[0]!;
|
||||
const lastSnapshot = snapshotSeries[snapshotSeries.length - 1]!;
|
||||
const summary = {
|
||||
handoffCount,
|
||||
extraSessionCount,
|
||||
createHandoffAvgMs: Math.round(average(createHandoffLatencies)),
|
||||
provisionAvgMs: Math.round(average(provisionLatencies)),
|
||||
createSessionAvgMs: Math.round(average(createSessionLatencies)),
|
||||
messageRoundTripAvgMs: Math.round(average(messageRoundTripLatencies)),
|
||||
snapshotReadBaselineAvgMs: firstSnapshot.avgMs,
|
||||
snapshotReadFinalAvgMs: lastSnapshot.avgMs,
|
||||
snapshotReadFinalMaxMs: lastSnapshot.maxMs,
|
||||
snapshotPayloadBaselineBytes: firstSnapshot.payloadBytes,
|
||||
snapshotPayloadFinalBytes: lastSnapshot.payloadBytes,
|
||||
snapshotTabFinalCount: lastSnapshot.tabCount,
|
||||
snapshotTranscriptFinalCount: lastSnapshot.transcriptEventCount,
|
||||
};
|
||||
|
||||
expect(createHandoffLatencies.length).toBe(handoffCount);
|
||||
expect(provisionLatencies.length).toBe(handoffCount);
|
||||
expect(createSessionLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
expect(messageRoundTripLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
},
|
||||
);
|
||||
console.info("[workbench-load-summary]", JSON.stringify(summary));
|
||||
|
||||
expect(createHandoffLatencies.length).toBe(handoffCount);
|
||||
expect(provisionLatencies.length).toBe(handoffCount);
|
||||
expect(createSessionLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
expect(messageRoundTripLatencies.length).toBe(handoffCount * extraSessionCount);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import {
|
|||
projectKey,
|
||||
projectPrSyncKey,
|
||||
sandboxInstanceKey,
|
||||
workspaceKey
|
||||
workspaceKey,
|
||||
} from "../src/keys.js";
|
||||
|
||||
describe("actor keys", () => {
|
||||
|
|
@ -20,7 +20,7 @@ describe("actor keys", () => {
|
|||
historyKey("default", "repo"),
|
||||
projectPrSyncKey("default", "repo"),
|
||||
projectBranchSyncKey("default", "repo"),
|
||||
handoffStatusSyncKey("default", "repo", "handoff", "sandbox-1", "session-1")
|
||||
handoffStatusSyncKey("default", "repo", "handoff", "sandbox-1", "session-1"),
|
||||
];
|
||||
|
||||
for (const key of keys) {
|
||||
|
|
|
|||
|
|
@ -1,11 +1,6 @@
|
|||
import { describe, expect, it } from "vitest";
|
||||
import type { HandoffRecord } from "@openhandoff/shared";
|
||||
import {
|
||||
filterHandoffs,
|
||||
formatRelativeAge,
|
||||
fuzzyMatch,
|
||||
summarizeHandoffs
|
||||
} from "../src/view-model.js";
|
||||
import { filterHandoffs, formatRelativeAge, fuzzyMatch, summarizeHandoffs } from "../src/view-model.js";
|
||||
|
||||
const sample: HandoffRecord = {
|
||||
workspaceId: "default",
|
||||
|
|
@ -28,8 +23,8 @@ const sample: HandoffRecord = {
|
|||
switchTarget: "daytona://sandbox-1",
|
||||
cwd: null,
|
||||
createdAt: 1,
|
||||
updatedAt: 1
|
||||
}
|
||||
updatedAt: 1,
|
||||
},
|
||||
],
|
||||
agentType: null,
|
||||
prSubmitted: false,
|
||||
|
|
@ -43,7 +38,7 @@ const sample: HandoffRecord = {
|
|||
hasUnpushed: null,
|
||||
parentBranch: null,
|
||||
createdAt: 1,
|
||||
updatedAt: 1
|
||||
updatedAt: 1,
|
||||
};
|
||||
|
||||
describe("search helpers", () => {
|
||||
|
|
@ -60,8 +55,8 @@ describe("search helpers", () => {
|
|||
handoffId: "handoff-2",
|
||||
branchName: "docs/update-intro",
|
||||
title: "Docs Intro Refresh",
|
||||
status: "idle"
|
||||
}
|
||||
status: "idle",
|
||||
},
|
||||
];
|
||||
expect(filterHandoffs(rows, "doc")).toHaveLength(1);
|
||||
expect(filterHandoffs(rows, "h2")).toHaveLength(1);
|
||||
|
|
@ -79,7 +74,7 @@ describe("summary helpers", () => {
|
|||
const rows: HandoffRecord[] = [
|
||||
sample,
|
||||
{ ...sample, handoffId: "handoff-2", status: "idle", providerId: "daytona" },
|
||||
{ ...sample, handoffId: "handoff-3", status: "error", providerId: "daytona" }
|
||||
{ ...sample, handoffId: "handoff-3", status: "error", providerId: "daytona" },
|
||||
];
|
||||
|
||||
const summary = summarizeHandoffs(rows);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue