feat(factory): finish workbench milestone pass

This commit is contained in:
Nathan Flurry 2026-03-09 16:34:27 -07:00
parent bf282199b5
commit 49cba9e6c2
137 changed files with 819 additions and 338 deletions

View file

@ -1,6 +1,6 @@
import { randomUUID } from "node:crypto";
import { describe, expect, it } from "vitest";
import type { HistoryEvent, RepoOverview } from "@openhandoff/shared";
import type { HistoryEvent, RepoOverview } from "@sandbox-agent/factory-shared";
import { createBackendClient } from "../../src/backend-client.js";
// True only when the HF_ENABLE_DAEMON_FULL_E2E environment variable is exactly "1".
// NOTE(review): presumably the opt-in switch for the full daemon e2e suite — usage is not visible here.
const RUN_FULL_E2E = process.env.HF_ENABLE_DAEMON_FULL_E2E === "1";

View file

@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import type { HandoffRecord, HistoryEvent } from "@openhandoff/shared";
import type { HandoffRecord, HistoryEvent } from "@sandbox-agent/factory-shared";
import { createBackendClient } from "../../src/backend-client.js";
// True only when the HF_ENABLE_DAEMON_E2E environment variable is exactly "1".
// NOTE(review): presumably the opt-in switch for this daemon e2e suite — usage is not visible here.
const RUN_E2E = process.env.HF_ENABLE_DAEMON_E2E === "1";

View file

@ -1,13 +1,15 @@
import { execFile } from "node:child_process";
import { mkdir, writeFile } from "node:fs/promises";
import { promisify } from "node:util";
import { describe, expect, it } from "vitest";
import type {
HandoffRecord,
HandoffWorkbenchSnapshot,
WorkbenchAgentTab,
WorkbenchHandoff,
WorkbenchModelId,
WorkbenchTranscriptEvent,
} from "@openhandoff/shared";
} from "@sandbox-agent/factory-shared";
import { createBackendClient } from "../../src/backend-client.js";
// True only when the HF_ENABLE_DAEMON_WORKBENCH_E2E environment variable is exactly "1".
// NOTE(review): presumably the opt-in switch for the workbench e2e suite — usage is not visible here.
const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
@ -21,6 +23,10 @@ function requiredEnv(name: string): string {
return value;
}
/**
 * Resolves the repository remote used by the e2e run.
 *
 * Prefers a non-empty, trimmed `HF_E2E_REPO_REMOTE`; otherwise defers to
 * `requiredEnv("HF_E2E_GITHUB_REPO")`.
 */
function requiredRepoRemote(): string {
  const explicitRemote = process.env.HF_E2E_REPO_REMOTE?.trim();
  if (explicitRemote) {
    return explicitRemote;
  }
  return requiredEnv("HF_E2E_GITHUB_REPO");
}
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
const value = process.env[name]?.trim();
switch (value) {
@ -38,14 +44,66 @@ async function sleep(ms: number): Promise<void> {
await new Promise((resolve) => setTimeout(resolve, ms));
}
async function seedSandboxFile(workspaceId: string, handoffId: string, filePath: string, content: string): Promise<void> {
const repoPath = `/root/.local/share/openhandoff/local-sandboxes/${workspaceId}/${handoffId}/repo`;
/**
 * Extracts the TCP port of a backend endpoint URL as a string.
 *
 * When the URL carries no explicit port (the parsed `port` is empty), the
 * result is "443" for the `https:` protocol and "80" for everything else.
 *
 * @param endpoint Absolute URL of the backend endpoint.
 * @returns The port as a decimal string.
 */
function backendPortFromEndpoint(endpoint: string): string {
  const { port, protocol } = new URL(endpoint);
  if (port) {
    return port;
  }
  if (protocol === "https:") {
    return "443";
  }
  return "80";
}
/**
 * Determines which Docker container (if any) hosts the backend.
 *
 * A non-empty `HF_E2E_BACKEND_CONTAINER` wins: the value "host"
 * (case-insensitive) yields `null`, any other value is returned as the
 * container name. Otherwise `docker ps --filter publish=<port> --format {{.Names}}`
 * is run with the endpoint's port and the first non-blank output line is returned.
 *
 * @param endpoint Backend endpoint URL whose port is used for the docker filter.
 * @returns The container name, or `null` when none applies.
 */
async function resolveBackendContainerName(endpoint: string): Promise<string | null> {
  const override = process.env.HF_E2E_BACKEND_CONTAINER?.trim();
  if (override) {
    return override.toLowerCase() === "host" ? null : override;
  }

  const publishedPort = backendPortFromEndpoint(endpoint);
  const dockerArgs = ["ps", "--filter", `publish=${publishedPort}`, "--format", "{{.Names}}"];
  const { stdout } = await execFileAsync("docker", dockerArgs);

  for (const rawLine of stdout.split("\n")) {
    const candidate = rawLine.trim();
    if (candidate) {
      return candidate;
    }
  }
  return null;
}
/**
 * Picks the working directory of the sandbox to use for a handoff.
 *
 * The sandbox whose `sandboxId` equals `record.activeSandboxId` is used when
 * one exists; only if none matches does the first sandbox with a non-empty
 * string `cwd` get used instead. The chosen `cwd` is trimmed.
 *
 * @param record Handoff record carrying `sandboxes` and `activeSandboxId`.
 * @returns The trimmed sandbox working directory.
 * @throws Error when no sandbox was chosen or its trimmed `cwd` is empty.
 */
function sandboxRepoPath(record: HandoffRecord): string {
  let chosen = record.sandboxes.find((candidate) => candidate.sandboxId === record.activeSandboxId);
  if (chosen === undefined) {
    chosen = record.sandboxes.find((candidate) => typeof candidate.cwd === "string" && candidate.cwd.length > 0);
  }

  const workingDir = chosen?.cwd?.trim();
  if (workingDir) {
    return workingDir;
  }
  throw new Error(`No sandbox cwd is available for handoff ${record.handoffId}`);
}
/**
 * Writes `content` followed by a newline to `filePath` inside the handoff's sandbox repo.
 *
 * The repo directory comes from `sandboxRepoPath(record)`. When
 * `resolveBackendContainerName(endpoint)` yields a container name, the file is
 * written with `docker exec <container> bash -lc …`; when it yields `null`, the
 * file is written directly on the local filesystem.
 *
 * @param endpoint Backend endpoint URL, used only to locate the backend container.
 * @param record Handoff whose sandbox working directory receives the file.
 * @param filePath File path relative to the sandbox repo; parent directories are created.
 * @param content File contents; a trailing newline is appended.
 * @throws Propagates errors from `sandboxRepoPath`, the filesystem calls, or `docker`.
 */
async function seedSandboxFile(endpoint: string, record: HandoffRecord, filePath: string, content: string): Promise<void> {
  // POSIX single-quote quoting: nothing is expanded inside '...', and an embedded
  // single quote is written as '\'' (close quote, escaped quote, reopen quote).
  // Double quotes (e.g. JSON.stringify output) would still let bash expand `$`,
  // backticks and backslashes in paths or content.
  const shellQuote = (value: string): string => `'${value.replace(/'/g, `'\\''`)}'`;

  const repoPath = sandboxRepoPath(record);
  const containerName = await resolveBackendContainerName(endpoint);

  const lastSlash = filePath.lastIndexOf("/");
  const parentDir = lastSlash >= 0 ? filePath.slice(0, lastSlash) : "";

  if (!containerName) {
    // No container: write straight to the local filesystem.
    const directory = parentDir ? `${repoPath}/${parentDir}` : repoPath;
    await mkdir(directory, { recursive: true });
    await writeFile(`${repoPath}/${filePath}`, `${content}\n`, "utf8");
    return;
  }

  const script = [
    `cd ${shellQuote(repoPath)}`,
    // Fall back to "." so `mkdir -p` is never handed an empty argument.
    `mkdir -p ${shellQuote(parentDir || ".")}`,
    `printf '%s\\n' ${shellQuote(content)} > ${shellQuote(filePath)}`,
  ].join(" && ");

  // Run exactly once, against the resolved container rather than a fixed name.
  await execFileAsync("docker", ["exec", containerName, "bash", "-lc", script]);
}
async function poll<T>(
@ -166,7 +224,7 @@ describe("e2e(client): workbench flows", () => {
const endpoint =
process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const repoRemote = requiredRepoRemote();
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
const runId = `wb-${Date.now().toString(36)}`;
const expectedFile = `${runId}.txt`;
@ -215,7 +273,8 @@ describe("e2e(client): workbench flows", () => {
expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);
await seedSandboxFile(workspaceId, created.handoffId, expectedFile, runId);
const detail = await client.getHandoff(workspaceId, created.handoffId);
await seedSandboxFile(endpoint, detail, expectedFile, runId);
const fileSeeded = await poll(
"seeded sandbox file reflected in workbench",

View file

@ -5,7 +5,7 @@ import type {
WorkbenchHandoff,
WorkbenchModelId,
WorkbenchTranscriptEvent,
} from "@openhandoff/shared";
} from "@sandbox-agent/factory-shared";
import { createBackendClient } from "../../src/backend-client.js";
// True only when the HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E environment variable is exactly "1".
// NOTE(review): presumably the opt-in switch for the workbench load suite — usage is not visible here.
const RUN_WORKBENCH_LOAD_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_LOAD_E2E === "1";
@ -18,6 +18,10 @@ function requiredEnv(name: string): string {
return value;
}
/**
 * Resolves the repository remote used by the load e2e run.
 *
 * A non-empty, trimmed `HF_E2E_REPO_REMOTE` takes precedence; otherwise the
 * value comes from `requiredEnv("HF_E2E_GITHUB_REPO")`.
 */
function requiredRepoRemote(): string {
  const remoteOverride = process.env.HF_E2E_REPO_REMOTE?.trim();
  return remoteOverride ? remoteOverride : requiredEnv("HF_E2E_GITHUB_REPO");
}
function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchModelId {
const value = process.env[name]?.trim();
switch (value) {
@ -196,7 +200,7 @@ describe("e2e(client): workbench load", () => {
async () => {
const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/api/rivet";
const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
const repoRemote = requiredRepoRemote();
const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
const handoffCount = intEnv("HF_LOAD_HANDOFF_COUNT", 3);
const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);

View file

@ -1,5 +1,5 @@
import { describe, expect, it } from "vitest";
import type { HandoffRecord } from "@openhandoff/shared";
import type { HandoffRecord } from "@sandbox-agent/factory-shared";
import {
filterHandoffs,
formatRelativeAge,

View file

@ -0,0 +1,128 @@
import { describe, expect, it } from "vitest";
import type { BackendClient } from "../src/backend-client.js";
import { createHandoffWorkbenchClient } from "../src/workbench-client.js";
/**
 * Resolves after roughly `ms` milliseconds, using `setTimeout`.
 *
 * @param ms Delay in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
// Tests for createHandoffWorkbenchClient: mock-mode workspace scoping, mock-mode
// handoff bootstrap from the initial task, and remote-mode push routing.
describe("createHandoffWorkbenchClient", () => {
// Two mock clients with different workspace ids: creating a handoff through one
// must grow only that client's snapshot and leave the other's unchanged.
it("scopes mock clients by workspace", async () => {
const alpha = createHandoffWorkbenchClient({
mode: "mock",
workspaceId: "mock-alpha",
});
const beta = createHandoffWorkbenchClient({
mode: "mock",
workspaceId: "mock-beta",
});
const alphaInitial = alpha.getSnapshot();
const betaInitial = beta.getSnapshot();
expect(alphaInitial.workspaceId).toBe("mock-alpha");
expect(betaInitial.workspaceId).toBe("mock-beta");
await alpha.createHandoff({
repoId: alphaInitial.repos[0]!.id,
task: "Ship alpha-only change",
title: "Alpha only",
});
expect(alpha.getSnapshot().handoffs).toHaveLength(alphaInitial.handoffs.length + 1);
expect(beta.getSnapshot().handoffs).toHaveLength(betaInitial.handoffs.length);
});
// Creates a handoff in mock mode and checks its state twice: right after
// creation (running, prompt-only transcript) and again after a fixed wait
// (idle, unread, client + agent transcript entries).
it("uses the initial task to bootstrap a new mock handoff session", async () => {
const client = createHandoffWorkbenchClient({
mode: "mock",
workspaceId: "mock-onboarding",
});
const snapshot = client.getSnapshot();
const created = await client.createHandoff({
repoId: snapshot.repos[0]!.id,
task: "Reply with exactly: MOCK_WORKBENCH_READY",
title: "Mock onboarding",
branch: "feat/mock-onboarding",
model: "gpt-4o",
});
const runningHandoff = client.getSnapshot().handoffs.find((handoff) => handoff.id === created.handoffId);
expect(runningHandoff).toEqual(
expect.objectContaining({
title: "Mock onboarding",
branch: "feat/mock-onboarding",
status: "running",
}),
);
expect(runningHandoff?.tabs[0]).toEqual(
expect.objectContaining({
id: created.tabId,
created: true,
status: "running",
}),
);
// At this point the transcript must hold exactly one event: the client's session/prompt.
expect(runningHandoff?.tabs[0]?.transcript).toEqual([
expect.objectContaining({
sender: "client",
payload: expect.objectContaining({
method: "session/prompt",
}),
}),
]);
// Fixed 2.7 s wait before re-reading the snapshot.
// NOTE(review): presumably sized to outlast the mock client's simulated reply delay — confirm against the mock's timers.
await sleep(2_700);
const completedHandoff = client.getSnapshot().handoffs.find((handoff) => handoff.id === created.handoffId);
expect(completedHandoff?.status).toBe("idle");
expect(completedHandoff?.tabs[0]).toEqual(
expect.objectContaining({
status: "idle",
unread: true,
}),
);
// After the wait the transcript must be exactly the client event followed by one agent event.
expect(completedHandoff?.tabs[0]?.transcript).toEqual([
expect.objectContaining({ sender: "client" }),
expect.objectContaining({ sender: "agent" }),
]);
});
// In remote mode, pushHandoff must reach the backend as a single
// runAction(workspaceId, handoffId, "push") call.
it("routes remote push actions through the backend boundary", async () => {
const actions: Array<{ workspaceId: string; handoffId: string; action: string }> = [];
let snapshotReads = 0;
// Backend stub implementing only runAction, getWorkbench and subscribeWorkbench;
// it records runAction arguments and counts getWorkbench calls. The cast goes
// through unknown because the object is not a full BackendClient.
const backend = {
async runAction(workspaceId: string, handoffId: string, action: string): Promise<void> {
actions.push({ workspaceId, handoffId, action });
},
async getWorkbench(workspaceId: string) {
snapshotReads += 1;
return {
workspaceId,
repos: [],
projects: [],
handoffs: [],
};
},
subscribeWorkbench(): () => void {
return () => {};
},
} as unknown as BackendClient;
const client = createHandoffWorkbenchClient({
mode: "remote",
backend,
workspaceId: "remote-ws",
});
await client.pushHandoff({ handoffId: "handoff-123" });
expect(actions).toEqual([
{
workspaceId: "remote-ws",
handoffId: "handoff-123",
action: "push",
},
]);
// Exactly one getWorkbench read is expected across the whole flow.
// NOTE(review): which step triggers that read is not visible here — confirm in the remote client.
expect(snapshotReads).toBe(1);
});
});