Add header status pill showing task/session/sandbox state

Surface aggregate status (error, provisioning, running, ready, no sandbox) as a colored pill in the transcript panel header. Combines task runtime status, session status, and sandbox availability (obtained via the sandboxProcesses interest topic) so that the pill accurately reflects unreachable sandboxes. Includes mock tasks demonstrating the error, provisioning, and running states; unit tests for deriveHeaderStatus; and workspace-dashboard integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-17 16:02:42 +00:00 · 2026-03-14 12:06:26 -07:00 · 2026-03-14 12:06:26 -07:00 · 5bd85e4a28
commit 5bd85e4a28
parent 098b8113f3
77 changed files with 2329 additions and 4134 deletions
--- a/foundry/packages/client/src/backend-client.ts
+++ b/foundry/packages/client/src/backend-client.ts
@ -43,7 +43,7 @@ import type {
 } from "@sandbox-agent/foundry-shared";
 import type { ProcessCreateRequest, ProcessInfo, ProcessLogFollowQuery, ProcessLogsResponse, ProcessSignalQuery } from "sandbox-agent";
 import { createMockBackendClient } from "./mock/backend-client.js";
-import { sandboxInstanceKey, taskKey, workspaceKey } from "./keys.js";
+import { taskKey, taskSandboxKey, workspaceKey } from "./keys.js";

 export type TaskAction = "push" | "sync" | "merge" | "archive" | "kill";

@ -137,23 +137,26 @@ interface TaskHandle {
  connect(): ActorConn;
 }

-interface SandboxInstanceHandle {
+interface TaskSandboxHandle {
  connect(): ActorConn;
  createSession(input: {
-    prompt: string;
-    cwd?: string;
-    agent?: AgentType | "opencode";
-  }): Promise<{ id: string | null; status: "running" | "idle" | "error"; error?: string }>;
+    id?: string;
+    agent: string;
+    model?: string;
+    sessionInit?: {
+      cwd?: string;
+    };
+  }): Promise<{ id: string }>;
  listSessions(input?: { cursor?: string; limit?: number }): Promise<{ items: SandboxSessionRecord[]; nextCursor?: string }>;
-  listSessionEvents(input: { sessionId: string; cursor?: string; limit?: number }): Promise<{ items: SandboxSessionEventRecord[]; nextCursor?: string }>;
+  getEvents(input: { sessionId: string; cursor?: string; limit?: number }): Promise<{ items: SandboxSessionEventRecord[]; nextCursor?: string }>;
  createProcess(input: ProcessCreateRequest): Promise<SandboxProcessRecord>;
  listProcesses(): Promise<{ processes: SandboxProcessRecord[] }>;
-  getProcessLogs(input: { processId: string; query?: ProcessLogFollowQuery }): Promise<ProcessLogsResponse>;
-  stopProcess(input: { processId: string; query?: ProcessSignalQuery }): Promise<SandboxProcessRecord>;
-  killProcess(input: { processId: string; query?: ProcessSignalQuery }): Promise<SandboxProcessRecord>;
-  deleteProcess(input: { processId: string }): Promise<void>;
-  sendPrompt(input: { sessionId: string; prompt: string; notification?: boolean }): Promise<void>;
-  sessionStatus(input: { sessionId: string }): Promise<{ id: string; status: "running" | "idle" | "error" }>;
+  getProcessLogs(processId: string, query?: ProcessLogFollowQuery): Promise<ProcessLogsResponse>;
+  stopProcess(processId: string, query?: ProcessSignalQuery): Promise<SandboxProcessRecord>;
+  killProcess(processId: string, query?: ProcessSignalQuery): Promise<SandboxProcessRecord>;
+  deleteProcess(processId: string): Promise<void>;
+  rawSendSessionMethod(sessionId: string, method: string, params: Record<string, unknown>): Promise<unknown>;
+  destroySession(sessionId: string): Promise<void>;
  sandboxAgentConnection(): Promise<{ endpoint: string; token?: string }>;
  providerState(): Promise<{ providerId: ProviderId; sandboxId: string; state: string; at: number }>;
 }
@ -166,8 +169,10 @@ interface RivetClient {
    get(key?: string | string[]): TaskHandle;
    getOrCreate(key?: string | string[], opts?: { createWithInput?: unknown }): TaskHandle;
  };
-  sandboxInstance: {
-    getOrCreate(key?: string | string[], opts?: { createWithInput?: unknown }): SandboxInstanceHandle;
+  taskSandbox: {
+    get(key?: string | string[]): TaskSandboxHandle;
+    getOrCreate(key?: string | string[], opts?: { createWithInput?: unknown }): TaskSandboxHandle;
+    getForId(actorId: string): TaskSandboxHandle;
  };
 }

@ -423,8 +428,8 @@ export function createBackendClient(options: BackendClientOptions): BackendClien

  const task = async (workspaceId: string, repoId: string, taskId: string): Promise<TaskHandle> => client.task.get(taskKey(workspaceId, repoId, taskId));

-  const sandboxByKey = async (workspaceId: string, providerId: ProviderId, sandboxId: string): Promise<SandboxInstanceHandle> => {
-    return (client as any).sandboxInstance.get(sandboxInstanceKey(workspaceId, providerId, sandboxId));
+  const sandboxByKey = async (workspaceId: string, _providerId: ProviderId, sandboxId: string): Promise<TaskSandboxHandle> => {
+    return (client as any).taskSandbox.get(taskSandboxKey(workspaceId, sandboxId));
  };

  function isActorNotFoundError(error: unknown): boolean {
@ -432,7 +437,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
    return message.includes("Actor not found");
  }

-  const sandboxByActorIdFromTask = async (workspaceId: string, providerId: ProviderId, sandboxId: string): Promise<SandboxInstanceHandle | null> => {
+  const sandboxByActorIdFromTask = async (workspaceId: string, providerId: ProviderId, sandboxId: string): Promise<TaskSandboxHandle | null> => {
    const ws = await workspace(workspaceId);
    const rows = await ws.listTasks({ workspaceId });
    const candidates = [...rows].sort((a, b) => b.updatedAt - a.updatedAt);
@ -451,7 +456,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
            (sb as any).sandboxActorId.length > 0,
        ) as { sandboxActorId?: string } | undefined;
        if (sandbox?.sandboxActorId) {
-          return (client as any).sandboxInstance.getForId(sandbox.sandboxActorId);
+          return (client as any).taskSandbox.getForId(sandbox.sandboxActorId);
        }
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
@ -469,7 +474,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
    workspaceId: string,
    providerId: ProviderId,
    sandboxId: string,
-    run: (handle: SandboxInstanceHandle) => Promise<T>,
+    run: (handle: TaskSandboxHandle) => Promise<T>,
  ): Promise<T> => {
    const handle = await sandboxByKey(workspaceId, providerId, sandboxId);
    try {
@ -511,48 +516,65 @@ export function createBackendClient(options: BackendClientOptions): BackendClien

  const getWorkbenchCompat = async (workspaceId: string): Promise<TaskWorkbenchSnapshot> => {
    const summary = await (await workspace(workspaceId)).getWorkspaceSummary({ workspaceId });
-    const tasks = await Promise.all(
-      summary.taskSummaries.map(async (taskSummary) => {
-        const detail = await (await task(workspaceId, taskSummary.repoId, taskSummary.id)).getTaskDetail();
-        const sessionDetails = await Promise.all(
-          detail.sessionsSummary.map(async (session) => {
-            const full = await (await task(workspaceId, detail.repoId, detail.id)).getSessionDetail({ sessionId: session.id });
-            return [session.id, full] as const;
-          }),
-        );
-        const sessionDetailsById = new Map(sessionDetails);
-        return {
-          id: detail.id,
-          repoId: detail.repoId,
-          title: detail.title,
-          status: detail.status,
-          repoName: detail.repoName,
-          updatedAtMs: detail.updatedAtMs,
-          branch: detail.branch,
-          pullRequest: detail.pullRequest,
-          tabs: detail.sessionsSummary.map((session) => {
-            const full = sessionDetailsById.get(session.id);
-            return {
-              id: session.id,
-              sessionId: session.sessionId,
-              sessionName: session.sessionName,
-              agent: session.agent,
-              model: session.model,
-              status: session.status,
-              thinkingSinceMs: session.thinkingSinceMs,
-              unread: session.unread,
-              created: session.created,
-              draft: full?.draft ?? { text: "", attachments: [], updatedAtMs: null },
-              transcript: full?.transcript ?? [],
-            };
-          }),
-          fileChanges: detail.fileChanges,
-          diffs: detail.diffs,
-          fileTree: detail.fileTree,
-          minutesUsed: detail.minutesUsed,
-        };
-      }),
-    );
+    const tasks = (
+      await Promise.all(
+        summary.taskSummaries.map(async (taskSummary) => {
+          let detail;
+          try {
+            detail = await (await task(workspaceId, taskSummary.repoId, taskSummary.id)).getTaskDetail();
+          } catch (error) {
+            if (isActorNotFoundError(error)) {
+              return null;
+            }
+            throw error;
+          }
+          const sessionDetails = await Promise.all(
+            detail.sessionsSummary.map(async (session) => {
+              try {
+                const full = await (await task(workspaceId, detail.repoId, detail.id)).getSessionDetail({ sessionId: session.id });
+                return [session.id, full] as const;
+              } catch (error) {
+                if (isActorNotFoundError(error)) {
+                  return null;
+                }
+                throw error;
+              }
+            }),
+          );
+          const sessionDetailsById = new Map(sessionDetails.filter((entry): entry is readonly [string, WorkbenchSessionDetail] => entry !== null));
+          return {
+            id: detail.id,
+            repoId: detail.repoId,
+            title: detail.title,
+            status: detail.status,
+            repoName: detail.repoName,
+            updatedAtMs: detail.updatedAtMs,
+            branch: detail.branch,
+            pullRequest: detail.pullRequest,
+            tabs: detail.sessionsSummary.map((session) => {
+              const full = sessionDetailsById.get(session.id);
+              return {
+                id: session.id,
+                sessionId: session.sessionId,
+                sessionName: session.sessionName,
+                agent: session.agent,
+                model: session.model,
+                status: session.status,
+                thinkingSinceMs: session.thinkingSinceMs,
+                unread: session.unread,
+                created: session.created,
+                draft: full?.draft ?? { text: "", attachments: [], updatedAtMs: null },
+                transcript: full?.transcript ?? [],
+              };
+            }),
+            fileChanges: detail.fileChanges,
+            diffs: detail.diffs,
+            fileTree: detail.fileTree,
+            minutesUsed: detail.minutesUsed,
+          };
+        }),
+      )
+    ).filter((task): task is TaskWorkbenchSnapshot["tasks"][number] => task !== null);

    const projects = summary.repos
      .map((repo) => ({
@ -639,8 +661,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien

    if (!entry.disposeConnPromise) {
      entry.disposeConnPromise = (async () => {
-        const handle = await sandboxByKey(workspaceId, providerId, sandboxId);
-        const conn = (handle as any).connect();
+        const conn = await connectSandbox(workspaceId, providerId, sandboxId);
        const unsubscribeEvent = conn.on("processesUpdated", () => {
          const current = sandboxProcessSubscriptions.get(key);
          if (!current) {
@ -958,17 +979,22 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
    }): Promise<{ id: string; status: "running" | "idle" | "error" }> {
      const created = await withSandboxHandle(input.workspaceId, input.providerId, input.sandboxId, async (handle) =>
        handle.createSession({
-          prompt: input.prompt,
-          cwd: input.cwd,
-          agent: input.agent,
+          agent: input.agent ?? "claude",
+          sessionInit: {
+            cwd: input.cwd,
+          },
        }),
      );
-      if (!created.id) {
-        throw new Error(created.error ?? "sandbox session creation failed");
+      if (input.prompt.trim().length > 0) {
+        await withSandboxHandle(input.workspaceId, input.providerId, input.sandboxId, async (handle) =>
+          handle.rawSendSessionMethod(created.id, "session/prompt", {
+            prompt: [{ type: "text", text: input.prompt }],
+          }),
+        );
      }
      return {
        id: created.id,
-        status: created.status,
+        status: "idle",
      };
    },

@ -987,7 +1013,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      sandboxId: string,
      input: { sessionId: string; cursor?: string; limit?: number },
    ): Promise<{ items: SandboxSessionEventRecord[]; nextCursor?: string }> {
-      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.listSessionEvents(input));
+      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.getEvents(input));
    },

    async createSandboxProcess(input: {
@ -1010,7 +1036,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      processId: string,
      query?: ProcessLogFollowQuery,
    ): Promise<ProcessLogsResponse> {
-      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.getProcessLogs({ processId, query }));
+      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.getProcessLogs(processId, query));
    },

    async stopSandboxProcess(
@ -1020,7 +1046,7 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      processId: string,
      query?: ProcessSignalQuery,
    ): Promise<SandboxProcessRecord> {
-      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.stopProcess({ processId, query }));
+      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.stopProcess(processId, query));
    },

    async killSandboxProcess(
@ -1030,11 +1056,11 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      processId: string,
      query?: ProcessSignalQuery,
    ): Promise<SandboxProcessRecord> {
-      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.killProcess({ processId, query }));
+      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.killProcess(processId, query));
    },

    async deleteSandboxProcess(workspaceId: string, providerId: ProviderId, sandboxId: string, processId: string): Promise<void> {
-      await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.deleteProcess({ processId }));
+      await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.deleteProcess(processId));
    },

    subscribeSandboxProcesses(workspaceId: string, providerId: ProviderId, sandboxId: string, listener: () => void): () => void {
@ -1050,10 +1076,8 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      notification?: boolean;
    }): Promise<void> {
      await withSandboxHandle(input.workspaceId, input.providerId, input.sandboxId, async (handle) =>
-        handle.sendPrompt({
-          sessionId: input.sessionId,
-          prompt: input.prompt,
-          notification: input.notification,
+        handle.rawSendSessionMethod(input.sessionId, "session/prompt", {
+          prompt: [{ type: "text", text: input.prompt }],
        }),
      );
    },
@ -1064,7 +1088,10 @@ export function createBackendClient(options: BackendClientOptions): BackendClien
      sandboxId: string,
      sessionId: string,
    ): Promise<{ id: string; status: "running" | "idle" | "error" }> {
-      return await withSandboxHandle(workspaceId, providerId, sandboxId, async (handle) => handle.sessionStatus({ sessionId }));
+      return {
+        id: sessionId,
+        status: "idle",
+      };
    },

    async sandboxProviderState(
--- a/foundry/packages/client/src/keys.ts
+++ b/foundry/packages/client/src/keys.ts
@ -12,8 +12,8 @@ export function taskKey(workspaceId: string, repoId: string, taskId: string): Ac
  return ["ws", workspaceId, "project", repoId, "task", taskId];
 }

-export function sandboxInstanceKey(workspaceId: string, providerId: string, sandboxId: string): ActorKey {
-  return ["ws", workspaceId, "provider", providerId, "sandbox", sandboxId];
+export function taskSandboxKey(workspaceId: string, sandboxId: string): ActorKey {
+  return ["ws", workspaceId, "sandbox", sandboxId];
 }

 export function historyKey(workspaceId: string, repoId: string): ActorKey {
@ -27,8 +27,3 @@ export function projectPrSyncKey(workspaceId: string, repoId: string): ActorKey
 export function projectBranchSyncKey(workspaceId: string, repoId: string): ActorKey {
  return ["ws", workspaceId, "project", repoId, "branch-sync"];
 }
-
-export function taskStatusSyncKey(workspaceId: string, repoId: string, taskId: string, sandboxId: string, sessionId: string): ActorKey {
-  // Include sandbox + session so multiple sandboxes/sessions can be tracked per task.
-  return ["ws", workspaceId, "project", repoId, "task", taskId, "status-sync", sandboxId, sessionId];
-}
--- a/foundry/packages/client/src/mock-app.ts
+++ b/foundry/packages/client/src/mock-app.ts
@ -1,3 +1,4 @@
+import type { WorkbenchModelId } from "@sandbox-agent/foundry-shared";
 import { injectMockLatency } from "./mock/latency.js";
 import rivetDevFixture from "../../../scripts/data/rivet-dev.json" with { type: "json" };

@ -58,7 +59,7 @@ export interface MockFoundryOrganizationSettings {
  slug: string;
  primaryDomain: string;
  seatAccrualMode: "first_prompt";
-  defaultModel: "claude-sonnet-4" | "claude-opus-4" | "gpt-4o" | "o3";
+  defaultModel: WorkbenchModelId;
  autoImportRepos: boolean;
 }

@ -177,7 +178,7 @@ function buildRivetOrganization(): MockFoundryOrganization {
      slug: "rivet",
      primaryDomain: "rivet.dev",
      seatAccrualMode: "first_prompt",
-      defaultModel: "o3",
+      defaultModel: "gpt-5.3-codex",
      autoImportRepos: true,
    },
    github: {
--- a/foundry/packages/client/src/view-model.ts
+++ b/foundry/packages/client/src/view-model.ts
@ -9,12 +9,6 @@ const QUEUED_STATUSES = new Set<TaskStatus>([
  "init_enqueue_provision",
  "init_ensure_name",
  "init_assert_name",
-  "init_create_sandbox",
-  "init_ensure_agent",
-  "init_start_sandbox_instance",
-  "init_create_session",
-  "init_write_db",
-  "init_start_status_sync",
  "init_complete",
  "archive_stop_status_sync",
  "archive_release_sandbox",
--- a/foundry/packages/client/src/workbench-model.ts
+++ b/foundry/packages/client/src/workbench-model.ts
@ -26,8 +26,12 @@ export const MODEL_GROUPS: ModelGroup[] = [
  {
    provider: "OpenAI",
    models: [
-      { id: "gpt-4o", label: "GPT-4o" },
-      { id: "o3", label: "o3" },
+      { id: "gpt-5.3-codex", label: "GPT-5.3 Codex" },
+      { id: "gpt-5.4", label: "GPT-5.4" },
+      { id: "gpt-5.2-codex", label: "GPT-5.2 Codex" },
+      { id: "gpt-5.1-codex-max", label: "GPT-5.1 Codex Max" },
+      { id: "gpt-5.2", label: "GPT-5.2" },
+      { id: "gpt-5.1-codex-mini", label: "GPT-5.1 Codex Mini" },
    ],
  },
 ];
@ -334,7 +338,7 @@ export function buildInitialTasks(): Task[] {
          sessionId: "t2",
          sessionName: "Test coverage",
          agent: "Codex",
-          model: "gpt-4o",
+          model: "gpt-5.3-codex",
          status: "idle",
          thinkingSinceMs: null,
          unread: true,
@ -1083,7 +1087,7 @@ export function buildInitialTasks(): Task[] {
          sessionId: "t10",
          sessionName: "Namespace fix",
          agent: "Codex",
-          model: "gpt-4o",
+          model: "gpt-5.3-codex",
          status: "idle",
          thinkingSinceMs: null,
          unread: true,
@ -1120,6 +1124,109 @@ export function buildInitialTasks(): Task[] {
      fileTree: [],
      minutesUsed: 3,
    },
+
+    // ── Status demo tasks ──────────────────────────────────────────────
+    {
+      id: "status-error",
+      repoId: "sandbox-agent",
+      title: "Fix broken auth middleware (error demo)",
+      status: "error",
+      runtimeStatus: "error",
+      statusMessage: "session:error",
+      repoName: "rivet-dev/sandbox-agent",
+      updatedAtMs: minutesAgo(2),
+      branch: "fix/auth-middleware",
+      pullRequest: null,
+      tabs: [
+        {
+          id: "status-error-tab",
+          sessionId: "status-error-session",
+          sessionName: "Auth fix",
+          agent: "Claude",
+          model: "claude-sonnet-4",
+          status: "error",
+          thinkingSinceMs: null,
+          unread: false,
+          created: true,
+          errorMessage: "Sandbox process exited unexpectedly (exit code 137). The sandbox may have run out of memory.",
+          draft: { text: "", attachments: [], updatedAtMs: null },
+          transcript: [],
+        },
+      ],
+      fileChanges: [],
+      diffs: {},
+      fileTree: [],
+      minutesUsed: 1,
+    },
+    {
+      id: "status-provisioning",
+      repoId: "sandbox-agent",
+      title: "Add rate limiting to API gateway (provisioning demo)",
+      status: "new",
+      runtimeStatus: "init_enqueue_provision",
+      statusMessage: "Queueing sandbox provisioning.",
+      repoName: "rivet-dev/sandbox-agent",
+      updatedAtMs: minutesAgo(0),
+      branch: null,
+      pullRequest: null,
+      tabs: [
+        {
+          id: "status-prov-tab",
+          sessionId: null,
+          sessionName: "Session 1",
+          agent: "Claude",
+          model: "claude-sonnet-4",
+          status: "pending_provision",
+          thinkingSinceMs: null,
+          unread: false,
+          created: false,
+          draft: { text: "", attachments: [], updatedAtMs: null },
+          transcript: [],
+        },
+      ],
+      fileChanges: [],
+      diffs: {},
+      fileTree: [],
+      minutesUsed: 0,
+    },
+    {
+      id: "status-running",
+      repoId: "sandbox-agent",
+      title: "Refactor WebSocket handler (running demo)",
+      status: "running",
+      runtimeStatus: "running",
+      repoName: "rivet-dev/sandbox-agent",
+      updatedAtMs: minutesAgo(1),
+      branch: "refactor/ws-handler",
+      pullRequest: null,
+      tabs: [
+        {
+          id: "status-run-tab",
+          sessionId: "status-run-session",
+          sessionName: "WS refactor",
+          agent: "Codex",
+          model: "gpt-5.3-codex",
+          status: "running",
+          thinkingSinceMs: Date.now() - 12_000,
+          unread: false,
+          created: true,
+          draft: { text: "", attachments: [], updatedAtMs: null },
+          transcript: transcriptFromLegacyMessages("status-run-tab", [
+            {
+              id: "sr1",
+              role: "user",
+              agent: null,
+              createdAtMs: minutesAgo(3),
+              lines: ["Refactor the WebSocket handler to use a connection pool pattern."],
+            },
+          ]),
+        },
+      ],
+      fileChanges: [],
+      diffs: {},
+      fileTree: [],
+      minutesUsed: 2,
+    },
  ];
 }

--- a/foundry/packages/client/test/e2e/github-pr-e2e.test.ts
+++ b/foundry/packages/client/test/e2e/github-pr-e2e.test.ts
@ -171,7 +171,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
        "4. git push the branch to origin",
        "5. Stop when done (agent should go idle).",
      ].join("\n"),
-      providerId: "daytona",
+      providerId: "local",
      explicitTitle: `test(e2e): ${runId}`,
      explicitBranchName: `e2e/${runId}`,
    });
@ -185,7 +185,7 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {
    try {
      const namedAndProvisioned = await poll<TaskRecord>(
        "task naming + sandbox provisioning",
-        // Cold Daytona snapshot/image preparation can exceed 5 minutes on first run.
+        // Cold local sandbox startup can exceed a few minutes on first run.
        8 * 60_000,
        1_000,
        async () => client.getTask(workspaceId, created.taskId),
@ -301,17 +301,17 @@ describe("e2e: backend -> sandbox-agent -> git -> PR", () => {

      if (sandboxId) {
        await poll<{ providerId: string; sandboxId: string; state: string; at: number }>(
-          "daytona sandbox to stop",
+          "sandbox to stop",
          2 * 60_000,
          2_000,
-          async () => client.sandboxProviderState(workspaceId, "daytona", sandboxId!),
+          async () => client.sandboxProviderState(workspaceId, "local", sandboxId!),
          (s) => {
            const st = String(s.state).toLowerCase();
-            return st.includes("stopped") || st.includes("suspended") || st.includes("paused");
+            return st.includes("destroyed") || st.includes("stopped") || st.includes("suspended") || st.includes("paused");
          },
        ).catch(async (err) => {
          const dump = await debugDump(client, workspaceId, created.taskId);
-          const state = await client.sandboxProviderState(workspaceId, "daytona", sandboxId!).catch(() => null);
+          const state = await client.sandboxProviderState(workspaceId, "local", sandboxId!).catch(() => null);
          throw new Error(`${err instanceof Error ? err.message : String(err)}\n` + `sandbox state: ${state ? state.state : "unknown"}\n` + `${dump}`);
        });
      }
--- a/foundry/packages/client/test/e2e/workbench-e2e.test.ts
+++ b/foundry/packages/client/test/e2e/workbench-e2e.test.ts
@ -1,11 +1,8 @@
-import { execFile } from "node:child_process";
-import { promisify } from "node:util";
 import { describe, expect, it } from "vitest";
 import type { TaskWorkbenchSnapshot, WorkbenchAgentTab, WorkbenchTask, WorkbenchModelId, WorkbenchTranscriptEvent } from "@sandbox-agent/foundry-shared";
 import { createBackendClient } from "../../src/backend-client.js";

 const RUN_WORKBENCH_E2E = process.env.HF_ENABLE_DAEMON_WORKBENCH_E2E === "1";
-const execFileAsync = promisify(execFile);

 function requiredEnv(name: string): string {
  const value = process.env[name]?.trim();
@ -20,8 +17,12 @@ function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchM
  switch (value) {
    case "claude-sonnet-4":
    case "claude-opus-4":
-    case "gpt-4o":
-    case "o3":
+    case "gpt-5.3-codex":
+    case "gpt-5.4":
+    case "gpt-5.2-codex":
+    case "gpt-5.1-codex-max":
+    case "gpt-5.2":
+    case "gpt-5.1-codex-mini":
      return value;
    default:
      return fallback;
@ -32,16 +33,6 @@ async function sleep(ms: number): Promise<void> {
  await new Promise((resolve) => setTimeout(resolve, ms));
 }

-async function seedSandboxFile(workspaceId: string, taskId: string, filePath: string, content: string): Promise<void> {
-  const repoPath = `/root/.local/share/foundry/local-sandboxes/${workspaceId}/${taskId}/repo`;
-  const script = [
-    `cd ${JSON.stringify(repoPath)}`,
-    `mkdir -p ${JSON.stringify(filePath.includes("/") ? filePath.slice(0, filePath.lastIndexOf("/")) : ".")}`,
-    `printf '%s\\n' ${JSON.stringify(content)} > ${JSON.stringify(filePath)}`,
-  ].join(" && ");
-  await execFileAsync("docker", ["exec", "foundry-backend-1", "bash", "-lc", script]);
-}
-
 async function poll<T>(label: string, timeoutMs: number, intervalMs: number, fn: () => Promise<T>, isDone: (value: T) => boolean): Promise<T> {
  const startedAt = Date.now();
  let lastValue: T;
@ -148,7 +139,7 @@ describe("e2e(client): workbench flows", () => {
    const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
    const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
    const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
-    const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
+    const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
    const runId = `wb-${Date.now().toString(36)}`;
    const expectedFile = `${runId}.txt`;
    const expectedInitialReply = `WORKBENCH_READY_${runId}`;
@ -192,17 +183,6 @@ describe("e2e(client): workbench flows", () => {
    expect(findTab(initialCompleted, primaryTab.id).sessionId).toBeTruthy();
    expect(transcriptIncludesAgentText(findTab(initialCompleted, primaryTab.id).transcript, expectedInitialReply)).toBe(true);

-    await seedSandboxFile(workspaceId, created.taskId, expectedFile, runId);
-
-    const fileSeeded = await poll(
-      "seeded sandbox file reflected in workbench",
-      30_000,
-      1_000,
-      async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
-      (task) => task.fileChanges.some((file) => file.path === expectedFile),
-    );
-    expect(fileSeeded.fileChanges.some((file) => file.path === expectedFile)).toBe(true);
-
    await client.renameWorkbenchTask(workspaceId, {
      taskId: created.taskId,
      value: `Workbench E2E ${runId} Renamed`,
@ -227,7 +207,11 @@ describe("e2e(client): workbench flows", () => {
    await client.updateWorkbenchDraft(workspaceId, {
      taskId: created.taskId,
      tabId: secondTab.tabId,
-      text: `Reply with exactly: ${expectedReply}`,
+      text: [
+        `Create a file named ${expectedFile} in the repo root.`,
+        `Write exactly this single line into the file: ${runId}`,
+        `Then reply with exactly: ${expectedReply}`,
+      ].join("\n"),
      attachments: [
        {
          id: `${expectedFile}:1`,
@ -245,8 +229,19 @@ describe("e2e(client): workbench flows", () => {
    await client.sendWorkbenchMessage(workspaceId, {
      taskId: created.taskId,
      tabId: secondTab.tabId,
-      text: `Reply with exactly: ${expectedReply}`,
-      attachments: [],
+      text: [
+        `Create a file named ${expectedFile} in the repo root.`,
+        `Write exactly this single line into the file: ${runId}`,
+        `Then reply with exactly: ${expectedReply}`,
+      ].join("\n"),
+      attachments: [
+        {
+          id: `${expectedFile}:1`,
+          filePath: expectedFile,
+          lineNumber: 1,
+          lineContent: runId,
+        },
+      ],
    });

    const withSecondReply = await poll(
@ -256,12 +251,15 @@ describe("e2e(client): workbench flows", () => {
      async () => findTask(await client.getWorkbench(workspaceId), created.taskId),
      (task) => {
        const tab = findTab(task, secondTab.tabId);
-        return tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply);
+        return (
+          tab.status === "idle" && transcriptIncludesAgentText(tab.transcript, expectedReply) && task.fileChanges.some((file) => file.path === expectedFile)
+        );
      },
    );

    const secondTranscript = findTab(withSecondReply, secondTab.tabId).transcript;
    expect(transcriptIncludesAgentText(secondTranscript, expectedReply)).toBe(true);
+    expect(withSecondReply.fileChanges.some((file) => file.path === expectedFile)).toBe(true);

    await client.setWorkbenchSessionUnread(workspaceId, {
      taskId: created.taskId,
--- a/foundry/packages/client/test/e2e/workbench-load-e2e.test.ts
+++ b/foundry/packages/client/test/e2e/workbench-load-e2e.test.ts
@ -30,8 +30,12 @@ function workbenchModelEnv(name: string, fallback: WorkbenchModelId): WorkbenchM
  switch (value) {
    case "claude-sonnet-4":
    case "claude-opus-4":
-    case "gpt-4o":
-    case "o3":
+    case "gpt-5.3-codex":
+    case "gpt-5.4":
+    case "gpt-5.2-codex":
+    case "gpt-5.1-codex-max":
+    case "gpt-5.2":
+    case "gpt-5.1-codex-mini":
      return value;
    default:
      return fallback;
@ -191,7 +195,7 @@ describe("e2e(client): workbench load", () => {
    const endpoint = process.env.HF_E2E_BACKEND_ENDPOINT?.trim() || "http://127.0.0.1:7741/v1/rivet";
    const workspaceId = process.env.HF_E2E_WORKSPACE?.trim() || "default";
    const repoRemote = requiredEnv("HF_E2E_GITHUB_REPO");
-    const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-4o");
+    const model = workbenchModelEnv("HF_E2E_MODEL", "gpt-5.3-codex");
    const taskCount = intEnv("HF_LOAD_TASK_COUNT", 3);
    const extraSessionCount = intEnv("HF_LOAD_EXTRA_SESSION_COUNT", 2);
    const pollIntervalMs = intEnv("HF_LOAD_POLL_INTERVAL_MS", 2_000);
--- a/foundry/packages/client/test/keys.test.ts
+++ b/foundry/packages/client/test/keys.test.ts
@ -1,5 +1,5 @@
 import { describe, expect, it } from "vitest";
-import { taskKey, taskStatusSyncKey, historyKey, projectBranchSyncKey, projectKey, projectPrSyncKey, sandboxInstanceKey, workspaceKey } from "../src/keys.js";
+import { taskKey, historyKey, projectBranchSyncKey, projectKey, projectPrSyncKey, taskSandboxKey, workspaceKey } from "../src/keys.js";

 describe("actor keys", () => {
  it("prefixes every key with workspace namespace", () => {
@ -7,11 +7,10 @@ describe("actor keys", () => {
      workspaceKey("default"),
      projectKey("default", "repo"),
      taskKey("default", "repo", "task"),
-      sandboxInstanceKey("default", "daytona", "sbx"),
+      taskSandboxKey("default", "sbx"),
      historyKey("default", "repo"),
      projectPrSyncKey("default", "repo"),
      projectBranchSyncKey("default", "repo"),
-      taskStatusSyncKey("default", "repo", "task", "sandbox-1", "session-1"),
    ];

    for (const key of keys) {
--- a/foundry/packages/client/test/view-model.test.ts
+++ b/foundry/packages/client/test/view-model.test.ts
@ -10,7 +10,7 @@ const sample: TaskRecord = {
  branchName: "feature/test",
  title: "Test Title",
  task: "Do test",
-  providerId: "daytona",
+  providerId: "local",
  status: "running",
  statusMessage: null,
  activeSandboxId: "sandbox-1",
@ -18,9 +18,9 @@ const sample: TaskRecord = {
  sandboxes: [
    {
      sandboxId: "sandbox-1",
-      providerId: "daytona",
+      providerId: "local",
      sandboxActorId: null,
-      switchTarget: "daytona://sandbox-1",
+      switchTarget: "sandbox://local/sandbox-1",
      cwd: null,
      createdAt: 1,
      updatedAt: 1,
@ -73,8 +73,8 @@ describe("summary helpers", () => {
  it("summarizes by status and provider", () => {
    const rows: TaskRecord[] = [
      sample,
-      { ...sample, taskId: "task-2", status: "idle", providerId: "daytona" },
-      { ...sample, taskId: "task-3", status: "error", providerId: "daytona" },
+      { ...sample, taskId: "task-2", status: "idle", providerId: "local" },
+      { ...sample, taskId: "task-3", status: "error", providerId: "local" },
    ];

    const summary = summarizeTasks(rows);
@ -82,6 +82,6 @@ describe("summary helpers", () => {
    expect(summary.byStatus.running).toBe(1);
    expect(summary.byStatus.idle).toBe(1);
    expect(summary.byStatus.error).toBe(1);
-    expect(summary.byProvider.daytona).toBe(3);
+    expect(summary.byProvider.local).toBe(3);
  });
 });