From bea3b581998ec20953a0f0100d0cfd1d9f7a68a5 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 17:33:53 -0700 Subject: [PATCH 1/3] fix(foundry): use $HOME instead of hardcoded /home/sandbox for sandbox repo paths E2B sandboxes run as `user` (home: /home/user), not `sandbox`, so `mkdir -p /home/sandbox` fails with "Permission denied". Replace all hardcoded `/home/sandbox` paths with `$HOME` resolved at shell runtime inside the sandbox, and dynamically resolve the repo CWD via the sandbox actor so it works across providers (E2B, local Docker, Daytona). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../backend/src/actors/sandbox/index.ts | 51 +++++++++++++++++-- .../backend/src/actors/task/workspace.ts | 33 ++++++------ 2 files changed, 65 insertions(+), 19 deletions(-) diff --git a/foundry/packages/backend/src/actors/sandbox/index.ts b/foundry/packages/backend/src/actors/sandbox/index.ts index 7f31adc..4fe4d9d 100644 --- a/foundry/packages/backend/src/actors/sandbox/index.ts +++ b/foundry/packages/backend/src/actors/sandbox/index.ts @@ -13,7 +13,12 @@ import { logActorWarning, resolveErrorMessage } from "../logging.js"; import { expectQueueResponse } from "../../services/queue.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; -const SANDBOX_REPO_CWD = "/home/sandbox/repo"; +/** + * Default repo CWD inside the sandbox. The actual path is resolved dynamically + * via `$HOME/repo` because different sandbox providers run as different users + * (e.g. E2B uses `/home/user`, local Docker uses `/home/sandbox`). + */ +const DEFAULT_SANDBOX_REPO_CWD = "/home/user/repo"; const DEFAULT_LOCAL_SANDBOX_IMAGE = "rivetdev/sandbox-agent:foundry-base-latest"; const DEFAULT_LOCAL_SANDBOX_PORT = 2468; const dockerClient = new Dockerode({ socketPath: "/var/run/docker.sock" }); @@ -297,6 +302,43 @@ async function listWorkspaceModelGroupsForSandbox(c: any): Promise Promise>; +// --------------------------------------------------------------------------- +// Dynamic repo CWD resolution +// --------------------------------------------------------------------------- + +let cachedRepoCwd: string | null = null; + +/** + * Resolve the repo CWD inside the sandbox by querying `$HOME`. + * Different providers run as different users (E2B: `/home/user`, local Docker: + * `/home/sandbox`), so the path must be resolved dynamically. The result is + * cached for the lifetime of this sandbox actor instance. + */ +async function resolveRepoCwd(c: any): Promise { + if (cachedRepoCwd) return cachedRepoCwd; + + try { + const result = await baseActions.runProcess(c, { + command: "bash", + args: ["-lc", "echo $HOME"], + cwd: "/", + timeoutMs: 10_000, + }); + const home = (result.stdout ?? result.result ?? "").trim(); + if (home && home.startsWith("/")) { + cachedRepoCwd = `${home}/repo`; + return cachedRepoCwd; + } + } catch (error) { + logActorWarning("taskSandbox", "failed to resolve $HOME, using default", { + error: resolveErrorMessage(error), + }); + } + + cachedRepoCwd = DEFAULT_SANDBOX_REPO_CWD; + return cachedRepoCwd; +} + // --------------------------------------------------------------------------- // Queue names for sandbox actor // --------------------------------------------------------------------------- @@ -528,8 +570,9 @@ export const taskSandbox = actor({ } }, - async repoCwd(): Promise<{ cwd: string }> { - return { cwd: SANDBOX_REPO_CWD }; + async repoCwd(c: any): Promise<{ cwd: string }> { + const resolved = await resolveRepoCwd(c); + return { cwd: resolved }; }, // Long-running action — kept as direct action to avoid blocking the @@ -600,4 +643,4 @@ export const taskSandbox = actor({ run: workflow(runSandboxWorkflow), }); -export { SANDBOX_REPO_CWD }; +export { DEFAULT_SANDBOX_REPO_CWD, resolveRepoCwd }; diff --git a/foundry/packages/backend/src/actors/task/workspace.ts b/foundry/packages/backend/src/actors/task/workspace.ts index f7dcc26..0856947 100644 --- a/foundry/packages/backend/src/actors/task/workspace.ts +++ b/foundry/packages/backend/src/actors/task/workspace.ts @@ -1,6 +1,6 @@ // @ts-nocheck import { randomUUID } from "node:crypto"; -import { basename, dirname } from "node:path"; +import { basename } from "node:path"; import { asc, eq } from "drizzle-orm"; import { DEFAULT_WORKSPACE_MODEL_GROUPS, @@ -11,7 +11,6 @@ import { import { getActorRuntimeContext } from "../context.js"; import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js"; import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js"; -import { SANDBOX_REPO_CWD } from "../sandbox/index.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { getBetterAuthService } from "../../services/better-auth.js"; import { resolveOrganizationGithubAuth } from "../../services/github-auth.js"; @@ -183,9 +182,9 @@ async function injectGitCredentials(sandbox: any, login: string, email: string, "set -euo pipefail", `git config --global user.name ${JSON.stringify(login)}`, `git config --global user.email ${JSON.stringify(email)}`, - `git config --global credential.helper 'store --file=/home/sandbox/.git-token'`, - `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > /home/sandbox/.git-token`, - `chmod 600 /home/sandbox/.git-token`, + `git config --global credential.helper 'store --file=$HOME/.git-token'`, + `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > $HOME/.git-token`, + `chmod 600 $HOME/.git-token`, ]; const result = await sandbox.runProcess({ command: "bash", @@ -576,6 +575,10 @@ async function getTaskSandboxRuntime( const sandbox = await getOrCreateTaskSandbox(c, c.state.organizationId, sandboxId, {}); const actorId = typeof sandbox.resolve === "function" ? await sandbox.resolve().catch(() => null) : null; const switchTarget = sandboxProviderId === "local" ? `sandbox://local/${sandboxId}` : `sandbox://e2b/${sandboxId}`; + + // Resolve the actual repo CWD from the sandbox's $HOME (differs by provider). + const repoCwdResult = await sandbox.repoCwd(); + const cwd = repoCwdResult?.cwd ?? "$HOME/repo"; const now = Date.now(); await c.db @@ -585,7 +588,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, createdAt: now, updatedAt: now, }) @@ -595,7 +598,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, updatedAt: now, }, }) @@ -606,7 +609,7 @@ async function getTaskSandboxRuntime( .set({ activeSandboxId: sandboxId, activeSwitchTarget: switchTarget, - activeCwd: SANDBOX_REPO_CWD, + activeCwd: cwd, updatedAt: now, }) .where(eq(taskRuntime.id, 1)) @@ -617,7 +620,7 @@ async function getTaskSandboxRuntime( sandboxId, sandboxProviderId, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, }; } @@ -648,15 +651,15 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) }); const baseRef = metadata.defaultBranch ?? "main"; - const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD); + // Use $HOME inside the shell script so the path resolves correctly regardless + // of which user the sandbox runs as (E2B: "user", local Docker: "sandbox"). const script = [ "set -euo pipefail", - `mkdir -p ${JSON.stringify(sandboxRepoRoot)}`, + 'REPO_DIR="$HOME/repo"', + 'mkdir -p "$HOME"', "git config --global credential.helper '!f() { echo username=x-access-token; echo password=${GH_TOKEN:-$GITHUB_TOKEN}; }; f'", - `if [ ! -d ${JSON.stringify(`${SANDBOX_REPO_CWD}/.git`)} ]; then rm -rf ${JSON.stringify(SANDBOX_REPO_CWD)} && git clone ${JSON.stringify( - metadata.remoteUrl, - )} ${JSON.stringify(SANDBOX_REPO_CWD)}; fi`, - `cd ${JSON.stringify(SANDBOX_REPO_CWD)}`, + `if [ ! -d "$REPO_DIR/.git" ]; then rm -rf "$REPO_DIR" && git clone ${JSON.stringify(metadata.remoteUrl)} "$REPO_DIR"; fi`, + 'cd "$REPO_DIR"', "git fetch origin --prune", `if git show-ref --verify --quiet refs/remotes/origin/${JSON.stringify(record.branchName).slice(1, -1)}; then target_ref=${JSON.stringify( `origin/${record.branchName}`, From 7b23e519c27bcdbf817b8604cd015d4c075b8759 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 17 Mar 2026 18:35:36 -0700 Subject: [PATCH 2/3] fix(foundry): add Bun idleTimeout safety net and subscription retry with backoff Bun.serve() defaults to a 10s idle timeout that can kill long-running requests. Actor RPCs go through the gateway tunnel with a 1s SSE ping, so this likely never fires, but set idleTimeout to 255 as a safety net. Subscription topics (app, org, session, task) previously had no retry mechanism. If the initial connection or a mid-session error occurred, the subscription stayed in error state permanently. Add exponential backoff retry (1s base, 30s max) that cleans up the old connection before each attempt and stops when disposed or no listeners remain. Co-Authored-By: Claude Opus 4.6 (1M context) --- foundry/packages/backend/src/index.ts | 94 +++++++++++++++++++ .../client/src/subscription/remote-manager.ts | 62 ++++++++++++ 2 files changed, 156 insertions(+) diff --git a/foundry/packages/backend/src/index.ts b/foundry/packages/backend/src/index.ts index e00abaa..617bacc 100644 --- a/foundry/packages/backend/src/index.ts +++ b/foundry/packages/backend/src/index.ts @@ -141,6 +141,59 @@ export async function startBackend(options: BackendStartOptions = {}): Promise.json, inspect with chrome://tracing) + app.get("/debug/memory", async (c) => { + if (process.env.NODE_ENV !== "development") { + return c.json({ error: "debug endpoints disabled in production" }, 403); + } + const wantGc = c.req.query("gc") === "1"; + if (wantGc && typeof Bun !== "undefined") { + // Bun.gc(true) triggers a synchronous full GC sweep in JavaScriptCore. + Bun.gc(true); + } + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + const nonHeapMb = rssMb - heapUsedMb - externalMb; + // Bun.heapStats() gives JSC-specific breakdown: object counts, typed array + // bytes, extra memory (native allocations tracked by JSC). Useful for + // distinguishing JS object bloat from native/WASM memory. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const BunAny = Bun as any; + const heapStats = typeof BunAny.heapStats === "function" ? BunAny.heapStats() : null; + const snapshot = { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + gcTriggered: wantGc, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + ...(heapStats ? { bunHeapStats: heapStats } : {}), + }; + // Optionally write a full JSC heap snapshot for offline analysis. + let heapSnapshotPath: string | null = null; + const wantHeap = c.req.query("heap") === "1"; + if (wantHeap && typeof Bun !== "undefined") { + heapSnapshotPath = `/tmp/foundry-heap-${Date.now()}.json`; + // Bun.generateHeapSnapshot("v8") returns a V8-compatible JSON string. + const heapJson = Bun.generateHeapSnapshot("v8"); + await Bun.write(heapSnapshotPath, heapJson); + } + logger.info(snapshot, "memory_usage_debug"); + return c.json({ ...snapshot, ...(heapSnapshotPath ? { heapSnapshotPath } : {}) }); + }); + app.use("*", async (c, next) => { const requestId = c.req.header("x-request-id")?.trim() || randomUUID(); const start = performance.now(); @@ -354,6 +407,11 @@ export async function startBackend(options: BackendStartOptions = {}): Promise { + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + // Non-heap RSS: memory not accounted for by JS heap or external buffers. + // Large values here point to native allocations (WASM, mmap, child process + // bookkeeping, Bun's internal arena, etc.). + const nonHeapMb = rssMb - heapUsedMb - externalMb; + const deltaRss = rssMb - prevRss; + prevRss = rssMb; + logger.info( + { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + deltaRssMb: deltaRss, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + }, + "memory_usage", + ); + }, 60_000); + } + process.on("SIGINT", async () => { server.stop(); process.exit(0); diff --git a/foundry/packages/client/src/subscription/remote-manager.ts b/foundry/packages/client/src/subscription/remote-manager.ts index 778241f..ae774c6 100644 --- a/foundry/packages/client/src/subscription/remote-manager.ts +++ b/foundry/packages/client/src/subscription/remote-manager.ts @@ -4,6 +4,11 @@ import { topicDefinitions, type TopicData, type TopicDefinition, type TopicKey, const GRACE_PERIOD_MS = 30_000; +/** Initial retry delay in ms. */ +const RETRY_BASE_MS = 1_000; +/** Maximum retry delay in ms. */ +const RETRY_MAX_MS = 30_000; + /** * Remote implementation of SubscriptionManager. * Each cache entry owns one actor connection plus one materialized snapshot. @@ -80,9 +85,12 @@ class TopicEntry { private unsubscribeEvent: (() => void) | null = null; private unsubscribeError: (() => void) | null = null; private teardownTimer: ReturnType | null = null; + private retryTimer: ReturnType | null = null; + private retryAttempt = 0; private startPromise: Promise | null = null; private eventPromise: Promise = Promise.resolve(); private started = false; + private disposed = false; constructor( private readonly topicKey: TopicKey, @@ -136,7 +144,9 @@ class TopicEntry { } dispose(): void { + this.disposed = true; this.cancelTeardown(); + this.cancelRetry(); this.unsubscribeEvent?.(); this.unsubscribeError?.(); if (this.conn) { @@ -148,6 +158,55 @@ class TopicEntry { this.error = null; this.lastRefreshAt = null; this.started = false; + this.retryAttempt = 0; + } + + private cancelRetry(): void { + if (this.retryTimer) { + clearTimeout(this.retryTimer); + this.retryTimer = null; + } + } + + /** + * Schedules a retry with exponential backoff. Cleans up any existing + * connection state before reconnecting. + */ + private scheduleRetry(): void { + if (this.disposed || this.listenerCount === 0) { + return; + } + + const delay = Math.min(RETRY_BASE_MS * 2 ** this.retryAttempt, RETRY_MAX_MS); + this.retryAttempt++; + + this.retryTimer = setTimeout(() => { + this.retryTimer = null; + if (this.disposed || this.listenerCount === 0) { + return; + } + + // Tear down the old connection before retrying + this.cleanupConnection(); + this.started = false; + this.startPromise = this.start().finally(() => { + this.startPromise = null; + }); + }, delay); + } + + /** + * Cleans up connection resources without resetting data/status/retry state. + */ + private cleanupConnection(): void { + this.unsubscribeEvent?.(); + this.unsubscribeError?.(); + this.unsubscribeEvent = null; + this.unsubscribeError = null; + if (this.conn) { + void this.conn.dispose(); + } + this.conn = null; } private async start(): Promise { @@ -164,17 +223,20 @@ class TopicEntry { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.notify(); + this.scheduleRetry(); }); this.data = await this.definition.fetchInitial(this.backend, this.params); this.status = "connected"; this.lastRefreshAt = Date.now(); this.started = true; + this.retryAttempt = 0; this.notify(); } catch (error) { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.started = false; this.notify(); + this.scheduleRetry(); } } From 3525dcc31528b4e7fb33e556c7af91d86e433ae3 Mon Sep 17 00:00:00 2001 From: Ralph Khreish <35776126+Crunchyman-ralph@users.noreply.github.com> Date: Thu, 19 Mar 2026 17:45:16 +0100 Subject: [PATCH 3/3] fix: update install script URL from 0.3.x to 0.4.x The E2B and Vercel providers install sandbox-agent 0.3.x inside sandboxes while the SDK client speaks 0.4.0 ACP protocol, causing AcpRpcError -32603. Fixes #272 --- sdks/typescript/src/providers/shared.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/typescript/src/providers/shared.ts b/sdks/typescript/src/providers/shared.ts index c0f7b1c..1eb89b1 100644 --- a/sdks/typescript/src/providers/shared.ts +++ b/sdks/typescript/src/providers/shared.ts @@ -1,5 +1,5 @@ export const DEFAULT_SANDBOX_AGENT_IMAGE = "rivetdev/sandbox-agent:0.5.0-rc.1-full"; -export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh"; +export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh"; export const DEFAULT_AGENTS = ["claude", "codex"] as const; export function buildServerStartCommand(port: number): string {