diff --git a/foundry/packages/backend/src/actors/sandbox/index.ts b/foundry/packages/backend/src/actors/sandbox/index.ts index 7f31adc..4fe4d9d 100644 --- a/foundry/packages/backend/src/actors/sandbox/index.ts +++ b/foundry/packages/backend/src/actors/sandbox/index.ts @@ -13,7 +13,12 @@ import { logActorWarning, resolveErrorMessage } from "../logging.js"; import { expectQueueResponse } from "../../services/queue.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; -const SANDBOX_REPO_CWD = "/home/sandbox/repo"; +/** + * Default repo CWD inside the sandbox. The actual path is resolved dynamically + * via `$HOME/repo` because different sandbox providers run as different users + * (e.g. E2B uses `/home/user`, local Docker uses `/home/sandbox`). + */ +const DEFAULT_SANDBOX_REPO_CWD = "/home/user/repo"; const DEFAULT_LOCAL_SANDBOX_IMAGE = "rivetdev/sandbox-agent:foundry-base-latest"; const DEFAULT_LOCAL_SANDBOX_PORT = 2468; const dockerClient = new Dockerode({ socketPath: "/var/run/docker.sock" }); @@ -297,6 +302,43 @@ async function listWorkspaceModelGroupsForSandbox(c: any): Promise Promise>; +// --------------------------------------------------------------------------- +// Dynamic repo CWD resolution +// --------------------------------------------------------------------------- + +let cachedRepoCwd: string | null = null; + +/** + * Resolve the repo CWD inside the sandbox by querying `$HOME`. + * Different providers run as different users (E2B: `/home/user`, local Docker: + * `/home/sandbox`), so the path must be resolved dynamically. The result is + * cached for the lifetime of this sandbox actor instance. + */ +async function resolveRepoCwd(c: any): Promise { + if (cachedRepoCwd) return cachedRepoCwd; + + try { + const result = await baseActions.runProcess(c, { + command: "bash", + args: ["-lc", "echo $HOME"], + cwd: "/", + timeoutMs: 10_000, + }); + const home = (result.stdout ?? result.result ?? "").trim(); + if (home && home.startsWith("/")) { + cachedRepoCwd = `${home}/repo`; + return cachedRepoCwd; + } + } catch (error) { + logActorWarning("taskSandbox", "failed to resolve $HOME, using default", { + error: resolveErrorMessage(error), + }); + } + + cachedRepoCwd = DEFAULT_SANDBOX_REPO_CWD; + return cachedRepoCwd; +} + // --------------------------------------------------------------------------- // Queue names for sandbox actor // --------------------------------------------------------------------------- @@ -528,8 +570,9 @@ export const taskSandbox = actor({ } }, - async repoCwd(): Promise<{ cwd: string }> { - return { cwd: SANDBOX_REPO_CWD }; + async repoCwd(c: any): Promise<{ cwd: string }> { + const resolved = await resolveRepoCwd(c); + return { cwd: resolved }; }, // Long-running action — kept as direct action to avoid blocking the @@ -600,4 +643,4 @@ export const taskSandbox = actor({ run: workflow(runSandboxWorkflow), }); -export { SANDBOX_REPO_CWD }; +export { DEFAULT_SANDBOX_REPO_CWD, resolveRepoCwd }; diff --git a/foundry/packages/backend/src/actors/task/workspace.ts b/foundry/packages/backend/src/actors/task/workspace.ts index f7dcc26..0856947 100644 --- a/foundry/packages/backend/src/actors/task/workspace.ts +++ b/foundry/packages/backend/src/actors/task/workspace.ts @@ -1,6 +1,6 @@ // @ts-nocheck import { randomUUID } from "node:crypto"; -import { basename, dirname } from "node:path"; +import { basename } from "node:path"; import { asc, eq } from "drizzle-orm"; import { DEFAULT_WORKSPACE_MODEL_GROUPS, @@ -11,7 +11,6 @@ import { import { getActorRuntimeContext } from "../context.js"; import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js"; import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js"; -import { SANDBOX_REPO_CWD } from "../sandbox/index.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { getBetterAuthService } from "../../services/better-auth.js"; import { resolveOrganizationGithubAuth } from "../../services/github-auth.js"; @@ -183,9 +182,9 @@ async function injectGitCredentials(sandbox: any, login: string, email: string, "set -euo pipefail", `git config --global user.name ${JSON.stringify(login)}`, `git config --global user.email ${JSON.stringify(email)}`, - `git config --global credential.helper 'store --file=/home/sandbox/.git-token'`, - `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > /home/sandbox/.git-token`, - `chmod 600 /home/sandbox/.git-token`, + `git config --global credential.helper 'store --file=$HOME/.git-token'`, + `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > $HOME/.git-token`, + `chmod 600 $HOME/.git-token`, ]; const result = await sandbox.runProcess({ command: "bash", @@ -576,6 +575,10 @@ async function getTaskSandboxRuntime( const sandbox = await getOrCreateTaskSandbox(c, c.state.organizationId, sandboxId, {}); const actorId = typeof sandbox.resolve === "function" ? await sandbox.resolve().catch(() => null) : null; const switchTarget = sandboxProviderId === "local" ? `sandbox://local/${sandboxId}` : `sandbox://e2b/${sandboxId}`; + + // Resolve the actual repo CWD from the sandbox's $HOME (differs by provider). + const repoCwdResult = await sandbox.repoCwd(); + const cwd = repoCwdResult?.cwd ?? "$HOME/repo"; const now = Date.now(); await c.db @@ -585,7 +588,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, createdAt: now, updatedAt: now, }) @@ -595,7 +598,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, updatedAt: now, }, }) @@ -606,7 +609,7 @@ async function getTaskSandboxRuntime( .set({ activeSandboxId: sandboxId, activeSwitchTarget: switchTarget, - activeCwd: SANDBOX_REPO_CWD, + activeCwd: cwd, updatedAt: now, }) .where(eq(taskRuntime.id, 1)) @@ -617,7 +620,7 @@ async function getTaskSandboxRuntime( sandboxId, sandboxProviderId, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, }; } @@ -648,15 +651,15 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) }); const baseRef = metadata.defaultBranch ?? "main"; - const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD); + // Use $HOME inside the shell script so the path resolves correctly regardless + // of which user the sandbox runs as (E2B: "user", local Docker: "sandbox"). const script = [ "set -euo pipefail", - `mkdir -p ${JSON.stringify(sandboxRepoRoot)}`, + 'REPO_DIR="$HOME/repo"', + 'mkdir -p "$HOME"', "git config --global credential.helper '!f() { echo username=x-access-token; echo password=${GH_TOKEN:-$GITHUB_TOKEN}; }; f'", - `if [ ! -d ${JSON.stringify(`${SANDBOX_REPO_CWD}/.git`)} ]; then rm -rf ${JSON.stringify(SANDBOX_REPO_CWD)} && git clone ${JSON.stringify( - metadata.remoteUrl, - )} ${JSON.stringify(SANDBOX_REPO_CWD)}; fi`, - `cd ${JSON.stringify(SANDBOX_REPO_CWD)}`, + `if [ ! -d "$REPO_DIR/.git" ]; then rm -rf "$REPO_DIR" && git clone ${JSON.stringify(metadata.remoteUrl)} "$REPO_DIR"; fi`, + 'cd "$REPO_DIR"', "git fetch origin --prune", `if git show-ref --verify --quiet refs/remotes/origin/${JSON.stringify(record.branchName).slice(1, -1)}; then target_ref=${JSON.stringify( `origin/${record.branchName}`, diff --git a/foundry/packages/backend/src/index.ts b/foundry/packages/backend/src/index.ts index e00abaa..617bacc 100644 --- a/foundry/packages/backend/src/index.ts +++ b/foundry/packages/backend/src/index.ts @@ -141,6 +141,59 @@ export async function startBackend(options: BackendStartOptions = {}): Promise.json, inspect with chrome://tracing) + app.get("/debug/memory", async (c) => { + if (process.env.NODE_ENV !== "development") { + return c.json({ error: "debug endpoints disabled in production" }, 403); + } + const wantGc = c.req.query("gc") === "1"; + if (wantGc && typeof Bun !== "undefined") { + // Bun.gc(true) triggers a synchronous full GC sweep in JavaScriptCore. + Bun.gc(true); + } + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + const nonHeapMb = rssMb - heapUsedMb - externalMb; + // Bun.heapStats() gives JSC-specific breakdown: object counts, typed array + // bytes, extra memory (native allocations tracked by JSC). Useful for + // distinguishing JS object bloat from native/WASM memory. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const BunAny = Bun as any; + const heapStats = typeof BunAny.heapStats === "function" ? BunAny.heapStats() : null; + const snapshot = { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + gcTriggered: wantGc, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + ...(heapStats ? { bunHeapStats: heapStats } : {}), + }; + // Optionally write a full JSC heap snapshot for offline analysis. + let heapSnapshotPath: string | null = null; + const wantHeap = c.req.query("heap") === "1"; + if (wantHeap && typeof Bun !== "undefined") { + heapSnapshotPath = `/tmp/foundry-heap-${Date.now()}.json`; + // Bun.generateHeapSnapshot("v8") returns a V8-compatible JSON string. + const heapJson = Bun.generateHeapSnapshot("v8"); + await Bun.write(heapSnapshotPath, heapJson); + } + logger.info(snapshot, "memory_usage_debug"); + return c.json({ ...snapshot, ...(heapSnapshotPath ? { heapSnapshotPath } : {}) }); + }); + app.use("*", async (c, next) => { const requestId = c.req.header("x-request-id")?.trim() || randomUUID(); const start = performance.now(); @@ -354,6 +407,11 @@ export async function startBackend(options: BackendStartOptions = {}): Promise { + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + // Non-heap RSS: memory not accounted for by JS heap or external buffers. + // Large values here point to native allocations (WASM, mmap, child process + // bookkeeping, Bun's internal arena, etc.). + const nonHeapMb = rssMb - heapUsedMb - externalMb; + const deltaRss = rssMb - prevRss; + prevRss = rssMb; + logger.info( + { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + deltaRssMb: deltaRss, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + }, + "memory_usage", + ); + }, 60_000); + } + process.on("SIGINT", async () => { server.stop(); process.exit(0); diff --git a/foundry/packages/client/src/subscription/remote-manager.ts b/foundry/packages/client/src/subscription/remote-manager.ts index 778241f..ae774c6 100644 --- a/foundry/packages/client/src/subscription/remote-manager.ts +++ b/foundry/packages/client/src/subscription/remote-manager.ts @@ -4,6 +4,11 @@ import { topicDefinitions, type TopicData, type TopicDefinition, type TopicKey, const GRACE_PERIOD_MS = 30_000; +/** Initial retry delay in ms. */ +const RETRY_BASE_MS = 1_000; +/** Maximum retry delay in ms. */ +const RETRY_MAX_MS = 30_000; + /** * Remote implementation of SubscriptionManager. * Each cache entry owns one actor connection plus one materialized snapshot. @@ -80,9 +85,12 @@ class TopicEntry { private unsubscribeEvent: (() => void) | null = null; private unsubscribeError: (() => void) | null = null; private teardownTimer: ReturnType | null = null; + private retryTimer: ReturnType | null = null; + private retryAttempt = 0; private startPromise: Promise | null = null; private eventPromise: Promise = Promise.resolve(); private started = false; + private disposed = false; constructor( private readonly topicKey: TopicKey, @@ -136,7 +144,9 @@ class TopicEntry { } dispose(): void { + this.disposed = true; this.cancelTeardown(); + this.cancelRetry(); this.unsubscribeEvent?.(); this.unsubscribeError?.(); if (this.conn) { @@ -148,6 +158,55 @@ class TopicEntry { this.error = null; this.lastRefreshAt = null; this.started = false; + this.retryAttempt = 0; + } + + private cancelRetry(): void { + if (this.retryTimer) { + clearTimeout(this.retryTimer); + this.retryTimer = null; + } + } + + /** + * Schedules a retry with exponential backoff. Cleans up any existing + * connection state before reconnecting. + */ + private scheduleRetry(): void { + if (this.disposed || this.listenerCount === 0) { + return; + } + + const delay = Math.min(RETRY_BASE_MS * 2 ** this.retryAttempt, RETRY_MAX_MS); + this.retryAttempt++; + + this.retryTimer = setTimeout(() => { + this.retryTimer = null; + if (this.disposed || this.listenerCount === 0) { + return; + } + + // Tear down the old connection before retrying + this.cleanupConnection(); + this.started = false; + this.startPromise = this.start().finally(() => { + this.startPromise = null; + }); + }, delay); + } + + /** + * Cleans up connection resources without resetting data/status/retry state. + */ + private cleanupConnection(): void { + this.unsubscribeEvent?.(); + this.unsubscribeError?.(); + this.unsubscribeEvent = null; + this.unsubscribeError = null; + if (this.conn) { + void this.conn.dispose(); + } + this.conn = null; } private async start(): Promise { @@ -164,17 +223,20 @@ class TopicEntry { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.notify(); + this.scheduleRetry(); }); this.data = await this.definition.fetchInitial(this.backend, this.params); this.status = "connected"; this.lastRefreshAt = Date.now(); this.started = true; + this.retryAttempt = 0; this.notify(); } catch (error) { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.started = false; this.notify(); + this.scheduleRetry(); } } diff --git a/sdks/typescript/src/providers/shared.ts b/sdks/typescript/src/providers/shared.ts index 100b707..1eb89b1 100644 --- a/sdks/typescript/src/providers/shared.ts +++ b/sdks/typescript/src/providers/shared.ts @@ -1,5 +1,5 @@ -export const DEFAULT_SANDBOX_AGENT_IMAGE = process.env.SANDBOX_AGENT_IMAGE ?? "rivetdev/sandbox-agent:0.5.0-rc.1-full"; -export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh"; +export const DEFAULT_SANDBOX_AGENT_IMAGE = "rivetdev/sandbox-agent:0.5.0-rc.1-full"; +export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh"; export const DEFAULT_AGENTS = ["claude", "codex"] as const; export function buildServerStartCommand(port: number): string {