Merge branch 'main' into e2b-base-image-support

This commit is contained in:
ABC 2026-03-25 00:37:58 -04:00 committed by GitHub
commit fe8fbfc91c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 223 additions and 21 deletions

View file

@ -13,7 +13,12 @@ import { logActorWarning, resolveErrorMessage } from "../logging.js";
import { expectQueueResponse } from "../../services/queue.js"; import { expectQueueResponse } from "../../services/queue.js";
import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js";
const SANDBOX_REPO_CWD = "/home/sandbox/repo"; /**
* Default repo CWD inside the sandbox. The actual path is resolved dynamically
* via `$HOME/repo` because different sandbox providers run as different users
* (e.g. E2B uses `/home/user`, local Docker uses `/home/sandbox`).
*/
const DEFAULT_SANDBOX_REPO_CWD = "/home/user/repo";
const DEFAULT_LOCAL_SANDBOX_IMAGE = "rivetdev/sandbox-agent:foundry-base-latest"; const DEFAULT_LOCAL_SANDBOX_IMAGE = "rivetdev/sandbox-agent:foundry-base-latest";
const DEFAULT_LOCAL_SANDBOX_PORT = 2468; const DEFAULT_LOCAL_SANDBOX_PORT = 2468;
const dockerClient = new Dockerode({ socketPath: "/var/run/docker.sock" }); const dockerClient = new Dockerode({ socketPath: "/var/run/docker.sock" });
@ -297,6 +302,43 @@ async function listWorkspaceModelGroupsForSandbox(c: any): Promise<WorkspaceMode
const baseActions = baseTaskSandbox.config.actions as Record<string, (c: any, ...args: any[]) => Promise<any>>; const baseActions = baseTaskSandbox.config.actions as Record<string, (c: any, ...args: any[]) => Promise<any>>;
// ---------------------------------------------------------------------------
// Dynamic repo CWD resolution
// ---------------------------------------------------------------------------
// Cache of the successfully resolved repo CWD.
// NOTE(review): this is a module-level binding, so it is shared by every caller
// that loads this module instance. If several sandbox actors with different
// providers can live in one process, this should be keyed per sandbox — confirm.
let cachedRepoCwd: string | null = null;

/**
 * Resolve the repo CWD inside the sandbox by querying `$HOME`.
 *
 * Different providers run as different users (E2B: `/home/user`, local Docker:
 * `/home/sandbox`), so the path must be resolved dynamically.
 *
 * Only a successfully resolved path is cached. When the lookup fails or returns
 * something that is not an absolute path, `DEFAULT_SANDBOX_REPO_CWD` is returned
 * for this call only, so a transient failure (for example, the sandbox not
 * being ready yet) does not pin the wrong path for every later call.
 *
 * @param c Actor context forwarded to `baseActions.runProcess`.
 * @returns The absolute repo path (`<home>/repo`), or the default on failure.
 */
async function resolveRepoCwd(c: any): Promise<string> {
  if (cachedRepoCwd) return cachedRepoCwd;

  try {
    const result = await baseActions.runProcess(c, {
      command: "bash",
      args: ["-lc", "echo $HOME"],
      cwd: "/",
      timeoutMs: 10_000,
    });

    const raw = result?.stdout ?? result?.result ?? "";
    const output = typeof raw === "string" ? raw : "";

    // `bash -l` sources profile scripts, which may print to stdout before our
    // command runs; the `echo` output is always the last non-empty line.
    const home =
      output
        .split("\n")
        .map((line) => line.trim())
        .filter((line) => line.length > 0)
        .pop() ?? "";

    if (home.startsWith("/")) {
      cachedRepoCwd = `${home}/repo`;
      return cachedRepoCwd;
    }

    logActorWarning("taskSandbox", "unexpected $HOME output, using default", {
      output,
    });
  } catch (error) {
    logActorWarning("taskSandbox", "failed to resolve $HOME, using default", {
      error: resolveErrorMessage(error),
    });
  }

  // Deliberately not cached: the next call retries the lookup.
  return DEFAULT_SANDBOX_REPO_CWD;
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Queue names for sandbox actor // Queue names for sandbox actor
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -528,8 +570,9 @@ export const taskSandbox = actor({
} }
}, },
async repoCwd(): Promise<{ cwd: string }> { async repoCwd(c: any): Promise<{ cwd: string }> {
return { cwd: SANDBOX_REPO_CWD }; const resolved = await resolveRepoCwd(c);
return { cwd: resolved };
}, },
// Long-running action — kept as direct action to avoid blocking the // Long-running action — kept as direct action to avoid blocking the
@ -600,4 +643,4 @@ export const taskSandbox = actor({
run: workflow(runSandboxWorkflow), run: workflow(runSandboxWorkflow),
}); });
export { SANDBOX_REPO_CWD }; export { DEFAULT_SANDBOX_REPO_CWD, resolveRepoCwd };

View file

@ -1,6 +1,6 @@
// @ts-nocheck // @ts-nocheck
import { randomUUID } from "node:crypto"; import { randomUUID } from "node:crypto";
import { basename, dirname } from "node:path"; import { basename } from "node:path";
import { asc, eq } from "drizzle-orm"; import { asc, eq } from "drizzle-orm";
import { import {
DEFAULT_WORKSPACE_MODEL_GROUPS, DEFAULT_WORKSPACE_MODEL_GROUPS,
@ -11,7 +11,6 @@ import {
import { getActorRuntimeContext } from "../context.js"; import { getActorRuntimeContext } from "../context.js";
import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js"; import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js";
import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js"; import { logActorInfo, logActorWarning, resolveErrorMessage } from "../logging.js";
import { SANDBOX_REPO_CWD } from "../sandbox/index.js";
import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js";
import { getBetterAuthService } from "../../services/better-auth.js"; import { getBetterAuthService } from "../../services/better-auth.js";
import { resolveOrganizationGithubAuth } from "../../services/github-auth.js"; import { resolveOrganizationGithubAuth } from "../../services/github-auth.js";
@ -183,9 +182,9 @@ async function injectGitCredentials(sandbox: any, login: string, email: string,
"set -euo pipefail", "set -euo pipefail",
`git config --global user.name ${JSON.stringify(login)}`, `git config --global user.name ${JSON.stringify(login)}`,
`git config --global user.email ${JSON.stringify(email)}`, `git config --global user.email ${JSON.stringify(email)}`,
`git config --global credential.helper 'store --file=/home/sandbox/.git-token'`, `git config --global credential.helper 'store --file=$HOME/.git-token'`,
`printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > /home/sandbox/.git-token`, `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > $HOME/.git-token`,
`chmod 600 /home/sandbox/.git-token`, `chmod 600 $HOME/.git-token`,
]; ];
const result = await sandbox.runProcess({ const result = await sandbox.runProcess({
command: "bash", command: "bash",
@ -576,6 +575,10 @@ async function getTaskSandboxRuntime(
const sandbox = await getOrCreateTaskSandbox(c, c.state.organizationId, sandboxId, {}); const sandbox = await getOrCreateTaskSandbox(c, c.state.organizationId, sandboxId, {});
const actorId = typeof sandbox.resolve === "function" ? await sandbox.resolve().catch(() => null) : null; const actorId = typeof sandbox.resolve === "function" ? await sandbox.resolve().catch(() => null) : null;
const switchTarget = sandboxProviderId === "local" ? `sandbox://local/${sandboxId}` : `sandbox://e2b/${sandboxId}`; const switchTarget = sandboxProviderId === "local" ? `sandbox://local/${sandboxId}` : `sandbox://e2b/${sandboxId}`;
// Resolve the actual repo CWD from the sandbox's $HOME (differs by provider).
const repoCwdResult = await sandbox.repoCwd();
const cwd = repoCwdResult?.cwd ?? "$HOME/repo";
const now = Date.now(); const now = Date.now();
await c.db await c.db
@ -585,7 +588,7 @@ async function getTaskSandboxRuntime(
sandboxProviderId, sandboxProviderId,
sandboxActorId: typeof actorId === "string" ? actorId : null, sandboxActorId: typeof actorId === "string" ? actorId : null,
switchTarget, switchTarget,
cwd: SANDBOX_REPO_CWD, cwd,
createdAt: now, createdAt: now,
updatedAt: now, updatedAt: now,
}) })
@ -595,7 +598,7 @@ async function getTaskSandboxRuntime(
sandboxProviderId, sandboxProviderId,
sandboxActorId: typeof actorId === "string" ? actorId : null, sandboxActorId: typeof actorId === "string" ? actorId : null,
switchTarget, switchTarget,
cwd: SANDBOX_REPO_CWD, cwd,
updatedAt: now, updatedAt: now,
}, },
}) })
@ -606,7 +609,7 @@ async function getTaskSandboxRuntime(
.set({ .set({
activeSandboxId: sandboxId, activeSandboxId: sandboxId,
activeSwitchTarget: switchTarget, activeSwitchTarget: switchTarget,
activeCwd: SANDBOX_REPO_CWD, activeCwd: cwd,
updatedAt: now, updatedAt: now,
}) })
.where(eq(taskRuntime.id, 1)) .where(eq(taskRuntime.id, 1))
@ -617,7 +620,7 @@ async function getTaskSandboxRuntime(
sandboxId, sandboxId,
sandboxProviderId, sandboxProviderId,
switchTarget, switchTarget,
cwd: SANDBOX_REPO_CWD, cwd,
}; };
} }
@ -648,15 +651,15 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski
logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) }); logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) });
const baseRef = metadata.defaultBranch ?? "main"; const baseRef = metadata.defaultBranch ?? "main";
const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD); // Use $HOME inside the shell script so the path resolves correctly regardless
// of which user the sandbox runs as (E2B: "user", local Docker: "sandbox").
const script = [ const script = [
"set -euo pipefail", "set -euo pipefail",
`mkdir -p ${JSON.stringify(sandboxRepoRoot)}`, 'REPO_DIR="$HOME/repo"',
'mkdir -p "$HOME"',
"git config --global credential.helper '!f() { echo username=x-access-token; echo password=${GH_TOKEN:-$GITHUB_TOKEN}; }; f'", "git config --global credential.helper '!f() { echo username=x-access-token; echo password=${GH_TOKEN:-$GITHUB_TOKEN}; }; f'",
`if [ ! -d ${JSON.stringify(`${SANDBOX_REPO_CWD}/.git`)} ]; then rm -rf ${JSON.stringify(SANDBOX_REPO_CWD)} && git clone ${JSON.stringify( `if [ ! -d "$REPO_DIR/.git" ]; then rm -rf "$REPO_DIR" && git clone ${JSON.stringify(metadata.remoteUrl)} "$REPO_DIR"; fi`,
metadata.remoteUrl, 'cd "$REPO_DIR"',
)} ${JSON.stringify(SANDBOX_REPO_CWD)}; fi`,
`cd ${JSON.stringify(SANDBOX_REPO_CWD)}`,
"git fetch origin --prune", "git fetch origin --prune",
`if git show-ref --verify --quiet refs/remotes/origin/${JSON.stringify(record.branchName).slice(1, -1)}; then target_ref=${JSON.stringify( `if git show-ref --verify --quiet refs/remotes/origin/${JSON.stringify(record.branchName).slice(1, -1)}; then target_ref=${JSON.stringify(
`origin/${record.branchName}`, `origin/${record.branchName}`,

View file

@ -141,6 +141,59 @@ export async function startBackend(options: BackendStartOptions = {}): Promise<v
}; };
app.use("/v1/*", cors(corsConfig)); app.use("/v1/*", cors(corsConfig));
app.use("/v1", cors(corsConfig)); app.use("/v1", cors(corsConfig));
// On-demand memory snapshot endpoint for diagnosing spikes (dev only).
// Usage: curl http://127.0.0.1:7741/debug/memory
// Trigger GC first: curl http://127.0.0.1:7741/debug/memory?gc=1
// Write JSC heap snapshot: curl http://127.0.0.1:7741/debug/memory?heap=1
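// (writes /tmp/foundry-heap-<timestamp>.json in V8 heap-snapshot format; load it from the Chrome DevTools Memory tab, renaming it to *.heapsnapshot if DevTools rejects the .json name)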
// GET /debug/memory — returns a point-in-time memory snapshot as JSON and logs
// it. Query flags: `gc=1` forces a full GC before sampling, `heap=1` also
// writes a heap snapshot file and reports its path in the response.
app.get("/debug/memory", async (c) => {
  if (process.env.NODE_ENV !== "development") {
    return c.json({ error: "debug endpoints disabled in production" }, 403);
  }

  // Resolve the Bun global once. Referencing a bare `Bun` identifier outside
  // of a `typeof` check throws a ReferenceError on non-Bun runtimes, so every
  // Bun-specific call below goes through this nullable handle.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const bunRuntime = typeof Bun !== "undefined" ? (Bun as any) : null;

  const wantGc = c.req.query("gc") === "1";
  if (wantGc && bunRuntime !== null) {
    // Bun.gc(true) triggers a synchronous full GC sweep in JavaScriptCore.
    bunRuntime.gc(true);
  }

  const mem = process.memoryUsage();
  const rssMb = Math.round(mem.rss / 1024 / 1024);
  const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024);
  const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024);
  const externalMb = Math.round(mem.external / 1024 / 1024);
  // RSS not accounted for by the JS heap or external buffers.
  const nonHeapMb = rssMb - heapUsedMb - externalMb;

  // JSC-specific breakdown (object counts, typed array bytes, extra memory),
  // included only when the runtime exposes it.
  // NOTE(review): Bun documents `heapStats` as an export of the `bun:jsc`
  // module rather than a property of the `Bun` global — confirm this probe
  // ever succeeds; if not, import it from `bun:jsc` instead.
  const heapStats = bunRuntime !== null && typeof bunRuntime.heapStats === "function" ? bunRuntime.heapStats() : null;

  const snapshot = {
    rssMb,
    heapUsedMb,
    heapTotalMb,
    externalMb,
    nonHeapMb,
    gcTriggered: wantGc,
    rssBytes: mem.rss,
    heapUsedBytes: mem.heapUsed,
    heapTotalBytes: mem.heapTotal,
    externalBytes: mem.external,
    ...(heapStats ? { bunHeapStats: heapStats } : {}),
  };

  // Optionally write a full heap snapshot for offline analysis.
  let heapSnapshotPath: string | null = null;
  const wantHeap = c.req.query("heap") === "1";
  if (wantHeap && bunRuntime !== null) {
    heapSnapshotPath = `/tmp/foundry-heap-${Date.now()}.json`;
    // Bun.generateHeapSnapshot("v8") returns a V8-compatible JSON string.
    const heapJson = bunRuntime.generateHeapSnapshot("v8");
    await bunRuntime.write(heapSnapshotPath, heapJson);
  }

  logger.info(snapshot, "memory_usage_debug");
  return c.json({ ...snapshot, ...(heapSnapshotPath ? { heapSnapshotPath } : {}) });
});
app.use("*", async (c, next) => { app.use("*", async (c, next) => {
const requestId = c.req.header("x-request-id")?.trim() || randomUUID(); const requestId = c.req.header("x-request-id")?.trim() || randomUUID();
const start = performance.now(); const start = performance.now();
@ -354,6 +407,11 @@ export async function startBackend(options: BackendStartOptions = {}): Promise<v
}, },
hostname: config.backend.host, hostname: config.backend.host,
port: config.backend.port, port: config.backend.port,
// Bun defaults to 10s idle timeout. Actor RPCs go through the gateway
// tunnel (not direct HTTP), and the SSE stream has a 1s ping interval
// (RUNNER_SSE_PING_INTERVAL in rivetkit), so the idle timeout likely
// never fires in practice. Set high as a safety net regardless.
idleTimeout: 255,
}); });
logger.info( logger.info(
@ -364,6 +422,42 @@ export async function startBackend(options: BackendStartOptions = {}): Promise<v
"backend_started", "backend_started",
); );
// Periodic memory usage reporting for diagnosing memory spikes (dev only).
// Logs JS heap, RSS, and external (native/WASM) separately so we can tell
// whether spikes come from JS objects, Bun/JSC internals, or native addons
// like SQLite/WASM.
if (process.env.NODE_ENV === "development") {
  const toMb = (bytes: number): number => Math.round(bytes / 1024 / 1024);

  // Seed the baseline with the current RSS so the first sample reports a real
  // delta instead of the entire resident set showing up as a spike.
  let prevRssMb = toMb(process.memoryUsage().rss);

  // Emit one "memory_usage" log record every 60 seconds.
  setInterval(() => {
    const mem = process.memoryUsage();
    const rssMb = toMb(mem.rss);
    const heapUsedMb = toMb(mem.heapUsed);
    const heapTotalMb = toMb(mem.heapTotal);
    const externalMb = toMb(mem.external);
    // Non-heap RSS: memory not accounted for by JS heap or external buffers.
    // Large values here point to native allocations (WASM, mmap, child process
    // bookkeeping, Bun's internal arena, etc.).
    const nonHeapMb = rssMb - heapUsedMb - externalMb;
    // Change in RSS since the previous sample (or since startup for the first).
    const deltaRssMb = rssMb - prevRssMb;
    prevRssMb = rssMb;

    logger.info(
      {
        rssMb,
        heapUsedMb,
        heapTotalMb,
        externalMb,
        nonHeapMb,
        deltaRssMb,
        rssBytes: mem.rss,
        heapUsedBytes: mem.heapUsed,
        heapTotalBytes: mem.heapTotal,
        externalBytes: mem.external,
      },
      "memory_usage",
    );
  }, 60_000);
}
process.on("SIGINT", async () => { process.on("SIGINT", async () => {
server.stop(); server.stop();
process.exit(0); process.exit(0);

View file

@ -4,6 +4,11 @@ import { topicDefinitions, type TopicData, type TopicDefinition, type TopicKey,
const GRACE_PERIOD_MS = 30_000; const GRACE_PERIOD_MS = 30_000;
/** Initial retry delay in ms. */
const RETRY_BASE_MS = 1_000;
/** Maximum retry delay in ms. */
const RETRY_MAX_MS = 30_000;
/** /**
* Remote implementation of SubscriptionManager. * Remote implementation of SubscriptionManager.
* Each cache entry owns one actor connection plus one materialized snapshot. * Each cache entry owns one actor connection plus one materialized snapshot.
@ -80,9 +85,12 @@ class TopicEntry<TData, TParams, TEvent> {
private unsubscribeEvent: (() => void) | null = null; private unsubscribeEvent: (() => void) | null = null;
private unsubscribeError: (() => void) | null = null; private unsubscribeError: (() => void) | null = null;
private teardownTimer: ReturnType<typeof setTimeout> | null = null; private teardownTimer: ReturnType<typeof setTimeout> | null = null;
private retryTimer: ReturnType<typeof setTimeout> | null = null;
private retryAttempt = 0;
private startPromise: Promise<void> | null = null; private startPromise: Promise<void> | null = null;
private eventPromise: Promise<void> = Promise.resolve(); private eventPromise: Promise<void> = Promise.resolve();
private started = false; private started = false;
private disposed = false;
constructor( constructor(
private readonly topicKey: TopicKey, private readonly topicKey: TopicKey,
@ -136,7 +144,9 @@ class TopicEntry<TData, TParams, TEvent> {
} }
dispose(): void { dispose(): void {
this.disposed = true;
this.cancelTeardown(); this.cancelTeardown();
this.cancelRetry();
this.unsubscribeEvent?.(); this.unsubscribeEvent?.();
this.unsubscribeError?.(); this.unsubscribeError?.();
if (this.conn) { if (this.conn) {
@ -148,6 +158,55 @@ class TopicEntry<TData, TParams, TEvent> {
this.error = null; this.error = null;
this.lastRefreshAt = null; this.lastRefreshAt = null;
this.started = false; this.started = false;
this.retryAttempt = 0;
}
/** Clears the pending reconnect timer, if one is scheduled. */
private cancelRetry(): void {
  const pending = this.retryTimer;
  if (!pending) {
    return;
  }
  clearTimeout(pending);
  this.retryTimer = null;
}
/**
 * Schedules a reconnect with exponential backoff
 * (`RETRY_BASE_MS * 2^attempt`, capped at `RETRY_MAX_MS`).
 *
 * Does nothing when the entry is disposed, has no listeners, or already has a
 * retry pending. The last guard matters because a single failure can reach
 * this method more than once; without it the earlier timer handle would be
 * overwritten (and become uncancellable), the backoff would advance twice,
 * and two reconnects would start.
 *
 * When the timer fires, the existing connection is torn down before
 * `start()` is invoked again.
 */
private scheduleRetry(): void {
  if (this.disposed || this.listenerCount === 0) {
    return;
  }
  if (this.retryTimer) {
    // A retry is already queued for this failure; keep it.
    return;
  }

  const delay = Math.min(RETRY_BASE_MS * 2 ** this.retryAttempt, RETRY_MAX_MS);
  this.retryAttempt++;

  this.retryTimer = setTimeout(() => {
    this.retryTimer = null;
    // State may have changed while the timer was pending.
    if (this.disposed || this.listenerCount === 0) {
      return;
    }

    // Tear down the old connection before retrying
    this.cleanupConnection();
    this.started = false;
    this.startPromise = this.start().finally(() => {
      this.startPromise = null;
    });
  }, delay);
}
/**
* Cleans up connection resources without resetting data/status/retry state.
*/
private cleanupConnection(): void {
this.unsubscribeEvent?.();
this.unsubscribeError?.();
this.unsubscribeEvent = null;
this.unsubscribeError = null;
if (this.conn) {
void this.conn.dispose();
}
this.conn = null;
} }
private async start(): Promise<void> { private async start(): Promise<void> {
@ -164,17 +223,20 @@ class TopicEntry<TData, TParams, TEvent> {
this.status = "error"; this.status = "error";
this.error = error instanceof Error ? error : new Error(String(error)); this.error = error instanceof Error ? error : new Error(String(error));
this.notify(); this.notify();
this.scheduleRetry();
}); });
this.data = await this.definition.fetchInitial(this.backend, this.params); this.data = await this.definition.fetchInitial(this.backend, this.params);
this.status = "connected"; this.status = "connected";
this.lastRefreshAt = Date.now(); this.lastRefreshAt = Date.now();
this.started = true; this.started = true;
this.retryAttempt = 0;
this.notify(); this.notify();
} catch (error) { } catch (error) {
this.status = "error"; this.status = "error";
this.error = error instanceof Error ? error : new Error(String(error)); this.error = error instanceof Error ? error : new Error(String(error));
this.started = false; this.started = false;
this.notify(); this.notify();
this.scheduleRetry();
} }
} }

View file

@ -1,5 +1,5 @@
export const DEFAULT_SANDBOX_AGENT_IMAGE = process.env.SANDBOX_AGENT_IMAGE ?? "rivetdev/sandbox-agent:0.5.0-rc.1-full"; export const DEFAULT_SANDBOX_AGENT_IMAGE = "rivetdev/sandbox-agent:0.5.0-rc.1-full";
export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh"; export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh";
export const DEFAULT_AGENTS = ["claude", "codex"] as const; export const DEFAULT_AGENTS = ["claude", "codex"] as const;
export function buildServerStartCommand(port: number): string { export function buildServerStartCommand(port: number): string {