SDK: Add ensureServer() for automatic server recovery

Add ensureServer() to SandboxProvider interface to handle cases where the
sandbox-agent server stops or goes to sleep. The SDK now calls this method
after 3 consecutive health-check failures, allowing providers to restart the
server if needed. Most built-in providers (E2B, Daytona, Vercel, Modal,
ComputeSDK) implement this. Docker and Cloudflare manage server lifecycle
differently, and Local uses managed child processes.

Also update docs for quickstart, architecture, multiplayer, and session
persistence; mark persist-* packages as deprecated; and add ensureServer
implementations to all applicable providers.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Nathan Flurry 2026-03-15 20:14:30 -07:00
parent d008283c17
commit 35840facdd
38 changed files with 620 additions and 205 deletions

View file

@ -89,6 +89,7 @@ const HEALTH_WAIT_MIN_DELAY_MS = 500;
// NOTE(review): presumably the upper bound for the delay between health checks — its use site is not visible here; confirm.
const HEALTH_WAIT_MAX_DELAY_MS = 15_000;
// Added to the wait's start time to compute when the first log becomes due.
const HEALTH_WAIT_LOG_AFTER_MS = 5_000;
// NOTE(review): presumably the interval between subsequent logs — confirm at the use site.
const HEALTH_WAIT_LOG_EVERY_MS = 10_000;
// Number of consecutive failed health checks after which the provider's optional ensureServer() hook is invoked.
const HEALTH_WAIT_ENSURE_SERVER_AFTER_FAILURES = 3;
export interface SandboxAgentHealthWaitOptions {
timeoutMs?: number;
@ -903,7 +904,7 @@ export class SandboxAgent {
const createdSandbox = !existingSandbox;
if (existingSandbox) {
await provider.wake?.(rawSandboxId);
await provider.ensureServer?.(rawSandboxId);
}
try {
@ -2118,6 +2119,7 @@ export class SandboxAgent {
let delayMs = HEALTH_WAIT_MIN_DELAY_MS;
let nextLogAt = startedAt + HEALTH_WAIT_LOG_AFTER_MS;
let lastError: unknown;
let consecutiveFailures = 0;
while (!this.disposed && (deadline === undefined || Date.now() < deadline)) {
throwIfAborted(signal);
@ -2128,11 +2130,22 @@ export class SandboxAgent {
return;
}
lastError = new Error(`Unexpected health response: ${JSON.stringify(health)}`);
consecutiveFailures++;
} catch (error) {
if (isAbortError(error)) {
throw error;
}
lastError = error;
consecutiveFailures++;
}
if (consecutiveFailures >= HEALTH_WAIT_ENSURE_SERVER_AFTER_FAILURES && this.sandboxProvider?.ensureServer && this.sandboxProviderRawId) {
try {
await this.sandboxProvider.ensureServer(this.sandboxProviderRawId);
} catch {
// Best-effort; the next health check will determine if it worked.
}
consecutiveFailures = 0;
}
const now = Date.now();

View file

@ -49,5 +49,12 @@ export function computesdk(options: ComputeSdkProviderOptions = {}): SandboxProv
if (!sandbox) throw new Error(`computesdk sandbox not found: ${sandboxId}`);
return sandbox.getUrl({ port: agentPort });
},
/**
 * Launches the sandbox-agent server as a background command inside the
 * given ComputeSDK sandbox.
 *
 * @param sandboxId - Id used to look the sandbox up via `compute.sandbox.getById`.
 * @throws Error when no sandbox is returned for `sandboxId`.
 */
async ensureServer(sandboxId: string): Promise<void> {
  const target = await compute.sandbox.getById(sandboxId);
  if (!target) {
    throw new Error(`computesdk sandbox not found: ${sandboxId}`);
  }

  const serverCommand = `sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`;
  const runOptions = { background: true };
  await target.runCommand(serverCommand, runOptions);
},
};
}

View file

@ -56,7 +56,7 @@ export function daytona(options: DaytonaProviderOptions = {}): SandboxProvider {
const preview = await sandbox.getSignedPreviewUrl(agentPort, previewTtlSeconds);
return typeof preview === "string" ? preview : preview.url;
},
async wake(sandboxId: string): Promise<void> {
async ensureServer(sandboxId: string): Promise<void> {
const sandbox = await client.get(sandboxId);
if (!sandbox) {
throw new Error(`daytona sandbox not found: ${sandboxId}`);

View file

@ -53,5 +53,10 @@ export function e2b(options: E2BProviderOptions = {}): SandboxProvider {
const sandbox = await Sandbox.connect(sandboxId, connectOpts as any);
return `https://${sandbox.getHost(agentPort)}`;
},
/**
 * Connects to an existing E2B sandbox and launches the sandbox-agent
 * server there as a background command.
 *
 * @param sandboxId - Id of the sandbox to connect to.
 */
async ensureServer(sandboxId: string): Promise<void> {
  const resolvedConnectOptions = await resolveOptions(options.connect, sandboxId);
  const connected = await Sandbox.connect(sandboxId, resolvedConnectOptions as any);

  const serverCommand = `sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`;
  // NOTE(review): timeoutMs: 0 presumably disables the command timeout — confirm against the E2B SDK.
  const runOptions = { background: true, timeoutMs: 0 };
  await connected.commands.run(serverCommand, runOptions);
},
};
}

View file

@ -66,5 +66,9 @@ export function modal(options: ModalProviderOptions = {}): SandboxProvider {
}
return tunnel.url;
},
/**
 * Launches the sandbox-agent server inside an existing Modal sandbox.
 *
 * @param sandboxId - Id used to look the sandbox up via `client.sandboxes.fromId`.
 * @throws Propagates any failure from looking up the sandbox or launching the command.
 */
async ensureServer(sandboxId: string): Promise<void> {
  const sb = await client.sandboxes.fromId(sandboxId);
  // Await the exec call so a launch failure rejects this method's promise
  // (where callers can catch it) instead of becoming an unhandled rejection.
  // NOTE(review): exec() is expected to resolve with a process handle once the
  // command is started, not when it exits — confirm against the Modal SDK.
  await sb.exec(["sandbox-agent", "server", "--no-token", "--host", "0.0.0.0", "--port", String(agentPort)]);
},
};
}

View file

@ -21,8 +21,11 @@ export interface SandboxProvider {
getFetch?(sandboxId: string): Promise<typeof globalThis.fetch>;
/**
* Optional hook invoked before reconnecting to an existing sandbox.
* Useful for providers where the sandbox-agent process may need to be restarted.
* Ensure the sandbox-agent server process is running inside the sandbox.
* Called during health-wait after consecutive failures, and before
* reconnecting to an existing sandbox. Implementations should be
idempotent: if the server is already running, this should be a no-op
* (e.g. the duplicate process exits on port conflict).
*/
wake?(sandboxId: string): Promise<void>;
ensureServer?(sandboxId: string): Promise<void>;
}

View file

@ -53,5 +53,13 @@ export function vercel(options: VercelProviderOptions = {}): SandboxProvider {
const sandbox = await Sandbox.get({ sandboxId });
return sandbox.domain(agentPort);
},
/**
 * Fetches an existing Vercel sandbox and launches the sandbox-agent
 * server there as a detached command.
 *
 * @param sandboxId - Id of the sandbox to fetch via `Sandbox.get`.
 */
async ensureServer(sandboxId: string): Promise<void> {
  const target = await Sandbox.get({ sandboxId });
  const serverArgs = ["server", "--no-token", "--host", "0.0.0.0", "--port", String(agentPort)];
  await target.runCommand({ cmd: "sandbox-agent", args: serverArgs, detached: true });
},
};
}