SDK: Add ensureServer() for automatic server recovery (#260)

* SDK sandbox provisioning: built-in providers, docs restructure, and quickstart overhaul

- Add built-in sandbox providers (local, docker, e2b, daytona, vercel, cloudflare) to the TypeScript SDK so users import directly instead of passing client instances
- Restructure docs: rename architecture to orchestration-architecture, add new architecture page for server overview, improve getting started flow
- Rewrite quickstart to be TypeScript-first with provider CodeGroup and custom provider accordion
- Update all examples to use new provider APIs
- Update persist drivers and foundry for new SDK surface

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix SDK typecheck errors and update persist drivers for insertEvent signature

- Fix insertEvent call in client.ts to pass sessionId as first argument
- Update Daytona provider create options to use Partial type (image has default)
- Update StrictUniqueSessionPersistDriver in tests to match new insertEvent signature
- Sync persist packages, openapi spec, and docs with upstream changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Add Modal and ComputeSDK built-in providers, update examples and docs

- Add `sandbox-agent/modal` provider using Modal SDK with node:22-slim image
- Add `sandbox-agent/computesdk` provider using ComputeSDK's unified sandbox API
- Update Modal and ComputeSDK examples to use new SDK providers
- Update Modal and ComputeSDK deploy docs with provider-based examples
- Add Modal to quickstart CodeGroup and docs.json navigation
- Add provider test entries for Modal and ComputeSDK
- Remove old standalone example files (modal.ts, computesdk.ts)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix Modal provider: pre-install agents in image, fire-and-forget exec for server

- Pre-install agents in Dockerfile commands so they are cached across creates
- Use fire-and-forget exec (no wait) to keep server alive in Modal sandbox
- Add memoryMiB option (default 2GB) to avoid OOM during agent install

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Sync upstream changes: multiplayer docs, logos, openapi spec, foundry config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* SDK: Add ensureServer() for automatic server recovery

Add ensureServer() to SandboxProvider interface to handle cases where the
sandbox-agent server stops or goes to sleep. The SDK now calls this method
after 3 consecutive health-check failures, allowing providers to restart the
server if needed. Most built-in providers (E2B, Daytona, Vercel, Modal,
ComputeSDK) implement this. Docker and Cloudflare manage server lifecycle
differently, and Local uses managed child processes.

Also update docs for quickstart, architecture, multiplayer, and session
persistence; mark persist-* packages as deprecated; and add ensureServer
implementations to all applicable providers.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

* wip

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Flurry 2026-03-15 20:29:28 -07:00 committed by GitHub
parent 3426cbc6ec
commit cf7e2a92c6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
112 changed files with 3739 additions and 3537 deletions

View file

@ -0,0 +1,154 @@
import { describe, it, expect } from "vitest";
import { spawn, type ChildProcess } from "node:child_process";
import { resolve, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { execSync } from "node:child_process";
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Parent of the directory that contains this test file.
const PROJECT_DIR = resolve(__dirname, "..");
/**
* Cloudflare Workers integration test.
*
* Set RUN_CLOUDFLARE_EXAMPLES=1 to enable. Requires wrangler and Docker.
*
* This starts `wrangler dev` which:
* 1. Builds the Dockerfile (cloudflare/sandbox base + sandbox-agent)
* 2. Starts a local Workers runtime with Durable Objects and containers
* 3. Exposes the app on a local port
*
* We then test through the proxy endpoint which forwards to sandbox-agent
* running inside the container.
*/
// The suite is opt-in: it only runs when RUN_CLOUDFLARE_EXAMPLES is exactly "1".
const shouldRun = process.env.RUN_CLOUDFLARE_EXAMPLES === "1";
// Per-test timeout in ms; an unset, empty, non-numeric or zero value falls back to 600_000.
const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS ?? "", 10) || 600_000;
// Register the test normally when enabled, otherwise register it as skipped.
const testFn = shouldRun ? it : it.skip;
/** Handle to a running `wrangler dev` process, as produced by `startWranglerDev`. */
interface WranglerDev {
  /** Sends SIGTERM to the wrangler process group, falling back to the child process itself. */
  cleanup: () => void;
  /** URL parsed from wrangler's startup output. */
  baseUrl: string;
}
/**
 * Extracts the dev-server base URL from accumulated wrangler output.
 *
 * Prefers the explicit "Ready on http://localhost:XXXX" line and falls back to
 * the first localhost / 127.0.0.1 URL that carries a port.
 *
 * @param output - All text received so far from one of wrangler's output streams.
 * @returns The URL without trailing slashes, or `undefined` when none is present.
 */
function findReadyUrl(output: string): string | undefined {
  const match = output.match(/Ready on (https?:\/\/[^\s]+)/i) ?? output.match(/(https?:\/\/(?:localhost|127\.0\.0\.1):\d+)/);
  // Callers append "/sandbox/..." to the result; a trailing slash would yield "//sandbox/...".
  return match?.[1]?.replace(/\/+$/, "");
}

/**
 * Builds the frontend with vite, spawns `wrangler dev` on an ephemeral port and
 * waits until it reports the URL it is listening on.
 *
 * @param startupTimeoutMs - How long to wait for the ready URL before giving up.
 * @returns The parsed base URL plus a `cleanup` callback that terminates wrangler.
 * @throws If the vite build fails (thrown by `execSync`); the returned promise
 *   rejects if wrangler fails to spawn, exits before becoming ready, or does
 *   not become ready within `startupTimeoutMs`.
 */
async function startWranglerDev(startupTimeoutMs = 120_000): Promise<WranglerDev> {
  // Build frontend assets first (wrangler expects dist/ to exist)
  execSync("npx vite build", { cwd: PROJECT_DIR, stdio: "pipe" });

  // Executor parameters are named fulfill/fail so they do not shadow path.resolve.
  return new Promise<WranglerDev>((fulfill, fail) => {
    const child: ChildProcess = spawn("npx", ["wrangler", "dev", "--port", "0"], {
      cwd: PROJECT_DIR,
      stdio: ["ignore", "pipe", "pipe"],
      // Detached so cleanup can signal the whole process group via a negative pid.
      detached: true,
      env: {
        // Inherit the caller's environment so wrangler picks up API keys to pass to the container
        ...process.env,
        NODE_ENV: "development",
      },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;

    const cleanup = () => {
      if (child.pid) {
        // Kill process group to ensure wrangler and its children are cleaned up
        try {
          process.kill(-child.pid, "SIGTERM");
        } catch {
          // Group signalling failed; best effort on the direct child.
          try {
            child.kill("SIGTERM");
          } catch {}
        }
      }
    };

    // Runs `finish` at most once and cancels the startup timer.
    const settle = (finish: () => void) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      finish();
    };

    const timer = setTimeout(() => {
      settle(() => {
        cleanup();
        fail(new Error(`wrangler dev did not start within ${startupTimeoutMs / 1000}s.\nstdout: ${stdout}\nstderr: ${stderr}`));
      });
    }, startupTimeoutMs);

    const announceIfReady = (output: string) => {
      const baseUrl = findReadyUrl(output);
      if (baseUrl !== undefined) {
        settle(() => fulfill({ baseUrl, cleanup }));
      }
    };

    // Both streams are matched against their accumulated text, so a ready line
    // split across chunks is still detected. Once settled the listeners keep
    // draining the pipes but stop buffering.
    child.stdout?.on("data", (chunk: Buffer) => {
      if (settled) return;
      stdout += chunk.toString();
      announceIfReady(stdout);
    });
    // Some wrangler versions print ready message to stderr
    child.stderr?.on("data", (chunk: Buffer) => {
      if (settled) return;
      stderr += chunk.toString();
      announceIfReady(stderr);
    });

    child.on("error", (err) => {
      settle(() => fail(new Error(`wrangler dev failed to start: ${err.message}`)));
    });
    child.on("exit", (code) => {
      settle(() => fail(new Error(`wrangler dev exited with code ${code}.\nstdout: ${stdout}\nstderr: ${stderr}`)));
    });
  });
}
// End-to-end check: boot wrangler dev, then poll sandbox-agent's health endpoint through the proxy.
describe("cloudflare example", () => {
  testFn(
    "starts wrangler dev and sandbox-agent responds via proxy",
    async () => {
      const wrangler = await startWranglerDev();
      const pause = (ms: number) => new Promise((done) => setTimeout(done, ms));

      try {
        // The Cloudflare example proxies requests through /sandbox/:name/proxy/*
        const healthUrl = `${wrangler.baseUrl}/sandbox/test/proxy/v1/health`;

        // Poll until the proxied health endpoint answers: up to 120 attempts, 2s apart.
        let healthy = false;
        let attempts = 0;
        while (!healthy && attempts < 120) {
          attempts += 1;
          try {
            const res = await fetch(healthUrl);
            if (res.ok) {
              const data = await res.json();
              // The proxied health endpoint returns {name: "Sandbox Agent", ...}
              healthy = data.status === "ok" || data.name === "Sandbox Agent";
            }
          } catch {
            // Request or JSON parsing failed; treat as not ready yet and retry.
          }
          if (!healthy) {
            await pause(2000);
          }
        }
        expect(healthy).toBe(true);

        // Confirm a second request also works
        const followUp = await fetch(healthUrl);
        expect(followUp.ok).toBe(true);
      } finally {
        // Always tear wrangler down, including when an assertion above fails.
        wrangler.cleanup();
      }
    },
    timeoutMs,
  );
});

View file

@ -0,0 +1,8 @@
import { defineConfig } from "vitest/config";
// Vitest options: resolve from the current directory and collect every *.test.ts under tests/.
const testOptions = {
  root: ".",
  include: ["tests/**/*.test.ts"],
};

export default defineConfig({ test: testOptions });