Add SDK health wait gate

This commit is contained in:
Nathan Flurry 2026-03-05 18:43:38 -08:00
parent c3a95c3611
commit b860767d3f
10 changed files with 220 additions and 34 deletions

View file

@ -1,6 +1,6 @@
import { SimpleBox } from "@boxlite-ai/boxlite";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
import { setupImage, OCI_DIR } from "./setup-image.ts";
const env: Record<string, string> = {};
@ -26,10 +26,8 @@ if (result.exitCode !== 0) throw new Error(`Failed to start server: ${result.std
const baseUrl = "http://localhost:3000";
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/root", mcpServers: [] } });
const sessionId = session.id;

View file

@ -10,7 +10,7 @@ import {
type ProviderName,
} from "computesdk";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
import { fileURLToPath } from "node:url";
import { resolve } from "node:path";
@ -116,9 +116,6 @@ export async function setupComputeSdkSandboxAgent(): Promise<{
const baseUrl = await sandbox.getUrl({ port: PORT });
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const cleanup = async () => {
try {
await sandbox.destroy();
@ -141,7 +138,7 @@ export async function runComputeSdkExample(): Promise<void> {
process.once("SIGINT", handleExit);
process.once("SIGTERM", handleExit);
const client = await SandboxAgent.connect({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home", mcpServers: [] } });
const sessionId = session.id;

View file

@ -1,6 +1,6 @@
import { Daytona, Image } from "@daytonaio/sdk";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
const daytona = new Daytona();
@ -25,10 +25,8 @@ await sandbox.process.executeCommand(
const baseUrl = (await sandbox.getSignedPreviewUrl(3000, 4 * 60 * 60)).url;
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/daytona", mcpServers: [] } });
const sessionId = session.id;

View file

@ -1,6 +1,6 @@
import { Daytona } from "@daytonaio/sdk";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
const daytona = new Daytona();
@ -30,10 +30,8 @@ await sandbox.process.executeCommand(
const baseUrl = (await sandbox.getSignedPreviewUrl(3000, 4 * 60 * 60)).url;
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/daytona", mcpServers: [] } });
const sessionId = session.id;

View file

@ -1,6 +1,6 @@
import Docker from "dockerode";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
const IMAGE = "alpine:latest";
const PORT = 3000;
@ -43,9 +43,8 @@ const container = await docker.createContainer({
await container.start();
const baseUrl = `http://127.0.0.1:${PORT}`;
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/root", mcpServers: [] } });
const sessionId = session.id;

View file

@ -1,6 +1,6 @@
import { Sandbox } from "@e2b/code-interpreter";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
@ -27,10 +27,8 @@ await sandbox.commands.run("sandbox-agent server --no-token --host 0.0.0.0 --por
const baseUrl = `https://${sandbox.getHost(3000)}`;
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/user", mcpServers: [] } });
const sessionId = session.id;

View file

@ -1,6 +1,6 @@
import { Sandbox } from "@vercel/sandbox";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl, waitForHealth } from "@sandbox-agent/example-shared";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
@ -38,10 +38,8 @@ await sandbox.runCommand({
const baseUrl = sandbox.domain(3000);
console.log("Waiting for server...");
await waitForHealth({ baseUrl });
const client = await SandboxAgent.connect({ baseUrl });
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl, waitForHealth: { timeoutMs: 120_000 } });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/vercel-sandbox", mcpServers: [] } });
const sessionId = session.id;

View file

@ -53,6 +53,14 @@ const DEFAULT_BASE_URL = "http://sandbox-agent";
const DEFAULT_REPLAY_MAX_EVENTS = 50;
const DEFAULT_REPLAY_MAX_CHARS = 12_000;
const EVENT_INDEX_SCAN_EVENTS_LIMIT = 500;
// Initial delay between health probes; the delay is doubled after each unsuccessful probe.
const HEALTH_WAIT_MIN_DELAY_MS = 500;
// Upper bound applied to the doubling delay between health probes.
const HEALTH_WAIT_MAX_DELAY_MS = 15_000;
// How long the health wait runs before the first "still waiting" warning may be logged.
const HEALTH_WAIT_LOG_AFTER_MS = 5_000;
// Minimum spacing between subsequent "still waiting" warnings.
const HEALTH_WAIT_LOG_EVERY_MS = 10_000;
/**
 * Options for the health wait that gates requests until the server reports healthy.
 */
export interface SandboxAgentHealthWaitOptions {
/**
 * Maximum time to wait for a healthy response, in milliseconds.
 * When omitted, or when the value is not a finite positive number, no deadline
 * is applied and the wait continues until the server is healthy or the client
 * is disposed.
 */
timeoutMs?: number;
}
interface SandboxAgentConnectCommonOptions {
headers?: HeadersInit;
@ -60,6 +68,7 @@ interface SandboxAgentConnectCommonOptions {
replayMaxEvents?: number;
replayMaxChars?: number;
token?: string;
waitForHealth?: boolean | SandboxAgentHealthWaitOptions;
}
export type SandboxAgentConnectOptions =
@ -442,12 +451,16 @@ export class SandboxAgent {
private readonly token?: string;
private readonly fetcher: typeof fetch;
private readonly defaultHeaders?: HeadersInit;
private readonly healthWait: NormalizedHealthWaitOptions;
private readonly persist: SessionPersistDriver;
private readonly replayMaxEvents: number;
private readonly replayMaxChars: number;
private spawnHandle?: SandboxAgentSpawnHandle;
private healthPromise?: Promise<void>;
private healthError?: Error;
private disposed = false;
private readonly liveConnections = new Map<string, LiveAcpConnection>();
private readonly pendingLiveConnections = new Map<string, Promise<LiveAcpConnection>>();
@ -469,10 +482,13 @@ export class SandboxAgent {
}
this.fetcher = resolvedFetch;
this.defaultHeaders = options.headers;
this.healthWait = normalizeHealthWaitOptions(options.waitForHealth);
this.persist = options.persist ?? new InMemorySessionPersistDriver();
this.replayMaxEvents = normalizePositiveInt(options.replayMaxEvents, DEFAULT_REPLAY_MAX_EVENTS);
this.replayMaxChars = normalizePositiveInt(options.replayMaxChars, DEFAULT_REPLAY_MAX_CHARS);
this.startHealthWait();
}
static async connect(options: SandboxAgentConnectOptions): Promise<SandboxAgent> {
@ -504,6 +520,8 @@ export class SandboxAgent {
}
async dispose(): Promise<void> {
this.disposed = true;
const connections = [...this.liveConnections.values()];
this.liveConnections.clear();
const pending = [...this.pendingLiveConnections.values()];
@ -671,7 +689,7 @@ export class SandboxAgent {
}
async getHealth(): Promise<HealthResponse> {
return this.requestJson("GET", `${API_PREFIX}/health`);
return this.requestJson("GET", `${API_PREFIX}/health`, { skipReadyWait: true });
}
async listAgents(options?: { config?: boolean }): Promise<AgentListResponse> {
@ -772,6 +790,8 @@ export class SandboxAgent {
}
private async getLiveConnection(agent: string): Promise<LiveAcpConnection> {
await this.awaitHealthy();
const existing = this.liveConnections.get(agent);
if (existing) {
return existing;
@ -952,6 +972,7 @@ export class SandboxAgent {
headers: options.headers,
accept: options.accept ?? "application/json",
signal: options.signal,
skipReadyWait: options.skipReadyWait,
});
if (response.status === 204) {
@ -962,6 +983,10 @@ export class SandboxAgent {
}
private async requestRaw(method: string, path: string, options: RequestOptions = {}): Promise<Response> {
if (!options.skipReadyWait) {
await this.awaitHealthy();
}
const url = this.buildUrl(path, options.query);
const headers = this.buildHeaders(options.headers);
@ -998,6 +1023,69 @@ export class SandboxAgent {
return response;
}
/**
 * Starts the background health wait, at most once per client.
 *
 * Does nothing when the health wait is disabled or has already been started.
 * A failure of the wait is not left as a rejected promise; it is recorded in
 * `healthError` so that `awaitHealthy()` can rethrow it to each caller.
 */
private startHealthWait(): void {
  const alreadyStarted = Boolean(this.healthPromise);
  if (alreadyStarted || !this.healthWait.enabled) {
    return;
  }

  const pending = this.runHealthWait();
  this.healthPromise = pending.catch((error) => {
    if (error instanceof Error) {
      this.healthError = error;
    } else {
      this.healthError = new Error(String(error));
    }
  });
}
/**
 * Resolves once the health wait (if one was started) has settled.
 *
 * Returns immediately when no health wait exists for this client.
 *
 * @throws The error recorded in `healthError` when the health wait failed.
 */
private async awaitHealthy(): Promise<void> {
  if (this.healthPromise) {
    await this.healthPromise;

    if (this.healthError) {
      throw this.healthError;
    }
  }
}
/**
 * Polls the health endpoint until it reports `status === "ok"`, the client is
 * disposed, or the configured timeout elapses.
 *
 * Unsuccessful probes are retried with a delay that starts at
 * `HEALTH_WAIT_MIN_DELAY_MS` and doubles up to `HEALTH_WAIT_MAX_DELAY_MS`.
 * Each sleep is clamped to the time remaining before the deadline so that a
 * short `timeoutMs` is not overshot by a full backoff interval.
 *
 * A warning is logged once the wait has lasted `HEALTH_WAIT_LOG_AFTER_MS`, and
 * afterwards no more often than every `HEALTH_WAIT_LOG_EVERY_MS`.
 *
 * @returns Resolves when the server is healthy, or when the client was disposed
 *   while waiting.
 * @throws Error when the deadline passes without a healthy response; the message
 *   includes the last observed failure.
 */
private async runHealthWait(): Promise<void> {
  const startedAt = Date.now();
  // No deadline when no timeout was configured: wait until healthy or disposed.
  const deadline =
    typeof this.healthWait.timeoutMs === "number" ? startedAt + this.healthWait.timeoutMs : undefined;

  let delayMs = HEALTH_WAIT_MIN_DELAY_MS;
  let nextLogAt = startedAt + HEALTH_WAIT_LOG_AFTER_MS;
  let lastError: unknown;

  while (!this.disposed && (deadline === undefined || Date.now() < deadline)) {
    try {
      const health = await this.getHealth();
      if (health.status === "ok") {
        return;
      }
      lastError = new Error(`Unexpected health response: ${JSON.stringify(health)}`);
    } catch (error) {
      // Keep the most recent failure so it can be reported in warnings and on timeout.
      lastError = error;
    }

    const now = Date.now();
    if (now >= nextLogAt) {
      const details = formatHealthWaitError(lastError);
      console.warn(
        `sandbox-agent at ${this.baseUrl} is not healthy after ${now - startedAt}ms; still waiting (${details})`,
      );
      nextLogAt = now + HEALTH_WAIT_LOG_EVERY_MS;
    }

    // Never sleep past the deadline: the timeout should fire close to `timeoutMs`
    // rather than after the end of the current backoff interval.
    const remainingMs = deadline === undefined ? delayMs : deadline - now;
    if (remainingMs <= 0) {
      break;
    }

    await sleep(Math.min(delayMs, remainingMs));
    delayMs = Math.min(HEALTH_WAIT_MAX_DELAY_MS, delayMs * 2);
  }

  // A disposed client stops waiting silently instead of reporting a timeout.
  if (this.disposed) {
    return;
  }

  throw new Error(
    `Timed out waiting for sandbox-agent health after ${this.healthWait.timeoutMs}ms (${formatHealthWaitError(lastError)})`,
  );
}
private buildHeaders(extra?: HeadersInit): Headers {
const headers = new Headers(this.defaultHeaders ?? undefined);
@ -1039,8 +1127,13 @@ type RequestOptions = {
headers?: HeadersInit;
accept?: string;
signal?: AbortSignal;
skipReadyWait?: boolean;
};
// Normalized form of the `waitForHealth` connect option: either disabled, or
// enabled with an optional timeout in milliseconds (absent means no deadline).
type NormalizedHealthWaitOptions =
| { enabled: false; timeoutMs?: undefined }
| { enabled: true; timeoutMs?: number };
/**
* Auto-select and call `authenticate` based on the agent's advertised auth methods.
* Prefers env-var-based methods that the server process already has configured.
@ -1201,6 +1294,28 @@ function normalizePositiveInt(value: number | undefined, fallback: number): numb
return Math.floor(value as number);
}
/**
 * Converts the user-facing `waitForHealth` option into its normalized form.
 *
 * - `undefined` / `false` disables the health wait.
 * - `true` enables it without a deadline.
 * - An options object enables it; `timeoutMs` is kept only when it is a finite
 *   positive number, and is truncated to whole milliseconds with a floor of
 *   1 ms. Without that floor a fractional value such as `0.5` would truncate
 *   to `0` and produce a deadline that has already passed before the first probe.
 *
 * @param value Raw `waitForHealth` option as supplied by the caller.
 * @returns The normalized health-wait settings.
 */
function normalizeHealthWaitOptions(
  value: boolean | SandboxAgentHealthWaitOptions | undefined,
): NormalizedHealthWaitOptions {
  if (!value) {
    return { enabled: false };
  }

  if (value === true) {
    return { enabled: true };
  }

  const requested = value.timeoutMs;
  const timeoutMs =
    typeof requested === "number" && Number.isFinite(requested) && requested > 0
      ? Math.max(1, Math.floor(requested))
      : undefined;

  return {
    enabled: true,
    timeoutMs,
  };
}
function normalizeSpawnOptions(
spawn: SandboxAgentSpawnOptions | boolean | undefined,
defaultEnabled: boolean,
@ -1230,3 +1345,19 @@ async function readProblem(response: Response): Promise<ProblemDetails | undefin
return undefined;
}
}
/**
 * Produces a short human-readable description of a health-wait failure.
 *
 * @param error The value captured from the last unsuccessful health probe.
 * @returns The error's message when it is an `Error` with a non-empty message,
 *   `"unknown error"` for `null`/`undefined`, otherwise the value's string form.
 */
function formatHealthWaitError(error: unknown): string {
  if (error == null) {
    return "unknown error";
  }

  return error instanceof Error && error.message ? error.message : String(error);
}
/**
 * Returns a promise that resolves after the given delay.
 *
 * @param ms Delay in milliseconds, passed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}

View file

@ -10,6 +10,7 @@ export { AcpRpcError } from "acp-http-client";
export { buildInspectorUrl } from "./inspector.ts";
export type {
SandboxAgentHealthWaitOptions,
SandboxAgentConnectOptions,
SandboxAgentStartOptions,
SessionCreateRequest,

View file

@ -176,6 +176,74 @@ describe("Integration: TypeScript SDK flat session API", () => {
);
});
// Verifies that a plain HTTP helper (`listAgents`) is held back until the health
// endpoint has reported healthy, and that only health probes are sent before it.
it("waits for health before non-ACP HTTP helpers", async () => {
  const defaultFetch = globalThis.fetch;
  if (!defaultFetch) {
    throw new Error("Global fetch is not available in this runtime.");
  }

  let healthAttempts = 0;
  const seenPaths: string[] = [];

  // Records every requested path; answers the first two health probes with 503
  // and forwards everything else (including the third probe) to the real server.
  const customFetch: typeof fetch = async (input, init) => {
    const outgoing = new Request(input, init);
    const parsed = new URL(outgoing.url);
    seenPaths.push(parsed.pathname);

    if (parsed.pathname === "/v1/health") {
      healthAttempts += 1;
      if (healthAttempts < 3) {
        return new Response("warming up", { status: 503 });
      }
    }

    const forwardedUrl = new URL(`${parsed.pathname}${parsed.search}`, baseUrl);
    const forwarded = new Request(forwardedUrl.toString(), outgoing);
    return defaultFetch(forwarded);
  };

  const sdk = await SandboxAgent.connect({
    token,
    fetch: customFetch,
    waitForHealth: true,
  });

  // Dispose in `finally` so a failed assertion or request does not leak the client.
  try {
    const agents = await sdk.listAgents();
    expect(Array.isArray(agents.agents)).toBe(true);
    expect(healthAttempts).toBe(3);

    const firstAgentsRequest = seenPaths.indexOf("/v1/agents");
    expect(firstAgentsRequest).toBeGreaterThanOrEqual(0);
    expect(seenPaths.slice(0, firstAgentsRequest)).toEqual([
      "/v1/health",
      "/v1/health",
      "/v1/health",
    ]);
  } finally {
    await sdk.dispose();
  }
});
// Verifies that a request gated on readiness rejects with the health-wait
// timeout error when the health endpoint never becomes healthy.
it("surfaces health timeout when a request awaits readiness", async () => {
  // Every health probe fails; any other path means the gate let a request through.
  const customFetch: typeof fetch = async (input, init) => {
    const outgoing = new Request(input, init);
    const parsed = new URL(outgoing.url);

    if (parsed.pathname === "/v1/health") {
      return new Response("warming up", { status: 503 });
    }

    throw new Error(`Unexpected request path during timeout test: ${parsed.pathname}`);
  };

  const sdk = await SandboxAgent.connect({
    token,
    fetch: customFetch,
    waitForHealth: { timeoutMs: 100 },
  });

  // Dispose in `finally` so a failed expectation does not leak the client.
  try {
    await expect(sdk.listAgents()).rejects.toThrow("Timed out waiting for sandbox-agent health");
  } finally {
    await sdk.dispose();
  }
});
it("restores a session on stale connection by recreating and replaying history on first prompt", async () => {
const persist = new InMemorySessionPersistDriver({
maxEventsPerSession: 200,