diff --git a/.claude/commands/post-release-testing.md b/.claude/commands/post-release-testing.md index 09e2b6a..10cf6ff 100644 --- a/.claude/commands/post-release-testing.md +++ b/.claude/commands/post-release-testing.md @@ -43,7 +43,7 @@ Manually verify the install script works in a fresh environment: ```bash docker run --rm alpine:latest sh -c " apk add --no-cache curl ca-certificates libstdc++ libgcc bash && - curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && + curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && sandbox-agent --version " ``` diff --git a/CLAUDE.md b/CLAUDE.md index cff74eb..248f075 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -20,7 +20,7 @@ - For HTTP/CLI docs/examples, source of truth is: - `server/packages/sandbox-agent/src/router.rs` - `server/packages/sandbox-agent/src/cli.rs` -- Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy `/v1/sessions` APIs). +- Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy session REST APIs). ## Change Tracking @@ -78,4 +78,3 @@ - `scripts/release/main.ts` - `scripts/release/promote-artifacts.ts` - `scripts/release/sdk.ts` - - `scripts/sandbox-testing/test-sandbox.ts` diff --git a/Cargo.toml b/Cargo.toml index 699502e..0fc4dc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ members = ["server/packages/*", "gigacode"] exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"] [workspace.package] -version = "0.5.0-rc.1" +version = "0.4.2" edition = "2021" authors = [ "Rivet Gaming, LLC " ] license = "Apache-2.0" @@ -13,13 +13,13 @@ description = "Universal API for automatic coding agents in sandboxes. 
Supports [workspace.dependencies] # Internal crates -sandbox-agent = { version = "0.5.0-rc.1", path = "server/packages/sandbox-agent" } -sandbox-agent-error = { version = "0.5.0-rc.1", path = "server/packages/error" } -sandbox-agent-agent-management = { version = "0.5.0-rc.1", path = "server/packages/agent-management" } -sandbox-agent-agent-credentials = { version = "0.5.0-rc.1", path = "server/packages/agent-credentials" } -sandbox-agent-opencode-adapter = { version = "0.5.0-rc.1", path = "server/packages/opencode-adapter" } -sandbox-agent-opencode-server-manager = { version = "0.5.0-rc.1", path = "server/packages/opencode-server-manager" } -acp-http-adapter = { version = "0.5.0-rc.1", path = "server/packages/acp-http-adapter" } +sandbox-agent = { version = "0.4.2", path = "server/packages/sandbox-agent" } +sandbox-agent-error = { version = "0.4.2", path = "server/packages/error" } +sandbox-agent-agent-management = { version = "0.4.2", path = "server/packages/agent-management" } +sandbox-agent-agent-credentials = { version = "0.4.2", path = "server/packages/agent-credentials" } +sandbox-agent-opencode-adapter = { version = "0.4.2", path = "server/packages/opencode-adapter" } +sandbox-agent-opencode-server-manager = { version = "0.4.2", path = "server/packages/opencode-server-manager" } +acp-http-adapter = { version = "0.4.2", path = "server/packages/acp-http-adapter" } # Serialization serde = { version = "1.0", features = ["derive"] } diff --git a/README.md b/README.md index eb427d7..cf9b933 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,11 @@ Import the SDK directly into your Node or browser application. Full type safety **Install** ```bash -npm install sandbox-agent@0.3.x +npm install sandbox-agent@0.4.x ``` ```bash -bun add sandbox-agent@0.3.x +bun add sandbox-agent@0.4.x # Optional: allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). 
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 ``` @@ -135,7 +135,7 @@ Run as an HTTP server and connect from any language. Deploy to E2B, Daytona, Ver ```bash # Install it -curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh # Run it sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468 ``` @@ -159,12 +159,12 @@ sandbox-agent server --no-token --host 127.0.0.1 --port 2468 Install the CLI wrapper (optional but convenient): ```bash -npm install -g @sandbox-agent/cli@0.3.x +npm install -g @sandbox-agent/cli@0.4.x ``` ```bash # Allow Bun to run postinstall scripts for native binaries. -bun add -g @sandbox-agent/cli@0.3.x +bun add -g @sandbox-agent/cli@0.4.x bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 ``` @@ -179,11 +179,11 @@ sandbox-agent api sessions send-message-stream my-session --message "Hello" --en You can also use npx like: ```bash -npx @sandbox-agent/cli@0.3.x --help +npx @sandbox-agent/cli@0.4.x --help ``` ```bash -bunx @sandbox-agent/cli@0.3.x --help +bunx @sandbox-agent/cli@0.4.x --help ``` [CLI documentation](https://sandboxagent.dev/docs/cli) diff --git a/docs/agent-sessions.mdx b/docs/agent-sessions.mdx index 0f9e2ab..0154537 100644 --- a/docs/agent-sessions.mdx +++ b/docs/agent-sessions.mdx @@ -51,6 +51,108 @@ await session.prompt([ unsubscribe(); ``` +### Event types + +Each event's `payload` contains a session update. The `sessionUpdate` field identifies the type. + + + +Streamed text or content from the agent's response. + +```json +{ + "sessionUpdate": "agent_message_chunk", + "content": { "type": "text", "text": "Here's how the repository is structured..." 
} +} +``` + + + +Internal reasoning from the agent (chain-of-thought / extended thinking). + +```json +{ + "sessionUpdate": "agent_thought_chunk", + "content": { "type": "text", "text": "I should start by looking at the project structure..." } +} +``` + + + +Echo of the user's prompt being processed. + +```json +{ + "sessionUpdate": "user_message_chunk", + "content": { "type": "text", "text": "Summarize the repository structure." } +} +``` + + + +The agent invoked a tool (file edit, terminal command, etc.). + +```json +{ + "sessionUpdate": "tool_call", + "toolCallId": "tc_abc123", + "title": "Read file", + "status": "in_progress", + "rawInput": { "path": "/src/index.ts" } +} +``` + + + +Progress or result update for an in-progress tool call. + +```json +{ + "sessionUpdate": "tool_call_update", + "toolCallId": "tc_abc123", + "status": "completed", + "content": [{ "type": "text", "text": "import express from 'express';\n..." }] +} +``` + + + +The agent's execution plan for the current task. + +```json +{ + "sessionUpdate": "plan", + "entries": [ + { "content": "Read the project structure", "status": "completed" }, + { "content": "Identify main entrypoints", "status": "in_progress" }, + { "content": "Write summary", "status": "pending" } + ] +} +``` + + + +Token usage metrics for the current turn. + +```json +{ + "sessionUpdate": "usage_update" +} +``` + + + +Session metadata changed (e.g. agent-generated title). + +```json +{ + "sessionUpdate": "session_info_update", + "title": "Repository structure analysis" +} +``` + + + ## Fetch persisted event history ```ts diff --git a/docs/architecture.mdx b/docs/architecture.mdx index a42bf62..61b4689 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -56,7 +56,7 @@ Agents are installed lazily on first use. To avoid the cold-start delay, pre-ins sandbox-agent install-agent --all ``` -The `rivetdev/sandbox-agent:0.5.0-rc.1-full` Docker image ships with all agents pre-installed. 
+The `rivetdev/sandbox-agent:0.4.2-full` Docker image ships with all agents pre-installed. ## Production-ready agent orchestration diff --git a/docs/deploy/boxlite.mdx b/docs/deploy/boxlite.mdx index 115d8b8..8c02bb4 100644 --- a/docs/deploy/boxlite.mdx +++ b/docs/deploy/boxlite.mdx @@ -20,7 +20,7 @@ that BoxLite can load directly (BoxLite has its own image store separate from Do ```dockerfile FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent codex ``` diff --git a/docs/deploy/cloudflare.mdx b/docs/deploy/cloudflare.mdx index 1cecdd7..c0370e4 100644 --- a/docs/deploy/cloudflare.mdx +++ b/docs/deploy/cloudflare.mdx @@ -25,7 +25,7 @@ cd my-sandbox ```dockerfile FROM cloudflare/sandbox:0.7.0 -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh RUN sandbox-agent install-agent claude && sandbox-agent install-agent codex EXPOSE 8000 @@ -36,7 +36,7 @@ EXPOSE 8000 For standalone scripts, use the `cloudflare` provider: ```bash -npm install sandbox-agent@0.3.x @cloudflare/sandbox +npm install sandbox-agent@0.4.x @cloudflare/sandbox ``` ```typescript diff --git a/docs/deploy/computesdk.mdx b/docs/deploy/computesdk.mdx index 1adfffe..601d9c7 100644 --- a/docs/deploy/computesdk.mdx +++ b/docs/deploy/computesdk.mdx @@ -14,7 +14,7 @@ description: "Deploy Sandbox Agent using ComputeSDK's provider-agnostic sandbox ## TypeScript example ```bash -npm install sandbox-agent@0.3.x computesdk +npm install sandbox-agent@0.4.x computesdk ``` ```typescript @@ -27,7 +27,11 @@ if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY const sdk = await SandboxAgent.start({ 
sandbox: computesdk({ - create: { envs }, + create: { + envs, + image: process.env.COMPUTESDK_IMAGE, + templateId: process.env.COMPUTESDK_TEMPLATE_ID, + }, }), }); @@ -43,6 +47,7 @@ try { ``` The `computesdk` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. ComputeSDK routes to your configured provider behind the scenes. +The `create` option now forwards the full ComputeSDK sandbox-create payload, including provider-specific fields such as `image` and `templateId` when the selected provider supports them. Before calling `SandboxAgent.start()`, configure ComputeSDK with your provider: diff --git a/docs/deploy/daytona.mdx b/docs/deploy/daytona.mdx index 059aa11..e546bef 100644 --- a/docs/deploy/daytona.mdx +++ b/docs/deploy/daytona.mdx @@ -16,7 +16,7 @@ See [Daytona network limits](https://www.daytona.io/docs/en/network-limits/). ## TypeScript example ```bash -npm install sandbox-agent@0.3.x @daytonaio/sdk +npm install sandbox-agent@0.4.x @daytonaio/sdk ``` ```typescript @@ -44,7 +44,7 @@ try { } ``` -The `daytona` provider uses the `rivetdev/sandbox-agent:0.5.0-rc.1-full` image by default and starts the server automatically. +The `daytona` provider uses the `rivetdev/sandbox-agent:0.4.2-full` image by default and starts the server automatically. 
## Using snapshots for faster startup @@ -61,7 +61,7 @@ if (!hasSnapshot) { name: SNAPSHOT, image: Image.base("ubuntu:22.04").runCommands( "apt-get update && apt-get install -y curl ca-certificates", - "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh", + "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh", "sandbox-agent install-agent claude", "sandbox-agent install-agent codex", ), diff --git a/docs/deploy/docker.mdx b/docs/deploy/docker.mdx index 74879c7..c5a3432 100644 --- a/docs/deploy/docker.mdx +++ b/docs/deploy/docker.mdx @@ -15,11 +15,11 @@ Run the published full image with all supported agents pre-installed: docker run --rm -p 3000:3000 \ -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \ -e OPENAI_API_KEY="$OPENAI_API_KEY" \ - rivetdev/sandbox-agent:0.5.0-rc.1-full \ + rivetdev/sandbox-agent:0.4.2-full \ server --no-token --host 0.0.0.0 --port 3000 ``` -The `0.5.0-rc.1-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image. +The `0.4.2-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image. 
If you also want the desktop API inside the container, install desktop dependencies before starting the server: @@ -31,7 +31,7 @@ docker run --rm -p 3000:3000 \ apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates bash libstdc++6 && \ rm -rf /var/lib/apt/lists/* && \ - curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ + curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \ sandbox-agent install desktop --yes && \ sandbox-agent server --no-token --host 0.0.0.0 --port 3000" ``` @@ -52,7 +52,7 @@ const docker = new Docker(); const PORT = 3000; const container = await docker.createContainer({ - Image: "rivetdev/sandbox-agent:0.5.0-rc.1-full", + Image: "rivetdev/sandbox-agent:0.4.2-full", Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`], Env: [ `ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`, @@ -86,7 +86,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ bash ca-certificates curl git && \ rm -rf /var/lib/apt/lists/* -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \ sandbox-agent install-agent --all RUN useradd -m -s /bin/bash sandbox diff --git a/docs/deploy/e2b.mdx b/docs/deploy/e2b.mdx index e6465f2..225cfdc 100644 --- a/docs/deploy/e2b.mdx +++ b/docs/deploy/e2b.mdx @@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside an E2B sandbox." 
## TypeScript example ```bash -npm install sandbox-agent@0.3.x @e2b/code-interpreter +npm install sandbox-agent@0.4.x @e2b/code-interpreter ``` ```typescript @@ -21,9 +21,11 @@ import { e2b } from "sandbox-agent/e2b"; const envs: Record = {}; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; +const template = process.env.E2B_TEMPLATE; const sdk = await SandboxAgent.start({ sandbox: e2b({ + template, create: { envs }, }), }); @@ -41,7 +43,10 @@ try { The `e2b` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. Sandboxes pause by default instead of being deleted, and reconnecting with the same `sandboxId` resumes them automatically. +Pass `template` when you want to start from a custom E2B template alias or template ID. E2B base-image selection happens when you build the template, then `sandbox-agent/e2b` uses that template at sandbox creation time. + ## Faster cold starts For faster startup, create a custom E2B template with Sandbox Agent and target agents pre-installed. -See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template). +Build System 2.0 also lets you choose the template's base image in code. +See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template) and [E2B Base Images](https://e2b.dev/docs/template/base-image). diff --git a/docs/deploy/foundry-self-hosting.mdx b/docs/deploy/foundry-self-hosting.mdx deleted file mode 100644 index 8fd43ae..0000000 --- a/docs/deploy/foundry-self-hosting.mdx +++ /dev/null @@ -1,155 +0,0 @@ ---- -title: "Foundry Self-Hosting" -description: "Environment, credentials, and deployment setup for Sandbox Agent Foundry auth, GitHub, and billing." ---- - -This guide documents the deployment contract for the Foundry product surface: app auth, GitHub onboarding, repository import, and billing. 
- -It also covers the local-development bootstrap that uses `.env.development` only when `NODE_ENV=development`. - -## Local Development - -For backend local development, the Foundry backend now supports a development-only dotenv bootstrap: - -- It loads `.env.development.local` and `.env.development` -- It does this **only** when `NODE_ENV=development` -- It does **not** load dotenv files in production - -The example file lives at [`/.env.development.example`](https://github.com/rivet-dev/sandbox-agent/blob/main/.env.development.example). - -To use it locally: - -```bash -cp .env.development.example .env.development -``` - -Run the backend with: - -```bash -just foundry-backend-start -``` - -That recipe sets `NODE_ENV=development`, which enables the dotenv loader. - -### Local Defaults - -These values can be safely defaulted for local development: - -- `APP_URL=http://localhost:4173` -- `BETTER_AUTH_URL=http://localhost:7741` -- `BETTER_AUTH_SECRET=sandbox-agent-foundry-development-only-change-me` -- `GITHUB_REDIRECT_URI=http://localhost:7741/v1/auth/callback/github` - -These should be treated as development-only values. - -## Production Environment - -For production or self-hosting, set these as real environment variables in your deployment platform. Do not rely on dotenv file loading. 
- -### App/Auth - -| Variable | Required | Notes | -|---|---:|---| -| `APP_URL` | Yes | Public frontend origin | -| `BETTER_AUTH_URL` | Yes | Public auth base URL | -| `BETTER_AUTH_SECRET` | Yes | Strong random secret for auth/session signing | - -### GitHub OAuth - -| Variable | Required | Notes | -|---|---:|---| -| `GITHUB_CLIENT_ID` | Yes | GitHub OAuth app client id | -| `GITHUB_CLIENT_SECRET` | Yes | GitHub OAuth app client secret | -| `GITHUB_REDIRECT_URI` | Yes | GitHub OAuth callback URL | - -Use GitHub OAuth for: - -- user sign-in -- user identity -- org selection -- access to the signed-in user’s GitHub context - -## GitHub App - -If your Foundry deployment uses GitHub App-backed organization install and repo import, also configure: - -| Variable | Required | Notes | -|---|---:|---| -| `GITHUB_APP_ID` | Yes | GitHub App id | -| `GITHUB_APP_CLIENT_ID` | Yes | GitHub App client id | -| `GITHUB_APP_CLIENT_SECRET` | Yes | GitHub App client secret | -| `GITHUB_APP_PRIVATE_KEY` | Yes | PEM private key for installation auth | - -For `.env.development` and `.env.development.local`, store `GITHUB_APP_PRIVATE_KEY` as a quoted single-line value with `\n` escapes instead of raw multi-line PEM text. - -Recommended GitHub App permissions: - -- Repository `Metadata: Read` -- Repository `Contents: Read & Write` -- Repository `Pull requests: Read & Write` -- Repository `Checks: Read` -- Repository `Commit statuses: Read` - -Set the webhook URL to `https:///v1/webhooks/github` and generate a webhook secret. Store the secret as `GITHUB_WEBHOOK_SECRET`. - -This is required, not optional. Foundry depends on GitHub App webhook delivery for installation lifecycle changes, repo access changes, and ongoing repo / pull request sync. If the GitHub App is not installed for the workspace, or webhook delivery is misconfigured, Foundry will remain in an install / reconnect state and core GitHub-backed functionality will not work correctly. 
- -Recommended webhook subscriptions: - -- `installation` -- `installation_repositories` -- `pull_request` -- `pull_request_review` -- `pull_request_review_comment` -- `push` -- `create` -- `delete` -- `check_suite` -- `check_run` -- `status` - -Use the GitHub App for: - -- installation/reconnect state -- org repo import -- repository sync -- PR creation and updates - -Use GitHub OAuth for: - -- who the user is -- which orgs they can choose - -## Stripe - -For live billing, configure: - -| Variable | Required | Notes | -|---|---:|---| -| `STRIPE_SECRET_KEY` | Yes | Server-side Stripe secret key | -| `STRIPE_PUBLISHABLE_KEY` | Yes | Client-side Stripe publishable key | -| `STRIPE_WEBHOOK_SECRET` | Yes | Signing secret for billing webhooks | -| `STRIPE_PRICE_TEAM` | Yes | Stripe price id for the Team plan checkout session | - -Stripe should own: - -- hosted checkout -- billing portal -- subscription status -- invoice history -- webhook-driven state sync - -## Mock Invariant - -Foundry’s mock client path should continue to work end to end even when the real auth/GitHub/Stripe path exists. - -That includes: - -- sign-in -- org selection/import -- settings -- billing UI -- workspace/task/session flow -- seat accrual - -Use mock mode for deterministic UI review and local product development. Use the real env-backed path for integration and self-hosting. diff --git a/docs/deploy/local.mdx b/docs/deploy/local.mdx index 90e2ba6..6ecdb09 100644 --- a/docs/deploy/local.mdx +++ b/docs/deploy/local.mdx @@ -9,7 +9,7 @@ For local development, run Sandbox Agent directly on your machine. 
```bash # Install -curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh # Run sandbox-agent server --no-token --host 127.0.0.1 --port 2468 @@ -20,12 +20,12 @@ Or with npm/Bun: ```bash - npx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 + npx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468 ``` ```bash - bunx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 + bunx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468 ``` diff --git a/docs/deploy/modal.mdx b/docs/deploy/modal.mdx index 02a3828..5850fd8 100644 --- a/docs/deploy/modal.mdx +++ b/docs/deploy/modal.mdx @@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside a Modal sandbox." ## TypeScript example ```bash -npm install sandbox-agent@0.3.x modal +npm install sandbox-agent@0.4.x modal ``` ```typescript @@ -21,9 +21,11 @@ import { modal } from "sandbox-agent/modal"; const secrets: Record = {}; if (process.env.ANTHROPIC_API_KEY) secrets.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.OPENAI_API_KEY) secrets.OPENAI_API_KEY = process.env.OPENAI_API_KEY; +const baseImage = process.env.MODAL_BASE_IMAGE ?? "node:22-slim"; const sdk = await SandboxAgent.start({ sandbox: modal({ + image: baseImage, create: { secrets }, }), }); @@ -40,6 +42,7 @@ try { ``` The `modal` provider handles app creation, image building, sandbox provisioning, agent installation, server startup, and tunnel networking automatically. +Set `image` to change the base Docker image before Sandbox Agent and its agent binaries are layered on top. You can also pass a prebuilt Modal `Image` object. ## Faster cold starts diff --git a/docs/deploy/vercel.mdx b/docs/deploy/vercel.mdx index db97236..ec931d8 100644 --- a/docs/deploy/vercel.mdx +++ b/docs/deploy/vercel.mdx @@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside a Vercel Sandbox." 
## TypeScript example ```bash -npm install sandbox-agent@0.3.x @vercel/sandbox +npm install sandbox-agent@0.4.x @vercel/sandbox ``` ```typescript diff --git a/docs/docs.json b/docs/docs.json index 0c2b19a..dbcc407 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1,6 +1,6 @@ { "$schema": "https://mintlify.com/docs.json", - "theme": "willow", + "theme": "mint", "name": "Sandbox Agent SDK", "appearance": { "default": "dark", @@ -8,8 +8,8 @@ }, "colors": { "primary": "#ff4f00", - "light": "#ff4f00", - "dark": "#ff4f00" + "light": "#ff6a2a", + "dark": "#cc3f00" }, "favicon": "/favicon.svg", "logo": { @@ -25,17 +25,13 @@ }, "navbar": { "links": [ - { - "label": "Gigacode", - "icon": "terminal", - "href": "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode" - }, { "label": "Discord", "icon": "discord", "href": "https://discord.gg/auCecybynK" }, { + "label": "GitHub", "type": "github", "href": "https://github.com/rivet-dev/sandbox-agent" } @@ -89,13 +85,10 @@ "group": "System", "pages": ["file-system", "processes", "computer-use", "common-software"] }, - { - "group": "Orchestration", - "pages": ["orchestration-architecture", "session-persistence", "observability", "multiplayer", "security"] - }, { "group": "Reference", "pages": [ + "troubleshooting", "architecture", "cli", "inspector", @@ -127,5 +120,11 @@ ] } ] - } + }, + "__removed": [ + { + "group": "Orchestration", + "pages": ["orchestration-architecture", "session-persistence", "observability", "multiplayer", "security"] + } + ] } diff --git a/docs/gigacode.mdx b/docs/gigacode.mdx deleted file mode 100644 index ccc9e39..0000000 --- a/docs/gigacode.mdx +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: Gigacode -url: "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode" ---- - - diff --git a/docs/openapi.json b/docs/openapi.json index 11ff956..3624707 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10,7 +10,7 @@ "license": { "name": "Apache-2.0" }, - "version": "0.5.0-rc.1" + "version": 
"0.4.2" }, "servers": [ { diff --git a/docs/pi-support-plan.md b/docs/pi-support-plan.md deleted file mode 100644 index 5e207a5..0000000 --- a/docs/pi-support-plan.md +++ /dev/null @@ -1,210 +0,0 @@ -# Pi Agent Support Plan (pi-mono) - -## Implementation Status Update - -- Runtime selection now supports two internal modes: - - `PerSession` (default for unknown/non-allowlisted Pi capabilities) - - `Shared` (allowlist-only compatibility path) -- Pi sessions now use per-session process isolation by default, enabling true concurrent Pi sessions in Inspector and API clients. -- Shared Pi server code remains available and is used only when capability checks allow multiplexing. -- Session termination for per-session Pi mode hard-kills the underlying Pi process and clears queued prompts/pending waiters. -- In-session concurrent sends are serialized with an unbounded daemon-side FIFO queue per session. - -## Investigation Summary - -### Pi CLI modes and RPC protocol -- Pi supports multiple modes including interactive, print/JSON output, RPC, and SDK usage. JSON mode outputs a stream of JSON events suitable for parsing, and RPC mode is intended for programmatic control over stdin/stdout. -- RPC mode is started with `pi --mode rpc` and supports options like `--provider`, `--model`, `--no-session`, and `--session-dir`. -- The RPC protocol is newline-delimited JSON over stdin/stdout: - - Commands are JSON objects written to stdin. - - Responses are JSON objects with `type: "response"` and optional `id`. - - Events are JSON objects without `id`. -- `prompt` can include images using `ImageContent` (base64 or URL) alongside text. -- JSON/print mode (`pi -p` or `pi --print --mode json`) produces JSONL for non-interactive parsing and can resume sessions with a token. 
- -### RPC commands -RPC commands listed in `rpc.md` include: -- `new_session`, `get_state`, `list_sessions`, `delete_session`, `rename_session`, `clear_session` -- `prompt`, `queue_message`, `abort`, `get_queued_messages` - -### RPC event types -RPC events listed in `rpc.md` include: -- `agent_start`, `agent_end` -- `turn_start`, `turn_end` -- `message_start`, `message_update`, `message_end` -- `tool_execution_start`, `tool_execution_update`, `tool_execution_end` -- `auto_compaction`, `auto_retry`, `hook_error` - -`message_update` uses `assistantMessageEvent` deltas such as: -- `start`, `text_start`, `text_delta`, `text_end` -- `thinking_start`, `thinking_delta`, `thinking_end` -- `toolcall_start`, `toolcall_delta`, `toolcall_end` -- `toolcall_args_start`, `toolcall_args_delta`, `toolcall_args_end` -- `done`, `error` - -`tool_execution_update` includes `partialResult`, which is described as accumulated output so far. - -### Schema source locations (pi-mono) -RPC types are documented as living in: -- `packages/ai/src/types.ts` (Model types) -- `packages/agent/src/types.ts` (AgentResponse types) -- `packages/coding-agent/src/core/messages.ts` (message types) -- `packages/coding-agent/src/modes/rpc/rpc-types.ts` (RPC protocol types) - -### Distribution assets -Pi releases provide platform-specific binaries such as: -- `pi-darwin-arm64`, `pi-darwin-x64` -- `pi-linux-arm64`, `pi-linux-x64` -- `pi-win-x64.zip` - -## Integration Decisions -- Follow the OpenCode pattern: a shared long-running process (stdio RPC) with session multiplexing. -- Primary integration path is RPC streaming (`pi --mode rpc`). -- JSON/print mode is a fallback only (diagnostics or non-interactive runs). -- Create sessions via `new_session`; store the returned `sessionId` as `native_session_id`. -- Use `get_state` as a re-sync path after server restarts. -- Use `prompt` for send-message, with optional image content. 
-- Convert Pi events into universal events; emit daemon synthetic `session.started` on session creation and `session.ended` only on errors/termination. - -## Implementation Plan - -### 1) Agent Identity + Capabilities -Files: -- `server/packages/agent-management/src/agents.rs` -- `server/packages/sandbox-agent/src/router.rs` -- `docs/cli.mdx`, `docs/conversion.mdx`, `docs/session-transcript-schema.mdx` -- `README.md`, `frontend/packages/website/src/components/FAQ.tsx` - -Tasks: -- Add `AgentId::Pi` with string/binary name `"pi"` and parsing rules. -- Add Pi to `all_agents()` and agent lists. -- Define `AgentCapabilities` for Pi: - - `tool_calls=true`, `tool_results=true` - - `text_messages=true`, `streaming_deltas=true`, `item_started=true` - - `reasoning=true` (from `thinking_*` deltas) - - `images=true` (ImageContent in `prompt`) - - `permissions=false`, `questions=false`, `mcp_tools=false` - - `shared_process=true`, `session_lifecycle=false` (no native session events) - - `error_events=true` (hook_error) - - `command_execution=false`, `file_changes=false`, `file_attachments=false` - -### 2) Installer and Binary Resolution -Files: -- `server/packages/agent-management/src/agents.rs` - -Tasks: -- Add `install_pi()` that: - - Downloads the correct release asset per platform (`pi-`). - - Handles `.zip` on Windows and raw binaries elsewhere. - - Marks binary executable. -- Add Pi to `AgentManager::install`, `is_installed`, `version`. -- Version detection: try `--version`, `version`, `-V`. - -### 3) Schema Extraction for Pi -Files: -- `resources/agent-schemas/src/pi.ts` (new) -- `resources/agent-schemas/src/index.ts` -- `resources/agent-schemas/artifacts/json-schema/pi.json` -- `server/packages/extracted-agent-schemas/build.rs` -- `server/packages/extracted-agent-schemas/src/lib.rs` - -Tasks: -- Implement `extractPiSchema()`: - - Download pi-mono sources (zip/tarball) into a temp dir. 
- - Use `ts-json-schema-generator` against `packages/coding-agent/src/modes/rpc/rpc-types.ts`. - - Include dependent files per `rpc.md` (ai/types, agent/types, core/messages). - - Extract `RpcEvent`, `RpcResponse`, `RpcCommand` unions (exact type names from source). -- Add fallback schema if remote fetch fails (minimal union with event/response fields). -- Wire pi into extractor index and artifact generation. - -### 4) Universal Schema Conversion (Pi -> Universal) -Files: -- `server/packages/universal-agent-schema/src/agents/pi.rs` (new) -- `server/packages/universal-agent-schema/src/agents/mod.rs` -- `server/packages/universal-agent-schema/src/lib.rs` -- `server/packages/sandbox-agent/src/router.rs` - -Mapping rules: -- `message_start` -> `item.started` (kind=message, role=assistant, native_item_id=messageId) -- `message_update`: - - `text_*` -> `item.delta` (assistant text delta) - - `thinking_*` -> `item.delta` with `ContentPart::Reasoning` (visibility=Private) - - `toolcall_*` and `toolcall_args_*` -> ignore for now (tool_execution_* is authoritative) - - `error` -> `item.completed` with `ItemStatus::Failed` (if no later message_end) -- `message_end` -> `item.completed` (finalize assistant message) -- `tool_execution_start` -> `item.started` (kind=tool_call, ContentPart::ToolCall) -- `tool_execution_update` -> `item.delta` for a synthetic tool_result item: - - Maintain a per-toolCallId buffer to compute delta from accumulated `partialResult`. -- `tool_execution_end` -> `item.completed` (kind=tool_result, output from `result.content`) - - If `isError=true`, set item status to failed. -- `agent_start`, `turn_start`, `turn_end`, `agent_end`, `auto_compaction`, `auto_retry`, `hook_error`: - - Map to `ItemKind::Status` with a label like `pi.agent_start`, `pi.auto_retry`, etc. - - Do not emit `session.ended` for these events. -- If event parsing fails, emit `agent.unparsed` (source=daemon, synthetic=true) and fail tests. 
- -### 5) Shared RPC Server Integration -Files: -- `server/packages/sandbox-agent/src/router.rs` - -Tasks: -- Add a new managed stdio server type for Pi, similar to Codex: - - Create `PiServer` struct with: - - stdin sender - - pending request map keyed by request id - - per-session native session id mapping - - Extend `ManagedServerKind` to include Pi. - - Add `ensure_pi_server()` and `spawn_pi_server()` using `pi --mode rpc`. - - Add a `handle_pi_server_output()` loop to parse stdout lines into events/responses. -- Session creation: - - On `create_session`, ensure Pi server is running, send `new_session`, store sessionId. - - Register session with `server_manager.register_session` for native mapping. -- Sending messages: - - Use `prompt` command; include sessionId and optional images. - - Emit synthetic `item.started` only if Pi does not emit `message_start`. - -### 6) Router + Streaming Path Changes -Files: -- `server/packages/sandbox-agent/src/router.rs` - -Tasks: -- Add Pi handling to: - - `create_session` (new_session) - - `send_message` (prompt) - - `parse_agent_line` (Pi event conversion) - - `agent_modes` (default to `default` unless Pi exposes a mode list) - - `agent_supports_resume` (true if Pi supports session resume) - -### 7) Tests -Files: -- `server/packages/sandbox-agent/tests/...` -- `server/packages/universal-agent-schema/tests/...` (if present) - -Tasks: -- Unit tests for conversion: - - `message_start/update/end` -> item.started/delta/completed - - `tool_execution_*` -> tool call/result mapping with partialResult delta - - failure -> agent.unparsed -- Integration tests: - - Start Pi RPC server, create session, send prompt, stream events. - - Validate `native_session_id` mapping and event ordering. -- Update HTTP/SSE test coverage to include Pi agent if relevant. - -## Risk Areas / Edge Cases -- `tool_execution_update.partialResult` is cumulative; must compute deltas. 
-- `message_update` may emit `done`/`error` without `message_end`; handle both paths. -- No native session lifecycle events; rely on daemon synthetic events. -- Session recovery after RPC server restart requires `get_state` + re-register sessions. - -## Acceptance Criteria -- Pi appears in `/v1/agents`, CLI list, and docs. -- `create_session` returns `native_session_id` from Pi `new_session`. -- Streaming prompt yields universal events with proper ordering: - - message -> item.started/delta/completed - - tool execution -> tool call + tool result -- Tests pass and no synthetic data is used in test fixtures. - -## Sources -- https://upd.dev/badlogic/pi-mono/src/commit/d36e0ea07303d8a76d51b4a7bd5f0d6d3c490860/packages/coding-agent/docs/rpc.md -- https://buildwithpi.ai/pi-cli -- https://takopi.dev/docs/pi-cli/ -- https://upd.dev/badlogic/pi-mono/releases diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 335e7df..223a54d 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -64,7 +64,7 @@ icon: "rocket" docker run -p 2468:2468 \ -e ANTHROPIC_API_KEY="sk-ant-..." \ -e OPENAI_API_KEY="sk-..." \ - rivetdev/sandbox-agent:0.5.0-rc.1-full \ + rivetdev/sandbox-agent:0.4.2-full \ server --no-token --host 0.0.0.0 --port 2468 ``` @@ -89,7 +89,7 @@ icon: "rocket" Install and run the binary directly. ```bash - curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh + curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh sandbox-agent server --no-token --host 0.0.0.0 --port 2468 ``` @@ -98,7 +98,7 @@ icon: "rocket" Run without installing globally. ```bash - npx @sandbox-agent/cli@0.3.x server --no-token --host 0.0.0.0 --port 2468 + npx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468 ``` @@ -106,7 +106,7 @@ icon: "rocket" Run without installing globally. 
```bash - bunx @sandbox-agent/cli@0.3.x server --no-token --host 0.0.0.0 --port 2468 + bunx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468 ``` @@ -114,7 +114,7 @@ icon: "rocket" Install globally, then run. ```bash - npm install -g @sandbox-agent/cli@0.3.x + npm install -g @sandbox-agent/cli@0.4.x sandbox-agent server --no-token --host 0.0.0.0 --port 2468 ``` @@ -123,7 +123,7 @@ icon: "rocket" Install globally, then run. ```bash - bun add -g @sandbox-agent/cli@0.3.x + bun add -g @sandbox-agent/cli@0.4.x # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 sandbox-agent server --no-token --host 0.0.0.0 --port 2468 @@ -134,7 +134,7 @@ icon: "rocket" For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess. ```bash - npm install sandbox-agent@0.3.x + npm install sandbox-agent@0.4.x ``` ```typescript @@ -148,7 +148,7 @@ icon: "rocket" For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess. ```bash - bun add sandbox-agent@0.3.x + bun add sandbox-agent@0.4.x # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). 
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 ``` diff --git a/docs/react-components.mdx b/docs/react-components.mdx index 93183b2..71a76d2 100644 --- a/docs/react-components.mdx +++ b/docs/react-components.mdx @@ -17,7 +17,7 @@ Current exports: ## Install ```bash -npm install @sandbox-agent/react@0.3.x +npm install @sandbox-agent/react@0.4.x ``` ## Full example diff --git a/docs/sdk-overview.mdx b/docs/sdk-overview.mdx index 8e87e2e..73e0d35 100644 --- a/docs/sdk-overview.mdx +++ b/docs/sdk-overview.mdx @@ -11,12 +11,12 @@ The TypeScript SDK is centered on `sandbox-agent` and its `SandboxAgent` class. ```bash - npm install sandbox-agent@0.3.x + npm install sandbox-agent@0.4.x ``` ```bash - bun add sandbox-agent@0.3.x + bun add sandbox-agent@0.4.x # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 ``` @@ -26,7 +26,7 @@ The TypeScript SDK is centered on `sandbox-agent` and its `SandboxAgent` class. ## Optional React components ```bash -npm install @sandbox-agent/react@0.3.x +npm install @sandbox-agent/react@0.4.x ``` ## Create a client diff --git a/docs/session-transcript-schema.mdx b/docs/session-transcript-schema.mdx deleted file mode 100644 index c9c004a..0000000 --- a/docs/session-transcript-schema.mdx +++ /dev/null @@ -1,388 +0,0 @@ ---- -title: "Session Transcript Schema" -description: "Universal event schema for session transcripts across all agents." ---- - -Each coding agent outputs events in its own native format. The sandbox-agent converts these into a universal event schema, giving you a consistent session transcript regardless of which agent you use. 
- -The schema is defined in [OpenAPI format](https://github.com/rivet-dev/sandbox-agent/blob/main/docs/openapi.json). See the [HTTP API Reference](/api-reference) for endpoint documentation. - -## Coverage Matrix - -This table shows which agent feature coverage appears in the universal event stream. All agents retain their full native feature coverage—this only reflects what's normalized into the schema. - -| Feature | Claude | Codex | OpenCode | Amp | Pi (RPC) | -|--------------------|:------:|:-----:|:------------:|:------------:|:------------:| -| Stability | Stable | Stable| Experimental | Experimental | Experimental | -| Text Messages | ✓ | ✓ | ✓ | ✓ | ✓ | -| Tool Calls | ✓ | ✓ | ✓ | ✓ | ✓ | -| Tool Results | ✓ | ✓ | ✓ | ✓ | ✓ | -| Questions (HITL) | ✓ | | ✓ | | | -| Permissions (HITL) | ✓ | ✓ | ✓ | - | | -| Images | - | ✓ | ✓ | - | ✓ | -| File Attachments | - | ✓ | ✓ | - | | -| Session Lifecycle | - | ✓ | ✓ | - | | -| Error Events | - | ✓ | ✓ | ✓ | ✓ | -| Reasoning/Thinking | - | ✓ | - | - | ✓ | -| Command Execution | - | ✓ | - | - | | -| File Changes | - | ✓ | - | - | | -| MCP Tools | ✓ | ✓ | ✓ | ✓ | | -| Streaming Deltas | ✓ | ✓ | ✓ | - | ✓ | -| Variants | | ✓ | ✓ | ✓ | ✓ | - -Agents: [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) · [Codex](https://github.com/openai/codex) · [OpenCode](https://github.com/opencode-ai/opencode) · [Amp](https://ampcode.com) · [Pi](https://buildwithpi.ai/pi-cli) - -- ✓ = Appears in session events -- \- = Agent supports natively, schema conversion coming soon -- (blank) = Not supported by agent -- Pi runtime model is router-managed per-session RPC (`pi --mode rpc`); it does not use generic subprocess streaming. - - - - Basic message exchange between user and assistant. - - - Visibility into tool invocations (file reads, command execution, etc.) and their results. When not natively supported, tool activity is embedded in message content. - - - Interactive questions the agent asks the user. 
Emits `question.requested` and `question.resolved` events. - - - Permission requests for sensitive operations. Emits `permission.requested` and `permission.resolved` events. - - - Support for image attachments in messages. - - - Support for file attachments in messages. - - - Native `session.started` and `session.ended` events. When not supported, the daemon emits synthetic lifecycle events. - - - Structured error events for runtime failures. - - - Extended thinking or reasoning content with visibility controls. - - - Detailed command execution events with stdout/stderr. - - - Structured file modification events with diffs. - - - Model Context Protocol tool support. - - - Native streaming of content deltas. When not supported, the daemon emits a single synthetic delta before `item.completed`. - - - Model variants such as reasoning effort or depth. Agents may expose different variant sets per model. - - - -Want support for another agent? [Open an issue](https://github.com/rivet-dev/sandbox-agent/issues/new) to request it. - -## UniversalEvent - -Every event from the API is wrapped in a `UniversalEvent` envelope. - -| Field | Type | Description | -|-------|------|-------------| -| `event_id` | string | Unique identifier for this event | -| `sequence` | integer | Monotonic sequence number within the session (starts at 1) | -| `time` | string | RFC3339 timestamp | -| `session_id` | string | Daemon-generated session identifier | -| `native_session_id` | string? | Provider-native session/thread identifier (e.g., Codex `threadId`, OpenCode `sessionID`) | -| `source` | string | Event origin: `agent` (native) or `daemon` (synthetic) | -| `synthetic` | boolean | Whether this event was generated by the daemon to fill gaps | -| `type` | string | Event type (see [Event Types](#event-types)) | -| `data` | object | Event-specific payload | -| `raw` | any? 
| Original provider payload (only when `include_raw=true`) | - -```json -{ - "event_id": "evt_abc123", - "sequence": 1, - "time": "2025-01-28T12:00:00Z", - "session_id": "my-session", - "native_session_id": "thread_xyz", - "source": "agent", - "synthetic": false, - "type": "item.completed", - "data": { ... } -} -``` - -## Event Types - -### Session Lifecycle - -| Type | Description | Data | -|------|-------------|------| -| `session.started` | Session has started | `{ metadata?: any }` | -| `session.ended` | Session has ended | `{ reason, terminated_by, message?, exit_code? }` | - -### Turn Lifecycle - -| Type | Description | Data | -|------|-------------|------| -| `turn.started` | Turn has started | `{ phase: "started", turn_id?, metadata? }` | -| `turn.ended` | Turn has ended | `{ phase: "ended", turn_id?, metadata? }` | - -**SessionEndedData** - -| Field | Type | Values | -|-------|------|--------| -| `reason` | string | `completed`, `error`, `terminated` | -| `terminated_by` | string | `agent`, `daemon` | -| `message` | string? | Error message (only present when reason is `error`) | -| `exit_code` | int? | Process exit code (only present when reason is `error`) | -| `stderr` | StderrOutput? | Structured stderr output (only present when reason is `error`) | - -**StderrOutput** - -| Field | Type | Description | -|-------|------|-------------| -| `head` | string? | First 20 lines of stderr (if truncated) or full stderr (if not truncated) | -| `tail` | string? | Last 50 lines of stderr (only present if truncated) | -| `truncated` | boolean | Whether the output was truncated | -| `total_lines` | int? 
| Total number of lines in stderr | - -### Item Lifecycle - -| Type | Description | Data | -|------|-------------|------| -| `item.started` | Item creation | `{ item }` | -| `item.delta` | Streaming content delta | `{ item_id, native_item_id?, delta }` | -| `item.completed` | Item finalized | `{ item }` | - -Items follow a consistent lifecycle: `item.started` → `item.delta` (0 or more) → `item.completed`. - -### HITL (Human-in-the-Loop) - -| Type | Description | Data | -|------|-------------|------| -| `permission.requested` | Permission request pending | `{ permission_id, action, status, metadata? }` | -| `permission.resolved` | Permission decision recorded | `{ permission_id, action, status, metadata? }` | -| `question.requested` | Question pending user input | `{ question_id, prompt, options, status }` | -| `question.resolved` | Question answered or rejected | `{ question_id, prompt, options, status, response? }` | - -**PermissionEventData** - -| Field | Type | Description | -|-------|------|-------------| -| `permission_id` | string | Identifier for the permission request | -| `action` | string | What the agent wants to do | -| `status` | string | `requested`, `accept`, `accept_for_session`, `reject` | -| `metadata` | any? | Additional context | - -**QuestionEventData** - -| Field | Type | Description | -|-------|------|-------------| -| `question_id` | string | Identifier for the question | -| `prompt` | string | Question text | -| `options` | string[] | Available answer options | -| `status` | string | `requested`, `answered`, `rejected` | -| `response` | string? | Selected answer (when resolved) | - -### Errors - -| Type | Description | Data | -|------|-------------|------| -| `error` | Runtime error | `{ message, code?, details? }` | -| `agent.unparsed` | Parse failure | `{ error, location, raw_hash? }` | - -The `agent.unparsed` event indicates the daemon failed to parse an agent payload. This should be treated as a bug. 
- -## UniversalItem - -Items represent discrete units of content within a session. - -| Field | Type | Description | -|-------|------|-------------| -| `item_id` | string | Daemon-generated identifier | -| `native_item_id` | string? | Provider-native item/message identifier | -| `parent_id` | string? | Parent item ID (e.g., tool call/result parented to a message) | -| `kind` | string | Item category (see below) | -| `role` | string? | Actor role for message items | -| `status` | string | Lifecycle status | -| `content` | ContentPart[] | Ordered list of content parts | - -### ItemKind - -| Value | Description | -|-------|-------------| -| `message` | User or assistant message | -| `tool_call` | Tool invocation | -| `tool_result` | Tool execution result | -| `system` | System message | -| `status` | Status update | -| `unknown` | Unrecognized item type | - -### ItemRole - -| Value | Description | -|-------|-------------| -| `user` | User message | -| `assistant` | Assistant response | -| `system` | System prompt | -| `tool` | Tool-related message | - -### ItemStatus - -| Value | Description | -|-------|-------------| -| `in_progress` | Item is streaming or pending | -| `completed` | Item is finalized | -| `failed` | Item execution failed | - -## Content Parts - -The `content` array contains typed parts that make up an item's payload. - -### text - -Plain text content. - -```json -{ "type": "text", "text": "Hello, world!" } -``` - -### json - -Structured JSON content. - -```json -{ "type": "json", "json": { "key": "value" } } -``` - -### tool_call - -Tool invocation. - -| Field | Type | Description | -|-------|------|-------------| -| `name` | string | Tool name | -| `arguments` | string | JSON-encoded arguments | -| `call_id` | string | Unique call identifier | - -```json -{ - "type": "tool_call", - "name": "read_file", - "arguments": "{\"path\": \"/src/main.ts\"}", - "call_id": "call_abc123" -} -``` - -### tool_result - -Tool execution result. 
- -| Field | Type | Description | -|-------|------|-------------| -| `call_id` | string | Matching call identifier | -| `output` | string | Tool output | - -```json -{ - "type": "tool_result", - "call_id": "call_abc123", - "output": "File contents here..." -} -``` - -### file_ref - -File reference with optional diff. - -| Field | Type | Description | -|-------|------|-------------| -| `path` | string | File path | -| `action` | string | `read`, `write`, `patch` | -| `diff` | string? | Unified diff (for patches) | - -```json -{ - "type": "file_ref", - "path": "/src/main.ts", - "action": "write", - "diff": "@@ -1,3 +1,4 @@\n+import { foo } from 'bar';" -} -``` - -### image - -Image reference. - -| Field | Type | Description | -|-------|------|-------------| -| `path` | string | Image file path | -| `mime` | string? | MIME type | - -```json -{ "type": "image", "path": "/tmp/screenshot.png", "mime": "image/png" } -``` - -### reasoning - -Model reasoning/thinking content. - -| Field | Type | Description | -|-------|------|-------------| -| `text` | string | Reasoning text | -| `visibility` | string | `public` or `private` | - -```json -{ "type": "reasoning", "text": "Let me think about this...", "visibility": "public" } -``` - -### status - -Status indicator. - -| Field | Type | Description | -|-------|------|-------------| -| `label` | string | Status label | -| `detail` | string? | Additional detail | - -```json -{ "type": "status", "label": "Running tests", "detail": "3 of 10 passed" } -``` - -## Source & Synthetics - -### EventSource - -The `source` field indicates who emitted the event: - -| Value | Description | -|-------|-------------| -| `agent` | Native event from the agent | -| `daemon` | Synthetic event generated by the daemon | - -### Synthetic Events - -The daemon emits synthetic events (`synthetic: true`, `source: "daemon"`) to provide a consistent event stream across all agents. 
Common synthetics: - -| Synthetic | When | -|-----------|------| -| `session.started` | Agent doesn't emit explicit session start | -| `session.ended` | Agent doesn't emit explicit session end | -| `turn.started` | Agent doesn't emit explicit turn start | -| `turn.ended` | Agent doesn't emit explicit turn end | -| `item.started` | Agent doesn't emit item start events | -| `item.delta` | Agent doesn't stream deltas natively | -| `question.*` | Claude Code plan mode (from ExitPlanMode tool) | - -### Raw Payloads - -Pass `include_raw=true` to event endpoints to receive the original agent payload in the `raw` field. Useful for debugging or accessing agent-specific data not in the universal schema. - -```typescript -const events = await client.getEvents("my-session", { includeRaw: true }); -// events[0].raw contains the original agent payload -``` diff --git a/docs/theme.css b/docs/theme.css index daeb719..4286d2c 100644 --- a/docs/theme.css +++ b/docs/theme.css @@ -20,7 +20,6 @@ body { color: var(--sa-text); } -/* a { color: var(--sa-primary); } @@ -41,6 +40,13 @@ select { color: var(--sa-text); } +code, +pre { + background-color: var(--sa-card); + border: 1px solid var(--sa-border); + color: var(--sa-text); +} + .card, .mintlify-card, .docs-card { @@ -64,4 +70,3 @@ select { .alert-danger { border-color: var(--sa-danger); } -*/ diff --git a/docs/troubleshooting.mdx b/docs/troubleshooting.mdx index 838cc28..18186d6 100644 --- a/docs/troubleshooting.mdx +++ b/docs/troubleshooting.mdx @@ -29,25 +29,6 @@ Verify the agent is installed: ls -la ~/.local/share/sandbox-agent/bin/ ``` -### 4. Binary libc mismatch (musl vs glibc) - -Claude Code binaries are available in both musl and glibc variants. If you see errors like: - -``` -cannot execute: required file not found -Error loading shared library libstdc++.so.6: No such file or directory -``` - -This means the wrong binary variant was downloaded. - -**For sandbox-agent 0.2.0+**: Platform detection is automatic. 
The correct binary (musl or glibc) is downloaded based on the runtime environment. - -**For sandbox-agent 0.1.x**: Use Alpine Linux which has native musl support: - -```dockerfile -FROM alpine:latest -RUN apk add --no-cache curl ca-certificates libstdc++ libgcc bash -``` ## Daytona Network Restrictions diff --git a/examples/boxlite-python/Dockerfile b/examples/boxlite-python/Dockerfile index 3630511..8aba774 100644 --- a/examples/boxlite-python/Dockerfile +++ b/examples/boxlite-python/Dockerfile @@ -1,5 +1,5 @@ FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent codex diff --git a/examples/boxlite/Dockerfile b/examples/boxlite/Dockerfile index 3630511..8aba774 100644 --- a/examples/boxlite/Dockerfile +++ b/examples/boxlite/Dockerfile @@ -1,5 +1,5 @@ FROM node:22-bookworm-slim RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent codex diff --git a/examples/boxlite/tsconfig.json b/examples/boxlite/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/boxlite/tsconfig.json +++ b/examples/boxlite/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/cloudflare/Dockerfile b/examples/cloudflare/Dockerfile index d0796cb..738f8a2 100644 --- a/examples/cloudflare/Dockerfile +++ 
b/examples/cloudflare/Dockerfile @@ -1,7 +1,7 @@ FROM cloudflare/sandbox:0.7.0 # Install sandbox-agent -RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh # Pre-install agents RUN sandbox-agent install-agent claude && \ diff --git a/examples/computesdk/tsconfig.json b/examples/computesdk/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/computesdk/tsconfig.json +++ b/examples/computesdk/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/daytona/src/index.ts b/examples/daytona/src/index.ts index b881113..9c4cf85 100644 --- a/examples/daytona/src/index.ts +++ b/examples/daytona/src/index.ts @@ -16,7 +16,6 @@ console.log(`UI: ${client.inspectorUrl}`); const session = await client.createSession({ agent: detectAgent(), - cwd: "/home/daytona", }); session.onEvent((event) => { diff --git a/examples/daytona/tsconfig.json b/examples/daytona/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/daytona/tsconfig.json +++ b/examples/daytona/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/docker/tsconfig.json b/examples/docker/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/docker/tsconfig.json +++ b/examples/docker/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/e2b/src/e2b.ts b/examples/e2b/src/e2b.ts index 
bfd5bda..17762a2 100644 --- a/examples/e2b/src/e2b.ts +++ b/examples/e2b/src/e2b.ts @@ -17,8 +17,10 @@ export async function setupE2BSandboxAgent(): Promise<{ token?: string; cleanup: () => Promise; }> { + const template = process.env.E2B_TEMPLATE; const client = await SandboxAgent.start({ sandbox: e2b({ + template, create: { envs: collectEnvVars() }, }), }); diff --git a/examples/e2b/src/index.ts b/examples/e2b/src/index.ts index c20ebaa..67b74dc 100644 --- a/examples/e2b/src/index.ts +++ b/examples/e2b/src/index.ts @@ -5,15 +5,15 @@ import { detectAgent } from "@sandbox-agent/example-shared"; const envs: Record = {}; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; +const template = process.env.E2B_TEMPLATE; const client = await SandboxAgent.start({ // ✨ NEW ✨ - sandbox: e2b({ create: { envs } }), + sandbox: e2b({ template, create: { envs } }), }); const session = await client.createSession({ agent: detectAgent(), - cwd: "/home/user", }); session.onEvent((event) => { diff --git a/examples/e2b/tsconfig.json b/examples/e2b/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/e2b/tsconfig.json +++ b/examples/e2b/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/file-system/tsconfig.json b/examples/file-system/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/file-system/tsconfig.json +++ b/examples/file-system/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/mcp-custom-tool/tsconfig.json 
b/examples/mcp-custom-tool/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/mcp-custom-tool/tsconfig.json +++ b/examples/mcp-custom-tool/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/mcp/tsconfig.json b/examples/mcp/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/mcp/tsconfig.json +++ b/examples/mcp/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/modal/tsconfig.json b/examples/modal/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/modal/tsconfig.json +++ b/examples/modal/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/permissions/tsconfig.json b/examples/permissions/tsconfig.json index 9c9fe06..4eec283 100644 --- a/examples/permissions/tsconfig.json +++ b/examples/permissions/tsconfig.json @@ -1,7 +1,8 @@ { "compilerOptions": { "target": "ES2022", - "lib": ["ES2022"], + "lib": ["ES2022", "DOM"], + "types": ["node"], "module": "ESNext", "moduleResolution": "Bundler", "allowImportingTsExtensions": true, diff --git a/examples/persist-memory/tsconfig.json b/examples/persist-memory/tsconfig.json index d1c0065..ec2723c 100644 --- a/examples/persist-memory/tsconfig.json +++ b/examples/persist-memory/tsconfig.json @@ -1,13 +1,15 @@ { "compilerOptions": { "target": "ES2022", + "lib": ["ES2022", "DOM"], "module": "ESNext", "moduleResolution": "Bundler", "allowImportingTsExtensions": true, "noEmit": true, 
"esModuleInterop": true, "strict": true, - "skipLibCheck": true + "skipLibCheck": true, + "types": ["node"] }, "include": ["src"] } diff --git a/examples/persist-postgres/tsconfig.json b/examples/persist-postgres/tsconfig.json index d1c0065..ec2723c 100644 --- a/examples/persist-postgres/tsconfig.json +++ b/examples/persist-postgres/tsconfig.json @@ -1,13 +1,15 @@ { "compilerOptions": { "target": "ES2022", + "lib": ["ES2022", "DOM"], "module": "ESNext", "moduleResolution": "Bundler", "allowImportingTsExtensions": true, "noEmit": true, "esModuleInterop": true, "strict": true, - "skipLibCheck": true + "skipLibCheck": true, + "types": ["node"] }, "include": ["src"] } diff --git a/examples/persist-sqlite/tsconfig.json b/examples/persist-sqlite/tsconfig.json index d1c0065..ec2723c 100644 --- a/examples/persist-sqlite/tsconfig.json +++ b/examples/persist-sqlite/tsconfig.json @@ -1,13 +1,15 @@ { "compilerOptions": { "target": "ES2022", + "lib": ["ES2022", "DOM"], "module": "ESNext", "moduleResolution": "Bundler", "allowImportingTsExtensions": true, "noEmit": true, "esModuleInterop": true, "strict": true, - "skipLibCheck": true + "skipLibCheck": true, + "types": ["node"] }, "include": ["src"] } diff --git a/examples/shared/src/docker.ts b/examples/shared/src/docker.ts index bc631ba..f4161fb 100644 --- a/examples/shared/src/docker.ts +++ b/examples/shared/src/docker.ts @@ -9,7 +9,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = path.resolve(__dirname, "..", "..", ".."); /** Pre-built Docker image with all agents installed. */ -export const FULL_IMAGE = "rivetdev/sandbox-agent:0.5.0-rc.1-full"; +export const FULL_IMAGE = "rivetdev/sandbox-agent:0.4.2-full"; export interface DockerSandboxOptions { /** Container port used by sandbox-agent inside Docker. 
*/ diff --git a/examples/skills-custom-tool/tsconfig.json b/examples/skills-custom-tool/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/skills-custom-tool/tsconfig.json +++ b/examples/skills-custom-tool/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/skills/tsconfig.json b/examples/skills/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/skills/tsconfig.json +++ b/examples/skills/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/examples/sprites/package.json b/examples/sprites/package.json new file mode 100644 index 0000000..df808e8 --- /dev/null +++ b/examples/sprites/package.json @@ -0,0 +1,20 @@ +{ + "name": "@sandbox-agent/example-sprites", + "private": true, + "type": "module", + "scripts": { + "start": "tsx src/index.ts", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@fly/sprites": "latest", + "@sandbox-agent/example-shared": "workspace:*", + "sandbox-agent": "workspace:*" + }, + "devDependencies": { + "@types/node": "latest", + "tsx": "latest", + "typescript": "latest", + "vitest": "^3.0.0" + } +} diff --git a/examples/sprites/src/index.ts b/examples/sprites/src/index.ts new file mode 100644 index 0000000..bf95e5d --- /dev/null +++ b/examples/sprites/src/index.ts @@ -0,0 +1,21 @@ +import { SandboxAgent } from "sandbox-agent"; +import { sprites } from "sandbox-agent/sprites"; + +const env: Record = {}; +if (process.env.ANTHROPIC_API_KEY) env.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +if (process.env.OPENAI_API_KEY) env.OPENAI_API_KEY = process.env.OPENAI_API_KEY; + +const client = await 
SandboxAgent.start({ + sandbox: sprites({ + token: process.env.SPRITES_API_KEY ?? process.env.SPRITE_TOKEN ?? process.env.SPRITES_TOKEN, + env, + }), +}); + +console.log(`UI: ${client.inspectorUrl}`); +console.log(await client.getHealth()); + +process.once("SIGINT", async () => { + await client.destroySandbox(); + process.exit(0); +}); diff --git a/examples/sprites/tests/sprites.test.ts b/examples/sprites/tests/sprites.test.ts new file mode 100644 index 0000000..dfd1594 --- /dev/null +++ b/examples/sprites/tests/sprites.test.ts @@ -0,0 +1,34 @@ +import { describe, it, expect } from "vitest"; +import { SandboxAgent } from "sandbox-agent"; +import { sprites } from "sandbox-agent/sprites"; + +const shouldRun = Boolean(process.env.SPRITES_API_KEY || process.env.SPRITE_TOKEN || process.env.SPRITES_TOKEN); +const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10) || 300_000; + +const testFn = shouldRun ? it : it.skip; + +describe("sprites provider", () => { + testFn( + "starts sandbox-agent and responds to /v1/health", + async () => { + const env: Record = {}; + if (process.env.ANTHROPIC_API_KEY) env.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; + if (process.env.OPENAI_API_KEY) env.OPENAI_API_KEY = process.env.OPENAI_API_KEY; + + const sdk = await SandboxAgent.start({ + sandbox: sprites({ + token: process.env.SPRITES_API_KEY ?? process.env.SPRITE_TOKEN ?? 
process.env.SPRITES_TOKEN, + env, + }), + }); + + try { + const health = await sdk.getHealth(); + expect(health.status).toBe("ok"); + } finally { + await sdk.destroySandbox(); + } + }, + timeoutMs, + ); +}); diff --git a/examples/sprites/tsconfig.json b/examples/sprites/tsconfig.json new file mode 100644 index 0000000..ad591c3 --- /dev/null +++ b/examples/sprites/tsconfig.json @@ -0,0 +1,17 @@ +{ + "compilerOptions": { + "target": "ES2022", + "lib": ["ES2022", "DOM"], + "module": "ESNext", + "moduleResolution": "Bundler", + "allowImportingTsExtensions": true, + "noEmit": true, + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "resolveJsonModule": true, + "types": ["node"] + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "**/*.test.ts"] +} diff --git a/examples/vercel/src/index.ts b/examples/vercel/src/index.ts index 9839893..5a83e0c 100644 --- a/examples/vercel/src/index.ts +++ b/examples/vercel/src/index.ts @@ -19,7 +19,6 @@ console.log(`UI: ${client.inspectorUrl}`); const session = await client.createSession({ agent: detectAgent(), - cwd: "/home/vercel-sandbox", }); session.onEvent((event) => { diff --git a/examples/vercel/tsconfig.json b/examples/vercel/tsconfig.json index 96ba2fd..ad591c3 100644 --- a/examples/vercel/tsconfig.json +++ b/examples/vercel/tsconfig.json @@ -9,7 +9,8 @@ "esModuleInterop": true, "strict": true, "skipLibCheck": true, - "resolveJsonModule": true + "resolveJsonModule": true, + "types": ["node"] }, "include": ["src/**/*"], "exclude": ["node_modules", "**/*.test.ts"] diff --git a/foundry/CLAUDE.md b/foundry/CLAUDE.md index d48ab59..2d9bcbb 100644 --- a/foundry/CLAUDE.md +++ b/foundry/CLAUDE.md @@ -72,6 +72,26 @@ Local Docker sandboxes use the `rivetdev/sandbox-agent:foundry-base-latest` imag - The image must be built with `--platform linux/amd64`. The Rust build is memory-intensive; Docker Desktop needs at least 8GB RAM allocated. 
- When updating the base image contents (new system packages, agent versions), rebuild and push with the publish script, then update the `foundry-base-latest` tag. +## Production GitHub App + OAuth App + +Foundry uses two separate GitHub entities in production: + +- **OAuth App** (`GITHUB_CLIENT_ID` / `GITHUB_CLIENT_SECRET`) — handles "Sign in with GitHub" via Better Auth. This is a standard OAuth App. +- **GitHub App** (`GITHUB_APP_ID` / `GITHUB_APP_CLIENT_ID` / `GITHUB_APP_CLIENT_SECRET` / `GITHUB_APP_PRIVATE_KEY`) — handles webhooks, installation tokens for repo access, and GitHub API sync (repos, PRs). Must be manually installed on each org. + +Key env vars and where they connect: + +- `GITHUB_REDIRECT_URI` — OAuth callback, must point to `https://api.sandboxagent.dev/v1/auth/callback/github` +- `GITHUB_WEBHOOK_SECRET` — must match the secret configured on the GitHub App's Webhook settings page exactly. Mismatches cause silent 500s on webhook delivery (signature verification fails inside the actor, surfaced as a generic RivetKit `internal_error`). +- `BETTER_AUTH_URL` — must be the **API** URL (`https://api.sandboxagent.dev`), not the frontend URL. Better Auth uses this internally for sign-out and session management calls. +- `APP_URL` — the **frontend** URL (`https://foundry.sandboxagent.dev`). + +Troubleshooting: + +- **"GitHub App not installed"** — The GitHub App must be manually installed on each org. Sign-in does not auto-install it. Go to the GitHub App settings → Install App tab. The sign-in flow can only detect existing installations, not create them. +- **Webhooks not arriving** — Check the GitHub App → Advanced tab for delivery history. If deliveries show 500, the webhook secret likely doesn't match `GITHUB_WEBHOOK_SECRET`. Test with: `echo -n '{"test":true}' | openssl dgst -sha256 -hmac "$SECRET"` and curl the endpoint with the computed signature. 
+- **Deleting all actors wipes GitHub App installation state.** After a full actor reset, you must trigger a webhook (e.g. redeliver from GitHub App Advanced tab, or re-install the app) to repopulate installation records. + ## Railway Logs - Production Foundry Railway logs can be read from a linked checkout with `railway logs --deployment --lines 200` or `railway logs --deployment --lines 200`. diff --git a/foundry/docker/backend.Dockerfile b/foundry/docker/backend.Dockerfile index 3dc1c7d..ae14ddf 100644 --- a/foundry/docker/backend.Dockerfile +++ b/foundry/docker/backend.Dockerfile @@ -19,6 +19,7 @@ RUN pnpm --filter @sandbox-agent/foundry-backend deploy --prod /out FROM oven/bun:1.2 AS runtime ENV NODE_ENV=production ENV HOME=/home/task +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version WORKDIR /app RUN apt-get update \ && apt-get install -y --no-install-recommends \ @@ -31,6 +32,8 @@ RUN addgroup --system --gid 1001 task \ && adduser --system --uid 1001 --home /home/task --ingroup task task \ && mkdir -p /home/task \ && chown -R task:task /home/task /app +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version COPY --from=build /out ./ USER task EXPOSE 7741 diff --git a/foundry/docker/backend.dev.Dockerfile b/foundry/docker/backend.dev.Dockerfile index 46177c3..c4b6c3a 100644 --- a/foundry/docker/backend.dev.Dockerfile +++ b/foundry/docker/backend.dev.Dockerfile @@ -21,6 +21,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION ENV PATH="/root/.local/bin:${PATH}" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version WORKDIR /app diff --git a/foundry/docker/backend.preview.Dockerfile b/foundry/docker/backend.preview.Dockerfile index b35ced8..91cd7c7 100644 --- a/foundry/docker/backend.preview.Dockerfile +++ b/foundry/docker/backend.preview.Dockerfile @@ 
-20,6 +20,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION ENV PATH="/root/.local/bin:${PATH}" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" +ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version +RUN mkdir -p /etc/foundry \ + && date +%s > /etc/foundry/rivet-runner-version WORKDIR /workspace/quebec diff --git a/foundry/packages/backend/src/actors/index.ts b/foundry/packages/backend/src/actors/index.ts index 52bb914..74ede4a 100644 --- a/foundry/packages/backend/src/actors/index.ts +++ b/foundry/packages/backend/src/actors/index.ts @@ -6,16 +6,15 @@ import { auditLog } from "./audit-log/index.js"; import { taskSandbox } from "./sandbox/index.js"; import { organization } from "./organization/index.js"; import { logger } from "../logging.js"; +import { resolveRunnerVersion } from "../config/runner-version.js"; -const RUNNER_VERSION = Math.floor(Date.now() / 1000); +const runnerVersion = resolveRunnerVersion(); export const registry = setup({ serverless: { basePath: "/v1/rivet", }, - runner: { - version: RUNNER_VERSION, - }, + runner: { version: runnerVersion }, logging: { baseLogger: logger, }, diff --git a/foundry/packages/backend/src/actors/logging.ts b/foundry/packages/backend/src/actors/logging.ts index afc7d37..a61685f 100644 --- a/foundry/packages/backend/src/actors/logging.ts +++ b/foundry/packages/backend/src/actors/logging.ts @@ -22,6 +22,16 @@ export function resolveErrorStack(error: unknown): string | undefined { return undefined; } +export function logActorInfo(scope: string, message: string, context?: Record): void { + logger.info( + { + scope, + ...(context ?? 
{}), + }, + message, + ); +} + export function logActorWarning(scope: string, message: string, context?: Record): void { logger.warn( { diff --git a/foundry/packages/backend/src/actors/sandbox/index.ts b/foundry/packages/backend/src/actors/sandbox/index.ts index 7f31adc..0444d9b 100644 --- a/foundry/packages/backend/src/actors/sandbox/index.ts +++ b/foundry/packages/backend/src/actors/sandbox/index.ts @@ -13,7 +13,12 @@ import { logActorWarning, resolveErrorMessage } from "../logging.js"; import { expectQueueResponse } from "../../services/queue.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; -const SANDBOX_REPO_CWD = "/home/sandbox/repo"; +/** + * Default repo CWD inside the sandbox. The actual path is resolved dynamically + * via `$HOME/repo` because different sandbox providers run as different users + * (e.g. E2B uses `/home/user`, local Docker uses `/home/sandbox`). + */ +const DEFAULT_SANDBOX_REPO_CWD = "/home/user/repo"; const DEFAULT_LOCAL_SANDBOX_IMAGE = "rivetdev/sandbox-agent:foundry-base-latest"; const DEFAULT_LOCAL_SANDBOX_PORT = 2468; const dockerClient = new Dockerode({ socketPath: "/var/run/docker.sock" }); @@ -207,7 +212,7 @@ const baseTaskSandbox = sandboxActor({ if (sandboxProviderId === "e2b") { return e2b({ create: () => ({ - template: config.sandboxProviders.e2b.template ?? "sandbox-agent-full-0.3.x", + template: config.sandboxProviders.e2b.template ?? "sandbox-agent-full-0.4.x", envs: sandboxEnvObject(), // TEMPORARY: Default E2B timeout is 5 minutes which is too short. // Set to 1 hour as a stopgap. Remove this once the E2B provider in @@ -264,7 +269,7 @@ async function providerForConnection(c: any): Promise { sandboxProviderId === "e2b" ? e2b({ create: () => ({ - template: config.sandboxProviders.e2b.template ?? "sandbox-agent-full-0.3.x", + template: config.sandboxProviders.e2b.template ??
"sandbox-agent-full-0.4.x", envs: sandboxEnvObject(), }), installAgents: ["claude", "codex"], @@ -297,6 +302,43 @@ async function listWorkspaceModelGroupsForSandbox(c: any): Promise Promise>; +// --------------------------------------------------------------------------- +// Dynamic repo CWD resolution +// --------------------------------------------------------------------------- + +let cachedRepoCwd: string | null = null; + +/** + * Resolve the repo CWD inside the sandbox by querying `$HOME`. + * Different providers run as different users (E2B: `/home/user`, local Docker: + * `/home/sandbox`), so the path must be resolved dynamically. The result is + * cached at module scope (shared across all sandbox actors in this process). + */ +async function resolveRepoCwd(c: any): Promise<string> { + if (cachedRepoCwd) return cachedRepoCwd; + + try { + const result = await baseActions.runProcess(c, { + command: "bash", + args: ["-lc", "echo $HOME"], + cwd: "/", + timeoutMs: 10_000, + }); + const home = (result.stdout ?? result.result ??
"").trim(); + if (home && home.startsWith("/")) { + cachedRepoCwd = `${home}/repo`; + return cachedRepoCwd; + } + } catch (error) { + logActorWarning("taskSandbox", "failed to resolve $HOME, using default", { + error: resolveErrorMessage(error), + }); + } + + cachedRepoCwd = DEFAULT_SANDBOX_REPO_CWD; + return cachedRepoCwd; +} + // --------------------------------------------------------------------------- // Queue names for sandbox actor // --------------------------------------------------------------------------- @@ -528,8 +570,9 @@ export const taskSandbox = actor({ } }, - async repoCwd(): Promise<{ cwd: string }> { - return { cwd: SANDBOX_REPO_CWD }; + async repoCwd(c: any): Promise<{ cwd: string }> { + const resolved = await resolveRepoCwd(c); + return { cwd: resolved }; }, // Long-running action — kept as direct action to avoid blocking the @@ -600,4 +643,4 @@ export const taskSandbox = actor({ run: workflow(runSandboxWorkflow), }); -export { SANDBOX_REPO_CWD }; +export { DEFAULT_SANDBOX_REPO_CWD, resolveRepoCwd }; diff --git a/foundry/packages/backend/src/actors/task/workspace.ts b/foundry/packages/backend/src/actors/task/workspace.ts index 5c49a4d..0856947 100644 --- a/foundry/packages/backend/src/actors/task/workspace.ts +++ b/foundry/packages/backend/src/actors/task/workspace.ts @@ -1,6 +1,6 @@ // @ts-nocheck import { randomUUID } from "node:crypto"; -import { basename, dirname } from "node:path"; +import { basename } from "node:path"; import { asc, eq } from "drizzle-orm"; import { DEFAULT_WORKSPACE_MODEL_GROUPS, @@ -10,8 +10,7 @@ import { } from "@sandbox-agent/foundry-shared"; import { getActorRuntimeContext } from "../context.js"; import { getOrCreateOrganization, getOrCreateTaskSandbox, getOrCreateUser, getTaskSandbox, selfTask } from "../handles.js"; -import { logActorWarning, resolveErrorMessage } from "../logging.js"; -import { SANDBOX_REPO_CWD } from "../sandbox/index.js"; +import { logActorInfo, logActorWarning, resolveErrorMessage } from 
"../logging.js"; import { resolveSandboxProviderId } from "../../sandbox-config.js"; import { getBetterAuthService } from "../../services/better-auth.js"; import { resolveOrganizationGithubAuth } from "../../services/github-auth.js"; @@ -183,9 +182,9 @@ async function injectGitCredentials(sandbox: any, login: string, email: string, "set -euo pipefail", `git config --global user.name ${JSON.stringify(login)}`, `git config --global user.email ${JSON.stringify(email)}`, - `git config --global credential.helper 'store --file=/home/sandbox/.git-token'`, - `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > /home/sandbox/.git-token`, - `chmod 600 /home/sandbox/.git-token`, + `git config --global credential.helper 'store --file=$HOME/.git-token'`, + `printf '%s\\n' ${JSON.stringify(`https://${login}:${token}@github.com`)} > $HOME/.git-token`, + `chmod 600 $HOME/.git-token`, ]; const result = await sandbox.runProcess({ command: "bash", @@ -576,6 +575,10 @@ async function getTaskSandboxRuntime( const sandbox = await getOrCreateTaskSandbox(c, c.state.organizationId, sandboxId, {}); const actorId = typeof sandbox.resolve === "function" ? await sandbox.resolve().catch(() => null) : null; const switchTarget = sandboxProviderId === "local" ? `sandbox://local/${sandboxId}` : `sandbox://e2b/${sandboxId}`; + + // Resolve the actual repo CWD from the sandbox's $HOME (differs by provider). + const repoCwdResult = await sandbox.repoCwd(); + const cwd = repoCwdResult?.cwd ?? "$HOME/repo"; const now = Date.now(); await c.db @@ -585,7 +588,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, createdAt: now, updatedAt: now, }) @@ -595,7 +598,7 @@ async function getTaskSandboxRuntime( sandboxProviderId, sandboxActorId: typeof actorId === "string" ? 
actorId : null, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, updatedAt: now, }, }) @@ -606,7 +609,7 @@ async function getTaskSandboxRuntime( .set({ activeSandboxId: sandboxId, activeSwitchTarget: switchTarget, - activeCwd: SANDBOX_REPO_CWD, + activeCwd: cwd, updatedAt: now, }) .where(eq(taskRuntime.id, 1)) @@ -617,7 +620,7 @@ async function getTaskSandboxRuntime( sandboxId, sandboxProviderId, switchTarget, - cwd: SANDBOX_REPO_CWD, + cwd, }; } @@ -636,27 +639,35 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski // If the repo was already prepared and the caller allows skipping fetch, just return. // The clone, fetch, and checkout already happened on a prior call. if (opts?.skipFetchIfPrepared && sandboxRepoPrepared) { + logActorInfo("task.sandbox", "ensureSandboxRepo skipped (already prepared)"); return; } + const repoStart = performance.now(); + + const t0 = performance.now(); const auth = await resolveOrganizationGithubAuth(c, c.state.organizationId); const metadata = await getRepositoryMetadata(c); + logActorInfo("task.sandbox", "resolveAuth+metadata", { durationMs: Math.round(performance.now() - t0) }); + const baseRef = metadata.defaultBranch ?? "main"; - const sandboxRepoRoot = dirname(SANDBOX_REPO_CWD); + // Use $HOME inside the shell script so the path resolves correctly regardless + // of which user the sandbox runs as (E2B: "user", local Docker: "sandbox"). const script = [ "set -euo pipefail", - `mkdir -p ${JSON.stringify(sandboxRepoRoot)}`, + 'REPO_DIR="$HOME/repo"', + 'mkdir -p "$HOME"', "git config --global credential.helper '!f() { echo username=x-access-token; echo password=${GH_TOKEN:-$GITHUB_TOKEN}; }; f'", - `if [ ! -d ${JSON.stringify(`${SANDBOX_REPO_CWD}/.git`)} ]; then rm -rf ${JSON.stringify(SANDBOX_REPO_CWD)} && git clone ${JSON.stringify( - metadata.remoteUrl, - )} ${JSON.stringify(SANDBOX_REPO_CWD)}; fi`, - `cd ${JSON.stringify(SANDBOX_REPO_CWD)}`, + `if [ ! 
-d "$REPO_DIR/.git" ]; then rm -rf "$REPO_DIR" && git clone ${JSON.stringify(metadata.remoteUrl)} "$REPO_DIR"; fi`, + 'cd "$REPO_DIR"', "git fetch origin --prune", `if git show-ref --verify --quiet refs/remotes/origin/${JSON.stringify(record.branchName).slice(1, -1)}; then target_ref=${JSON.stringify( `origin/${record.branchName}`, )}; else target_ref=${JSON.stringify(baseRef)}; fi`, `git checkout -B ${JSON.stringify(record.branchName)} \"$target_ref\"`, ]; + + const t1 = performance.now(); const result = await sandbox.runProcess({ command: "bash", args: ["-lc", script.join("; ")], @@ -669,6 +680,11 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski : undefined, timeoutMs: 5 * 60_000, }); + logActorInfo("task.sandbox", "git clone/fetch/checkout", { + branch: record.branchName, + repo: metadata.remoteUrl, + durationMs: Math.round(performance.now() - t1), + }); if ((result.exitCode ?? 0) !== 0) { throw new Error(`sandbox repo preparation failed (${result.exitCode ?? 1}): ${[result.stdout, result.stderr].filter(Boolean).join("")}`); @@ -677,10 +693,13 @@ async function ensureSandboxRepo(c: any, sandbox: any, record: any, opts?: { ski // On first repo preparation, inject the task owner's git credentials into the sandbox // so that push/commit operations are authenticated and attributed to the correct user. 
if (!sandboxRepoPrepared && opts?.authSessionId) { + const t2 = performance.now(); await maybeSwapTaskOwner(c, opts.authSessionId, sandbox); + logActorInfo("task.sandbox", "maybeSwapTaskOwner", { durationMs: Math.round(performance.now() - t2) }); } sandboxRepoPrepared = true; + logActorInfo("task.sandbox", "ensureSandboxRepo complete", { totalDurationMs: Math.round(performance.now() - repoStart) }); } async function executeInSandbox( @@ -1264,6 +1283,7 @@ export async function createWorkspaceSession(c: any, model?: string, authSession } export async function ensureWorkspaceSession(c: any, sessionId: string, model?: string, authSessionId?: string): Promise { + const ensureStart = performance.now(); const meta = await readSessionMeta(c, sessionId); if (!meta || meta.closed) { return; @@ -1283,10 +1303,18 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?: }); try { + const t0 = performance.now(); const runtime = await getTaskSandboxRuntime(c, record); + logActorInfo("task.session", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) }); + + const t1 = performance.now(); await ensureSandboxRepo(c, runtime.sandbox, record); + logActorInfo("task.session", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) }); + const resolvedModel = model ?? meta.model ?? (await resolveDefaultModel(c, authSessionId)); const resolvedAgent = await resolveSandboxAgentForModel(c, resolvedModel); + + const t2 = performance.now(); await runtime.sandbox.createSession({ id: meta.sandboxSessionId ?? sessionId, agent: resolvedAgent, @@ -1295,12 +1323,14 @@ export async function ensureWorkspaceSession(c: any, sessionId: string, model?: cwd: runtime.cwd, }, }); + logActorInfo("task.session", "createSession", { sessionId, agent: resolvedAgent, model: resolvedModel, durationMs: Math.round(performance.now() - t2) }); await updateSessionMeta(c, sessionId, { sandboxSessionId: meta.sandboxSessionId ?? 
sessionId, status: "ready", errorMessage: null, }); + logActorInfo("task.session", "ensureWorkspaceSession complete", { sessionId, totalDurationMs: Math.round(performance.now() - ensureStart) }); fireRefreshSessionTranscript(c, meta.sandboxSessionId ?? sessionId); } catch (error) { await updateSessionMeta(c, sessionId, { @@ -1415,12 +1445,19 @@ export async function changeWorkspaceModel(c: any, sessionId: string, model: str } export async function sendWorkspaceMessage(c: any, sessionId: string, text: string, attachments: Array, authSessionId?: string): Promise { + const sendStart = performance.now(); const meta = requireSendableSessionMeta(await readSessionMeta(c, sessionId), sessionId); const record = await ensureWorkspaceSeeded(c); + + const t0 = performance.now(); const runtime = await getTaskSandboxRuntime(c, record); + logActorInfo("task.message", "getTaskSandboxRuntime", { sessionId, durationMs: Math.round(performance.now() - t0) }); + + const t1 = performance.now(); // Skip git fetch on subsequent messages — the repo was already prepared during session // creation. This avoids a 5-30s network round-trip to GitHub on every prompt. await ensureSandboxRepo(c, runtime.sandbox, record, { skipFetchIfPrepared: true, authSessionId }); + logActorInfo("task.message", "ensureSandboxRepo", { sessionId, durationMs: Math.round(performance.now() - t1) }); // Check if the task owner needs to swap. If a different user is sending this message, // update the owner record and inject their git credentials into the sandbox. 
@@ -1450,10 +1487,12 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "running", Date.now()); try { + const t2 = performance.now(); await runtime.sandbox.sendPrompt({ sessionId: meta.sandboxSessionId, prompt: prompt.join("\n\n"), }); + logActorInfo("task.message", "sendPrompt", { sessionId, durationMs: Math.round(performance.now() - t2) }); await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "idle", Date.now()); } catch (error) { await updateSessionMeta(c, sessionId, { @@ -1463,6 +1502,7 @@ export async function sendWorkspaceMessage(c: any, sessionId: string, text: stri await syncWorkspaceSessionStatus(c, meta.sandboxSessionId, "error", Date.now()); throw error; } + logActorInfo("task.message", "sendWorkspaceMessage complete", { sessionId, totalDurationMs: Math.round(performance.now() - sendStart) }); } export async function stopWorkspaceSession(c: any, sessionId: string): Promise { diff --git a/foundry/packages/backend/src/config/runner-version.ts b/foundry/packages/backend/src/config/runner-version.ts new file mode 100644 index 0000000..5c33672 --- /dev/null +++ b/foundry/packages/backend/src/config/runner-version.ts @@ -0,0 +1,33 @@ +import { readFileSync } from "node:fs"; + +function parseRunnerVersion(rawValue: string | undefined): number | undefined { + const value = rawValue?.trim(); + if (!value) { + return undefined; + } + + const parsed = Number.parseInt(value, 10); + if (Number.isNaN(parsed)) { + return undefined; + } + + return parsed; +} + +export function resolveRunnerVersion(): number | undefined { + const envVersion = parseRunnerVersion(process.env.RIVET_RUNNER_VERSION); + if (envVersion !== undefined) { + return envVersion; + } + + const versionFilePath = process.env.RIVET_RUNNER_VERSION_FILE; + if (!versionFilePath) { + return undefined; + } + + try { + return parseRunnerVersion(readFileSync(versionFilePath, "utf8")); + } catch { + return 
undefined; + } +} diff --git a/foundry/packages/backend/src/index.ts b/foundry/packages/backend/src/index.ts index e00abaa..617bacc 100644 --- a/foundry/packages/backend/src/index.ts +++ b/foundry/packages/backend/src/index.ts @@ -141,6 +141,59 @@ export async function startBackend(options: BackendStartOptions = {}): Promise.json, inspect with chrome://tracing) + app.get("/debug/memory", async (c) => { + if (process.env.NODE_ENV !== "development") { + return c.json({ error: "debug endpoints disabled in production" }, 403); + } + const wantGc = c.req.query("gc") === "1"; + if (wantGc && typeof Bun !== "undefined") { + // Bun.gc(true) triggers a synchronous full GC sweep in JavaScriptCore. + Bun.gc(true); + } + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + const nonHeapMb = rssMb - heapUsedMb - externalMb; + // Bun.heapStats() gives JSC-specific breakdown: object counts, typed array + // bytes, extra memory (native allocations tracked by JSC). Useful for + // distinguishing JS object bloat from native/WASM memory. + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const BunAny = Bun as any; + const heapStats = typeof BunAny.heapStats === "function" ? BunAny.heapStats() : null; + const snapshot = { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + gcTriggered: wantGc, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + ...(heapStats ? { bunHeapStats: heapStats } : {}), + }; + // Optionally write a full JSC heap snapshot for offline analysis. 
+ let heapSnapshotPath: string | null = null; + const wantHeap = c.req.query("heap") === "1"; + if (wantHeap && typeof Bun !== "undefined") { + heapSnapshotPath = `/tmp/foundry-heap-${Date.now()}.json`; + // Bun.generateHeapSnapshot("v8") returns a V8-compatible JSON string. + const heapJson = Bun.generateHeapSnapshot("v8"); + await Bun.write(heapSnapshotPath, heapJson); + } + logger.info(snapshot, "memory_usage_debug"); + return c.json({ ...snapshot, ...(heapSnapshotPath ? { heapSnapshotPath } : {}) }); + }); + app.use("*", async (c, next) => { const requestId = c.req.header("x-request-id")?.trim() || randomUUID(); const start = performance.now(); @@ -354,6 +407,11 @@ export async function startBackend(options: BackendStartOptions = {}): Promise { + const mem = process.memoryUsage(); + const rssMb = Math.round(mem.rss / 1024 / 1024); + const heapUsedMb = Math.round(mem.heapUsed / 1024 / 1024); + const heapTotalMb = Math.round(mem.heapTotal / 1024 / 1024); + const externalMb = Math.round(mem.external / 1024 / 1024); + // Non-heap RSS: memory not accounted for by JS heap or external buffers. + // Large values here point to native allocations (WASM, mmap, child process + // bookkeeping, Bun's internal arena, etc.). 
+ const nonHeapMb = rssMb - heapUsedMb - externalMb; + const deltaRss = rssMb - prevRss; + prevRss = rssMb; + logger.info( + { + rssMb, + heapUsedMb, + heapTotalMb, + externalMb, + nonHeapMb, + deltaRssMb: deltaRss, + rssBytes: mem.rss, + heapUsedBytes: mem.heapUsed, + heapTotalBytes: mem.heapTotal, + externalBytes: mem.external, + }, + "memory_usage", + ); + }, 60_000); + } + process.on("SIGINT", async () => { server.stop(); process.exit(0); diff --git a/foundry/packages/client/package.json b/foundry/packages/client/package.json index 9790474..fa73dab 100644 --- a/foundry/packages/client/package.json +++ b/foundry/packages/client/package.json @@ -6,7 +6,7 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "scripts": { - "build": "tsup src/index.ts --format esm --dts", + "build": "tsup src/index.ts --format esm --dts --tsconfig tsconfig.build.json", "typecheck": "tsc --noEmit", "test": "vitest run", "test:e2e:full": "HF_ENABLE_DAEMON_FULL_E2E=1 vitest run test/e2e/full-integration-e2e.test.ts", diff --git a/foundry/packages/client/src/subscription/remote-manager.ts b/foundry/packages/client/src/subscription/remote-manager.ts index 778241f..ae774c6 100644 --- a/foundry/packages/client/src/subscription/remote-manager.ts +++ b/foundry/packages/client/src/subscription/remote-manager.ts @@ -4,6 +4,11 @@ import { topicDefinitions, type TopicData, type TopicDefinition, type TopicKey, const GRACE_PERIOD_MS = 30_000; +/** Initial retry delay in ms. */ +const RETRY_BASE_MS = 1_000; +/** Maximum retry delay in ms. */ +const RETRY_MAX_MS = 30_000; + /** * Remote implementation of SubscriptionManager. * Each cache entry owns one actor connection plus one materialized snapshot. 
@@ -80,9 +85,12 @@ class TopicEntry { private unsubscribeEvent: (() => void) | null = null; private unsubscribeError: (() => void) | null = null; private teardownTimer: ReturnType | null = null; + private retryTimer: ReturnType | null = null; + private retryAttempt = 0; private startPromise: Promise | null = null; private eventPromise: Promise = Promise.resolve(); private started = false; + private disposed = false; constructor( private readonly topicKey: TopicKey, @@ -136,7 +144,9 @@ class TopicEntry { } dispose(): void { + this.disposed = true; this.cancelTeardown(); + this.cancelRetry(); this.unsubscribeEvent?.(); this.unsubscribeError?.(); if (this.conn) { @@ -148,6 +158,55 @@ class TopicEntry { this.error = null; this.lastRefreshAt = null; this.started = false; + this.retryAttempt = 0; + } + + private cancelRetry(): void { + if (this.retryTimer) { + clearTimeout(this.retryTimer); + this.retryTimer = null; + } + } + + /** + * Schedules a retry with exponential backoff. Cleans up any existing + * connection state before reconnecting. + */ + private scheduleRetry(): void { + if (this.disposed || this.listenerCount === 0) { + return; + } + + const delay = Math.min(RETRY_BASE_MS * 2 ** this.retryAttempt, RETRY_MAX_MS); + this.retryAttempt++; + + this.retryTimer = setTimeout(() => { + this.retryTimer = null; + if (this.disposed || this.listenerCount === 0) { + return; + } + + // Tear down the old connection before retrying + this.cleanupConnection(); + this.started = false; + this.startPromise = this.start().finally(() => { + this.startPromise = null; + }); + }, delay); + } + + /** + * Cleans up connection resources without resetting data/status/retry state. 
+ */ + private cleanupConnection(): void { + this.unsubscribeEvent?.(); + this.unsubscribeError?.(); + this.unsubscribeEvent = null; + this.unsubscribeError = null; + if (this.conn) { + void this.conn.dispose(); + } + this.conn = null; } private async start(): Promise { @@ -164,17 +223,20 @@ class TopicEntry { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.notify(); + this.scheduleRetry(); }); this.data = await this.definition.fetchInitial(this.backend, this.params); this.status = "connected"; this.lastRefreshAt = Date.now(); this.started = true; + this.retryAttempt = 0; this.notify(); } catch (error) { this.status = "error"; this.error = error instanceof Error ? error : new Error(String(error)); this.started = false; this.notify(); + this.scheduleRetry(); } } diff --git a/foundry/packages/client/tsconfig.build.json b/foundry/packages/client/tsconfig.build.json new file mode 100644 index 0000000..35bcdb2 --- /dev/null +++ b/foundry/packages/client/tsconfig.build.json @@ -0,0 +1,6 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "ignoreDeprecations": "6.0" + } +} diff --git a/foundry/packages/frontend/src/components/mock-layout.tsx b/foundry/packages/frontend/src/components/mock-layout.tsx index 797b650..4089e01 100644 --- a/foundry/packages/frontend/src/components/mock-layout.tsx +++ b/foundry/packages/frontend/src/components/mock-layout.tsx @@ -187,6 +187,7 @@ function toTaskModel( diffs: detail?.diffs ?? {}, fileTree: detail?.fileTree ?? [], minutesUsed: detail?.minutesUsed ?? 0, + sandboxes: detail?.sandboxes ?? [], activeSandboxId: detail?.activeSandboxId ?? null, primaryUserLogin: detail?.primaryUserLogin ?? summary.primaryUserLogin ?? null, primaryUserAvatarUrl: detail?.primaryUserAvatarUrl ?? summary.primaryUserAvatarUrl ?? 
null, diff --git a/foundry/packages/shared/package.json b/foundry/packages/shared/package.json index 04e3ff3..8d65361 100644 --- a/foundry/packages/shared/package.json +++ b/foundry/packages/shared/package.json @@ -6,7 +6,7 @@ "main": "dist/index.js", "types": "dist/index.d.ts", "scripts": { - "build": "tsup src/index.ts --format esm --dts", + "build": "tsup src/index.ts --format esm --dts --tsconfig tsconfig.build.json", "typecheck": "tsc --noEmit", "test": "vitest run" }, diff --git a/foundry/packages/shared/tsconfig.build.json b/foundry/packages/shared/tsconfig.build.json new file mode 100644 index 0000000..35bcdb2 --- /dev/null +++ b/foundry/packages/shared/tsconfig.build.json @@ -0,0 +1,6 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "ignoreDeprecations": "6.0" + } +} diff --git a/foundry/research/friction/sandbox-agent.mdx b/foundry/research/friction/sandbox-agent.mdx index aa4a29d..5fb9ba8 100644 --- a/foundry/research/friction/sandbox-agent.mdx +++ b/foundry/research/friction/sandbox-agent.mdx @@ -55,7 +55,7 @@ Upgrading backend integration from legacy sandbox-agent session endpoints to `sa ### Friction / Issue -`0.2.0` no longer exposes `/v1/sessions` endpoints used by the backend integration; direct session create/status polling via legacy REST paths returns `404`. +`0.2.0` no longer exposes the legacy session REST endpoints used by the backend integration; direct session create/status polling via those paths returns `404`. ### Attempted Fix / Workaround @@ -65,5 +65,5 @@ Upgrading backend integration from legacy sandbox-agent session endpoints to `sa ### Outcome -- Backend no longer depends on removed `/v1/sessions` endpoints. +- Backend no longer depends on removed legacy session REST endpoints. - Daytona flow is aligned with `sandbox-agent 0.2.0` runtime and SDK usage. 
diff --git a/frontend/packages/website/src/components/GetStarted.tsx b/frontend/packages/website/src/components/GetStarted.tsx index 8a03b34..11f52b0 100644 --- a/frontend/packages/website/src/components/GetStarted.tsx +++ b/frontend/packages/website/src/components/GetStarted.tsx @@ -103,7 +103,7 @@ function SdkCodeHighlighted() { ); } -const sandboxCommand = `curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh`; +const sandboxCommand = `curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh`; const sourceCommands = `git clone https://github.com/rivet-dev/sandbox-agent cd sandbox-agent @@ -196,7 +196,7 @@ export function GetStarted() { curl -fsSL \ {"\n"} {" "} - https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh + https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh diff --git a/justfile b/justfile index 84b761f..4ec95bc 100644 --- a/justfile +++ b/justfile @@ -186,4 +186,3 @@ foundry-format: [group('foundry')] foundry-docker-build tag='foundry:local': docker build -f foundry/docker/backend.Dockerfile -t {{tag}} . 
- diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b2b99c3..4d2cde6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -450,6 +450,31 @@ importers: specifier: latest version: 5.9.3 + examples/sprites: + dependencies: + '@fly/sprites': + specifier: latest + version: 0.0.1 + '@sandbox-agent/example-shared': + specifier: workspace:* + version: link:../shared + sandbox-agent: + specifier: workspace:* + version: link:../../sdks/typescript + devDependencies: + '@types/node': + specifier: latest + version: 25.5.0 + tsx: + specifier: latest + version: 4.21.0 + typescript: + specifier: latest + version: 6.0.2 + vitest: + specifier: ^3.0.0 + version: 3.2.4(@types/debug@4.1.12)(@types/node@25.5.0)(jiti@1.21.7)(jsdom@26.1.0)(tsx@4.21.0)(yaml@2.8.2) + examples/vercel: dependencies: '@sandbox-agent/example-shared': @@ -531,7 +556,7 @@ importers: version: 1.3.10 tsup: specifier: ^8.5.0 - version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2) + version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@6.0.2)(yaml@2.8.2) foundry/packages/client: dependencies: @@ -553,7 +578,7 @@ importers: version: 19.2.14 tsup: specifier: ^8.5.0 - version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2) + version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@6.0.2)(yaml@2.8.2) foundry/packages/frontend: dependencies: @@ -614,7 +639,7 @@ importers: version: 0.1.27(@types/react@19.2.14)(react@19.2.4) tsup: specifier: ^8.5.0 - version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2) + version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@6.0.2)(yaml@2.8.2) vite: specifier: ^7.1.3 version: 7.3.1(@types/node@25.5.0)(jiti@1.21.7)(tsx@4.21.0)(yaml@2.8.2) @@ -633,7 +658,7 @@ importers: devDependencies: tsup: specifier: ^8.5.0 - version: 8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@5.9.3)(yaml@2.8.2) + version: 
8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@6.0.2)(yaml@2.8.2) frontend/packages/inspector: dependencies: @@ -771,25 +796,6 @@ importers: specifier: ^5.7.0 version: 5.9.3 - scripts/sandbox-testing: - dependencies: - '@daytonaio/sdk': - specifier: latest - version: 0.151.0(ws@8.19.0) - '@e2b/code-interpreter': - specifier: latest - version: 2.3.3 - devDependencies: - '@types/node': - specifier: latest - version: 25.5.0 - tsx: - specifier: latest - version: 4.21.0 - typescript: - specifier: latest - version: 5.9.3 - sdks/acp-http-client: dependencies: '@agentclientprotocol/sdk': @@ -988,6 +994,9 @@ importers: '@e2b/code-interpreter': specifier: '>=1.0.0' version: 2.3.3 + '@fly/sprites': + specifier: '>=0.0.1' + version: 0.0.1 '@types/dockerode': specifier: ^4.0.0 version: 4.0.1 @@ -2487,6 +2496,10 @@ packages: resolution: {integrity: sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==} engines: {node: '>=14'} + '@fly/sprites@0.0.1': + resolution: {integrity: sha512-1s+dIVi/pTMP4Aj4Mkg+4LoZ/+a0Kp6l9piPRxvpgEKm11b/eRiZgJwVytwAHeI/vtg2fuwcFExjtXOEfny/TA==} + engines: {node: '>=24.0.0'} + '@grpc/grpc-js@1.14.3': resolution: {integrity: sha512-Iq8QQQ/7X3Sac15oB6p0FmUg/klxQvXLeileoqrTRGJYLV+/9tubbr9ipz0GKHjmXVsgFPo/+W+2cA8eNcR+XA==} engines: {node: '>=12.10.0'} @@ -6937,6 +6950,11 @@ packages: engines: {node: '>=14.17'} hasBin: true + typescript@6.0.2: + resolution: {integrity: sha512-bGdAIrZ0wiGDo5l8c++HWtbaNCWTS4UTv7RaTH/ThVIgjkveJt83m74bBHMJkuCbslY8ixgLBVZJIOiQlQTjfQ==} + engines: {node: '>=14.17'} + hasBin: true + ufo@1.6.3: resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==} @@ -8975,6 +8993,8 @@ snapshots: '@fastify/busboy@2.1.1': {} + '@fly/sprites@0.0.1': {} + '@grpc/grpc-js@1.14.3': dependencies: '@grpc/proto-loader': 0.8.0 @@ -14144,6 +14164,34 @@ snapshots: - tsx - yaml + 
tsup@8.5.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(typescript@6.0.2)(yaml@2.8.2): + dependencies: + bundle-require: 5.1.0(esbuild@0.27.3) + cac: 6.7.14 + chokidar: 4.0.3 + consola: 3.4.2 + debug: 4.4.3 + esbuild: 0.27.3 + fix-dts-default-cjs-exports: 1.0.1 + joycon: 3.1.1 + picocolors: 1.1.1 + postcss-load-config: 6.0.1(jiti@1.21.7)(postcss@8.5.6)(tsx@4.21.0)(yaml@2.8.2) + resolve-from: 5.0.0 + rollup: 4.56.0 + source-map: 0.7.6 + sucrase: 3.35.1 + tinyexec: 0.3.2 + tinyglobby: 0.2.15 + tree-kill: 1.2.2 + optionalDependencies: + postcss: 8.5.6 + typescript: 6.0.2 + transitivePeerDependencies: + - jiti + - supports-color + - tsx + - yaml + tsx@4.21.0: dependencies: esbuild: 0.27.3 @@ -14194,6 +14242,8 @@ snapshots: typescript@5.9.3: {} + typescript@6.0.2: {} + ufo@1.6.3: {} ultrahtml@1.6.0: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index de394aa..93fd2df 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -10,6 +10,5 @@ packages: - "sdks/gigacode/platforms/*" - "resources/vercel-ai-sdk-schemas" - "scripts/release" - - "scripts/sandbox-testing" - "examples/*" - "server/packages/sandbox-agent/tests/opencode-compat" diff --git a/research/acp/friction.md b/research/acp/friction.md index e5273b8..fc2e4bd 100644 --- a/research/acp/friction.md +++ b/research/acp/friction.md @@ -81,7 +81,7 @@ Update this file continuously during the migration. - Date: 2026-02-10 - Area: Session lifecycle surface - Issue: ACP stable does not include v1-equivalent methods for session listing, explicit session termination/delete, or event-log polling. -- Impact: Direct lift-and-shift of `/v1/sessions`, `/terminate`, and `/events` polling is not possible with ACP core only. +- Impact: Direct lift-and-shift of the legacy session REST list, terminate, and event-polling behavior is not possible with ACP core only. - Proposed direction: Define `_sandboxagent/session/*` extension methods for these control operations, while keeping core prompt flow on standard ACP methods. 
- Decision: Open. - Owner: Unassigned. diff --git a/research/acp/inspector-unimplemented.md b/research/acp/inspector-unimplemented.md index 2935342..3eb0a2f 100644 --- a/research/acp/inspector-unimplemented.md +++ b/research/acp/inspector-unimplemented.md @@ -11,4 +11,4 @@ This tracks legacy inspector behaviors that do not yet have full parity on ACP v 5. TDOO: Agent mode discovery before creating a session is not implemented (inspector currently returns cached-or-empty mode lists). 6. TDOO: Agent model discovery before creating a session is not implemented (inspector currently returns cached-or-empty model lists). 7. TDOO: Session listing only reflects sessions created by this inspector client instance (not full server/global session inventory). -8. TDOO: Event history shown in inspector is synthesized from ACP traffic handled by the inspector compatibility layer, not the old canonical `/v1/sessions/*/events` backend history. +8. TDOO: Event history shown in inspector is synthesized from ACP traffic handled by the inspector compatibility layer, not the old canonical session-events backend history. diff --git a/research/acp/merge-acp.md b/research/acp/merge-acp.md index b5f0cce..b51d1cd 100644 --- a/research/acp/merge-acp.md +++ b/research/acp/merge-acp.md @@ -20,8 +20,8 @@ Static v1 endpoints today: - `GET /v1/agents` - `POST /v1/agents/:agent/install` -- `GET /v1/sessions` -- `GET /v1/sessions/:session_id` +- legacy session list endpoint +- legacy session detail endpoint - `GET /v1/fs/entries` - `GET /v1/fs/file` - `PUT /v1/fs/file` @@ -76,8 +76,8 @@ Interpretation for clients: all agent/session operations and non-binary filesyst | --- | --- | --- | | `GET /v1/agents` | `_sandboxagent/agent/list` | Response keeps current `AgentListResponse` shape for low migration risk. | | `POST /v1/agents/:agent/install` | `_sandboxagent/agent/install` | Params include `agent`, `reinstall`, `agentVersion`, `agentProcessVersion`. 
| -| `GET /v1/sessions` | `_sandboxagent/session/list` | Return current `SessionListResponse` shape (not ACP unstable list shape). | -| `GET /v1/sessions/:session_id` | `_sandboxagent/session/get` | Return current `SessionInfo` shape; error on missing session. | +| legacy session list endpoint | `_sandboxagent/session/list` | Return current `SessionListResponse` shape (not ACP unstable list shape). | +| legacy session detail endpoint | `_sandboxagent/session/get` | Return current `SessionInfo` shape; error on missing session. | | `GET /v1/fs/entries` | `_sandboxagent/fs/list_entries` | Preserve path + optional `sessionId` resolution semantics. | | `GET /v1/fs/file` | keep HTTP + `_sandboxagent/fs/read_file` | HTTP is primary because responses may require large streaming reads; ACP variant exists for compatibility/smaller payloads. | | `PUT /v1/fs/file` | keep HTTP + `_sandboxagent/fs/write_file` | HTTP is primary for large binary writes; ACP variant exists for compatibility/smaller payloads. | @@ -143,7 +143,7 @@ Package boundary after migration: - `acp-http-client` remains protocol-pure ACP transport and generic `extMethod`/`extNotification`. - `sandbox-agent` remains the typed wrapper that maps convenience methods to `_sandboxagent/...` extension methods. -- No direct `/v1/agents*`, `/v1/sessions*`, or non-binary `/v1/fs/*` fetches in SDK runtime code. +- No direct legacy agents/session REST fetches or non-binary `/v1/fs/*` fetches in SDK runtime code. - Binary file transfer keeps direct HTTP fetches on the three endpoints listed above. - SDK policy: prefer HTTP for `readFsFile`/`writeFsFile`/`uploadFsBatch` even if ACP extension variants exist. @@ -184,17 +184,17 @@ Alternative (optional): introduce a runtime-only control connection mode that do - TypeScript SDK (`sdks/typescript/src/client.ts`): - Repoint `listAgents`, `installAgent`, `listSessions`, `getSession`, `listFsEntries`, `deleteFsEntry`, `mkdirFs`, `moveFs`, and `statFs` to ACP extension calls. 
- Keep `readFsFile`, `writeFsFile`, and `uploadFsBatch` on HTTP endpoints. - - Remove direct runtime fetch usage for `/v1/agents*`, `/v1/sessions*`, and non-binary `/v1/fs/*`. + - Remove direct runtime fetch usage for legacy agents/session REST endpoints and non-binary `/v1/fs/*`. - Keep method names stable for callers. - Move these methods to connected-only semantics (`NotConnectedError` when disconnected). - CLI (`server/packages/sandbox-agent/src/cli.rs`): - - Make `api agents list/install` call ACP extension methods (via ACP post flow), not direct `/v1/agents*` HTTP calls. + - Make `api agents list/install` call ACP extension methods (via ACP post flow), not direct legacy agent HTTP calls. - Inspector flow/docs: - Stop depending on `GET /v1/agents` in startup path; use ACP extension instead. ### Phase 3: Remove Static Endpoints (Except Health + Binary FS Transfer) -- Remove route registrations for `/v1/agents*`, `/v1/sessions*`, `/v1/fs/entries`, `/v1/fs/entry`, `/v1/fs/mkdir`, `/v1/fs/move`, `/v1/fs/stat` from `router.rs`. +- Remove route registrations for legacy agent/session REST endpoints and `/v1/fs/entries`, `/v1/fs/entry`, `/v1/fs/mkdir`, `/v1/fs/move`, `/v1/fs/stat` from `router.rs`. - Keep `/v1/health`, `/v1/rpc`, `GET /v1/fs/file`, `PUT /v1/fs/file`, and `POST /v1/fs/upload-batch`. - Optional short deprecation period: convert removed routes to `410 Gone` with explicit extension method in `detail`. @@ -237,6 +237,6 @@ Inspector: ## Open Decisions -1. Should removed `/v1/agents*`, `/v1/sessions*`, and non-binary `/v1/fs/*` return `410` for one release or be dropped immediately? +1. Should removed legacy agent/session REST endpoints and non-binary `/v1/fs/*` return `410` for one release or be dropped immediately? 2. Do we keep a strict response-shape parity layer for session/file methods, or normalize to ACP-native shapes? 3. 
Should `/` service-root remain as informational HTTP, or be treated as out-of-scope for this “only health static + binary fs transfer” policy? diff --git a/research/acp/missing-features-spec/01-questions.md b/research/acp/missing-features-spec/01-questions.md index e49ad4d..3600161 100644 --- a/research/acp/missing-features-spec/01-questions.md +++ b/research/acp/missing-features-spec/01-questions.md @@ -59,11 +59,11 @@ struct PendingQuestion { } ``` -## v1 HTTP Endpoints (from `router.rs`) +## Legacy Session REST Endpoints (from `router.rs`) ``` -POST /v1/sessions/{session_id}/questions/{question_id}/reply -> 204 No Content -POST /v1/sessions/{session_id}/questions/{question_id}/reject -> 204 No Content +session question reply endpoint -> 204 No Content +session question reject endpoint -> 204 No Content ``` ### `reply_question` handler @@ -122,7 +122,7 @@ Key flow: 1. Agent emits `question.requested` event with `QuestionEventData { status: Requested, question_id, prompt, options }` 2. Client renders question UI -3. Client calls `POST /v1/sessions/{id}/questions/{qid}/reply` with `{ answers: [["selected"]] }` or `POST .../reject` +3. Client calls the legacy session question reply or reject endpoint with `{ answers: [["selected"]] }` 4. System emits `question.resolved` event with `QuestionEventData { status: Answered, response: Some("...") }` or `{ status: Rejected }` ## v1 Agent Capability diff --git a/research/acp/missing-features-spec/07-session-termination.md b/research/acp/missing-features-spec/07-session-termination.md index 08e4f85..4d01fb6 100644 --- a/research/acp/missing-features-spec/07-session-termination.md +++ b/research/acp/missing-features-spec/07-session-termination.md @@ -4,7 +4,7 @@ ## Summary -v1 had explicit session termination (`POST /v1/sessions/{id}/terminate`). v1 only has `session/cancel` (turn cancellation, not session kill) and `DELETE /v1/rpc` (connection close, not session termination). 
Need explicit session destroy/terminate semantics. +The legacy session REST API had an explicit terminate endpoint. ACP only has `session/cancel` (turn cancellation, not session kill) and `DELETE /v1/rpc` (connection close, not session termination). Need explicit session destroy/terminate semantics. ## Current v1 State @@ -20,7 +20,7 @@ v1 had explicit session termination (`POST /v1/sessions/{id}/terminate`). v1 onl ### HTTP Endpoint ``` -POST /v1/sessions/{id}/terminate +legacy session terminate endpoint ``` ### Handler (from `router.rs`) diff --git a/research/acp/missing-features-spec/08-model-variants.md b/research/acp/missing-features-spec/08-model-variants.md index aa58304..ae2cdbb 100644 --- a/research/acp/missing-features-spec/08-model-variants.md +++ b/research/acp/missing-features-spec/08-model-variants.md @@ -49,7 +49,7 @@ Returned `AgentModelsResponse` with full model list including variants. ### Session Creation with Variant ``` -POST /v1/sessions +legacy session create endpoint ``` Body included `variant: Option` to select a specific model variant at session creation time. diff --git a/research/acp/missing-features-spec/10-include-raw.md b/research/acp/missing-features-spec/10-include-raw.md index 8197a5c..6d5b693 100644 --- a/research/acp/missing-features-spec/10-include-raw.md +++ b/research/acp/missing-features-spec/10-include-raw.md @@ -34,7 +34,7 @@ pub struct UniversalEvent { ### v1 Usage ``` -GET /v1/sessions/{id}/events?include_raw=true +legacy event polling endpoint with `include_raw=true` ``` When `include_raw=true`, each `UniversalEvent` included the verbatim JSON the agent process emitted before normalization into the universal schema. 
diff --git a/research/acp/missing-features-spec/16-session-info.md b/research/acp/missing-features-spec/16-session-info.md index 39729b8..157e918 100644 --- a/research/acp/missing-features-spec/16-session-info.md +++ b/research/acp/missing-features-spec/16-session-info.md @@ -1,10 +1,10 @@ # Feature 16: Session Info -**Implementation approach:** New HTTP endpoints (`GET /v1/sessions`, `GET /v1/sessions/{id}`) +**Implementation approach:** New session-info HTTP endpoints ## Summary -v1 `SessionInfo` tracked `event_count`, `created_at`, `updated_at`, and full `mcp` config. v1 has session data in the ACP runtime's `MetaSession` struct but no HTTP endpoints to query it. Add REST endpoints for session listing and detail. +v1 `SessionInfo` tracked `event_count`, `created_at`, `updated_at`, and full `mcp` config. v1 has session data in the ACP runtime's `MetaSession` struct but no HTTP endpoints to query it. Add HTTP endpoints for session listing and detail. ## Current v1 State @@ -117,8 +117,8 @@ fn build_session_info(state: &SessionState) -> SessionInfo { ### New HTTP Endpoints ``` -GET /v1/sessions -> SessionListResponse -GET /v1/sessions/{id} -> SessionInfo +session list endpoint -> SessionListResponse +session detail endpoint -> SessionInfo ``` These are control-plane HTTP endpoints (not ACP), providing session visibility without requiring an active ACP connection. 
@@ -156,7 +156,7 @@ Need to add: | File | Change | |------|--------| -| `server/packages/sandbox-agent/src/router.rs` | Add `GET /v1/sessions` and `GET /v1/sessions/{id}` handlers; add response types | +| `server/packages/sandbox-agent/src/router.rs` | Add session list and session detail handlers; add response types | | `server/packages/sandbox-agent/src/acp_runtime/mod.rs` | Add `created_at` to `MetaSession`; add `ended` tracking; expose `list_sessions()` and `get_session()` public methods | | `sdks/typescript/src/client.ts` | Add `listSessions()` and `getSession(id)` methods | | `server/packages/sandbox-agent/tests/v1_api.rs` | Add session listing and detail tests | @@ -165,6 +165,6 @@ Need to add: | Doc | Change | |-----|--------| -| `docs/openapi.json` | Add `/v1/sessions` and `/v1/sessions/{id}` endpoint specs | +| `docs/openapi.json` | Add session list and session detail endpoint specs | | `docs/cli.mdx` | Add CLI `sessions list` and `sessions info` commands | | `docs/sdks/typescript.mdx` | Document session listing SDK methods | diff --git a/research/acp/missing-features-spec/17-error-termination-metadata.md b/research/acp/missing-features-spec/17-error-termination-metadata.md index f50159b..3ba153f 100644 --- a/research/acp/missing-features-spec/17-error-termination-metadata.md +++ b/research/acp/missing-features-spec/17-error-termination-metadata.md @@ -171,7 +171,7 @@ When an agent process terminates with an error: ### Session Info Integration Termination metadata should be accessible via: -- `GET /v1/sessions/{id}` (Feature #16) — include `terminationInfo` in response when session has ended +- the session info endpoint (Feature #16) — include `terminationInfo` in response when session has ended - `session/list` ACP response — include termination status in session entries ### Files to Modify diff --git a/research/acp/missing-features-spec/plan.md b/research/acp/missing-features-spec/plan.md index cc7bcc8..a024655 100644 --- 
a/research/acp/missing-features-spec/plan.md +++ b/research/acp/missing-features-spec/plan.md @@ -36,7 +36,7 @@ Session-level features that build on Phase A runtime tracking. | Order | Feature | Spec | Approach | Effort | |:-----:|--------------------------------------------------------------|:----:|------------------------------------------------------|:------:| -| B1 | [Session Info](./16-session-info.md) | #16 | New `GET /v1/sessions` and `GET /v1/sessions/{id}` | Medium | +| B1 | [Session Info](./16-session-info.md) | #16 | New session info HTTP endpoints | Medium | | B2 | [Session Termination](./07-session-termination.md) | #7 | Idempotent `_sandboxagent/session/terminate` | Medium | | B3 | [Error Termination Metadata](./17-error-termination-metadata.md) | #17 | Stderr capture + `_sandboxagent/session/ended` event | Medium | diff --git a/research/acp/old-rest-openapi-list.md b/research/acp/old-rest-openapi-list.md index 923a65b..5ac15c8 100644 --- a/research/acp/old-rest-openapi-list.md +++ b/research/acp/old-rest-openapi-list.md @@ -17,16 +17,16 @@ | /v1/fs/stat | UNIMPLEMENTED | | /v1/fs/upload-batch | UNIMPLEMENTED | | /v1/health | UNIMPLEMENTED | -| /v1/sessions | session/list (UNSTABLE) | -| /v1/sessions/{session_id} | session/new \| session/load \| session/resume (UNSTABLE) | -| /v1/sessions/{session_id}/events | UNIMPLEMENTED | -| /v1/sessions/{session_id}/events/sse | session/update (notification stream) | -| /v1/sessions/{session_id}/messages | session/prompt | -| /v1/sessions/{session_id}/messages/stream | session/prompt + session/update notifications | -| /v1/sessions/{session_id}/permissions/{permission_id}/reply | session/request_permission response | -| /v1/sessions/{session_id}/questions/{question_id}/reject | UNIMPLEMENTED | -| /v1/sessions/{session_id}/questions/{question_id}/reply | UNIMPLEMENTED | -| /v1/sessions/{session_id}/terminate | session/cancel (turn cancellation only) | +| legacy session list route | session/list (UNSTABLE) | +| 
legacy session create/load/resume route | session/new \| session/load \| session/resume (UNSTABLE) | +| legacy session events polling route | UNIMPLEMENTED | +| legacy session events SSE route | session/update (notification stream) | +| legacy session prompt route | session/prompt | +| legacy session prompt + stream route | session/prompt + session/update notifications | +| legacy permission reply route | session/request_permission response | +| legacy question reject route | UNIMPLEMENTED | +| legacy question reply route | UNIMPLEMENTED | +| legacy session terminate route | session/cancel (turn cancellation only) | | AgentCapabilities | initialize.result.agentCapabilities | | AgentCapabilities.commandExecution | UNIMPLEMENTED | | AgentCapabilities.errorEvents | UNIMPLEMENTED | @@ -427,7 +427,7 @@ - `UNIMPLEMENTED` means there is no ACP-standard field/method with equivalent semantics in `schema.unstable.json`; implementation would require ACP extension methods (`_...`) and/or `_meta` payloads. - Rows mapped to `_meta[...]` are ACP-compatible extensions, not standard interoperable ACP fields; both sides must agree on names and semantics. -- Legacy event polling (`/v1/sessions/{session_id}/events`) has no ACP equivalent; ACP is stream-first via `session/update` notifications over streamable HTTP. +- Legacy event polling has no ACP equivalent; ACP is stream-first via `session/update` notifications over streamable HTTP. - Session lifecycle differs: ACP has `session/new`, `session/load`, `session/resume` (UNSTABLE), and `session/fork` (UNSTABLE), but no standard explicit "close session" method. - Permission handling is request/response (`session/request_permission`) tied to JSON-RPC request IDs; it does not use standalone REST reply endpoints. - Question/answer HITL flow in the old schema has no standard ACP equivalent today (separate from permission prompts). 
diff --git a/research/acp/spec.md b/research/acp/spec.md index 3923d29..f7130fe 100644 --- a/research/acp/spec.md +++ b/research/acp/spec.md @@ -233,8 +233,6 @@ Non-ACP endpoints retained in v1: - `GET /v1/health` - `GET /v1/agents` (capabilities + install status) - `POST /v1/agents/{agent}/install` -- `GET /v1/sessions` -- `GET /v1/sessions/{id}` - `GET /v1/fs/file` - `PUT /v1/fs/file` - `POST /v1/fs/upload-batch` diff --git a/research/acp/v1-schema-to-acp-mapping.md b/research/acp/v1-schema-to-acp-mapping.md index d6f2e68..ea7740b 100644 --- a/research/acp/v1-schema-to-acp-mapping.md +++ b/research/acp/v1-schema-to-acp-mapping.md @@ -54,16 +54,16 @@ Extension namespace used in this spec: | `POST /v1/fs/move` | `POST /v1/fs/move` | HTTP platform API | Port v1 behavior. | | `GET /v1/fs/stat` | `GET /v1/fs/stat` | HTTP platform API | Port v1 behavior. | | `POST /v1/fs/upload-batch` | `POST /v1/fs/upload-batch` | HTTP platform API | Tar upload/extract behavior from v1. | -| `GET /v1/sessions` | `GET /v1/sessions` | HTTP control-plane | Session inventory without ACP connection requirement. | -| `POST /v1/sessions/{session_id}` | `session/new` | Standard | Path `session_id` becomes alias in `_meta["sandboxagent.dev"].requestedSessionId`. | -| `POST /v1/sessions/{session_id}/messages` | `session/prompt` | Standard | Asynchronous behavior comes from transport (request + stream). | -| `POST /v1/sessions/{session_id}/messages/stream` | `session/prompt` + consume `session/update` on SSE | Standard | Streaming is transport-level, not a distinct ACP method. | -| `POST /v1/sessions/{session_id}/terminate` | `_sandboxagent/session/terminate` | Extension | Idempotent termination semantics distinct from `DELETE /v1/rpc`. | -| `GET /v1/sessions/{session_id}/events` | `_sandboxagent/session/events` (poll view over ACP stream) | Extension | Optional compatibility helper; canonical v1 is stream consumption. 
| -| `GET /v1/sessions/{session_id}/events/sse` | `GET /v1/rpc` SSE stream | Standard transport | Filter by sessionId client-side or via connection/session binding. | -| `POST /v1/sessions/{session_id}/permissions/{permission_id}/reply` | JSON-RPC response to pending `session/request_permission` request id | Standard | Bridge `permission_id` to request `id` in transport state. | -| `POST /v1/sessions/{session_id}/questions/{question_id}/reply` | JSON-RPC response to pending `_sandboxagent/session/request_question` | Extension | ACP stable has no generic question/HITL request method. | -| `POST /v1/sessions/{session_id}/questions/{question_id}/reject` | JSON-RPC response to pending `_sandboxagent/session/request_question` | Extension | Encode rejection in response outcome. | +| legacy session list route | session/list | HTTP control-plane | Session inventory without ACP connection requirement. | +| legacy session create route | `session/new` | Standard | Path `session_id` becomes alias in `_meta["sandboxagent.dev"].requestedSessionId`. | +| legacy session prompt route | `session/prompt` | Standard | Asynchronous behavior comes from transport (request + stream). | +| legacy session prompt + stream route | `session/prompt` + consume `session/update` on SSE | Standard | Streaming is transport-level, not a distinct ACP method. | +| legacy session terminate route | `_sandboxagent/session/terminate` | Extension | Idempotent termination semantics distinct from `DELETE /v1/rpc`. | +| legacy event polling route | `_sandboxagent/session/events` (poll view over ACP stream) | Extension | Optional compatibility helper; canonical v1 is stream consumption. | +| legacy event SSE route | `GET /v1/rpc` SSE stream | Standard transport | Filter by sessionId client-side or via connection/session binding. | +| legacy permission reply route | JSON-RPC response to pending `session/request_permission` request id | Standard | Bridge `permission_id` to request `id` in transport state. 
| +| legacy question reply route | JSON-RPC response to pending `_sandboxagent/session/request_question` | Extension | ACP stable has no generic question/HITL request method. | +| legacy question reject route | JSON-RPC response to pending `_sandboxagent/session/request_question` | Extension | Encode rejection in response outcome. | ### 3.1 `CreateSessionRequest` field mapping diff --git a/scripts/release/update_version.ts b/scripts/release/update_version.ts index 9d39beb..9dfa305 100644 --- a/scripts/release/update_version.ts +++ b/scripts/release/update_version.ts @@ -45,7 +45,6 @@ const VERSION_REFERENCE_FILES = [ "scripts/release/main.ts", "scripts/release/promote-artifacts.ts", "scripts/release/sdk.ts", - "scripts/sandbox-testing/test-sandbox.ts", ]; export async function updateVersion(opts: ReleaseOpts) { @@ -148,7 +147,7 @@ async function updateVersionReferences(opts: ReleaseOpts, oldVersion: string, ol const original = content; - // Replace minor channel references (e.g. sandbox-agent@0.3.x -> sandbox-agent@0.4.x) + // Replace minor channel references (sandbox-agent@oldMinorChannel -> sandbox-agent@newMinorChannel) content = content.replaceAll(`sandbox-agent@${oldMinorChannel}`, `sandbox-agent@${newMinorChannel}`); content = content.replaceAll(`@sandbox-agent/cli@${oldMinorChannel}`, `@sandbox-agent/cli@${newMinorChannel}`); content = content.replaceAll(`@sandbox-agent/react@${oldMinorChannel}`, `@sandbox-agent/react@${newMinorChannel}`); @@ -156,6 +155,13 @@ async function updateVersionReferences(opts: ReleaseOpts, oldVersion: string, ol // Replace install script URL channel content = content.replaceAll(`releases.rivet.dev/sandbox-agent/${oldMinorChannel}/`, `releases.rivet.dev/sandbox-agent/${newMinorChannel}/`); + // If references drifted (for example Cargo.toml version was bumped without updating docs), + // normalize any other pinned minor-channel references to the release's channel. 
+ content = content.replaceAll(/sandbox-agent@0\.\d+\.x/g, `sandbox-agent@${newMinorChannel}`); + content = content.replaceAll(/@sandbox-agent\/cli@0\.\d+\.x/g, `@sandbox-agent/cli@${newMinorChannel}`); + content = content.replaceAll(/@sandbox-agent\/react@0\.\d+\.x/g, `@sandbox-agent/react@${newMinorChannel}`); + content = content.replaceAll(/releases\.rivet\.dev\/sandbox-agent\/0\.\d+\.x\//g, `releases.rivet.dev/sandbox-agent/${newMinorChannel}/`); + // Replace Docker image tags (rivetdev/sandbox-agent:-full -> rivetdev/sandbox-agent:-full) content = content.replaceAll( new RegExp(`rivetdev/sandbox-agent:[0-9]+\\.[0-9]+\\.[0-9]+(?:-[a-zA-Z0-9.]+)?-full`, "g"), @@ -174,7 +180,7 @@ async function updateVersionReferences(opts: ReleaseOpts, oldVersion: string, ol } if (modifiedFiles.length > 0) { - await $({ cwd: opts.root })`git add ${modifiedFiles}`; + await $({ cwd: opts.root })`git add -f ${modifiedFiles}`; console.log(`\nUpdated ${modifiedFiles.length} files with version references.`); } else { console.log(`\nNo version reference files needed updates.`); diff --git a/scripts/sandbox-testing/package.json b/scripts/sandbox-testing/package.json deleted file mode 100644 index 72f162e..0000000 --- a/scripts/sandbox-testing/package.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "name": "@sandbox-agent/testing", - "private": true, - "type": "module", - "scripts": { - "test": "tsx test-sandbox.ts", - "test:docker": "tsx test-sandbox.ts docker", - "test:daytona": "tsx test-sandbox.ts daytona", - "test:mock": "tsx test-sandbox.ts docker --agent=mock", - "test:verbose": "tsx test-sandbox.ts docker --verbose" - }, - "dependencies": { - "@daytonaio/sdk": "latest", - "@e2b/code-interpreter": "latest" - }, - "devDependencies": { - "@types/node": "latest", - "tsx": "latest", - "typescript": "latest" - } -} diff --git a/scripts/sandbox-testing/test-sandbox.ts b/scripts/sandbox-testing/test-sandbox.ts deleted file mode 100644 index 91c837e..0000000 --- 
a/scripts/sandbox-testing/test-sandbox.ts +++ /dev/null @@ -1,720 +0,0 @@ -#!/usr/bin/env npx tsx -/** - * Sandbox Testing Script - * - * Tests sandbox-agent on various cloud sandbox providers. - * Usage: npx tsx test-sandbox.ts [provider] [options] - * - * Providers: daytona, e2b, docker - * - * Options: - * --skip-build Skip cargo build step - * --use-release Use pre-built release binary from releases.rivet.dev - * --agent Test specific agent (claude, codex, mock) - * --skip-agent-install Skip pre-installing agents (tests on-demand install like Daytona example) - * --keep-alive Don't cleanup sandbox after test - * --verbose Show all logs - */ - -import { execSync, spawn } from "node:child_process"; -import { existsSync, readFileSync, mkdtempSync, writeFileSync, rmSync } from "node:fs"; -import { homedir, tmpdir } from "node:os"; -import { join, dirname } from "node:path"; -import { fileURLToPath } from "node:url"; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const ROOT_DIR = join(__dirname, "../.."); -const SERVER_DIR = join(ROOT_DIR, "server"); - -// Parse args -const args = process.argv.slice(2); -const provider = args.find((a) => !a.startsWith("--")) || "docker"; -const skipBuild = args.includes("--skip-build"); -const useRelease = args.includes("--use-release"); -const skipAgentInstall = args.includes("--skip-agent-install"); -const keepAlive = args.includes("--keep-alive"); -const verbose = args.includes("--verbose"); -const agentArg = args.find((a) => a.startsWith("--agent="))?.split("=")[1]; - -// Colors -const log = { - info: (msg: string) => console.log(`\x1b[34m[INFO]\x1b[0m ${msg}`), - success: (msg: string) => console.log(`\x1b[32m[OK]\x1b[0m ${msg}`), - error: (msg: string) => console.log(`\x1b[31m[ERROR]\x1b[0m ${msg}`), - warn: (msg: string) => console.log(`\x1b[33m[WARN]\x1b[0m ${msg}`), - debug: (msg: string) => verbose && console.log(`\x1b[90m[DEBUG]\x1b[0m ${msg}`), - section: (msg: string) => console.log(`\n\x1b[1m=== ${msg} 
===\x1b[0m`), -}; - -// Credentials extraction using sandbox-agent CLI -function extractCredentials(): { anthropicApiKey?: string; openaiApiKey?: string } { - // First check environment variables - const envCreds = { - anthropicApiKey: process.env.ANTHROPIC_API_KEY, - openaiApiKey: process.env.OPENAI_API_KEY, - }; - - // If both are set in env, use them - if (envCreds.anthropicApiKey && envCreds.openaiApiKey) { - return envCreds; - } - - // Try to extract using sandbox-agent CLI - try { - const binaryPath = join(ROOT_DIR, "target/release/sandbox-agent"); - const debugBinaryPath = join(ROOT_DIR, "target/debug/sandbox-agent"); - const binary = existsSync(binaryPath) ? binaryPath : existsSync(debugBinaryPath) ? debugBinaryPath : null; - - if (binary) { - const output = execSync(`${binary} credentials extract-env --export`, { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - }); - - // Parse export statements: export KEY="value" - for (const line of output.split("\n")) { - const match = line.match(/^export (\w+)="(.*)"/); - if (match) { - const [, key, value] = match; - if (key === "ANTHROPIC_API_KEY" && !envCreds.anthropicApiKey) { - envCreds.anthropicApiKey = value; - } else if (key === "OPENAI_API_KEY" && !envCreds.openaiApiKey) { - envCreds.openaiApiKey = value; - } - } - } - log.debug(`Extracted credentials via sandbox-agent CLI`); - } - } catch (err) { - log.debug(`Failed to extract credentials via CLI: ${err}`); - } - - return envCreds; -} - -function getAnthropicApiKey(): string | undefined { - return extractCredentials().anthropicApiKey; -} - -function getOpenAiApiKey(): string | undefined { - return extractCredentials().openaiApiKey; -} - -// Build sandbox-agent -async function buildSandboxAgent(): Promise { - log.section("Building sandbox-agent"); - - if (useRelease) { - log.info("Using pre-built release from releases.rivet.dev"); - return "RELEASE"; - } - - // Binary is in workspace root target dir, not server target dir - const binaryPath = 
join(ROOT_DIR, "target/release/sandbox-agent"); - - if (skipBuild) { - if (!existsSync(binaryPath)) { - throw new Error(`Binary not found at ${binaryPath}. Run without --skip-build.`); - } - log.info(`Using existing binary: ${binaryPath}`); - return binaryPath; - } - - log.info("Running cargo build --release..."); - try { - execSync("cargo build --release -p sandbox-agent", { - cwd: ROOT_DIR, - stdio: verbose ? "inherit" : "pipe", - }); - log.success(`Built: ${binaryPath}`); - return binaryPath; - } catch (err) { - throw new Error(`Build failed: ${err}`); - } -} - -// Provider interface -interface SandboxProvider { - name: string; - requiredEnv: string[]; - create(opts: { envVars: Record }): Promise; -} - -interface Sandbox { - id: string; - exec(cmd: string): Promise<{ stdout: string; stderr: string; exitCode: number }>; - upload(localPath: string, remotePath: string): Promise; - getBaseUrl(port: number): Promise; - cleanup(): Promise; -} - -// Docker provider -// Uses Ubuntu because Claude Code and sandbox-agent are glibc binaries -const dockerProvider: SandboxProvider = { - name: "docker", - requiredEnv: [], - async create({ envVars }) { - const id = `sandbox-test-${Date.now()}`; - const envArgs = Object.entries(envVars) - .map(([k, v]) => `-e ${k}=${v}`) - .join(" "); - - log.info(`Creating Docker container: ${id}`); - execSync(`docker run -d --name ${id} ${envArgs} -p 0:3000 ubuntu:22.04 tail -f /dev/null`, { stdio: verbose ? "inherit" : "pipe" }); - - // Install dependencies - execSync(`docker exec ${id} sh -c "apt-get update && apt-get install -y curl ca-certificates"`, { - stdio: verbose ? 
"inherit" : "pipe", - }); - - return { - id, - async exec(cmd) { - try { - const stdout = execSync(`docker exec ${id} sh -c "${cmd.replace(/"/g, '\\"')}"`, { - encoding: "utf-8", - stdio: ["pipe", "pipe", "pipe"], - }); - return { stdout, stderr: "", exitCode: 0 }; - } catch (err: any) { - return { stdout: err.stdout || "", stderr: err.stderr || "", exitCode: err.status || 1 }; - } - }, - async upload(localPath, remotePath) { - execSync(`docker cp "${localPath}" ${id}:${remotePath}`, { stdio: verbose ? "inherit" : "pipe" }); - execSync(`docker exec ${id} chmod +x ${remotePath}`, { stdio: verbose ? "inherit" : "pipe" }); - }, - async getBaseUrl(port) { - const portMapping = execSync(`docker port ${id} ${port}`, { encoding: "utf-8" }).trim(); - const hostPort = portMapping.split(":").pop(); - return `http://localhost:${hostPort}`; - }, - async cleanup() { - log.info(`Cleaning up container: ${id}`); - execSync(`docker rm -f ${id}`, { stdio: "pipe" }); - }, - }; - }, -}; - -// Daytona provider -const daytonaProvider: SandboxProvider = { - name: "daytona", - requiredEnv: ["DAYTONA_API_KEY"], - async create({ envVars }) { - const { Daytona } = await import("@daytonaio/sdk"); - const daytona = new Daytona(); - - log.info("Creating Daytona sandbox..."); - // NOTE: Tier 1/2 sandboxes have restricted network that cannot be overridden - // networkAllowList requires CIDR notation (IP ranges), not domain names - const sandbox = await daytona.create({ - image: "ubuntu:22.04", - envVars, - }); - const id = sandbox.id; - - // Install curl - await sandbox.process.executeCommand("apt-get update && apt-get install -y curl ca-certificates"); - - return { - id, - async exec(cmd) { - const result = await sandbox.process.executeCommand(cmd); - // Daytona SDK returns: { exitCode, result: string, artifacts: { stdout: string } } - return { - stdout: result.result || "", - stderr: "", - exitCode: result.exitCode ?? 
0, - }; - }, - async upload(localPath, remotePath) { - const content = readFileSync(localPath); - // Daytona SDK signature: uploadFile(Buffer, remotePath) - await sandbox.fs.uploadFile(content, remotePath); - await sandbox.process.executeCommand(`chmod +x ${remotePath}`); - }, - async getBaseUrl(port) { - const preview = await sandbox.getSignedPreviewUrl(port, 4 * 60 * 60); - return preview.url; - }, - async cleanup() { - log.info(`Cleaning up Daytona sandbox: ${id}`); - await sandbox.delete(60); - }, - }; - }, -}; - -// E2B provider -const e2bProvider: SandboxProvider = { - name: "e2b", - requiredEnv: ["E2B_API_KEY"], - async create({ envVars }) { - const { Sandbox } = await import("@e2b/code-interpreter"); - - log.info("Creating E2B sandbox..."); - let sandbox; - try { - sandbox = await Sandbox.create({ - allowInternetAccess: true, - envs: envVars, - }); - } catch (err: any) { - log.error(`E2B sandbox creation failed: ${err.message || err}`); - throw err; - } - const id = sandbox.sandboxId; - - // Install curl (E2B uses Debian which has glibc, sandbox-agent will auto-detect) - const installResult = await sandbox.commands.run("sudo apt-get update && sudo apt-get install -y curl ca-certificates"); - log.debug(`Install output: ${installResult.stdout} ${installResult.stderr}`); - - return { - id, - async exec(cmd) { - const result = await sandbox.commands.run(cmd); - return { - stdout: result.stdout || "", - stderr: result.stderr || "", - exitCode: result.exitCode, - }; - }, - async upload(localPath, remotePath) { - const content = readFileSync(localPath); - await sandbox.files.write(remotePath, content); - await sandbox.commands.run(`chmod +x ${remotePath}`); - }, - async getBaseUrl(port) { - return `https://${sandbox.getHost(port)}`; - }, - async cleanup() { - log.info(`Cleaning up E2B sandbox: ${id}`); - await sandbox.kill(); - }, - }; - }, -}; - -// Get provider -function getProvider(name: string): SandboxProvider { - switch (name) { - case "docker": - return 
dockerProvider; - case "daytona": - return daytonaProvider; - case "e2b": - return e2bProvider; - default: - throw new Error(`Unknown provider: ${name}. Available: docker, daytona, e2b`); - } -} - -// Install sandbox-agent in sandbox -async function installSandboxAgent(sandbox: Sandbox, binaryPath: string): Promise { - log.section("Installing sandbox-agent"); - - if (binaryPath === "RELEASE") { - log.info("Installing from releases.rivet.dev..."); - const result = await sandbox.exec("curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh"); - log.debug(`Install output: ${result.stdout}`); - if (result.exitCode !== 0) { - throw new Error(`Install failed: ${result.stderr}`); - } - } else { - log.info(`Uploading local binary: ${binaryPath}`); - await sandbox.upload(binaryPath, "/usr/local/bin/sandbox-agent"); - } - - // Verify installation - const version = await sandbox.exec("sandbox-agent --version"); - log.success(`Installed: ${version.stdout.trim()}`); -} - -// Install agents -async function installAgents(sandbox: Sandbox, agents: string[]): Promise { - log.section("Installing agents"); - - for (const agent of agents) { - log.info(`Installing ${agent}...`); - - if (agent === "claude" || agent === "codex") { - const result = await sandbox.exec(`sandbox-agent install-agent ${agent}`); - if (result.exitCode !== 0) throw new Error(`Failed to install ${agent}: ${result.stderr}`); - log.success(`Installed ${agent}`); - } else if (agent === "mock") { - // Mock agent is built into sandbox-agent, no install needed - log.info("Mock agent is built-in, skipping install"); - } - } -} - -// Start server and check health -async function startServerAndCheckHealth(sandbox: Sandbox): Promise { - log.section("Starting server"); - - // Start server in background - await sandbox.exec("nohup sandbox-agent server --no-token --host 0.0.0.0 --port 3000 >/tmp/sandbox-agent.log 2>&1 &"); - log.info("Server started in background"); - - // Get base URL - const baseUrl = 
await sandbox.getBaseUrl(3000); - log.info(`Base URL: ${baseUrl}`); - - // Wait for health - log.info("Waiting for health check..."); - for (let i = 0; i < 30; i++) { - try { - const response = await fetch(`${baseUrl}/v1/health`); - if (response.ok) { - const data = await response.json(); - if (data.status === "ok") { - log.success("Health check passed!"); - return baseUrl; - } - } - } catch {} - await new Promise((r) => setTimeout(r, 1000)); - } - - // Show logs on failure - const logs = await sandbox.exec("cat /tmp/sandbox-agent.log"); - log.error("Server logs:\n" + logs.stdout); - throw new Error("Health check failed after 30 seconds"); -} - -// Send a message and wait for response, auto-approving permissions -// Returns the response text -async function sendMessage(baseUrl: string, sessionId: string, message: string): Promise { - log.info(`Sending message: "${message.slice(0, 60)}${message.length > 60 ? "..." : ""}"`); - const msgRes = await fetch(`${baseUrl}/v1/sessions/${sessionId}/messages/stream`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ message }), - }); - if (!msgRes.ok || !msgRes.body) { - throw new Error(`Failed to send message: ${await msgRes.text()}`); - } - - // Process SSE stream - const reader = msgRes.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - let responseText = ""; - let receivedText = false; - let hasError = false; - let errorMessage = ""; - let pendingPermission: string | null = null; - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - - buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split("\n"); - buffer = lines.pop() || ""; - - for (const line of lines) { - if (!line.startsWith("data: ")) continue; - const data = line.slice(6); - if (data === "[DONE]") continue; - - try { - const event = JSON.parse(data); - log.debug(`Event: ${event.type}`); - - if (event.type === "item.delta") { - const delta = 
event.data?.delta; - const text = typeof delta === "string" ? delta : delta?.text || ""; - if (text) { - if (!receivedText) { - log.info("Receiving response..."); - receivedText = true; - } - process.stdout.write(text); - responseText += text; - } - } - - // Handle permission requests - auto-approve - if (event.type === "permission.requested") { - const permissionId = event.data?.permission_id; - if (permissionId) { - pendingPermission = permissionId; - log.info(`Permission requested (${permissionId}), auto-approving...`); - } - } - - if (event.type === "error") { - hasError = true; - errorMessage = event.data?.message || JSON.stringify(event.data); - log.error(`Error event: ${errorMessage}`); - } - - if (event.type === "agent.unparsed") { - hasError = true; - errorMessage = `Agent unparsed: ${JSON.stringify(event.data)}`; - log.error(errorMessage); - } - } catch {} - } - - // If we have a pending permission, approve it - if (pendingPermission) { - const permId = pendingPermission; - pendingPermission = null; - try { - const approveRes = await fetch(`${baseUrl}/v1/sessions/${sessionId}/permissions/${permId}/reply`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ reply: "once" }), - }); - if (approveRes.ok) { - log.success(`Permission ${permId} approved`); - } else { - log.warn(`Failed to approve permission: ${await approveRes.text()}`); - } - } catch (err) { - log.warn(`Error approving permission: ${err}`); - } - } - } - - if (receivedText) { - console.log(); // newline after response - } - - if (hasError) { - throw new Error(`Agent returned error: ${errorMessage}`); - } - - return responseText; -} - -// Test agent interaction -async function testAgent(baseUrl: string, agent: string, message: string): Promise { - log.section(`Testing ${agent} agent`); - - const sessionId = crypto.randomUUID(); - - // Create session - log.info(`Creating session ${sessionId}...`); - const createRes = await 
fetch(`${baseUrl}/v1/sessions/${sessionId}`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ agent }), - }); - if (!createRes.ok) { - throw new Error(`Failed to create session: ${await createRes.text()}`); - } - log.success("Session created"); - - const response = await sendMessage(baseUrl, sessionId, message); - if (!response) { - throw new Error("No response received from agent"); - } - log.success("Received response from agent"); -} - -// Test that agent can actually modify files and run commands -async function testAgentActions(baseUrl: string, agent: string, sandbox: Sandbox): Promise { - log.section(`Testing ${agent} agent actions (file + command)`); - - const sessionId = crypto.randomUUID(); - const testFile = "/tmp/sandbox-test-file.txt"; - const expectedContent = "Hello from sandbox test!"; - - // For Claude running as root in containers, we must use default permission mode - // and handle permissions via the API (bypass mode is not supported as root). - // For other agents, we can use bypass mode. - const permissionMode = agent === "claude" ? 
"default" : "bypass"; - log.info(`Creating session ${sessionId} with permissionMode=${permissionMode}...`); - const createRes = await fetch(`${baseUrl}/v1/sessions/${sessionId}`, { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ agent, permissionMode }), - }); - if (!createRes.ok) { - throw new Error(`Failed to create session: ${await createRes.text()}`); - } - log.success("Session created"); - - // Ask agent to create a file - const fileMessage = `Create a file at ${testFile} with exactly this content (no quotes, no extra text): ${expectedContent}`; - await sendMessage(baseUrl, sessionId, fileMessage); - - // Wait for agent to complete action after permission approval - log.info("Waiting for agent to complete action..."); - await new Promise((r) => setTimeout(r, 5000)); - - // Verify file was created - log.info("Verifying file was created..."); - const fileCheck = await sandbox.exec(`cat ${testFile} 2>&1`); - if (fileCheck.exitCode !== 0) { - throw new Error(`File was not created: ${fileCheck.stderr || fileCheck.stdout}`); - } - if (!fileCheck.stdout.includes("Hello from sandbox test")) { - throw new Error(`File content mismatch. Expected "${expectedContent}", got "${fileCheck.stdout.trim()}"`); - } - log.success(`File created with correct content: "${fileCheck.stdout.trim()}"`); - - // Ask agent to run a command and create output - const cmdMessage = `Run this command and tell me the output: echo "command-test-$(date +%s)" > /tmp/cmd-output.txt && cat /tmp/cmd-output.txt`; - await sendMessage(baseUrl, sessionId, cmdMessage); - - // Verify command was executed - log.info("Verifying command was executed..."); - const cmdCheck = await sandbox.exec("cat /tmp/cmd-output.txt 2>&1"); - if (cmdCheck.exitCode !== 0) { - throw new Error(`Command output file not found: ${cmdCheck.stderr || cmdCheck.stdout}`); - } - if (!cmdCheck.stdout.includes("command-test-")) { - throw new Error(`Command output mismatch. 
Expected "command-test-*", got "${cmdCheck.stdout.trim()}"`); - } - log.success(`Command executed successfully: "${cmdCheck.stdout.trim()}"`); -} - -// Check environment diagnostics -async function checkEnvironment(sandbox: Sandbox): Promise { - log.section("Environment diagnostics"); - - const checks = [ - { name: "Environment variables", cmd: "env | grep -E 'ANTHROPIC|OPENAI|CLAUDE|CODEX' | sed 's/=.*/=/'" }, - // Check both /root (Alpine) and /home/user (E2B/Debian) paths - { - name: "Agent binaries", - cmd: "ls -la ~/.local/share/sandbox-agent/bin/ 2>/dev/null || ls -la /root/.local/share/sandbox-agent/bin/ 2>/dev/null || ls -la /home/user/.local/share/sandbox-agent/bin/ 2>/dev/null || echo 'No agents installed'", - }, - { - name: "Claude version", - cmd: "~/.local/share/sandbox-agent/bin/claude --version 2>&1 || /root/.local/share/sandbox-agent/bin/claude --version 2>&1 || echo 'Claude not installed'", - }, - { name: "sandbox-agent version", cmd: "sandbox-agent --version 2>/dev/null || echo 'Not installed'" }, - { name: "Server process", cmd: "pgrep -a sandbox-agent 2>/dev/null || ps aux | grep sandbox-agent | grep -v grep || echo 'Not running'" }, - { name: "Server logs (last 50 lines)", cmd: "tail -50 /tmp/sandbox-agent.log 2>/dev/null || echo 'No logs'" }, - { - name: "Network: api.anthropic.com", - cmd: "curl -s -o /dev/null -w '%{http_code}' --connect-timeout 5 https://api.anthropic.com/v1/messages 2>&1 || echo 'UNREACHABLE'", - }, - { - name: "Network: api.openai.com", - cmd: "curl -s -o /dev/null -w '%{http_code}' --connect-timeout 5 https://api.openai.com/v1/models 2>&1 || echo 'UNREACHABLE'", - }, - ]; - - for (const { name, cmd } of checks) { - const result = await sandbox.exec(cmd); - console.log(`\n\x1b[1m${name}:\x1b[0m`); - console.log(result.stdout || "(empty)"); - if (result.stderr) console.log(`stderr: ${result.stderr}`); - } -} - -// Main -async function main() { - log.section(`Sandbox Testing (provider: ${provider})`); - - // Check 
credentials - const anthropicKey = getAnthropicApiKey(); - const openaiKey = getOpenAiApiKey(); - - log.info(`Anthropic API key: ${anthropicKey ? "found" : "not found"}`); - log.info(`OpenAI API key: ${openaiKey ? "found" : "not found"}`); - - // Determine which agents to test - let agents: string[]; - if (agentArg) { - agents = [agentArg]; - } else if (anthropicKey) { - agents = ["claude"]; - } else if (openaiKey) { - agents = ["codex"]; - } else { - agents = ["mock"]; - log.warn("No API keys found, using mock agent only"); - } - log.info(`Agents to test: ${agents.join(", ")}`); - - // Get provider - const prov = getProvider(provider); - - // Check required env vars - for (const envVar of prov.requiredEnv) { - if (!process.env[envVar]) { - throw new Error(`Missing required environment variable: ${envVar}`); - } - } - - // Build - const binaryPath = await buildSandboxAgent(); - - // Create sandbox - log.section(`Creating ${prov.name} sandbox`); - const envVars: Record = {}; - if (anthropicKey) envVars.ANTHROPIC_API_KEY = anthropicKey; - if (openaiKey) envVars.OPENAI_API_KEY = openaiKey; - - const sandbox = await prov.create({ envVars }); - log.success(`Created sandbox: ${sandbox.id}`); - - try { - // Install sandbox-agent - await installSandboxAgent(sandbox, binaryPath); - - // Install agents (unless --skip-agent-install to test on-demand install like Daytona example) - if (skipAgentInstall) { - log.info("Skipping agent pre-install (testing on-demand installation)"); - } else { - await installAgents(sandbox, agents); - } - - // Check environment - await checkEnvironment(sandbox); - - // Start server and check health - const baseUrl = await startServerAndCheckHealth(sandbox); - - // Test each agent - for (const agent of agents) { - // Basic response test - const message = agent === "mock" ? "hello" : "Say hello in 10 words or less"; - await testAgent(baseUrl, agent, message); - - // For real agents, also test file/command actions with permission handling. 
- // Claude uses default permission mode and we auto-approve via API. - // Other agents can use bypass mode. - if (agent !== "mock") { - await testAgentActions(baseUrl, agent, sandbox); - } - } - - log.section("All tests passed!"); - - if (keepAlive) { - log.info(`Sandbox ${sandbox.id} is still running. Press Ctrl+C to cleanup.`); - log.info(`Base URL: ${await sandbox.getBaseUrl(3000)}`); - await new Promise(() => {}); // Wait forever - } - } catch (err) { - log.error(`Test failed: ${err}`); - - // Show diagnostics on failure - try { - await checkEnvironment(sandbox); - } catch {} - - if (!keepAlive) { - await sandbox.cleanup(); - } - process.exit(1); - } - - if (!keepAlive) { - await sandbox.cleanup(); - } -} - -main().catch((err) => { - log.error(err.message || err); - process.exit(1); -}); diff --git a/sdks/acp-http-client/package.json b/sdks/acp-http-client/package.json index 37821ee..0d61dc3 100644 --- a/sdks/acp-http-client/package.json +++ b/sdks/acp-http-client/package.json @@ -1,6 +1,6 @@ { "name": "acp-http-client", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "Protocol-faithful ACP JSON-RPC over streamable HTTP client.", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli-shared/package.json b/sdks/cli-shared/package.json index ba3840a..4b9a0ae 100644 --- a/sdks/cli-shared/package.json +++ b/sdks/cli-shared/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli-shared", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "Shared helpers for sandbox-agent CLI and SDK", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli/package.json b/sdks/cli/package.json index 17b7804..a7e42c1 100644 --- a/sdks/cli/package.json +++ b/sdks/cli/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "CLI for sandbox-agent - run AI coding agents in sandboxes", "license": "Apache-2.0", "repository": { diff --git 
a/sdks/cli/platforms/darwin-arm64/package.json b/sdks/cli/platforms/darwin-arm64/package.json index 324ba83..9ed1a85 100644 --- a/sdks/cli/platforms/darwin-arm64/package.json +++ b/sdks/cli/platforms/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli-darwin-arm64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "sandbox-agent CLI binary for macOS ARM64", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli/platforms/darwin-x64/package.json b/sdks/cli/platforms/darwin-x64/package.json index 0853bf8..6379cdf 100644 --- a/sdks/cli/platforms/darwin-x64/package.json +++ b/sdks/cli/platforms/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli-darwin-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "sandbox-agent CLI binary for macOS x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli/platforms/linux-arm64/package.json b/sdks/cli/platforms/linux-arm64/package.json index 497122f..bbd677a 100644 --- a/sdks/cli/platforms/linux-arm64/package.json +++ b/sdks/cli/platforms/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli-linux-arm64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "sandbox-agent CLI binary for Linux arm64", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli/platforms/linux-x64/package.json b/sdks/cli/platforms/linux-x64/package.json index d245c4b..9793e98 100644 --- a/sdks/cli/platforms/linux-x64/package.json +++ b/sdks/cli/platforms/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/cli-linux-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "sandbox-agent CLI binary for Linux x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/cli/platforms/win32-x64/package.json b/sdks/cli/platforms/win32-x64/package.json index 4254298..0fec6cd 100644 --- a/sdks/cli/platforms/win32-x64/package.json +++ b/sdks/cli/platforms/win32-x64/package.json @@ -1,6 +1,6 @@ { "name": 
"@sandbox-agent/cli-win32-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "sandbox-agent CLI binary for Windows x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/gigacode/package.json b/sdks/gigacode/package.json index 47249f4..80ed110 100644 --- a/sdks/gigacode/package.json +++ b/sdks/gigacode/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "Gigacode CLI (sandbox-agent with OpenCode attach by default)", "license": "Apache-2.0", "repository": { diff --git a/sdks/gigacode/platforms/darwin-arm64/package.json b/sdks/gigacode/platforms/darwin-arm64/package.json index 35c5199..5a347ba 100644 --- a/sdks/gigacode/platforms/darwin-arm64/package.json +++ b/sdks/gigacode/platforms/darwin-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode-darwin-arm64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "gigacode CLI binary for macOS arm64", "license": "Apache-2.0", "repository": { diff --git a/sdks/gigacode/platforms/darwin-x64/package.json b/sdks/gigacode/platforms/darwin-x64/package.json index cd662f5..976bdb4 100644 --- a/sdks/gigacode/platforms/darwin-x64/package.json +++ b/sdks/gigacode/platforms/darwin-x64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode-darwin-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "gigacode CLI binary for macOS x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/gigacode/platforms/linux-arm64/package.json b/sdks/gigacode/platforms/linux-arm64/package.json index 7c65185..94ee741 100644 --- a/sdks/gigacode/platforms/linux-arm64/package.json +++ b/sdks/gigacode/platforms/linux-arm64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode-linux-arm64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "gigacode CLI binary for Linux arm64", "license": "Apache-2.0", "repository": { diff --git 
a/sdks/gigacode/platforms/linux-x64/package.json b/sdks/gigacode/platforms/linux-x64/package.json index de9936a..e6c8f36 100644 --- a/sdks/gigacode/platforms/linux-x64/package.json +++ b/sdks/gigacode/platforms/linux-x64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode-linux-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "gigacode CLI binary for Linux x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/gigacode/platforms/win32-x64/package.json b/sdks/gigacode/platforms/win32-x64/package.json index 74a1572..4458d3b 100644 --- a/sdks/gigacode/platforms/win32-x64/package.json +++ b/sdks/gigacode/platforms/win32-x64/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/gigacode-win32-x64", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "gigacode CLI binary for Windows x64", "license": "Apache-2.0", "repository": { diff --git a/sdks/persist-indexeddb/package.json b/sdks/persist-indexeddb/package.json index 7dd6fb6..98c59c7 100644 --- a/sdks/persist-indexeddb/package.json +++ b/sdks/persist-indexeddb/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/persist-indexeddb", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "IndexedDB persistence driver for the Sandbox Agent TypeScript SDK (DEPRECATED)", "license": "Apache-2.0", "repository": { diff --git a/sdks/persist-postgres/package.json b/sdks/persist-postgres/package.json index 1115993..3ffba1b 100644 --- a/sdks/persist-postgres/package.json +++ b/sdks/persist-postgres/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/persist-postgres", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "PostgreSQL persistence driver for the Sandbox Agent TypeScript SDK (DEPRECATED)", "license": "Apache-2.0", "repository": { diff --git a/sdks/persist-rivet/package.json b/sdks/persist-rivet/package.json index 0ab7a54..a8ea332 100644 --- a/sdks/persist-rivet/package.json +++ b/sdks/persist-rivet/package.json @@ -1,6 +1,6 @@ { "name": 
"@sandbox-agent/persist-rivet", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "Rivet Actor persistence driver for the Sandbox Agent TypeScript SDK (DEPRECATED)", "license": "Apache-2.0", "repository": { diff --git a/sdks/persist-sqlite/package.json b/sdks/persist-sqlite/package.json index f276097..c0a3133 100644 --- a/sdks/persist-sqlite/package.json +++ b/sdks/persist-sqlite/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/persist-sqlite", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "SQLite persistence driver for the Sandbox Agent TypeScript SDK (DEPRECATED)", "license": "Apache-2.0", "repository": { diff --git a/sdks/react/package.json b/sdks/react/package.json index 1e4f833..cb4cf7b 100644 --- a/sdks/react/package.json +++ b/sdks/react/package.json @@ -1,6 +1,6 @@ { "name": "@sandbox-agent/react", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "React components for Sandbox Agent frontend integrations", "license": "Apache-2.0", "repository": { diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index db17bd8..afd0190 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,6 +1,6 @@ { "name": "sandbox-agent", - "version": "0.5.0-rc.1", + "version": "0.4.2", "description": "Universal API for automatic coding agents in sandboxes. 
Supports Claude Code, Codex, OpenCode, and Amp.", "license": "Apache-2.0", "repository": { @@ -46,12 +46,17 @@ "./computesdk": { "types": "./dist/providers/computesdk.d.ts", "import": "./dist/providers/computesdk.js" + }, + "./sprites": { + "types": "./dist/providers/sprites.d.ts", + "import": "./dist/providers/sprites.js" } }, "peerDependencies": { "@cloudflare/sandbox": ">=0.1.0", "@daytonaio/sdk": ">=0.12.0", "@e2b/code-interpreter": ">=1.0.0", + "@fly/sprites": ">=0.0.1", "@vercel/sandbox": ">=0.1.0", "dockerode": ">=4.0.0", "get-port": ">=7.0.0", @@ -68,6 +73,9 @@ "@e2b/code-interpreter": { "optional": true }, + "@fly/sprites": { + "optional": true + }, "@vercel/sandbox": { "optional": true }, @@ -104,6 +112,7 @@ "@cloudflare/sandbox": ">=0.1.0", "@daytonaio/sdk": ">=0.12.0", "@e2b/code-interpreter": ">=1.0.0", + "@fly/sprites": ">=0.0.1", "@types/dockerode": "^4.0.0", "@types/node": "^22.0.0", "@types/ws": "^8.18.1", diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index df66400..47e6dc3 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -1127,7 +1127,7 @@ export class SandboxAgent { const localSessionId = request.id?.trim() || randomId(); const live = await this.getLiveConnection(request.agent.trim()); - const sessionInit = normalizeSessionInit(request.sessionInit, request.cwd); + const sessionInit = normalizeSessionInit(request.sessionInit, request.cwd, this.sandboxProvider?.defaultCwd); const response = await live.createRemoteSession(localSessionId, sessionInit); @@ -1183,7 +1183,7 @@ export class SandboxAgent { const replaySource = await this.collectReplayEvents(existing.id, this.replayMaxEvents); const replayText = buildReplayText(replaySource, this.replayMaxChars); - const recreated = await live.createRemoteSession(existing.id, normalizeSessionInit(existing.sessionInit)); + const recreated = await live.createRemoteSession(existing.id, normalizeSessionInit(existing.sessionInit, undefined, 
this.sandboxProvider?.defaultCwd)); const updated: SessionRecord = { ...existing, @@ -2657,17 +2657,21 @@ function toAgentQuery(options: AgentQueryOptions | undefined): Record | undefined, cwdShorthand?: string): Omit { +function normalizeSessionInit( + value: Omit | undefined, + cwdShorthand?: string, + providerDefaultCwd?: string, +): Omit { if (!value) { return { - cwd: cwdShorthand ?? defaultCwd(), + cwd: cwdShorthand ?? providerDefaultCwd ?? defaultCwd(), mcpServers: [], }; } return { ...value, - cwd: value.cwd ?? cwdShorthand ?? defaultCwd(), + cwd: value.cwd ?? cwdShorthand ?? providerDefaultCwd ?? defaultCwd(), mcpServers: value.mcpServers ?? [], }; } diff --git a/sdks/typescript/src/index.ts b/sdks/typescript/src/index.ts index 8c05760..cd4611b 100644 --- a/sdks/typescript/src/index.ts +++ b/sdks/typescript/src/index.ts @@ -147,3 +147,9 @@ export type { SandboxAgentSpawnLogMode, SandboxAgentSpawnOptions, } from "./spawn.ts"; + +export type { + SpritesProviderOptions, + SpritesCreateOverrides, + SpritesClientOverrides, +} from "./providers/sprites.ts"; diff --git a/sdks/typescript/src/providers/cloudflare.ts b/sdks/typescript/src/providers/cloudflare.ts index c17adfc..7f0bd25 100644 --- a/sdks/typescript/src/providers/cloudflare.ts +++ b/sdks/typescript/src/providers/cloudflare.ts @@ -36,6 +36,7 @@ export function cloudflare(options: CloudflareProviderOptions): SandboxProvider return { name: "cloudflare", + defaultCwd: "/root", async create(): Promise { if (typeof sdk.create !== "function") { throw new Error('sandbox provider "cloudflare" requires a sdk with a `create()` method.'); diff --git a/sdks/typescript/src/providers/computesdk.ts b/sdks/typescript/src/providers/computesdk.ts index 7bca7ca..3ebb0da 100644 --- a/sdks/typescript/src/providers/computesdk.ts +++ b/sdks/typescript/src/providers/computesdk.ts @@ -1,25 +1,32 @@ -import { compute } from "computesdk"; +import { compute, type CreateSandboxOptions } from "computesdk"; import type { 
SandboxProvider } from "./types.ts"; import { DEFAULT_AGENTS, SANDBOX_AGENT_INSTALL_SCRIPT } from "./shared.ts"; const DEFAULT_AGENT_PORT = 3000; +type ComputeCreateOverrides = Partial; + export interface ComputeSdkProviderOptions { - create?: { - envs?: Record; - }; + create?: ComputeCreateOverrides | (() => ComputeCreateOverrides | Promise); agentPort?: number; } +async function resolveCreateOptions(value: ComputeSdkProviderOptions["create"]): Promise { + if (!value) return {}; + return typeof value === "function" ? await value() : value; +} + export function computesdk(options: ComputeSdkProviderOptions = {}): SandboxProvider { const agentPort = options.agentPort ?? DEFAULT_AGENT_PORT; return { name: "computesdk", + defaultCwd: "/root", async create(): Promise { - const envs = options.create?.envs; + const createOpts = await resolveCreateOptions(options.create); const sandbox = await compute.sandbox.create({ - envs: envs && Object.keys(envs).length > 0 ? envs : undefined, + ...createOpts, + envs: createOpts.envs && Object.keys(createOpts.envs).length > 0 ? 
createOpts.envs : undefined, }); const run = async (cmd: string, runOptions?: { background?: boolean }) => { diff --git a/sdks/typescript/src/providers/daytona.ts b/sdks/typescript/src/providers/daytona.ts index 19026de..7df740c 100644 --- a/sdks/typescript/src/providers/daytona.ts +++ b/sdks/typescript/src/providers/daytona.ts @@ -4,6 +4,7 @@ import { DEFAULT_SANDBOX_AGENT_IMAGE, buildServerStartCommand } from "./shared.t const DEFAULT_AGENT_PORT = 3000; const DEFAULT_PREVIEW_TTL_SECONDS = 4 * 60 * 60; +const DEFAULT_CWD = "/home/sandbox"; type DaytonaCreateParams = NonNullable[0]>; @@ -11,8 +12,9 @@ type DaytonaCreateOverrides = Partial; export interface DaytonaProviderOptions { create?: DaytonaCreateOverrides | (() => DaytonaCreateOverrides | Promise); - image?: string; + image?: DaytonaCreateParams["image"]; agentPort?: number; + cwd?: string; previewTtlSeconds?: number; deleteTimeoutSeconds?: number; } @@ -26,11 +28,13 @@ async function resolveCreateOptions(value: DaytonaProviderOptions["create"]): Pr export function daytona(options: DaytonaProviderOptions = {}): SandboxProvider { const agentPort = options.agentPort ?? DEFAULT_AGENT_PORT; const image = options.image ?? DEFAULT_SANDBOX_AGENT_IMAGE; + const cwd = options.cwd ?? DEFAULT_CWD; const previewTtlSeconds = options.previewTtlSeconds ?? 
DEFAULT_PREVIEW_TTL_SECONDS; const client = new Daytona(); return { name: "daytona", + defaultCwd: cwd, async create(): Promise { const createOpts = await resolveCreateOptions(options.create); const sandbox = await client.create({ diff --git a/sdks/typescript/src/providers/docker.ts b/sdks/typescript/src/providers/docker.ts index 9e49687..5db67bf 100644 --- a/sdks/typescript/src/providers/docker.ts +++ b/sdks/typescript/src/providers/docker.ts @@ -44,6 +44,7 @@ export function docker(options: DockerProviderOptions = {}): SandboxProvider { return { name: "docker", + defaultCwd: "/home/sandbox", async create(): Promise { const hostPort = await getPort(); const env = await resolveValue(options.env, []); diff --git a/sdks/typescript/src/providers/e2b.ts b/sdks/typescript/src/providers/e2b.ts index 8e99c64..c35a187 100644 --- a/sdks/typescript/src/providers/e2b.ts +++ b/sdks/typescript/src/providers/e2b.ts @@ -5,13 +5,16 @@ import { DEFAULT_AGENTS, SANDBOX_AGENT_INSTALL_SCRIPT } from "./shared.ts"; const DEFAULT_AGENT_PORT = 3000; const DEFAULT_TIMEOUT_MS = 3_600_000; +const SANDBOX_AGENT_PATH_EXPORT = 'export PATH="/usr/local/bin:$HOME/.local/bin:$PATH"'; type E2BCreateOverrides = Omit, "timeoutMs" | "autoPause">; type E2BConnectOverrides = Omit, "timeoutMs">; +type E2BTemplateOverride = string | (() => string | Promise); export interface E2BProviderOptions { create?: E2BCreateOverrides | (() => E2BCreateOverrides | Promise); connect?: E2BConnectOverrides | ((sandboxId: string) => E2BConnectOverrides | Promise); + template?: E2BTemplateOverride; agentPort?: number; timeoutMs?: number; autoPause?: boolean; @@ -28,6 +31,16 @@ async function resolveOptions(value: E2BProviderOptions["create"] | E2BProviderO return value; } +async function resolveTemplate(value: E2BTemplateOverride | undefined): Promise { + if (!value) return undefined; + return typeof value === "function" ? 
await value() : value; +} + +function buildShellCommand(command: string, strict = false): string { + const strictPrefix = strict ? "set -euo pipefail; " : ""; + return `bash -lc '${strictPrefix}${SANDBOX_AGENT_PATH_EXPORT}; ${command}'`; +} + export function e2b(options: E2BProviderOptions = {}): SandboxProvider { const agentPort = options.agentPort ?? DEFAULT_AGENT_PORT; const timeoutMs = options.timeoutMs ?? DEFAULT_TIMEOUT_MS; @@ -35,20 +48,29 @@ export function e2b(options: E2BProviderOptions = {}): SandboxProvider { return { name: "e2b", + defaultCwd: "/home/user", async create(): Promise { const createOpts = await resolveOptions(options.create); + const rawTemplate = typeof createOpts.template === "string" ? createOpts.template : undefined; + const restCreateOpts = { ...createOpts }; + delete restCreateOpts.template; + const template = (await resolveTemplate(options.template)) ?? rawTemplate; // eslint-disable-next-line @typescript-eslint/no-explicit-any - const sandbox = await Sandbox.betaCreate({ allowInternetAccess: true, ...createOpts, timeoutMs, autoPause } as any); + const sandbox = template + ? 
// eslint-disable-next-line @typescript-eslint/no-explicit-any + await Sandbox.betaCreate(template, { allowInternetAccess: true, ...restCreateOpts, timeoutMs, autoPause } as any) + : // eslint-disable-next-line @typescript-eslint/no-explicit-any + await Sandbox.betaCreate({ allowInternetAccess: true, ...restCreateOpts, timeoutMs, autoPause } as any); - await sandbox.commands.run(`curl -fsSL ${SANDBOX_AGENT_INSTALL_SCRIPT} | sh`).then((r) => { + await sandbox.commands.run(buildShellCommand(`curl -fsSL ${SANDBOX_AGENT_INSTALL_SCRIPT} | sh`, true)).then((r) => { if (r.exitCode !== 0) throw new Error(`e2b install failed:\n${r.stderr}`); }); for (const agent of DEFAULT_AGENTS) { - await sandbox.commands.run(`sandbox-agent install-agent ${agent}`).then((r) => { + await sandbox.commands.run(buildShellCommand(`sandbox-agent install-agent ${agent}`)).then((r) => { if (r.exitCode !== 0) throw new Error(`e2b agent install failed: ${agent}\n${r.stderr}`); }); } - await sandbox.commands.run(`sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`, { background: true, timeoutMs: 0 }); + await sandbox.commands.run(buildShellCommand(`sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`), { background: true, timeoutMs: 0 }); return sandbox.sandboxId; }, @@ -84,7 +106,7 @@ export function e2b(options: E2BProviderOptions = {}): SandboxProvider { async ensureServer(sandboxId: string): Promise { const connectOpts = await resolveOptions(options.connect, sandboxId); const sandbox = await Sandbox.connect(sandboxId, { ...connectOpts, timeoutMs } as SandboxConnectOpts); - await sandbox.commands.run(`sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`, { background: true, timeoutMs: 0 }); + await sandbox.commands.run(buildShellCommand(`sandbox-agent server --no-token --host 0.0.0.0 --port ${agentPort}`), { background: true, timeoutMs: 0 }); }, }; } diff --git a/sdks/typescript/src/providers/modal.ts b/sdks/typescript/src/providers/modal.ts index 
394272b..fad98c2 100644 --- a/sdks/typescript/src/providers/modal.ts +++ b/sdks/typescript/src/providers/modal.ts @@ -1,49 +1,59 @@ -import { ModalClient } from "modal"; +import { ModalClient, type Image, type SandboxCreateParams } from "modal"; import type { SandboxProvider } from "./types.ts"; -import { DEFAULT_AGENTS, SANDBOX_AGENT_INSTALL_SCRIPT } from "./shared.ts"; +import { DEFAULT_SANDBOX_AGENT_IMAGE } from "./shared.ts"; const DEFAULT_AGENT_PORT = 3000; const DEFAULT_APP_NAME = "sandbox-agent"; const DEFAULT_MEMORY_MIB = 2048; +type ModalCreateOverrides = Omit, "secrets" | "encryptedPorts"> & { + secrets?: Record; + encryptedPorts?: number[]; + appName?: string; +}; + export interface ModalProviderOptions { - create?: { - secrets?: Record; - appName?: string; - memoryMiB?: number; - }; + create?: ModalCreateOverrides | (() => ModalCreateOverrides | Promise); + image?: string | Image; agentPort?: number; } +async function resolveCreateOptions(value: ModalProviderOptions["create"]): Promise { + if (!value) return {}; + return typeof value === "function" ? await value() : value; +} + export function modal(options: ModalProviderOptions = {}): SandboxProvider { const agentPort = options.agentPort ?? DEFAULT_AGENT_PORT; - const appName = options.create?.appName ?? DEFAULT_APP_NAME; - const memoryMiB = options.create?.memoryMiB ?? DEFAULT_MEMORY_MIB; const client = new ModalClient(); return { name: "modal", + defaultCwd: "/root", async create(): Promise { + const createOpts = await resolveCreateOptions(options.create); + const appName = createOpts.appName ?? DEFAULT_APP_NAME; + const baseImage = options.image ?? DEFAULT_SANDBOX_AGENT_IMAGE; const app = await client.apps.fromName(appName, { createIfMissing: true }); - // Pre-install sandbox-agent and agents in the image so they are cached - // across sandbox creates and don't need to be installed at runtime. 
- const installAgentCmds = DEFAULT_AGENTS.map((agent) => `RUN sandbox-agent install-agent ${agent}`); - const image = client.images - .fromRegistry("node:22-slim") - .dockerfileCommands([ - "RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*", - `RUN curl -fsSL ${SANDBOX_AGENT_INSTALL_SCRIPT} | sh`, - ...installAgentCmds, - ]); + // The default `-full` base image already includes sandbox-agent and all + // agents pre-installed, so no additional dockerfile commands are needed. + const image = typeof baseImage === "string" ? client.images.fromRegistry(baseImage) : baseImage; - const envVars = options.create?.secrets ?? {}; + const envVars = createOpts.secrets ?? {}; const secrets = Object.keys(envVars).length > 0 ? [await client.secrets.fromObject(envVars)] : []; + const sandboxCreateOpts = { ...createOpts }; + delete sandboxCreateOpts.appName; + delete sandboxCreateOpts.secrets; + + const extraPorts = createOpts.encryptedPorts ?? []; + delete sandboxCreateOpts.encryptedPorts; const sb = await client.sandboxes.create(app, image, { - encryptedPorts: [agentPort], + ...sandboxCreateOpts, + encryptedPorts: [agentPort, ...extraPorts], secrets, - memoryMiB, + memoryMiB: sandboxCreateOpts.memoryMiB ?? DEFAULT_MEMORY_MIB, }); // Start the server as a long-running exec process. 
We intentionally diff --git a/sdks/typescript/src/providers/shared.ts b/sdks/typescript/src/providers/shared.ts index c0f7b1c..eda610e 100644 --- a/sdks/typescript/src/providers/shared.ts +++ b/sdks/typescript/src/providers/shared.ts @@ -1,5 +1,7 @@ -export const DEFAULT_SANDBOX_AGENT_IMAGE = "rivetdev/sandbox-agent:0.5.0-rc.1-full"; -export const SANDBOX_AGENT_INSTALL_SCRIPT = "https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh"; +export const SANDBOX_AGENT_VERSION = "0.5.0-rc.2"; +export const DEFAULT_SANDBOX_AGENT_IMAGE = `rivetdev/sandbox-agent:${SANDBOX_AGENT_VERSION}-full`; +export const SANDBOX_AGENT_INSTALL_SCRIPT = `https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION}/install.sh`; +export const SANDBOX_AGENT_NPX_SPEC = `@sandbox-agent/cli@${SANDBOX_AGENT_VERSION}`; export const DEFAULT_AGENTS = ["claude", "codex"] as const; export function buildServerStartCommand(port: number): string { diff --git a/sdks/typescript/src/providers/sprites.ts b/sdks/typescript/src/providers/sprites.ts new file mode 100644 index 0000000..aebc3db --- /dev/null +++ b/sdks/typescript/src/providers/sprites.ts @@ -0,0 +1,267 @@ +import { ExecError, SpritesClient, type ClientOptions as SpritesClientOptions, type SpriteConfig } from "@fly/sprites"; +import { SandboxDestroyedError } from "../client.ts"; +import type { SandboxProvider } from "./types.ts"; +import { SANDBOX_AGENT_NPX_SPEC } from "./shared.ts"; + +const DEFAULT_AGENT_PORT = 8080; +const DEFAULT_SERVICE_NAME = "sandbox-agent"; +const DEFAULT_NAME_PREFIX = "sandbox-agent"; +const DEFAULT_SERVICE_START_DURATION = "10m"; + +export interface SpritesCreateOverrides { + name?: string; + config?: SpriteConfig; +} + +export type SpritesClientOverrides = Partial; + +export interface SpritesProviderOptions { + token?: string | (() => string | Promise); + client?: SpritesClientOverrides | (() => SpritesClientOverrides | Promise); + create?: SpritesCreateOverrides | (() => SpritesCreateOverrides | Promise); + 
env?: Record | (() => Record | Promise>); + installAgents?: readonly string[]; + agentPort?: number; + serviceName?: string; + serviceStartDuration?: string; + namePrefix?: string; +} + +type SpritesSandboxProvider = SandboxProvider & { + getToken(sandboxId: string): Promise; +}; + +interface SpritesService { + cmd?: string; + args?: string[]; + http_port?: number | null; + state?: { + status?: string; + }; +} + +async function resolveValue(value: T | (() => T | Promise) | undefined, fallback: T): Promise { + if (value === undefined) { + return fallback; + } + if (typeof value === "function") { + return await (value as () => T | Promise)(); + } + return value; +} + +async function resolveToken(value: SpritesProviderOptions["token"]): Promise { + const token = await resolveValue(value, process.env.SPRITES_API_KEY ?? process.env.SPRITE_TOKEN ?? process.env.SPRITES_TOKEN ?? ""); + if (!token) { + throw new Error("sprites provider requires a token. Set SPRITES_API_KEY (or SPRITE_TOKEN) or pass `token`."); + } + return token; +} + +function createSpritesClient(token: string, options: SpritesClientOverrides): SpritesClient { + return new SpritesClient(token, options); +} + +function generateSpriteName(prefix: string): string { + const suffix = + typeof globalThis.crypto?.randomUUID === "function" + ? 
globalThis.crypto.randomUUID().slice(0, 8) + : `${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`; + return `${prefix}-${suffix}`.toLowerCase(); +} + +function isSpriteNotFoundError(error: unknown): boolean { + return error instanceof Error && error.message.startsWith("Sprite not found:"); +} + +function shellQuote(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +function buildServiceCommand(env: Record, port: number): string { + const exportParts: string[] = []; + for (const [key, value] of Object.entries(env)) { + if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(key)) { + throw new Error(`sprites provider received an invalid environment variable name: ${key}`); + } + exportParts.push(`export ${key}=${shellQuote(value)}`); + } + + exportParts.push(`exec npx -y ${SANDBOX_AGENT_NPX_SPEC} server --no-token --host 0.0.0.0 --port ${port}`); + return exportParts.join("; "); +} + +async function runSpriteCommand(sprite: ReturnType, file: string, args: string[], env?: Record): Promise { + try { + const result = await sprite.execFile(file, args, env ? 
{ env } : undefined); + if (result.exitCode !== 0) { + throw new Error(`sprites command failed: ${file} ${args.join(" ")}`); + } + } catch (error) { + if (error instanceof ExecError) { + throw new Error( + `sprites command failed: ${file} ${args.join(" ")} (exit ${error.exitCode})\nstdout:\n${String(error.stdout)}\nstderr:\n${String(error.stderr)}`, + { cause: error }, + ); + } + throw error; + } +} + +async function fetchService(client: SpritesClient, spriteName: string, serviceName: string): Promise { + const response = await fetch(`${client.baseURL}/v1/sprites/${encodeURIComponent(spriteName)}/services/${encodeURIComponent(serviceName)}`, { + method: "GET", + headers: { + Authorization: `Bearer ${client.token}`, + }, + }); + + if (response.status === 404) { + return undefined; + } + + if (!response.ok) { + throw new Error(`sprites service lookup failed (status ${response.status}): ${await response.text()}`); + } + + return (await response.json()) as SpritesService; +} + +async function upsertService(client: SpritesClient, spriteName: string, serviceName: string, port: number, command: string): Promise { + const existing = await fetchService(client, spriteName, serviceName); + const expectedArgs = ["-lc", command]; + const isCurrent = existing?.cmd === "bash" && existing.http_port === port && JSON.stringify(existing.args ?? 
[]) === JSON.stringify(expectedArgs); + if (isCurrent) { + return; + } + + const response = await fetch(`${client.baseURL}/v1/sprites/${encodeURIComponent(spriteName)}/services/${encodeURIComponent(serviceName)}`, { + method: "PUT", + headers: { + Authorization: `Bearer ${client.token}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + cmd: "bash", + args: expectedArgs, + http_port: port, + }), + }); + + if (!response.ok) { + throw new Error(`sprites service upsert failed (status ${response.status}): ${await response.text()}`); + } +} + +async function startServiceIfNeeded(client: SpritesClient, spriteName: string, serviceName: string, duration: string): Promise { + const existing = await fetchService(client, spriteName, serviceName); + if (existing?.state?.status === "running" || existing?.state?.status === "starting") { + return; + } + + const response = await fetch( + `${client.baseURL}/v1/sprites/${encodeURIComponent(spriteName)}/services/${encodeURIComponent(serviceName)}/start?duration=${encodeURIComponent(duration)}`, + { + method: "POST", + headers: { + Authorization: `Bearer ${client.token}`, + }, + }, + ); + + if (!response.ok) { + throw new Error(`sprites service start failed (status ${response.status}): ${await response.text()}`); + } + + await response.text(); +} + +async function ensureService( + client: SpritesClient, + spriteName: string, + serviceName: string, + port: number, + duration: string, + env: Record, +): Promise { + const command = buildServiceCommand(env, port); + await upsertService(client, spriteName, serviceName, port, command); + await startServiceIfNeeded(client, spriteName, serviceName, duration); +} + +export function sprites(options: SpritesProviderOptions = {}): SandboxProvider { + const agentPort = options.agentPort ?? DEFAULT_AGENT_PORT; + const serviceName = options.serviceName ?? DEFAULT_SERVICE_NAME; + const serviceStartDuration = options.serviceStartDuration ?? 
DEFAULT_SERVICE_START_DURATION; + const namePrefix = options.namePrefix ?? DEFAULT_NAME_PREFIX; + const installAgents = [...(options.installAgents ?? [])]; + + const getClient = async (): Promise => { + const token = await resolveToken(options.token); + const clientOptions = await resolveValue(options.client, {}); + return createSpritesClient(token, clientOptions); + }; + + const getServerEnv = async (): Promise> => { + return await resolveValue(options.env, {}); + }; + + const provider: SpritesSandboxProvider = { + name: "sprites", + defaultCwd: "/home/sprite", + async create(): Promise { + const client = await getClient(); + const createOptions = await resolveValue(options.create, {}); + const spriteName = createOptions.name ?? generateSpriteName(namePrefix); + const sprite = await client.createSprite(spriteName, createOptions.config); + + const serverEnv = await getServerEnv(); + for (const agent of installAgents) { + await runSpriteCommand(sprite, "bash", ["-lc", `npx -y ${SANDBOX_AGENT_NPX_SPEC} install-agent ${agent}`], serverEnv); + } + + await ensureService(client, spriteName, serviceName, agentPort, serviceStartDuration, serverEnv); + return sprite.name; + }, + async destroy(sandboxId: string): Promise { + const client = await getClient(); + try { + await client.deleteSprite(sandboxId); + } catch (error) { + if (isSpriteNotFoundError(error) || (error instanceof Error && error.message.includes("status 404"))) { + return; + } + throw error; + } + }, + async reconnect(sandboxId: string): Promise { + const client = await getClient(); + try { + await client.getSprite(sandboxId); + } catch (error) { + if (isSpriteNotFoundError(error)) { + throw new SandboxDestroyedError(sandboxId, "sprites", { cause: error }); + } + throw error; + } + }, + async getUrl(sandboxId: string): Promise { + const client = await getClient(); + const sprite = await client.getSprite(sandboxId); + const url = (sprite as { url?: string }).url; + if (!url) { + throw new Error(`sprites API 
did not return a URL for sprite: ${sandboxId}`); + } + return url; + }, + async ensureServer(sandboxId: string): Promise { + const client = await getClient(); + await ensureService(client, sandboxId, serviceName, agentPort, serviceStartDuration, await getServerEnv()); + }, + async getToken(): Promise { + return await resolveToken(options.token); + }, + }; + + return provider; +} diff --git a/sdks/typescript/src/providers/types.ts b/sdks/typescript/src/providers/types.ts index ab996e1..9c925d1 100644 --- a/sdks/typescript/src/providers/types.ts +++ b/sdks/typescript/src/providers/types.ts @@ -47,4 +47,11 @@ export interface SandboxProvider { * (e.g. the duplicate process exits on port conflict). */ ensureServer?(sandboxId: string): Promise; + + /** + * Default working directory for sessions when the caller does not specify + * one. Remote providers should set this to a path that exists inside the + * sandbox (e.g. '/home/user'). When omitted, falls back to process.cwd(). + */ + defaultCwd?: string; } diff --git a/sdks/typescript/src/providers/vercel.ts b/sdks/typescript/src/providers/vercel.ts index 09d41cf..905b8f1 100644 --- a/sdks/typescript/src/providers/vercel.ts +++ b/sdks/typescript/src/providers/vercel.ts @@ -30,6 +30,7 @@ export function vercel(options: VercelProviderOptions = {}): SandboxProvider { return { name: "vercel", + defaultCwd: "/home/vercel-sandbox", async create(): Promise { const sandbox = await Sandbox.create((await resolveCreateOptions(options.create, agentPort)) as Parameters[0]); diff --git a/sdks/typescript/tests/provider-lifecycle.test.ts b/sdks/typescript/tests/provider-lifecycle.test.ts index 06c85f5..65ea733 100644 --- a/sdks/typescript/tests/provider-lifecycle.test.ts +++ b/sdks/typescript/tests/provider-lifecycle.test.ts @@ -16,6 +16,25 @@ const e2bMocks = vi.hoisted(() => { }; }); +const modalMocks = vi.hoisted(() => ({ + appsFromName: vi.fn(), + imageFromRegistry: vi.fn(), + secretFromObject: vi.fn(), + sandboxCreate: vi.fn(), + 
sandboxFromId: vi.fn(), +})); + +const computeSdkMocks = vi.hoisted(() => ({ + create: vi.fn(), + getById: vi.fn(), +})); + +const spritesMocks = vi.hoisted(() => ({ + createSprite: vi.fn(), + getSprite: vi.fn(), + deleteSprite: vi.fn(), +})); + vi.mock("@e2b/code-interpreter", () => ({ NotFoundError: e2bMocks.MockNotFoundError, Sandbox: { @@ -24,7 +43,47 @@ vi.mock("@e2b/code-interpreter", () => ({ }, })); +vi.mock("modal", () => ({ + ModalClient: class MockModalClient { + apps = { fromName: modalMocks.appsFromName }; + images = { fromRegistry: modalMocks.imageFromRegistry }; + secrets = { fromObject: modalMocks.secretFromObject }; + sandboxes = { + create: modalMocks.sandboxCreate, + fromId: modalMocks.sandboxFromId, + }; + }, +})); + +vi.mock("computesdk", () => ({ + compute: { + sandbox: { + create: computeSdkMocks.create, + getById: computeSdkMocks.getById, + }, + }, +})); + +vi.mock("@fly/sprites", () => ({ + SpritesClient: class MockSpritesClient { + readonly token: string; + readonly baseURL: string; + + constructor(token: string, options: { baseURL?: string } = {}) { + this.token = token; + this.baseURL = options.baseURL ?? 
"https://api.sprites.dev"; + } + + createSprite = spritesMocks.createSprite; + getSprite = spritesMocks.getSprite; + deleteSprite = spritesMocks.deleteSprite; + }, +})); + import { e2b } from "../src/providers/e2b.ts"; +import { modal } from "../src/providers/modal.ts"; +import { computesdk } from "../src/providers/computesdk.ts"; +import { sprites } from "../src/providers/sprites.ts"; function createFetch(): typeof fetch { return async () => new Response(null, { status: 200 }); @@ -56,6 +115,29 @@ function createMockSandbox() { }; } +function createMockModalImage() { + return { + dockerfileCommands: vi.fn(function dockerfileCommands() { + return this; + }), + }; +} + +beforeEach(() => { + e2bMocks.betaCreate.mockReset(); + e2bMocks.connect.mockReset(); + modalMocks.appsFromName.mockReset(); + modalMocks.imageFromRegistry.mockReset(); + modalMocks.secretFromObject.mockReset(); + modalMocks.sandboxCreate.mockReset(); + modalMocks.sandboxFromId.mockReset(); + computeSdkMocks.create.mockReset(); + computeSdkMocks.getById.mockReset(); + spritesMocks.createSprite.mockReset(); + spritesMocks.getSprite.mockReset(); + spritesMocks.deleteSprite.mockReset(); +}); + describe("SandboxAgent provider lifecycle", () => { it("reconnects an existing sandbox before ensureServer", async () => { const order: string[] = []; @@ -124,11 +206,6 @@ describe("SandboxAgent provider lifecycle", () => { }); describe("e2b provider", () => { - beforeEach(() => { - e2bMocks.betaCreate.mockReset(); - e2bMocks.connect.mockReset(); - }); - it("creates sandboxes with betaCreate, autoPause, and the default timeout", async () => { const sandbox = createMockSandbox(); e2bMocks.betaCreate.mockResolvedValue(sandbox); @@ -190,4 +267,245 @@ describe("e2b provider", () => { await expect(provider.reconnect?.("missing-sandbox")).rejects.toBeInstanceOf(SandboxDestroyedError); }); + + it("passes a configured template to betaCreate", async () => { + const sandbox = createMockSandbox(); + 
e2bMocks.betaCreate.mockResolvedValue(sandbox); + + const provider = e2b({ + template: "my-template", + create: { envs: { ANTHROPIC_API_KEY: "test" } }, + }); + + await provider.create(); + + expect(e2bMocks.betaCreate).toHaveBeenCalledWith( + "my-template", + expect.objectContaining({ + allowInternetAccess: true, + envs: { ANTHROPIC_API_KEY: "test" }, + timeoutMs: 3_600_000, + }), + ); + }); + + it("accepts legacy create.template values from plain JavaScript", async () => { + const sandbox = createMockSandbox(); + e2bMocks.betaCreate.mockResolvedValue(sandbox); + + const provider = e2b({ + create: { template: "legacy-template" } as never, + }); + + await provider.create(); + + expect(e2bMocks.betaCreate).toHaveBeenCalledWith( + "legacy-template", + expect.objectContaining({ + allowInternetAccess: true, + timeoutMs: 3_600_000, + }), + ); + }); +}); + +describe("modal provider", () => { + it("uses the configured base image when building the sandbox image", async () => { + const app = { appId: "app-123" }; + const image = createMockModalImage(); + const sandbox = { + sandboxId: "sbx-modal", + exec: vi.fn(), + }; + + modalMocks.appsFromName.mockResolvedValue(app); + modalMocks.imageFromRegistry.mockReturnValue(image); + modalMocks.sandboxCreate.mockResolvedValue(sandbox); + + const provider = modal({ + image: "python:3.12-slim", + create: { + appName: "custom-app", + secrets: { OPENAI_API_KEY: "test" }, + }, + }); + + await expect(provider.create()).resolves.toBe("sbx-modal"); + + expect(modalMocks.appsFromName).toHaveBeenCalledWith("custom-app", { createIfMissing: true }); + expect(modalMocks.imageFromRegistry).toHaveBeenCalledWith("python:3.12-slim"); + expect(image.dockerfileCommands).not.toHaveBeenCalled(); + expect(modalMocks.sandboxCreate).toHaveBeenCalledWith( + app, + image, + expect.objectContaining({ + encryptedPorts: [3000], + memoryMiB: 2048, + }), + ); + }); +}); + +describe("computesdk provider", () => { + it("passes image and template options through to 
compute.sandbox.create", async () => { + const sandbox = { + sandboxId: "sbx-compute", + runCommand: vi.fn(async () => ({ exitCode: 0, stderr: "" })), + }; + computeSdkMocks.create.mockResolvedValue(sandbox); + + const provider = computesdk({ + create: { + envs: { ANTHROPIC_API_KEY: "test" }, + image: "ghcr.io/example/sandbox-agent:latest", + templateId: "tmpl-123", + }, + }); + + await expect(provider.create()).resolves.toBe("sbx-compute"); + + expect(computeSdkMocks.create).toHaveBeenCalledWith( + expect.objectContaining({ + envs: { ANTHROPIC_API_KEY: "test" }, + image: "ghcr.io/example/sandbox-agent:latest", + templateId: "tmpl-123", + }), + ); + }); +}); + +describe("sprites provider", () => { + it("creates a sprite, installs sandbox-agent, and configures the managed service", async () => { + const sprite = { + name: "sprite-1", + execFile: vi.fn(async () => ({ stdout: "", stderr: "", exitCode: 0 })), + }; + spritesMocks.createSprite.mockResolvedValue(sprite); + + const fetchMock = vi + .fn() + .mockResolvedValueOnce(new Response(null, { status: 404 })) + .mockResolvedValueOnce(new Response(JSON.stringify({}), { status: 200 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ state: { status: "stopped" } }), { status: 200 })) + .mockResolvedValueOnce(new Response("", { status: 200 })); + vi.stubGlobal("fetch", fetchMock); + + const provider = sprites({ + token: "sprite-token", + create: { + name: "sprite-1", + }, + env: { + OPENAI_API_KEY: "test'value", + }, + }); + + await expect(provider.create()).resolves.toBe("sprite-1"); + + expect(spritesMocks.createSprite).toHaveBeenCalledWith("sprite-1", undefined); + expect(sprite.execFile).not.toHaveBeenCalled(); + + const putCall = fetchMock.mock.calls.find(([url, init]) => String(url).includes("/services/sandbox-agent") && init?.method === "PUT"); + expect(putCall).toBeDefined(); + expect(String(putCall?.[0])).toContain("/v1/sprites/sprite-1/services/sandbox-agent"); + 
expect(putCall?.[1]?.headers).toMatchObject({ + Authorization: "Bearer sprite-token", + "Content-Type": "application/json", + }); + const serviceRequest = JSON.parse(String(putCall?.[1]?.body)) as { args: string[] }; + expect(serviceRequest.args[1]).toContain("exec npx -y @sandbox-agent/cli@0.5.0-rc.2 server --no-token --host 0.0.0.0 --port 8080"); + expect(serviceRequest.args[1]).toContain("OPENAI_API_KEY='test'\\''value'"); + }); + + it("optionally installs agents through npx when requested", async () => { + const sprite = { + name: "sprite-1", + execFile: vi.fn(async () => ({ stdout: "", stderr: "", exitCode: 0 })), + }; + spritesMocks.createSprite.mockResolvedValue(sprite); + + const fetchMock = vi + .fn() + .mockResolvedValueOnce(new Response(null, { status: 404 })) + .mockResolvedValueOnce(new Response(JSON.stringify({}), { status: 200 })) + .mockResolvedValueOnce(new Response(JSON.stringify({ state: { status: "stopped" } }), { status: 200 })) + .mockResolvedValueOnce(new Response("", { status: 200 })); + vi.stubGlobal("fetch", fetchMock); + + const provider = sprites({ + token: "sprite-token", + create: { name: "sprite-1" }, + env: { OPENAI_API_KEY: "test" }, + installAgents: ["claude", "codex"], + }); + + await provider.create(); + + expect(sprite.execFile).toHaveBeenCalledWith("bash", ["-lc", "npx -y @sandbox-agent/cli@0.5.0-rc.2 install-agent claude"], { + env: { OPENAI_API_KEY: "test" }, + }); + expect(sprite.execFile).toHaveBeenCalledWith("bash", ["-lc", "npx -y @sandbox-agent/cli@0.5.0-rc.2 install-agent codex"], { + env: { OPENAI_API_KEY: "test" }, + }); + }); + + it("returns the sprite URL and provider token for authenticated access", async () => { + spritesMocks.getSprite.mockResolvedValue({ + name: "sprite-1", + url: "https://sprite-1.sprites.app", + }); + + const provider = sprites({ + token: "sprite-token", + }); + + await expect(provider.getUrl?.("sprite-1")).resolves.toBe("https://sprite-1.sprites.app"); + await expect((provider as 
SandboxProvider & { getToken: (sandboxId: string) => Promise }).getToken("sprite-1")).resolves.toBe("sprite-token"); + }); + + it("maps missing reconnect targets to SandboxDestroyedError", async () => { + spritesMocks.getSprite.mockRejectedValue(new Error("Sprite not found: missing-sprite")); + const provider = sprites({ + token: "sprite-token", + }); + + await expect(provider.reconnect?.("missing-sprite")).rejects.toBeInstanceOf(SandboxDestroyedError); + }); + + it("skips starting the service when the desired service is already running", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce( + new Response( + JSON.stringify({ + cmd: "bash", + args: ["-lc", "exec npx -y @sandbox-agent/cli@0.5.0-rc.2 server --no-token --host 0.0.0.0 --port 8080"], + http_port: 8080, + state: { status: "running" }, + }), + { status: 200 }, + ), + ) + .mockResolvedValueOnce( + new Response( + JSON.stringify({ + cmd: "bash", + args: ["-lc", "exec npx -y @sandbox-agent/cli@0.5.0-rc.2 server --no-token --host 0.0.0.0 --port 8080"], + http_port: 8080, + state: { status: "running" }, + }), + { status: 200 }, + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const provider = sprites({ + token: "sprite-token", + }); + + await provider.ensureServer?.("sprite-1"); + + expect(fetchMock).toHaveBeenCalledTimes(2); + expect(fetchMock.mock.calls.every(([, init]) => init?.method === "GET")).toBe(true); + }); }); diff --git a/sdks/typescript/tests/providers.test.ts b/sdks/typescript/tests/providers.test.ts index d98672d..e6cb0a3 100644 --- a/sdks/typescript/tests/providers.test.ts +++ b/sdks/typescript/tests/providers.test.ts @@ -15,6 +15,7 @@ import { daytona } from "../src/providers/daytona.ts"; import { vercel } from "../src/providers/vercel.ts"; import { modal } from "../src/providers/modal.ts"; import { computesdk } from "../src/providers/computesdk.ts"; +import { sprites } from "../src/providers/sprites.ts"; import { prepareMockAgentDataHome } from "./helpers/mock-agent.ts"; 
const __dirname = dirname(fileURLToPath(import.meta.url)); @@ -35,10 +36,10 @@ function findBinary(): string | null { } const BINARY_PATH = findBinary(); -if (!BINARY_PATH) { - throw new Error("sandbox-agent binary not found. Build it (cargo build -p sandbox-agent) or set SANDBOX_AGENT_BIN."); -} -if (!process.env.SANDBOX_AGENT_BIN) { +// if (!BINARY_PATH) { +// throw new Error("sandbox-agent binary not found. Build it (cargo build -p sandbox-agent) or set SANDBOX_AGENT_BIN."); +// } +if (!process.env.SANDBOX_AGENT_BIN && BINARY_PATH) { process.env.SANDBOX_AGENT_BIN = BINARY_PATH; } @@ -47,7 +48,7 @@ function isModuleAvailable(name: string): boolean { _require.resolve(name); return true; } catch { - return false; + return existsSync(resolve(__dirname, "../node_modules", ...name.split("/"), "package.json")); } } @@ -69,6 +70,8 @@ interface ProviderEntry { name: string; /** Human-readable reasons this provider can't run, or empty if ready. */ skipReasons: string[]; + /** Human-readable reasons session tests can't run, or empty if ready. */ + sessionSkipReasons?: string[]; /** Return a fresh provider instance for a single test. */ createProvider: () => SandboxProvider; /** Optional per-provider setup (e.g. create temp dirs). Returns cleanup fn. */ @@ -79,6 +82,8 @@ interface ProviderEntry { startTimeoutMs?: number; /** Some providers (e.g. local) can verify the sandbox is gone after destroy. */ canVerifyDestroyedSandbox?: boolean; + /** Working directory to use for createSession/prompt tests. */ + sessionCwd?: string; /** * Whether session tests (createSession, prompt) should run. * The mock agent only works with local provider (requires mock-acp process binary). @@ -92,6 +97,10 @@ function missingEnvVars(...vars: string[]): string[] { return missing.length > 0 ? [`missing env: ${missing.join(", ")}`] : []; } +function missingAnyEnvVars(...vars: string[]): string[] { + return vars.some((v) => Boolean(process.env[v])) ? 
[] : [`missing env: one of ${vars.join(", ")}`]; +} + function missingModules(...modules: string[]): string[] { const missing = modules.filter((m) => !isModuleAvailable(m)); return missing.length > 0 ? [`missing npm packages: ${missing.join(", ")}`] : []; @@ -116,6 +125,7 @@ function buildProviders(): ProviderEntry[] { skipReasons: [], agent: "mock", canVerifyDestroyedSandbox: true, + sessionCwd: process.cwd(), sessionTestsEnabled: true, setup() { dataHome = mkdtempSync(join(tmpdir(), "sdk-provider-local-")); @@ -165,7 +175,6 @@ function buildProviders(): ProviderEntry[] { } // --- e2b --- - // Session tests disabled: see docker comment above (ACP protocol mismatch). { entries.push({ name: "e2b", @@ -173,7 +182,9 @@ function buildProviders(): ProviderEntry[] { agent: "claude", startTimeoutMs: 300_000, canVerifyDestroyedSandbox: false, - sessionTestsEnabled: false, + sessionSkipReasons: missingEnvVars("ANTHROPIC_API_KEY"), + sessionCwd: "/home/user", + sessionTestsEnabled: true, createProvider() { return e2b({ create: { envs: collectApiKeys() }, @@ -183,7 +194,6 @@ function buildProviders(): ProviderEntry[] { } // --- daytona --- - // Session tests disabled: see docker comment above (ACP protocol mismatch). { entries.push({ name: "daytona", @@ -191,7 +201,9 @@ function buildProviders(): ProviderEntry[] { agent: "claude", startTimeoutMs: 300_000, canVerifyDestroyedSandbox: false, - sessionTestsEnabled: false, + sessionSkipReasons: missingEnvVars("ANTHROPIC_API_KEY"), + sessionCwd: "/home/sandbox", + sessionTestsEnabled: true, createProvider() { return daytona({ create: { envVars: collectApiKeys() }, @@ -201,7 +213,6 @@ function buildProviders(): ProviderEntry[] { } // --- vercel --- - // Session tests disabled: see docker comment above (ACP protocol mismatch). { entries.push({ name: "vercel", @@ -219,7 +230,6 @@ function buildProviders(): ProviderEntry[] { } // --- modal --- - // Session tests disabled: see docker comment above (ACP protocol mismatch). 
{ entries.push({ name: "modal", @@ -227,9 +237,12 @@ function buildProviders(): ProviderEntry[] { agent: "claude", startTimeoutMs: 300_000, canVerifyDestroyedSandbox: false, - sessionTestsEnabled: false, + sessionSkipReasons: missingEnvVars("ANTHROPIC_API_KEY"), + sessionCwd: "/root", + sessionTestsEnabled: true, createProvider() { return modal({ + image: process.env.SANDBOX_AGENT_MODAL_IMAGE, create: { secrets: collectApiKeys() }, }); }, @@ -237,7 +250,6 @@ function buildProviders(): ProviderEntry[] { } // --- computesdk --- - // Session tests disabled: see docker comment above (ACP protocol mismatch). { entries.push({ name: "computesdk", @@ -254,6 +266,28 @@ function buildProviders(): ProviderEntry[] { }); } + // --- sprites --- + { + entries.push({ + name: "sprites", + skipReasons: [...missingAnyEnvVars("SPRITES_API_KEY", "SPRITE_TOKEN", "SPRITES_TOKEN"), ...missingModules("@fly/sprites")], + agent: "claude", + startTimeoutMs: 300_000, + canVerifyDestroyedSandbox: false, + sessionSkipReasons: missingEnvVars("ANTHROPIC_API_KEY"), + sessionCwd: "/home/sprite", + sessionTestsEnabled: true, + createProvider() { + return sprites({ + token: process.env.SPRITES_API_KEY ?? process.env.SPRITE_TOKEN ?? process.env.SPRITES_TOKEN, + env: collectApiKeys(), + installAgents: ["claude"], + serviceStartDuration: "10m", + }); + }, + }); + } + return entries; } @@ -375,7 +409,7 @@ function providerSuite(entry: ProviderEntry) { // -- session tests (require working agent) -- - const sessionIt = entry.sessionTestsEnabled ? it : it.skip; + const sessionIt = entry.sessionTestsEnabled && (entry.sessionSkipReasons?.length ?? 0) === 0 ? 
it : it.skip; sessionIt( "creates sessions with persisted sandboxId", @@ -383,7 +417,7 @@ function providerSuite(entry: ProviderEntry) { const persist = new InMemorySessionPersistDriver(); sdk = await SandboxAgent.start({ sandbox: entry.createProvider(), persist }); - const session = await sdk.createSession({ agent: entry.agent }); + const session = await sdk.createSession({ agent: entry.agent, cwd: entry.sessionCwd }); const record = await persist.getSession(session.id); expect(record?.sandboxId).toBe(sdk.sandboxId); @@ -396,7 +430,7 @@ function providerSuite(entry: ProviderEntry) { async () => { sdk = await SandboxAgent.start({ sandbox: entry.createProvider() }); - const session = await sdk.createSession({ agent: entry.agent }); + const session = await sdk.createSession({ agent: entry.agent, cwd: entry.sessionCwd }); const events: unknown[] = []; const off = session.onEvent((event) => { events.push(event); diff --git a/sdks/typescript/tsup.config.ts b/sdks/typescript/tsup.config.ts index 984eeb3..5da303b 100644 --- a/sdks/typescript/tsup.config.ts +++ b/sdks/typescript/tsup.config.ts @@ -11,10 +11,21 @@ export default defineConfig({ "src/providers/cloudflare.ts", "src/providers/modal.ts", "src/providers/computesdk.ts", + "src/providers/sprites.ts", ], format: ["esm"], dts: true, clean: true, sourcemap: true, - external: ["@cloudflare/sandbox", "@daytonaio/sdk", "@e2b/code-interpreter", "@vercel/sandbox", "dockerode", "get-port", "modal", "computesdk"], + external: [ + "@cloudflare/sandbox", + "@daytonaio/sdk", + "@e2b/code-interpreter", + "@fly/sprites", + "@vercel/sandbox", + "dockerode", + "get-port", + "modal", + "computesdk", + ], }); diff --git a/server/ARCHITECTURE.md b/server/ARCHITECTURE.md index 774c38a..4ac7de8 100644 --- a/server/ARCHITECTURE.md +++ b/server/ARCHITECTURE.md @@ -101,25 +101,23 @@ Each session tracks: ### Lifecycle ``` -POST /v1/sessions/{sessionId} Create session, auto-install agent +POST /v1/acp/{serverId}?agent=... 
initialize ACP server, auto-install agent ↓ -POST /v1/sessions/{id}/messages Spawn agent subprocess, stream output -POST /v1/sessions/{id}/messages/stream Post and stream a single turn +POST /v1/acp/{serverId} session/new +POST /v1/acp/{serverId} session/prompt ↓ -GET /v1/sessions/{id}/events Poll for new events (offset-based) -GET /v1/sessions/{id}/events/sse Subscribe to SSE stream +GET /v1/acp/{serverId} Subscribe to ACP SSE stream ↓ -POST .../questions/{id}/reply Answer agent question -POST .../permissions/{id}/reply Grant/deny permission request +JSON-RPC response envelopes Answer questions / reply to permissions ↓ -(agent process terminates) Session marked as ended +DELETE /v1/acp/{serverId} Close ACP server ``` ### Event Streaming -- Events are stored in memory per session and assigned a monotonically increasing `id`. -- `/events` returns a slice of events by offset/limit. -- `/events/sse` streams new events from the same offset semantics. +- ACP envelopes are stored in memory per server and assigned a monotonically increasing SSE `id`. +- `GET /v1/acp/{serverId}` replays buffered envelopes and then streams live updates. +- Clients continue turns by POSTing ACP JSON-RPC requests to the same server id. 
When a message is sent: diff --git a/server/packages/acp-http-adapter/src/process.rs b/server/packages/acp-http-adapter/src/process.rs index 74101ed..cec1d27 100644 --- a/server/packages/acp-http-adapter/src/process.rs +++ b/server/packages/acp-http-adapter/src/process.rs @@ -611,7 +611,7 @@ impl AdapterRuntime { } } - async fn stderr_tail_summary(&self) -> Option { + pub async fn stderr_tail_summary(&self) -> Option { let tail = self.stderr_tail.lock().await; if tail.is_empty() { return None; diff --git a/server/packages/agent-management/src/agents.rs b/server/packages/agent-management/src/agents.rs index 785603d..4f90634 100644 --- a/server/packages/agent-management/src/agents.rs +++ b/server/packages/agent-management/src/agents.rs @@ -1093,9 +1093,9 @@ fn write_mock_agent_process_launcher(path: &Path) -> Result<(), AgentError> { fs::create_dir_all(parent)?; } let script = if cfg!(windows) { - "@echo off\r\nsandbox-agent mock-agent-process %*\r\n" + "@echo off\r\nif not \"%SANDBOX_AGENT_BIN%\"==\"\" (\r\n \"%SANDBOX_AGENT_BIN%\" mock-agent-process %*\r\n exit /b %errorlevel%\r\n)\r\nsandbox-agent mock-agent-process %*\r\n" } else { - "#!/usr/bin/env sh\nexec sandbox-agent mock-agent-process \"$@\"\n" + "#!/usr/bin/env sh\nif [ -n \"${SANDBOX_AGENT_BIN:-}\" ]; then\n exec \"$SANDBOX_AGENT_BIN\" mock-agent-process \"$@\"\nfi\nexec sandbox-agent mock-agent-process \"$@\"\n" }; write_text_file(path, script) } @@ -1969,6 +1969,34 @@ exit 0 assert_eq!(result.artifacts[0].source, InstallSource::Builtin); } + #[test] + fn mock_launcher_prefers_sandbox_agent_bin() { + let temp_dir = tempfile::tempdir().expect("create tempdir"); + let manager = AgentManager::with_platform(temp_dir.path(), Platform::LinuxX64); + + manager + .install( + AgentId::Mock, + InstallOptions { + reinstall: true, + version: None, + agent_process_version: None, + }, + ) + .expect("mock install"); + + let launcher = manager.agent_process_path(AgentId::Mock); + let mut file = 
fs::File::open(&launcher).expect("open mock launcher"); + let mut contents = String::new(); + file.read_to_string(&mut contents) + .expect("read mock launcher"); + + assert!( + contents.contains("SANDBOX_AGENT_BIN"), + "mock launcher should reference SANDBOX_AGENT_BIN" + ); + } + #[test] fn install_pi_skips_native_and_installs_fallback_npm_launcher() { let _env_lock = env_lock().lock().expect("env lock"); diff --git a/server/packages/sandbox-agent/src/acp_proxy_runtime.rs b/server/packages/sandbox-agent/src/acp_proxy_runtime.rs index 212356e..e0a4f99 100644 --- a/server/packages/sandbox-agent/src/acp_proxy_runtime.rs +++ b/server/packages/sandbox-agent/src/acp_proxy_runtime.rs @@ -147,6 +147,7 @@ impl AcpProxyRuntime { "acp_proxy: POST → response" ); let value = annotate_agent_error(instance.agent, value); + let value = annotate_agent_stderr(value, &instance.runtime).await; Ok(ProxyPostOutcome::Response(value)) } Ok(PostOutcome::Accepted) => { @@ -297,7 +298,7 @@ impl AcpProxyRuntime { let resolve_started = std::time::Instant::now(); let manager = self.inner.agent_manager.clone(); - let launch = tokio::task::spawn_blocking(move || manager.resolve_agent_process(agent)) + let mut launch = tokio::task::spawn_blocking(move || manager.resolve_agent_process(agent)) .await .map_err(|err| SandboxError::StreamError { message: format!("failed to resolve agent process launch spec: {err}"), @@ -306,6 +307,16 @@ impl AcpProxyRuntime { message: err.to_string(), })?; + if agent == AgentId::Mock { + if let Ok(exe) = std::env::current_exe() { + let path = exe.to_string_lossy().to_string(); + launch + .env + .entry("SANDBOX_AGENT_BIN".to_string()) + .or_insert(path); + } + } + tracing::info!( server_id = server_id, agent = agent.as_str(), @@ -572,6 +583,25 @@ fn parse_json_number(raw: &str) -> Option { /// Inspect JSON-RPC error responses from agent processes and add helpful hints /// when we can infer the root cause from a known error pattern. 
+async fn annotate_agent_stderr(mut value: Value, runtime: &AdapterRuntime) -> Value { + if value.get("error").is_none() { + return value; + } + if let Some(stderr) = runtime.stderr_tail_summary().await { + if let Some(error) = value.get_mut("error") { + if let Some(error_obj) = error.as_object_mut() { + let data = error_obj + .entry("data") + .or_insert_with(|| Value::Object(Default::default())); + if let Some(obj) = data.as_object_mut() { + obj.insert("agentStderr".to_string(), Value::String(stderr)); + } + } + } + } + value +} + fn annotate_agent_error(agent: AgentId, mut value: Value) -> Value { if agent != AgentId::Pi { return value; diff --git a/server/packages/sandbox-agent/tests/agent-flows/pi_rpc_integration.rs b/server/packages/sandbox-agent/tests/agent-flows/pi_rpc_integration.rs deleted file mode 100644 index 3113ae7..0000000 --- a/server/packages/sandbox-agent/tests/agent-flows/pi_rpc_integration.rs +++ /dev/null @@ -1,410 +0,0 @@ -// Pi RPC integration tests (gated via SANDBOX_TEST_PI + PATH). 
-include!("../common/http.rs"); - -fn pi_test_config() -> Option { - let configs = match test_agents_from_env() { - Ok(configs) => configs, - Err(err) => { - eprintln!("Skipping Pi RPC integration test: {err}"); - return None; - } - }; - configs - .into_iter() - .find(|config| config.agent == AgentId::Pi) -} - -async fn create_pi_session_with_native(app: &Router, session_id: &str) -> String { - let payload = create_pi_session(app, session_id, None, None).await; - let native_session_id = payload - .get("native_session_id") - .and_then(Value::as_str) - .unwrap_or("") - .to_string(); - assert!( - !native_session_id.is_empty(), - "expected native_session_id for pi session" - ); - native_session_id -} - -async fn create_pi_session( - app: &Router, - session_id: &str, - model: Option<&str>, - variant: Option<&str>, -) -> Value { - let mut body = Map::new(); - body.insert("agent".to_string(), json!("pi")); - body.insert( - "permissionMode".to_string(), - json!(test_permission_mode(AgentId::Pi)), - ); - if let Some(model) = model { - body.insert("model".to_string(), json!(model)); - } - if let Some(variant) = variant { - body.insert("variant".to_string(), json!(variant)); - } - let (status, payload) = send_json( - app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(Value::Object(body)), - ) - .await; - assert_eq!(status, StatusCode::OK, "create pi session"); - payload -} - -async fn fetch_pi_models(app: &Router) -> Vec { - let (status, payload) = send_json(app, Method::GET, "/v1/agents/pi/models", None).await; - assert_eq!(status, StatusCode::OK, "pi models endpoint"); - payload - .get("models") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default() -} - -fn model_variant_ids(model: &Value) -> Vec<&str> { - model - .get("variants") - .and_then(Value::as_array) - .map(|values| values.iter().filter_map(Value::as_str).collect::>()) - .unwrap_or_default() -} - -fn assert_strictly_increasing_sequences(events: &[Value], label: &str) { - let mut 
last_sequence = 0u64; - for event in events { - let sequence = event - .get("sequence") - .and_then(Value::as_u64) - .expect("missing sequence"); - assert!( - sequence > last_sequence, - "{label}: sequence did not increase (prev {last_sequence}, next {sequence})" - ); - last_sequence = sequence; - } -} - -fn assert_all_events_for_session(events: &[Value], session_id: &str) { - for event in events { - let event_session_id = event - .get("session_id") - .and_then(Value::as_str) - .unwrap_or_default(); - assert_eq!( - event_session_id, session_id, - "cross-session event detected in {session_id}: {event}" - ); - } -} - -fn assert_item_started_ids_unique(events: &[Value], label: &str) { - let mut ids = std::collections::HashSet::new(); - for event in events { - let event_type = event - .get("type") - .and_then(Value::as_str) - .unwrap_or_default(); - if event_type != "item.started" { - continue; - } - let Some(item_id) = event - .get("data") - .and_then(|data| data.get("item")) - .and_then(|item| item.get("item_id")) - .and_then(Value::as_str) - else { - continue; - }; - assert!( - ids.insert(item_id.to_string()), - "{label}: duplicate item.started id {item_id}" - ); - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_rpc_session_and_stream() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = "pi-rpc-session"; - let _native_session_id = create_pi_session_with_native(&app.app, session_id).await; - - let events = read_turn_stream_events(&app.app, session_id, Duration::from_secs(120)).await; - assert!(!events.is_empty(), "no events from pi stream"); - assert!( - !events.iter().any(is_unparsed_event), - "agent.unparsed event encountered" - ); - assert!( - should_stop(&events), - "turn stream did not reach a terminal event" - ); - assert_strictly_increasing_sequences(&events, 
"pi_rpc_session_and_stream"); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_variant_high_applies_for_thinking_model() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let models = fetch_pi_models(&app.app).await; - let thinking_model = models.iter().find_map(|model| { - let model_id = model.get("id").and_then(Value::as_str)?; - let variants = model_variant_ids(model); - if variants.contains(&"high") { - Some(model_id.to_string()) - } else { - None - } - }); - let Some(model_id) = thinking_model else { - eprintln!("Skipping PI variant thinking-model test: no model advertises high"); - return; - }; - - let session_id = "pi-variant-thinking-high"; - create_pi_session(&app.app, session_id, Some(&model_id), Some("high")).await; - - let events = read_turn_stream_events(&app.app, session_id, Duration::from_secs(120)).await; - assert!( - !events.is_empty(), - "no events from pi thinking-variant stream" - ); - assert!( - !events.iter().any(is_unparsed_event), - "agent.unparsed event encountered for thinking-variant session" - ); - assert!( - should_stop(&events), - "thinking-variant turn stream did not reach a terminal event" - ); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_variant_high_on_non_thinking_model_uses_pi_native_clamping() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let models = fetch_pi_models(&app.app).await; - let non_thinking_model = models.iter().find_map(|model| { - let model_id = model.get("id").and_then(Value::as_str)?; - let variants = model_variant_ids(model); - if variants == vec!["off"] { - Some(model_id.to_string()) - } else { - None - } - }); - let Some(model_id) = non_thinking_model 
else { - eprintln!("Skipping PI non-thinking variant test: no off-only model reported"); - return; - }; - - let session_id = "pi-variant-nonthinking-high"; - create_pi_session(&app.app, session_id, Some(&model_id), Some("high")).await; - - let events = read_turn_stream_events(&app.app, session_id, Duration::from_secs(120)).await; - assert!( - !events.is_empty(), - "no events from pi non-thinking variant stream" - ); - assert!( - !events.iter().any(is_unparsed_event), - "agent.unparsed event encountered for non-thinking variant session" - ); - assert!( - should_stop(&events), - "non-thinking variant turn stream did not reach a terminal event" - ); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_parallel_sessions_turns() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_a = "pi-parallel-a"; - let session_b = "pi-parallel-b"; - create_pi_session_with_native(&app.app, session_a).await; - create_pi_session_with_native(&app.app, session_b).await; - - let app_a = app.app.clone(); - let app_b = app.app.clone(); - let send_a = send_message(&app_a, session_a); - let send_b = send_message(&app_b, session_b); - tokio::join!(send_a, send_b); - - let app_a = app.app.clone(); - let app_b = app.app.clone(); - let poll_a = poll_events_until(&app_a, session_a, Duration::from_secs(120)); - let poll_b = poll_events_until(&app_b, session_b, Duration::from_secs(120)); - let (events_a, events_b) = tokio::join!(poll_a, poll_b); - - assert!(!events_a.is_empty(), "no events for session A"); - assert!(!events_b.is_empty(), "no events for session B"); - assert!( - should_stop(&events_a), - "session A did not reach a terminal event" - ); - assert!( - should_stop(&events_b), - "session B did not reach a terminal event" - ); - assert!( - !events_a.iter().any(is_unparsed_event), - "session A encountered 
agent.unparsed" - ); - assert!( - !events_b.iter().any(is_unparsed_event), - "session B encountered agent.unparsed" - ); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_event_isolation() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_a = "pi-isolation-a"; - let session_b = "pi-isolation-b"; - create_pi_session_with_native(&app.app, session_a).await; - create_pi_session_with_native(&app.app, session_b).await; - - let app_a = app.app.clone(); - let app_b = app.app.clone(); - let send_a = send_message(&app_a, session_a); - let send_b = send_message(&app_b, session_b); - tokio::join!(send_a, send_b); - - let app_a = app.app.clone(); - let app_b = app.app.clone(); - let poll_a = poll_events_until(&app_a, session_a, Duration::from_secs(120)); - let poll_b = poll_events_until(&app_b, session_b, Duration::from_secs(120)); - let (events_a, events_b) = tokio::join!(poll_a, poll_b); - - assert!(should_stop(&events_a), "session A did not complete"); - assert!(should_stop(&events_b), "session B did not complete"); - assert_all_events_for_session(&events_a, session_a); - assert_all_events_for_session(&events_b, session_b); - assert_strictly_increasing_sequences(&events_a, "session A"); - assert_strictly_increasing_sequences(&events_b, "session B"); - assert_item_started_ids_unique(&events_a, "session A"); - assert_item_started_ids_unique(&events_b, "session B"); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_terminate_one_session_does_not_affect_other() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_a = "pi-terminate-a"; - let session_b = "pi-terminate-b"; - create_pi_session_with_native(&app.app, 
session_a).await; - create_pi_session_with_native(&app.app, session_b).await; - - let terminate_status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_a}/terminate"), - None, - ) - .await; - assert_eq!( - terminate_status, - StatusCode::NO_CONTENT, - "terminate session A" - ); - - send_message(&app.app, session_b).await; - let events_b = poll_events_until(&app.app, session_b, Duration::from_secs(120)).await; - assert!(!events_b.is_empty(), "no events for session B"); - assert!( - should_stop(&events_b), - "session B did not complete after A terminated" - ); - - let events_a = poll_events_until(&app.app, session_a, Duration::from_secs(10)).await; - assert!( - events_a.iter().any(|event| { - event - .get("type") - .and_then(Value::as_str) - .is_some_and(|ty| ty == "session.ended") - }), - "session A missing session.ended after terminate" - ); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn pi_runtime_restart_scope() { - let Some(config) = pi_test_config() else { - return; - }; - - let app = TestApp::new(); - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_a = "pi-restart-scope-a"; - let session_b = "pi-restart-scope-b"; - create_pi_session_with_native(&app.app, session_a).await; - create_pi_session_with_native(&app.app, session_b).await; - - let terminate_status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_a}/terminate"), - None, - ) - .await; - assert_eq!( - terminate_status, - StatusCode::NO_CONTENT, - "terminate session A to stop only its runtime" - ); - - send_message(&app.app, session_b).await; - let events_b = poll_events_until(&app.app, session_b, Duration::from_secs(120)).await; - assert!( - should_stop(&events_b), - "session B did not continue after A stopped" - ); - assert_all_events_for_session(&events_b, session_b); -}