Compare commits

...

59 commits
v0.3.2 ... main

Author SHA1 Message Date
Nathan Flurry
bf484e7c96 docs: clean up orphaned docs and add session event types
Delete orphaned docs not in docs.json navigation (gigacode.mdx,
foundry-self-hosting.mdx, session-transcript-schema.mdx, pi-support-plan.md).
Remove outdated musl/glibc troubleshooting section. Add event types
documentation with example payloads to agent-sessions.mdx.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 19:11:19 -07:00
Nathan Flurry
d55b0dfb88 chore(release): update version to 0.4.2 2026-03-25 18:07:26 -07:00
ABC
251f731232
Merge pull request #284 from rivet-dev/03-25-fix_mock_pass_sandbox_agent_bin_to_mock_agent_launcher
fix(mock): pass SANDBOX_AGENT_BIN to mock agent launcher
2026-03-25 17:00:51 -04:00
abcxff
b45989a082 fix(mock): pass SANDBOX_AGENT_BIN to mock agent launcher 2026-03-25 16:54:40 -04:00
Nathan Flurry
78e84281e8 chore(release): update version to 0.4.1 2026-03-25 13:20:57 -07:00
Nathan Flurry
5da35e6dfa feat: sprites support 2026-03-25 12:23:14 -07:00
ABC
9cd9252725
Merge pull request #283 from rivet-dev/03-25-chore_providers_move_back_to_0.4.x_install_script
chore(providers): sync install script with latest 0.4.x
2026-03-25 14:24:55 -04:00
abcxff
858b9a4d2f chore(providers): move back to 0.4.x install script 2026-03-25 14:22:57 -04:00
Nathan Flurry
4fa28061e9
Merge pull request #279 from rivet-dev/NicholasKissel/docs-dark-theme
fix(docs): restore dark theme styling
2026-03-24 23:26:28 -07:00
ABCxFF
cb42971b56 chore(release): update version to 0.5.0-rc.2 2026-03-25 05:13:47 +00:00
ABC
e9fabbfe64
fix: surface agent stderr in RPC errors & add defaultCwd param (#278) 2026-03-25 00:49:35 -04:00
ABC
32dd5914ed
Merge pull request #269 from rivet-dev/e2b-base-image-support
feat(providers): add base image support and improve forward compatibility
2026-03-25 00:42:49 -04:00
ABC
fe8fbfc91c
Merge branch 'main' into e2b-base-image-support 2026-03-25 00:37:58 -04:00
Nicholas Kissel
32713ff453 fix(docs): keep dark mode strict appearance
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 21:33:10 -07:00
abcxff
833b57deb1 fix: surface agent stderr in RPC errors and default cwd for remote providers 2026-03-25 04:26:48 +00:00
Nicholas Kissel
927e77c7e2 fix(docs): restore dark theme styling with custom CSS
Re-enable theme.css with full custom styling (links, inputs, cards,
code blocks, alerts) and update docs.json color values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 21:25:06 -07:00
Nathan Flurry
f353e39fc6
Merge pull request #273 from Crunchyman-ralph/fix/update-install-script-to-0.4.x
fix: update install script URL from 0.3.x to 0.4.x
2026-03-19 12:45:00 -07:00
Ralph Khreish
3525dcc315
fix: update install script URL from 0.3.x to 0.4.x
The E2B and Vercel providers install sandbox-agent 0.3.x inside sandboxes
while the SDK client speaks 0.4.0 ACP protocol, causing AcpRpcError -32603.

Fixes #272
2026-03-19 17:45:16 +01:00
Nathan Flurry
7b23e519c2 fix(foundry): add Bun idleTimeout safety net and subscription retry with backoff
Bun.serve() defaults to a 10s idle timeout that can kill long-running
requests. Actor RPCs go through the gateway tunnel with a 1s SSE ping,
so this likely never fires, but set idleTimeout to 255s as a safety net.

Subscription topics (app, org, session, task) previously had no retry
mechanism. If the initial connection failed or a mid-session error
occurred, the subscription stayed in an error state permanently. Add
exponential backoff retry (1s base, 30s max) that cleans up the old
connection before each attempt and stops when disposed or no listeners remain.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 18:35:36 -07:00
Nathan Flurry
bea3b58199 fix(foundry): use $HOME instead of hardcoded /home/sandbox for sandbox repo paths
E2B sandboxes run as `user` (home: /home/user), not `sandbox`, so
`mkdir -p /home/sandbox` fails with "Permission denied". Replace all
hardcoded `/home/sandbox` paths with `$HOME` resolved at shell runtime
inside the sandbox, and dynamically resolve the repo CWD via the sandbox
actor so it works across providers (E2B, local Docker, Daytona).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 17:33:53 -07:00
Nathan Flurry
524f40ec02 feat(providers): simplify modal to use published base image
The `-full` base image already includes sandbox-agent and all agents
pre-installed. Remove redundant apt-get, install script, and
install-agent dockerfile commands from the Modal provider.

Also allow overriding the default image via SANDBOX_AGENT_IMAGE env var
across all providers for testing with different published versions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 16:54:20 -07:00
Nathan Flurry
4e76038a0d feat(providers): add base image support and improve forward compatibility
Add support for configuring base images across all compute providers:
- E2B: Accept optional `template` parameter to select custom templates
- Modal: Accept optional `image` parameter (string or Image object) for base images
- ComputeSDK: Expand `create` override to accept full CreateSandboxOptions payload (image, templateId, etc.)
- Daytona: Improve type safety for `image` option

Improve forward compatibility by making all `create` overrides accept full Partial SDK types, allowing any new provider fields to flow through without code changes. Fix Modal provider bug where `encryptedPorts` was hardcoded and would clobber user-provided values; now merges additional ports instead.

Update docs and examples to demonstrate base image configuration for E2B, Modal, and ComputeSDK. Add comprehensive provider lifecycle tests for Modal and ComputeSDK, including template and image passthrough verification.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-17 15:25:21 -07:00
Nathan Flurry
ffb9f1082b fix(foundry): fix runner version 2026-03-17 14:33:13 -07:00
Nathan Flurry
f25a92aca8 chore(release): update version to 0.5.0-rc.1 2026-03-17 02:44:41 -07:00
Nathan Flurry
3b8c74589d
Merge pull request #264 from rivet-dev/desktop-computer-use-neko
feat: desktop computer-use APIs with neko streaming
2026-03-17 02:36:50 -07:00
Nathan Flurry
dff7614b11 feat: desktop computer-use APIs with windows, launch/open, and neko streaming
Adds desktop computer-use endpoints (windows, screenshots, mouse/keyboard,
launch/open), enhances neko-based streaming integration, updates inspector
UI with desktop debug tab, and adds common software test infrastructure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
2d8508d6e2 feat: enhance desktop computer-use streaming with neko integration
Improve desktop streaming architecture, add inspector dev tooling,
React DesktopViewer updates, and computer-use documentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
4252c705df chore: remove .context/ from git and add to .gitignore
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
33821d8660 feat: desktop computer-use APIs with neko-based streaming
Add desktop runtime management (Xvfb, openbox, dbus), screen capture,
mouse/keyboard input, and video streaming via neko binary extracted
from the m1k1o/neko container. Includes Docker test rig, TypeScript SDK
desktop support, and inspector Desktop tab.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:36:17 -07:00
Nathan Flurry
3895e34bdb feat(foundry): add foundry base sandbox image with sudo, chromium, and dev tooling
Add a custom Docker image (foundry-base.Dockerfile) that builds sandbox-agent
from source and layers sudo, git, neovim, gh, node, bun, chromium, and
agent-browser. Includes publish script for timestamped + latest tags to
rivetdev/sandbox-agent on Docker Hub.

Update local sandbox provider default to use foundry-base-latest and wire
HF_LOCAL_SANDBOX_IMAGE env var through compose.dev.yaml.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:09:12 -07:00
Nathan Flurry
eafe0f9fe4 fix(foundry): use IF NOT EXISTS in org migration to handle pre-existing auth tables
Some org actors had auth tables created outside the migration system
(by earlier queue-based auth code). Migration m0001 fails with
"table auth_session_index already exists" on those actors, preventing
them from starting.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:45:34 -07:00
Nathan Flurry
6ebe13cddd fix(foundry): use cookie-based OAuth state to prevent proxy retry auth failures
Switch storeStateStrategy from "database" to "cookie" so OAuth state is
stored encrypted in a temporary cookie instead of a DB verification record.
This makes the callback idempotent — proxy retries can't fail because the
state travels with the request itself rather than being deleted after the
first successful callback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:37:54 -07:00
Nathan Flurry
8ddec6831b fix(foundry): deduplicate OAuth callbacks and cache actor handles to fix production auth
The production proxy chain (Cloudflare -> Fastly -> Railway) retries
OAuth callback requests when they take >10s. The first request succeeds
and deletes the verification record, so the retry fails with
"verification not found" -> ?error=please_restart_the_process.

- Add callback deduplication by OAuth state param in the auth handler.
  Duplicate requests wait for the original and return a cloned response.
- Cache appOrganization() and getUser() actor handles to eliminate
  redundant getOrCreate RPCs during callbacks (was 10+ per sign-in).
- Add diagnostic logging for auth callback timing and adapter operations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:29:17 -07:00
Nathan Flurry
4ca77e4d83 Merge remote-tracking branch 'origin/main' into fix-foundry-auth-error 2026-03-16 21:26:25 -07:00
Nathan Flurry
e7b9ac6854 fix(foundry): move Better Auth operations from queues to actions to fix production auth timeout
The org actor's workflow queue is shared with GitHub sync, webhooks, task
mutations, and billing (20+ queue names processed sequentially). During
OAuth callback, auth operations would time out waiting behind long-running
queue handlers, causing Better Auth's parseState to redirect to
?error=please_restart_the_process.

Auth operations are simple SQLite reads/writes with no cross-actor side
effects, so they are safe to run as actions that execute immediately
without competing in the queue.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 21:26:13 -07:00
Nathan Flurry
eab215c7cb feat(foundry): redirect to signin page on auth API errors
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 19:34:16 -07:00
Nathan Flurry
84a80d59d7
Merge pull request #265 from rivet-dev/revert-actions-to-queues
feat(foundry): revert actions to queue/workflow pattern
2026-03-16 18:48:21 -07:00
Nathan Flurry
a171956298 feat(foundry): revert actions to queue/workflow pattern with direct sends
Revert actor communication from direct action calls to queue/workflow-based
patterns for better observability (workflow history in RivetKit inspector),
replay/recovery semantics, and idiomatic RivetKit usage.

- Add queue/workflow infrastructure to all actors: organization, task, user,
  github-data, sandbox, and audit-log
- Mutations route through named queues processed by workflow command loops
  with ctx.step() wrapping for c.state/c.db access and observability
- Remove command action wrappers (~460 lines) — callers use .send() directly
  to queue names with expectQueueResponse() for wait:true results
- Keep sendPrompt and runProcess as direct sandbox actions (long-running /
  large responses that would block the workflow loop or exceed 128KB limit)
- Fix workspace fire-and-forget calls (enqueueWorkspaceEnsureSession,
  enqueueWorkspaceRefresh) to self-send to task queue instead of calling
  directly outside workflow step context

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 18:46:53 -07:00
Nathan Flurry
4111aebfce
feat(foundry): task owner git auth + manual owner change UI (#263)
* Add task owner git auth proposal and sandbox architecture docs

- Add proposal for primary user per task with OAuth token injection
  for sandbox git operations (.context/proposal-task-owner-git-auth.md)
- Document sandbox architecture constraints in CLAUDE.md: single sandbox
  per task assumption, OAuth token security implications, git auto-auth
  requirement, and git error surfacing rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add proposals for reverting to queues and rivetkit sandbox resilience

- proposal-revert-actions-to-queues.md: Detailed plan for reverting the
  actions-only pattern back to queues/workflows now that the RivetKit
  queue.iter() bug is fixed. Lists what to keep (lazy tasks, resolveTaskRepoId,
  sync override threading, E2B fixes, frontend fixes) vs what to revert
  (communication pattern only).

- proposal-rivetkit-sandbox-resilience.md: Rivetkit sandbox actor changes for
  handling destroyed/paused sandboxes, keep-alive, and the UNIQUE constraint
  crash fix.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(foundry): add manual task owner change via UI dropdown

Add an owner dropdown to the Overview tab that lets users reassign task
ownership to any organization member. The owner's GitHub credentials are
used for git operations in the sandbox.

Full-stack implementation:
- Backend: changeTaskOwnerManually action on task actor, routed through
  org actor's changeWorkspaceTaskOwner action, with primaryUser schema
  columns on both task and org index tables
- Client: changeOwner method on workspace client (mock + remote)
- Frontend: owner dropdown in right sidebar Overview tab showing org
  members, with avatar and role display
- Shared: TaskWorkspaceChangeOwnerInput type and primaryUser fields on
  workspace snapshot types

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 17:05:11 -07:00
Nathan Flurry
167712ace7 chore(release): update version to 0.4.1-rc.1 2026-03-16 15:53:00 -07:00
Nathan Flurry
9ce71c03c8
Merge pull request #261 from rivet-dev/e2b-autopause-provider
feat: add E2B auto-pause provider lifecycle support
2026-03-16 15:39:45 -07:00
Nathan Flurry
f45a467484
chore(foundry): migrate to actions (#262)
* feat(foundry): checkpoint actor and workspace refactor

* docs(foundry): add agent handoff context

* wip(foundry): continue actor refactor

* wip(foundry): capture remaining local changes

* Complete Foundry refactor checklist

* Fix Foundry validation fallout

* wip

* wip: convert all actors from workflow to plain run handlers

Workaround for RivetKit bug where c.queue.iter() never yields messages
for actors created via getOrCreate from another actor's context. The
queue accepts messages (visible in inspector) but the iterator hangs.
Sleep/wake fixes it, but actors with active connections never sleep.

Converted organization, github-data, task, and user actors from
run: workflow(...) to plain run: async (c) => { for await ... }.

Also fixes:
- Missing auth tables in org migration (auth_verification etc)
- default_model NOT NULL constraint on org profile upsert
- Nested workflow step in github-data (HistoryDivergedError)
- Removed --force from frontend Dockerfile pnpm install

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Convert all actors from queues/workflows to direct actions, lazy task creation

Major refactor replacing all queue-based workflow communication with direct
RivetKit action calls across all actors. This works around a RivetKit bug
where c.queue.iter() deadlocks for actors created from another actor's context.

Key changes:
- All actors (organization, task, user, audit-log, github-data) converted
  from run: workflow(...) to actions-only (no run handler, no queues)
- PR sync creates virtual task entries in org local DB instead of spawning
  task actors — prevents OOM from 200+ actors created simultaneously
- Task actors created lazily on first user interaction via getOrCreate,
  self-initialize from org's getTaskIndexEntry data
- Removed requireRepoExists cross-actor call (caused 500s), replaced with
  local resolveTaskRepoId from org's taskIndex table
- Fixed getOrganizationContext to thread overrides through all sync phases
- Fixed sandbox repo path (/home/user/repo for E2B compatibility)
- Fixed buildSessionDetail to skip transcript fetch for pending sessions
- Added process crash protection (uncaughtException/unhandledRejection)
- Fixed React infinite render loop in mock-layout useEffect dependencies
- Added sandbox listProcesses error handling for expired E2B sandboxes
- Set E2B sandbox timeout to 1 hour (was 5 min default)
- Updated CLAUDE.md with lazy task creation rules, no-silent-catch policy,
  React hook dependency safety rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix E2B sandbox timeout comment, frontend stability, and create-flow improvements

- Add TEMPORARY comment on E2B timeoutMs with pointer to rivetkit sandbox
  resilience proposal for when autoPause lands
- Fix React useEffect dependency stability in mock-layout and
  organization-dashboard to prevent infinite re-render loops
- Fix terminal-pane ref handling
- Improve create-flow service and tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 15:23:59 -07:00
Nathan Flurry
77c8f1e3f3 feat: add E2B auto-pause support with pause/kill/reconnect provider lifecycle
Add `pause()`, `kill()`, and `reconnect()` methods to the SandboxProvider interface so providers can support graceful suspension and permanent deletion as distinct operations. The E2B provider now uses `betaCreate` with `autoPause: true` by default, `betaPause()` for suspension, and surfaces `SandboxDestroyedError` on reconnect to a deleted sandbox. SDK exposes `pauseSandbox()` and `killSandbox()` alongside the existing `destroySandbox()`.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 14:57:49 -07:00
Nathan Flurry
32f3c6c3bc chore(release): update version to 0.4.0 2026-03-16 00:48:05 -07:00
Nathan Flurry
7faed2f43a chore(release): update version to 0.4.0-rc.3 2026-03-15 23:26:42 -07:00
Nathan Flurry
f0ec8e497b fix: mock agent process launcher not written during install
agent_process_status() for mock always returned Some(...) even when the
launcher file did not exist. This caused install_agent_process() to
short-circuit with "already installed" and never write the launcher
script. Fix by checking that the launcher file exists before reporting
the mock agent as installed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 23:17:52 -07:00
Nathan Flurry
56c80e6c9e chore(release): update version to 0.4.0-rc.2 2026-03-15 22:38:30 -07:00
Nathan Flurry
bf543d225d fix: mock agent process, React 18/19 types, release version refs
- Add hidden `mock-agent-process` CLI subcommand implementing a stdio
  JSON-RPC echo agent (ported from examples/mock-acp-agent)
- Update write_mock_agent_process_launcher() to exec the new subcommand
  instead of exiting with error
- Update sdks/react to support React 18 and 19 peer dependencies
- Update @types/react to v19 across workspace (pnpm override + inspector)
- Fix RefObject<T | null> compatibility for React 19 useRef() signatures
- Add version reference replacement logic to release update_version.ts
  covering all docs, examples, and code files listed in CLAUDE.md
- Add missing files to CLAUDE.md Install Version References list
  (architecture.mdx, boxlite, modal, computesdk docs and examples)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-15 22:36:48 -07:00
Nathan Flurry
2f9f25ae54 chore(release): update version to 0.4.0-rc.1 2026-03-15 20:53:54 -07:00
Nathan Flurry
cf7e2a92c6
SDK: Add ensureServer() for automatic server recovery (#260)
* SDK sandbox provisioning: built-in providers, docs restructure, and quickstart overhaul

- Add built-in sandbox providers (local, docker, e2b, daytona, vercel, cloudflare) to the TypeScript SDK so users import directly instead of passing client instances
- Restructure docs: rename architecture to orchestration-architecture, add new architecture page for server overview, improve getting started flow
- Rewrite quickstart to be TypeScript-first with provider CodeGroup and custom provider accordion
- Update all examples to use new provider APIs
- Update persist drivers and foundry for new SDK surface

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix SDK typecheck errors and update persist drivers for insertEvent signature

- Fix insertEvent call in client.ts to pass sessionId as first argument
- Update Daytona provider create options to use Partial type (image has default)
- Update StrictUniqueSessionPersistDriver in tests to match new insertEvent signature
- Sync persist packages, openapi spec, and docs with upstream changes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Add Modal and ComputeSDK built-in providers, update examples and docs

- Add `sandbox-agent/modal` provider using Modal SDK with node:22-slim image
- Add `sandbox-agent/computesdk` provider using ComputeSDK's unified sandbox API
- Update Modal and ComputeSDK examples to use new SDK providers
- Update Modal and ComputeSDK deploy docs with provider-based examples
- Add Modal to quickstart CodeGroup and docs.json navigation
- Add provider test entries for Modal and ComputeSDK
- Remove old standalone example files (modal.ts, computesdk.ts)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Fix Modal provider: pre-install agents in image, fire-and-forget exec for server

- Pre-install agents in Dockerfile commands so they are cached across creates
- Use fire-and-forget exec (no wait) to keep server alive in Modal sandbox
- Add memoryMiB option (default 2GB) to avoid OOM during agent install

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Sync upstream changes: multiplayer docs, logos, openapi spec, foundry config

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* SDK: Add ensureServer() for automatic server recovery

Add ensureServer() to SandboxProvider interface to handle cases where the
sandbox-agent server stops or goes to sleep. The SDK now calls this method
after 3 consecutive health-check failures, allowing providers to restart the
server if needed. Most built-in providers (E2B, Daytona, Vercel, Modal,
ComputeSDK) implement this. Docker and Cloudflare manage server lifecycle
differently, and Local uses managed child processes.

Also update docs for quickstart, architecture, multiplayer, and session
persistence; mark persist-* packages as deprecated; and add ensureServer
implementations to all applicable providers.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

* wip

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-15 20:29:28 -07:00
Nathan Flurry
3426cbc6ec
chore: update ACP SDK to 0.16.1 and add e2e testing guidance (#259)
- Bump @agentclientprotocol/sdk from 0.14.1 to 0.16.1 in acp-http-client
- Update adapters.json to reflect new SDK version
- Migrate unstableListSessions to listSessions (stabilized in SDK 0.16.0)
- Add CLAUDE.md guidance: request token location before e2e agent testing

All 5 ACP adapters remain at their latest versions. E2E testing confirms
Claude, Codex, Pi, and Cursor agents work end-to-end with credentials.

Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-15 19:46:26 -07:00
Nathan Flurry
d850a3b77a
fix: normalize Pi ACP bootstrap payloads (#227)
* fix: normalize pi ACP bootstrap payloads

* docs(cli): document custom pi binary override

* docs(quickstart): list all supported agent IDs

* docs(code): clarify Pi payload normalization rationale
2026-03-15 18:52:59 -07:00
waltertang27
e740d28e0a
Add modal sandbox support (#192)
* add modal sandbox example

* add test instructions

---------

Co-authored-by: Nathan Flurry <NathanFlurry@users.noreply.github.com>
2026-03-15 13:14:59 -07:00
Nathan Flurry
284fe66be4
wip (#258) 2026-03-15 12:37:42 -07:00
Nathan Flurry
57a07f6a0a
wip (#256) 2026-03-14 23:47:43 -07:00
Nathan Flurry
99abb9d42e
chore(foundry): workbench action responsiveness (#254)
* wip

* wip
2026-03-14 20:42:18 -07:00
Nathan Flurry
400f9a214e
Add transcript virtualization to Foundry UI (#255) 2026-03-14 17:55:05 -07:00
Nathan Flurry
5ea9ec5e2f
wip (#253) 2026-03-14 14:38:29 -07:00
Nathan Flurry
70d31f819c
chore(foundry): improve sandbox impl + status pill (#252)
* Improve Daytona sandbox provisioning and frontend UI

Refactor git clone script in Daytona provider to use cleaner shell logic for GitHub token authentication and branch checkout. Add support for private repository clones with token-based auth. Improve Daytona provider error handling and git configuration setup.

Frontend improvements include enhanced dev panel, workspace dashboard, sidebar navigation, and UI components for better task/session management. Update interest manager and backend client to support improved session state handling.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>

* Add header status pill showing task/session/sandbox state

Surface aggregate status (error, provisioning, running, ready, no sandbox)
as a colored pill in the transcript panel header. Integrates task runtime
status, session status, and sandbox availability via the sandboxProcesses
interest topic so the pill accurately reflects unreachable sandboxes.

Includes mock tasks demonstrating error, provisioning, and running states,
unit tests for deriveHeaderStatus, and workspace-dashboard integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-14 12:14:06 -07:00
494 changed files with 46255 additions and 24669 deletions

View file

@ -43,7 +43,7 @@ Manually verify the install script works in a fresh environment:
```bash ```bash
docker run --rm alpine:latest sh -c " docker run --rm alpine:latest sh -c "
apk add --no-cache curl ca-certificates libstdc++ libgcc bash && apk add --no-cache curl ca-certificates libstdc++ libgcc bash &&
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh &&
sandbox-agent --version sandbox-agent --version
" "
``` ```

View file

@ -23,6 +23,9 @@ GITHUB_APP_PRIVATE_KEY=
# Webhook secret for verifying GitHub webhook payloads. # Webhook secret for verifying GitHub webhook payloads.
# Use smee.io for local development: https://smee.io/new # Use smee.io for local development: https://smee.io/new
GITHUB_WEBHOOK_SECRET= GITHUB_WEBHOOK_SECRET=
# Required for local GitHub webhook forwarding in compose.dev.
SMEE_URL=
SMEE_TARGET=http://backend:7741/v1/webhooks/github
# Fill these in when enabling live Stripe billing. # Fill these in when enabling live Stripe billing.
STRIPE_SECRET_KEY= STRIPE_SECRET_KEY=

1
.gitignore vendored
View file

@ -59,3 +59,4 @@ sdks/cli/platforms/*/bin/
# Foundry desktop app build artifacts # Foundry desktop app build artifacts
foundry/packages/desktop/frontend-dist/ foundry/packages/desktop/frontend-dist/
foundry/packages/desktop/src-tauri/sidecars/ foundry/packages/desktop/src-tauri/sidecars/
.context/

112
CLAUDE.md
View file

@ -1,40 +1,5 @@
# Instructions # Instructions
## ACP v1 Baseline
- v1 is ACP-native.
- `/v1/*` is removed and returns `410 Gone` (`application/problem+json`).
- `/opencode/*` is disabled during ACP core phases and returns `503`.
- Prompt/session traffic is ACP JSON-RPC over streamable HTTP on `/v1/rpc`:
- `POST /v1/rpc`
- `GET /v1/rpc` (SSE)
- `DELETE /v1/rpc`
- Control-plane endpoints:
- `GET /v1/health`
- `GET /v1/agents`
- `POST /v1/agents/{agent}/install`
- Binary filesystem transfer endpoints (intentionally HTTP, not ACP extension methods):
- `GET /v1/fs/file`
- `PUT /v1/fs/file`
- `POST /v1/fs/upload-batch`
- Sandbox Agent ACP extension method naming:
- Custom ACP methods use `_sandboxagent/...` (not `_sandboxagent/v1/...`).
- Session detach method is `_sandboxagent/session/detach`.
## API Scope
- ACP is the primary protocol for agent/session behavior and all functionality that talks directly to the agent.
- ACP extensions may be used for gaps (for example `skills`, `models`, and related metadata), but the default is that agent-facing behavior is implemented by the agent through ACP.
- Custom HTTP APIs are for non-agent/session platform services (for example filesystem, terminals, and other host/runtime capabilities).
- Filesystem and terminal APIs remain Sandbox Agent-specific HTTP contracts and are not ACP.
- Do not make Sandbox Agent core flows depend on ACP client implementations of `fs/*` or `terminal/*`; in practice those client-side capabilities are often incomplete or inconsistent.
- ACP-native filesystem and terminal methods are also too limited for Sandbox Agent host/runtime needs, so prefer the native HTTP APIs for richer behavior.
- Keep `GET /v1/fs/file`, `PUT /v1/fs/file`, and `POST /v1/fs/upload-batch` on HTTP:
- These are Sandbox Agent host/runtime operations with cross-agent-consistent behavior.
- They may involve very large binary transfers that ACP JSON-RPC envelopes are not suited to stream.
- This is intentionally separate from ACP native `fs/read_text_file` and `fs/write_text_file`.
- ACP extension variants may exist in parallel, but SDK defaults should prefer HTTP for these binary transfer operations.
## Naming and Ownership ## Naming and Ownership
- This repository/product is **Sandbox Agent**. - This repository/product is **Sandbox Agent**.
@ -48,66 +13,14 @@
- Never mention "ACP" in user-facing docs (`docs/**/*.mdx`) except in docs that are specifically about ACP itself (e.g. `docs/acp-http-client.mdx`). - Never mention "ACP" in user-facing docs (`docs/**/*.mdx`) except in docs that are specifically about ACP itself (e.g. `docs/acp-http-client.mdx`).
- Never expose underlying protocol method names (e.g. `session/request_permission`, `session/create`, `_sandboxagent/session/detach`) in non-ACP docs. Describe the behavior in user-facing terms instead. - Never expose underlying protocol method names (e.g. `session/request_permission`, `session/create`, `_sandboxagent/session/detach`) in non-ACP docs. Describe the behavior in user-facing terms instead.
- Do not describe the underlying protocol implementation in docs. Only document the SDK surface (methods, types, options). ACP protocol details belong exclusively in ACP-specific pages. - Do not describe the underlying protocol implementation in docs. Only document the SDK surface (methods, types, options). ACP protocol details belong exclusively in ACP-specific pages.
- Do not use em dashes (`—`) in docs. Use commas, periods, or parentheses instead.
## Architecture (Brief) ### Docs Source Of Truth (HTTP/CLI)
- HTTP contract and problem/error mapping: `server/packages/sandbox-agent/src/router.rs`
- ACP client runtime and agent process bridge: `server/packages/sandbox-agent/src/acp_runtime/mod.rs`
- Agent/native + ACP agent process install and lazy install: `server/packages/agent-management/`
- Inspector UI served at `/ui/` and bound to ACP over HTTP from `frontend/packages/inspector/`
## TypeScript SDK Architecture
- TypeScript clients are split into:
- `acp-http-client`: protocol-pure ACP-over-HTTP (`/v1/acp`) with no Sandbox-specific HTTP helpers.
- `sandbox-agent`: `SandboxAgent` SDK wrapper that combines ACP session operations with Sandbox control-plane and filesystem helpers.
- `SandboxAgent` entry points are `SandboxAgent.connect(...)` and `SandboxAgent.start(...)`.
- Stable Sandbox session methods are `createSession`, `resumeSession`, `resumeOrCreateSession`, `destroySession`, `rawSendSessionMethod`, `onSessionEvent`, `setSessionMode`, `setSessionModel`, `setSessionThoughtLevel`, `setSessionConfigOption`, `getSessionConfigOptions`, `getSessionModes`, `respondPermission`, `rawRespondPermission`, and `onPermissionRequest`.
- `Session` helpers are `prompt(...)`, `rawSend(...)`, `onEvent(...)`, `setMode(...)`, `setModel(...)`, `setThoughtLevel(...)`, `setConfigOption(...)`, `getConfigOptions()`, `getModes()`, `respondPermission(...)`, `rawRespondPermission(...)`, and `onPermissionRequest(...)`.
- Cleanup is `sdk.dispose()`.
### React Component Methodology
- Shared React UI belongs in `sdks/react` only when it is reusable outside the Inspector.
- If the same UI pattern is shared between the Sandbox Agent Inspector and Foundry, prefer extracting it into `sdks/react` instead of maintaining parallel implementations.
- Keep shared components unstyled by default: behavior in the package, styling in the consumer via `className`, slot-level `classNames`, render overrides, and `data-*` hooks.
- Prefer extracting reusable pieces such as transcript, composer, and conversation surfaces. Keep Inspector-specific shells such as session selection, session headers, and control-plane actions in `frontend/packages/inspector/`.
- Document all shared React components in `docs/react-components.mdx`, and keep that page aligned with the exported surface in `sdks/react/src/index.ts`.
### TypeScript SDK Naming Conventions
- Use `respond<Thing>(id, reply)` for SDK methods that reply to an agent-initiated request (e.g. `respondPermission`). This is the standard pattern for answering any inbound JSON-RPC request from the agent.
- Prefix raw/low-level escape hatches with `raw` (e.g. `rawRespondPermission`, `rawSend`). These accept protocol-level types directly and bypass SDK abstractions.
### Docs Source Of Truth
- For TypeScript docs/examples, source of truth is implementation in:
- `sdks/typescript/src/client.ts`
- `sdks/typescript/src/index.ts`
- `sdks/acp-http-client/src/index.ts`
- Do not document TypeScript APIs unless they are exported and implemented in those files.
- For HTTP/CLI docs/examples, source of truth is: - For HTTP/CLI docs/examples, source of truth is:
- `server/packages/sandbox-agent/src/router.rs` - `server/packages/sandbox-agent/src/router.rs`
- `server/packages/sandbox-agent/src/cli.rs` - `server/packages/sandbox-agent/src/cli.rs`
- Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy `/v1/sessions` APIs). - Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy session REST APIs).
## ACP Protocol Compliance
- Before adding any new ACP method, property, or config option category to the SDK, verify it exists in the ACP spec at `https://agentclientprotocol.com/llms-full.txt`.
- Valid `SessionConfigOptionCategory` values are: `mode`, `model`, `thought_level`, `other`, or custom categories prefixed with `_` (e.g. `_permission_mode`).
- Do not invent ACP properties or categories (e.g. `permission_mode` is not a valid ACP category — use `_permission_mode` if it's a custom extension, or use existing ACP mechanisms like `session/set_mode`).
- `NewSessionRequest` only has `_meta`, `cwd`, and `mcpServers`. Do not add non-ACP fields to it.
- Sandbox Agent SDK abstractions (like `SessionCreateRequest`) may add convenience properties, but must clearly map to real ACP methods internally and not send fabricated fields over the wire.
## Source Documents
- ACP protocol specification (full LLM-readable reference): `https://agentclientprotocol.com/llms-full.txt`
- `~/misc/acp-docs/schema/schema.json`
- `~/misc/acp-docs/schema/meta.json`
- `research/acp/spec.md`
- `research/acp/v1-schema-to-acp-mapping.md`
- `research/acp/friction.md`
- `research/acp/todo.md`
## Change Tracking ## Change Tracking
@ -119,14 +32,20 @@
- Append blockers/decisions to `research/acp/friction.md` during ACP work. - Append blockers/decisions to `research/acp/friction.md` during ACP work.
- `docs/agent-capabilities.mdx` lists models/modes/thought levels per agent. Update it when adding a new agent or changing `fallback_config_options`. If its "Last updated" date is >2 weeks old, re-run `cd scripts/agent-configs && npx tsx dump.ts` and update the doc to match. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`). - `docs/agent-capabilities.mdx` lists models/modes/thought levels per agent. Update it when adding a new agent or changing `fallback_config_options`. If its "Last updated" date is >2 weeks old, re-run `cd scripts/agent-configs && npx tsx dump.ts` and update the doc to match. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`).
- Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier. - Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier.
- TypeScript SDK tests should run against a real running server/runtime over real `/v1` HTTP APIs, typically using the real `mock` agent for deterministic behavior.
- Do not use Vitest fetch/transport mocks to simulate server functionality in TypeScript SDK tests.
## Docker Examples (Dev Testing) ## Docker Test Image
- When manually testing bleeding-edge (unreleased) versions of sandbox-agent in `examples/`, use `SANDBOX_AGENT_DEV=1` with the Docker-based examples. - Docker-backed Rust and TypeScript tests build `docker/test-agent/Dockerfile` directly in-process and cache the image tag only in memory (`OnceLock` in Rust, module-level variable in TypeScript).
- This triggers a local build of `docker/runtime/Dockerfile.full` which builds the server binary from local source and packages it into the Docker image. - Do not add cross-process image-build scripts unless there is a concrete need for them.
- Example: `SANDBOX_AGENT_DEV=1 pnpm --filter @sandbox-agent/example-mcp start`
## Common Software Sync
- These three files must stay in sync:
- `docs/common-software.mdx` (user-facing documentation)
- `docker/test-common-software/Dockerfile` (packages installed in the test image)
- `server/packages/sandbox-agent/tests/common_software.rs` (test assertions)
- When adding or removing software from `docs/common-software.mdx`, also add/remove the corresponding `apt-get install` line in the Dockerfile and add/remove the test in `common_software.rs`.
- Run `cargo test -p sandbox-agent --test common_software` to verify.
## Install Version References ## Install Version References
@ -159,4 +78,3 @@
- `scripts/release/main.ts` - `scripts/release/main.ts`
- `scripts/release/promote-artifacts.ts` - `scripts/release/promote-artifacts.ts`
- `scripts/release/sdk.ts` - `scripts/release/sdk.ts`
- `scripts/sandbox-testing/test-sandbox.ts`

View file

@ -4,7 +4,7 @@ members = ["server/packages/*", "gigacode"]
exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"] exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"]
[workspace.package] [workspace.package]
version = "0.3.2" version = "0.4.2"
edition = "2021" edition = "2021"
authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ] authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
license = "Apache-2.0" license = "Apache-2.0"
@ -13,13 +13,13 @@ description = "Universal API for automatic coding agents in sandboxes. Supports
[workspace.dependencies] [workspace.dependencies]
# Internal crates # Internal crates
sandbox-agent = { version = "0.3.2", path = "server/packages/sandbox-agent" } sandbox-agent = { version = "0.4.2", path = "server/packages/sandbox-agent" }
sandbox-agent-error = { version = "0.3.2", path = "server/packages/error" } sandbox-agent-error = { version = "0.4.2", path = "server/packages/error" }
sandbox-agent-agent-management = { version = "0.3.2", path = "server/packages/agent-management" } sandbox-agent-agent-management = { version = "0.4.2", path = "server/packages/agent-management" }
sandbox-agent-agent-credentials = { version = "0.3.2", path = "server/packages/agent-credentials" } sandbox-agent-agent-credentials = { version = "0.4.2", path = "server/packages/agent-credentials" }
sandbox-agent-opencode-adapter = { version = "0.3.2", path = "server/packages/opencode-adapter" } sandbox-agent-opencode-adapter = { version = "0.4.2", path = "server/packages/opencode-adapter" }
sandbox-agent-opencode-server-manager = { version = "0.3.2", path = "server/packages/opencode-server-manager" } sandbox-agent-opencode-server-manager = { version = "0.4.2", path = "server/packages/opencode-server-manager" }
acp-http-adapter = { version = "0.3.2", path = "server/packages/acp-http-adapter" } acp-http-adapter = { version = "0.4.2", path = "server/packages/acp-http-adapter" }
# Serialization # Serialization
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -80,11 +80,11 @@ Import the SDK directly into your Node or browser application. Full type safety
**Install** **Install**
```bash ```bash
npm install sandbox-agent@0.3.x npm install sandbox-agent@0.4.x
``` ```
```bash ```bash
bun add sandbox-agent@0.3.x bun add sandbox-agent@0.4.x
# Optional: allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Optional: allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -135,7 +135,7 @@ Run as an HTTP server and connect from any language. Deploy to E2B, Daytona, Ver
```bash ```bash
# Install it # Install it
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Run it # Run it
sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468 sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468
``` ```
@ -159,12 +159,12 @@ sandbox-agent server --no-token --host 127.0.0.1 --port 2468
Install the CLI wrapper (optional but convenient): Install the CLI wrapper (optional but convenient):
```bash ```bash
npm install -g @sandbox-agent/cli@0.3.x npm install -g @sandbox-agent/cli@0.4.x
``` ```
```bash ```bash
# Allow Bun to run postinstall scripts for native binaries. # Allow Bun to run postinstall scripts for native binaries.
bun add -g @sandbox-agent/cli@0.3.x bun add -g @sandbox-agent/cli@0.4.x
bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -179,11 +179,11 @@ sandbox-agent api sessions send-message-stream my-session --message "Hello" --en
You can also use npx like: You can also use npx like:
```bash ```bash
npx @sandbox-agent/cli@0.3.x --help npx @sandbox-agent/cli@0.4.x --help
``` ```
```bash ```bash
bunx @sandbox-agent/cli@0.3.x --help bunx @sandbox-agent/cli@0.4.x --help
``` ```
[CLI documentation](https://sandboxagent.dev/docs/cli) [CLI documentation](https://sandboxagent.dev/docs/cli)
@ -277,7 +277,7 @@ Coding agents expect interactive terminals with proper TTY handling. SSH with pi
- **Storage of sessions on disk**: Sessions are already stored by the respective coding agents on disk. It's assumed that the consumer is streaming data from this machine to an external storage, such as Postgres, ClickHouse, or Rivet. - **Storage of sessions on disk**: Sessions are already stored by the respective coding agents on disk. It's assumed that the consumer is streaming data from this machine to an external storage, such as Postgres, ClickHouse, or Rivet.
- **Direct LLM wrappers**: Use the [Vercel AI SDK](https://ai-sdk.dev/docs/introduction) if you want to implement your own agent from scratch. - **Direct LLM wrappers**: Use the [Vercel AI SDK](https://ai-sdk.dev/docs/introduction) if you want to implement your own agent from scratch.
- **Git Repo Management**: Just use git commands or the features provided by your sandbox provider of choice. - **Git Repo Management**: Just use git commands or the features provided by your sandbox provider of choice.
- **Sandbox Provider API**: Sandbox providers have many nuanced differences in their APIs, so it does not make sense for us to try to provide a custom layer. Instead, we opt to provide guides that let you integrate this project with sandbox providers. - **Sandbox Provider API**: Sandbox providers have many nuanced differences in their APIs, so it does not make sense for us to try to provide a custom layer. Instead, we opt to provide guides that let you integrate this repository with sandbox providers.
## Roadmap ## Roadmap

View file

@ -0,0 +1,7 @@
# Development image that runs the inspector's Vite dev server.
# No source is copied into the image; the repository is presumably bind-mounted
# at /app when the container starts (TODO confirm against the compose/run setup).
FROM node:22-bookworm-slim
# Pin pnpm so installs inside the container use a fixed package-manager version.
RUN npm install -g pnpm@10.28.2
WORKDIR /app
# At container start: install the inspector package together with its
# dependencies (the trailing "..." in the pnpm filter), then replace the shell
# with Vite, listening on all interfaces on port 5173.
CMD ["bash", "-lc", "pnpm install --filter @sandbox-agent/inspector... && cd frontend/packages/inspector && exec pnpm vite --host 0.0.0.0 --port 5173"]

View file

@ -10,7 +10,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -21,15 +20,13 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
# Build cli-shared, acp-http-client, SDK, then persist-indexeddb and react (depends on SDK) # Build cli-shared, acp-http-client, SDK, then react (depends on SDK)
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build

View file

@ -10,7 +10,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -21,15 +20,13 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
# Build cli-shared, acp-http-client, SDK, then persist-indexeddb and react (depends on SDK) # Build cli-shared, acp-http-client, SDK, then react (depends on SDK)
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build

View file

@ -10,7 +10,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -21,15 +20,13 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
# Build cli-shared, acp-http-client, SDK, then persist-indexeddb and react (depends on SDK) # Build cli-shared, acp-http-client, SDK, then react (depends on SDK)
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build

View file

@ -10,7 +10,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -21,15 +20,13 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
# Build cli-shared, acp-http-client, SDK, then persist-indexeddb and react (depends on SDK) # Build cli-shared, acp-http-client, SDK, then react (depends on SDK)
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build

View file

@ -10,7 +10,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -21,15 +20,13 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
# Build cli-shared, acp-http-client, SDK, then persist-indexeddb and react (depends on SDK) # Build cli-shared, acp-http-client, SDK, then react (depends on SDK)
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build

View file

@ -12,7 +12,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -23,7 +22,6 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
@ -31,7 +29,6 @@ COPY sdks/typescript ./sdks/typescript
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
# Copy inspector source and build # Copy inspector source and build
@ -152,7 +149,8 @@ FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
ca-certificates \ ca-certificates \
curl \ curl \
git && \ git \
ffmpeg && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Copy the binary from builder # Copy the binary from builder

View file

@ -11,7 +11,6 @@ COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/ COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/
COPY sdks/react/package.json ./sdks/react/ COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/ COPY sdks/typescript/package.json ./sdks/typescript/
@ -20,14 +19,12 @@ RUN pnpm install --filter @sandbox-agent/inspector...
COPY docs/openapi.json ./docs/ COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/persist-indexeddb ./sdks/persist-indexeddb
COPY sdks/react ./sdks/react COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript COPY sdks/typescript ./sdks/typescript
RUN cd sdks/cli-shared && pnpm exec tsup RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup RUN cd sdks/acp-http-client && pnpm exec tsup
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/persist-indexeddb && pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup RUN cd sdks/react && pnpm exec tsup
COPY frontend/packages/inspector ./frontend/packages/inspector COPY frontend/packages/inspector ./frontend/packages/inspector

View file

@ -0,0 +1,61 @@
# Multi-stage image: builds the sandbox-agent server binary from the local
# checkout, then packages it with X11/desktop and media tooling plus the neko
# binary. Appears to be the image used by the Docker-backed tests (TODO confirm).

# Stage 1 (builder): compile the release binary.
FROM rust:1.88.0-bookworm AS builder
WORKDIR /build
# Only these paths are copied into the build context of this stage.
COPY Cargo.toml Cargo.lock ./
COPY server/ ./server/
COPY gigacode/ ./gigacode/
COPY resources/agent-schemas/artifacts/ ./resources/agent-schemas/artifacts/
COPY scripts/agent-configs/ ./scripts/agent-configs/
COPY scripts/audit-acp-deps/ ./scripts/audit-acp-deps/
# NOTE(review): presumably tells the build to skip bundling the inspector UI;
# confirm against the crate's build script.
ENV SANDBOX_AGENT_SKIP_INSPECTOR=1
# Cargo's registry, git checkouts, and target/ are BuildKit cache mounts, so
# repeated builds reuse earlier work. Cache-mount contents are not written into
# the image layer, so the binary is copied out to /sandbox-agent in the same RUN.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/build/target \
cargo build -p sandbox-agent --release && \
cp target/release/sandbox-agent /sandbox-agent
# Stage 2 (neko-base): exists only as a source for the neko binary copied below.
# Extract neko binary from the official image for WebRTC desktop streaming.
# Using neko v3 base image from GHCR which provides multi-arch support (amd64, arm64).
# Pinned by digest to prevent breaking changes from upstream.
# Reference client: https://github.com/demodesk/neko-client/blob/37f93eae6bd55b333c94bd009d7f2b079075a026/src/component/internal/webrtc.ts
FROM ghcr.io/m1k1o/neko/base@sha256:0c384afa56268aaa2d5570211d284763d0840dcdd1a7d9a24be3081d94d3dfce AS neko-base
# Stage 3 (final image): Node base plus X11, window-manager, input-automation,
# imaging, ffmpeg, and GStreamer packages.
FROM node:22-bookworm-slim
# NOTE(review): all apt output, including error messages, is discarded by the
# redirect at the end of the install command. A failed install still fails the
# build through the && chain, but without diagnostics.
# The apt package lists are removed afterwards to keep the layer small.
RUN apt-get update -qq && \
apt-get install -y -qq --no-install-recommends \
ca-certificates \
bash \
libstdc++6 \
xvfb \
openbox \
xdotool \
imagemagick \
ffmpeg \
gstreamer1.0-tools \
gstreamer1.0-plugins-base \
gstreamer1.0-plugins-good \
gstreamer1.0-plugins-bad \
gstreamer1.0-plugins-ugly \
gstreamer1.0-nice \
gstreamer1.0-x \
gstreamer1.0-pulseaudio \
libxcvt0 \
x11-xserver-utils \
dbus-x11 \
xauth \
fonts-dejavu-core \
xterm \
> /dev/null 2>&1 && \
rm -rf /var/lib/apt/lists/*
# Bring in the binaries produced or extracted by the earlier stages.
COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
COPY --from=neko-base /usr/bin/neko /usr/local/bin/neko
# HTTP port; matches the --port value in CMD below.
EXPOSE 3000
# Expose UDP port range for WebRTC media transport
EXPOSE 59050-59070/udp
# Default: start the server on all interfaces, port 3000, with token auth disabled.
ENTRYPOINT ["/usr/local/bin/sandbox-agent"]
CMD ["server", "--host", "0.0.0.0", "--port", "3000", "--no-token"]

View file

@ -0,0 +1,37 @@
# Extends the base test-agent image with common software pre-installed.
# Used by the common_software integration test to verify that all documented
# software in docs/common-software.mdx works correctly inside the sandbox.
#
# KEEP IN SYNC with docs/common-software.mdx
# BASE_IMAGE can be overridden at build time; it defaults to the dev tag below.
ARG BASE_IMAGE=sandbox-agent-test:dev
FROM ${BASE_IMAGE}
# Switch to root for the apt-get install below. There is no later USER
# instruction, so the resulting image also runs as root.
USER root
# NOTE(review): all apt output, including error messages, is discarded by the
# redirect at the end of the install command. A failed install still fails the
# build through the && chain, but without diagnostics.
RUN apt-get update -qq && \
apt-get install -y -qq --no-install-recommends \
# Browsers
chromium \
firefox-esr \
# Languages
python3 python3-pip python3-venv \
default-jdk \
ruby-full \
# Databases
sqlite3 \
redis-server \
# Build tools
build-essential cmake pkg-config \
# CLI tools
git jq tmux \
# Media and graphics
imagemagick \
poppler-utils \
# Desktop apps
gimp \
> /dev/null 2>&1 && \
rm -rf /var/lib/apt/lists/*
# Default: start the server on all interfaces, port 3000, with token auth disabled.
ENTRYPOINT ["/usr/local/bin/sandbox-agent"]
CMD ["server", "--host", "0.0.0.0", "--port", "3000", "--no-token"]

View file

@ -1,127 +0,0 @@
---
title: "Agent Capabilities"
description: "Models, modes, and thought levels supported by each agent."
---
Capabilities are subject to change as the agents are updated. See [Agent Sessions](/agent-sessions) for full session configuration API details.
<Info>
_Last updated: March 5th, 2026. See [Generating a live report](#generating-a-live-report) for up-to-date reference._
</Info>
## Claude
| Category | Values |
|----------|--------|
| **Models** | `default`, `sonnet`, `opus`, `haiku` |
| **Modes** | `default`, `acceptEdits`, `plan`, `dontAsk`, `bypassPermissions` |
| **Thought levels** | Unsupported |
### Configuring Effort Level For Claude
Claude does not natively support changing effort level after a session starts, so configure it in the filesystem before creating the session.
```ts
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { SandboxAgent } from "sandbox-agent";
const cwd = "/path/to/workspace";
await mkdir(path.join(cwd, ".claude"), { recursive: true });
await writeFile(
path.join(cwd, ".claude", "settings.json"),
JSON.stringify({ effortLevel: "high" }, null, 2),
);
const sdk = await SandboxAgent.connect({ baseUrl: "http://127.0.0.1:2468" });
await sdk.createSession({
agent: "claude",
sessionInit: { cwd, mcpServers: [] },
});
```
<Accordion title="Supported file locations (highest precedence last)">
1. `~/.claude/settings.json`
2. `<session cwd>/.claude/settings.json`
3. `<session cwd>/.claude/settings.local.json`
</Accordion>
## Codex
| Category | Values |
|----------|--------|
| **Models** | `gpt-5.3-codex` (default), `gpt-5.3-codex-spark`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1-codex-mini` |
| **Modes** | `read-only` (default), `auto`, `full-access` |
| **Thought levels** | `low`, `medium`, `high` (default), `xhigh` |
## OpenCode
| Category | Values |
|----------|--------|
| **Models** | See below |
| **Modes** | `build` (default), `plan` |
| **Thought levels** | Unsupported |
<Accordion title="See all models">
| Provider | Models |
|----------|--------|
| **Anthropic** | `anthropic/claude-3-5-haiku-20241022`, `anthropic/claude-3-5-haiku-latest`, `anthropic/claude-3-5-sonnet-20240620`, `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-7-sonnet-20250219`, `anthropic/claude-3-7-sonnet-latest`, `anthropic/claude-3-haiku-20240307`, `anthropic/claude-3-opus-20240229`, `anthropic/claude-3-sonnet-20240229`, `anthropic/claude-haiku-4-5`, `anthropic/claude-haiku-4-5-20251001`, `anthropic/claude-opus-4-0`, `anthropic/claude-opus-4-1`, `anthropic/claude-opus-4-1-20250805`, `anthropic/claude-opus-4-20250514`, `anthropic/claude-opus-4-5`, `anthropic/claude-opus-4-5-20251101`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-0`, `anthropic/claude-sonnet-4-20250514`, `anthropic/claude-sonnet-4-5`, `anthropic/claude-sonnet-4-5-20250929` |
| **OpenAI** | `openai/gpt-5.1-codex`, `openai/gpt-5.1-codex-max`, `openai/gpt-5.1-codex-mini`, `openai/gpt-5.2`, `openai/gpt-5.2-codex`, `openai/gpt-5.3-codex` |
| **Cerebras** | `cerebras/gpt-oss-120b`, `cerebras/qwen-3-235b-a22b-instruct-2507`, `cerebras/zai-glm-4.7` |
| **OpenCode Zen** | `opencode/big-pickle`, `opencode/claude-3-5-haiku`, `opencode/claude-haiku-4-5`, `opencode/claude-opus-4-1`, `opencode/claude-opus-4-5`, `opencode/claude-opus-4-6`, `opencode/claude-sonnet-4`, `opencode/claude-sonnet-4-5`, `opencode/gemini-3-flash`, `opencode/gemini-3-pro` (default), `opencode/glm-4.6`, `opencode/glm-4.7`, `opencode/gpt-5`, `opencode/gpt-5-codex`, `opencode/gpt-5-nano`, `opencode/gpt-5.1`, `opencode/gpt-5.1-codex`, `opencode/gpt-5.1-codex-max`, `opencode/gpt-5.1-codex-mini`, `opencode/gpt-5.2`, `opencode/gpt-5.2-codex`, `opencode/kimi-k2`, `opencode/kimi-k2-thinking`, `opencode/kimi-k2.5`, `opencode/kimi-k2.5-free`, `opencode/minimax-m2.1`, `opencode/minimax-m2.1-free`, `opencode/trinity-large-preview-free` |
</Accordion>
## Cursor
| Category | Values |
|----------|--------|
| **Models** | See below |
| **Modes** | Unsupported |
| **Thought levels** | Unsupported |
<Accordion title="See all models">
| Group | Models |
|-------|--------|
| **Auto** | `auto` |
| **Composer** | `composer-1.5`, `composer-1` |
| **GPT-5.3 Codex** | `gpt-5.3-codex`, `gpt-5.3-codex-low`, `gpt-5.3-codex-high`, `gpt-5.3-codex-xhigh`, `gpt-5.3-codex-fast`, `gpt-5.3-codex-low-fast`, `gpt-5.3-codex-high-fast`, `gpt-5.3-codex-xhigh-fast` |
| **GPT-5.2** | `gpt-5.2`, `gpt-5.2-high`, `gpt-5.2-codex`, `gpt-5.2-codex-low`, `gpt-5.2-codex-high`, `gpt-5.2-codex-xhigh`, `gpt-5.2-codex-fast`, `gpt-5.2-codex-low-fast`, `gpt-5.2-codex-high-fast`, `gpt-5.2-codex-xhigh-fast` |
| **GPT-5.1** | `gpt-5.1-high`, `gpt-5.1-codex-max`, `gpt-5.1-codex-max-high` |
| **Claude** | `opus-4.6-thinking` (default), `opus-4.6`, `opus-4.5`, `opus-4.5-thinking`, `sonnet-4.5`, `sonnet-4.5-thinking` |
| **Other** | `gemini-3-pro`, `gemini-3-flash`, `grok` |
</Accordion>
## Amp
| Category | Values |
|----------|--------|
| **Models** | `amp-default` |
| **Modes** | `default`, `bypass` |
| **Thought levels** | Unsupported |
## Pi
| Category | Values |
|----------|--------|
| **Models** | `default` |
| **Modes** | Unsupported |
| **Thought levels** | Unsupported |
## Generating a live report
Requires a running Sandbox Agent server. `--endpoint` defaults to `http://127.0.0.1:2468`.
```bash
sandbox-agent api agents report
```
<Note>
The live report reflects what the agent adapter returns for the current credentials. Some models may be gated by subscription (e.g. Claude's `opus` requires a paid plan) and will not appear in the report if the credentials don't have access.
</Note>

View file

@ -21,10 +21,7 @@ const sdk = await SandboxAgent.connect({
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "codex", agent: "codex",
sessionInit: { cwd: "/",
cwd: "/",
mcpServers: [],
},
}); });
console.log(session.id, session.agentSessionId); console.log(session.id, session.agentSessionId);
@ -54,6 +51,108 @@ await session.prompt([
unsubscribe(); unsubscribe();
``` ```
### Event types
Each event's `payload` contains a session update. The `sessionUpdate` field identifies the type.
<AccordionGroup>
<Accordion title="agent_message_chunk">
Streamed text or content from the agent's response.
```json
{
"sessionUpdate": "agent_message_chunk",
"content": { "type": "text", "text": "Here's how the repository is structured..." }
}
```
</Accordion>
<Accordion title="agent_thought_chunk">
Internal reasoning from the agent (chain-of-thought / extended thinking).
```json
{
"sessionUpdate": "agent_thought_chunk",
"content": { "type": "text", "text": "I should start by looking at the project structure..." }
}
```
</Accordion>
<Accordion title="user_message_chunk">
Echo of the user's prompt being processed.
```json
{
"sessionUpdate": "user_message_chunk",
"content": { "type": "text", "text": "Summarize the repository structure." }
}
```
</Accordion>
<Accordion title="tool_call">
The agent invoked a tool (file edit, terminal command, etc.).
```json
{
"sessionUpdate": "tool_call",
"toolCallId": "tc_abc123",
"title": "Read file",
"status": "in_progress",
"rawInput": { "path": "/src/index.ts" }
}
```
</Accordion>
<Accordion title="tool_call_update">
Progress or result update for an in-progress tool call.
```json
{
"sessionUpdate": "tool_call_update",
"toolCallId": "tc_abc123",
"status": "completed",
"content": [{ "type": "text", "text": "import express from 'express';\n..." }]
}
```
</Accordion>
<Accordion title="plan">
The agent's execution plan for the current task.
```json
{
"sessionUpdate": "plan",
"entries": [
{ "content": "Read the project structure", "status": "completed" },
{ "content": "Identify main entrypoints", "status": "in_progress" },
{ "content": "Write summary", "status": "pending" }
]
}
```
</Accordion>
<Accordion title="usage_update">
Token usage metrics for the current turn.
```json
{
"sessionUpdate": "usage_update"
}
```
</Accordion>
<Accordion title="session_info_update">
Session metadata changed (e.g. agent-generated title).
```json
{
"sessionUpdate": "session_info_update",
"title": "Repository structure analysis"
}
```
</Accordion>
</AccordionGroup>
## Fetch persisted event history ## Fetch persisted event history
```ts ```ts

20
docs/agents/amp.mdx Normal file
View file

@ -0,0 +1,20 @@
---
title: "Amp"
description: "Use Amp as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "amp",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | `amp-default` |
| **Modes** | `default`, `bypass` |
| **Thought levels** | Unsupported |

49
docs/agents/claude.mdx Normal file
View file

@ -0,0 +1,49 @@
---
title: "Claude"
description: "Use Claude Code as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "claude",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | `default`, `sonnet`, `opus`, `haiku` |
| **Modes** | `default`, `acceptEdits`, `plan`, `dontAsk`, `bypassPermissions` |
| **Thought levels** | Unsupported |
## Configuring effort level
Claude does not support changing effort level after a session starts. Configure it in the filesystem before creating the session.
```ts
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
const cwd = "/path/to/workspace";
await mkdir(path.join(cwd, ".claude"), { recursive: true });
await writeFile(
path.join(cwd, ".claude", "settings.json"),
JSON.stringify({ effortLevel: "high" }, null, 2),
);
const session = await client.createSession({
agent: "claude",
cwd,
});
```
<Accordion title="Supported settings file locations (highest precedence last)">
1. `~/.claude/settings.json`
2. `<session cwd>/.claude/settings.json`
3. `<session cwd>/.claude/settings.local.json`
</Accordion>

20
docs/agents/codex.mdx Normal file
View file

@ -0,0 +1,20 @@
---
title: "Codex"
description: "Use OpenAI Codex as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "codex",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | `gpt-5.3-codex` (default), `gpt-5.3-codex-spark`, `gpt-5.2-codex`, `gpt-5.1-codex-max`, `gpt-5.2`, `gpt-5.1-codex-mini` |
| **Modes** | `read-only` (default), `auto`, `full-access` |
| **Thought levels** | `low`, `medium`, `high` (default), `xhigh` |

34
docs/agents/cursor.mdx Normal file
View file

@ -0,0 +1,34 @@
---
title: "Cursor"
description: "Use Cursor as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "cursor",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | See below |
| **Modes** | Unsupported |
| **Thought levels** | Unsupported |
<Accordion title="All models">
| Group | Models |
|-------|--------|
| **Auto** | `auto` |
| **Composer** | `composer-1.5`, `composer-1` |
| **GPT-5.3 Codex** | `gpt-5.3-codex`, `gpt-5.3-codex-low`, `gpt-5.3-codex-high`, `gpt-5.3-codex-xhigh`, `gpt-5.3-codex-fast`, `gpt-5.3-codex-low-fast`, `gpt-5.3-codex-high-fast`, `gpt-5.3-codex-xhigh-fast` |
| **GPT-5.2** | `gpt-5.2`, `gpt-5.2-high`, `gpt-5.2-codex`, `gpt-5.2-codex-low`, `gpt-5.2-codex-high`, `gpt-5.2-codex-xhigh`, `gpt-5.2-codex-fast`, `gpt-5.2-codex-low-fast`, `gpt-5.2-codex-high-fast`, `gpt-5.2-codex-xhigh-fast` |
| **GPT-5.1** | `gpt-5.1-high`, `gpt-5.1-codex-max`, `gpt-5.1-codex-max-high` |
| **Claude** | `opus-4.6-thinking` (default), `opus-4.6`, `opus-4.5`, `opus-4.5-thinking`, `sonnet-4.5`, `sonnet-4.5-thinking` |
| **Other** | `gemini-3-pro`, `gemini-3-flash`, `grok` |
</Accordion>

31
docs/agents/opencode.mdx Normal file
View file

@ -0,0 +1,31 @@
---
title: "OpenCode"
description: "Use OpenCode as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "opencode",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | See below |
| **Modes** | `build` (default), `plan` |
| **Thought levels** | Unsupported |
<Accordion title="All models">
| Provider | Models |
|----------|--------|
| **Anthropic** | `anthropic/claude-3-5-haiku-20241022`, `anthropic/claude-3-5-haiku-latest`, `anthropic/claude-3-5-sonnet-20240620`, `anthropic/claude-3-5-sonnet-20241022`, `anthropic/claude-3-7-sonnet-20250219`, `anthropic/claude-3-7-sonnet-latest`, `anthropic/claude-3-haiku-20240307`, `anthropic/claude-3-opus-20240229`, `anthropic/claude-3-sonnet-20240229`, `anthropic/claude-haiku-4-5`, `anthropic/claude-haiku-4-5-20251001`, `anthropic/claude-opus-4-0`, `anthropic/claude-opus-4-1`, `anthropic/claude-opus-4-1-20250805`, `anthropic/claude-opus-4-20250514`, `anthropic/claude-opus-4-5`, `anthropic/claude-opus-4-5-20251101`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-0`, `anthropic/claude-sonnet-4-20250514`, `anthropic/claude-sonnet-4-5`, `anthropic/claude-sonnet-4-5-20250929` |
| **OpenAI** | `openai/gpt-5.1-codex`, `openai/gpt-5.1-codex-max`, `openai/gpt-5.1-codex-mini`, `openai/gpt-5.2`, `openai/gpt-5.2-codex`, `openai/gpt-5.3-codex` |
| **Cerebras** | `cerebras/gpt-oss-120b`, `cerebras/qwen-3-235b-a22b-instruct-2507`, `cerebras/zai-glm-4.7` |
| **OpenCode Zen** | `opencode/big-pickle`, `opencode/claude-3-5-haiku`, `opencode/claude-haiku-4-5`, `opencode/claude-opus-4-1`, `opencode/claude-opus-4-5`, `opencode/claude-opus-4-6`, `opencode/claude-sonnet-4`, `opencode/claude-sonnet-4-5`, `opencode/gemini-3-flash`, `opencode/gemini-3-pro` (default), `opencode/glm-4.6`, `opencode/glm-4.7`, `opencode/gpt-5`, `opencode/gpt-5-codex`, `opencode/gpt-5-nano`, `opencode/gpt-5.1`, `opencode/gpt-5.1-codex`, `opencode/gpt-5.1-codex-max`, `opencode/gpt-5.1-codex-mini`, `opencode/gpt-5.2`, `opencode/gpt-5.2-codex`, `opencode/kimi-k2`, `opencode/kimi-k2-thinking`, `opencode/kimi-k2.5`, `opencode/kimi-k2.5-free`, `opencode/minimax-m2.1`, `opencode/minimax-m2.1-free`, `opencode/trinity-large-preview-free` |
</Accordion>

20
docs/agents/pi.mdx Normal file
View file

@ -0,0 +1,20 @@
---
title: "Pi"
description: "Use Pi as a sandbox agent."
---
## Usage
```typescript
const session = await client.createSession({
agent: "pi",
});
```
## Capabilities
| Category | Values |
|----------|--------|
| **Models** | `default` |
| **Modes** | Unsupported |
| **Thought levels** | Unsupported |

View file

@ -1,64 +1,63 @@
--- ---
title: "Architecture" title: "Architecture"
description: "How the client, sandbox, server, and agent fit together." description: "How the Sandbox Agent server, SDK, and agent processes fit together."
icon: "microchip"
--- ---
Sandbox Agent runs as an HTTP server inside your sandbox. Your app talks to it remotely. Sandbox Agent is a lightweight HTTP server that runs **inside** a sandbox. It:
- **Agent management**: Installs, spawns, and stops coding agent processes
- **Sessions**: Routes prompts to agents and streams events back in real time
- **Sandbox APIs**: Filesystem, process, and terminal access for the sandbox environment
## Components ## Components
- `Your client`: your app code using the `sandbox-agent` SDK. ```mermaid
- `Sandbox`: isolated runtime (E2B, Daytona, Docker, etc.). flowchart LR
- `Sandbox Agent server`: process inside the sandbox exposing HTTP transport. CLIENT["Your App"]
- `Agent`: Claude/Codex/OpenCode/Amp process managed by Sandbox Agent.
```mermaid placement="top-right"
flowchart LR
CLIENT["Sandbox Agent SDK"]
SERVER["Sandbox Agent server"]
AGENT["Agent process"]
subgraph SANDBOX["Sandbox"] subgraph SANDBOX["Sandbox"]
direction TB direction TB
SERVER --> AGENT SERVER["Sandbox Agent Server"]
AGENT["Agent Process<br/>(Claude, Codex, etc.)"]
SERVER --> AGENT
end end
CLIENT -->|HTTP| SERVER CLIENT -->|"SDK (HTTP)"| SERVER
``` ```
## Suggested Topology - **Your app**: Uses the `sandbox-agent` TypeScript SDK to talk to the server over HTTP.
- **Sandbox**: An isolated runtime (local process, Docker, E2B, Daytona, Vercel, Cloudflare).
- **Sandbox Agent server**: A single binary inside the sandbox that manages agent lifecycles, routes prompts, streams events, and exposes filesystem/process/terminal APIs.
- **Agent process**: A coding agent (Claude Code, Codex, etc.) spawned by the server. Each session maps to one agent process.
Run the SDK on your backend, then call it from your frontend. ## What `SandboxAgent.start()` does
This extra hop is recommended because it keeps auth/token logic on the backend and makes persistence simpler. 1. **Provision**: The provider creates a sandbox (starts a container, creates a VM, etc.)
2. **Install**: The Sandbox Agent binary is installed inside the sandbox
3. **Boot**: The server starts listening on an HTTP port
4. **Health check**: The SDK waits for `/v1/health` to respond
5. **Ready**: The SDK returns a connected client
```mermaid placement="top-right" For the `local` provider, provisioning is a no-op and the server runs as a local subprocess.
flowchart LR
BROWSER["Browser"]
subgraph BACKEND["Your backend"]
direction TB
SDK["Sandbox Agent SDK"]
end
subgraph SANDBOX_SIMPLE["Sandbox"]
SERVER_SIMPLE["Sandbox Agent server"]
end
BROWSER --> BACKEND ### Server recovery
BACKEND --> SDK --> SERVER_SIMPLE
If the server process stops, the SDK automatically calls the provider's `ensureServer()` after 3 consecutive health-check failures. Most built-in providers implement this. Custom providers can add `ensureServer(sandboxId)` to their `SandboxProvider` object.
## Server HTTP API
See the [HTTP API reference](/api-reference) for the full list of server endpoints.
## Agent installation
Agents are installed lazily on first use. To avoid the cold-start delay, pre-install them:
```bash
sandbox-agent install-agent --all
``` ```
### Backend requirements The `rivetdev/sandbox-agent:0.4.2-full` Docker image ships with all agents pre-installed.
Your backend layer needs to handle: ## Production-ready agent orchestration
- **Long-running connections**: prompts can take minutes. For production deployments, see [Orchestration Architecture](/orchestration-architecture) for recommended topology, backend requirements, and session persistence patterns.
- **Session affinity**: follow-up messages must reach the same session.
- **State between requests**: session metadata and event history must persist across requests.
- **Graceful recovery**: sessions should resume after backend restarts.
We recommend [Rivet](https://rivet.dev) over serverless because actors natively support the long-lived connections, session routing, and state persistence that agent workloads require.
## Session persistence
For storage driver options and replay behavior, see [Persisting Sessions](/session-persistence).

View file

@ -37,6 +37,36 @@ Notes:
- Set `SANDBOX_AGENT_LOG_STDOUT=1` to force stdout/stderr logging. - Set `SANDBOX_AGENT_LOG_STDOUT=1` to force stdout/stderr logging.
- Use `SANDBOX_AGENT_LOG_DIR` to override log directory. - Use `SANDBOX_AGENT_LOG_DIR` to override log directory.
## install
Install first-party runtime dependencies.
### install desktop
Install the Linux desktop runtime packages required by `/v1/desktop/*`.
```bash
sandbox-agent install desktop [OPTIONS]
```
| Option | Description |
|--------|-------------|
| `--yes` | Skip the confirmation prompt |
| `--print-only` | Print the package-manager command without executing it |
| `--package-manager <apt\|dnf\|apk>` | Override package-manager detection |
| `--no-fonts` | Skip the default DejaVu font package |
```bash
sandbox-agent install desktop --yes
sandbox-agent install desktop --print-only
```
Notes:
- Supported on Linux only.
- The command detects `apt`, `dnf`, or `apk`.
- If you are not already running as root, the command requires `sudo`.
## install-agent ## install-agent
Install or reinstall a single agent, or every supported agent with `--all`. Install or reinstall a single agent, or every supported agent with `--all`.
@ -59,6 +89,39 @@ sandbox-agent install-agent claude --reinstall
sandbox-agent install-agent --all sandbox-agent install-agent --all
``` ```
### Custom Pi implementation path
If you use a forked or custom `pi` binary with `pi-acp`, you can override which executable gets launched.
#### Option 1: explicit command override (recommended)
Set `PI_ACP_PI_COMMAND` in the environment where `sandbox-agent` runs:
```bash
PI_ACP_PI_COMMAND=/absolute/path/to/your/pi-fork sandbox-agent server
```
This is forwarded to `pi-acp`, which uses it instead of looking up `pi` on `PATH`.
#### Option 2: PATH override
Put your custom `pi` first on `PATH` before starting `sandbox-agent`:
```bash
export PATH="/path/to/custom-pi-dir:$PATH"
sandbox-agent server
```
#### Option 3: symlink override
Point `pi` to your custom binary via symlink in a directory that is early on `PATH`:
```bash
ln -sf /absolute/path/to/your/pi-fork /usr/local/bin/pi
```
Then start `sandbox-agent` normally.
## opencode (experimental) ## opencode (experimental)
Start/reuse daemon and run `opencode attach` against `/opencode`. Start/reuse daemon and run `opencode attach` against `/opencode`.
@ -226,7 +289,7 @@ Example output:
} }
``` ```
See [Agent Capabilities](/agent-capabilities) for a full reference of supported models, modes, and thought levels per agent. See individual agent pages (e.g. [Claude](/agents/claude), [Codex](/agents/codex)) for supported models, modes, and thought levels.
#### api agents install #### api agents install

560
docs/common-software.mdx Normal file
View file

@ -0,0 +1,560 @@
---
title: "Common Software"
description: "Install browsers, languages, databases, and other tools inside the sandbox."
sidebarTitle: "Common Software"
icon: "box-open"
---
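The sandbox runs a Debian/Ubuntu base image. You can install software with `apt-get` via the [Process API](/processes) or by customizing your Docker image. This page covers commonly needed packages and how to install them. If the image was built with its apt package lists removed (for example with `rm -rf /var/lib/apt/lists/*`), run `apt-get update` once before using the `apt-get install` commands below.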
## Browsers
### Chromium
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "chromium", "chromium-sandbox"],
});
// Launch headless
await sdk.runProcess({
command: "chromium",
args: ["--headless", "--no-sandbox", "--disable-gpu", "https://example.com"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","chromium","chromium-sandbox"]}'
```
</CodeGroup>
<Note>
Use `--no-sandbox` when running Chromium inside a container. The container itself provides isolation.
</Note>
### Firefox
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "firefox-esr"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","firefox-esr"]}'
```
</CodeGroup>
### Playwright browsers
Playwright manages its own browser binaries. Use the Playwright CLI to download a browser together with the system dependencies it needs.
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "npx",
args: ["playwright", "install", "--with-deps", "chromium"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"npx","args":["playwright","install","--with-deps","chromium"]}'
```
</CodeGroup>
---
## Languages and runtimes
### Node.js
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "nodejs", "npm"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","nodejs","npm"]}'
```
</CodeGroup>
For a specific version, use [nvm](https://github.com/nvm-sh/nvm):
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash && . ~/.nvm/nvm.sh && nvm install 22"],
});
```
### Python
Python 3 is typically pre-installed. To add pip and common packages:
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "python3", "python3-pip", "python3-venv"],
});
await sdk.runProcess({
command: "pip3",
args: ["install", "numpy", "pandas", "matplotlib"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","python3","python3-pip","python3-venv"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"pip3","args":["install","numpy","pandas","matplotlib"]}'
```
</CodeGroup>
### Go
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://go.dev/dl/go1.23.6.linux-amd64.tar.gz | tar -C /usr/local -xz"],
});
// `export` only affects this one bash invocation, so extend PATH in the same command that runs `go`
await sdk.runProcess({
command: "bash",
args: ["-c", "export PATH=$PATH:/usr/local/go/bin && go version"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://go.dev/dl/go1.23.6.linux-amd64.tar.gz | tar -C /usr/local -xz"]}'
```
</CodeGroup>
### Rust
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y"]}'
```
</CodeGroup>
### Java (OpenJDK)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "default-jdk"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","default-jdk"]}'
```
</CodeGroup>
### Ruby
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "ruby-full"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","ruby-full"]}'
```
</CodeGroup>
---
## Databases
### PostgreSQL
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "postgresql", "postgresql-client"],
});
// Start the service (replace 15 with the installed PostgreSQL major version; `pg_lsclusters` lists it)
const proc = await sdk.createProcess({
command: "bash",
args: ["-c", "su - postgres -c 'pg_ctlcluster 15 main start'"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","postgresql","postgresql-client"]}'
```
</CodeGroup>
### SQLite
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "sqlite3"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","sqlite3"]}'
```
</CodeGroup>
### Redis
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "redis-server"],
});
const proc = await sdk.createProcess({
command: "redis-server",
args: ["--daemonize", "no"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","redis-server"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes" \
-H "Content-Type: application/json" \
-d '{"command":"redis-server","args":["--daemonize","no"]}'
```
</CodeGroup>
### MySQL / MariaDB
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "mariadb-server", "mariadb-client"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","mariadb-server","mariadb-client"]}'
```
</CodeGroup>
---
## Build tools
### Essential build toolchain
Most compiled software needs the standard build toolchain:
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "build-essential", "cmake", "pkg-config"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","build-essential","cmake","pkg-config"]}'
```
</CodeGroup>
This installs `gcc`, `g++`, `make`, `cmake`, and related tools.
---
## Desktop applications
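Launching the GUI applications below requires the [Computer Use](/computer-use) desktop to be started first; installing them does not. code-server is the exception: it is served over HTTP and opened in a browser.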
### LibreOffice
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "libreoffice"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","libreoffice"]}'
```
</CodeGroup>
### GIMP
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "gimp"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","gimp"]}'
```
</CodeGroup>
### VLC
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "vlc"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","vlc"]}'
```
</CodeGroup>
### VS Code (code-server)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://code-server.dev/install.sh | sh"],
});
const proc = await sdk.createProcess({
command: "code-server",
args: ["--bind-addr", "0.0.0.0:8080", "--auth", "none"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://code-server.dev/install.sh | sh"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes" \
-H "Content-Type: application/json" \
-d '{"command":"code-server","args":["--bind-addr","0.0.0.0:8080","--auth","none"]}'
```
</CodeGroup>
---
## CLI tools
### Git
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "git"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","git"]}'
```
</CodeGroup>
### Docker
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://get.docker.com | sh"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://get.docker.com | sh"]}'
```
</CodeGroup>
### jq
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "jq"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","jq"]}'
```
</CodeGroup>
### tmux
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "tmux"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","tmux"]}'
```
</CodeGroup>
---
## Media and graphics
### FFmpeg
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "ffmpeg"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","ffmpeg"]}'
```
</CodeGroup>
### ImageMagick
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "imagemagick"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","imagemagick"]}'
```
</CodeGroup>
### Poppler (PDF utilities)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "poppler-utils"],
});
// Convert PDF to images
await sdk.runProcess({
command: "pdftoppm",
args: ["-png", "document.pdf", "output"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","poppler-utils"]}'
```
</CodeGroup>
---
## Pre-installing in a Docker image
For production use, install software in your Dockerfile instead of at runtime. This avoids repeated downloads and makes startup faster.
```dockerfile
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y \
chromium \
firefox-esr \
nodejs npm \
python3 python3-pip \
git curl wget \
build-essential \
sqlite3 \
ffmpeg \
imagemagick \
jq \
&& rm -rf /var/lib/apt/lists/*
RUN pip3 install numpy pandas matplotlib
```
See [Docker deployment](/deploy/docker) for how to use custom images with Sandbox Agent.

859
docs/computer-use.mdx Normal file
View file

@ -0,0 +1,859 @@
---
title: "Computer Use"
description: "Control a virtual desktop inside the sandbox with mouse, keyboard, screenshots, recordings, and live streaming."
sidebarTitle: "Computer Use"
icon: "desktop"
---
Sandbox Agent provides a managed virtual desktop (Xvfb + openbox) that you can control programmatically. This is useful for browser automation, GUI testing, and AI computer-use workflows.
## Start and stop
<CodeGroup>
```ts TypeScript
import { SandboxAgent } from "sandbox-agent";
const sdk = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468",
});
const status = await sdk.startDesktop({
width: 1920,
height: 1080,
dpi: 96,
});
console.log(status.state); // "active"
console.log(status.display); // ":99"
// When done
await sdk.stopDesktop();
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/start" \
-H "Content-Type: application/json" \
-d '{"width":1920,"height":1080,"dpi":96}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/stop"
```
</CodeGroup>
All fields in the start request are optional. Defaults are 1440x900 at 96 DPI.
### Start request options
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `width` | number | 1440 | Desktop width in pixels |
| `height` | number | 900 | Desktop height in pixels |
| `dpi` | number | 96 | Display DPI |
| `displayNum` | number | 99 | Starting X display number. The runtime probes from this number upward to find an available display. |
| `stateDir` | string | (auto) | Desktop state directory for home, logs, recordings |
| `streamVideoCodec` | string | `"vp8"` | WebRTC video codec (`vp8`, `vp9`, `h264`) |
| `streamAudioCodec` | string | `"opus"` | WebRTC audio codec (`opus`, `g722`) |
| `streamFrameRate` | number | 30 | Streaming frame rate (1-60) |
| `webrtcPortRange` | string | `"59050-59070"` | UDP port range for WebRTC media |
| `recordingFps` | number | 30 | Default recording FPS when not specified in `startDesktopRecording` (1-60) |
The streaming and recording options configure defaults for the desktop session. They take effect when streaming or recording is started later.
<CodeGroup>
```ts TypeScript
const status = await sdk.startDesktop({
width: 1920,
height: 1080,
streamVideoCodec: "h264",
streamFrameRate: 60,
webrtcPortRange: "59100-59120",
recordingFps: 15,
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/start" \
-H "Content-Type: application/json" \
-d '{
"width": 1920,
"height": 1080,
"streamVideoCodec": "h264",
"streamFrameRate": 60,
"webrtcPortRange": "59100-59120",
"recordingFps": 15
}'
```
</CodeGroup>
## Status
<CodeGroup>
```ts TypeScript
const status = await sdk.getDesktopStatus();
console.log(status.state); // "inactive" | "active" | "failed" | ...
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/status"
```
</CodeGroup>
## Screenshots
Capture the full desktop or a specific region. Optionally include the cursor position.
<CodeGroup>
```ts TypeScript
// Full screenshot (PNG by default)
const png = await sdk.takeDesktopScreenshot();
// JPEG at 70% quality, half scale
const jpeg = await sdk.takeDesktopScreenshot({
format: "jpeg",
quality: 70,
scale: 0.5,
});
// Include cursor overlay
const withCursor = await sdk.takeDesktopScreenshot({
showCursor: true,
});
// Region screenshot
const region = await sdk.takeDesktopRegionScreenshot({
x: 100,
y: 100,
width: 400,
height: 300,
});
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/screenshot" --output screenshot.png
curl "http://127.0.0.1:2468/v1/desktop/screenshot?format=jpeg&quality=70&scale=0.5" \
--output screenshot.jpg
# Include cursor overlay
curl "http://127.0.0.1:2468/v1/desktop/screenshot?show_cursor=true" \
--output with_cursor.png
curl "http://127.0.0.1:2468/v1/desktop/screenshot/region?x=100&y=100&width=400&height=300" \
--output region.png
```
</CodeGroup>
### Screenshot options
| Param | Type | Default | Description |
|-------|------|---------|-------------|
| `format` | string | `"png"` | Output format: `png`, `jpeg`, or `webp` |
| `quality` | number | 85 | Compression quality (1-100, JPEG/WebP only) |
| `scale` | number | 1.0 | Scale factor (0.1-1.0) |
| `showCursor` | boolean | `false` | Composite a crosshair at the cursor position |
When `showCursor` is enabled (passed as the `show_cursor` query parameter when calling the HTTP endpoint directly), the cursor position is captured at the moment of the screenshot and a red crosshair is drawn at that location. This is useful for AI agents that need to see where the cursor is in the screenshot.
## Mouse
<CodeGroup>
```ts TypeScript
// Get current position
const pos = await sdk.getDesktopMousePosition();
console.log(pos.x, pos.y);
// Move
await sdk.moveDesktopMouse({ x: 500, y: 300 });
// Click (left by default)
await sdk.clickDesktop({ x: 500, y: 300 });
// Right click
await sdk.clickDesktop({ x: 500, y: 300, button: "right" });
// Double click
await sdk.clickDesktop({ x: 500, y: 300, clickCount: 2 });
// Drag
await sdk.dragDesktopMouse({
startX: 100, startY: 100,
endX: 400, endY: 400,
});
// Scroll
await sdk.scrollDesktop({ x: 500, y: 300, deltaY: -3 });
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/mouse/position"
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/click" \
-H "Content-Type: application/json" \
-d '{"x":500,"y":300}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/drag" \
-H "Content-Type: application/json" \
-d '{"startX":100,"startY":100,"endX":400,"endY":400}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/scroll" \
-H "Content-Type: application/json" \
-d '{"x":500,"y":300,"deltaY":-3}'
```
</CodeGroup>
## Keyboard
<CodeGroup>
```ts TypeScript
// Type text
await sdk.typeDesktopText({ text: "Hello, world!" });
// Press a key with modifiers
await sdk.pressDesktopKey({
key: "c",
modifiers: { ctrl: true },
});
// Low-level key down/up
await sdk.keyDownDesktop({ key: "Shift_L" });
await sdk.keyUpDesktop({ key: "Shift_L" });
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/keyboard/type" \
-H "Content-Type: application/json" \
-d '{"text":"Hello, world!"}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/keyboard/press" \
-H "Content-Type: application/json" \
-d '{"key":"c","modifiers":{"ctrl":true}}'
```
</CodeGroup>
## Clipboard
Read and write the X11 clipboard programmatically.
<CodeGroup>
```ts TypeScript
// Read clipboard
const clipboard = await sdk.getDesktopClipboard();
console.log(clipboard.text);
// Read primary selection (mouse-selected text)
const primary = await sdk.getDesktopClipboard({ selection: "primary" });
// Write to clipboard
await sdk.setDesktopClipboard({ text: "Pasted via API" });
// Write to both clipboard and primary selection
await sdk.setDesktopClipboard({
text: "Synced text",
selection: "both",
});
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/clipboard"
curl "http://127.0.0.1:2468/v1/desktop/clipboard?selection=primary"
curl -X POST "http://127.0.0.1:2468/v1/desktop/clipboard" \
-H "Content-Type: application/json" \
-d '{"text":"Pasted via API"}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/clipboard" \
-H "Content-Type: application/json" \
-d '{"text":"Synced text","selection":"both"}'
```
</CodeGroup>
The `selection` parameter controls which X11 selection to read or write:
| Value | Description |
|-------|-------------|
| `clipboard` (default) | The standard clipboard (Ctrl+C / Ctrl+V) |
| `primary` | The primary selection (text selected with the mouse) |
| `both` | Write to both clipboard and primary selection (write only) |
## Display and windows
<CodeGroup>
```ts TypeScript
const display = await sdk.getDesktopDisplayInfo();
console.log(display.resolution); // { width: 1920, height: 1080, dpi: 96 }
const { windows } = await sdk.listDesktopWindows();
for (const win of windows) {
console.log(win.title, win.x, win.y, win.width, win.height);
}
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/display/info"
curl "http://127.0.0.1:2468/v1/desktop/windows"
```
</CodeGroup>
The windows endpoint filters out noise automatically: window manager internals (Openbox), windows with empty titles, and tiny helper windows (under 120x80) are excluded. The currently active/focused window is always included regardless of filters.
### Focused window
Get the currently focused window without listing all windows.
<CodeGroup>
```ts TypeScript
const focused = await sdk.getDesktopFocusedWindow();
console.log(focused.title, focused.id);
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/windows/focused"
```
</CodeGroup>
Returns 404 if no window currently has focus.
### Window management
Focus, move, and resize windows by their X11 window ID.
<CodeGroup>
```ts TypeScript
const { windows } = await sdk.listDesktopWindows();
const win = windows[0];
// Bring window to foreground
await sdk.focusDesktopWindow(win.id);
// Move window
await sdk.moveDesktopWindow(win.id, { x: 100, y: 50 });
// Resize window
await sdk.resizeDesktopWindow(win.id, { width: 1280, height: 720 });
```
```bash cURL
# Focus a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/focus"
# Move a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/move" \
-H "Content-Type: application/json" \
-d '{"x":100,"y":50}'
# Resize a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/resize" \
-H "Content-Type: application/json" \
-d '{"width":1280,"height":720}'
```
</CodeGroup>
All three endpoints return the updated window info so you can verify the operation took effect. The window manager may adjust the requested position or size.
## App launching
Launch applications or open files/URLs on the desktop without needing to shell out.
<CodeGroup>
```ts TypeScript
// Launch an app by name
const result = await sdk.launchDesktopApp({
app: "firefox",
args: ["--private"],
});
console.log(result.processId); // "proc_7"
// Launch and wait for the window to appear
const withWindow = await sdk.launchDesktopApp({
app: "xterm",
wait: true,
});
console.log(withWindow.windowId); // "12345" or null if timed out
// Open a URL with the default handler
const opened = await sdk.openDesktopTarget({
target: "https://example.com",
});
console.log(opened.processId);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/launch" \
-H "Content-Type: application/json" \
-d '{"app":"firefox","args":["--private"]}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/launch" \
-H "Content-Type: application/json" \
-d '{"app":"xterm","wait":true}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/open" \
-H "Content-Type: application/json" \
-d '{"target":"https://example.com"}'
```
</CodeGroup>
The returned `processId` can be used with the [Process API](/processes) to read logs (`GET /v1/processes/{id}/logs`) or stop the application (`POST /v1/processes/{id}/stop`).
When `wait` is `true`, the API polls for up to 5 seconds for a window to appear. If the window appears, its ID is returned in `windowId`. If it times out, `windowId` is `null` but the process is still running.
<Tip>
**Launch/Open vs the Process API:** Both `launch` and `open` are convenience wrappers around the [Process API](/processes). They create managed processes (with `owner: "desktop"`) that you can inspect, log, and stop through the same Process endpoints. The difference is that `launch` first validates that the binary exists on `PATH` and can optionally wait for a window to appear, while `open` delegates to the system default handler (`xdg-open`). Use the Process API directly when you need full control over the command, environment, working directory, or restart policy.
</Tip>
## Recording
Record the desktop to MP4.
<CodeGroup>
```ts TypeScript
const recording = await sdk.startDesktopRecording({ fps: 30 });
console.log(recording.id);
// ... do things ...
const stopped = await sdk.stopDesktopRecording();
// List all recordings
const { recordings } = await sdk.listDesktopRecordings();
// Download
const mp4 = await sdk.downloadDesktopRecording(recording.id);
// Clean up
await sdk.deleteDesktopRecording(recording.id);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/recording/start" \
-H "Content-Type: application/json" \
-d '{"fps":30}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/recording/stop"
curl "http://127.0.0.1:2468/v1/desktop/recordings"
curl "http://127.0.0.1:2468/v1/desktop/recordings/rec_1/download" --output recording.mp4
curl -X DELETE "http://127.0.0.1:2468/v1/desktop/recordings/rec_1"
```
</CodeGroup>
## Desktop processes
The desktop runtime manages several background processes (Xvfb, openbox, neko, ffmpeg). These are all registered with the general [Process API](/processes) under the `desktop` owner, so you can inspect logs, check status, and troubleshoot using the same tools you use for any other managed process.
<CodeGroup>
```ts TypeScript
// List all processes, including desktop-owned ones
const { processes } = await sdk.listProcesses();
const desktopProcs = processes.filter((p) => p.owner === "desktop");
for (const p of desktopProcs) {
console.log(p.id, p.command, p.status);
}
// Read logs from a specific desktop process
const logs = await sdk.getProcessLogs(desktopProcs[0].id, { tail: 50 });
for (const entry of logs.entries) {
console.log(entry.stream, atob(entry.data));
}
```
```bash cURL
# List all processes (desktop processes have owner: "desktop")
curl "http://127.0.0.1:2468/v1/processes"
# Get logs from a specific desktop process
curl "http://127.0.0.1:2468/v1/processes/proc_1/logs?tail=50"
```
</CodeGroup>
The desktop status endpoint also includes a summary of running processes:
<CodeGroup>
```ts TypeScript
const status = await sdk.getDesktopStatus();
for (const proc of status.processes) {
console.log(proc.name, proc.pid, proc.running);
}
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/status"
# Response includes: processes: [{ name: "Xvfb", pid: 123, running: true }, ...]
```
</CodeGroup>
| Process | Role | Restart policy |
|---------|------|---------------|
| Xvfb | Virtual X11 framebuffer | Auto-restart while desktop is active |
| openbox | Window manager | Auto-restart while desktop is active |
| neko | WebRTC streaming server (started by `startDesktopStream`) | No auto-restart |
| ffmpeg | Screen recorder (started by `startDesktopRecording`) | No auto-restart |
## Live streaming
Start a WebRTC stream for real-time desktop viewing in a browser.
<CodeGroup>
```ts TypeScript
await sdk.startDesktopStream();
// Check stream status
const status = await sdk.getDesktopStreamStatus();
console.log(status.active); // true
console.log(status.processId); // "proc_5"
// Connect via the React DesktopViewer component or
// use the WebSocket signaling endpoint directly
// at ws://127.0.0.1:2468/v1/desktop/stream/signaling
await sdk.stopDesktopStream();
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/stream/start"
# Check stream status
curl "http://127.0.0.1:2468/v1/desktop/stream/status"
# Connect to ws://127.0.0.1:2468/v1/desktop/stream/signaling for WebRTC signaling
curl -X POST "http://127.0.0.1:2468/v1/desktop/stream/stop"
```
</CodeGroup>
For a drop-in React component, see [React Components](/react-components).
## API reference
### Endpoints
| Method | Path | Description |
|--------|------|-------------|
| `POST` | `/v1/desktop/start` | Start the desktop runtime |
| `POST` | `/v1/desktop/stop` | Stop the desktop runtime |
| `GET` | `/v1/desktop/status` | Get desktop runtime status |
| `GET` | `/v1/desktop/screenshot` | Capture full desktop screenshot |
| `GET` | `/v1/desktop/screenshot/region` | Capture a region screenshot |
| `GET` | `/v1/desktop/mouse/position` | Get current mouse position |
| `POST` | `/v1/desktop/mouse/move` | Move the mouse |
| `POST` | `/v1/desktop/mouse/click` | Click the mouse |
| `POST` | `/v1/desktop/mouse/down` | Press mouse button down |
| `POST` | `/v1/desktop/mouse/up` | Release mouse button |
| `POST` | `/v1/desktop/mouse/drag` | Drag from one point to another |
| `POST` | `/v1/desktop/mouse/scroll` | Scroll at a position |
| `POST` | `/v1/desktop/keyboard/type` | Type text |
| `POST` | `/v1/desktop/keyboard/press` | Press a key with optional modifiers |
| `POST` | `/v1/desktop/keyboard/down` | Press a key down (hold) |
| `POST` | `/v1/desktop/keyboard/up` | Release a key |
| `GET` | `/v1/desktop/display/info` | Get display info |
| `GET` | `/v1/desktop/windows` | List visible windows |
| `GET` | `/v1/desktop/windows/focused` | Get focused window info |
| `POST` | `/v1/desktop/windows/{id}/focus` | Focus a window |
| `POST` | `/v1/desktop/windows/{id}/move` | Move a window |
| `POST` | `/v1/desktop/windows/{id}/resize` | Resize a window |
| `GET` | `/v1/desktop/clipboard` | Read clipboard contents |
| `POST` | `/v1/desktop/clipboard` | Write to clipboard |
| `POST` | `/v1/desktop/launch` | Launch an application |
| `POST` | `/v1/desktop/open` | Open a file or URL |
| `POST` | `/v1/desktop/recording/start` | Start recording |
| `POST` | `/v1/desktop/recording/stop` | Stop recording |
| `GET` | `/v1/desktop/recordings` | List recordings |
| `GET` | `/v1/desktop/recordings/{id}` | Get recording metadata |
| `GET` | `/v1/desktop/recordings/{id}/download` | Download recording |
| `DELETE` | `/v1/desktop/recordings/{id}` | Delete recording |
| `POST` | `/v1/desktop/stream/start` | Start WebRTC streaming |
| `POST` | `/v1/desktop/stream/stop` | Stop WebRTC streaming |
| `GET` | `/v1/desktop/stream/status` | Get stream status |
| `GET` | `/v1/desktop/stream/signaling` | WebSocket for WebRTC signaling |
### TypeScript SDK methods
| Method | Returns | Description |
|--------|---------|-------------|
| `startDesktop(request?)` | `DesktopStatusResponse` | Start the desktop |
| `stopDesktop()` | `DesktopStatusResponse` | Stop the desktop |
| `getDesktopStatus()` | `DesktopStatusResponse` | Get desktop status |
| `takeDesktopScreenshot(query?)` | `Uint8Array` | Capture screenshot |
| `takeDesktopRegionScreenshot(query)` | `Uint8Array` | Capture region screenshot |
| `getDesktopMousePosition()` | `DesktopMousePositionResponse` | Get mouse position |
| `moveDesktopMouse(request)` | `DesktopMousePositionResponse` | Move mouse |
| `clickDesktop(request)` | `DesktopMousePositionResponse` | Click mouse |
| `mouseDownDesktop(request)` | `DesktopMousePositionResponse` | Mouse button down |
| `mouseUpDesktop(request)` | `DesktopMousePositionResponse` | Mouse button up |
| `dragDesktopMouse(request)` | `DesktopMousePositionResponse` | Drag mouse |
| `scrollDesktop(request)` | `DesktopMousePositionResponse` | Scroll |
| `typeDesktopText(request)` | `DesktopActionResponse` | Type text |
| `pressDesktopKey(request)` | `DesktopActionResponse` | Press key |
| `keyDownDesktop(request)` | `DesktopActionResponse` | Key down |
| `keyUpDesktop(request)` | `DesktopActionResponse` | Key up |
| `getDesktopDisplayInfo()` | `DesktopDisplayInfoResponse` | Get display info |
| `listDesktopWindows()` | `DesktopWindowListResponse` | List windows |
| `getDesktopFocusedWindow()` | `DesktopWindowInfo` | Get focused window |
| `focusDesktopWindow(id)` | `DesktopWindowInfo` | Focus a window |
| `moveDesktopWindow(id, request)` | `DesktopWindowInfo` | Move a window |
| `resizeDesktopWindow(id, request)` | `DesktopWindowInfo` | Resize a window |
| `getDesktopClipboard(query?)` | `DesktopClipboardResponse` | Read clipboard |
| `setDesktopClipboard(request)` | `DesktopActionResponse` | Write clipboard |
| `launchDesktopApp(request)` | `DesktopLaunchResponse` | Launch an app |
| `openDesktopTarget(request)` | `DesktopOpenResponse` | Open file/URL |
| `startDesktopRecording(request?)` | `DesktopRecordingInfo` | Start recording |
| `stopDesktopRecording()` | `DesktopRecordingInfo` | Stop recording |
| `listDesktopRecordings()` | `DesktopRecordingListResponse` | List recordings |
| `getDesktopRecording(id)` | `DesktopRecordingInfo` | Get recording |
| `downloadDesktopRecording(id)` | `Uint8Array` | Download recording |
| `deleteDesktopRecording(id)` | `void` | Delete recording |
| `startDesktopStream()` | `DesktopStreamStatusResponse` | Start streaming |
| `stopDesktopStream()` | `DesktopStreamStatusResponse` | Stop streaming |
| `getDesktopStreamStatus()` | `DesktopStreamStatusResponse` | Stream status |
## Customizing the desktop environment
The desktop runs inside the sandbox filesystem, so you can customize it using the [File System](/file-system) API before or after starting the desktop. The desktop HOME directory is located at `~/.local/state/sandbox-agent/desktop/home` (or `$XDG_STATE_HOME/sandbox-agent/desktop/home` if `XDG_STATE_HOME` is set).
All configuration files below are written to paths relative to this HOME directory.
### Window manager (openbox)
The desktop uses [openbox](http://openbox.org/) as its window manager. You can customize its behavior, theme, and keyboard shortcuts by writing an `rc.xml` config file.
<CodeGroup>
```ts TypeScript
const openboxConfig = `<?xml version="1.0" encoding="UTF-8"?>
<openbox_config xmlns="http://openbox.org/3.4/rc">
<theme>
<name>Clearlooks</name>
<titleLayout>NLIMC</titleLayout>
<font place="ActiveWindow"><name>DejaVu Sans</name><size>10</size></font>
</theme>
<desktops><number>1</number></desktops>
<keyboard>
<keybind key="A-F4"><action name="Close"/></keybind>
<keybind key="A-Tab"><action name="NextWindow"/></keybind>
</keyboard>
</openbox_config>`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/rc.xml" },
openboxConfig,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/rc.xml" \
-H "Content-Type: application/octet-stream" \
--data-binary @rc.xml
```
</CodeGroup>
### Autostart programs
Openbox runs the script at `~/.config/openbox/autostart` on startup. Use this to launch applications, set the background, or configure the environment.
<CodeGroup>
```ts TypeScript
const autostart = `#!/bin/sh
# Set a solid background color
xsetroot -solid "#1e1e2e" &
# Launch a terminal
xterm -geometry 120x40+50+50 &
# Launch a browser
firefox --no-remote &
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" },
autostart,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" \
-H "Content-Type: application/octet-stream" \
--data-binary @autostart.sh
```
</CodeGroup>
<Note>
The autostart script runs when openbox starts, which happens during `startDesktop()`. Write the autostart file before calling `startDesktop()` for it to take effect.
</Note>
### Background
There is no wallpaper set by default (the background is the X root window default). You can set it using `xsetroot` in the autostart script (as shown above), or use `feh` if you need an image:
<CodeGroup>
```ts TypeScript
// Upload a wallpaper image
import fs from "node:fs";
const wallpaper = await fs.promises.readFile("./wallpaper.png");
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/wallpaper.png" },
wallpaper,
);
// Set the autostart to apply it
const autostart = `#!/bin/sh
feh --bg-fill ~/wallpaper.png &
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" },
autostart,
);
```
```bash cURL
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/wallpaper.png" \
-H "Content-Type: application/octet-stream" \
--data-binary @wallpaper.png
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" \
-H "Content-Type: application/octet-stream" \
--data-binary @autostart.sh
```
</CodeGroup>
<Note>
`feh` is not installed by default. Install it via the [Process API](/processes) before starting the desktop: `await sdk.runProcess({ command: "apt-get", args: ["install", "-y", "feh"] })`.
</Note>
### Fonts
Only `fonts-dejavu-core` is installed by default. To add more fonts, install them with your system package manager or copy font files into the sandbox:
<CodeGroup>
```ts TypeScript
// Install a font package
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "fonts-noto", "fonts-liberation"],
});
// Or copy a custom font file
import fs from "node:fs";
const font = await fs.promises.readFile("./CustomFont.ttf");
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.local/share/fonts" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.local/share/fonts/CustomFont.ttf" },
font,
);
// Rebuild the font cache
await sdk.runProcess({ command: "fc-cache", args: ["-fv"] });
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","fonts-noto","fonts-liberation"]}'
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.local/share/fonts"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.local/share/fonts/CustomFont.ttf" \
-H "Content-Type: application/octet-stream" \
--data-binary @CustomFont.ttf
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"fc-cache","args":["-fv"]}'
```
</CodeGroup>
### Cursor theme
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "dmz-cursor-theme"],
});
const xresources = `Xcursor.theme: DMZ-White\nXcursor.size: 24\n`;
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.Xresources" },
xresources,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","dmz-cursor-theme"]}'
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.Xresources" \
-H "Content-Type: application/octet-stream" \
--data-binary $'Xcursor.theme: DMZ-White\nXcursor.size: 24\n'
```
</CodeGroup>
<Note>
Run `xrdb -merge ~/.Xresources` (via the autostart script or the [Process API](/processes)) after writing the file for the changes to take effect.
</Note>
### Shell and terminal
No terminal emulator or shell is launched by default. Add one to the openbox autostart:
```sh
# In ~/.config/openbox/autostart
xterm -geometry 120x40+50+50 &
```
To use a different shell, set the `SHELL` environment variable in your Dockerfile or install your preferred shell and configure the terminal to use it.
### GTK theme
Applications using GTK will pick up settings from `~/.config/gtk-3.0/settings.ini`:
<CodeGroup>
```ts TypeScript
const gtkSettings = `[Settings]
gtk-theme-name=Adwaita
gtk-icon-theme-name=Adwaita
gtk-font-name=DejaVu Sans 10
gtk-cursor-theme-name=DMZ-White
gtk-cursor-theme-size=24
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0/settings.ini" },
gtkSettings,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0/settings.ini" \
-H "Content-Type: application/octet-stream" \
--data-binary @settings.ini
```
</CodeGroup>
### Summary of configuration paths
All paths are relative to the desktop HOME directory (`~/.local/state/sandbox-agent/desktop/home`).
| What | Path | Notes |
|------|------|-------|
| Openbox config | `.config/openbox/rc.xml` | Window manager theme, keybindings, behavior |
| Autostart | `.config/openbox/autostart` | Shell script run on desktop start |
| Custom fonts | `.local/share/fonts/` | TTF/OTF files, run `fc-cache -fv` after |
| Cursor theme | `.Xresources` | Requires `xrdb -merge` to apply |
| GTK 3 settings | `.config/gtk-3.0/settings.ini` | Theme, icons, fonts for GTK apps |
| Wallpaper | Any path, referenced from autostart | Requires `feh` or similar tool |

View file

@ -1,115 +0,0 @@
---
title: "Credentials"
description: "How Sandbox Agent discovers and uses provider credentials."
---
Sandbox Agent discovers API credentials from environment variables and local agent config files.
These credentials are passed through to underlying agent runtimes.
## Credential sources
Credentials are discovered in priority order.
### Environment variables (highest priority)
API keys are checked first:
| Variable | Provider |
|----------|----------|
| `ANTHROPIC_API_KEY` | Anthropic |
| `CLAUDE_API_KEY` | Anthropic fallback |
| `OPENAI_API_KEY` | OpenAI |
| `CODEX_API_KEY` | OpenAI fallback |
OAuth tokens (used when OAuth extraction is enabled):
| Variable | Provider |
|----------|----------|
| `CLAUDE_CODE_OAUTH_TOKEN` | Anthropic |
| `ANTHROPIC_AUTH_TOKEN` | Anthropic fallback |
### Agent config files
| Agent | Config path | Provider |
|-------|-------------|----------|
| Amp | `~/.amp/config.json` | Anthropic |
| Claude Code | `~/.claude.json`, `~/.claude/.credentials.json` | Anthropic |
| Codex | `~/.codex/auth.json` | OpenAI |
| OpenCode | `~/.local/share/opencode/auth.json` | Anthropic/OpenAI |
## Provider requirements by agent
| Agent | Required provider |
|-------|-------------------|
| Claude Code | Anthropic |
| Amp | Anthropic |
| Codex | OpenAI |
| OpenCode | Anthropic or OpenAI |
| Mock | None |
## Error handling behavior
Credential extraction is best-effort:
- Missing or malformed files are skipped.
- Discovery continues to later sources.
- Missing credentials mark providers unavailable instead of failing server startup.
When prompting, Sandbox Agent does not pre-validate provider credentials. Agent-native authentication errors surface through session events/output.
## Checking credential status
### API
`sdk.listAgents()` includes `credentialsAvailable` per agent.
```json
{
"agents": [
{
"id": "claude",
"installed": true,
"credentialsAvailable": true
},
{
"id": "codex",
"installed": true,
"credentialsAvailable": false
}
]
}
```
### TypeScript SDK
```typescript
const result = await sdk.listAgents();
for (const agent of result.agents) {
console.log(`${agent.id}: ${agent.credentialsAvailable ? "authenticated" : "no credentials"}`);
}
```
## Passing credentials explicitly
Set environment variables before starting Sandbox Agent:
```bash
export ANTHROPIC_API_KEY=sk-ant-...
export OPENAI_API_KEY=sk-...
sandbox-agent daemon start
```
Or with SDK-managed local spawn:
```typescript
import { SandboxAgent } from "sandbox-agent";
const sdk = await SandboxAgent.start({
spawn: {
env: {
ANTHROPIC_API_KEY: process.env.MY_ANTHROPIC_KEY,
},
},
});
```

View file

@ -80,9 +80,7 @@ await sdk.setMcpConfig(
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
sessionInit: { cwd: "/workspace",
cwd: "/workspace",
},
}); });
await session.prompt([ await session.prompt([
@ -145,9 +143,7 @@ await sdk.writeFsFile({ path: "/opt/skills/random-number/SKILL.md" }, skill);
```ts ```ts
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
sessionInit: { cwd: "/workspace",
cwd: "/workspace",
},
}); });
await session.prompt([ await session.prompt([

View file

@ -20,7 +20,7 @@ that BoxLite can load directly (BoxLite has its own image store separate from Do
```dockerfile ```dockerfile
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex
``` ```

View file

@ -25,13 +25,44 @@ cd my-sandbox
```dockerfile ```dockerfile
FROM cloudflare/sandbox:0.7.0 FROM cloudflare/sandbox:0.7.0
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude && sandbox-agent install-agent codex RUN sandbox-agent install-agent claude && sandbox-agent install-agent codex
EXPOSE 8000 EXPOSE 8000
``` ```
## TypeScript example ## TypeScript example (with provider)
For standalone scripts, use the `cloudflare` provider:
```bash
npm install sandbox-agent@0.4.x @cloudflare/sandbox
```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { cloudflare } from "sandbox-agent/cloudflare";
const sdk = await SandboxAgent.start({
sandbox: cloudflare(),
});
try {
const session = await sdk.createSession({ agent: "codex" });
const response = await session.prompt([
{ type: "text", text: "Summarize this repository" },
]);
console.log(response.stopReason);
} finally {
await sdk.destroySandbox();
}
```
The `cloudflare` provider uses `containerFetch` under the hood, automatically stripping `AbortSignal` to avoid dropped streaming updates.
## TypeScript example (Durable Objects)
For Workers with Durable Objects, use `SandboxAgent.connect(...)` with a custom `fetch` backed by `sandbox.containerFetch(...)`:
```typescript ```typescript
import { getSandbox, type Sandbox } from "@cloudflare/sandbox"; import { getSandbox, type Sandbox } from "@cloudflare/sandbox";
@ -109,7 +140,6 @@ app.all("*", (c) => c.env.ASSETS.fetch(c.req.raw));
export default app; export default app;
``` ```
Create the SDK client inside the Worker using custom `fetch` backed by `sandbox.containerFetch(...)`.
This keeps all Sandbox Agent calls inside the Cloudflare sandbox routing path and does not require a `baseUrl`. This keeps all Sandbox Agent calls inside the Cloudflare sandbox routing path and does not require a `baseUrl`.
## Troubleshooting streaming updates ## Troubleshooting streaming updates

View file

@ -1,160 +1,66 @@
--- ---
title: "ComputeSDK" title: "ComputeSDK"
description: "Deploy the daemon using ComputeSDK's provider-agnostic sandbox API." description: "Deploy Sandbox Agent using ComputeSDK's provider-agnostic sandbox API."
--- ---
[ComputeSDK](https://computesdk.com) provides a unified interface for managing sandboxes across multiple providers. Write once, deploy anywhere—switch providers by changing environment variables. [ComputeSDK](https://computesdk.com) provides a unified interface for managing sandboxes across multiple providers. Write once, deploy anywhere by changing environment variables.
## Prerequisites ## Prerequisites
- `COMPUTESDK_API_KEY` from [console.computesdk.com](https://console.computesdk.com) - `COMPUTESDK_API_KEY` from [console.computesdk.com](https://console.computesdk.com)
- Provider API key (one of: `E2B_API_KEY`, `DAYTONA_API_KEY`, `VERCEL_TOKEN`, `MODAL_TOKEN_ID` + `MODAL_TOKEN_SECRET`, `BLAXEL_API_KEY`, `CSB_API_KEY`) - Provider API key (one of: `E2B_API_KEY`, `DAYTONA_API_KEY`, `VERCEL_TOKEN`, `MODAL_TOKEN_ID` + `MODAL_TOKEN_SECRET`, `BLAXEL_API_KEY`, `CSB_API_KEY`)
- `ANTHROPIC_API_KEY` or `OPENAI_API_KEY` for the coding agents - `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`
## TypeScript Example ## TypeScript example
```bash
npm install sandbox-agent@0.4.x computesdk
```
```typescript ```typescript
import {
compute,
detectProvider,
getMissingEnvVars,
getProviderConfigFromEnv,
isProviderAuthComplete,
isValidProvider,
PROVIDER_NAMES,
type ExplicitComputeConfig,
type ProviderName,
} from "computesdk";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { computesdk } from "sandbox-agent/computesdk";
const PORT = 3000;
const REQUEST_TIMEOUT_MS =
Number.parseInt(process.env.COMPUTESDK_TIMEOUT_MS || "", 10) || 120_000;
/**
* Detects and validates the provider to use.
* Priority: COMPUTESDK_PROVIDER env var > auto-detection from API keys
*/
function resolveProvider(): ProviderName {
const providerOverride = process.env.COMPUTESDK_PROVIDER;
if (providerOverride) {
if (!isValidProvider(providerOverride)) {
throw new Error(
`Unsupported provider "${providerOverride}". Supported: ${PROVIDER_NAMES.join(", ")}`
);
}
if (!isProviderAuthComplete(providerOverride)) {
const missing = getMissingEnvVars(providerOverride);
throw new Error(
`Missing credentials for "${providerOverride}". Set: ${missing.join(", ")}`
);
}
return providerOverride as ProviderName;
}
const detected = detectProvider();
if (!detected) {
throw new Error(
`No provider credentials found. Set one of: ${PROVIDER_NAMES.map((p) => getMissingEnvVars(p).join(", ")).join(" | ")}`
);
}
return detected as ProviderName;
}
function configureComputeSDK(): void {
const provider = resolveProvider();
const config: ExplicitComputeConfig = {
provider,
computesdkApiKey: process.env.COMPUTESDK_API_KEY,
requestTimeoutMs: REQUEST_TIMEOUT_MS,
};
// Add provider-specific config from environment
const providerConfig = getProviderConfigFromEnv(provider);
if (Object.keys(providerConfig).length > 0) {
(config as any)[provider] = providerConfig;
}
compute.setConfig(config);
}
configureComputeSDK();
// Build environment variables to pass to sandbox
const envs: Record<string, string> = {}; const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
// Create sandbox const sdk = await SandboxAgent.start({
const sandbox = await compute.sandbox.create({ sandbox: computesdk({
envs: Object.keys(envs).length > 0 ? envs : undefined, create: {
envs,
image: process.env.COMPUTESDK_IMAGE,
templateId: process.env.COMPUTESDK_TEMPLATE_ID,
},
}),
}); });
// Helper to run commands with error handling try {
const run = async (cmd: string, options?: { background?: boolean }) => { const session = await sdk.createSession({ agent: "claude" });
const result = await sandbox.runCommand(cmd, options); const response = await session.prompt([
if (typeof result?.exitCode === "number" && result.exitCode !== 0) { { type: "text", text: "Summarize this repository" },
throw new Error(`Command failed: ${cmd} (exit ${result.exitCode})\n${result.stderr || ""}`); ]);
} console.log(response.stopReason);
return result; } finally {
}; await sdk.destroySandbox();
// Install sandbox-agent
await run("curl -fsSL https://releases.rivet.dev/sandbox-agent/latest/install.sh | sh");
// Install agents conditionally based on available API keys
if (envs.ANTHROPIC_API_KEY) {
await run("sandbox-agent install-agent claude");
} }
if (envs.OPENAI_API_KEY) {
await run("sandbox-agent install-agent codex");
}
// Start the server in the background
await run(`sandbox-agent server --no-token --host 0.0.0.0 --port ${PORT}`, { background: true });
// Get the public URL for the sandbox
const baseUrl = await sandbox.getUrl({ port: PORT });
// Wait for server to be ready
const deadline = Date.now() + REQUEST_TIMEOUT_MS;
while (Date.now() < deadline) {
try {
const response = await fetch(`${baseUrl}/v1/health`);
if (response.ok) {
const data = await response.json();
if (data?.status === "ok") break;
}
} catch {
// Server not ready yet
}
await new Promise((r) => setTimeout(r, 500));
}
// Connect to the server
const client = await SandboxAgent.connect({ baseUrl });
// Detect which agent to use based on available API keys
const agent = envs.ANTHROPIC_API_KEY ? "claude" : "codex";
// Create a session and start coding
await client.createSession("my-session", { agent });
await client.postMessage("my-session", {
message: "Summarize this repository",
});
for await (const event of client.streamEvents("my-session")) {
console.log(event.type, event.data);
}
// Cleanup
await sandbox.destroy();
``` ```
## Supported Providers The `computesdk` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. ComputeSDK routes to your configured provider behind the scenes.
The `create` option now forwards the full ComputeSDK sandbox-create payload, including provider-specific fields such as `image` and `templateId` when the selected provider supports them.
Before calling `SandboxAgent.start()`, configure ComputeSDK with your provider:
```typescript
import { compute } from "computesdk";
compute.setConfig({
provider: "e2b", // or auto-detect via detectProvider()
computesdkApiKey: process.env.COMPUTESDK_API_KEY,
});
```
## Supported providers
ComputeSDK auto-detects your provider from environment variables: ComputeSDK auto-detects your provider from environment variables:
@ -169,46 +75,7 @@ ComputeSDK auto-detects your provider from environment variables:
## Notes ## Notes
- **Provider resolution order**: `COMPUTESDK_PROVIDER` env var takes priority, otherwise auto-detection from API keys. - **Provider resolution**: Set `COMPUTESDK_PROVIDER` to force a specific provider, or let ComputeSDK auto-detect from API keys.
- **Conditional agent installation**: Only agents with available API keys are installed, reducing setup time.
- **Command error handling**: The example validates exit codes and throws on failures for easier debugging.
- `sandbox.runCommand(..., { background: true })` keeps the server running while your app continues. - `sandbox.runCommand(..., { background: true })` keeps the server running while your app continues.
- `sandbox.getUrl({ port })` returns a public URL for the sandbox port. - `sandbox.getUrl({ port })` returns a public URL for the sandbox port.
- Always destroy the sandbox when you are done to avoid leaking resources. - Always destroy the sandbox when done to avoid leaking resources.
- If sandbox creation times out, set `COMPUTESDK_TIMEOUT_MS` to a higher value (default: 120000ms).
## Explicit Provider Selection
To force a specific provider instead of auto-detection, set the `COMPUTESDK_PROVIDER` environment variable:
```bash
export COMPUTESDK_PROVIDER=e2b
```
Or configure programmatically using `getProviderConfigFromEnv()`:
```typescript
import { compute, getProviderConfigFromEnv, type ExplicitComputeConfig } from "computesdk";
const config: ExplicitComputeConfig = {
provider: "e2b",
computesdkApiKey: process.env.COMPUTESDK_API_KEY,
requestTimeoutMs: 120_000,
};
// Automatically populate provider-specific config from environment
const providerConfig = getProviderConfigFromEnv("e2b");
if (Object.keys(providerConfig).length > 0) {
(config as any).e2b = providerConfig;
}
compute.setConfig(config);
```
## Direct Mode (No ComputeSDK API Key)
To bypass the ComputeSDK gateway and use provider SDKs directly, see the provider-specific examples:
- [E2B](/deploy/e2b)
- [Daytona](/deploy/daytona)
- [Vercel](/deploy/vercel)

View file

@ -15,40 +15,37 @@ See [Daytona network limits](https://www.daytona.io/docs/en/network-limits/).
## TypeScript example ## TypeScript example
```typescript ```bash
import { Daytona } from "@daytonaio/sdk"; npm install sandbox-agent@0.4.x @daytonaio/sdk
import { SandboxAgent } from "sandbox-agent"; ```
const daytona = new Daytona(); ```typescript
import { SandboxAgent } from "sandbox-agent";
import { daytona } from "sandbox-agent/daytona";
const envVars: Record<string, string> = {}; const envVars: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envVars.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envVars.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envVars.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envVars.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const sandbox = await daytona.create({ envVars }); const sdk = await SandboxAgent.start({
sandbox: daytona({
create: { envVars },
}),
});
await sandbox.process.executeCommand( try {
"curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh" const session = await sdk.createSession({ agent: "claude" });
); const response = await session.prompt([
{ type: "text", text: "Summarize this repository" },
await sandbox.process.executeCommand("sandbox-agent install-agent claude"); ]);
await sandbox.process.executeCommand("sandbox-agent install-agent codex"); console.log(response.stopReason);
} finally {
await sandbox.process.executeCommand( await sdk.destroySandbox();
"nohup sandbox-agent server --no-token --host 0.0.0.0 --port 3000 >/tmp/sandbox-agent.log 2>&1 &" }
);
await new Promise((r) => setTimeout(r, 2000));
const baseUrl = (await sandbox.getSignedPreviewUrl(3000, 4 * 60 * 60)).url;
const sdk = await SandboxAgent.connect({ baseUrl });
const session = await sdk.createSession({ agent: "claude" });
await session.prompt([{ type: "text", text: "Summarize this repository" }]);
await sandbox.delete();
``` ```
The `daytona` provider uses the `rivetdev/sandbox-agent:0.4.2-full` image by default and starts the server automatically.
## Using snapshots for faster startup ## Using snapshots for faster startup
```typescript ```typescript
@ -64,7 +61,7 @@ if (!hasSnapshot) {
name: SNAPSHOT, name: SNAPSHOT,
image: Image.base("ubuntu:22.04").runCommands( image: Image.base("ubuntu:22.04").runCommands(
"apt-get update && apt-get install -y curl ca-certificates", "apt-get update && apt-get install -y curl ca-certificates",
"curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh", "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh",
"sandbox-agent install-agent claude", "sandbox-agent install-agent claude",
"sandbox-agent install-agent codex", "sandbox-agent install-agent codex",
), ),

View file

@ -15,11 +15,32 @@ Run the published full image with all supported agents pre-installed:
docker run --rm -p 3000:3000 \ docker run --rm -p 3000:3000 \
-e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \ -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
-e OPENAI_API_KEY="$OPENAI_API_KEY" \ -e OPENAI_API_KEY="$OPENAI_API_KEY" \
rivetdev/sandbox-agent:0.3.1-full \ rivetdev/sandbox-agent:0.4.2-full \
server --no-token --host 0.0.0.0 --port 3000 server --no-token --host 0.0.0.0 --port 3000
``` ```
The `0.3.1-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image. The `0.4.2-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image.
If you also want the desktop API inside the container, install desktop dependencies before starting the server:
```bash
docker run --rm -p 3000:3000 \
-e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
-e OPENAI_API_KEY="$OPENAI_API_KEY" \
node:22-bookworm-slim sh -c "\
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates bash libstdc++6 && \
rm -rf /var/lib/apt/lists/* && \
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \
sandbox-agent install desktop --yes && \
sandbox-agent server --no-token --host 0.0.0.0 --port 3000"
```
In a Dockerfile:
```dockerfile
RUN sandbox-agent install desktop --yes
```
## TypeScript with dockerode ## TypeScript with dockerode
@ -31,7 +52,7 @@ const docker = new Docker();
const PORT = 3000; const PORT = 3000;
const container = await docker.createContainer({ const container = await docker.createContainer({
Image: "rivetdev/sandbox-agent:0.3.1-full", Image: "rivetdev/sandbox-agent:0.4.2-full",
Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`], Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`],
Env: [ Env: [
`ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`, `ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`,
@ -65,7 +86,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
bash ca-certificates curl git && \ bash ca-certificates curl git && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \
sandbox-agent install-agent --all sandbox-agent install-agent --all
RUN useradd -m -s /bin/bash sandbox RUN useradd -m -s /bin/bash sandbox

View file

@ -10,43 +10,43 @@ description: "Deploy Sandbox Agent inside an E2B sandbox."
## TypeScript example ## TypeScript example
```bash
npm install sandbox-agent@0.4.x @e2b/code-interpreter
```
```typescript ```typescript
import { Sandbox } from "@e2b/code-interpreter";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { e2b } from "sandbox-agent/e2b";
const envs: Record<string, string> = {}; const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const template = process.env.E2B_TEMPLATE;
const sandbox = await Sandbox.create({ allowInternetAccess: true, envs }); const sdk = await SandboxAgent.start({
sandbox: e2b({
await sandbox.commands.run( template,
"curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh" create: { envs },
); }),
await sandbox.commands.run("sandbox-agent install-agent claude");
await sandbox.commands.run("sandbox-agent install-agent codex");
await sandbox.commands.run(
"sandbox-agent server --no-token --host 0.0.0.0 --port 3000",
{ background: true, timeoutMs: 0 }
);
const baseUrl = `https://${sandbox.getHost(3000)}`;
const sdk = await SandboxAgent.connect({ baseUrl });
const session = await sdk.createSession({ agent: "claude" });
const off = session.onEvent((event) => {
console.log(event.sender, event.payload);
}); });
await session.prompt([{ type: "text", text: "Summarize this repository" }]); try {
off(); const session = await sdk.createSession({ agent: "claude" });
const response = await session.prompt([
await sandbox.kill(); { type: "text", text: "Summarize this repository" },
]);
console.log(response.stopReason);
} finally {
await sdk.destroySandbox();
}
``` ```
The `e2b` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. Sandboxes pause by default instead of being deleted, and reconnecting with the same `sandboxId` resumes them automatically.
Pass `template` when you want to start from a custom E2B template alias or template ID. E2B base-image selection happens when you build the template, then `sandbox-agent/e2b` uses that template at sandbox creation time.
## Faster cold starts ## Faster cold starts
For faster startup, create a custom E2B template with Sandbox Agent and target agents pre-installed. For faster startup, create a custom E2B template with Sandbox Agent and target agents pre-installed.
See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template). Build System 2.0 also lets you choose the template's base image in code.
See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template) and [E2B Base Images](https://e2b.dev/docs/template/base-image).

View file

@ -1,153 +0,0 @@
---
title: "Foundry Self-Hosting"
description: "Environment, credentials, and deployment setup for Sandbox Agent Foundry auth, GitHub, and billing."
---
This guide documents the deployment contract for the Foundry product surface: app auth, GitHub onboarding, repository import, and billing.
It also covers the local-development bootstrap that uses `.env.development` only when `NODE_ENV=development`.
## Local Development
For backend local development, the Foundry backend now supports a development-only dotenv bootstrap:
- It loads `.env.development.local` and `.env.development`
- It does this **only** when `NODE_ENV=development`
- It does **not** load dotenv files in production
The example file lives at [`/.env.development.example`](https://github.com/rivet-dev/sandbox-agent/blob/main/.env.development.example).
To use it locally:
```bash
cp .env.development.example .env.development
```
Run the backend with:
```bash
just foundry-backend-start
```
That recipe sets `NODE_ENV=development`, which enables the dotenv loader.
### Local Defaults
These values can be safely defaulted for local development:
- `APP_URL=http://localhost:4173`
- `BETTER_AUTH_URL=http://localhost:7741`
- `BETTER_AUTH_SECRET=sandbox-agent-foundry-development-only-change-me`
- `GITHUB_REDIRECT_URI=http://localhost:7741/v1/auth/callback/github`
These should be treated as development-only values.
## Production Environment
For production or self-hosting, set these as real environment variables in your deployment platform. Do not rely on dotenv file loading.
### App/Auth
| Variable | Required | Notes |
|---|---:|---|
| `APP_URL` | Yes | Public frontend origin |
| `BETTER_AUTH_URL` | Yes | Public auth base URL |
| `BETTER_AUTH_SECRET` | Yes | Strong random secret for auth/session signing |
### GitHub OAuth
| Variable | Required | Notes |
|---|---:|---|
| `GITHUB_CLIENT_ID` | Yes | GitHub OAuth app client id |
| `GITHUB_CLIENT_SECRET` | Yes | GitHub OAuth app client secret |
| `GITHUB_REDIRECT_URI` | Yes | GitHub OAuth callback URL |
Use GitHub OAuth for:
- user sign-in
- user identity
- org selection
- access to the signed-in user's GitHub context
## GitHub App
If your Foundry deployment uses GitHub App-backed organization install and repo import, also configure:
| Variable | Required | Notes |
|---|---:|---|
| `GITHUB_APP_ID` | Yes | GitHub App id |
| `GITHUB_APP_CLIENT_ID` | Yes | GitHub App client id |
| `GITHUB_APP_CLIENT_SECRET` | Yes | GitHub App client secret |
| `GITHUB_APP_PRIVATE_KEY` | Yes | PEM private key for installation auth |
For `.env.development` and `.env.development.local`, store `GITHUB_APP_PRIVATE_KEY` as a quoted single-line value with `\n` escapes instead of raw multi-line PEM text.
Recommended GitHub App permissions:
- Repository `Metadata: Read`
- Repository `Contents: Read & Write`
- Repository `Pull requests: Read & Write`
- Repository `Checks: Read`
- Repository `Commit statuses: Read`
Set the webhook URL to `https://<your-backend-host>/v1/webhooks/github` and generate a webhook secret. Store the secret as `GITHUB_WEBHOOK_SECRET`.
Recommended webhook subscriptions:
- `installation`
- `installation_repositories`
- `pull_request`
- `pull_request_review`
- `pull_request_review_comment`
- `push`
- `create`
- `delete`
- `check_suite`
- `check_run`
- `status`
Use the GitHub App for:
- installation/reconnect state
- org repo import
- repository sync
- PR creation and updates
Use GitHub OAuth for:
- who the user is
- which orgs they can choose
## Stripe
For live billing, configure:
| Variable | Required | Notes |
|---|---:|---|
| `STRIPE_SECRET_KEY` | Yes | Server-side Stripe secret key |
| `STRIPE_PUBLISHABLE_KEY` | Yes | Client-side Stripe publishable key |
| `STRIPE_WEBHOOK_SECRET` | Yes | Signing secret for billing webhooks |
| `STRIPE_PRICE_TEAM` | Yes | Stripe price id for the Team plan checkout session |
Stripe should own:
- hosted checkout
- billing portal
- subscription status
- invoice history
- webhook-driven state sync
## Mock Invariant
Foundry's mock client path should continue to work end to end even when the real auth/GitHub/Stripe path exists.
That includes:
- sign-in
- org selection/import
- settings
- billing UI
- workspace/task/session flow
- seat accrual
Use mock mode for deterministic UI review and local product development. Use the real env-backed path for integration and self-hosting.

View file

@ -9,7 +9,7 @@ For local development, run Sandbox Agent directly on your machine.
```bash ```bash
# Install # Install
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Run # Run
sandbox-agent server --no-token --host 127.0.0.1 --port 2468 sandbox-agent server --no-token --host 127.0.0.1 --port 2468
@ -20,24 +20,27 @@ Or with npm/Bun:
<Tabs> <Tabs>
<Tab title="npx"> <Tab title="npx">
```bash ```bash
npx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 npx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468
``` ```
</Tab> </Tab>
<Tab title="bunx"> <Tab title="bunx">
```bash ```bash
bunx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 bunx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468
``` ```
</Tab> </Tab>
</Tabs> </Tabs>
## With the TypeScript SDK ## With the TypeScript SDK
The SDK can spawn and manage the server as a subprocess: The SDK can spawn and manage the server as a subprocess using the `local` provider:
```typescript ```typescript
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { local } from "sandbox-agent/local";
const sdk = await SandboxAgent.start(); const sdk = await SandboxAgent.start({
sandbox: local(),
});
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
@ -47,7 +50,21 @@ await session.prompt([
{ type: "text", text: "Summarize this repository." }, { type: "text", text: "Summarize this repository." },
]); ]);
await sdk.dispose(); await sdk.destroySandbox();
``` ```
This starts the server on an available local port and connects automatically. This starts the server on an available local port and connects automatically.
Pass options to customize the local provider:
```typescript
const sdk = await SandboxAgent.start({
sandbox: local({
port: 3000,
log: "inherit",
env: {
ANTHROPIC_API_KEY: process.env.MY_ANTHROPIC_KEY,
},
}),
});
```

55
docs/deploy/modal.mdx Normal file
View file

@ -0,0 +1,55 @@
---
title: "Modal"
description: "Deploy Sandbox Agent inside a Modal sandbox."
---
## Prerequisites
- `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` from [modal.com/settings](https://modal.com/settings)
- `ANTHROPIC_API_KEY` or `OPENAI_API_KEY`
## TypeScript example
```bash
npm install sandbox-agent@0.4.x modal
```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { modal } from "sandbox-agent/modal";
const secrets: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) secrets.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) secrets.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const baseImage = process.env.MODAL_BASE_IMAGE ?? "node:22-slim";
const sdk = await SandboxAgent.start({
sandbox: modal({
image: baseImage,
create: { secrets },
}),
});
try {
const session = await sdk.createSession({ agent: "claude" });
const response = await session.prompt([
{ type: "text", text: "Summarize this repository" },
]);
console.log(response.stopReason);
} finally {
await sdk.destroySandbox();
}
```
The `modal` provider handles app creation, image building, sandbox provisioning, agent installation, server startup, and tunnel networking automatically.
Set `image` to change the base Docker image before Sandbox Agent and its agent binaries are layered on top. You can also pass a prebuilt Modal `Image` object.
## Faster cold starts
Modal caches image layers, so the Dockerfile commands that install `curl` and `sandbox-agent` only run on the first build. Subsequent sandbox creates reuse the cached image.
## Notes
- Modal sandboxes use [gVisor](https://gvisor.dev/) for strong isolation.
- Ports are exposed via encrypted tunnels (`encryptedPorts`). The provider uses `sb.tunnels()` to get the public HTTPS URL.
- Environment variables (API keys) are passed as Modal [Secrets](https://modal.com/docs/guide/secrets) for security.

View file

@ -10,52 +10,40 @@ description: "Deploy Sandbox Agent inside a Vercel Sandbox."
## TypeScript example ## TypeScript example
```typescript ```bash
import { Sandbox } from "@vercel/sandbox"; npm install sandbox-agent@0.4.x @vercel/sandbox
import { SandboxAgent } from "sandbox-agent";
const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const sandbox = await Sandbox.create({
runtime: "node24",
ports: [3000],
});
const run = async (cmd: string, args: string[] = []) => {
const result = await sandbox.runCommand({ cmd, args, env: envs });
if (result.exitCode !== 0) {
throw new Error(`Command failed: ${cmd} ${args.join(" ")}`);
}
};
await run("sh", ["-c", "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh"]);
await run("sandbox-agent", ["install-agent", "claude"]);
await run("sandbox-agent", ["install-agent", "codex"]);
await sandbox.runCommand({
cmd: "sandbox-agent",
args: ["server", "--no-token", "--host", "0.0.0.0", "--port", "3000"],
env: envs,
detached: true,
});
const baseUrl = sandbox.domain(3000);
const sdk = await SandboxAgent.connect({ baseUrl });
const session = await sdk.createSession({ agent: "claude" });
const off = session.onEvent((event) => {
console.log(event.sender, event.payload);
});
await session.prompt([{ type: "text", text: "Summarize this repository" }]);
off();
await sandbox.stop();
``` ```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { vercel } from "sandbox-agent/vercel";
const env: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) env.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) env.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const sdk = await SandboxAgent.start({
sandbox: vercel({
create: {
runtime: "node24",
env,
},
}),
});
try {
const session = await sdk.createSession({ agent: "claude" });
const response = await session.prompt([
{ type: "text", text: "Summarize this repository" },
]);
console.log(response.stopReason);
} finally {
await sdk.destroySandbox();
}
```
The `vercel` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically.
## Authentication ## Authentication
Vercel Sandboxes support OIDC token auth (recommended) and access-token auth. Vercel Sandboxes support OIDC token auth (recommended) and access-token auth.

View file

@ -1,6 +1,6 @@
{ {
"$schema": "https://mintlify.com/docs.json", "$schema": "https://mintlify.com/docs.json",
"theme": "willow", "theme": "mint",
"name": "Sandbox Agent SDK", "name": "Sandbox Agent SDK",
"appearance": { "appearance": {
"default": "dark", "default": "dark",
@ -8,8 +8,8 @@
}, },
"colors": { "colors": {
"primary": "#ff4f00", "primary": "#ff4f00",
"light": "#ff4f00", "light": "#ff6a2a",
"dark": "#ff4f00" "dark": "#cc3f00"
}, },
"favicon": "/favicon.svg", "favicon": "/favicon.svg",
"logo": { "logo": {
@ -25,17 +25,13 @@
}, },
"navbar": { "navbar": {
"links": [ "links": [
{
"label": "Gigacode",
"icon": "terminal",
"href": "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode"
},
{ {
"label": "Discord", "label": "Discord",
"icon": "discord", "icon": "discord",
"href": "https://discord.gg/auCecybynK" "href": "https://discord.gg/auCecybynK"
}, },
{ {
"label": "GitHub",
"type": "github", "type": "github",
"href": "https://github.com/rivet-dev/sandbox-agent" "href": "https://github.com/rivet-dev/sandbox-agent"
} }
@ -51,46 +47,55 @@
"pages": [ "pages": [
"quickstart", "quickstart",
"sdk-overview", "sdk-overview",
"llm-credentials",
"react-components", "react-components",
{ {
"group": "Deploy", "group": "Deploy",
"icon": "server", "icon": "server",
"pages": [ "pages": [
"deploy/local", "deploy/local",
"deploy/computesdk",
"deploy/e2b", "deploy/e2b",
"deploy/daytona", "deploy/daytona",
"deploy/vercel", "deploy/vercel",
"deploy/cloudflare", "deploy/cloudflare",
"deploy/docker", "deploy/docker",
"deploy/boxlite" "deploy/modal",
"deploy/boxlite",
"deploy/computesdk"
] ]
} }
] ]
}, },
{ {
"group": "Agent", "group": "Agent",
"pages": ["agent-sessions", "attachments", "skills-config", "mcp-config", "custom-tools"] "pages": [
"agent-sessions",
{
"group": "Agents",
"icon": "robot",
"pages": ["agents/claude", "agents/codex", "agents/opencode", "agents/cursor", "agents/amp", "agents/pi"]
},
"attachments",
"skills-config",
"mcp-config",
"custom-tools"
]
}, },
{ {
"group": "System", "group": "System",
"pages": ["file-system", "processes"] "pages": ["file-system", "processes", "computer-use", "common-software"]
},
{
"group": "Orchestration",
"pages": ["architecture", "session-persistence", "observability", "multiplayer", "security"]
}, },
{ {
"group": "Reference", "group": "Reference",
"pages": [ "pages": [
"agent-capabilities", "troubleshooting",
"architecture",
"cli", "cli",
"inspector", "inspector",
"opencode-compatibility", "opencode-compatibility",
{ {
"group": "More", "group": "More",
"pages": [ "pages": [
"credentials",
"daemon", "daemon",
"cors", "cors",
"session-restoration", "session-restoration",
@ -115,5 +120,11 @@
] ]
} }
] ]
} },
"__removed": [
{
"group": "Orchestration",
"pages": ["orchestration-architecture", "session-persistence", "observability", "multiplayer", "security"]
}
]
} }

View file

@ -1,6 +0,0 @@
---
title: Gigacode
url: "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode"
---

View file

@ -35,6 +35,7 @@ console.log(url);
- Prompt testing - Prompt testing
- Request/response debugging - Request/response debugging
- Interactive permission prompts (approve, always-allow, or reject tool-use requests) - Interactive permission prompts (approve, always-allow, or reject tool-use requests)
- Desktop panel for status, remediation, start/stop, and screenshot refresh
- Process management (create, stop, kill, delete, view logs) - Process management (create, stop, kill, delete, view logs)
- Interactive PTY terminal for tty processes - Interactive PTY terminal for tty processes
- One-shot command execution - One-shot command execution
@ -50,3 +51,16 @@ console.log(url);
The Inspector includes an embedded Ghostty-based terminal for interactive tty The Inspector includes an embedded Ghostty-based terminal for interactive tty
processes. The UI uses the SDK's high-level `connectProcessTerminal(...)` processes. The UI uses the SDK's high-level `connectProcessTerminal(...)`
wrapper via the shared `@sandbox-agent/react` `ProcessTerminal` component. wrapper via the shared `@sandbox-agent/react` `ProcessTerminal` component.
## Desktop panel
The `Desktop` panel shows the current desktop runtime state, missing dependencies,
the suggested install command, last error details, process/log paths, and the
latest captured screenshot.
Use it to:
- Check whether desktop dependencies are installed
- Start or stop the managed desktop runtime
- Refresh desktop status
- Capture a fresh screenshot on demand

250
docs/llm-credentials.mdx Normal file
View file

@ -0,0 +1,250 @@
---
title: "LLM Credentials"
description: "Strategies for providing LLM provider credentials to agents."
icon: "key"
---
Sandbox Agent needs LLM provider credentials (Anthropic, OpenAI, etc.) to run agent sessions.
## Configuration
Pass credentials via `spawn.env` when starting a sandbox. Each call to `SandboxAgent.start()` can use different credentials:
```typescript
import { SandboxAgent } from "sandbox-agent";
const sdk = await SandboxAgent.start({
spawn: {
env: {
ANTHROPIC_API_KEY: "sk-ant-...",
OPENAI_API_KEY: "sk-...",
},
},
});
```
Each agent requires credentials from a specific provider. Sandbox Agent checks environment variables (including those passed via `spawn.env`) and host config files:
| Agent | Provider | Environment variables | Config files |
|-------|----------|----------------------|--------------|
| Claude Code | Anthropic | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | `~/.claude.json`, `~/.claude/.credentials.json` |
| Amp | Anthropic | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | `~/.amp/config.json` |
| Codex | OpenAI | `OPENAI_API_KEY`, `CODEX_API_KEY` | `~/.codex/auth.json` |
| OpenCode | Anthropic or OpenAI | `ANTHROPIC_API_KEY`, `OPENAI_API_KEY` | `~/.local/share/opencode/auth.json` |
| Mock | None | - | - |
## Credential strategies
LLM credentials are passed into the sandbox as environment variables. The agent and everything else inside the sandbox have access to the token, so it's important to choose the right strategy for how you provision and scope these credentials.
| Strategy | Who pays | Cost attribution | Best for |
|----------|----------|-----------------|----------|
| **Per-tenant gateway** (recommended) | Your organization, billed back per tenant | Per-tenant keys with budgets | Multi-tenant SaaS, usage-based billing |
| **Bring your own key** | Each user (usage-based) | Per-user by default | Dev environments, internal tools |
| **Shared credentials** | Your organization | None (single bill) | Single-tenant apps, internal platforms |
| **Personal subscription** | Each user (existing subscription) | Per-user by default | Local dev, internal tools where users have Claude or Codex subscriptions |
### Per-tenant gateway (recommended)
Route LLM traffic through a gateway that mints per-tenant API keys, each with its own spend tracking and budget limits.
```mermaid
graph LR
B[Your Backend] -->|tenant key| S[Sandbox]
S -->|LLM requests| G[Gateway]
G -->|scoped key| P[LLM Provider]
```
Your backend issues a scoped key per tenant, then passes it to the sandbox. This is the typical pattern when using sandbox providers (E2B, Daytona, Docker).
```typescript expandable
import { SandboxAgent } from "sandbox-agent";
/**
 * Provisions a budget-limited OpenRouter API key for a single tenant, then
 * starts a sandbox that receives that key and opens a session in it.
 *
 * @param tenantId - Tenant identifier; used to name the provisioned key (`tenant-<id>`).
 * @returns The started SDK handle and the session created inside the sandbox.
 * @throws Error if the provisioning request fails or the response contains no key.
 */
async function createTenantSandbox(tenantId: string) {
  // Issue a scoped key for this tenant via OpenRouter
  const res = await fetch("https://openrouter.ai/api/v1/keys", {
    method: "POST",
    headers: {
      Authorization: `Bearer ${process.env.OPENROUTER_PROVISIONING_KEY}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      name: `tenant-${tenantId}`,
      limit: 50,
      limitResetType: "monthly",
    }),
  });

  // fetch() only rejects on network failure. Surface HTTP errors explicitly so
  // the sandbox is never started with an undefined credential.
  if (!res.ok) {
    throw new Error(`Failed to provision key for tenant ${tenantId}: ${res.status} ${res.statusText}`);
  }

  const { key } = await res.json();
  if (typeof key !== "string" || key.length === 0) {
    throw new Error(`Key provisioning response for tenant ${tenantId} did not include a key`);
  }

  // Start a sandbox with the tenant's scoped key
  const sdk = await SandboxAgent.start({
    spawn: {
      env: {
        OPENAI_API_KEY: key, // OpenRouter uses OpenAI-compatible endpoints
      },
    },
  });

  // NOTE(review): this session uses the "claude" agent while only
  // OPENAI_API_KEY is set; the credentials table on this page lists
  // ANTHROPIC_API_KEY / CLAUDE_API_KEY for Claude Code. Confirm the intended
  // agent/variable pairing.
  const session = await sdk.createSession({
    agent: "claude",
    sessionInit: { cwd: "/workspace" },
  });

  return { sdk, session };
}
```
#### Security
Recommended for multi-tenant applications. Each tenant gets a scoped key with its own budget, so exfiltration only exposes that tenant's allowance.
#### Use cases
- **Multi-tenant SaaS**: per-tenant spend tracking and budget limits
- **Production apps**: exposed to end users who need isolated credentials
- **Usage-based billing**: each tenant pays for their own consumption
#### Choosing a gateway
<AccordionGroup>
<Accordion title="OpenRouter provisioned keys" icon="cloud">
Managed service, zero infrastructure. [OpenRouter](https://openrouter.ai/docs/features/provisioning-api-keys) provides per-tenant API keys with spend tracking and budget limits via their Provisioning API. Pass the tenant key to Sandbox Agent as `OPENAI_API_KEY` (OpenRouter uses OpenAI-compatible endpoints).
```bash
# Create a key for a tenant with a $50/month budget
curl https://openrouter.ai/api/v1/keys \
-H "Authorization: Bearer $PROVISIONING_KEY" \
-H "Content-Type: application/json" \
-d '{
"name": "tenant-acme",
"limit": 50,
"limitResetType": "monthly"
}'
```
Easiest to set up but not open-source. See [OpenRouter pricing](https://openrouter.ai/docs/framework/pricing) for details.
</Accordion>
<Accordion title="LiteLLM proxy" icon="server">
Self-hosted, open-source (MIT). [LiteLLM](https://github.com/BerriAI/litellm) is an OpenAI-compatible proxy with hierarchical budgets (org, team, user, key), virtual keys, and spend tracking. Requires Python + PostgreSQL.
```bash
# Create a team (tenant) with a $500 budget
curl http://litellm:4000/team/new \
-H "Authorization: Bearer $LITELLM_MASTER_KEY" \
-H "Content-Type: application/json" \
-d '{
"team_alias": "tenant-acme",
"max_budget": 500
}'
# Generate a key for that team
curl http://litellm:4000/key/generate \
-H "Authorization: Bearer $LITELLM_MASTER_KEY" \
-H "Content-Type: application/json" \
-d '{
"team_id": "team-abc123",
"max_budget": 100
}'
```
Full control with no vendor lock-in. Organization-level features require an enterprise license.
</Accordion>
<Accordion title="Portkey gateway" icon="code-branch">
Self-hosted, open-source (Apache 2.0). [Portkey](https://github.com/Portkey-AI/gateway) is a lightweight OpenAI-compatible gateway supporting 200+ providers. Single binary, no database required. Create virtual keys with per-tenant budget limits and pass them to Sandbox Agent.
Lightest operational footprint of the self-hosted options. Observability and analytics require the managed platform or your own tooling.
</Accordion>
</AccordionGroup>
To bill tenants for LLM usage, use [Stripe token billing](https://docs.stripe.com/billing/token-billing) (integrates natively with OpenRouter) or query your gateway's spend API and feed usage into your billing system.
### Bring your own key
Each user provides their own API key. Users are billed directly by the LLM provider with no additional infrastructure needed.
Pass the user's key via `spawn.env`:
```typescript
const sdk = await SandboxAgent.start({
spawn: {
env: {
ANTHROPIC_API_KEY: userProvidedKey,
},
},
});
```
#### Security
API keys are typically long-lived. The key is visible to the agent and anything running inside the sandbox, so exfiltration is possible. This is usually acceptable for developer-facing tools where the user owns the key.
#### Use cases
- **Developer tools**: each user manages their own API key
- **Internal platforms**: users already have LLM provider accounts
- **Per-user billing**: no extra infrastructure needed
### Shared credentials
A single organization-wide API key is used for all sessions. All token usage appears on one bill with no per-user or per-tenant cost attribution.
```typescript
const sdk = await SandboxAgent.start({
spawn: {
env: {
ANTHROPIC_API_KEY: process.env.ORG_ANTHROPIC_KEY!,
OPENAI_API_KEY: process.env.ORG_OPENAI_KEY!,
},
},
});
```
If you need to track or limit spend per tenant, use a per-tenant gateway instead.
#### Security
Not recommended for anything other than internal tooling. A single exfiltrated key exposes your organization's entire LLM budget. If you need org-paid credentials for external users, use a per-tenant gateway with scoped keys instead.
#### Use cases
- **Single-tenant apps**: small number of users, one bill
- **Prototyping**: cost attribution not needed yet
- **Simplicity over security**: acceptable when exfiltration risk is low
### Personal subscription
If the user is signed into Claude Code or Codex on the host machine, Sandbox Agent automatically picks up their OAuth tokens. No configuration is needed.
#### Remote sandboxes
Extract credentials locally and pass them to a remote sandbox via `spawn.env`:
```bash
$ sandbox-agent credentials extract-env
ANTHROPIC_API_KEY=sk-ant-...
CLAUDE_API_KEY=sk-ant-...
OPENAI_API_KEY=sk-...
CODEX_API_KEY=sk-...
```
Use `-e` to prefix with `export` for shell sourcing.
#### Security
Personal subscriptions use OAuth tokens with a limited lifespan. These are the same credentials used when running an agent normally on the host. If a token is exfiltrated from the sandbox, the exposure window is short.
#### Use cases
- **Local development**: users are already signed into Claude Code or Codex
- **Internal tools**: every user has their own subscription
- **Prototyping**: no key management needed

View file

@ -27,9 +27,7 @@ await sdk.setMcpConfig(
// Create a session using the configured MCP servers // Create a session using the configured MCP servers
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
sessionInit: { cwd: "/workspace",
cwd: "/workspace",
},
}); });
await session.prompt([ await session.prompt([

View file

@ -20,8 +20,40 @@ Use [actor keys](https://rivet.dev/docs/actors/keys) to map each workspace to on
```ts Actor (server) ```ts Actor (server)
import { actor, setup } from "rivetkit"; import { actor, setup } from "rivetkit";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent, type SessionPersistDriver, type SessionRecord, type SessionEvent, type ListPageRequest, type ListPage, type ListEventsRequest } from "sandbox-agent";
import { RivetSessionPersistDriver, type RivetPersistState } from "@sandbox-agent/persist-rivet";
interface RivetPersistData { sessions: Record<string, SessionRecord>; events: Record<string, SessionEvent[]>; }
type RivetPersistState = { _sandboxAgentPersist: RivetPersistData };
/**
 * Session persistence driver that keeps session records and their events in
 * an actor's `state` object, under a single configurable key.
 *
 * Stored shape: `{ sessions: { [sessionId]: record }, events: { [sessionId]: event[] } }`.
 * Listing is offset-paginated: the cursor is the stringified index of the
 * next item in the sorted list.
 */
class RivetSessionPersistDriver implements SessionPersistDriver {
  private readonly stateKey: string;
  private readonly ctx: { state: Record<string, unknown> };

  /**
   * @param ctx - Object exposing the mutable `state` bag to persist into.
   * @param options - `stateKey` overrides the key used inside `ctx.state`
   *   (defaults to `"_sandboxAgentPersist"`).
   */
  constructor(ctx: { state: Record<string, unknown> }, options: { stateKey?: string } = {}) {
    this.ctx = ctx;
    this.stateKey = options.stateKey ?? "_sandboxAgentPersist";
    // Initialise the container once; existing persisted data is left untouched.
    if (!this.ctx.state[this.stateKey]) {
      this.ctx.state[this.stateKey] = { sessions: {}, events: {} };
    }
  }

  /** Persisted container, read from state on every access. */
  private get data(): RivetPersistData {
    return this.ctx.state[this.stateKey] as RivetPersistData;
  }

  /**
   * Cuts one page out of an already-sorted list.
   *
   * A cursor that is missing, non-numeric, fractional, or negative restarts at
   * offset 0. A limit that is missing, non-finite, or below 1 falls back to
   * 100; without this, a non-positive limit would yield an empty page whose
   * `nextCursor` never advances, and a paginating caller would loop forever.
   */
  private static page<T>(
    sorted: T[],
    cursor: string | null | undefined,
    limit: number | null | undefined,
  ): ListPage<T> {
    const parsedOffset = Number(cursor ?? 0);
    const offset = Number.isInteger(parsedOffset) && parsedOffset >= 0 ? parsedOffset : 0;
    const requested = limit ?? 100;
    const size = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 100;
    const items = sorted.slice(offset, offset + size);
    const next = offset + items.length;
    return { items, nextCursor: next < sorted.length ? String(next) : undefined };
  }

  /** Returns a shallow copy of the stored session, or `undefined` if absent. */
  async getSession(id: string) {
    const stored = this.data.sessions[id];
    return stored ? { ...stored } : undefined;
  }

  /** Lists sessions ordered by `createdAt`, then `id`, one page at a time. */
  async listSessions(request: ListPageRequest = {}): Promise<ListPage<SessionRecord>> {
    const sorted = Object.values(this.data.sessions).sort(
      (a, b) => a.createdAt - b.createdAt || a.id.localeCompare(b.id),
    );
    return RivetSessionPersistDriver.page(sorted, request.cursor, request.limit);
  }

  /** Inserts or replaces a session record and ensures it has an event list. */
  async updateSession(session: SessionRecord) {
    this.data.sessions[session.id] = { ...session };
    if (!this.data.events[session.id]) {
      this.data.events[session.id] = [];
    }
  }

  /** Lists a session's events ordered by `eventIndex`, then `id`, one page at a time. */
  async listEvents(request: ListEventsRequest): Promise<ListPage<SessionEvent>> {
    // Copy before sorting so the stored array keeps its insertion order.
    const sorted = [...(this.data.events[request.sessionId] ?? [])].sort(
      (a, b) => a.eventIndex - b.eventIndex || a.id.localeCompare(b.id),
    );
    return RivetSessionPersistDriver.page(sorted, request.cursor, request.limit);
  }

  /** Appends an event to a session, creating the session's event list if needed. */
  async insertEvent(sessionId: string, event: SessionEvent) {
    const events = this.data.events[sessionId] ?? [];
    // JSON round-trip detaches the stored payload from the caller's object.
    events.push({ ...event, payload: JSON.parse(JSON.stringify(event.payload)) });
    this.data.events[sessionId] = events;
  }
}
type WorkspaceState = RivetPersistState & { type WorkspaceState = RivetPersistState & {
sandboxId: string; sandboxId: string;
@ -111,5 +143,5 @@ await conn.prompt({
## Notes ## Notes
- Keep sandbox calls actor-only. Browser clients should not call Sandbox Agent directly. - Keep sandbox calls actor-only. Browser clients should not call Sandbox Agent directly.
- Use `@sandbox-agent/persist-rivet` so session history persists in actor state. - Copy the Rivet persist driver from the example above into your project so session history persists in actor state.
- For client connection patterns, see [Rivet JavaScript client](https://rivet.dev/docs/clients/javascript). - For client connection patterns, see [Rivet JavaScript client](https://rivet.dev/docs/clients/javascript).

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
---
title: "Orchestration Architecture"
description: "Production topology, backend requirements, and session persistence."
icon: "sitemap"
---
This page covers production topology and backend requirements. Read [Architecture](/architecture) first for an overview of how the server, SDK, and agent processes fit together.
## Suggested Topology
Run the SDK on your backend, then call it from your frontend.
This extra hop is recommended because it keeps auth/token logic on the backend and makes persistence simpler.
```mermaid placement="top-right"
flowchart LR
BROWSER["Browser"]
subgraph BACKEND["Your backend"]
direction TB
SDK["Sandbox Agent SDK"]
end
subgraph SANDBOX_SIMPLE["Sandbox"]
SERVER_SIMPLE["Sandbox Agent server"]
end
BROWSER --> BACKEND
BACKEND --> SDK --> SERVER_SIMPLE
```
### Backend requirements
Your backend layer needs to handle:
- **Long-running connections**: prompts can take minutes.
- **Session affinity**: follow-up messages must reach the same session.
- **State between requests**: session metadata and event history must persist across requests.
- **Graceful recovery**: sessions should resume after backend restarts.
We recommend [Rivet](https://rivet.dev) over serverless because actors natively support the long-lived connections, session routing, and state persistence that agent workloads require.
## Session persistence
For storage driver options and replay behavior, see [Persisting Sessions](/session-persistence).

View file

@ -1,210 +0,0 @@
# Pi Agent Support Plan (pi-mono)
## Implementation Status Update
- Runtime selection now supports two internal modes:
- `PerSession` (default for unknown/non-allowlisted Pi capabilities)
- `Shared` (allowlist-only compatibility path)
- Pi sessions now use per-session process isolation by default, enabling true concurrent Pi sessions in Inspector and API clients.
- Shared Pi server code remains available and is used only when capability checks allow multiplexing.
- Session termination for per-session Pi mode hard-kills the underlying Pi process and clears queued prompts/pending waiters.
- In-session concurrent sends are serialized with an unbounded daemon-side FIFO queue per session.
## Investigation Summary
### Pi CLI modes and RPC protocol
- Pi supports multiple modes including interactive, print/JSON output, RPC, and SDK usage. JSON mode outputs a stream of JSON events suitable for parsing, and RPC mode is intended for programmatic control over stdin/stdout.
- RPC mode is started with `pi --mode rpc` and supports options like `--provider`, `--model`, `--no-session`, and `--session-dir`.
- The RPC protocol is newline-delimited JSON over stdin/stdout:
- Commands are JSON objects written to stdin.
- Responses are JSON objects with `type: "response"` and optional `id`.
- Events are JSON objects without `id`.
- `prompt` can include images using `ImageContent` (base64 or URL) alongside text.
- JSON/print mode (`pi -p` or `pi --print --mode json`) produces JSONL for non-interactive parsing and can resume sessions with a token.
### RPC commands
RPC commands listed in `rpc.md` include:
- `new_session`, `get_state`, `list_sessions`, `delete_session`, `rename_session`, `clear_session`
- `prompt`, `queue_message`, `abort`, `get_queued_messages`
### RPC event types
RPC events listed in `rpc.md` include:
- `agent_start`, `agent_end`
- `turn_start`, `turn_end`
- `message_start`, `message_update`, `message_end`
- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`
- `auto_compaction`, `auto_retry`, `hook_error`
`message_update` uses `assistantMessageEvent` deltas such as:
- `start`, `text_start`, `text_delta`, `text_end`
- `thinking_start`, `thinking_delta`, `thinking_end`
- `toolcall_start`, `toolcall_delta`, `toolcall_end`
- `toolcall_args_start`, `toolcall_args_delta`, `toolcall_args_end`
- `done`, `error`
`tool_execution_update` includes `partialResult`, which is described as accumulated output so far.
### Schema source locations (pi-mono)
RPC types are documented as living in:
- `packages/ai/src/types.ts` (Model types)
- `packages/agent/src/types.ts` (AgentResponse types)
- `packages/coding-agent/src/core/messages.ts` (message types)
- `packages/coding-agent/src/modes/rpc/rpc-types.ts` (RPC protocol types)
### Distribution assets
Pi releases provide platform-specific binaries such as:
- `pi-darwin-arm64`, `pi-darwin-x64`
- `pi-linux-arm64`, `pi-linux-x64`
- `pi-win-x64.zip`
## Integration Decisions
- Follow the OpenCode pattern: a shared long-running process (stdio RPC) with session multiplexing.
- Primary integration path is RPC streaming (`pi --mode rpc`).
- JSON/print mode is a fallback only (diagnostics or non-interactive runs).
- Create sessions via `new_session`; store the returned `sessionId` as `native_session_id`.
- Use `get_state` as a re-sync path after server restarts.
- Use `prompt` for send-message, with optional image content.
- Convert Pi events into universal events; emit daemon synthetic `session.started` on session creation and `session.ended` only on errors/termination.
## Implementation Plan
### 1) Agent Identity + Capabilities
Files:
- `server/packages/agent-management/src/agents.rs`
- `server/packages/sandbox-agent/src/router.rs`
- `docs/cli.mdx`, `docs/conversion.mdx`, `docs/session-transcript-schema.mdx`
- `README.md`, `frontend/packages/website/src/components/FAQ.tsx`
Tasks:
- Add `AgentId::Pi` with string/binary name `"pi"` and parsing rules.
- Add Pi to `all_agents()` and agent lists.
- Define `AgentCapabilities` for Pi:
- `tool_calls=true`, `tool_results=true`
- `text_messages=true`, `streaming_deltas=true`, `item_started=true`
- `reasoning=true` (from `thinking_*` deltas)
- `images=true` (ImageContent in `prompt`)
- `permissions=false`, `questions=false`, `mcp_tools=false`
- `shared_process=true`, `session_lifecycle=false` (no native session events)
- `error_events=true` (hook_error)
- `command_execution=false`, `file_changes=false`, `file_attachments=false`
### 2) Installer and Binary Resolution
Files:
- `server/packages/agent-management/src/agents.rs`
Tasks:
- Add `install_pi()` that:
- Downloads the correct release asset per platform (`pi-<platform>`).
- Handles `.zip` on Windows and raw binaries elsewhere.
- Marks binary executable.
- Add Pi to `AgentManager::install`, `is_installed`, `version`.
- Version detection: try `--version`, `version`, `-V`.
### 3) Schema Extraction for Pi
Files:
- `resources/agent-schemas/src/pi.ts` (new)
- `resources/agent-schemas/src/index.ts`
- `resources/agent-schemas/artifacts/json-schema/pi.json`
- `server/packages/extracted-agent-schemas/build.rs`
- `server/packages/extracted-agent-schemas/src/lib.rs`
Tasks:
- Implement `extractPiSchema()`:
- Download pi-mono sources (zip/tarball) into a temp dir.
- Use `ts-json-schema-generator` against `packages/coding-agent/src/modes/rpc/rpc-types.ts`.
- Include dependent files per `rpc.md` (ai/types, agent/types, core/messages).
- Extract `RpcEvent`, `RpcResponse`, `RpcCommand` unions (exact type names from source).
- Add fallback schema if remote fetch fails (minimal union with event/response fields).
- Wire pi into extractor index and artifact generation.
### 4) Universal Schema Conversion (Pi -> Universal)
Files:
- `server/packages/universal-agent-schema/src/agents/pi.rs` (new)
- `server/packages/universal-agent-schema/src/agents/mod.rs`
- `server/packages/universal-agent-schema/src/lib.rs`
- `server/packages/sandbox-agent/src/router.rs`
Mapping rules:
- `message_start` -> `item.started` (kind=message, role=assistant, native_item_id=messageId)
- `message_update`:
- `text_*` -> `item.delta` (assistant text delta)
- `thinking_*` -> `item.delta` with `ContentPart::Reasoning` (visibility=Private)
- `toolcall_*` and `toolcall_args_*` -> ignore for now (tool_execution_* is authoritative)
- `error` -> `item.completed` with `ItemStatus::Failed` (if no later message_end)
- `message_end` -> `item.completed` (finalize assistant message)
- `tool_execution_start` -> `item.started` (kind=tool_call, ContentPart::ToolCall)
- `tool_execution_update` -> `item.delta` for a synthetic tool_result item:
- Maintain a per-toolCallId buffer to compute delta from accumulated `partialResult`.
- `tool_execution_end` -> `item.completed` (kind=tool_result, output from `result.content`)
- If `isError=true`, set item status to failed.
- `agent_start`, `turn_start`, `turn_end`, `agent_end`, `auto_compaction`, `auto_retry`, `hook_error`:
- Map to `ItemKind::Status` with a label like `pi.agent_start`, `pi.auto_retry`, etc.
- Do not emit `session.ended` for these events.
- If event parsing fails, emit `agent.unparsed` (source=daemon, synthetic=true) and fail tests.
### 5) Shared RPC Server Integration
Files:
- `server/packages/sandbox-agent/src/router.rs`
Tasks:
- Add a new managed stdio server type for Pi, similar to Codex:
- Create `PiServer` struct with:
- stdin sender
- pending request map keyed by request id
- per-session native session id mapping
- Extend `ManagedServerKind` to include Pi.
- Add `ensure_pi_server()` and `spawn_pi_server()` using `pi --mode rpc`.
- Add a `handle_pi_server_output()` loop to parse stdout lines into events/responses.
- Session creation:
- On `create_session`, ensure Pi server is running, send `new_session`, store sessionId.
- Register session with `server_manager.register_session` for native mapping.
- Sending messages:
- Use `prompt` command; include sessionId and optional images.
- Emit synthetic `item.started` only if Pi does not emit `message_start`.
### 6) Router + Streaming Path Changes
Files:
- `server/packages/sandbox-agent/src/router.rs`
Tasks:
- Add Pi handling to:
- `create_session` (new_session)
- `send_message` (prompt)
- `parse_agent_line` (Pi event conversion)
- `agent_modes` (default to `default` unless Pi exposes a mode list)
- `agent_supports_resume` (true if Pi supports session resume)
### 7) Tests
Files:
- `server/packages/sandbox-agent/tests/...`
- `server/packages/universal-agent-schema/tests/...` (if present)
Tasks:
- Unit tests for conversion:
- `message_start/update/end` -> item.started/delta/completed
- `tool_execution_*` -> tool call/result mapping with partialResult delta
- failure -> agent.unparsed
- Integration tests:
- Start Pi RPC server, create session, send prompt, stream events.
- Validate `native_session_id` mapping and event ordering.
- Update HTTP/SSE test coverage to include Pi agent if relevant.
## Risk Areas / Edge Cases
- `tool_execution_update.partialResult` is cumulative; must compute deltas.
- `message_update` may emit `done`/`error` without `message_end`; handle both paths.
- No native session lifecycle events; rely on daemon synthetic events.
- Session recovery after RPC server restart requires `get_state` + re-register sessions.
## Acceptance Criteria
- Pi appears in `/v1/agents`, CLI list, and docs.
- `create_session` returns `native_session_id` from Pi `new_session`.
- Streaming prompt yields universal events with proper ordering:
- message -> item.started/delta/completed
- tool execution -> tool call + tool result
- Tests pass and no synthetic data is used in test fixtures.
## Sources
- https://upd.dev/badlogic/pi-mono/src/commit/d36e0ea07303d8a76d51b4a7bd5f0d6d3c490860/packages/coding-agent/docs/rpc.md
- https://buildwithpi.ai/pi-cli
- https://takopi.dev/docs/pi-cli/
- https://upd.dev/badlogic/pi-mono/releases

View file

@ -64,7 +64,7 @@ icon: "rocket"
docker run -p 2468:2468 \ docker run -p 2468:2468 \
-e ANTHROPIC_API_KEY="sk-ant-..." \ -e ANTHROPIC_API_KEY="sk-ant-..." \
-e OPENAI_API_KEY="sk-..." \ -e OPENAI_API_KEY="sk-..." \
rivetdev/sandbox-agent:0.3.1-full \ rivetdev/sandbox-agent:0.4.2-full \
server --no-token --host 0.0.0.0 --port 2468 server --no-token --host 0.0.0.0 --port 2468
``` ```
</Tab> </Tab>
@ -77,6 +77,9 @@ icon: "rocket"
<Accordion title="Testing without API keys"> <Accordion title="Testing without API keys">
Use the `mock` agent for SDK and integration testing without provider credentials. Use the `mock` agent for SDK and integration testing without provider credentials.
</Accordion> </Accordion>
<Accordion title="Multi-tenant and per-user billing">
For per-tenant token tracking, budget enforcement, or usage-based billing, see [LLM Credentials](/llm-credentials) for gateway options like OpenRouter, LiteLLM, and Portkey.
</Accordion>
</AccordionGroup> </AccordionGroup>
</Step> </Step>
@ -86,7 +89,7 @@ icon: "rocket"
Install and run the binary directly. Install and run the binary directly.
```bash ```bash
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
sandbox-agent server --no-token --host 0.0.0.0 --port 2468 sandbox-agent server --no-token --host 0.0.0.0 --port 2468
``` ```
</Tab> </Tab>
@ -95,7 +98,7 @@ icon: "rocket"
Run without installing globally. Run without installing globally.
```bash ```bash
npx @sandbox-agent/cli@0.3.x server --no-token --host 0.0.0.0 --port 2468 npx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468
``` ```
</Tab> </Tab>
@ -103,7 +106,7 @@ icon: "rocket"
Run without installing globally. Run without installing globally.
```bash ```bash
bunx @sandbox-agent/cli@0.3.x server --no-token --host 0.0.0.0 --port 2468 bunx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468
``` ```
</Tab> </Tab>
@ -111,7 +114,7 @@ icon: "rocket"
Install globally, then run. Install globally, then run.
```bash ```bash
npm install -g @sandbox-agent/cli@0.3.x npm install -g @sandbox-agent/cli@0.4.x
sandbox-agent server --no-token --host 0.0.0.0 --port 2468 sandbox-agent server --no-token --host 0.0.0.0 --port 2468
``` ```
</Tab> </Tab>
@ -120,7 +123,7 @@ icon: "rocket"
Install globally, then run. Install globally, then run.
```bash ```bash
bun add -g @sandbox-agent/cli@0.3.x bun add -g @sandbox-agent/cli@0.4.x
# Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
sandbox-agent server --no-token --host 0.0.0.0 --port 2468 sandbox-agent server --no-token --host 0.0.0.0 --port 2468
@ -131,7 +134,7 @@ icon: "rocket"
For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess. For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess.
```bash ```bash
npm install sandbox-agent@0.3.x npm install sandbox-agent@0.4.x
``` ```
```typescript ```typescript
@ -145,7 +148,7 @@ icon: "rocket"
For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess. For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess.
```bash ```bash
bun add sandbox-agent@0.3.x bun add sandbox-agent@0.4.x
# Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -223,6 +226,16 @@ icon: "rocket"
If agents are not installed up front, they are lazily installed when creating a session. If agents are not installed up front, they are lazily installed when creating a session.
</Step> </Step>
<Step title="Install desktop dependencies (optional, Linux only)">
If you want to use `/v1/desktop/*`, install the desktop runtime packages first:
```bash
sandbox-agent install desktop --yes
```
Then use `GET /v1/desktop/status` or `sdk.getDesktopStatus()` to verify the runtime is ready before calling desktop screenshot or input APIs.
</Step>
<Step title="Create a session"> <Step title="Create a session">
```typescript ```typescript
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";

View file

@ -12,11 +12,12 @@ Current exports:
- `ProcessTerminal` for attaching to a running tty process - `ProcessTerminal` for attaching to a running tty process
- `AgentTranscript` for rendering session/message timelines without bundling any styles - `AgentTranscript` for rendering session/message timelines without bundling any styles
- `ChatComposer` for a reusable prompt input/send surface - `ChatComposer` for a reusable prompt input/send surface
- `useTranscriptVirtualizer` for wiring large transcript lists to a scroll container
## Install ## Install
```bash ```bash
npm install @sandbox-agent/react@0.3.x npm install @sandbox-agent/react@0.4.x
``` ```
## Full example ## Full example
@ -184,11 +185,20 @@ Useful props:
- `className`: root class hook - `className`: root class hook
- `classNames`: slot-level class hooks for styling from outside the package - `classNames`: slot-level class hooks for styling from outside the package
- `scrollRef` + `virtualize`: opt into TanStack Virtual against an external scroll container
- `renderMessageText`: custom text or markdown renderer - `renderMessageText`: custom text or markdown renderer
- `renderToolItemIcon`, `renderToolGroupIcon`, `renderChevron`, `renderEventLinkContent`: presentation overrides - `renderToolItemIcon`, `renderToolGroupIcon`, `renderChevron`, `renderEventLinkContent`: presentation overrides
- `renderInlinePendingIndicator`, `renderThinkingState`: loading/thinking UI overrides - `renderInlinePendingIndicator`, `renderThinkingState`: loading/thinking UI overrides
- `isDividerEntry`, `canOpenEvent`, `getToolGroupSummary`: behavior overrides for grouping and labels - `isDividerEntry`, `canOpenEvent`, `getToolGroupSummary`: behavior overrides for grouping and labels
## Transcript virtualization hook
`useTranscriptVirtualizer` exposes the same TanStack Virtual behavior used by `AgentTranscript` when `virtualize` is enabled.
- Pass the grouped transcript rows you want to virtualize
- Pass a `scrollRef` that points at the actual scrollable element
- Use it when you need transcript-aware virtualization outside the stock `AgentTranscript` renderer
## Composer and conversation ## Composer and conversation
`ChatComposer` is the headless message input. `AgentConversation` composes `AgentTranscript` and `ChatComposer` so apps can reuse the transcript/composer pairing without pulling in Inspector session chrome. `ChatComposer` is the headless message input. `AgentConversation` composes `AgentTranscript` and `ChatComposer` so apps can reuse the transcript/composer pairing without pulling in Inspector session chrome.

View file

@ -11,28 +11,22 @@ The TypeScript SDK is centered on `sandbox-agent` and its `SandboxAgent` class.
<Tabs> <Tabs>
<Tab title="npm"> <Tab title="npm">
```bash ```bash
npm install sandbox-agent@0.3.x npm install sandbox-agent@0.4.x
``` ```
</Tab> </Tab>
<Tab title="bun"> <Tab title="bun">
```bash ```bash
bun add sandbox-agent@0.3.x bun add sandbox-agent@0.4.x
# Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
</Tab> </Tab>
</Tabs> </Tabs>
## Optional persistence drivers
```bash
npm install @sandbox-agent/persist-indexeddb@0.3.x @sandbox-agent/persist-sqlite@0.3.x @sandbox-agent/persist-postgres@0.3.x
```
## Optional React components ## Optional React components
```bash ```bash
npm install @sandbox-agent/react@0.3.x npm install @sandbox-agent/react@0.4.x
``` ```
## Create a client ## Create a client
@ -68,15 +62,12 @@ const sdk = await SandboxAgent.connect({
controller.abort(); controller.abort();
``` ```
With persistence: With persistence (see [Persisting Sessions](/session-persistence) for driver options):
```ts ```ts
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent, InMemorySessionPersistDriver } from "sandbox-agent";
import { SQLiteSessionPersistDriver } from "@sandbox-agent/persist-sqlite";
const persist = new SQLiteSessionPersistDriver({ const persist = new InMemorySessionPersistDriver();
filename: "./sessions.db",
});
const sdk = await SandboxAgent.connect({ const sdk = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468", baseUrl: "http://127.0.0.1:2468",
@ -84,25 +75,40 @@ const sdk = await SandboxAgent.connect({
}); });
``` ```
Local autospawn (Node.js only): Local spawn with a sandbox provider:
```ts ```ts
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { local } from "sandbox-agent/local";
const localSdk = await SandboxAgent.start(); const sdk = await SandboxAgent.start({
sandbox: local(),
});
await localSdk.dispose(); // sdk.sandboxId — prefixed provider ID (e.g. "local/127.0.0.1:2468")
await sdk.destroySandbox(); // provider-defined cleanup + disposes client
``` ```
`SandboxAgent.start(...)` requires a `sandbox` provider. Built-in providers:
| Import | Provider |
|--------|----------|
| `sandbox-agent/local` | Local subprocess |
| `sandbox-agent/docker` | Docker container |
| `sandbox-agent/e2b` | E2B sandbox |
| `sandbox-agent/daytona` | Daytona workspace |
| `sandbox-agent/vercel` | Vercel Sandbox |
| `sandbox-agent/cloudflare` | Cloudflare Sandbox |
Use `sdk.dispose()` to disconnect without changing sandbox state, `sdk.pauseSandbox()` for graceful suspension when supported, or `sdk.killSandbox()` for permanent deletion.
## Session flow ## Session flow
```ts ```ts
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "mock", agent: "mock",
sessionInit: { cwd: "/",
cwd: "/",
mcpServers: [],
},
}); });
const prompt = await session.prompt([ const prompt = await session.prompt([
@ -190,6 +196,44 @@ const writeResult = await sdk.writeFsFile({ path: "./hello.txt" }, "hello");
console.log(health.status, agents.agents.length, entries.length, writeResult.path); console.log(health.status, agents.agents.length, entries.length, writeResult.path);
``` ```
## Desktop API
The SDK also wraps the desktop host/runtime HTTP API.
Install desktop dependencies first on Linux hosts:
```bash
sandbox-agent install desktop --yes
```
Then query status, surface remediation if needed, and start the runtime:
```ts
const status = await sdk.getDesktopStatus();
if (status.state === "install_required") {
console.log(status.installCommand);
}
const started = await sdk.startDesktop({
width: 1440,
height: 900,
dpi: 96,
});
const screenshot = await sdk.takeDesktopScreenshot();
const displayInfo = await sdk.getDesktopDisplayInfo();
await sdk.moveDesktopMouse({ x: 400, y: 300 });
await sdk.clickDesktop({ x: 400, y: 300, button: "left", clickCount: 1 });
await sdk.typeDesktopText({ text: "hello world", delayMs: 10 });
await sdk.pressDesktopKey({ key: "ctrl+l" });
await sdk.stopDesktop();
```
Screenshot helpers return `Uint8Array` PNG bytes. The SDK does not attempt to install OS packages remotely; callers should surface `missingDependencies` and `installCommand` from `getDesktopStatus()`.
## Error handling ## Error handling
```ts ```ts
@ -223,5 +267,10 @@ Parameters:
- `token` (optional): Bearer token for authenticated servers - `token` (optional): Bearer token for authenticated servers
- `headers` (optional): Additional request headers - `headers` (optional): Additional request headers
- `fetch` (optional): Custom fetch implementation used by SDK HTTP and session calls - `fetch` (optional): Custom fetch implementation used by SDK HTTP and session calls
- `skipHealthCheck` (optional): Set to `true` to skip the startup `/v1/health` wait
- `waitForHealth` (optional, defaults to enabled): waits for `/v1/health` before HTTP helpers and session setup proceed; pass `false` to disable or `{ timeoutMs }` to bound the wait - `waitForHealth` (optional, defaults to enabled): waits for `/v1/health` before HTTP helpers and session setup proceed; pass `false` to disable or `{ timeoutMs }` to bound the wait
- `signal` (optional): aborts the startup `/v1/health` wait used by `connect()` - `signal` (optional): aborts the startup `/v1/health` wait used by `connect()`
## LLM credentials
Sandbox Agent supports personal API keys, shared organization keys, and per-tenant gateway keys with budget enforcement. See [LLM Credentials](/llm-credentials) for setup details.

View file

@ -4,7 +4,7 @@ description: "Backend-first auth and access control patterns."
icon: "shield" icon: "shield"
--- ---
As covered in [Architecture](/architecture), run the Sandbox Agent client on your backend, not in the browser. As covered in [Orchestration Architecture](/orchestration-architecture), run the Sandbox Agent client on your backend, not in the browser.
This keeps sandbox credentials private and gives you one place for authz, rate limiting, and audit logging. This keeps sandbox credentials private and gives you one place for authz, rate limiting, and audit logging.
@ -92,7 +92,7 @@ export const workspace = actor({
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
sessionInit: { cwd: "/workspace" }, cwd: "/workspace",
}); });
session.onEvent((event) => { session.onEvent((event) => {

View file

@ -10,14 +10,22 @@ With persistence enabled, sessions can be restored after runtime/session loss. S
Each driver stores: Each driver stores:
- `SessionRecord` (`id`, `agent`, `agentSessionId`, `lastConnectionId`, `createdAt`, optional `destroyedAt`, optional `sessionInit`) - `SessionRecord` (`id`, `agent`, `agentSessionId`, `lastConnectionId`, `createdAt`, optional `destroyedAt`, optional `sandboxId`, optional `sessionInit`, optional `configOptions`, optional `modes`)
- `SessionEvent` (`id`, `eventIndex`, `sessionId`, `connectionId`, `sender`, `payload`, `createdAt`) - `SessionEvent` (`id`, `eventIndex`, `sessionId`, `connectionId`, `sender`, `payload`, `createdAt`)
## Persistence drivers ## Persistence drivers
### In-memory ### Rivet
Best for local dev and ephemeral workloads. Recommended for sandbox orchestration with actor state. See [Multiplayer](/multiplayer) for a full Rivet actor example with persistence in actor state.
### IndexedDB (browser)
Best for browser apps that should survive reloads. See the [Inspector source](https://github.com/rivet-dev/sandbox-agent/tree/main/frontend/packages/inspector/src/persist-indexeddb.ts) for a complete IndexedDB driver you can copy into your project.
### In-memory (built-in)
Best for local dev and ephemeral workloads. No extra dependencies required.
```ts ```ts
import { InMemorySessionPersistDriver, SandboxAgent } from "sandbox-agent"; import { InMemorySessionPersistDriver, SandboxAgent } from "sandbox-agent";
@ -33,91 +41,17 @@ const sdk = await SandboxAgent.connect({
}); });
``` ```
### Rivet
Recommended for sandbox orchestration with actor state.
```bash
npm install @sandbox-agent/persist-rivet@0.3.x
```
```ts
import { actor } from "rivetkit";
import { SandboxAgent } from "sandbox-agent";
import { RivetSessionPersistDriver, type RivetPersistState } from "@sandbox-agent/persist-rivet";
type PersistedState = RivetPersistState & {
sandboxId: string;
baseUrl: string;
};
export default actor({
createState: async () => {
return {
sandboxId: "sbx_123",
baseUrl: "http://127.0.0.1:2468",
} satisfies Partial<PersistedState>;
},
createVars: async (c) => {
const persist = new RivetSessionPersistDriver(c);
const sdk = await SandboxAgent.connect({
baseUrl: c.state.baseUrl,
persist,
});
const session = await sdk.resumeOrCreateSession({ id: "default", agent: "codex" });
const unsubscribe = session.onEvent((event) => {
c.broadcast("session.event", event);
});
return { sdk, session, unsubscribe };
},
actions: {
sendMessage: async (c, message: string) => {
await c.vars.session.prompt([{ type: "text", text: message }]);
},
},
onSleep: async (c) => {
c.vars.unsubscribe?.();
await c.vars.sdk.dispose();
},
});
```
### IndexedDB
Best for browser apps that should survive reloads.
```bash
npm install @sandbox-agent/persist-indexeddb@0.3.x
```
```ts
import { SandboxAgent } from "sandbox-agent";
import { IndexedDbSessionPersistDriver } from "@sandbox-agent/persist-indexeddb";
const persist = new IndexedDbSessionPersistDriver({
databaseName: "sandbox-agent-session-store",
});
const sdk = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468",
persist,
});
```
### SQLite ### SQLite
Best for local/server Node apps that need durable storage without a DB server. Best for local/server Node apps that need durable storage without a DB server.
```bash ```bash
npm install @sandbox-agent/persist-sqlite@0.3.x npm install better-sqlite3
``` ```
```ts ```ts
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { SQLiteSessionPersistDriver } from "@sandbox-agent/persist-sqlite"; import { SQLiteSessionPersistDriver } from "./persist.ts";
const persist = new SQLiteSessionPersistDriver({ const persist = new SQLiteSessionPersistDriver({
filename: "./sandbox-agent.db", filename: "./sandbox-agent.db",
@ -129,17 +63,19 @@ const sdk = await SandboxAgent.connect({
}); });
``` ```
See the [full SQLite example](https://github.com/rivet-dev/sandbox-agent/tree/main/examples/persist-sqlite) for the complete driver implementation you can copy into your project.
### Postgres ### Postgres
Use when you already run Postgres and want shared relational storage. Use when you already run Postgres and want shared relational storage.
```bash ```bash
npm install @sandbox-agent/persist-postgres@0.3.x npm install pg
``` ```
```ts ```ts
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { PostgresSessionPersistDriver } from "@sandbox-agent/persist-postgres"; import { PostgresSessionPersistDriver } from "./persist.ts";
const persist = new PostgresSessionPersistDriver({ const persist = new PostgresSessionPersistDriver({
connectionString: process.env.DATABASE_URL, connectionString: process.env.DATABASE_URL,
@ -152,6 +88,8 @@ const sdk = await SandboxAgent.connect({
}); });
``` ```
See the [full Postgres example](https://github.com/rivet-dev/sandbox-agent/tree/main/examples/persist-postgres) for the complete driver implementation you can copy into your project.
### Custom driver ### Custom driver
Implement `SessionPersistDriver` for custom backends. Implement `SessionPersistDriver` for custom backends.
@ -160,11 +98,11 @@ Implement `SessionPersistDriver` for custom backends.
import type { SessionPersistDriver } from "sandbox-agent"; import type { SessionPersistDriver } from "sandbox-agent";
class MyDriver implements SessionPersistDriver { class MyDriver implements SessionPersistDriver {
async getSession(id) { return null; } async getSession(id) { return undefined; }
async listSessions(request) { return { items: [] }; } async listSessions(request) { return { items: [] }; }
async updateSession(session) {} async updateSession(session) {}
async listEvents(request) { return { items: [] }; } async listEvents(request) { return { items: [] }; }
async insertEvent(event) {} async insertEvent(sessionId, event) {}
} }
``` ```

View file

@ -1,388 +0,0 @@
---
title: "Session Transcript Schema"
description: "Universal event schema for session transcripts across all agents."
---
Each coding agent outputs events in its own native format. The sandbox-agent converts these into a universal event schema, giving you a consistent session transcript regardless of which agent you use.
The schema is defined in [OpenAPI format](https://github.com/rivet-dev/sandbox-agent/blob/main/docs/openapi.json). See the [HTTP API Reference](/api-reference) for endpoint documentation.
## Coverage Matrix
This table shows which agent features appear in the universal event stream. All agents retain their full native feature coverage; the table only reflects what is normalized into the schema.
| Feature | Claude | Codex | OpenCode | Amp | Pi (RPC) |
|--------------------|:------:|:-----:|:------------:|:------------:|:------------:|
| Stability | Stable | Stable| Experimental | Experimental | Experimental |
| Text Messages | ✓ | ✓ | ✓ | ✓ | ✓ |
| Tool Calls | ✓ | ✓ | ✓ | ✓ | ✓ |
| Tool Results | ✓ | ✓ | ✓ | ✓ | ✓ |
| Questions (HITL) | ✓ | | ✓ | | |
| Permissions (HITL) | ✓ | ✓ | ✓ | - | |
| Images | - | ✓ | ✓ | - | ✓ |
| File Attachments | - | ✓ | ✓ | - | |
| Session Lifecycle | - | ✓ | ✓ | - | |
| Error Events | - | ✓ | ✓ | ✓ | ✓ |
| Reasoning/Thinking | - | ✓ | - | - | ✓ |
| Command Execution | - | ✓ | - | - | |
| File Changes | - | ✓ | - | - | |
| MCP Tools | ✓ | ✓ | ✓ | ✓ | |
| Streaming Deltas | ✓ | ✓ | ✓ | - | ✓ |
| Variants | | ✓ | ✓ | ✓ | ✓ |
Agents: [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) · [Codex](https://github.com/openai/codex) · [OpenCode](https://github.com/opencode-ai/opencode) · [Amp](https://ampcode.com) · [Pi](https://buildwithpi.ai/pi-cli)
- ✓ = Appears in session events
- \- = Agent supports natively, schema conversion coming soon
- (blank) = Not supported by agent
- Pi's runtime model is a router-managed, per-session RPC (`pi --mode rpc`); it does not use generic subprocess streaming.
<AccordionGroup>
<Accordion title="Text Messages">
Basic message exchange between user and assistant.
</Accordion>
<Accordion title="Tool Calls & Results">
Visibility into tool invocations (file reads, command execution, etc.) and their results. When not natively supported, tool activity is embedded in message content.
</Accordion>
<Accordion title="Questions (HITL)">
Interactive questions the agent asks the user. Emits `question.requested` and `question.resolved` events.
</Accordion>
<Accordion title="Permissions (HITL)">
Permission requests for sensitive operations. Emits `permission.requested` and `permission.resolved` events.
</Accordion>
<Accordion title="Images">
Support for image attachments in messages.
</Accordion>
<Accordion title="File Attachments">
Support for file attachments in messages.
</Accordion>
<Accordion title="Session Lifecycle">
Native `session.started` and `session.ended` events. When not supported, the daemon emits synthetic lifecycle events.
</Accordion>
<Accordion title="Error Events">
Structured error events for runtime failures.
</Accordion>
<Accordion title="Reasoning/Thinking">
Extended thinking or reasoning content with visibility controls.
</Accordion>
<Accordion title="Command Execution">
Detailed command execution events with stdout/stderr.
</Accordion>
<Accordion title="File Changes">
Structured file modification events with diffs.
</Accordion>
<Accordion title="MCP Tools">
Model Context Protocol tool support.
</Accordion>
<Accordion title="Streaming Deltas">
Native streaming of content deltas. When not supported, the daemon emits a single synthetic delta before `item.completed`.
</Accordion>
<Accordion title="Variants">
Model variants such as reasoning effort or depth. Agents may expose different variant sets per model.
</Accordion>
</AccordionGroup>
Want support for another agent? [Open an issue](https://github.com/rivet-dev/sandbox-agent/issues/new) to request it.
## UniversalEvent
Every event from the API is wrapped in a `UniversalEvent` envelope.
| Field | Type | Description |
|-------|------|-------------|
| `event_id` | string | Unique identifier for this event |
| `sequence` | integer | Monotonic sequence number within the session (starts at 1) |
| `time` | string | RFC3339 timestamp |
| `session_id` | string | Daemon-generated session identifier |
| `native_session_id` | string? | Provider-native session/thread identifier (e.g., Codex `threadId`, OpenCode `sessionID`) |
| `source` | string | Event origin: `agent` (native) or `daemon` (synthetic) |
| `synthetic` | boolean | Whether this event was generated by the daemon to fill gaps |
| `type` | string | Event type (see [Event Types](#event-types)) |
| `data` | object | Event-specific payload |
| `raw` | any? | Original provider payload (only when `include_raw=true`) |
```json
{
"event_id": "evt_abc123",
"sequence": 1,
"time": "2025-01-28T12:00:00Z",
"session_id": "my-session",
"native_session_id": "thread_xyz",
"source": "agent",
"synthetic": false,
"type": "item.completed",
"data": { ... }
}
```
## Event Types
### Session Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `session.started` | Session has started | `{ metadata?: any }` |
| `session.ended` | Session has ended | `{ reason, terminated_by, message?, exit_code? }` |
### Turn Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `turn.started` | Turn has started | `{ phase: "started", turn_id?, metadata? }` |
| `turn.ended` | Turn has ended | `{ phase: "ended", turn_id?, metadata? }` |
**SessionEndedData**
| Field | Type | Values |
|-------|------|--------|
| `reason` | string | `completed`, `error`, `terminated` |
| `terminated_by` | string | `agent`, `daemon` |
| `message` | string? | Error message (only present when reason is `error`) |
| `exit_code` | int? | Process exit code (only present when reason is `error`) |
| `stderr` | StderrOutput? | Structured stderr output (only present when reason is `error`) |
**StderrOutput**
| Field | Type | Description |
|-------|------|-------------|
| `head` | string? | First 20 lines of stderr (if truncated) or full stderr (if not truncated) |
| `tail` | string? | Last 50 lines of stderr (only present if truncated) |
| `truncated` | boolean | Whether the output was truncated |
| `total_lines` | int? | Total number of lines in stderr |
### Item Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `item.started` | Item creation | `{ item }` |
| `item.delta` | Streaming content delta | `{ item_id, native_item_id?, delta }` |
| `item.completed` | Item finalized | `{ item }` |
Items follow a consistent lifecycle: `item.started` → `item.delta` (0 or more) → `item.completed`.
### HITL (Human-in-the-Loop)
| Type | Description | Data |
|------|-------------|------|
| `permission.requested` | Permission request pending | `{ permission_id, action, status, metadata? }` |
| `permission.resolved` | Permission decision recorded | `{ permission_id, action, status, metadata? }` |
| `question.requested` | Question pending user input | `{ question_id, prompt, options, status }` |
| `question.resolved` | Question answered or rejected | `{ question_id, prompt, options, status, response? }` |
**PermissionEventData**
| Field | Type | Description |
|-------|------|-------------|
| `permission_id` | string | Identifier for the permission request |
| `action` | string | What the agent wants to do |
| `status` | string | `requested`, `accept`, `accept_for_session`, `reject` |
| `metadata` | any? | Additional context |
**QuestionEventData**
| Field | Type | Description |
|-------|------|-------------|
| `question_id` | string | Identifier for the question |
| `prompt` | string | Question text |
| `options` | string[] | Available answer options |
| `status` | string | `requested`, `answered`, `rejected` |
| `response` | string? | Selected answer (when resolved) |
### Errors
| Type | Description | Data |
|------|-------------|------|
| `error` | Runtime error | `{ message, code?, details? }` |
| `agent.unparsed` | Parse failure | `{ error, location, raw_hash? }` |
The `agent.unparsed` event indicates the daemon failed to parse an agent payload. This should be treated as a bug.
## UniversalItem
Items represent discrete units of content within a session.
| Field | Type | Description |
|-------|------|-------------|
| `item_id` | string | Daemon-generated identifier |
| `native_item_id` | string? | Provider-native item/message identifier |
| `parent_id` | string? | Parent item ID (e.g., tool call/result parented to a message) |
| `kind` | string | Item category (see below) |
| `role` | string? | Actor role for message items |
| `status` | string | Lifecycle status |
| `content` | ContentPart[] | Ordered list of content parts |
### ItemKind
| Value | Description |
|-------|-------------|
| `message` | User or assistant message |
| `tool_call` | Tool invocation |
| `tool_result` | Tool execution result |
| `system` | System message |
| `status` | Status update |
| `unknown` | Unrecognized item type |
### ItemRole
| Value | Description |
|-------|-------------|
| `user` | User message |
| `assistant` | Assistant response |
| `system` | System prompt |
| `tool` | Tool-related message |
### ItemStatus
| Value | Description |
|-------|-------------|
| `in_progress` | Item is streaming or pending |
| `completed` | Item is finalized |
| `failed` | Item execution failed |
## Content Parts
The `content` array contains typed parts that make up an item's payload.
### text
Plain text content.
```json
{ "type": "text", "text": "Hello, world!" }
```
### json
Structured JSON content.
```json
{ "type": "json", "json": { "key": "value" } }
```
### tool_call
Tool invocation.
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Tool name |
| `arguments` | string | JSON-encoded arguments |
| `call_id` | string | Unique call identifier |
```json
{
"type": "tool_call",
"name": "read_file",
"arguments": "{\"path\": \"/src/main.ts\"}",
"call_id": "call_abc123"
}
```
### tool_result
Tool execution result.
| Field | Type | Description |
|-------|------|-------------|
| `call_id` | string | Matching call identifier |
| `output` | string | Tool output |
```json
{
"type": "tool_result",
"call_id": "call_abc123",
"output": "File contents here..."
}
```
### file_ref
File reference with optional diff.
| Field | Type | Description |
|-------|------|-------------|
| `path` | string | File path |
| `action` | string | `read`, `write`, `patch` |
| `diff` | string? | Unified diff (for patches) |
```json
{
"type": "file_ref",
"path": "/src/main.ts",
"action": "write",
"diff": "@@ -1,3 +1,4 @@\n+import { foo } from 'bar';"
}
```
### image
Image reference.
| Field | Type | Description |
|-------|------|-------------|
| `path` | string | Image file path |
| `mime` | string? | MIME type |
```json
{ "type": "image", "path": "/tmp/screenshot.png", "mime": "image/png" }
```
### reasoning
Model reasoning/thinking content.
| Field | Type | Description |
|-------|------|-------------|
| `text` | string | Reasoning text |
| `visibility` | string | `public` or `private` |
```json
{ "type": "reasoning", "text": "Let me think about this...", "visibility": "public" }
```
### status
Status indicator.
| Field | Type | Description |
|-------|------|-------------|
| `label` | string | Status label |
| `detail` | string? | Additional detail |
```json
{ "type": "status", "label": "Running tests", "detail": "3 of 10 passed" }
```
## Source & Synthetics
### EventSource
The `source` field indicates who emitted the event:
| Value | Description |
|-------|-------------|
| `agent` | Native event from the agent |
| `daemon` | Synthetic event generated by the daemon |
### Synthetic Events
The daemon emits synthetic events (`synthetic: true`, `source: "daemon"`) to provide a consistent event stream across all agents. Common synthetics:
| Synthetic | When |
|-----------|------|
| `session.started` | Agent doesn't emit explicit session start |
| `session.ended` | Agent doesn't emit explicit session end |
| `turn.started` | Agent doesn't emit explicit turn start |
| `turn.ended` | Agent doesn't emit explicit turn end |
| `item.started` | Agent doesn't emit item start events |
| `item.delta` | Agent doesn't stream deltas natively |
| `question.*` | Claude Code plan mode (from ExitPlanMode tool) |
### Raw Payloads
Pass `include_raw=true` to event endpoints to receive the original agent payload in the `raw` field. Useful for debugging or accessing agent-specific data not in the universal schema.
```typescript
const events = await client.getEvents("my-session", { includeRaw: true });
// events[0].raw contains the original agent payload
```

View file

@ -35,9 +35,7 @@ await sdk.setSkillsConfig(
// Create a session using the configured skills // Create a session using the configured skills
const session = await sdk.createSession({ const session = await sdk.createSession({
agent: "claude", agent: "claude",
sessionInit: { cwd: "/workspace",
cwd: "/workspace",
},
}); });
await session.prompt([ await session.prompt([

View file

@ -20,7 +20,6 @@ body {
color: var(--sa-text); color: var(--sa-text);
} }
/*
a { a {
color: var(--sa-primary); color: var(--sa-primary);
} }
@ -41,6 +40,13 @@ select {
color: var(--sa-text); color: var(--sa-text);
} }
code,
pre {
background-color: var(--sa-card);
border: 1px solid var(--sa-border);
color: var(--sa-text);
}
.card, .card,
.mintlify-card, .mintlify-card,
.docs-card { .docs-card {
@ -64,4 +70,3 @@ select {
.alert-danger { .alert-danger {
border-color: var(--sa-danger); border-color: var(--sa-danger);
} }
*/

View file

@ -29,25 +29,6 @@ Verify the agent is installed:
ls -la ~/.local/share/sandbox-agent/bin/ ls -la ~/.local/share/sandbox-agent/bin/
``` ```
### 4. Binary libc mismatch (musl vs glibc)
Claude Code binaries are available in both musl and glibc variants. You may see errors like:
```
cannot execute: required file not found
Error loading shared library libstdc++.so.6: No such file or directory
```
This means the wrong binary variant was downloaded.
**For sandbox-agent 0.2.0+**: Platform detection is automatic. The correct binary (musl or glibc) is downloaded based on the runtime environment.
**For sandbox-agent 0.1.x**: Use Alpine Linux, which has native musl support:
```dockerfile
FROM alpine:latest
RUN apk add --no-cache curl ca-certificates libstdc++ libgcc bash
```
## Daytona Network Restrictions ## Daytona Network Restrictions

View file

@ -1,5 +1,5 @@
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex

View file

@ -1,5 +1,5 @@
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex

View file

@ -25,7 +25,7 @@ const baseUrl = "http://localhost:3000";
console.log("Connecting to server..."); console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl }); const client = await SandboxAgent.connect({ baseUrl });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/root", mcpServers: [] } }); const session = await client.createSession({ agent: detectAgent(), cwd: "/root" });
const sessionId = session.id; const sessionId = session.id;
console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`); console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`);

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -1,7 +1,7 @@
FROM cloudflare/sandbox:0.7.0 FROM cloudflare/sandbox:0.7.0
# Install sandbox-agent # Install sandbox-agent
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Pre-install agents # Pre-install agents
RUN sandbox-agent install-agent claude && \ RUN sandbox-agent install-agent claude && \

View file

@ -0,0 +1,154 @@
import { describe, it, expect } from "vitest";
import { spawn, type ChildProcess } from "node:child_process";
import { resolve, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import { execSync } from "node:child_process";
const __dirname = dirname(fileURLToPath(import.meta.url));
const PROJECT_DIR = resolve(__dirname, "..");
/**
* Cloudflare Workers integration test.
*
* Set RUN_CLOUDFLARE_EXAMPLES=1 to enable. Requires wrangler and Docker.
*
* This starts `wrangler dev` which:
* 1. Builds the Dockerfile (cloudflare/sandbox base + sandbox-agent)
* 2. Starts a local Workers runtime with Durable Objects and containers
* 3. Exposes the app on a local port
*
* We then test through the proxy endpoint which forwards to sandbox-agent
* running inside the container.
*/
// Opt-in switch: the test only runs when RUN_CLOUDFLARE_EXAMPLES is exactly "1".
const shouldRun = process.env.RUN_CLOUDFLARE_EXAMPLES === "1";
// Per-test timeout in milliseconds. SANDBOX_TEST_TIMEOUT_MS overrides the 600_000 default
// when it parses to a non-zero integer; an unset, unparsable, or zero value falls back.
const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10) || 600_000;
// Registers the test as skipped (`it.skip`) when the switch is off.
const testFn = shouldRun ? it : it.skip;
/** Handle returned by `startWranglerDev`. */
interface WranglerDev {
// URL parsed from the output of the `wrangler dev` process.
baseUrl: string;
// Sends SIGTERM to the wrangler process group, falling back to the child process itself.
cleanup: () => void;
}
/**
 * Builds the frontend bundle and launches `wrangler dev` on an OS-assigned port.
 *
 * @returns The first local URL found in wrangler's output, plus a `cleanup`
 *   callback that sends SIGTERM to the wrangler process group.
 * @throws If the vite build fails, the process cannot be spawned, it exits
 *   before announcing a URL, or no URL appears within 120 seconds.
 */
async function startWranglerDev(): Promise<WranglerDev> {
  // Build frontend assets first (wrangler expects dist/ to exist)
  execSync("npx vite build", { cwd: PROJECT_DIR, stdio: "pipe" });

  // The callbacks are named `fulfill`/`fail` so they do not shadow the
  // `resolve` imported from node:path at the top of the file.
  return new Promise<WranglerDev>((fulfill, fail) => {
    const child: ChildProcess = spawn("npx", ["wrangler", "dev", "--port", "0"], {
      cwd: PROJECT_DIR,
      stdio: ["ignore", "pipe", "pipe"],
      // Start in a separate process group so cleanup can signal the whole group.
      detached: true,
      env: {
        ...process.env,
        // Ensure wrangler picks up API keys to pass to the container
        NODE_ENV: "development",
      },
    });

    let stdout = "";
    let stderr = "";
    let settled = false;

    const cleanup = () => {
      if (child.pid) {
        // Kill process group to ensure wrangler and its children are cleaned up
        try {
          process.kill(-child.pid, "SIGTERM");
        } catch {
          // Signalling the group failed; fall back to the direct child.
          try {
            child.kill("SIGTERM");
          } catch {}
        }
      }
    };

    const timer = setTimeout(() => {
      settle(() => {
        cleanup();
        fail(new Error(`wrangler dev did not start within 120s.\nstdout: ${stdout}\nstderr: ${stderr}`));
      });
    }, 120_000);

    // Runs `action` at most once across all event sources and stops the startup timer.
    const settle = (action: () => void) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      action();
    };

    // wrangler dev prints "Ready on http://localhost:XXXX" when ready; otherwise
    // fall back to the first localhost/127.0.0.1 URL with an explicit port.
    const findReadyUrl = (output: string): string | undefined => {
      const match = output.match(/Ready on (https?:\/\/[^\s]+)/i) ?? output.match(/(https?:\/\/(?:localhost|127\.0\.0\.1):\d+)/);
      return match?.[1];
    };

    const announceIfReady = (output: string) => {
      const baseUrl = findReadyUrl(output);
      if (baseUrl) {
        settle(() => fulfill({ baseUrl, cleanup }));
      }
    };

    // Both streams are matched against their accumulated buffer, so a ready
    // message split across two chunks is still detected on either stream.
    child.stdout?.on("data", (chunk: Buffer) => {
      stdout += chunk.toString();
      announceIfReady(stdout);
    });
    child.stderr?.on("data", (chunk: Buffer) => {
      // Some wrangler versions print ready message to stderr
      stderr += chunk.toString();
      announceIfReady(stderr);
    });

    child.on("error", (err) => {
      settle(() => fail(new Error(`wrangler dev failed to start: ${err.message}`)));
    });
    child.on("exit", (code) => {
      settle(() => fail(new Error(`wrangler dev exited with code ${code}.\nstdout: ${stdout}\nstderr: ${stderr}`)));
    });
  });
}
describe("cloudflare example", () => {
  testFn(
    "starts wrangler dev and sandbox-agent responds via proxy",
    async () => {
      const wrangler = await startWranglerDev();
      // The Cloudflare example proxies requests through /sandbox/:name/proxy/*
      const healthUrl = `${wrangler.baseUrl}/sandbox/test/proxy/v1/health`;

      // One health probe. Network errors, non-OK responses and unexpected
      // payloads all count as "not healthy yet".
      const probe = async (): Promise<boolean> => {
        try {
          const res = await fetch(healthUrl);
          if (!res.ok) return false;
          const data = await res.json();
          // The proxied health endpoint returns {name: "Sandbox Agent", ...}
          return data.status === "ok" || data.name === "Sandbox Agent";
        } catch {
          return false;
        }
      };

      try {
        // Wait for the container inside the Durable Object to start sandbox-agent:
        // up to 120 probes with a 2s pause after each failed one.
        let healthy = false;
        let attempt = 0;
        while (attempt < 120) {
          if (await probe()) {
            healthy = true;
            break;
          }
          await new Promise((r) => setTimeout(r, 2000));
          attempt += 1;
        }
        expect(healthy).toBe(true);

        // Confirm a second request also works
        const response = await fetch(healthUrl);
        expect(response.ok).toBe(true);
      } finally {
        wrangler.cleanup();
      }
    },
    timeoutMs,
  );
});

View file

@ -2,7 +2,7 @@ import { defineConfig } from "vitest/config";
export default defineConfig({ export default defineConfig({
test: { test: {
root: ".",
include: ["tests/**/*.test.ts"], include: ["tests/**/*.test.ts"],
testTimeout: 60000,
}, },
}); });

View file

@ -3,7 +3,7 @@
"private": true, "private": true,
"type": "module", "type": "module",
"scripts": { "scripts": {
"start": "tsx src/computesdk.ts", "start": "tsx src/index.ts",
"typecheck": "tsc --noEmit" "typecheck": "tsc --noEmit"
}, },
"dependencies": { "dependencies": {

View file

@ -1,151 +0,0 @@
import {
compute,
detectProvider,
getMissingEnvVars,
getProviderConfigFromEnv,
isProviderAuthComplete,
isValidProvider,
PROVIDER_NAMES,
type ExplicitComputeConfig,
type ProviderName,
} from "computesdk";
import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared";
import { fileURLToPath } from "node:url";
import { resolve } from "node:path";
const PORT = 3000;
const REQUEST_TIMEOUT_MS = Number.parseInt(process.env.COMPUTESDK_TIMEOUT_MS || "", 10) || 120_000;
/**
 * Picks the ComputeSDK provider for this run.
 *
 * An explicit COMPUTESDK_PROVIDER value wins and is validated (known name,
 * complete credentials). Without it, the provider is auto-detected.
 *
 * @throws If the override is unsupported or missing credentials, or if
 *   auto-detection finds no provider.
 */
function resolveProvider(): ProviderName {
  const requested = process.env.COMPUTESDK_PROVIDER;

  if (!requested) {
    const detected = detectProvider();
    if (detected) {
      console.log(`Using ComputeSDK provider: ${detected} (auto-detected)`);
      return detected as ProviderName;
    }
    throw new Error(`No provider credentials found. Set one of: ${PROVIDER_NAMES.map((p) => getMissingEnvVars(p).join(", ")).join(" | ")}`);
  }

  if (!isValidProvider(requested)) {
    throw new Error(`Unsupported ComputeSDK provider "${requested}". Supported providers: ${PROVIDER_NAMES.join(", ")}`);
  }

  if (!isProviderAuthComplete(requested)) {
    const missing = getMissingEnvVars(requested);
    throw new Error(`Missing credentials for provider "${requested}". Set: ${missing.join(", ")}`);
  }

  console.log(`Using ComputeSDK provider: ${requested} (explicit)`);
  return requested as ProviderName;
}
/**
 * Resolves the provider and applies the ComputeSDK configuration:
 * provider name, COMPUTESDK_API_KEY, request timeout, and any provider
 * settings read from the environment.
 */
function configureComputeSDK(): void {
  const provider = resolveProvider();
  const config: ExplicitComputeConfig = {
    provider,
    computesdkApiKey: process.env.COMPUTESDK_API_KEY,
    requestTimeoutMs: REQUEST_TIMEOUT_MS,
  };

  const providerConfig = getProviderConfigFromEnv(provider);
  const hasProviderSettings = Object.keys(providerConfig).length > 0;
  if (hasProviderSettings) {
    // Provider settings are stored under a key named after the provider;
    // the cast widens the config type to allow that dynamic key.
    (config as ExplicitComputeConfig & Record<ProviderName, Record<string, string>>)[provider] = providerConfig;
  }

  compute.setConfig(config);
}
configureComputeSDK();
/**
 * Collects the agent API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY) that are
 * set to a non-empty value in the host environment.
 */
const buildEnv = (): Record<string, string> => {
  const forwarded: Record<string, string> = {};
  for (const key of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    const value = process.env[key];
    if (value) forwarded[key] = value;
  }
  return forwarded;
};
/**
 * Creates a ComputeSDK sandbox, installs sandbox-agent (plus the agents whose
 * API keys are present), and starts the server on PORT.
 *
 * @returns The sandbox URL for PORT and a `cleanup` callback that destroys
 *   the sandbox; cleanup failures are logged as warnings, not thrown.
 * @throws If sandbox creation or any provisioning command fails. When a
 *   failure happens after the sandbox exists, it is destroyed before the
 *   error is rethrown so it does not leak.
 */
export async function setupComputeSdkSandboxAgent(): Promise<{
  baseUrl: string;
  cleanup: () => Promise<void>;
}> {
  const env = buildEnv();
  console.log("Creating ComputeSDK sandbox...");
  const sandbox = await compute.sandbox.create({
    envs: Object.keys(env).length > 0 ? env : undefined,
  });

  const cleanup = async () => {
    try {
      await sandbox.destroy();
    } catch (error) {
      console.warn("Cleanup failed:", error instanceof Error ? error.message : error);
    }
  };

  // Runs a command in the sandbox and throws on a non-zero numeric exit code.
  // Results without a numeric exit code are returned unchecked.
  const run = async (cmd: string, options?: { background?: boolean }) => {
    const result = await sandbox.runCommand(cmd, options);
    if (typeof result?.exitCode === "number" && result.exitCode !== 0) {
      throw new Error(`Command failed: ${cmd} (exit ${result.exitCode})\n${result.stderr || ""}`);
    }
    return result;
  };

  try {
    console.log("Installing sandbox-agent...");
    await run("curl -fsSL https://releases.rivet.dev/sandbox-agent/latest/install.sh | sh");
    if (env.ANTHROPIC_API_KEY) {
      console.log("Installing Claude agent...");
      await run("sandbox-agent install-agent claude");
    }
    if (env.OPENAI_API_KEY) {
      console.log("Installing Codex agent...");
      await run("sandbox-agent install-agent codex");
    }
    console.log("Starting server...");
    await run(`sandbox-agent server --no-token --host 0.0.0.0 --port ${PORT}`, { background: true });
    const baseUrl = await sandbox.getUrl({ port: PORT });
    return { baseUrl, cleanup };
  } catch (error) {
    // The caller never receives `cleanup` on this path, so release the sandbox here.
    await cleanup();
    throw error;
  }
}
/**
 * Runs the interactive example: provisions the sandbox, opens a session,
 * prints the inspector URL, then waits for SIGINT/SIGTERM.
 *
 * @throws If connecting or creating the session fails. The sandbox is
 *   destroyed before the error is rethrown.
 */
export async function runComputeSdkExample(): Promise<void> {
  const { baseUrl, cleanup } = await setupComputeSdkSandboxAgent();

  const handleExit = async () => {
    await cleanup();
    process.exit(0);
  };
  process.once("SIGINT", handleExit);
  process.once("SIGTERM", handleExit);

  try {
    const client = await SandboxAgent.connect({ baseUrl });
    const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home", mcpServers: [] } });
    const sessionId = session.id;
    console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`);
    console.log(" Press Ctrl+C to stop.");
  } catch (error) {
    // Without this, a failed connect/createSession would leave the sandbox running.
    await cleanup();
    throw error;
  }

  // Keep alive until SIGINT/SIGTERM triggers cleanup above
  // NOTE(review): a never-settling promise does not itself keep the Node.js event loop
  // alive; this presumably relies on open handles held by the client. Confirm.
  await new Promise(() => {});
}
// True only when this module is the script passed to node (argv[1]), so the
// example does not run when the module is merely imported.
const isDirectRun = process.argv[1] ? resolve(process.argv[1]) === fileURLToPath(import.meta.url) : false;

if (isDirectRun) {
  runComputeSdkExample().catch((error: unknown) => {
    // Print only the message for Error instances; anything else is printed as-is.
    const detail = error instanceof Error ? error.message : error;
    console.error(detail);
    process.exit(1);
  });
}

View file

@ -0,0 +1,30 @@
import { SandboxAgent } from "sandbox-agent";
import { computesdk } from "sandbox-agent/computesdk";
import { detectAgent } from "@sandbox-agent/example-shared";
// Pass along whichever agent API keys are set in the local environment.
const envs: Record<string, string> = {};
for (const key of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
  const value = process.env[key];
  if (value) envs[key] = value;
}

// Start sandbox-agent through the ComputeSDK provider.
const client = await SandboxAgent.start({
  sandbox: computesdk({ create: { envs } }),
});
console.log(`UI: ${client.inspectorUrl}`);

const session = await client.createSession({ agent: detectAgent() });

// Print every session event as "[sender] <json payload>".
session.onEvent(({ sender, payload }) => {
  console.log(`[${sender}]`, JSON.stringify(payload));
});

// The prompt is sent without awaiting its result.
session.prompt([{ type: "text", text: "Say hello from ComputeSDK in one sentence." }]);

// Ctrl+C destroys the sandbox before the process exits.
process.once("SIGINT", async () => {
  await client.destroySandbox();
  process.exit(0);
});

View file

@ -1,6 +1,6 @@
import { describe, it, expect } from "vitest"; import { describe, it, expect } from "vitest";
import { buildHeaders } from "@sandbox-agent/example-shared"; import { SandboxAgent } from "sandbox-agent";
import { setupComputeSdkSandboxAgent } from "../src/computesdk.ts"; import { computesdk } from "sandbox-agent/computesdk";
const hasModal = Boolean(process.env.MODAL_TOKEN_ID && process.env.MODAL_TOKEN_SECRET); const hasModal = Boolean(process.env.MODAL_TOKEN_ID && process.env.MODAL_TOKEN_SECRET);
const hasVercel = Boolean(process.env.VERCEL_TOKEN || process.env.VERCEL_OIDC_TOKEN); const hasVercel = Boolean(process.env.VERCEL_TOKEN || process.env.VERCEL_OIDC_TOKEN);
@ -13,20 +13,23 @@ const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10)
const testFn = shouldRun ? it : it.skip; const testFn = shouldRun ? it : it.skip;
describe("computesdk example", () => { describe("computesdk provider", () => {
testFn( testFn(
"starts sandbox-agent and responds to /v1/health", "starts sandbox-agent and responds to /v1/health",
async () => { async () => {
const { baseUrl, cleanup } = await setupComputeSdkSandboxAgent(); const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const sdk = await SandboxAgent.start({
sandbox: computesdk({ create: { envs } }),
});
try { try {
const response = await fetch(`${baseUrl}/v1/health`, { const health = await sdk.getHealth();
headers: buildHeaders({}), expect(health.status).toBe("ok");
});
expect(response.ok).toBe(true);
const data = await response.json();
expect(data.status).toBe("ok");
} finally { } finally {
await cleanup(); await sdk.destroySandbox();
} }
}, },
timeoutMs, timeoutMs,

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -0,0 +1,33 @@
import { SandboxAgent } from "sandbox-agent";
import { daytona } from "sandbox-agent/daytona";
/**
 * Returns the agent API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY) that are set
 * to a non-empty value in the host environment.
 */
function collectEnvVars(): Record<string, string> {
  const collected: Record<string, string> = {};
  for (const key of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    const value = process.env[key];
    if (value) collected[key] = value;
  }
  return collected;
}
/**
 * Derives the server base URL from an inspector URL by removing a trailing
 * `/ui` path segment, with or without a final slash.
 *
 * @param inspectorUrl Inspector URL, e.g. `https://host/ui/`.
 * @returns The URL without the trailing `/ui` segment; input that does not
 *   end in that segment is returned unchanged.
 */
function inspectorUrlToBaseUrl(inspectorUrl: string): string {
  // The slash is optional so "https://host/ui" does not keep the UI path.
  return inspectorUrl.replace(/\/ui\/?$/, "");
}
/**
 * Starts sandbox-agent through the Daytona provider, forwarding the agent API
 * keys found in the local environment.
 *
 * @returns The base URL derived from the client's inspector URL and a
 *   `cleanup` callback that calls `client.killSandbox()`. The optional
 *   `token` and `extraHeaders` fields are never populated here.
 */
export async function setupDaytonaSandboxAgent(): Promise<{
  baseUrl: string;
  token?: string;
  extraHeaders?: Record<string, string>;
  cleanup: () => Promise<void>;
}> {
  const envVars = collectEnvVars();
  const provider = daytona({ create: { envVars } });
  const client = await SandboxAgent.start({ sandbox: provider });

  const baseUrl = inspectorUrlToBaseUrl(client.inspectorUrl);
  const cleanup = async (): Promise<void> => {
    await client.killSandbox();
  };
  return { baseUrl, cleanup };
}

View file

@ -1,42 +1,30 @@
import { Daytona } from "@daytonaio/sdk";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared"; import { daytona } from "sandbox-agent/daytona";
import { detectAgent } from "@sandbox-agent/example-shared";
const daytona = new Daytona();
const envVars: Record<string, string> = {}; const envVars: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envVars.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envVars.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envVars.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envVars.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
// Use default image and install sandbox-agent at runtime (faster startup, no snapshot build) const client = await SandboxAgent.start({
console.log("Creating Daytona sandbox..."); sandbox: daytona({
const sandbox = await daytona.create({ envVars, autoStopInterval: 0 }); create: { envVars },
}),
});
// Install sandbox-agent and start server console.log(`UI: ${client.inspectorUrl}`);
console.log("Installing sandbox-agent...");
await sandbox.process.executeCommand("curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh");
console.log("Installing agents..."); const session = await client.createSession({
await sandbox.process.executeCommand("sandbox-agent install-agent claude"); agent: detectAgent(),
await sandbox.process.executeCommand("sandbox-agent install-agent codex"); });
await sandbox.process.executeCommand("nohup sandbox-agent server --no-token --host 0.0.0.0 --port 3000 >/tmp/sandbox-agent.log 2>&1 &"); session.onEvent((event) => {
console.log(`[${event.sender}]`, JSON.stringify(event.payload));
});
const baseUrl = (await sandbox.getSignedPreviewUrl(3000, 4 * 60 * 60)).url; session.prompt([{ type: "text", text: "Say hello from Daytona in one sentence." }]);
console.log("Connecting to server..."); process.once("SIGINT", async () => {
const client = await SandboxAgent.connect({ baseUrl }); await client.destroySandbox();
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/daytona", mcpServers: [] } });
const sessionId = session.id;
console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`);
console.log(" Press Ctrl+C to stop.");
const keepAlive = setInterval(() => {}, 60_000);
const cleanup = async () => {
clearInterval(keepAlive);
await sandbox.delete(60);
process.exit(0); process.exit(0);
}; });
process.once("SIGINT", cleanup);
process.once("SIGTERM", cleanup);

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,10 +9,10 @@
"dependencies": { "dependencies": {
"@sandbox-agent/example-shared": "workspace:*", "@sandbox-agent/example-shared": "workspace:*",
"dockerode": "latest", "dockerode": "latest",
"get-port": "latest",
"sandbox-agent": "workspace:*" "sandbox-agent": "workspace:*"
}, },
"devDependencies": { "devDependencies": {
"@types/dockerode": "latest",
"@types/node": "latest", "@types/node": "latest",
"tsx": "latest", "tsx": "latest",
"typescript": "latest", "typescript": "latest",

View file

@ -1,68 +1,40 @@
import Docker from "dockerode";
import fs from "node:fs"; import fs from "node:fs";
import path from "node:path"; import path from "node:path";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared"; import { docker } from "sandbox-agent/docker";
import { detectAgent } from "@sandbox-agent/example-shared";
import { FULL_IMAGE } from "@sandbox-agent/example-shared/docker"; import { FULL_IMAGE } from "@sandbox-agent/example-shared/docker";
const IMAGE = FULL_IMAGE;
const PORT = 3000;
const agent = detectAgent();
const codexAuthPath = process.env.HOME ? path.join(process.env.HOME, ".codex", "auth.json") : null; const codexAuthPath = process.env.HOME ? path.join(process.env.HOME, ".codex", "auth.json") : null;
const bindMounts = codexAuthPath && fs.existsSync(codexAuthPath) ? [`${codexAuthPath}:/home/sandbox/.codex/auth.json:ro`] : []; const bindMounts = codexAuthPath && fs.existsSync(codexAuthPath) ? [`${codexAuthPath}:/home/sandbox/.codex/auth.json:ro`] : [];
const env = [
process.env.ANTHROPIC_API_KEY ? `ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}` : "",
process.env.OPENAI_API_KEY ? `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}` : "",
process.env.CODEX_API_KEY ? `CODEX_API_KEY=${process.env.CODEX_API_KEY}` : "",
].filter(Boolean);
const docker = new Docker({ socketPath: "/var/run/docker.sock" }); const client = await SandboxAgent.start({
sandbox: docker({
// Pull image if needed image: FULL_IMAGE,
try { env,
await docker.getImage(IMAGE).inspect(); binds: bindMounts,
} catch { }),
console.log(`Pulling ${IMAGE}...`);
await new Promise<void>((resolve, reject) => {
docker.pull(IMAGE, (err: Error | null, stream: NodeJS.ReadableStream) => {
if (err) return reject(err);
docker.modem.followProgress(stream, (err: Error | null) => (err ? reject(err) : resolve()));
});
});
}
console.log("Starting container...");
const container = await docker.createContainer({
Image: IMAGE,
Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`],
Env: [
process.env.ANTHROPIC_API_KEY ? `ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}` : "",
process.env.OPENAI_API_KEY ? `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}` : "",
process.env.CODEX_API_KEY ? `CODEX_API_KEY=${process.env.CODEX_API_KEY}` : "",
].filter(Boolean),
ExposedPorts: { [`${PORT}/tcp`]: {} },
HostConfig: {
AutoRemove: true,
PortBindings: { [`${PORT}/tcp`]: [{ HostPort: `${PORT}` }] },
Binds: bindMounts,
},
}); });
await container.start();
const baseUrl = `http://127.0.0.1:${PORT}`; console.log(`UI: ${client.inspectorUrl}`);
const client = await SandboxAgent.connect({ baseUrl }); const session = await client.createSession({
const session = await client.createSession({ agent, sessionInit: { cwd: "/home/sandbox", mcpServers: [] } }); agent: detectAgent(),
const sessionId = session.id; cwd: "/home/sandbox",
});
console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`); session.onEvent((event) => {
console.log(" Press Ctrl+C to stop."); console.log(`[${event.sender}]`, JSON.stringify(event.payload));
});
const keepAlive = setInterval(() => {}, 60_000); session.prompt([{ type: "text", text: "Say hello from Docker in one sentence." }]);
const cleanup = async () => {
clearInterval(keepAlive); process.once("SIGINT", async () => {
try { await client.destroySandbox();
await container.stop({ t: 5 });
} catch {}
try {
await container.remove({ force: true });
} catch {}
process.exit(0); process.exit(0);
}; });
process.once("SIGINT", cleanup);
process.once("SIGTERM", cleanup);

View file

@ -1,8 +1,15 @@
import { describe, it, expect } from "vitest"; import { describe, it, expect } from "vitest";
import { buildHeaders } from "@sandbox-agent/example-shared"; import { startDockerSandbox } from "@sandbox-agent/example-shared/docker";
import { setupDockerSandboxAgent } from "../src/docker.ts";
const shouldRun = process.env.RUN_DOCKER_EXAMPLES === "1"; /**
* Docker integration test.
*
* Set SANDBOX_AGENT_DOCKER_IMAGE to the image tag to test (e.g. a locally-built
* full image). The test starts a container from that image, waits for
* sandbox-agent to become healthy, and validates the /v1/health endpoint.
*/
const image = process.env.SANDBOX_AGENT_DOCKER_IMAGE;
const shouldRun = Boolean(image);
const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10) || 300_000; const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10) || 300_000;
const testFn = shouldRun ? it : it.skip; const testFn = shouldRun ? it : it.skip;
@ -11,11 +18,29 @@ describe("docker example", () => {
testFn( testFn(
"starts sandbox-agent and responds to /v1/health", "starts sandbox-agent and responds to /v1/health",
async () => { async () => {
const { baseUrl, token, cleanup } = await setupDockerSandboxAgent(); const { baseUrl, cleanup } = await startDockerSandbox({
port: 2468,
image: image!,
});
try { try {
const response = await fetch(`${baseUrl}/v1/health`, { // Wait for health check
headers: buildHeaders({ token }), let healthy = false;
}); for (let i = 0; i < 60; i++) {
try {
const res = await fetch(`${baseUrl}/v1/health`);
if (res.ok) {
const data = await res.json();
if (data.status === "ok") {
healthy = true;
break;
}
}
} catch {}
await new Promise((r) => setTimeout(r, 1000));
}
expect(healthy).toBe(true);
const response = await fetch(`${baseUrl}/v1/health`);
expect(response.ok).toBe(true); expect(response.ok).toBe(true);
const data = await response.json(); const data = await response.json();
expect(data.status).toBe("ok"); expect(data.status).toBe("ok");

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

34
examples/e2b/src/e2b.ts Normal file
View file

@ -0,0 +1,34 @@
import { SandboxAgent } from "sandbox-agent";
import { e2b } from "sandbox-agent/e2b";
/**
 * Returns the agent API keys (ANTHROPIC_API_KEY, OPENAI_API_KEY) that are set
 * to a non-empty value in the host environment.
 */
function collectEnvVars(): Record<string, string> {
  const collected: Record<string, string> = {};
  for (const key of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    const value = process.env[key];
    if (value) collected[key] = value;
  }
  return collected;
}
/**
 * Derives the server base URL from an inspector URL by removing a trailing
 * `/ui` path segment, with or without a final slash.
 *
 * @param inspectorUrl Inspector URL, e.g. `https://host/ui/`.
 * @returns The URL without the trailing `/ui` segment; input that does not
 *   end in that segment is returned unchanged.
 */
function inspectorUrlToBaseUrl(inspectorUrl: string): string {
  // The slash is optional so "https://host/ui" does not keep the UI path.
  return inspectorUrl.replace(/\/ui\/?$/, "");
}
/**
 * Starts sandbox-agent through the E2B provider, forwarding the agent API keys
 * found in the local environment and the E2B_TEMPLATE value (which may be
 * undefined).
 *
 * @returns The base URL derived from the client's inspector URL and a
 *   `cleanup` callback that calls `client.killSandbox()`. The optional
 *   `token` field is never populated here.
 */
export async function setupE2BSandboxAgent(): Promise<{
  baseUrl: string;
  token?: string;
  cleanup: () => Promise<void>;
}> {
  const template = process.env.E2B_TEMPLATE;
  const envs = collectEnvVars();
  const provider = e2b({ template, create: { envs } });
  const client = await SandboxAgent.start({ sandbox: provider });

  const baseUrl = inspectorUrlToBaseUrl(client.inspectorUrl);
  const cleanup = async (): Promise<void> => {
    await client.killSandbox();
  };
  return { baseUrl, cleanup };
}

View file

@ -1,45 +1,28 @@
import { Sandbox } from "@e2b/code-interpreter";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { detectAgent, buildInspectorUrl } from "@sandbox-agent/example-shared"; import { e2b } from "sandbox-agent/e2b";
import { detectAgent } from "@sandbox-agent/example-shared";
const envs: Record<string, string> = {}; const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const template = process.env.E2B_TEMPLATE;
console.log("Creating E2B sandbox..."); const client = await SandboxAgent.start({
const sandbox = await Sandbox.create({ allowInternetAccess: true, envs }); // ✨ NEW ✨
sandbox: e2b({ template, create: { envs } }),
});
const run = async (cmd: string) => { const session = await client.createSession({
const result = await sandbox.commands.run(cmd); agent: detectAgent(),
if (result.exitCode !== 0) throw new Error(`Command failed: ${cmd}\n${result.stderr}`); });
return result;
};
console.log("Installing sandbox-agent..."); session.onEvent((event) => {
await run("curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh"); console.log(`[${event.sender}]`, JSON.stringify(event.payload));
});
console.log("Installing agents..."); session.prompt([{ type: "text", text: "Say hello from E2B in one sentence." }]);
await run("sandbox-agent install-agent claude");
await run("sandbox-agent install-agent codex");
console.log("Starting server..."); process.once("SIGINT", async () => {
await sandbox.commands.run("sandbox-agent server --no-token --host 0.0.0.0 --port 3000", { background: true, timeoutMs: 0 }); await client.destroySandbox();
const baseUrl = `https://${sandbox.getHost(3000)}`;
console.log("Connecting to server...");
const client = await SandboxAgent.connect({ baseUrl });
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/home/user", mcpServers: [] } });
const sessionId = session.id;
console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`);
console.log(" Press Ctrl+C to stop.");
const keepAlive = setInterval(() => {}, 60_000);
const cleanup = async () => {
clearInterval(keepAlive);
await sandbox.kill();
process.exit(0); process.exit(0);
}; });
process.once("SIGINT", cleanup);
process.once("SIGTERM", cleanup);

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -44,7 +44,7 @@ const readmeText = new TextDecoder().decode(readmeBytes);
console.log(` README.md content: ${readmeText.trim()}`); console.log(` README.md content: ${readmeText.trim()}`);
console.log("Creating session..."); console.log("Creating session...");
const session = await client.createSession({ agent: detectAgent(), sessionInit: { cwd: "/opt/my-project", mcpServers: [] } }); const session = await client.createSession({ agent: detectAgent(), cwd: "/opt/my-project" });
const sessionId = session.id; const sessionId = session.id;
console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`); console.log(` UI: ${buildInspectorUrl({ baseUrl, sessionId })}`);
console.log(' Try: "read the README in /opt/my-project"'); console.log(' Try: "read the README in /opt/my-project"');

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -0,0 +1,20 @@
{
"name": "@sandbox-agent/example-modal",
"private": true,
"type": "module",
"scripts": {
"start": "tsx src/index.ts",
"typecheck": "tsc --noEmit"
},
"dependencies": {
"modal": "latest",
"@sandbox-agent/example-shared": "workspace:*",
"sandbox-agent": "workspace:*"
},
"devDependencies": {
"@types/node": "latest",
"tsx": "latest",
"typescript": "latest",
"vitest": "^3.0.0"
}
}

View file

@ -0,0 +1,30 @@
import { SandboxAgent } from "sandbox-agent";
import { modal } from "sandbox-agent/modal";
import { detectAgent } from "@sandbox-agent/example-shared";
// Pass along whichever agent API keys are set in the local environment.
const secrets: Record<string, string> = {};
for (const key of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
  const value = process.env[key];
  if (value) secrets[key] = value;
}

// Start sandbox-agent through the Modal provider.
const client = await SandboxAgent.start({
  sandbox: modal({ create: { secrets } }),
});
console.log(`UI: ${client.inspectorUrl}`);

const session = await client.createSession({ agent: detectAgent() });

// Print every session event as "[sender] <json payload>".
session.onEvent(({ sender, payload }) => {
  console.log(`[${sender}]`, JSON.stringify(payload));
});

// The prompt is sent without awaiting its result.
session.prompt([{ type: "text", text: "Say hello from Modal in one sentence." }]);

// Ctrl+C destroys the sandbox before the process exits.
process.once("SIGINT", async () => {
  await client.destroySandbox();
  process.exit(0);
});

View file

@ -0,0 +1,31 @@
import { describe, it, expect } from "vitest";
import { SandboxAgent } from "sandbox-agent";
import { modal } from "sandbox-agent/modal";
// The test needs both Modal token variables; it is skipped when either is missing.
const shouldRun = Boolean(process.env.MODAL_TOKEN_ID) && Boolean(process.env.MODAL_TOKEN_SECRET);
// Timeout in ms: SANDBOX_TEST_TIMEOUT_MS when it parses to a non-zero integer, else 300_000.
const parsedTimeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10);
const timeoutMs = parsedTimeoutMs || 300_000;
const testFn = shouldRun ? it : it.skip;

describe("modal provider", () => {
  testFn(
    "starts sandbox-agent and responds to /v1/health",
    async () => {
      // Forward whichever agent API keys are set locally.
      const { ANTHROPIC_API_KEY: anthropicKey, OPENAI_API_KEY: openaiKey } = process.env;
      const secrets: Record<string, string> = {};
      if (anthropicKey) secrets.ANTHROPIC_API_KEY = anthropicKey;
      if (openaiKey) secrets.OPENAI_API_KEY = openaiKey;

      const provider = modal({ create: { secrets } });
      const sdk = await SandboxAgent.start({ sandbox: provider });
      try {
        const health = await sdk.getHealth();
        expect(health.status).toBe("ok");
      } finally {
        // Destroy the sandbox whether or not the assertion passed.
        await sdk.destroySandbox();
      }
    },
    timeoutMs,
  );
});

View file

@ -0,0 +1,17 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext",
"moduleResolution": "Bundler",
"allowImportingTsExtensions": true,
"noEmit": true,
"esModuleInterop": true,
"strict": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"types": ["node"]
},
"include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"]
}

View file

@ -2,6 +2,7 @@ import { createInterface } from "node:readline/promises";
import { stdin as input, stdout as output } from "node:process"; import { stdin as input, stdout as output } from "node:process";
import { Command } from "commander"; import { Command } from "commander";
import { SandboxAgent, type PermissionReply, type SessionPermissionRequest } from "sandbox-agent"; import { SandboxAgent, type PermissionReply, type SessionPermissionRequest } from "sandbox-agent";
import { local } from "sandbox-agent/local";
const options = parseOptions(); const options = parseOptions();
const agent = options.agent.trim().toLowerCase(); const agent = options.agent.trim().toLowerCase();
@ -9,10 +10,7 @@ const autoReply = parsePermissionReply(options.reply);
const promptText = options.prompt?.trim() || `Create ./permission-example.txt with the text 'hello from the ${agent} permissions example'.`; const promptText = options.prompt?.trim() || `Create ./permission-example.txt with the text 'hello from the ${agent} permissions example'.`;
const sdk = await SandboxAgent.start({ const sdk = await SandboxAgent.start({
spawn: { sandbox: local({ log: "inherit" }),
enabled: true,
log: "inherit",
},
}); });
try { try {
@ -43,10 +41,7 @@ try {
const session = await sdk.createSession({ const session = await sdk.createSession({
agent, agent,
...(mode ? { mode } : {}), ...(mode ? { mode } : {}),
sessionInit: { cwd: process.cwd(),
cwd: process.cwd(),
mcpServers: [],
},
}); });
const rl = autoReply const rl = autoReply

View file

@ -1,7 +1,8 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022"], "lib": ["ES2022", "DOM"],
"types": ["node"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -8,7 +8,6 @@
}, },
"dependencies": { "dependencies": {
"@sandbox-agent/example-shared": "workspace:*", "@sandbox-agent/example-shared": "workspace:*",
"@sandbox-agent/persist-postgres": "workspace:*",
"pg": "latest", "pg": "latest",
"sandbox-agent": "workspace:*" "sandbox-agent": "workspace:*"
}, },

View file

@ -3,7 +3,7 @@ import { randomUUID } from "node:crypto";
import { Client } from "pg"; import { Client } from "pg";
import { setTimeout as delay } from "node:timers/promises"; import { setTimeout as delay } from "node:timers/promises";
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { PostgresSessionPersistDriver } from "@sandbox-agent/persist-postgres"; import { PostgresSessionPersistDriver } from "./persist.ts";
import { startDockerSandbox } from "@sandbox-agent/example-shared/docker"; import { startDockerSandbox } from "@sandbox-agent/example-shared/docker";
import { detectAgent } from "@sandbox-agent/example-shared"; import { detectAgent } from "@sandbox-agent/example-shared";

View file

@ -0,0 +1,336 @@
import { Pool, type PoolConfig } from "pg";
import type { ListEventsRequest, ListPage, ListPageRequest, SessionEvent, SessionPersistDriver, SessionRecord } from "sandbox-agent";
/** Page size used by the list methods when the caller supplies no usable limit. */
const DEFAULT_LIST_LIMIT = 100;
/** Construction options for PostgresSessionPersistDriver. All fields are optional. */
export interface PostgresSessionPersistDriverOptions {
/** Connection string passed to a newly created Pool; only read when `pool` is not given. */
connectionString?: string;
/** Existing pool to use as-is. The driver's close() does not end a pool supplied here. */
pool?: Pool;
/** Extra Pool settings, spread after `connectionString` so its keys take precedence. Only read when `pool` is not given. */
poolConfig?: PoolConfig;
/** Schema that holds the tables. Defaults to "public"; must match /^[A-Za-z_][A-Za-z0-9_]*$/. */
schema?: string;
}
/**
 * Session persistence driver backed by PostgreSQL.
 *
 * Session records live in a `sessions` table and session events in an `events`
 * table inside the configured schema. The constructor kicks off an
 * initialization routine that creates the schema and tables and upgrades
 * existing tables; every query method awaits that routine first.
 *
 * List methods page with offset cursors: `nextCursor` is the stringified
 * offset of the next row, or `undefined` once the last row was returned.
 */
export class PostgresSessionPersistDriver implements SessionPersistDriver {
  private readonly pool: Pool;
  /** True when the pool was created by this driver and must be ended by close(). */
  private readonly ownsPool: boolean;
  private readonly schema: string;
  /** Settles when initialize() finishes; rejects if any initialization query failed. */
  private readonly initialized: Promise<void>;

  /**
   * @param options - Pool/connection settings and target schema (default "public").
   * @throws Error when `options.schema` is not a plain identifier.
   */
  constructor(options: PostgresSessionPersistDriverOptions = {}) {
    this.schema = normalizeSchema(options.schema ?? "public");
    if (options.pool) {
      this.pool = options.pool;
      this.ownsPool = false;
    } else {
      this.pool = new Pool({
        connectionString: options.connectionString,
        ...options.poolConfig,
      });
      this.ownsPool = true;
    }
    this.initialized = this.initialize();
  }

  /**
   * Loads one session by id.
   *
   * @returns The decoded record, or `undefined` when no row has that id.
   */
  async getSession(id: string): Promise<SessionRecord | undefined> {
    await this.ready();
    const result = await this.pool.query<SessionRow>(
      `SELECT id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
FROM ${this.table("sessions")}
WHERE id = $1`,
      [id],
    );
    const [row] = result.rows;
    if (!row) {
      return undefined;
    }
    return decodeSessionRow(row);
  }

  /**
   * Lists sessions ordered by creation time, then id.
   *
   * @param request - Optional cursor (stringified offset) and page size.
   */
  async listSessions(request: ListPageRequest = {}): Promise<ListPage<SessionRecord>> {
    await this.ready();
    const offset = parseCursor(request.cursor);
    const limit = normalizeLimit(request.limit);
    const rowsResult = await this.pool.query<SessionRow>(
      `SELECT id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
FROM ${this.table("sessions")}
ORDER BY created_at ASC, id ASC
LIMIT $1 OFFSET $2`,
      [limit, offset],
    );
    // A separate COUNT decides whether another page exists.
    const countResult = await this.pool.query<{ count: string }>(`SELECT COUNT(*) AS count FROM ${this.table("sessions")}`);
    const total = parseInteger(countResult.rows[0]?.count ?? "0");
    const nextOffset = offset + rowsResult.rows.length;
    return {
      items: rowsResult.rows.map(decodeSessionRow),
      nextCursor: nextOffset < total ? String(nextOffset) : undefined,
    };
  }

  /** Inserts the session, or overwrites every column of the existing row with the same id. */
  async updateSession(session: SessionRecord): Promise<void> {
    await this.ready();
    await this.pool.query(
      `INSERT INTO ${this.table("sessions")} (
id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
ON CONFLICT(id) DO UPDATE SET
agent = EXCLUDED.agent,
agent_session_id = EXCLUDED.agent_session_id,
last_connection_id = EXCLUDED.last_connection_id,
created_at = EXCLUDED.created_at,
destroyed_at = EXCLUDED.destroyed_at,
sandbox_id = EXCLUDED.sandbox_id,
session_init_json = EXCLUDED.session_init_json,
config_options_json = EXCLUDED.config_options_json,
modes_json = EXCLUDED.modes_json`,
      [
        session.id,
        session.agent,
        session.agentSessionId,
        session.lastConnectionId,
        session.createdAt,
        session.destroyedAt ?? null,
        session.sandboxId ?? null,
        session.sessionInit ? JSON.stringify(session.sessionInit) : null,
        session.configOptions ? JSON.stringify(session.configOptions) : null,
        session.modes !== undefined ? JSON.stringify(session.modes) : null,
      ],
    );
  }

  /**
   * Lists the events of one session ordered by event index, then id.
   *
   * @param request - Session id plus optional cursor (stringified offset) and page size.
   */
  async listEvents(request: ListEventsRequest): Promise<ListPage<SessionEvent>> {
    await this.ready();
    const offset = parseCursor(request.cursor);
    const limit = normalizeLimit(request.limit);
    const rowsResult = await this.pool.query<EventRow>(
      `SELECT id, event_index, session_id, created_at, connection_id, sender, payload_json
FROM ${this.table("events")}
WHERE session_id = $1
ORDER BY event_index ASC, id ASC
LIMIT $2 OFFSET $3`,
      [request.sessionId, limit, offset],
    );
    const countResult = await this.pool.query<{ count: string }>(`SELECT COUNT(*) AS count FROM ${this.table("events")} WHERE session_id = $1`, [
      request.sessionId,
    ]);
    const total = parseInteger(countResult.rows[0]?.count ?? "0");
    const nextOffset = offset + rowsResult.rows.length;
    return {
      items: rowsResult.rows.map(decodeEventRow),
      nextCursor: nextOffset < total ? String(nextOffset) : undefined,
    };
  }

  /**
   * Inserts the event, or overwrites the existing row with the same id.
   *
   * @param _sessionId - Unused; the session id stored is `event.sessionId`.
   * @param event - Event to persist.
   */
  async insertEvent(_sessionId: string, event: SessionEvent): Promise<void> {
    await this.ready();
    await this.pool.query(
      `INSERT INTO ${this.table("events")} (
id, event_index, session_id, created_at, connection_id, sender, payload_json
) VALUES ($1, $2, $3, $4, $5, $6, $7)
ON CONFLICT(id) DO UPDATE SET
event_index = EXCLUDED.event_index,
session_id = EXCLUDED.session_id,
created_at = EXCLUDED.created_at,
connection_id = EXCLUDED.connection_id,
sender = EXCLUDED.sender,
payload_json = EXCLUDED.payload_json`,
      [
        event.id,
        event.eventIndex,
        event.sessionId,
        event.createdAt,
        event.connectionId,
        event.sender,
        // Serialize explicitly, as updateSession does for its JSON columns: the
        // pg driver would otherwise send a JS array as a Postgres array literal
        // and a string verbatim, neither of which is valid JSON for JSONB.
        JSON.stringify(event.payload),
      ],
    );
  }

  /** Ends the pool when this driver created it; a caller-supplied pool is left open. */
  async close(): Promise<void> {
    if (!this.ownsPool) {
      return;
    }
    await this.pool.end();
  }

  /** Waits for (and re-throws any failure of) the initialization started in the constructor. */
  private async ready(): Promise<void> {
    await this.initialized;
  }

  /** Returns the double-quoted, schema-qualified name of a driver table. */
  private table(name: "sessions" | "events"): string {
    return `"${this.schema}"."${name}"`;
  }

  /**
   * Creates the schema and both tables if missing, then upgrades existing
   * tables in place. Every statement is written so it can be re-run.
   */
  private async initialize(): Promise<void> {
    await this.pool.query(`CREATE SCHEMA IF NOT EXISTS "${this.schema}"`);
    await this.pool.query(`
CREATE TABLE IF NOT EXISTS ${this.table("sessions")} (
id TEXT PRIMARY KEY,
agent TEXT NOT NULL,
agent_session_id TEXT NOT NULL,
last_connection_id TEXT NOT NULL,
created_at BIGINT NOT NULL,
destroyed_at BIGINT,
sandbox_id TEXT,
session_init_json JSONB,
config_options_json JSONB,
modes_json JSONB
)
`);
    // Add columns that a pre-existing sessions table may lack.
    await this.pool.query(`
ALTER TABLE ${this.table("sessions")}
ADD COLUMN IF NOT EXISTS sandbox_id TEXT
`);
    await this.pool.query(`
ALTER TABLE ${this.table("sessions")}
ADD COLUMN IF NOT EXISTS config_options_json JSONB
`);
    await this.pool.query(`
ALTER TABLE ${this.table("sessions")}
ADD COLUMN IF NOT EXISTS modes_json JSONB
`);
    await this.pool.query(`
CREATE TABLE IF NOT EXISTS ${this.table("events")} (
id TEXT PRIMARY KEY,
event_index BIGINT NOT NULL,
session_id TEXT NOT NULL,
created_at BIGINT NOT NULL,
connection_id TEXT NOT NULL,
sender TEXT NOT NULL,
payload_json JSONB NOT NULL
)
`);
    // Force the id column to TEXT, casting any existing values.
    await this.pool.query(`
ALTER TABLE ${this.table("events")}
ALTER COLUMN id TYPE TEXT USING id::TEXT
`);
    await this.pool.query(`
ALTER TABLE ${this.table("events")}
ADD COLUMN IF NOT EXISTS event_index BIGINT
`);
    // Backfill event_index for rows that have none: number each session's
    // events by (created_at, id) order.
    await this.pool.query(`
WITH ranked AS (
SELECT id, ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at ASC, id ASC) AS ranked_index
FROM ${this.table("events")}
)
UPDATE ${this.table("events")} AS current_events
SET event_index = ranked.ranked_index
FROM ranked
WHERE current_events.id = ranked.id
AND current_events.event_index IS NULL
`);
    await this.pool.query(`
ALTER TABLE ${this.table("events")}
ALTER COLUMN event_index SET NOT NULL
`);
    await this.pool.query(`
CREATE INDEX IF NOT EXISTS idx_events_session_order
ON ${this.table("events")}(session_id, event_index, id)
`);
  }
}
/** Shape of a row read from the `sessions` table by getSession and listSessions. */
type SessionRow = {
id: string;
agent: string;
agent_session_id: string;
last_connection_id: string;
// Integer columns are typed string | number; decodeSessionRow normalizes them with parseInteger.
created_at: string | number;
destroyed_at: string | number | null;
sandbox_id: string | null;
// JSON columns are used as returned by the driver (no JSON.parse in decodeSessionRow).
session_init_json: unknown | null;
config_options_json: unknown | null;
modes_json: unknown | null;
};
/** Shape of a row read from the `events` table by listEvents. */
type EventRow = {
// decodeEventRow converts the id with String(), so a numeric id is accepted here.
id: string | number;
// Integer columns are typed string | number; decodeEventRow normalizes them with parseInteger.
event_index: string | number;
session_id: string;
created_at: string | number;
connection_id: string;
// Validated by parseSender when the row is decoded.
sender: string;
payload_json: unknown;
};
/**
 * Maps a `sessions` row onto a SessionRecord.
 *
 * Integer columns go through parseInteger; a NULL `destroyed_at`/`sandbox_id`
 * and any falsy JSON column become `undefined`.
 *
 * @throws Error when an integer column cannot be parsed.
 */
function decodeSessionRow(row: SessionRow): SessionRecord {
  const createdAt = parseInteger(row.created_at);

  let destroyedAt: number | undefined;
  if (row.destroyed_at !== null) {
    destroyedAt = parseInteger(row.destroyed_at);
  }

  const sandboxId = row.sandbox_id ?? undefined;
  const sessionInit = (row.session_init_json || undefined) as SessionRecord["sessionInit"];
  const configOptions = (row.config_options_json || undefined) as SessionRecord["configOptions"];
  const modes = (row.modes_json || undefined) as SessionRecord["modes"];

  return {
    id: row.id,
    agent: row.agent,
    agentSessionId: row.agent_session_id,
    lastConnectionId: row.last_connection_id,
    createdAt,
    destroyedAt,
    sandboxId,
    sessionInit,
    configOptions,
    modes,
  };
}
/**
 * Maps an `events` row onto a SessionEvent.
 *
 * The id is stringified, integer columns go through parseInteger, and the
 * sender is validated by parseSender.
 *
 * @throws Error when an integer column or the sender value is invalid.
 */
function decodeEventRow(row: EventRow): SessionEvent {
  const id = String(row.id);
  const eventIndex = parseInteger(row.event_index);
  const createdAt = parseInteger(row.created_at);
  const sender = parseSender(row.sender);
  const payload = row.payload_json as SessionEvent["payload"];

  return {
    id,
    eventIndex,
    sessionId: row.session_id,
    createdAt,
    connectionId: row.connection_id,
    sender,
    payload,
  };
}
/**
 * Resolves the page size for a list request.
 *
 * @param limit - Requested page size, if any.
 * @returns `limit` rounded down when it is a finite number of at least 1,
 *          otherwise DEFAULT_LIST_LIMIT.
 */
function normalizeLimit(limit: number | undefined): number {
  if (limit === undefined || !Number.isFinite(limit)) {
    return DEFAULT_LIST_LIMIT;
  }
  if (limit < 1) {
    return DEFAULT_LIST_LIMIT;
  }
  return Math.floor(limit);
}
/**
 * Converts a page cursor into a row offset.
 *
 * @param cursor - Stringified offset produced by a previous page, if any.
 * @returns The parsed base-10 offset, or 0 when the cursor is missing, empty,
 *          unparsable or negative.
 */
function parseCursor(cursor: string | undefined): number {
  const offset = cursor ? Number.parseInt(cursor, 10) : 0;
  return Number.isFinite(offset) && offset >= 0 ? offset : 0;
}
/**
 * Normalizes a numeric column value to a JS number.
 *
 * Numbers are passed through; strings are parsed base-10 with Number.parseInt.
 *
 * @throws Error when the result is not a finite number.
 */
function parseInteger(value: string | number): number {
  let parsed: number;
  if (typeof value === "number") {
    parsed = value;
  } else {
    parsed = Number.parseInt(value, 10);
  }

  if (Number.isFinite(parsed)) {
    return parsed;
  }
  throw new Error(`Invalid integer value returned by postgres: ${String(value)}`);
}
/**
 * Validates the `sender` column of an event row.
 *
 * @returns The value unchanged when it is "agent" or "client".
 * @throws Error for any other value.
 */
function parseSender(value: string): SessionEvent["sender"] {
  switch (value) {
    case "agent":
    case "client":
      return value;
    default:
      throw new Error(`Invalid sender value returned by postgres: ${value}`);
  }
}
/**
 * Checks that a schema name is a plain identifier before it is interpolated
 * into SQL text.
 *
 * @returns The name unchanged when it starts with a letter or underscore and
 *          continues with letters, digits or underscores.
 * @throws Error for any other name.
 */
function normalizeSchema(schema: string): string {
  const isPlainIdentifier = /^[A-Za-z_][A-Za-z0-9_]*$/.test(schema);
  if (isPlainIdentifier) {
    return schema;
  }
  throw new Error(`Invalid schema name '${schema}'. Use letters, numbers, and underscores only.`);
}

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -8,10 +8,11 @@
}, },
"dependencies": { "dependencies": {
"@sandbox-agent/example-shared": "workspace:*", "@sandbox-agent/example-shared": "workspace:*",
"@sandbox-agent/persist-sqlite": "workspace:*", "better-sqlite3": "^11.0.0",
"sandbox-agent": "workspace:*" "sandbox-agent": "workspace:*"
}, },
"devDependencies": { "devDependencies": {
"@types/better-sqlite3": "^7.0.0",
"@types/node": "latest", "@types/node": "latest",
"tsx": "latest", "tsx": "latest",
"typescript": "latest" "typescript": "latest"

View file

@ -1,5 +1,5 @@
import { SandboxAgent } from "sandbox-agent"; import { SandboxAgent } from "sandbox-agent";
import { SQLiteSessionPersistDriver } from "@sandbox-agent/persist-sqlite"; import { SQLiteSessionPersistDriver } from "./persist.ts";
import { startDockerSandbox } from "@sandbox-agent/example-shared/docker"; import { startDockerSandbox } from "@sandbox-agent/example-shared/docker";
import { detectAgent } from "@sandbox-agent/example-shared"; import { detectAgent } from "@sandbox-agent/example-shared";

View file

@ -0,0 +1,310 @@
import Database from "better-sqlite3";
import type { ListEventsRequest, ListPage, ListPageRequest, SessionEvent, SessionPersistDriver, SessionRecord } from "sandbox-agent";
/** Page size used by the list methods when the caller supplies no usable limit. */
const DEFAULT_LIST_LIMIT = 100;
/** Construction options for SQLiteSessionPersistDriver. */
export interface SQLiteSessionPersistDriverOptions {
/** Database filename handed to better-sqlite3; the driver falls back to ":memory:" when omitted. */
filename?: string;
}
/**
 * Session persistence driver backed by a better-sqlite3 database.
 *
 * Session records live in a `sessions` table and session events in an `events`
 * table. The constructor opens the database and creates or upgrades both
 * tables before returning. The public methods contain no awaits; they are
 * declared `async` so they return promises as the interface requires.
 *
 * List methods page with offset cursors: `nextCursor` is the stringified
 * offset of the next row, or `undefined` once the last row was returned.
 */
export class SQLiteSessionPersistDriver implements SessionPersistDriver {
  private readonly db: Database.Database;

  /**
   * @param options - `filename` of the database; defaults to ":memory:".
   */
  constructor(options: SQLiteSessionPersistDriverOptions = {}) {
    this.db = new Database(options.filename ?? ":memory:");
    this.initialize();
  }

  /**
   * Loads one session by id.
   *
   * @returns The decoded record, or `undefined` when no row has that id.
   */
  async getSession(id: string): Promise<SessionRecord | undefined> {
    const row = this.db
      .prepare(
        `SELECT id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
FROM sessions WHERE id = ?`,
      )
      .get(id) as SessionRow | undefined;
    if (!row) {
      return undefined;
    }
    return decodeSessionRow(row);
  }

  /**
   * Lists sessions ordered by creation time, then id.
   *
   * @param request - Optional cursor (stringified offset) and page size.
   */
  async listSessions(request: ListPageRequest = {}): Promise<ListPage<SessionRecord>> {
    const offset = parseCursor(request.cursor);
    const limit = normalizeLimit(request.limit);
    const rows = this.db
      .prepare(
        `SELECT id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
FROM sessions
ORDER BY created_at ASC, id ASC
LIMIT ? OFFSET ?`,
      )
      .all(limit, offset) as SessionRow[];
    // A separate COUNT decides whether another page exists.
    const countRow = this.db.prepare(`SELECT COUNT(*) as count FROM sessions`).get() as { count: number };
    const nextOffset = offset + rows.length;
    return {
      items: rows.map(decodeSessionRow),
      nextCursor: nextOffset < countRow.count ? String(nextOffset) : undefined,
    };
  }

  /** Inserts the session, or overwrites every column of the existing row with the same id. */
  async updateSession(session: SessionRecord): Promise<void> {
    this.db
      .prepare(
        `INSERT INTO sessions (
id, agent, agent_session_id, last_connection_id, created_at, destroyed_at, sandbox_id, session_init_json, config_options_json, modes_json
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
agent = excluded.agent,
agent_session_id = excluded.agent_session_id,
last_connection_id = excluded.last_connection_id,
created_at = excluded.created_at,
destroyed_at = excluded.destroyed_at,
sandbox_id = excluded.sandbox_id,
session_init_json = excluded.session_init_json,
config_options_json = excluded.config_options_json,
modes_json = excluded.modes_json`,
      )
      .run(
        session.id,
        session.agent,
        session.agentSessionId,
        session.lastConnectionId,
        session.createdAt,
        session.destroyedAt ?? null,
        session.sandboxId ?? null,
        session.sessionInit ? JSON.stringify(session.sessionInit) : null,
        session.configOptions ? JSON.stringify(session.configOptions) : null,
        session.modes !== undefined ? JSON.stringify(session.modes) : null,
      );
  }

  /**
   * Lists the events of one session ordered by event index, then id.
   *
   * @param request - Session id plus optional cursor (stringified offset) and page size.
   */
  async listEvents(request: ListEventsRequest): Promise<ListPage<SessionEvent>> {
    const offset = parseCursor(request.cursor);
    const limit = normalizeLimit(request.limit);
    const rows = this.db
      .prepare(
        `SELECT id, event_index, session_id, created_at, connection_id, sender, payload_json
FROM events
WHERE session_id = ?
ORDER BY event_index ASC, id ASC
LIMIT ? OFFSET ?`,
      )
      .all(request.sessionId, limit, offset) as EventRow[];
    const countRow = this.db.prepare(`SELECT COUNT(*) as count FROM events WHERE session_id = ?`).get(request.sessionId) as { count: number };
    const nextOffset = offset + rows.length;
    return {
      items: rows.map(decodeEventRow),
      nextCursor: nextOffset < countRow.count ? String(nextOffset) : undefined,
    };
  }

  /**
   * Inserts the event, or overwrites the existing row with the same id.
   *
   * @param _sessionId - Unused; the session id stored is `event.sessionId`.
   * @param event - Event to persist; its payload is stored as JSON text.
   */
  async insertEvent(_sessionId: string, event: SessionEvent): Promise<void> {
    this.db
      .prepare(
        `INSERT INTO events (
id, event_index, session_id, created_at, connection_id, sender, payload_json
) VALUES (?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(id) DO UPDATE SET
event_index = excluded.event_index,
session_id = excluded.session_id,
created_at = excluded.created_at,
connection_id = excluded.connection_id,
sender = excluded.sender,
payload_json = excluded.payload_json`,
      )
      .run(event.id, event.eventIndex, event.sessionId, event.createdAt, event.connectionId, event.sender, JSON.stringify(event.payload));
  }

  /** Closes the underlying database handle. */
  close(): void {
    this.db.close();
  }

  /** Creates the sessions table if missing, adds any missing columns, then prepares the events table. */
  private initialize(): void {
    this.db.exec(`
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
agent TEXT NOT NULL,
agent_session_id TEXT NOT NULL,
last_connection_id TEXT NOT NULL,
created_at INTEGER NOT NULL,
destroyed_at INTEGER,
sandbox_id TEXT,
session_init_json TEXT,
config_options_json TEXT,
modes_json TEXT
)
`);
    // The column list is read once; each missing column is added individually.
    const sessionColumns = this.db.prepare(`PRAGMA table_info(sessions)`).all() as TableInfoRow[];
    if (!sessionColumns.some((column) => column.name === "sandbox_id")) {
      this.db.exec(`ALTER TABLE sessions ADD COLUMN sandbox_id TEXT`);
    }
    if (!sessionColumns.some((column) => column.name === "config_options_json")) {
      this.db.exec(`ALTER TABLE sessions ADD COLUMN config_options_json TEXT`);
    }
    if (!sessionColumns.some((column) => column.name === "modes_json")) {
      this.db.exec(`ALTER TABLE sessions ADD COLUMN modes_json TEXT`);
    }
    this.ensureEventsTable();
  }

  /**
   * Creates the events table when absent; otherwise rebuilds it when its `id`
   * column is not TEXT or it lacks `event_index`, and makes sure the ordering
   * index exists.
   */
  private ensureEventsTable(): void {
    const tableInfo = this.db.prepare(`PRAGMA table_info(events)`).all() as TableInfoRow[];
    // PRAGMA table_info yields no rows here, which the code treats as "table missing".
    if (tableInfo.length === 0) {
      this.createEventsTable();
      return;
    }
    const idColumn = tableInfo.find((column) => column.name === "id");
    const hasEventIndex = tableInfo.some((column) => column.name === "event_index");
    const idType = (idColumn?.type ?? "").trim().toUpperCase();
    const idIsText = idType === "TEXT";
    if (!idIsText || !hasEventIndex) {
      this.rebuildEventsTable(hasEventIndex);
    }
    // Run after a possible rebuild as well as for an up-to-date table.
    this.db.exec(`
CREATE INDEX IF NOT EXISTS idx_events_session_order
ON events(session_id, event_index, id)
`);
  }

  /** Creates the events table and its ordering index if they do not exist. */
  private createEventsTable(): void {
    this.db.exec(`
CREATE TABLE IF NOT EXISTS events (
id TEXT PRIMARY KEY,
event_index INTEGER NOT NULL,
session_id TEXT NOT NULL,
created_at INTEGER NOT NULL,
connection_id TEXT NOT NULL,
sender TEXT NOT NULL,
payload_json TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_events_session_order
ON events(session_id, event_index, id)
`);
  }

  /**
   * Replaces the events table with the current layout, copying all rows.
   *
   * The old table is renamed to `events_legacy`, a fresh table is created,
   * rows are copied with ids cast to TEXT and a per-session `event_index`
   * (existing value when present, otherwise the row's position by
   * created_at/id), and the legacy table is dropped.
   *
   * All four steps run in one transaction so that a failure rolls the rename
   * back instead of leaving the data stranded in `events_legacy`.
   *
   * @param hasEventIndex - Whether the old table already has an `event_index` column.
   */
  private rebuildEventsTable(hasEventIndex: boolean): void {
    const rebuild = this.db.transaction(() => {
      this.db.exec(`
ALTER TABLE events RENAME TO events_legacy;
`);
      this.createEventsTable();
      if (hasEventIndex) {
        this.db.exec(`
INSERT INTO events (id, event_index, session_id, created_at, connection_id, sender, payload_json)
SELECT
CAST(id AS TEXT),
COALESCE(event_index, ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at ASC, id ASC)),
session_id,
created_at,
connection_id,
sender,
payload_json
FROM events_legacy
`);
      } else {
        this.db.exec(`
INSERT INTO events (id, event_index, session_id, created_at, connection_id, sender, payload_json)
SELECT
CAST(id AS TEXT),
ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY created_at ASC, id ASC),
session_id,
created_at,
connection_id,
sender,
payload_json
FROM events_legacy
`);
      }
      this.db.exec(`DROP TABLE events_legacy`);
    });
    rebuild();
  }
}
/** Shape of a row read from the `sessions` table by getSession and listSessions. */
type SessionRow = {
id: string;
agent: string;
agent_session_id: string;
last_connection_id: string;
created_at: number;
destroyed_at: number | null;
sandbox_id: string | null;
// JSON columns hold serialized text; decodeSessionRow runs JSON.parse on non-empty values.
session_init_json: string | null;
config_options_json: string | null;
modes_json: string | null;
};
/** Shape of a row read from the `events` table by listEvents. */
type EventRow = {
id: string;
event_index: number;
session_id: string;
created_at: number;
connection_id: string;
// Typed as the two allowed senders; decodeEventRow copies it without a runtime check.
sender: "client" | "agent";
// Serialized JSON text; decodeEventRow runs JSON.parse on it.
payload_json: string;
};
/** The two fields this file reads from each `PRAGMA table_info(...)` result row. */
type TableInfoRow = {
// Column name.
name: string;
// Declared column type; compared against "TEXT" after trim/upper-casing.
type: string;
};
/**
 * Maps a `sessions` row onto a SessionRecord.
 *
 * NULL `destroyed_at`/`sandbox_id` become `undefined`. Each JSON column is
 * parsed when it holds a non-empty string and becomes `undefined` otherwise.
 *
 * @throws SyntaxError when a JSON column holds malformed JSON.
 */
function decodeSessionRow(row: SessionRow): SessionRecord {
  const parseOptionalJson = (text: string | null): unknown => (text ? JSON.parse(text) : undefined);

  const sessionInit = parseOptionalJson(row.session_init_json) as SessionRecord["sessionInit"];
  const configOptions = parseOptionalJson(row.config_options_json) as SessionRecord["configOptions"];
  const modes = parseOptionalJson(row.modes_json) as SessionRecord["modes"];

  return {
    id: row.id,
    agent: row.agent,
    agentSessionId: row.agent_session_id,
    lastConnectionId: row.last_connection_id,
    createdAt: row.created_at,
    destroyedAt: row.destroyed_at ?? undefined,
    sandboxId: row.sandbox_id ?? undefined,
    sessionInit,
    configOptions,
    modes,
  };
}
/**
 * Maps an `events` row onto a SessionEvent, parsing the stored payload text as JSON.
 *
 * @throws SyntaxError when `payload_json` holds malformed JSON.
 */
function decodeEventRow(row: EventRow): SessionEvent {
  const { id, event_index, session_id, created_at, connection_id, sender, payload_json } = row;
  return {
    id,
    eventIndex: event_index,
    sessionId: session_id,
    createdAt: created_at,
    connectionId: connection_id,
    sender,
    payload: JSON.parse(payload_json),
  };
}
/**
 * Picks the page size for a list query.
 *
 * @param limit - Caller-requested page size, possibly absent.
 * @returns The floor of `limit` when it is finite and at least 1; otherwise
 *          DEFAULT_LIST_LIMIT.
 */
function normalizeLimit(limit: number | undefined): number {
  const requested = limit ?? Number.NaN;
  const usable = Number.isFinite(requested) && requested >= 1;
  return usable ? Math.floor(requested) : DEFAULT_LIST_LIMIT;
}
/**
 * Turns a page cursor back into a row offset.
 *
 * @param cursor - Stringified offset from an earlier page, possibly absent.
 * @returns The base-10 offset; 0 for a missing, empty, unparsable or negative cursor.
 */
function parseCursor(cursor: string | undefined): number {
  if (cursor === undefined || cursor === "") {
    return 0;
  }
  const offset = Number.parseInt(cursor, 10);
  const valid = Number.isFinite(offset) && !(offset < 0);
  return valid ? offset : 0;
}

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -9,7 +9,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = path.resolve(__dirname, "..", "..", ".."); const REPO_ROOT = path.resolve(__dirname, "..", "..", "..");
/** Pre-built Docker image with all agents installed. */ /** Pre-built Docker image with all agents installed. */
export const FULL_IMAGE = "rivetdev/sandbox-agent:0.3.1-full"; export const FULL_IMAGE = "rivetdev/sandbox-agent:0.4.2-full";
export interface DockerSandboxOptions { export interface DockerSandboxOptions {
/** Container port used by sandbox-agent inside Docker. */ /** Container port used by sandbox-agent inside Docker. */
@ -78,11 +78,11 @@ function readClaudeCredentialFiles(): ClaudeCredentialFile[] {
const candidates: Array<{ hostPath: string; containerPath: string }> = [ const candidates: Array<{ hostPath: string; containerPath: string }> = [
{ {
hostPath: path.join(homeDir, ".claude", ".credentials.json"), hostPath: path.join(homeDir, ".claude", ".credentials.json"),
containerPath: "/root/.claude/.credentials.json", containerPath: ".claude/.credentials.json",
}, },
{ {
hostPath: path.join(homeDir, ".claude-oauth-credentials.json"), hostPath: path.join(homeDir, ".claude-oauth-credentials.json"),
containerPath: "/root/.claude-oauth-credentials.json", containerPath: ".claude-oauth-credentials.json",
}, },
]; ];
@ -180,10 +180,9 @@ export async function startDockerSandbox(opts: DockerSandboxOptions): Promise<Do
const credentialBootstrapCommands = claudeCredentialFiles.flatMap((file, index) => { const credentialBootstrapCommands = claudeCredentialFiles.flatMap((file, index) => {
const envKey = `SANDBOX_AGENT_CLAUDE_CREDENTIAL_${index}_B64`; const envKey = `SANDBOX_AGENT_CLAUDE_CREDENTIAL_${index}_B64`;
bootstrapEnv[envKey] = file.base64Content; bootstrapEnv[envKey] = file.base64Content;
return [ // Use $HOME-relative paths so credentials work regardless of container user
`mkdir -p ${shellSingleQuotedLiteral(path.posix.dirname(file.containerPath))}`, const containerDir = path.posix.dirname(file.containerPath);
`printf %s "$${envKey}" | base64 -d > ${shellSingleQuotedLiteral(file.containerPath)}`, return [`mkdir -p "$HOME/${containerDir}"`, `printf %s "$${envKey}" | base64 -d > "$HOME/${file.containerPath}"`];
];
}); });
setupCommands.unshift(...credentialBootstrapCommands); setupCommands.unshift(...credentialBootstrapCommands);
} }
@ -200,8 +199,9 @@ export async function startDockerSandbox(opts: DockerSandboxOptions): Promise<Do
const container = await docker.createContainer({ const container = await docker.createContainer({
Image: image, Image: image,
Entrypoint: ["/bin/sh", "-c"],
WorkingDir: "/home/sandbox", WorkingDir: "/home/sandbox",
Cmd: ["sh", "-c", bootCommands.join(" && ")], Cmd: [bootCommands.join(" && ")],
Env: [...Object.entries(credentialEnv).map(([key, value]) => `${key}=${value}`), ...Object.entries(bootstrapEnv).map(([key, value]) => `${key}=${value}`)], Env: [...Object.entries(credentialEnv).map(([key, value]) => `${key}=${value}`), ...Object.entries(bootstrapEnv).map(([key, value]) => `${key}=${value}`)],
ExposedPorts: { [`${port}/tcp`]: {} }, ExposedPorts: { [`${port}/tcp`]: {} },
HostConfig: { HostConfig: {
@ -253,10 +253,13 @@ export async function startDockerSandbox(opts: DockerSandboxOptions): Promise<Do
try { try {
await container.remove({ force: true }); await container.remove({ force: true });
} catch {} } catch {}
};
const signalCleanup = async () => {
await cleanup();
process.exit(0); process.exit(0);
}; };
process.once("SIGINT", cleanup); process.once("SIGINT", signalCleanup);
process.once("SIGTERM", cleanup); process.once("SIGTERM", signalCleanup);
return { baseUrl, cleanup }; return { baseUrl, cleanup };
} }

Some files were not shown because too many files have changed in this diff Show more