Compare commits

...

44 commits

Author SHA1 Message Date
Nathan Flurry
bf484e7c96 docs: clean up orphaned docs and add session event types
Delete orphaned docs not in docs.json navigation (gigacode.mdx,
foundry-self-hosting.mdx, session-transcript-schema.mdx, pi-support-plan.md).
Remove outdated musl/glibc troubleshooting section. Add event types
documentation with example payloads to agent-sessions.mdx.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-25 19:11:19 -07:00
Nathan Flurry
d55b0dfb88 chore(release): update version to 0.4.2 2026-03-25 18:07:26 -07:00
ABC
251f731232
Merge pull request #284 from rivet-dev/03-25-fix_mock_pass_sandbox_agent_bin_to_mock_agent_launcher
fix(mock): pass SANDBOX_AGENT_BIN to mock agent launcher
2026-03-25 17:00:51 -04:00
abcxff
b45989a082 fix(mock): pass SANDBOX_AGENT_BIN to mock agent launcher 2026-03-25 16:54:40 -04:00
Nathan Flurry
78e84281e8 chore(release): update version to 0.4.1 2026-03-25 13:20:57 -07:00
Nathan Flurry
5da35e6dfa feat: sprites support 2026-03-25 12:23:14 -07:00
ABC
9cd9252725
Merge pull request #283 from rivet-dev/03-25-chore_providers_move_back_to_0.4.x_install_script
chore(providers): sync install script with latest 0.4.x
2026-03-25 14:24:55 -04:00
abcxff
858b9a4d2f chore(providers): move back to 0.4.x install script 2026-03-25 14:22:57 -04:00
Nathan Flurry
4fa28061e9
Merge pull request #279 from rivet-dev/NicholasKissel/docs-dark-theme
fix(docs): restore dark theme styling
2026-03-24 23:26:28 -07:00
ABCxFF
cb42971b56 chore(release): update version to 0.5.0-rc.2 2026-03-25 05:13:47 +00:00
ABC
e9fabbfe64
fix: surface agent stderr in RPC errors & add defaultCwd param (#278) 2026-03-25 00:49:35 -04:00
ABC
32dd5914ed
Merge pull request #269 from rivet-dev/e2b-base-image-support
feat(providers): add base image support and improve forward compatibility
2026-03-25 00:42:49 -04:00
ABC
fe8fbfc91c
Merge branch 'main' into e2b-base-image-support 2026-03-25 00:37:58 -04:00
Nicholas Kissel
32713ff453 fix(docs): keep dark mode strict appearance
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 21:33:10 -07:00
abcxff
833b57deb1 fix: surface agent stderr in RPC errors and default cwd for remote providers 2026-03-25 04:26:48 +00:00
Nicholas Kissel
927e77c7e2 fix(docs): restore dark theme styling with custom CSS
Re-enable theme.css with full custom styling (links, inputs, cards,
code blocks, alerts) and update docs.json color values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 21:25:06 -07:00
Nathan Flurry
f353e39fc6
Merge pull request #273 from Crunchyman-ralph/fix/update-install-script-to-0.4.x
fix: update install script URL from 0.3.x to 0.4.x
2026-03-19 12:45:00 -07:00
Ralph Khreish
3525dcc315
fix: update install script URL from 0.3.x to 0.4.x
The E2B and Vercel providers install sandbox-agent 0.3.x inside sandboxes
while the SDK client speaks the 0.4.0 ACP protocol, causing AcpRpcError -32603.

Fixes #272
2026-03-19 17:45:16 +01:00
Nathan Flurry
7b23e519c2 fix(foundry): add Bun idleTimeout safety net and subscription retry with backoff
Bun.serve() defaults to a 10s idle timeout that can kill long-running
requests. Actor RPCs go through the gateway tunnel with a 1s SSE ping,
so this likely never fires, but set idleTimeout to 255 seconds (Bun's maximum) as a safety net.

Subscription topics (app, org, session, task) previously had no retry
mechanism. If the initial connection failed or a mid-session error occurred,
the subscription stayed in error state permanently. Add exponential
backoff retry (1s base, 30s max) that cleans up the old connection
before each attempt and stops when disposed or no listeners remain.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 18:35:36 -07:00
Nathan Flurry
bea3b58199 fix(foundry): use $HOME instead of hardcoded /home/sandbox for sandbox repo paths
E2B sandboxes run as `user` (home: /home/user), not `sandbox`, so
`mkdir -p /home/sandbox` fails with "Permission denied". Replace all
hardcoded `/home/sandbox` paths with `$HOME` resolved at shell runtime
inside the sandbox, and dynamically resolve the repo CWD via the sandbox
actor so it works across providers (E2B, local Docker, Daytona).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 17:33:53 -07:00
Nathan Flurry
524f40ec02 feat(providers): simplify modal to use published base image
The `-full` base image already includes sandbox-agent and all agents
pre-installed. Remove redundant apt-get, install script, and
install-agent dockerfile commands from the Modal provider.

Also allow overriding the default image via SANDBOX_AGENT_IMAGE env var
across all providers for testing with different published versions.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 16:54:20 -07:00
Nathan Flurry
4e76038a0d feat(providers): add base image support and improve forward compatibility
Add support for configuring base images across all compute providers:
- E2B: Accept optional `template` parameter to select custom templates
- Modal: Accept optional `image` parameter (string or Image object) for base images
- ComputeSDK: Expand `create` override to accept full CreateSandboxOptions payload (image, templateId, etc.)
- Daytona: Improve type safety for `image` option

Improve forward compatibility by making all `create` overrides accept full Partial SDK types, allowing any new provider fields to flow through without code changes. Fix Modal provider bug where `encryptedPorts` was hardcoded and would clobber user-provided values; now merges additional ports instead.

Update docs and examples to demonstrate base image configuration for E2B, Modal, and ComputeSDK. Add comprehensive provider lifecycle tests for Modal and ComputeSDK, including template and image passthrough verification.

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-03-17 15:25:21 -07:00
Nathan Flurry
ffb9f1082b fix(foundry): fix runner version 2026-03-17 14:33:13 -07:00
Nathan Flurry
f25a92aca8 chore(release): update version to 0.5.0-rc.1 2026-03-17 02:44:41 -07:00
Nathan Flurry
3b8c74589d
Merge pull request #264 from rivet-dev/desktop-computer-use-neko
feat: desktop computer-use APIs with neko streaming
2026-03-17 02:36:50 -07:00
Nathan Flurry
dff7614b11 feat: desktop computer-use APIs with windows, launch/open, and neko streaming
Adds desktop computer-use endpoints (windows, screenshots, mouse/keyboard,
launch/open), enhances neko-based streaming integration, updates inspector
UI with desktop debug tab, and adds common software test infrastructure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
2d8508d6e2 feat: enhance desktop computer-use streaming with neko integration
Improve desktop streaming architecture, add inspector dev tooling,
React DesktopViewer updates, and computer-use documentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
4252c705df chore: remove .context/ from git and add to .gitignore
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:36:25 -07:00
Nathan Flurry
33821d8660 feat: desktop computer-use APIs with neko-based streaming
Add desktop runtime management (Xvfb, openbox, dbus), screen capture,
mouse/keyboard input, and video streaming via neko binary extracted
from the m1k1o/neko container. Includes Docker test rig, TypeScript SDK
desktop support, and inspector Desktop tab.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:36:17 -07:00
Nathan Flurry
3895e34bdb feat(foundry): add foundry base sandbox image with sudo, chromium, and dev tooling
Add a custom Docker image (foundry-base.Dockerfile) that builds sandbox-agent
from source and layers sudo, git, neovim, gh, node, bun, chromium, and
agent-browser. Includes publish script for timestamped + latest tags to
rivetdev/sandbox-agent on Docker Hub.

Update local sandbox provider default to use foundry-base-latest and wire
HF_LOCAL_SANDBOX_IMAGE env var through compose.dev.yaml.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-17 02:09:12 -07:00
Nathan Flurry
eafe0f9fe4 fix(foundry): use IF NOT EXISTS in org migration to handle pre-existing auth tables
Some org actors had auth tables created outside the migration system
(by earlier queue-based auth code). Migration m0001 fails with
"table auth_session_index already exists" on those actors, preventing
them from starting.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:45:34 -07:00
Nathan Flurry
6ebe13cddd fix(foundry): use cookie-based OAuth state to prevent proxy retry auth failures
Switch storeStateStrategy from "database" to "cookie" so OAuth state is
stored encrypted in a temporary cookie instead of a DB verification record.
This makes the callback idempotent — proxy retries can't fail because the
state travels with the request itself rather than being deleted after the
first successful callback.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:37:54 -07:00
Nathan Flurry
8ddec6831b fix(foundry): deduplicate OAuth callbacks and cache actor handles to fix production auth
The production proxy chain (Cloudflare -> Fastly -> Railway) retries
OAuth callback requests when they take >10s. The first request succeeds
and deletes the verification record, so the retry fails with
"verification not found" -> ?error=please_restart_the_process.

- Add callback deduplication by OAuth state param in the auth handler.
  Duplicate requests wait for the original and return a cloned response.
- Cache appOrganization() and getUser() actor handles to eliminate
  redundant getOrCreate RPCs during callbacks (was 10+ per sign-in).
- Add diagnostic logging for auth callback timing and adapter operations.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 22:29:17 -07:00
Nathan Flurry
4ca77e4d83 Merge remote-tracking branch 'origin/main' into fix-foundry-auth-error 2026-03-16 21:26:25 -07:00
Nathan Flurry
e7b9ac6854 fix(foundry): move Better Auth operations from queues to actions to fix production auth timeout
The org actor's workflow queue is shared with GitHub sync, webhooks, task
mutations, and billing (20+ queue names processed sequentially). During
OAuth callback, auth operations would time out waiting behind long-running
queue handlers, causing Better Auth's parseState to redirect to
?error=please_restart_the_process.

Auth operations are simple SQLite reads/writes with no cross-actor side
effects, so they are safe to run as actions that execute immediately
without competing in the queue.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 21:26:13 -07:00
Nathan Flurry
eab215c7cb feat(foundry): redirect to signin page on auth API errors
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 19:34:16 -07:00
Nathan Flurry
84a80d59d7
Merge pull request #265 from rivet-dev/revert-actions-to-queues
feat(foundry): revert actions to queue/workflow pattern
2026-03-16 18:48:21 -07:00
Nathan Flurry
a171956298 feat(foundry): revert actions to queue/workflow pattern with direct sends
Revert actor communication from direct action calls to queue/workflow-based
patterns for better observability (workflow history in RivetKit inspector),
replay/recovery semantics, and idiomatic RivetKit usage.

- Add queue/workflow infrastructure to all actors: organization, task, user,
  github-data, sandbox, and audit-log
- Mutations route through named queues processed by workflow command loops
  with ctx.step() wrapping for c.state/c.db access and observability
- Remove command action wrappers (~460 lines) — callers use .send() directly
  to queue names with expectQueueResponse() for wait:true results
- Keep sendPrompt and runProcess as direct sandbox actions (long-running /
  large responses that would block the workflow loop or exceed 128KB limit)
- Fix workspace fire-and-forget calls (enqueueWorkspaceEnsureSession,
  enqueueWorkspaceRefresh) to self-send to task queue instead of calling
  directly outside workflow step context

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 18:46:53 -07:00
Nathan Flurry
4111aebfce
feat(foundry): task owner git auth + manual owner change UI (#263)
* Add task owner git auth proposal and sandbox architecture docs

- Add proposal for primary user per task with OAuth token injection
  for sandbox git operations (.context/proposal-task-owner-git-auth.md)
- Document sandbox architecture constraints in CLAUDE.md: single sandbox
  per task assumption, OAuth token security implications, git auto-auth
  requirement, and git error surfacing rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Add proposals for reverting to queues and rivetkit sandbox resilience

- proposal-revert-actions-to-queues.md: Detailed plan for reverting the
  actions-only pattern back to queues/workflows now that the RivetKit
  queue.iter() bug is fixed. Lists what to keep (lazy tasks, resolveTaskRepoId,
  sync override threading, E2B fixes, frontend fixes) vs what to revert
  (communication pattern only).

- proposal-rivetkit-sandbox-resilience.md: Rivetkit sandbox actor changes for
  handling destroyed/paused sandboxes, keep-alive, and the UNIQUE constraint
  crash fix.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat(foundry): add manual task owner change via UI dropdown

Add an owner dropdown to the Overview tab that lets users reassign task
ownership to any organization member. The owner's GitHub credentials are
used for git operations in the sandbox.

Full-stack implementation:
- Backend: changeTaskOwnerManually action on task actor, routed through
  org actor's changeWorkspaceTaskOwner action, with primaryUser schema
  columns on both task and org index tables
- Client: changeOwner method on workspace client (mock + remote)
- Frontend: owner dropdown in right sidebar Overview tab showing org
  members, with avatar and role display
- Shared: TaskWorkspaceChangeOwnerInput type and primaryUser fields on
  workspace snapshot types

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 17:05:11 -07:00
Nathan Flurry
167712ace7 chore(release): update version to 0.4.1-rc.1 2026-03-16 15:53:00 -07:00
Nathan Flurry
9ce71c03c8
Merge pull request #261 from rivet-dev/e2b-autopause-provider
feat: add E2B auto-pause provider lifecycle support
2026-03-16 15:39:45 -07:00
Nathan Flurry
f45a467484
chore(foundry): migrate to actions (#262)
* feat(foundry): checkpoint actor and workspace refactor

* docs(foundry): add agent handoff context

* wip(foundry): continue actor refactor

* wip(foundry): capture remaining local changes

* Complete Foundry refactor checklist

* Fix Foundry validation fallout

* wip

* wip: convert all actors from workflow to plain run handlers

Workaround for RivetKit bug where c.queue.iter() never yields messages
for actors created via getOrCreate from another actor's context. The
queue accepts messages (visible in inspector) but the iterator hangs.
Sleep/wake fixes it, but actors with active connections never sleep.

Converted organization, github-data, task, and user actors from
run: workflow(...) to plain run: async (c) => { for await ... }.

Also fixes:
- Missing auth tables in org migration (auth_verification etc)
- default_model NOT NULL constraint on org profile upsert
- Nested workflow step in github-data (HistoryDivergedError)
- Removed --force from frontend Dockerfile pnpm install

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Convert all actors from queues/workflows to direct actions, lazy task creation

Major refactor replacing all queue-based workflow communication with direct
RivetKit action calls across all actors. This works around a RivetKit bug
where c.queue.iter() deadlocks for actors created from another actor's context.

Key changes:
- All actors (organization, task, user, audit-log, github-data) converted
  from run: workflow(...) to actions-only (no run handler, no queues)
- PR sync creates virtual task entries in org local DB instead of spawning
  task actors — prevents OOM from 200+ actors created simultaneously
- Task actors created lazily on first user interaction via getOrCreate,
  self-initialize from org's getTaskIndexEntry data
- Removed requireRepoExists cross-actor call (caused 500s), replaced with
  local resolveTaskRepoId from org's taskIndex table
- Fixed getOrganizationContext to thread overrides through all sync phases
- Fixed sandbox repo path (/home/user/repo for E2B compatibility)
- Fixed buildSessionDetail to skip transcript fetch for pending sessions
- Added process crash protection (uncaughtException/unhandledRejection)
- Fixed React infinite render loop in mock-layout useEffect dependencies
- Added sandbox listProcesses error handling for expired E2B sandboxes
- Set E2B sandbox timeout to 1 hour (was 5 min default)
- Updated CLAUDE.md with lazy task creation rules, no-silent-catch policy,
  React hook dependency safety rules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Fix E2B sandbox timeout comment, frontend stability, and create-flow improvements

- Add TEMPORARY comment on E2B timeoutMs with pointer to rivetkit sandbox
  resilience proposal for when autoPause lands
- Fix React useEffect dependency stability in mock-layout and
  organization-dashboard to prevent infinite re-render loops
- Fix terminal-pane ref handling
- Improve create-flow service and tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 15:23:59 -07:00
Nathan Flurry
77c8f1e3f3 feat: add E2B auto-pause support with pause/kill/reconnect provider lifecycle
Add `pause()`, `kill()`, and `reconnect()` methods to the SandboxProvider interface so providers can support graceful suspension and permanent deletion as distinct operations. The E2B provider now uses `betaCreate` with `autoPause: true` by default, `betaPause()` for suspension, and surfaces `SandboxDestroyedError` on reconnect to a deleted sandbox. SDK exposes `pauseSandbox()` and `killSandbox()` alongside the existing `destroySandbox()`.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 14:57:49 -07:00
Nathan Flurry
32f3c6c3bc chore(release): update version to 0.4.0 2026-03-16 00:48:05 -07:00
314 changed files with 30667 additions and 10524 deletions

View file

@ -43,7 +43,7 @@ Manually verify the install script works in a fresh environment:
```bash ```bash
docker run --rm alpine:latest sh -c " docker run --rm alpine:latest sh -c "
apk add --no-cache curl ca-certificates libstdc++ libgcc bash && apk add --no-cache curl ca-certificates libstdc++ libgcc bash &&
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh &&
sandbox-agent --version sandbox-agent --version
" "
``` ```

1
.gitignore vendored
View file

@ -59,3 +59,4 @@ sdks/cli/platforms/*/bin/
# Foundry desktop app build artifacts # Foundry desktop app build artifacts
foundry/packages/desktop/frontend-dist/ foundry/packages/desktop/frontend-dist/
foundry/packages/desktop/src-tauri/sidecars/ foundry/packages/desktop/src-tauri/sidecars/
.context/

View file

@ -20,20 +20,7 @@
- For HTTP/CLI docs/examples, source of truth is: - For HTTP/CLI docs/examples, source of truth is:
- `server/packages/sandbox-agent/src/router.rs` - `server/packages/sandbox-agent/src/router.rs`
- `server/packages/sandbox-agent/src/cli.rs` - `server/packages/sandbox-agent/src/cli.rs`
- Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy `/v1/sessions` APIs). - Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy session REST APIs).
## E2E Agent Testing
- When asked to test agents e2e and you do not have the API tokens/credentials required, always stop and ask the user where to find the tokens before proceeding.
## ACP Adapter Audit
- `scripts/audit-acp-deps/adapters.json` is the single source of truth for ACP adapter npm packages, pinned versions, and the `@agentclientprotocol/sdk` pin.
- The Rust fallback install path in `server/packages/agent-management/src/agents.rs` reads adapter entries from `adapters.json` at compile time via `include_str!`.
- Run `cd scripts/audit-acp-deps && npx tsx audit.ts` to compare our pinned versions against the ACP registry and npm latest.
- When bumping an adapter version, update `adapters.json` only — the Rust code picks it up automatically.
- When adding a new agent, add an entry to `adapters.json` (the `_` fallback arm in `install_agent_process_fallback` handles it).
- When updating the `@agentclientprotocol/sdk` pin, update both `adapters.json` (sdkDeps) and `sdks/acp-http-client/package.json`.
## Change Tracking ## Change Tracking
@ -43,41 +30,22 @@
- Regenerate `docs/openapi.json` when HTTP contracts change. - Regenerate `docs/openapi.json` when HTTP contracts change.
- Keep `docs/inspector.mdx` and `docs/sdks/typescript.mdx` aligned with implementation. - Keep `docs/inspector.mdx` and `docs/sdks/typescript.mdx` aligned with implementation.
- Append blockers/decisions to `research/acp/friction.md` during ACP work. - Append blockers/decisions to `research/acp/friction.md` during ACP work.
- Each agent has its own doc page at `docs/agents/<name>.mdx` listing models, modes, and thought levels. Update the relevant page when changing `fallback_config_options`. To regenerate capability data, run `cd scripts/agent-configs && npx tsx dump.ts`. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`). - `docs/agent-capabilities.mdx` lists models/modes/thought levels per agent. Update it when adding a new agent or changing `fallback_config_options`. If its "Last updated" date is >2 weeks old, re-run `cd scripts/agent-configs && npx tsx dump.ts` and update the doc to match. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`).
- Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier. - Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier.
## Adding Providers ## Docker Test Image
When adding a new sandbox provider, update all of the following: - Docker-backed Rust and TypeScript tests build `docker/test-agent/Dockerfile` directly in-process and cache the image tag only in memory (`OnceLock` in Rust, module-level variable in TypeScript).
- Do not add cross-process image-build scripts unless there is a concrete need for them.
- `sdks/typescript/src/providers/<name>.ts` — provider implementation ## Common Software Sync
- `sdks/typescript/package.json` — add `./<name>` export, peerDependencies, peerDependenciesMeta, devDependencies
- `sdks/typescript/tsup.config.ts` — add entry point and external
- `sdks/typescript/tests/providers.test.ts` — add test entry
- `examples/<name>/` — create example with `src/index.ts` and `tests/<name>.test.ts`
- `docs/deploy/<name>.mdx` — create deploy guide
- `docs/docs.json` — add to Deploy pages navigation
- `docs/quickstart.mdx` — add tab in "Start the sandbox" step, add credentials entry in "Passing LLM credentials" accordion
## Adding Agents - These three files must stay in sync:
- `docs/common-software.mdx` (user-facing documentation)
When adding a new agent, update all of the following: - `docker/test-common-software/Dockerfile` (packages installed in the test image)
- `server/packages/sandbox-agent/tests/common_software.rs` (test assertions)
- `docs/agents/<name>.mdx` — create agent page with usage snippet and capabilities table - When adding or removing software from `docs/common-software.mdx`, also add/remove the corresponding `apt-get install` line in the Dockerfile and add/remove the test in `common_software.rs`.
- `docs/docs.json` — add to the Agents group under Agent - Run `cargo test -p sandbox-agent --test common_software` to verify.
- `docs/quickstart.mdx` — add tab in the "Create a session and send a prompt" CodeGroup
## Persist Packages (Deprecated)
- The `@sandbox-agent/persist-*` npm packages (`persist-sqlite`, `persist-postgres`, `persist-indexeddb`, `persist-rivet`) are deprecated stubs. They still publish to npm but throw a deprecation error at import time.
- Driver implementations now live inline in examples and consuming packages:
- SQLite: `examples/persist-sqlite/src/persist.ts`
- Postgres: `examples/persist-postgres/src/persist.ts`
- IndexedDB: `frontend/packages/inspector/src/persist-indexeddb.ts`
- Rivet: inlined in `docs/multiplayer.mdx`
- In-memory: built into the main `sandbox-agent` SDK (`InMemorySessionPersistDriver`)
- Docs (`docs/session-persistence.mdx`) link to the example implementations on GitHub instead of referencing the packages.
- Do not re-add `@sandbox-agent/persist-*` as dependencies anywhere. New persist drivers should be copied into the consuming project directly.
## Install Version References ## Install Version References
@ -93,28 +61,20 @@ When adding a new agent, update all of the following:
- `docs/sdk-overview.mdx` - `docs/sdk-overview.mdx`
- `docs/react-components.mdx` - `docs/react-components.mdx`
- `docs/session-persistence.mdx` - `docs/session-persistence.mdx`
- `docs/architecture.mdx`
- `docs/deploy/local.mdx` - `docs/deploy/local.mdx`
- `docs/deploy/cloudflare.mdx` - `docs/deploy/cloudflare.mdx`
- `docs/deploy/vercel.mdx` - `docs/deploy/vercel.mdx`
- `docs/deploy/daytona.mdx` - `docs/deploy/daytona.mdx`
- `docs/deploy/e2b.mdx` - `docs/deploy/e2b.mdx`
- `docs/deploy/docker.mdx` - `docs/deploy/docker.mdx`
- `docs/deploy/boxlite.mdx`
- `docs/deploy/modal.mdx`
- `docs/deploy/computesdk.mdx`
- `frontend/packages/website/src/components/GetStarted.tsx` - `frontend/packages/website/src/components/GetStarted.tsx`
- `.claude/commands/post-release-testing.md` - `.claude/commands/post-release-testing.md`
- `examples/cloudflare/Dockerfile` - `examples/cloudflare/Dockerfile`
- `examples/boxlite/Dockerfile`
- `examples/boxlite-python/Dockerfile`
- `examples/daytona/src/index.ts` - `examples/daytona/src/index.ts`
- `examples/shared/src/docker.ts` - `examples/shared/src/docker.ts`
- `examples/docker/src/index.ts` - `examples/docker/src/index.ts`
- `examples/e2b/src/index.ts` - `examples/e2b/src/index.ts`
- `examples/vercel/src/index.ts` - `examples/vercel/src/index.ts`
- `sdks/typescript/src/providers/shared.ts`
- `scripts/release/main.ts` - `scripts/release/main.ts`
- `scripts/release/promote-artifacts.ts` - `scripts/release/promote-artifacts.ts`
- `scripts/release/sdk.ts` - `scripts/release/sdk.ts`
- `scripts/sandbox-testing/test-sandbox.ts`

View file

@ -4,7 +4,7 @@ members = ["server/packages/*", "gigacode"]
exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"] exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"]
[workspace.package] [workspace.package]
version = "0.4.0-rc.3" version = "0.4.2"
edition = "2021" edition = "2021"
authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ] authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
license = "Apache-2.0" license = "Apache-2.0"
@ -13,13 +13,13 @@ description = "Universal API for automatic coding agents in sandboxes. Supports
[workspace.dependencies] [workspace.dependencies]
# Internal crates # Internal crates
sandbox-agent = { version = "0.4.0-rc.3", path = "server/packages/sandbox-agent" } sandbox-agent = { version = "0.4.2", path = "server/packages/sandbox-agent" }
sandbox-agent-error = { version = "0.4.0-rc.3", path = "server/packages/error" } sandbox-agent-error = { version = "0.4.2", path = "server/packages/error" }
sandbox-agent-agent-management = { version = "0.4.0-rc.3", path = "server/packages/agent-management" } sandbox-agent-agent-management = { version = "0.4.2", path = "server/packages/agent-management" }
sandbox-agent-agent-credentials = { version = "0.4.0-rc.3", path = "server/packages/agent-credentials" } sandbox-agent-agent-credentials = { version = "0.4.2", path = "server/packages/agent-credentials" }
sandbox-agent-opencode-adapter = { version = "0.4.0-rc.3", path = "server/packages/opencode-adapter" } sandbox-agent-opencode-adapter = { version = "0.4.2", path = "server/packages/opencode-adapter" }
sandbox-agent-opencode-server-manager = { version = "0.4.0-rc.3", path = "server/packages/opencode-server-manager" } sandbox-agent-opencode-server-manager = { version = "0.4.2", path = "server/packages/opencode-server-manager" }
acp-http-adapter = { version = "0.4.0-rc.3", path = "server/packages/acp-http-adapter" } acp-http-adapter = { version = "0.4.2", path = "server/packages/acp-http-adapter" }
# Serialization # Serialization
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -80,11 +80,11 @@ Import the SDK directly into your Node or browser application. Full type safety
**Install** **Install**
```bash ```bash
npm install sandbox-agent@0.3.x npm install sandbox-agent@0.4.x
``` ```
```bash ```bash
bun add sandbox-agent@0.3.x bun add sandbox-agent@0.4.x
# Optional: allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Optional: allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -135,7 +135,7 @@ Run as an HTTP server and connect from any language. Deploy to E2B, Daytona, Ver
```bash ```bash
# Install it # Install it
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Run it # Run it
sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468 sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468
``` ```
@ -159,12 +159,12 @@ sandbox-agent server --no-token --host 127.0.0.1 --port 2468
Install the CLI wrapper (optional but convenient): Install the CLI wrapper (optional but convenient):
```bash ```bash
npm install -g @sandbox-agent/cli@0.3.x npm install -g @sandbox-agent/cli@0.4.x
``` ```
```bash ```bash
# Allow Bun to run postinstall scripts for native binaries. # Allow Bun to run postinstall scripts for native binaries.
bun add -g @sandbox-agent/cli@0.3.x bun add -g @sandbox-agent/cli@0.4.x
bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -179,11 +179,11 @@ sandbox-agent api sessions send-message-stream my-session --message "Hello" --en
You can also use npx like: You can also use npx like:
```bash ```bash
npx @sandbox-agent/cli@0.3.x --help npx @sandbox-agent/cli@0.4.x --help
``` ```
```bash ```bash
bunx @sandbox-agent/cli@0.3.x --help bunx @sandbox-agent/cli@0.4.x --help
``` ```
[CLI documentation](https://sandboxagent.dev/docs/cli) [CLI documentation](https://sandboxagent.dev/docs/cli)

View file

@ -0,0 +1,7 @@
# Development image that starts the inspector frontend's Vite dev server.
FROM node:22-bookworm-slim
# Install pnpm globally, pinned to an exact version.
RUN npm install -g pnpm@10.28.2
WORKDIR /app
# No sources are copied into the image; presumably the repository is mounted
# at /app at run time (TODO confirm against the compose/run configuration).
# On start: install dependencies for @sandbox-agent/inspector (the trailing
# `...` in the pnpm filter also selects that package's dependencies), then
# exec Vite listening on 0.0.0.0:5173.
CMD ["bash", "-lc", "pnpm install --filter @sandbox-agent/inspector... && cd frontend/packages/inspector && exec pnpm vite --host 0.0.0.0 --port 5173"]

View file

@ -149,7 +149,8 @@ FROM debian:bookworm-slim
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
ca-certificates \ ca-certificates \
curl \ curl \
git && \ git \
ffmpeg && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Copy the binary from builder # Copy the binary from builder

View file

@ -0,0 +1,61 @@
# Multi-stage build: compile sandbox-agent, pull the neko binary from its
# upstream image, then assemble a Node-based image with the desktop packages.

# Stage 1: build the sandbox-agent release binary.
FROM rust:1.88.0-bookworm AS builder
WORKDIR /build
# Copy only the workspace manifests and the directories listed below into the build context.
COPY Cargo.toml Cargo.lock ./
COPY server/ ./server/
COPY gigacode/ ./gigacode/
COPY resources/agent-schemas/artifacts/ ./resources/agent-schemas/artifacts/
COPY scripts/agent-configs/ ./scripts/agent-configs/
COPY scripts/audit-acp-deps/ ./scripts/audit-acp-deps/
# NOTE(review): presumably makes the build skip embedding the inspector frontend — confirm against the build script.
ENV SANDBOX_AGENT_SKIP_INSPECTOR=1
# The cargo registry, git checkouts and target/ are BuildKit cache mounts.
# Cache-mount contents are not written to the image layer, so the binary is
# copied out of target/ to /sandbox-agent within the same RUN.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/build/target \
cargo build -p sandbox-agent --release && \
cp target/release/sandbox-agent /sandbox-agent
# Extract neko binary from the official image for WebRTC desktop streaming.
# Using neko v3 base image from GHCR which provides multi-arch support (amd64, arm64).
# Pinned by digest to prevent breaking changes from upstream.
# Reference client: https://github.com/demodesk/neko-client/blob/37f93eae6bd55b333c94bd009d7f2b079075a026/src/component/internal/webrtc.ts
FROM ghcr.io/m1k1o/neko/base@sha256:0c384afa56268aaa2d5570211d284763d0840dcdd1a7d9a24be3081d94d3dfce AS neko-base
# Final stage: runtime image.
FROM node:22-bookworm-slim
# Install the X11 (xvfb, openbox, xdotool), imaging/recording (imagemagick,
# ffmpeg) and GStreamer packages in one layer. apt output is discarded
# (> /dev/null 2>&1), so a failing install prints no apt diagnostics; the apt
# package lists are removed afterwards.
RUN apt-get update -qq && \
apt-get install -y -qq --no-install-recommends \
ca-certificates \
bash \
libstdc++6 \
xvfb \
openbox \
xdotool \
imagemagick \
ffmpeg \
gstreamer1.0-tools \
gstreamer1.0-plugins-base \
gstreamer1.0-plugins-good \
gstreamer1.0-plugins-bad \
gstreamer1.0-plugins-ugly \
gstreamer1.0-nice \
gstreamer1.0-x \
gstreamer1.0-pulseaudio \
libxcvt0 \
x11-xserver-utils \
dbus-x11 \
xauth \
fonts-dejavu-core \
xterm \
> /dev/null 2>&1 && \
rm -rf /var/lib/apt/lists/*
# Bring in the binaries produced by the two earlier stages.
COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
COPY --from=neko-base /usr/bin/neko /usr/local/bin/neko
# HTTP port the server is started on by the CMD below.
EXPOSE 3000
# Expose UDP port range for WebRTC media transport
EXPOSE 59050-59070/udp
# Default command: run the server on 0.0.0.0:3000 with the --no-token flag.
ENTRYPOINT ["/usr/local/bin/sandbox-agent"]
CMD ["server", "--host", "0.0.0.0", "--port", "3000", "--no-token"]

View file

@ -0,0 +1,37 @@
# Extends the base test-agent image with common software pre-installed.
# Used by the common_software integration test to verify that all documented
# software in docs/common-software.mdx works correctly inside the sandbox.
#
# KEEP IN SYNC with docs/common-software.mdx
# Image to extend; override at build time with --build-arg BASE_IMAGE=<image>.
ARG BASE_IMAGE=sandbox-agent-test:dev
FROM ${BASE_IMAGE}
# Switch to root for the package installation below.
USER root
# Install all packages in a single layer. apt output is discarded
# (> /dev/null 2>&1), so a failing install prints no apt diagnostics in the
# build log. The apt package lists are removed afterwards.
RUN apt-get update -qq && \
apt-get install -y -qq --no-install-recommends \
# Browsers
chromium \
firefox-esr \
# Languages
python3 python3-pip python3-venv \
default-jdk \
ruby-full \
# Databases
sqlite3 \
redis-server \
# Build tools
build-essential cmake pkg-config \
# CLI tools
git jq tmux \
# Media and graphics
imagemagick \
poppler-utils \
# Desktop apps
gimp \
> /dev/null 2>&1 && \
rm -rf /var/lib/apt/lists/*
# Default command: run the sandbox-agent server on 0.0.0.0:3000 with the --no-token flag.
ENTRYPOINT ["/usr/local/bin/sandbox-agent"]
CMD ["server", "--host", "0.0.0.0", "--port", "3000", "--no-token"]

View file

@ -51,6 +51,108 @@ await session.prompt([
unsubscribe(); unsubscribe();
``` ```
### Event types
Each event's `payload` contains a session update. The `sessionUpdate` field identifies the type.
<AccordionGroup>
<Accordion title="agent_message_chunk">
Streamed text or content from the agent's response.
```json
{
"sessionUpdate": "agent_message_chunk",
"content": { "type": "text", "text": "Here's how the repository is structured..." }
}
```
</Accordion>
<Accordion title="agent_thought_chunk">
Internal reasoning from the agent (chain-of-thought / extended thinking).
```json
{
"sessionUpdate": "agent_thought_chunk",
"content": { "type": "text", "text": "I should start by looking at the project structure..." }
}
```
</Accordion>
<Accordion title="user_message_chunk">
Echo of the user's prompt being processed.
```json
{
"sessionUpdate": "user_message_chunk",
"content": { "type": "text", "text": "Summarize the repository structure." }
}
```
</Accordion>
<Accordion title="tool_call">
The agent invoked a tool (file edit, terminal command, etc.).
```json
{
"sessionUpdate": "tool_call",
"toolCallId": "tc_abc123",
"title": "Read file",
"status": "in_progress",
"rawInput": { "path": "/src/index.ts" }
}
```
</Accordion>
<Accordion title="tool_call_update">
Progress or result update for an in-progress tool call.
```json
{
"sessionUpdate": "tool_call_update",
"toolCallId": "tc_abc123",
"status": "completed",
"content": [{ "type": "text", "text": "import express from 'express';\n..." }]
}
```
</Accordion>
<Accordion title="plan">
The agent's execution plan for the current task.
```json
{
"sessionUpdate": "plan",
"entries": [
{ "content": "Read the project structure", "status": "completed" },
{ "content": "Identify main entrypoints", "status": "in_progress" },
{ "content": "Write summary", "status": "pending" }
]
}
```
</Accordion>
<Accordion title="usage_update">
Token usage metrics for the current turn.
```json
{
"sessionUpdate": "usage_update"
}
```
</Accordion>
<Accordion title="session_info_update">
Session metadata changed (e.g. agent-generated title).
```json
{
"sessionUpdate": "session_info_update",
"title": "Repository structure analysis"
}
```
</Accordion>
</AccordionGroup>
## Fetch persisted event history ## Fetch persisted event history
```ts ```ts

View file

@ -56,7 +56,7 @@ Agents are installed lazily on first use. To avoid the cold-start delay, pre-ins
sandbox-agent install-agent --all sandbox-agent install-agent --all
``` ```
The `rivetdev/sandbox-agent:0.4.0-rc.3-full` Docker image ships with all agents pre-installed. The `rivetdev/sandbox-agent:0.4.2-full` Docker image ships with all agents pre-installed.
## Production-ready agent orchestration ## Production-ready agent orchestration

View file

@ -37,6 +37,36 @@ Notes:
- Set `SANDBOX_AGENT_LOG_STDOUT=1` to force stdout/stderr logging. - Set `SANDBOX_AGENT_LOG_STDOUT=1` to force stdout/stderr logging.
- Use `SANDBOX_AGENT_LOG_DIR` to override log directory. - Use `SANDBOX_AGENT_LOG_DIR` to override log directory.
## install
Install first-party runtime dependencies.
### install desktop
Install the Linux desktop runtime packages required by `/v1/desktop/*`.
```bash
sandbox-agent install desktop [OPTIONS]
```
| Option | Description |
|--------|-------------|
| `--yes` | Skip the confirmation prompt |
| `--print-only` | Print the package-manager command without executing it |
| `--package-manager <apt\|dnf\|apk>` | Override package-manager detection |
| `--no-fonts` | Skip the default DejaVu font package |
```bash
sandbox-agent install desktop --yes
sandbox-agent install desktop --print-only
```
Notes:
- Supported on Linux only.
- The command detects `apt`, `dnf`, or `apk`.
- If the command is not already running as root, it requires `sudo`.
## install-agent ## install-agent
Install or reinstall a single agent, or every supported agent with `--all`. Install or reinstall a single agent, or every supported agent with `--all`.

560
docs/common-software.mdx Normal file
View file

@ -0,0 +1,560 @@
---
title: "Common Software"
description: "Install browsers, languages, databases, and other tools inside the sandbox."
sidebarTitle: "Common Software"
icon: "box-open"
---
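The sandbox runs a Debian/Ubuntu base image. You can install software with `apt-get` via the [Process API](/processes) or by customizing your Docker image. This page covers commonly needed packages and how to install them. If the image was built with its apt package lists removed (as slim images usually are), run `apt-get update` once before using the `apt-get install` examples below.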
## Browsers
### Chromium
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "chromium", "chromium-sandbox"],
});
// Launch headless
await sdk.runProcess({
command: "chromium",
args: ["--headless", "--no-sandbox", "--disable-gpu", "https://example.com"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","chromium","chromium-sandbox"]}'
```
</CodeGroup>
<Note>
Use `--no-sandbox` when running Chromium inside a container. The container itself provides isolation.
</Note>
### Firefox
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "firefox-esr"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","firefox-esr"]}'
```
</CodeGroup>
### Playwright browsers
Playwright downloads and manages its own browser binaries. With Node.js installed, run the Playwright CLI through `npx` and let it download the browser together with its system dependencies (`--with-deps`).
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "npx",
args: ["playwright", "install", "--with-deps", "chromium"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"npx","args":["playwright","install","--with-deps","chromium"]}'
```
</CodeGroup>
---
## Languages and runtimes
### Node.js
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "nodejs", "npm"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","nodejs","npm"]}'
```
</CodeGroup>
For a specific version, use [nvm](https://github.com/nvm-sh/nvm):
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash && . ~/.nvm/nvm.sh && nvm install 22"],
});
```
### Python
Python 3 is typically pre-installed. To add pip and common packages:
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "python3", "python3-pip", "python3-venv"],
});
await sdk.runProcess({
command: "pip3",
args: ["install", "numpy", "pandas", "matplotlib"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","python3","python3-pip","python3-venv"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"pip3","args":["install","numpy","pandas","matplotlib"]}'
```
</CodeGroup>
### Go
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://go.dev/dl/go1.23.6.linux-amd64.tar.gz | tar -C /usr/local -xz"],
});
// Add to PATH for subsequent commands
await sdk.runProcess({
command: "bash",
args: ["-c", "export PATH=$PATH:/usr/local/go/bin && go version"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://go.dev/dl/go1.23.6.linux-amd64.tar.gz | tar -C /usr/local -xz"]}'
```
</CodeGroup>
### Rust
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl --proto =https --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y"]}'
```
</CodeGroup>
### Java (OpenJDK)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "default-jdk"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","default-jdk"]}'
```
</CodeGroup>
### Ruby
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "ruby-full"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","ruby-full"]}'
```
</CodeGroup>
---
## Databases
### PostgreSQL
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "postgresql", "postgresql-client"],
});
// Start the service
const proc = await sdk.createProcess({
command: "bash",
args: ["-c", "su - postgres -c 'pg_ctlcluster 15 main start'"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","postgresql","postgresql-client"]}'
```
</CodeGroup>
### SQLite
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "sqlite3"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","sqlite3"]}'
```
</CodeGroup>
### Redis
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "redis-server"],
});
const proc = await sdk.createProcess({
command: "redis-server",
args: ["--daemonize", "no"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","redis-server"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes" \
-H "Content-Type: application/json" \
-d '{"command":"redis-server","args":["--daemonize","no"]}'
```
</CodeGroup>
### MySQL / MariaDB
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "mariadb-server", "mariadb-client"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","mariadb-server","mariadb-client"]}'
```
</CodeGroup>
---
## Build tools
### Essential build toolchain
Most compiled software needs the standard build toolchain:
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "build-essential", "cmake", "pkg-config"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","build-essential","cmake","pkg-config"]}'
```
</CodeGroup>
This installs `gcc`, `g++`, `make`, `cmake`, and related tools.
---
## Desktop applications
These require the [Computer Use](/computer-use) desktop to be started first.
### LibreOffice
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "libreoffice"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","libreoffice"]}'
```
</CodeGroup>
### GIMP
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "gimp"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","gimp"]}'
```
</CodeGroup>
### VLC
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "vlc"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","vlc"]}'
```
</CodeGroup>
### VS Code (code-server)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://code-server.dev/install.sh | sh"],
});
const proc = await sdk.createProcess({
command: "code-server",
args: ["--bind-addr", "0.0.0.0:8080", "--auth", "none"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://code-server.dev/install.sh | sh"]}'
curl -X POST "http://127.0.0.1:2468/v1/processes" \
-H "Content-Type: application/json" \
-d '{"command":"code-server","args":["--bind-addr","0.0.0.0:8080","--auth","none"]}'
```
</CodeGroup>
---
## CLI tools
### Git
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "git"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","git"]}'
```
</CodeGroup>
### Docker
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "bash",
args: ["-c", "curl -fsSL https://get.docker.com | sh"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"bash","args":["-c","curl -fsSL https://get.docker.com | sh"]}'
```
</CodeGroup>
### jq
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "jq"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","jq"]}'
```
</CodeGroup>
### tmux
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "tmux"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","tmux"]}'
```
</CodeGroup>
---
## Media and graphics
### FFmpeg
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "ffmpeg"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","ffmpeg"]}'
```
</CodeGroup>
### ImageMagick
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "imagemagick"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","imagemagick"]}'
```
</CodeGroup>
### Poppler (PDF utilities)
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "poppler-utils"],
});
// Convert PDF to images
await sdk.runProcess({
command: "pdftoppm",
args: ["-png", "document.pdf", "output"],
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","poppler-utils"]}'
```
</CodeGroup>
---
## Pre-installing in a Docker image
For production use, install software in your Dockerfile instead of at runtime. This avoids repeated downloads and makes startup faster.
```dockerfile
# Debian bookworm provides the `chromium` and `firefox-esr` apt packages used
# on this page; those package names are not installable on ubuntu:22.04.
FROM debian:bookworm-slim
# Install everything in one layer and drop the apt package lists afterwards.
RUN apt-get update && apt-get install -y \
    chromium \
    firefox-esr \
    nodejs npm \
    python3 python3-pip \
    git curl wget \
    build-essential \
    sqlite3 \
    ffmpeg \
    imagemagick \
    jq \
    && rm -rf /var/lib/apt/lists/*
# Debian marks the system Python as externally managed (PEP 668), so a
# system-wide pip install needs an explicit opt-in inside the image.
RUN pip3 install --break-system-packages numpy pandas matplotlib
```
See [Docker deployment](/deploy/docker) for how to use custom images with Sandbox Agent.

859
docs/computer-use.mdx Normal file
View file

@ -0,0 +1,859 @@
---
title: "Computer Use"
description: "Control a virtual desktop inside the sandbox with mouse, keyboard, screenshots, recordings, and live streaming."
sidebarTitle: "Computer Use"
icon: "desktop"
---
Sandbox Agent provides a managed virtual desktop (Xvfb + openbox) that you can control programmatically. This is useful for browser automation, GUI testing, and AI computer-use workflows.
## Start and stop
<CodeGroup>
```ts TypeScript
import { SandboxAgent } from "sandbox-agent";
const sdk = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468",
});
const status = await sdk.startDesktop({
width: 1920,
height: 1080,
dpi: 96,
});
console.log(status.state); // "active"
console.log(status.display); // ":99"
// When done
await sdk.stopDesktop();
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/start" \
-H "Content-Type: application/json" \
-d '{"width":1920,"height":1080,"dpi":96}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/stop"
```
</CodeGroup>
All fields in the start request are optional. Defaults are 1440x900 at 96 DPI.
### Start request options
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `width` | number | 1440 | Desktop width in pixels |
| `height` | number | 900 | Desktop height in pixels |
| `dpi` | number | 96 | Display DPI |
| `displayNum` | number | 99 | Starting X display number. The runtime probes from this number upward to find an available display. |
| `stateDir` | string | (auto) | Desktop state directory for home, logs, recordings |
| `streamVideoCodec` | string | `"vp8"` | WebRTC video codec (`vp8`, `vp9`, `h264`) |
| `streamAudioCodec` | string | `"opus"` | WebRTC audio codec (`opus`, `g722`) |
| `streamFrameRate` | number | 30 | Streaming frame rate (1-60) |
| `webrtcPortRange` | string | `"59050-59070"` | UDP port range for WebRTC media |
| `recordingFps` | number | 30 | Default recording FPS when not specified in `startDesktopRecording` (1-60) |
The streaming and recording options configure defaults for the desktop session. They take effect when streaming or recording is started later.
<CodeGroup>
```ts TypeScript
const status = await sdk.startDesktop({
width: 1920,
height: 1080,
streamVideoCodec: "h264",
streamFrameRate: 60,
webrtcPortRange: "59100-59120",
recordingFps: 15,
});
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/start" \
-H "Content-Type: application/json" \
-d '{
"width": 1920,
"height": 1080,
"streamVideoCodec": "h264",
"streamFrameRate": 60,
"webrtcPortRange": "59100-59120",
"recordingFps": 15
}'
```
</CodeGroup>
## Status
<CodeGroup>
```ts TypeScript
const status = await sdk.getDesktopStatus();
console.log(status.state); // "inactive" | "active" | "failed" | ...
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/status"
```
</CodeGroup>
## Screenshots
Capture the full desktop or a specific region. Optionally include the cursor position.
<CodeGroup>
```ts TypeScript
// Full screenshot (PNG by default)
const png = await sdk.takeDesktopScreenshot();
// JPEG at 70% quality, half scale
const jpeg = await sdk.takeDesktopScreenshot({
format: "jpeg",
quality: 70,
scale: 0.5,
});
// Include cursor overlay
const withCursor = await sdk.takeDesktopScreenshot({
showCursor: true,
});
// Region screenshot
const region = await sdk.takeDesktopRegionScreenshot({
x: 100,
y: 100,
width: 400,
height: 300,
});
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/screenshot" --output screenshot.png
curl "http://127.0.0.1:2468/v1/desktop/screenshot?format=jpeg&quality=70&scale=0.5" \
--output screenshot.jpg
# Include cursor overlay
curl "http://127.0.0.1:2468/v1/desktop/screenshot?show_cursor=true" \
--output with_cursor.png
curl "http://127.0.0.1:2468/v1/desktop/screenshot/region?x=100&y=100&width=400&height=300" \
--output region.png
```
</CodeGroup>
### Screenshot options
| Param | Type | Default | Description |
|-------|------|---------|-------------|
| `format` | string | `"png"` | Output format: `png`, `jpeg`, or `webp` |
| `quality` | number | 85 | Compression quality (1-100, JPEG/WebP only) |
| `scale` | number | 1.0 | Scale factor (0.1-1.0) |
| `showCursor` | boolean | `false` | Composite a crosshair at the cursor position |
When `showCursor` is enabled, the cursor position is captured at the moment of the screenshot and a red crosshair is drawn at that location. This is useful for AI agents that need to see where the cursor is in the screenshot.
## Mouse
<CodeGroup>
```ts TypeScript
// Get current position
const pos = await sdk.getDesktopMousePosition();
console.log(pos.x, pos.y);
// Move
await sdk.moveDesktopMouse({ x: 500, y: 300 });
// Click (left by default)
await sdk.clickDesktop({ x: 500, y: 300 });
// Right click
await sdk.clickDesktop({ x: 500, y: 300, button: "right" });
// Double click
await sdk.clickDesktop({ x: 500, y: 300, clickCount: 2 });
// Drag
await sdk.dragDesktopMouse({
startX: 100, startY: 100,
endX: 400, endY: 400,
});
// Scroll
await sdk.scrollDesktop({ x: 500, y: 300, deltaY: -3 });
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/mouse/position"
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/click" \
-H "Content-Type: application/json" \
-d '{"x":500,"y":300}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/drag" \
-H "Content-Type: application/json" \
-d '{"startX":100,"startY":100,"endX":400,"endY":400}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/mouse/scroll" \
-H "Content-Type: application/json" \
-d '{"x":500,"y":300,"deltaY":-3}'
```
</CodeGroup>
## Keyboard
<CodeGroup>
```ts TypeScript
// Type text
await sdk.typeDesktopText({ text: "Hello, world!" });
// Press a key with modifiers
await sdk.pressDesktopKey({
key: "c",
modifiers: { ctrl: true },
});
// Low-level key down/up
await sdk.keyDownDesktop({ key: "Shift_L" });
await sdk.keyUpDesktop({ key: "Shift_L" });
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/keyboard/type" \
-H "Content-Type: application/json" \
-d '{"text":"Hello, world!"}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/keyboard/press" \
-H "Content-Type: application/json" \
-d '{"key":"c","modifiers":{"ctrl":true}}'
```
</CodeGroup>
## Clipboard
Read and write the X11 clipboard programmatically.
<CodeGroup>
```ts TypeScript
// Read clipboard
const clipboard = await sdk.getDesktopClipboard();
console.log(clipboard.text);
// Read primary selection (mouse-selected text)
const primary = await sdk.getDesktopClipboard({ selection: "primary" });
// Write to clipboard
await sdk.setDesktopClipboard({ text: "Pasted via API" });
// Write to both clipboard and primary selection
await sdk.setDesktopClipboard({
text: "Synced text",
selection: "both",
});
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/clipboard"
curl "http://127.0.0.1:2468/v1/desktop/clipboard?selection=primary"
curl -X POST "http://127.0.0.1:2468/v1/desktop/clipboard" \
-H "Content-Type: application/json" \
-d '{"text":"Pasted via API"}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/clipboard" \
-H "Content-Type: application/json" \
-d '{"text":"Synced text","selection":"both"}'
```
</CodeGroup>
The `selection` parameter controls which X11 selection to read or write:
| Value | Description |
|-------|-------------|
| `clipboard` (default) | The standard clipboard (Ctrl+C / Ctrl+V) |
| `primary` | The primary selection (text selected with the mouse) |
| `both` | Write to both clipboard and primary selection (write only) |
## Display and windows
<CodeGroup>
```ts TypeScript
const display = await sdk.getDesktopDisplayInfo();
console.log(display.resolution); // { width: 1920, height: 1080, dpi: 96 }
const { windows } = await sdk.listDesktopWindows();
for (const win of windows) {
console.log(win.title, win.x, win.y, win.width, win.height);
}
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/display/info"
curl "http://127.0.0.1:2468/v1/desktop/windows"
```
</CodeGroup>
The windows endpoint filters out noise automatically: window manager internals (Openbox), windows with empty titles, and tiny helper windows (under 120x80) are excluded. The currently active/focused window is always included regardless of filters.
### Focused window
Get the currently focused window without listing all windows.
<CodeGroup>
```ts TypeScript
const focused = await sdk.getDesktopFocusedWindow();
console.log(focused.title, focused.id);
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/windows/focused"
```
</CodeGroup>
Returns 404 if no window currently has focus.
### Window management
Focus, move, and resize windows by their X11 window ID.
<CodeGroup>
```ts TypeScript
const { windows } = await sdk.listDesktopWindows();
const win = windows[0];
// Bring window to foreground
await sdk.focusDesktopWindow(win.id);
// Move window
await sdk.moveDesktopWindow(win.id, { x: 100, y: 50 });
// Resize window
await sdk.resizeDesktopWindow(win.id, { width: 1280, height: 720 });
```
```bash cURL
# Focus a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/focus"
# Move a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/move" \
-H "Content-Type: application/json" \
-d '{"x":100,"y":50}'
# Resize a window
curl -X POST "http://127.0.0.1:2468/v1/desktop/windows/12345/resize" \
-H "Content-Type: application/json" \
-d '{"width":1280,"height":720}'
```
</CodeGroup>
All three endpoints return the updated window info so you can verify the operation took effect. The window manager may adjust the requested position or size.
## App launching
Launch applications or open files/URLs on the desktop without needing to shell out.
<CodeGroup>
```ts TypeScript
// Launch an app by name
const result = await sdk.launchDesktopApp({
app: "firefox",
args: ["--private"],
});
console.log(result.processId); // "proc_7"
// Launch and wait for the window to appear
const withWindow = await sdk.launchDesktopApp({
app: "xterm",
wait: true,
});
console.log(withWindow.windowId); // "12345" or null if timed out
// Open a URL with the default handler
const opened = await sdk.openDesktopTarget({
target: "https://example.com",
});
console.log(opened.processId);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/launch" \
-H "Content-Type: application/json" \
-d '{"app":"firefox","args":["--private"]}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/launch" \
-H "Content-Type: application/json" \
-d '{"app":"xterm","wait":true}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/open" \
-H "Content-Type: application/json" \
-d '{"target":"https://example.com"}'
```
</CodeGroup>
The returned `processId` can be used with the [Process API](/processes) to read logs (`GET /v1/processes/{id}/logs`) or stop the application (`POST /v1/processes/{id}/stop`).
When `wait` is `true`, the API polls for up to 5 seconds for a window to appear. If the window appears, its ID is returned in `windowId`. If it times out, `windowId` is `null` but the process is still running.
<Tip>
**Launch/Open vs the Process API:** Both `launch` and `open` are convenience wrappers around the [Process API](/processes). They create managed processes (with `owner: "desktop"`) that you can inspect, log, and stop through the same Process endpoints. The difference is that `launch` validates the binary exists in PATH first and can optionally wait for a window to appear, while `open` delegates to the system default handler (`xdg-open`). Use the Process API directly when you need full control over command, environment, working directory, or restart policies.
</Tip>
## Recording
Record the desktop to MP4.
<CodeGroup>
```ts TypeScript
const recording = await sdk.startDesktopRecording({ fps: 30 });
console.log(recording.id);
// ... do things ...
const stopped = await sdk.stopDesktopRecording();
// List all recordings
const { recordings } = await sdk.listDesktopRecordings();
// Download
const mp4 = await sdk.downloadDesktopRecording(recording.id);
// Clean up
await sdk.deleteDesktopRecording(recording.id);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/recording/start" \
-H "Content-Type: application/json" \
-d '{"fps":30}'
curl -X POST "http://127.0.0.1:2468/v1/desktop/recording/stop"
curl "http://127.0.0.1:2468/v1/desktop/recordings"
curl "http://127.0.0.1:2468/v1/desktop/recordings/rec_1/download" --output recording.mp4
curl -X DELETE "http://127.0.0.1:2468/v1/desktop/recordings/rec_1"
```
</CodeGroup>
## Desktop processes
The desktop runtime manages several background processes (Xvfb, openbox, neko, ffmpeg). These are all registered with the general [Process API](/processes) under the `desktop` owner, so you can inspect logs, check status, and troubleshoot using the same tools you use for any other managed process.
<CodeGroup>
```ts TypeScript
// List all processes, including desktop-owned ones
const { processes } = await sdk.listProcesses();
const desktopProcs = processes.filter((p) => p.owner === "desktop");
for (const p of desktopProcs) {
console.log(p.id, p.command, p.status);
}
// Read logs from a specific desktop process
const logs = await sdk.getProcessLogs(desktopProcs[0].id, { tail: 50 });
for (const entry of logs.entries) {
console.log(entry.stream, atob(entry.data));
}
```
```bash cURL
# List all processes (desktop processes have owner: "desktop")
curl "http://127.0.0.1:2468/v1/processes"
# Get logs from a specific desktop process
curl "http://127.0.0.1:2468/v1/processes/proc_1/logs?tail=50"
```
</CodeGroup>
The desktop status endpoint also includes a summary of running processes:
<CodeGroup>
```ts TypeScript
const status = await sdk.getDesktopStatus();
for (const proc of status.processes) {
console.log(proc.name, proc.pid, proc.running);
}
```
```bash cURL
curl "http://127.0.0.1:2468/v1/desktop/status"
# Response includes: processes: [{ name: "Xvfb", pid: 123, running: true }, ...]
```
</CodeGroup>
| Process | Role | Restart policy |
|---------|------|---------------|
| Xvfb | Virtual X11 framebuffer | Auto-restart while desktop is active |
| openbox | Window manager | Auto-restart while desktop is active |
| neko | WebRTC streaming server (started by `startDesktopStream`) | No auto-restart |
| ffmpeg | Screen recorder (started by `startDesktopRecording`) | No auto-restart |
## Live streaming
Start a WebRTC stream for real-time desktop viewing in a browser.
<CodeGroup>
```ts TypeScript
await sdk.startDesktopStream();
// Check stream status
const status = await sdk.getDesktopStreamStatus();
console.log(status.active); // true
console.log(status.processId); // "proc_5"
// Connect via the React DesktopViewer component or
// use the WebSocket signaling endpoint directly
// at ws://127.0.0.1:2468/v1/desktop/stream/signaling
await sdk.stopDesktopStream();
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/desktop/stream/start"
# Check stream status
curl "http://127.0.0.1:2468/v1/desktop/stream/status"
# Connect to ws://127.0.0.1:2468/v1/desktop/stream/signaling for WebRTC signaling
curl -X POST "http://127.0.0.1:2468/v1/desktop/stream/stop"
```
</CodeGroup>
For a drop-in React component, see [React Components](/react-components).
## API reference
### Endpoints
| Method | Path | Description |
|--------|------|-------------|
| `POST` | `/v1/desktop/start` | Start the desktop runtime |
| `POST` | `/v1/desktop/stop` | Stop the desktop runtime |
| `GET` | `/v1/desktop/status` | Get desktop runtime status |
| `GET` | `/v1/desktop/screenshot` | Capture full desktop screenshot |
| `GET` | `/v1/desktop/screenshot/region` | Capture a region screenshot |
| `GET` | `/v1/desktop/mouse/position` | Get current mouse position |
| `POST` | `/v1/desktop/mouse/move` | Move the mouse |
| `POST` | `/v1/desktop/mouse/click` | Click the mouse |
| `POST` | `/v1/desktop/mouse/down` | Press mouse button down |
| `POST` | `/v1/desktop/mouse/up` | Release mouse button |
| `POST` | `/v1/desktop/mouse/drag` | Drag from one point to another |
| `POST` | `/v1/desktop/mouse/scroll` | Scroll at a position |
| `POST` | `/v1/desktop/keyboard/type` | Type text |
| `POST` | `/v1/desktop/keyboard/press` | Press a key with optional modifiers |
| `POST` | `/v1/desktop/keyboard/down` | Press a key down (hold) |
| `POST` | `/v1/desktop/keyboard/up` | Release a key |
| `GET` | `/v1/desktop/display/info` | Get display info |
| `GET` | `/v1/desktop/windows` | List visible windows |
| `GET` | `/v1/desktop/windows/focused` | Get focused window info |
| `POST` | `/v1/desktop/windows/{id}/focus` | Focus a window |
| `POST` | `/v1/desktop/windows/{id}/move` | Move a window |
| `POST` | `/v1/desktop/windows/{id}/resize` | Resize a window |
| `GET` | `/v1/desktop/clipboard` | Read clipboard contents |
| `POST` | `/v1/desktop/clipboard` | Write to clipboard |
| `POST` | `/v1/desktop/launch` | Launch an application |
| `POST` | `/v1/desktop/open` | Open a file or URL |
| `POST` | `/v1/desktop/recording/start` | Start recording |
| `POST` | `/v1/desktop/recording/stop` | Stop recording |
| `GET` | `/v1/desktop/recordings` | List recordings |
| `GET` | `/v1/desktop/recordings/{id}` | Get recording metadata |
| `GET` | `/v1/desktop/recordings/{id}/download` | Download recording |
| `DELETE` | `/v1/desktop/recordings/{id}` | Delete recording |
| `POST` | `/v1/desktop/stream/start` | Start WebRTC streaming |
| `POST` | `/v1/desktop/stream/stop` | Stop WebRTC streaming |
| `GET` | `/v1/desktop/stream/status` | Get stream status |
| `GET` | `/v1/desktop/stream/signaling` | WebSocket for WebRTC signaling |
### TypeScript SDK methods
| Method | Returns | Description |
|--------|---------|-------------|
| `startDesktop(request?)` | `DesktopStatusResponse` | Start the desktop |
| `stopDesktop()` | `DesktopStatusResponse` | Stop the desktop |
| `getDesktopStatus()` | `DesktopStatusResponse` | Get desktop status |
| `takeDesktopScreenshot(query?)` | `Uint8Array` | Capture screenshot |
| `takeDesktopRegionScreenshot(query)` | `Uint8Array` | Capture region screenshot |
| `getDesktopMousePosition()` | `DesktopMousePositionResponse` | Get mouse position |
| `moveDesktopMouse(request)` | `DesktopMousePositionResponse` | Move mouse |
| `clickDesktop(request)` | `DesktopMousePositionResponse` | Click mouse |
| `mouseDownDesktop(request)` | `DesktopMousePositionResponse` | Mouse button down |
| `mouseUpDesktop(request)` | `DesktopMousePositionResponse` | Mouse button up |
| `dragDesktopMouse(request)` | `DesktopMousePositionResponse` | Drag mouse |
| `scrollDesktop(request)` | `DesktopMousePositionResponse` | Scroll |
| `typeDesktopText(request)` | `DesktopActionResponse` | Type text |
| `pressDesktopKey(request)` | `DesktopActionResponse` | Press key |
| `keyDownDesktop(request)` | `DesktopActionResponse` | Key down |
| `keyUpDesktop(request)` | `DesktopActionResponse` | Key up |
| `getDesktopDisplayInfo()` | `DesktopDisplayInfoResponse` | Get display info |
| `listDesktopWindows()` | `DesktopWindowListResponse` | List windows |
| `getDesktopFocusedWindow()` | `DesktopWindowInfo` | Get focused window |
| `focusDesktopWindow(id)` | `DesktopWindowInfo` | Focus a window |
| `moveDesktopWindow(id, request)` | `DesktopWindowInfo` | Move a window |
| `resizeDesktopWindow(id, request)` | `DesktopWindowInfo` | Resize a window |
| `getDesktopClipboard(query?)` | `DesktopClipboardResponse` | Read clipboard |
| `setDesktopClipboard(request)` | `DesktopActionResponse` | Write clipboard |
| `launchDesktopApp(request)` | `DesktopLaunchResponse` | Launch an app |
| `openDesktopTarget(request)` | `DesktopOpenResponse` | Open file/URL |
| `startDesktopRecording(request?)` | `DesktopRecordingInfo` | Start recording |
| `stopDesktopRecording()` | `DesktopRecordingInfo` | Stop recording |
| `listDesktopRecordings()` | `DesktopRecordingListResponse` | List recordings |
| `getDesktopRecording(id)` | `DesktopRecordingInfo` | Get recording |
| `downloadDesktopRecording(id)` | `Uint8Array` | Download recording |
| `deleteDesktopRecording(id)` | `void` | Delete recording |
| `startDesktopStream()` | `DesktopStreamStatusResponse` | Start streaming |
| `stopDesktopStream()` | `DesktopStreamStatusResponse` | Stop streaming |
| `getDesktopStreamStatus()` | `DesktopStreamStatusResponse` | Stream status |
## Customizing the desktop environment
The desktop runs inside the sandbox filesystem, so you can customize it using the [File System](/file-system) API before or after starting the desktop. The desktop HOME directory is located at `~/.local/state/sandbox-agent/desktop/home` (or `$XDG_STATE_HOME/sandbox-agent/desktop/home` if `XDG_STATE_HOME` is set).
All configuration files below are written to paths relative to this HOME directory.
### Window manager (openbox)
The desktop uses [openbox](http://openbox.org/) as its window manager. You can customize its behavior, theme, and keyboard shortcuts by writing an `rc.xml` config file.
<CodeGroup>
```ts TypeScript
const openboxConfig = `<?xml version="1.0" encoding="UTF-8"?>
<openbox_config xmlns="http://openbox.org/3.4/rc">
<theme>
<name>Clearlooks</name>
<titleLayout>NLIMC</titleLayout>
<font place="ActiveWindow"><name>DejaVu Sans</name><size>10</size></font>
</theme>
<desktops><number>1</number></desktops>
<keyboard>
<keybind key="A-F4"><action name="Close"/></keybind>
<keybind key="A-Tab"><action name="NextWindow"/></keybind>
</keyboard>
</openbox_config>`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/rc.xml" },
openboxConfig,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/rc.xml" \
-H "Content-Type: application/octet-stream" \
--data-binary @rc.xml
```
</CodeGroup>
### Autostart programs
Openbox runs the `~/.config/openbox/autostart` script on startup (here `~` is the desktop HOME directory described above). Use this script to launch applications, set the background, or configure the environment.
<CodeGroup>
```ts TypeScript
const autostart = `#!/bin/sh
# Set a solid background color
xsetroot -solid "#1e1e2e" &
# Launch a terminal
xterm -geometry 120x40+50+50 &
# Launch a browser
firefox --no-remote &
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" },
autostart,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" \
-H "Content-Type: application/octet-stream" \
--data-binary @autostart.sh
```
</CodeGroup>
<Note>
The autostart script runs when openbox starts, which happens during `startDesktop()`. Write the autostart file before calling `startDesktop()` for it to take effect.
</Note>
### Background
There is no wallpaper set by default (the background is the X root window default). You can set it using `xsetroot` in the autostart script (as shown above), or use `feh` if you need an image:
<CodeGroup>
```ts TypeScript
// Upload a wallpaper image
import fs from "node:fs";
const wallpaper = await fs.promises.readFile("./wallpaper.png");
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/wallpaper.png" },
wallpaper,
);
// Set the autostart to apply it
const autostart = `#!/bin/sh
feh --bg-fill ~/wallpaper.png &
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" },
autostart,
);
```
```bash cURL
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/wallpaper.png" \
-H "Content-Type: application/octet-stream" \
--data-binary @wallpaper.png
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/openbox/autostart" \
-H "Content-Type: application/octet-stream" \
--data-binary @autostart.sh
```
</CodeGroup>
<Note>
`feh` is not installed by default. Install it via the [Process API](/processes) before starting the desktop: `await sdk.runProcess({ command: "apt-get", args: ["install", "-y", "feh"] })`.
</Note>
### Fonts
Only `fonts-dejavu-core` is installed by default. To add more fonts, install them with your system package manager or copy font files into the sandbox:
<CodeGroup>
```ts TypeScript
// Install a font package
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "fonts-noto", "fonts-liberation"],
});
// Or copy a custom font file
import fs from "node:fs";
const font = await fs.promises.readFile("./CustomFont.ttf");
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.local/share/fonts" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.local/share/fonts/CustomFont.ttf" },
font,
);
// Rebuild the font cache
await sdk.runProcess({ command: "fc-cache", args: ["-fv"] });
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","fonts-noto","fonts-liberation"]}'
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.local/share/fonts"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.local/share/fonts/CustomFont.ttf" \
-H "Content-Type: application/octet-stream" \
--data-binary @CustomFont.ttf
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"fc-cache","args":["-fv"]}'
```
</CodeGroup>
### Cursor theme
<CodeGroup>
```ts TypeScript
await sdk.runProcess({
command: "apt-get",
args: ["install", "-y", "dmz-cursor-theme"],
});
const xresources = `Xcursor.theme: DMZ-White\nXcursor.size: 24\n`;
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.Xresources" },
xresources,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/processes/run" \
-H "Content-Type: application/json" \
-d '{"command":"apt-get","args":["install","-y","dmz-cursor-theme"]}'
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.Xresources" \
-H "Content-Type: application/octet-stream" \
--data-binary $'Xcursor.theme: DMZ-White\nXcursor.size: 24\n'
```
</CodeGroup>
<Note>
Run `xrdb -merge ~/.Xresources` (via the autostart script or the Process API) after writing the file for the changes to take effect.
</Note>
### Shell and terminal
No terminal emulator or shell is launched by default. Add one to the openbox autostart:
```sh
# In ~/.config/openbox/autostart
xterm -geometry 120x40+50+50 &
```
To use a different shell, set the `SHELL` environment variable in your Dockerfile or install your preferred shell and configure the terminal to use it.
### GTK theme
Applications using GTK will pick up settings from `~/.config/gtk-3.0/settings.ini`:
<CodeGroup>
```ts TypeScript
const gtkSettings = `[Settings]
gtk-theme-name=Adwaita
gtk-icon-theme-name=Adwaita
gtk-font-name=DejaVu Sans 10
gtk-cursor-theme-name=DMZ-White
gtk-cursor-theme-size=24
`;
await sdk.mkdirFs({ path: "~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0" });
await sdk.writeFsFile(
{ path: "~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0/settings.ini" },
gtkSettings,
);
```
```bash cURL
curl -X POST "http://127.0.0.1:2468/v1/fs/mkdir?path=~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0"
curl -X PUT "http://127.0.0.1:2468/v1/fs/file?path=~/.local/state/sandbox-agent/desktop/home/.config/gtk-3.0/settings.ini" \
-H "Content-Type: application/octet-stream" \
--data-binary @settings.ini
```
</CodeGroup>
### Summary of configuration paths
All paths are relative to the desktop HOME directory (`~/.local/state/sandbox-agent/desktop/home`).
| What | Path | Notes |
|------|------|-------|
| Openbox config | `.config/openbox/rc.xml` | Window manager theme, keybindings, behavior |
| Autostart | `.config/openbox/autostart` | Shell script run on desktop start |
| Custom fonts | `.local/share/fonts/` | TTF/OTF files, run `fc-cache -fv` after |
| Cursor theme | `.Xresources` | Requires `xrdb -merge` to apply |
| GTK 3 settings | `.config/gtk-3.0/settings.ini` | Theme, icons, fonts for GTK apps |
| Wallpaper | Any path, referenced from autostart | Requires `feh` or similar tool |

View file

@ -20,7 +20,7 @@ that BoxLite can load directly (BoxLite has its own image store separate from Do
```dockerfile ```dockerfile
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex
``` ```

View file

@ -25,7 +25,7 @@ cd my-sandbox
```dockerfile ```dockerfile
FROM cloudflare/sandbox:0.7.0 FROM cloudflare/sandbox:0.7.0
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude && sandbox-agent install-agent codex RUN sandbox-agent install-agent claude && sandbox-agent install-agent codex
EXPOSE 8000 EXPOSE 8000
@ -36,7 +36,7 @@ EXPOSE 8000
For standalone scripts, use the `cloudflare` provider: For standalone scripts, use the `cloudflare` provider:
```bash ```bash
npm install sandbox-agent@0.3.x @cloudflare/sandbox npm install sandbox-agent@0.4.x @cloudflare/sandbox
``` ```
```typescript ```typescript

View file

@ -14,7 +14,7 @@ description: "Deploy Sandbox Agent using ComputeSDK's provider-agnostic sandbox
## TypeScript example ## TypeScript example
```bash ```bash
npm install sandbox-agent@0.3.x computesdk npm install sandbox-agent@0.4.x computesdk
``` ```
```typescript ```typescript
@ -27,7 +27,11 @@ if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY
const sdk = await SandboxAgent.start({ const sdk = await SandboxAgent.start({
sandbox: computesdk({ sandbox: computesdk({
create: { envs }, create: {
envs,
image: process.env.COMPUTESDK_IMAGE,
templateId: process.env.COMPUTESDK_TEMPLATE_ID,
},
}), }),
}); });
@ -43,6 +47,7 @@ try {
``` ```
The `computesdk` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. ComputeSDK routes to your configured provider behind the scenes. The `computesdk` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. ComputeSDK routes to your configured provider behind the scenes.
The `create` option now forwards the full ComputeSDK sandbox-create payload, including provider-specific fields such as `image` and `templateId` when the selected provider supports them.
Before calling `SandboxAgent.start()`, configure ComputeSDK with your provider: Before calling `SandboxAgent.start()`, configure ComputeSDK with your provider:

View file

@ -16,7 +16,7 @@ See [Daytona network limits](https://www.daytona.io/docs/en/network-limits/).
## TypeScript example ## TypeScript example
```bash ```bash
npm install sandbox-agent@0.3.x @daytonaio/sdk npm install sandbox-agent@0.4.x @daytonaio/sdk
``` ```
```typescript ```typescript
@ -44,7 +44,7 @@ try {
} }
``` ```
The `daytona` provider uses the `rivetdev/sandbox-agent:0.4.0-rc.3-full` image by default and starts the server automatically. The `daytona` provider uses the `rivetdev/sandbox-agent:0.4.2-full` image by default and starts the server automatically.
## Using snapshots for faster startup ## Using snapshots for faster startup
@ -61,7 +61,7 @@ if (!hasSnapshot) {
name: SNAPSHOT, name: SNAPSHOT,
image: Image.base("ubuntu:22.04").runCommands( image: Image.base("ubuntu:22.04").runCommands(
"apt-get update && apt-get install -y curl ca-certificates", "apt-get update && apt-get install -y curl ca-certificates",
"curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh", "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh",
"sandbox-agent install-agent claude", "sandbox-agent install-agent claude",
"sandbox-agent install-agent codex", "sandbox-agent install-agent codex",
), ),

View file

@ -15,43 +15,64 @@ Run the published full image with all supported agents pre-installed:
docker run --rm -p 3000:3000 \ docker run --rm -p 3000:3000 \
-e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \ -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
-e OPENAI_API_KEY="$OPENAI_API_KEY" \ -e OPENAI_API_KEY="$OPENAI_API_KEY" \
rivetdev/sandbox-agent:0.4.0-rc.3-full \ rivetdev/sandbox-agent:0.4.2-full \
server --no-token --host 0.0.0.0 --port 3000 server --no-token --host 0.0.0.0 --port 3000
``` ```
The `0.4.0-rc.3-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image. The `0.4.2-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image.
## TypeScript with the Docker provider If you also want the desktop API inside the container, install desktop dependencies before starting the server:
```bash ```bash
npm install sandbox-agent@0.3.x dockerode get-port docker run --rm -p 3000:3000 \
-e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \
-e OPENAI_API_KEY="$OPENAI_API_KEY" \
node:22-bookworm-slim sh -c "\
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates bash libstdc++6 && \
rm -rf /var/lib/apt/lists/* && \
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \
sandbox-agent install desktop --yes && \
sandbox-agent server --no-token --host 0.0.0.0 --port 3000"
``` ```
```typescript In a Dockerfile:
import { SandboxAgent } from "sandbox-agent";
import { docker } from "sandbox-agent/docker";
const sdk = await SandboxAgent.start({ ```dockerfile
sandbox: docker({ RUN sandbox-agent install desktop --yes
env: [ ```
`ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`,
`OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`, ## TypeScript with dockerode
].filter(Boolean),
}), ```typescript
import Docker from "dockerode";
import { SandboxAgent } from "sandbox-agent";
const docker = new Docker();
const PORT = 3000;
const container = await docker.createContainer({
Image: "rivetdev/sandbox-agent:0.4.2-full",
Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`],
Env: [
`ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`,
`OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`,
`CODEX_API_KEY=${process.env.CODEX_API_KEY}`,
].filter(Boolean),
ExposedPorts: { [`${PORT}/tcp`]: {} },
HostConfig: {
AutoRemove: true,
PortBindings: { [`${PORT}/tcp`]: [{ HostPort: `${PORT}` }] },
},
}); });
try { await container.start();
const session = await sdk.createSession({ agent: "codex" });
await session.prompt([{ type: "text", text: "Summarize this repository." }]);
} finally {
await sdk.destroySandbox();
}
```
The `docker` provider uses the `rivetdev/sandbox-agent:0.4.0-rc.3-full` image by default. Override with `image`: const baseUrl = `http://127.0.0.1:${PORT}`;
const sdk = await SandboxAgent.connect({ baseUrl });
```typescript const session = await sdk.createSession({ agent: "codex" });
docker({ image: "my-custom-image:latest" }) await session.prompt([{ type: "text", text: "Summarize this repository." }]);
``` ```
## Building a custom image with everything preinstalled ## Building a custom image with everything preinstalled
@ -65,7 +86,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
bash ca-certificates curl git && \ bash ca-certificates curl git && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh && \
sandbox-agent install-agent --all sandbox-agent install-agent --all
RUN useradd -m -s /bin/bash sandbox RUN useradd -m -s /bin/bash sandbox

View file

@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside an E2B sandbox."
## TypeScript example ## TypeScript example
```bash ```bash
npm install sandbox-agent@0.3.x @e2b/code-interpreter npm install sandbox-agent@0.4.x @e2b/code-interpreter
``` ```
```typescript ```typescript
@ -21,9 +21,11 @@ import { e2b } from "sandbox-agent/e2b";
const envs: Record<string, string> = {}; const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const template = process.env.E2B_TEMPLATE;
const sdk = await SandboxAgent.start({ const sdk = await SandboxAgent.start({
sandbox: e2b({ sandbox: e2b({
template,
create: { envs }, create: { envs },
}), }),
}); });
@ -39,9 +41,12 @@ try {
} }
``` ```
The `e2b` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. The `e2b` provider handles sandbox creation, Sandbox Agent installation, agent setup, and server startup automatically. Sandboxes pause by default instead of being deleted, and reconnecting with the same `sandboxId` resumes them automatically.
Pass `template` when you want to start from a custom E2B template alias or template ID. E2B base-image selection happens when you build the template; `sandbox-agent/e2b` then uses that template at sandbox creation time.
## Faster cold starts ## Faster cold starts
For faster startup, create a custom E2B template with Sandbox Agent and target agents pre-installed. For faster startup, create a custom E2B template with Sandbox Agent and target agents pre-installed.
See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template). Build System 2.0 also lets you choose the template's base image in code.
See [E2B Custom Templates](https://e2b.dev/docs/sandbox-template) and [E2B Base Images](https://e2b.dev/docs/template/base-image).

View file

@ -1,155 +0,0 @@
---
title: "Foundry Self-Hosting"
description: "Environment, credentials, and deployment setup for Sandbox Agent Foundry auth, GitHub, and billing."
---
This guide documents the deployment contract for the Foundry product surface: app auth, GitHub onboarding, repository import, and billing.
It also covers the local-development bootstrap that uses `.env.development` only when `NODE_ENV=development`.
## Local Development
For backend local development, the Foundry backend now supports a development-only dotenv bootstrap:
- It loads `.env.development.local` and `.env.development`
- It does this **only** when `NODE_ENV=development`
- It does **not** load dotenv files in production
The example file lives at [`/.env.development.example`](https://github.com/rivet-dev/sandbox-agent/blob/main/.env.development.example).
To use it locally:
```bash
cp .env.development.example .env.development
```
Run the backend with:
```bash
just foundry-backend-start
```
That recipe sets `NODE_ENV=development`, which enables the dotenv loader.
### Local Defaults
These values can be safely defaulted for local development:
- `APP_URL=http://localhost:4173`
- `BETTER_AUTH_URL=http://localhost:7741`
- `BETTER_AUTH_SECRET=sandbox-agent-foundry-development-only-change-me`
- `GITHUB_REDIRECT_URI=http://localhost:7741/v1/auth/callback/github`
These should be treated as development-only values.
## Production Environment
For production or self-hosting, set these as real environment variables in your deployment platform. Do not rely on dotenv file loading.
### App/Auth
| Variable | Required | Notes |
|---|---:|---|
| `APP_URL` | Yes | Public frontend origin |
| `BETTER_AUTH_URL` | Yes | Public auth base URL |
| `BETTER_AUTH_SECRET` | Yes | Strong random secret for auth/session signing |
### GitHub OAuth
| Variable | Required | Notes |
|---|---:|---|
| `GITHUB_CLIENT_ID` | Yes | GitHub OAuth app client id |
| `GITHUB_CLIENT_SECRET` | Yes | GitHub OAuth app client secret |
| `GITHUB_REDIRECT_URI` | Yes | GitHub OAuth callback URL |
Use GitHub OAuth for:
- user sign-in
- user identity
- org selection
- access to the signed-in user's GitHub context
## GitHub App
If your Foundry deployment uses GitHub App-backed organization install and repo import, also configure:
| Variable | Required | Notes |
|---|---:|---|
| `GITHUB_APP_ID` | Yes | GitHub App id |
| `GITHUB_APP_CLIENT_ID` | Yes | GitHub App client id |
| `GITHUB_APP_CLIENT_SECRET` | Yes | GitHub App client secret |
| `GITHUB_APP_PRIVATE_KEY` | Yes | PEM private key for installation auth |
For `.env.development` and `.env.development.local`, store `GITHUB_APP_PRIVATE_KEY` as a quoted single-line value with `\n` escapes instead of raw multi-line PEM text.
Recommended GitHub App permissions:
- Repository `Metadata: Read`
- Repository `Contents: Read & Write`
- Repository `Pull requests: Read & Write`
- Repository `Checks: Read`
- Repository `Commit statuses: Read`
Set the webhook URL to `https://<your-backend-host>/v1/webhooks/github` and generate a webhook secret. Store the secret as `GITHUB_WEBHOOK_SECRET`.
This is required, not optional. Foundry depends on GitHub App webhook delivery for installation lifecycle changes, repo access changes, and ongoing repo / pull request sync. If the GitHub App is not installed for the workspace, or webhook delivery is misconfigured, Foundry will remain in an install / reconnect state and core GitHub-backed functionality will not work correctly.
Recommended webhook subscriptions:
- `installation`
- `installation_repositories`
- `pull_request`
- `pull_request_review`
- `pull_request_review_comment`
- `push`
- `create`
- `delete`
- `check_suite`
- `check_run`
- `status`
Use the GitHub App for:
- installation/reconnect state
- org repo import
- repository sync
- PR creation and updates
Use GitHub OAuth for:
- who the user is
- which orgs they can choose
## Stripe
For live billing, configure:
| Variable | Required | Notes |
|---|---:|---|
| `STRIPE_SECRET_KEY` | Yes | Server-side Stripe secret key |
| `STRIPE_PUBLISHABLE_KEY` | Yes | Client-side Stripe publishable key |
| `STRIPE_WEBHOOK_SECRET` | Yes | Signing secret for billing webhooks |
| `STRIPE_PRICE_TEAM` | Yes | Stripe price id for the Team plan checkout session |
Stripe should own:
- hosted checkout
- billing portal
- subscription status
- invoice history
- webhook-driven state sync
## Mock Invariant
Foundry's mock client path should continue to work end to end even when the real auth/GitHub/Stripe path exists.
That includes:
- sign-in
- org selection/import
- settings
- billing UI
- workspace/task/session flow
- seat accrual
Use mock mode for deterministic UI review and local product development. Use the real env-backed path for integration and self-hosting.

View file

@ -9,7 +9,7 @@ For local development, run Sandbox Agent directly on your machine.
```bash ```bash
# Install # Install
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Run # Run
sandbox-agent server --no-token --host 127.0.0.1 --port 2468 sandbox-agent server --no-token --host 127.0.0.1 --port 2468
@ -20,12 +20,12 @@ Or with npm/Bun:
<Tabs> <Tabs>
<Tab title="npx"> <Tab title="npx">
```bash ```bash
npx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 npx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468
``` ```
</Tab> </Tab>
<Tab title="bunx"> <Tab title="bunx">
```bash ```bash
bunx @sandbox-agent/cli@0.3.x server --no-token --host 127.0.0.1 --port 2468 bunx @sandbox-agent/cli@0.4.x server --no-token --host 127.0.0.1 --port 2468
``` ```
</Tab> </Tab>
</Tabs> </Tabs>

View file

@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside a Modal sandbox."
## TypeScript example ## TypeScript example
```bash ```bash
npm install sandbox-agent@0.3.x modal npm install sandbox-agent@0.4.x modal
``` ```
```typescript ```typescript
@ -21,9 +21,11 @@ import { modal } from "sandbox-agent/modal";
const secrets: Record<string, string> = {}; const secrets: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) secrets.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) secrets.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) secrets.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) secrets.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const baseImage = process.env.MODAL_BASE_IMAGE ?? "node:22-slim";
const sdk = await SandboxAgent.start({ const sdk = await SandboxAgent.start({
sandbox: modal({ sandbox: modal({
image: baseImage,
create: { secrets }, create: { secrets },
}), }),
}); });
@ -40,6 +42,7 @@ try {
``` ```
The `modal` provider handles app creation, image building, sandbox provisioning, agent installation, server startup, and tunnel networking automatically. The `modal` provider handles app creation, image building, sandbox provisioning, agent installation, server startup, and tunnel networking automatically.
Set `image` to change the base Docker image before Sandbox Agent and its agent binaries are layered on top. You can also pass a prebuilt Modal `Image` object.
## Faster cold starts ## Faster cold starts

View file

@ -11,7 +11,7 @@ description: "Deploy Sandbox Agent inside a Vercel Sandbox."
## TypeScript example ## TypeScript example
```bash ```bash
npm install sandbox-agent@0.3.x @vercel/sandbox npm install sandbox-agent@0.4.x @vercel/sandbox
``` ```
```typescript ```typescript

View file

@ -1,6 +1,6 @@
{ {
"$schema": "https://mintlify.com/docs.json", "$schema": "https://mintlify.com/docs.json",
"theme": "willow", "theme": "mint",
"name": "Sandbox Agent SDK", "name": "Sandbox Agent SDK",
"appearance": { "appearance": {
"default": "dark", "default": "dark",
@ -8,8 +8,8 @@
}, },
"colors": { "colors": {
"primary": "#ff4f00", "primary": "#ff4f00",
"light": "#ff4f00", "light": "#ff6a2a",
"dark": "#ff4f00" "dark": "#cc3f00"
}, },
"favicon": "/favicon.svg", "favicon": "/favicon.svg",
"logo": { "logo": {
@ -25,17 +25,13 @@
}, },
"navbar": { "navbar": {
"links": [ "links": [
{
"label": "Gigacode",
"icon": "terminal",
"href": "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode"
},
{ {
"label": "Discord", "label": "Discord",
"icon": "discord", "icon": "discord",
"href": "https://discord.gg/auCecybynK" "href": "https://discord.gg/auCecybynK"
}, },
{ {
"label": "GitHub",
"type": "github", "type": "github",
"href": "https://github.com/rivet-dev/sandbox-agent" "href": "https://github.com/rivet-dev/sandbox-agent"
} }
@ -87,15 +83,12 @@
}, },
{ {
"group": "System", "group": "System",
"pages": ["file-system", "processes"] "pages": ["file-system", "processes", "computer-use", "common-software"]
},
{
"group": "Orchestration",
"pages": ["orchestration-architecture", "session-persistence", "observability", "multiplayer", "security"]
}, },
{ {
"group": "Reference", "group": "Reference",
"pages": [ "pages": [
"troubleshooting",
"architecture", "architecture",
"cli", "cli",
"inspector", "inspector",
@ -127,5 +120,11 @@
] ]
} }
] ]
} },
"__removed": [
{
"group": "Orchestration",
"pages": ["orchestration-architecture", "session-persistence", "observability", "multiplayer", "security"]
}
]
} }

View file

@ -1,6 +0,0 @@
---
title: Gigacode
url: "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode"
---

View file

@ -35,6 +35,7 @@ console.log(url);
- Prompt testing - Prompt testing
- Request/response debugging - Request/response debugging
- Interactive permission prompts (approve, always-allow, or reject tool-use requests) - Interactive permission prompts (approve, always-allow, or reject tool-use requests)
- Desktop panel for status, remediation, start/stop, and screenshot refresh
- Process management (create, stop, kill, delete, view logs) - Process management (create, stop, kill, delete, view logs)
- Interactive PTY terminal for tty processes - Interactive PTY terminal for tty processes
- One-shot command execution - One-shot command execution
@ -50,3 +51,16 @@ console.log(url);
The Inspector includes an embedded Ghostty-based terminal for interactive tty The Inspector includes an embedded Ghostty-based terminal for interactive tty
processes. The UI uses the SDK's high-level `connectProcessTerminal(...)` processes. The UI uses the SDK's high-level `connectProcessTerminal(...)`
wrapper via the shared `@sandbox-agent/react` `ProcessTerminal` component. wrapper via the shared `@sandbox-agent/react` `ProcessTerminal` component.
## Desktop panel
The `Desktop` panel shows the current desktop runtime state, missing dependencies,
the suggested install command, last error details, process/log paths, and the
latest captured screenshot.
Use it to:
- Check whether desktop dependencies are installed
- Start or stop the managed desktop runtime
- Refresh desktop status
- Capture a fresh screenshot on demand

File diff suppressed because it is too large Load diff

View file

@ -1,210 +0,0 @@
# Pi Agent Support Plan (pi-mono)
## Implementation Status Update
- Runtime selection now supports two internal modes:
- `PerSession` (default for unknown/non-allowlisted Pi capabilities)
- `Shared` (allowlist-only compatibility path)
- Pi sessions now use per-session process isolation by default, enabling true concurrent Pi sessions in Inspector and API clients.
- Shared Pi server code remains available and is used only when capability checks allow multiplexing.
- Session termination for per-session Pi mode hard-kills the underlying Pi process and clears queued prompts/pending waiters.
- In-session concurrent sends are serialized with an unbounded daemon-side FIFO queue per session.
## Investigation Summary
### Pi CLI modes and RPC protocol
- Pi supports multiple modes including interactive, print/JSON output, RPC, and SDK usage. JSON mode outputs a stream of JSON events suitable for parsing, and RPC mode is intended for programmatic control over stdin/stdout.
- RPC mode is started with `pi --mode rpc` and supports options like `--provider`, `--model`, `--no-session`, and `--session-dir`.
- The RPC protocol is newline-delimited JSON over stdin/stdout:
- Commands are JSON objects written to stdin.
- Responses are JSON objects with `type: "response"` and optional `id`.
- Events are JSON objects without `id`.
- `prompt` can include images using `ImageContent` (base64 or URL) alongside text.
- JSON/print mode (`pi -p` or `pi --print --mode json`) produces JSONL for non-interactive parsing and can resume sessions with a token.
### RPC commands
RPC commands listed in `rpc.md` include:
- `new_session`, `get_state`, `list_sessions`, `delete_session`, `rename_session`, `clear_session`
- `prompt`, `queue_message`, `abort`, `get_queued_messages`
### RPC event types
RPC events listed in `rpc.md` include:
- `agent_start`, `agent_end`
- `turn_start`, `turn_end`
- `message_start`, `message_update`, `message_end`
- `tool_execution_start`, `tool_execution_update`, `tool_execution_end`
- `auto_compaction`, `auto_retry`, `hook_error`
`message_update` uses `assistantMessageEvent` deltas such as:
- `start`, `text_start`, `text_delta`, `text_end`
- `thinking_start`, `thinking_delta`, `thinking_end`
- `toolcall_start`, `toolcall_delta`, `toolcall_end`
- `toolcall_args_start`, `toolcall_args_delta`, `toolcall_args_end`
- `done`, `error`
`tool_execution_update` includes `partialResult`, which is described as accumulated output so far.
### Schema source locations (pi-mono)
RPC types are documented as living in:
- `packages/ai/src/types.ts` (Model types)
- `packages/agent/src/types.ts` (AgentResponse types)
- `packages/coding-agent/src/core/messages.ts` (message types)
- `packages/coding-agent/src/modes/rpc/rpc-types.ts` (RPC protocol types)
### Distribution assets
Pi releases provide platform-specific binaries such as:
- `pi-darwin-arm64`, `pi-darwin-x64`
- `pi-linux-arm64`, `pi-linux-x64`
- `pi-win-x64.zip`
## Integration Decisions
- Follow the OpenCode pattern: a shared long-running process (stdio RPC) with session multiplexing.
- Primary integration path is RPC streaming (`pi --mode rpc`).
- JSON/print mode is a fallback only (diagnostics or non-interactive runs).
- Create sessions via `new_session`; store the returned `sessionId` as `native_session_id`.
- Use `get_state` as a re-sync path after server restarts.
- Use `prompt` for send-message, with optional image content.
- Convert Pi events into universal events; emit daemon synthetic `session.started` on session creation and `session.ended` only on errors/termination.
## Implementation Plan
### 1) Agent Identity + Capabilities
Files:
- `server/packages/agent-management/src/agents.rs`
- `server/packages/sandbox-agent/src/router.rs`
- `docs/cli.mdx`, `docs/conversion.mdx`, `docs/session-transcript-schema.mdx`
- `README.md`, `frontend/packages/website/src/components/FAQ.tsx`
Tasks:
- Add `AgentId::Pi` with string/binary name `"pi"` and parsing rules.
- Add Pi to `all_agents()` and agent lists.
- Define `AgentCapabilities` for Pi:
- `tool_calls=true`, `tool_results=true`
- `text_messages=true`, `streaming_deltas=true`, `item_started=true`
- `reasoning=true` (from `thinking_*` deltas)
- `images=true` (ImageContent in `prompt`)
- `permissions=false`, `questions=false`, `mcp_tools=false`
- `shared_process=true`, `session_lifecycle=false` (no native session events)
- `error_events=true` (hook_error)
- `command_execution=false`, `file_changes=false`, `file_attachments=false`
### 2) Installer and Binary Resolution
Files:
- `server/packages/agent-management/src/agents.rs`
Tasks:
- Add `install_pi()` that:
- Downloads the correct release asset per platform (`pi-<platform>`).
- Handles `.zip` on Windows and raw binaries elsewhere.
- Marks binary executable.
- Add Pi to `AgentManager::install`, `is_installed`, `version`.
- Version detection: try `--version`, `version`, `-V`.
### 3) Schema Extraction for Pi
Files:
- `resources/agent-schemas/src/pi.ts` (new)
- `resources/agent-schemas/src/index.ts`
- `resources/agent-schemas/artifacts/json-schema/pi.json`
- `server/packages/extracted-agent-schemas/build.rs`
- `server/packages/extracted-agent-schemas/src/lib.rs`
Tasks:
- Implement `extractPiSchema()`:
- Download pi-mono sources (zip/tarball) into a temp dir.
- Use `ts-json-schema-generator` against `packages/coding-agent/src/modes/rpc/rpc-types.ts`.
- Include dependent files per `rpc.md` (ai/types, agent/types, core/messages).
- Extract `RpcEvent`, `RpcResponse`, `RpcCommand` unions (exact type names from source).
- Add fallback schema if remote fetch fails (minimal union with event/response fields).
- Wire pi into extractor index and artifact generation.
### 4) Universal Schema Conversion (Pi -> Universal)
Files:
- `server/packages/universal-agent-schema/src/agents/pi.rs` (new)
- `server/packages/universal-agent-schema/src/agents/mod.rs`
- `server/packages/universal-agent-schema/src/lib.rs`
- `server/packages/sandbox-agent/src/router.rs`
Mapping rules:
- `message_start` -> `item.started` (kind=message, role=assistant, native_item_id=messageId)
- `message_update`:
- `text_*` -> `item.delta` (assistant text delta)
- `thinking_*` -> `item.delta` with `ContentPart::Reasoning` (visibility=Private)
- `toolcall_*` and `toolcall_args_*` -> ignore for now (tool_execution_* is authoritative)
- `error` -> `item.completed` with `ItemStatus::Failed` (if no later message_end)
- `message_end` -> `item.completed` (finalize assistant message)
- `tool_execution_start` -> `item.started` (kind=tool_call, ContentPart::ToolCall)
- `tool_execution_update` -> `item.delta` for a synthetic tool_result item:
- Maintain a per-toolCallId buffer to compute delta from accumulated `partialResult`.
- `tool_execution_end` -> `item.completed` (kind=tool_result, output from `result.content`)
- If `isError=true`, set item status to failed.
- `agent_start`, `turn_start`, `turn_end`, `agent_end`, `auto_compaction`, `auto_retry`, `hook_error`:
- Map to `ItemKind::Status` with a label like `pi.agent_start`, `pi.auto_retry`, etc.
- Do not emit `session.ended` for these events.
- If event parsing fails, emit `agent.unparsed` (source=daemon, synthetic=true) and fail tests.
### 5) Shared RPC Server Integration
Files:
- `server/packages/sandbox-agent/src/router.rs`
Tasks:
- Add a new managed stdio server type for Pi, similar to Codex:
- Create `PiServer` struct with:
- stdin sender
- pending request map keyed by request id
- per-session native session id mapping
- Extend `ManagedServerKind` to include Pi.
- Add `ensure_pi_server()` and `spawn_pi_server()` using `pi --mode rpc`.
- Add a `handle_pi_server_output()` loop to parse stdout lines into events/responses.
- Session creation:
- On `create_session`, ensure Pi server is running, send `new_session`, store sessionId.
- Register session with `server_manager.register_session` for native mapping.
- Sending messages:
- Use `prompt` command; include sessionId and optional images.
- Emit synthetic `item.started` only if Pi does not emit `message_start`.
### 6) Router + Streaming Path Changes
Files:
- `server/packages/sandbox-agent/src/router.rs`
Tasks:
- Add Pi handling to:
- `create_session` (new_session)
- `send_message` (prompt)
- `parse_agent_line` (Pi event conversion)
- `agent_modes` (default to `default` unless Pi exposes a mode list)
- `agent_supports_resume` (true if Pi supports session resume)
### 7) Tests
Files:
- `server/packages/sandbox-agent/tests/...`
- `server/packages/universal-agent-schema/tests/...` (if present)
Tasks:
- Unit tests for conversion:
- `message_start/update/end` -> item.started/delta/completed
- `tool_execution_*` -> tool call/result mapping with partialResult delta
- failure -> agent.unparsed
- Integration tests:
- Start Pi RPC server, create session, send prompt, stream events.
- Validate `native_session_id` mapping and event ordering.
- Update HTTP/SSE test coverage to include Pi agent if relevant.
## Risk Areas / Edge Cases
- `tool_execution_update.partialResult` is cumulative; must compute deltas.
- `message_update` may emit `done`/`error` without `message_end`; handle both paths.
- No native session lifecycle events; rely on daemon synthetic events.
- Session recovery after RPC server restart requires `get_state` + re-register sessions.
## Acceptance Criteria
- Pi appears in `/v1/agents`, CLI list, and docs.
- `create_session` returns `native_session_id` from Pi `new_session`.
- Streaming prompt yields universal events with proper ordering:
- message -> item.started/delta/completed
- tool execution -> tool call + tool result
- Tests pass and no synthetic data is used in test fixtures.
## Sources
- https://upd.dev/badlogic/pi-mono/src/commit/d36e0ea07303d8a76d51b4a7bd5f0d6d3c490860/packages/coding-agent/docs/rpc.md
- https://buildwithpi.ai/pi-cli
- https://takopi.dev/docs/pi-cli/
- https://upd.dev/badlogic/pi-mono/releases

View file

@ -1,370 +1,289 @@
--- ---
title: "Quickstart" title: "Quickstart"
description: "Get a coding agent running in a sandbox in under a minute." description: "Start the server and send your first message."
icon: "rocket" icon: "rocket"
--- ---
<Steps> <Steps>
<Step title="Install"> <Step title="Install skill (optional)">
<Tabs> <Tabs>
<Tab title="npm"> <Tab title="npx">
```bash ```bash
npm install sandbox-agent@0.3.x npx skills add rivet-dev/skills -s sandbox-agent
``` ```
</Tab> </Tab>
<Tab title="bun"> <Tab title="bunx">
```bash ```bash
bun add sandbox-agent@0.3.x bunx skills add rivet-dev/skills -s sandbox-agent
# Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
</Tab> </Tab>
</Tabs> </Tabs>
</Step> </Step>
<Step title="Start the sandbox"> <Step title="Set environment variables">
`SandboxAgent.start()` provisions a sandbox, starts a lightweight [Sandbox Agent server](/architecture) inside it, and connects your SDK client. Each coding agent requires API keys to connect to its respective LLM provider.
<Tabs> <Tabs>
<Tab title="Local"> <Tab title="Local shell">
```bash ```bash
npm install sandbox-agent@0.3.x export ANTHROPIC_API_KEY="sk-ant-..."
export OPENAI_API_KEY="sk-..."
``` ```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { local } from "sandbox-agent/local";
// Runs on your machine. Inherits process.env automatically.
const client = await SandboxAgent.start({
sandbox: local(),
});
```
See [Local deploy guide](/deploy/local)
</Tab> </Tab>
<Tab title="E2B"> <Tab title="E2B">
```bash
npm install sandbox-agent@0.3.x @e2b/code-interpreter
```
```typescript ```typescript
import { SandboxAgent } from "sandbox-agent"; import { Sandbox } from "@e2b/code-interpreter";
import { e2b } from "sandbox-agent/e2b";
// Provisions a cloud sandbox on E2B, installs the server, and connects. const envs: Record<string, string> = {};
const client = await SandboxAgent.start({ if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
sandbox: e2b(), if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
});
const sandbox = await Sandbox.create({ envs });
``` ```
See [E2B deploy guide](/deploy/e2b)
</Tab> </Tab>
<Tab title="Daytona"> <Tab title="Daytona">
```bash
npm install sandbox-agent@0.3.x @daytonaio/sdk
```
```typescript ```typescript
import { SandboxAgent } from "sandbox-agent"; import { Daytona } from "@daytonaio/sdk";
import { daytona } from "sandbox-agent/daytona";
// Provisions a Daytona workspace with the server pre-installed. const envVars: Record<string, string> = {};
const client = await SandboxAgent.start({ if (process.env.ANTHROPIC_API_KEY) envVars.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
sandbox: daytona(), if (process.env.OPENAI_API_KEY) envVars.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const daytona = new Daytona();
const sandbox = await daytona.create({
snapshot: "sandbox-agent-ready",
envVars,
}); });
``` ```
See [Daytona deploy guide](/deploy/daytona)
</Tab>
<Tab title="Vercel">
```bash
npm install sandbox-agent@0.3.x @vercel/sandbox
```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { vercel } from "sandbox-agent/vercel";
// Provisions a Vercel sandbox with the server installed on boot.
const client = await SandboxAgent.start({
sandbox: vercel(),
});
```
See [Vercel deploy guide](/deploy/vercel)
</Tab>
<Tab title="Modal">
```bash
npm install sandbox-agent@0.3.x modal
```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { modal } from "sandbox-agent/modal";
// Builds a container image with agents pre-installed (cached after first run),
// starts a Modal sandbox from that image, and connects.
const client = await SandboxAgent.start({
sandbox: modal(),
});
```
See [Modal deploy guide](/deploy/modal)
</Tab>
<Tab title="Cloudflare">
```bash
npm install sandbox-agent@0.3.x @cloudflare/sandbox
```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { cloudflare } from "sandbox-agent/cloudflare";
import { SandboxClient } from "@cloudflare/sandbox";
// Uses the Cloudflare Sandbox SDK to provision and connect.
// The Cloudflare SDK handles server lifecycle internally.
const cfSandboxClient = new SandboxClient();
const client = await SandboxAgent.start({
sandbox: cloudflare({ sdk: cfSandboxClient }),
});
```
See [Cloudflare deploy guide](/deploy/cloudflare)
</Tab> </Tab>
<Tab title="Docker"> <Tab title="Docker">
```bash ```bash
npm install sandbox-agent@0.3.x dockerode get-port docker run -p 2468:2468 \
-e ANTHROPIC_API_KEY="sk-ant-..." \
-e OPENAI_API_KEY="sk-..." \
rivetdev/sandbox-agent:0.4.2-full \
server --no-token --host 0.0.0.0 --port 2468
``` ```
```typescript
import { SandboxAgent } from "sandbox-agent";
import { docker } from "sandbox-agent/docker";
// Runs a Docker container locally. Good for testing.
const client = await SandboxAgent.start({
sandbox: docker(),
});
```
See [Docker deploy guide](/deploy/docker)
</Tab> </Tab>
</Tabs> </Tabs>
<div style={{ height: "1rem" }} />
**More info:**
<AccordionGroup> <AccordionGroup>
<Accordion title="Passing LLM credentials"> <Accordion title="Extracting API keys from current machine">
Agents need API keys for their LLM provider. Each provider passes credentials differently: Use `sandbox-agent credentials extract-env --export` to extract your existing API keys (Anthropic, OpenAI, etc.) from local Claude Code or Codex config files.
```typescript
// Local — inherits process.env automatically
// E2B
e2b({ create: { envs: { ANTHROPIC_API_KEY: "..." } } })
// Daytona
daytona({ create: { envVars: { ANTHROPIC_API_KEY: "..." } } })
// Vercel
vercel({ create: { env: { ANTHROPIC_API_KEY: "..." } } })
// Modal
modal({ create: { secrets: { ANTHROPIC_API_KEY: "..." } } })
// Docker
docker({ env: ["ANTHROPIC_API_KEY=..."] })
```
For multi-tenant billing, per-user keys, and gateway options, see [LLM Credentials](/llm-credentials).
</Accordion> </Accordion>
<Accordion title="Testing without API keys">
<Accordion title="Implementing a custom provider"> Use the `mock` agent for SDK and integration testing without provider credentials.
Implement the `SandboxProvider` interface to use any sandbox platform:
```typescript
import { SandboxAgent, type SandboxProvider } from "sandbox-agent";
const myProvider: SandboxProvider = {
name: "my-provider",
async create() {
// Provision a sandbox, install & start the server, return an ID
return "sandbox-123";
},
async destroy(sandboxId) {
// Tear down the sandbox
},
async getUrl(sandboxId) {
// Return the Sandbox Agent server URL
return `https://${sandboxId}.my-platform.dev:3000`;
},
};
const client = await SandboxAgent.start({
sandbox: myProvider,
});
```
</Accordion> </Accordion>
<Accordion title="Multi-tenant and per-user billing">
<Accordion title="Connecting to an existing server"> For per-tenant token tracking, budget enforcement, or usage-based billing, see [LLM Credentials](/llm-credentials) for gateway options like OpenRouter, LiteLLM, and Portkey.
If you already have a Sandbox Agent server running, connect directly:
```typescript
const client = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468",
});
```
</Accordion>
<Accordion title="Starting the server manually">
<Tabs>
<Tab title="curl">
```bash
curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh
sandbox-agent server --no-token --host 0.0.0.0 --port 2468
```
</Tab>
<Tab title="npx">
```bash
npx @sandbox-agent/cli@0.3.x server --no-token --host 0.0.0.0 --port 2468
```
</Tab>
<Tab title="Docker">
```bash
docker run -p 2468:2468 \
-e ANTHROPIC_API_KEY="sk-ant-..." \
-e OPENAI_API_KEY="sk-..." \
rivetdev/sandbox-agent:0.4.0-rc.3-full \
server --no-token --host 0.0.0.0 --port 2468
```
</Tab>
</Tabs>
</Accordion> </Accordion>
</AccordionGroup> </AccordionGroup>
</Step> </Step>
<Step title="Create a session and send a prompt"> <Step title="Run the server">
<CodeGroup> <Tabs>
<Tab title="curl">
Install and run the binary directly.
```typescript Claude ```bash
const session = await client.createSession({ curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
agent: "claude", sandbox-agent server --no-token --host 0.0.0.0 --port 2468
}); ```
</Tab>
session.onEvent((event) => { <Tab title="npx">
console.log(event.sender, event.payload); Run without installing globally.
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, npx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468
]); ```
</Tab>
console.log(result.stopReason); <Tab title="bunx">
``` Run without installing globally.
```typescript Codex ```bash
const session = await client.createSession({ bunx @sandbox-agent/cli@0.4.x server --no-token --host 0.0.0.0 --port 2468
agent: "codex", ```
}); </Tab>
session.onEvent((event) => { <Tab title="npm i -g">
console.log(event.sender, event.payload); Install globally, then run.
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, npm install -g @sandbox-agent/cli@0.4.x
]); sandbox-agent server --no-token --host 0.0.0.0 --port 2468
```
</Tab>
console.log(result.stopReason); <Tab title="bun add -g">
``` Install globally, then run.
```typescript OpenCode ```bash
const session = await client.createSession({ bun add -g @sandbox-agent/cli@0.4.x
agent: "opencode", # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
}); bun pm -g trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
sandbox-agent server --no-token --host 0.0.0.0 --port 2468
```
</Tab>
session.onEvent((event) => { <Tab title="Node.js (local)">
console.log(event.sender, event.payload); For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess.
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, npm install sandbox-agent@0.4.x
]); ```
console.log(result.stopReason); ```typescript
``` import { SandboxAgent } from "sandbox-agent";
```typescript Cursor const sdk = await SandboxAgent.start();
const session = await client.createSession({ ```
agent: "cursor", </Tab>
});
session.onEvent((event) => { <Tab title="Bun (local)">
console.log(event.sender, event.payload); For local development, use `SandboxAgent.start()` to spawn and manage the server as a subprocess.
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, bun add sandbox-agent@0.4.x
]); # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
```
console.log(result.stopReason); ```typescript
``` import { SandboxAgent } from "sandbox-agent";
```typescript Amp const sdk = await SandboxAgent.start();
const session = await client.createSession({ ```
agent: "amp", </Tab>
});
session.onEvent((event) => { <Tab title="Build from source">
console.log(event.sender, event.payload); Use this option if you're running from source instead of the installed CLI.
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, cargo run -p sandbox-agent -- server --no-token --host 0.0.0.0 --port 2468
]); ```
</Tab>
</Tabs>
console.log(result.stopReason); Binding to `0.0.0.0` allows the server to accept connections from any network interface, which is required when running inside a sandbox where clients connect remotely.
```
```typescript Pi <AccordionGroup>
const session = await client.createSession({ <Accordion title="Configuring token">
agent: "pi", Tokens are usually not required. Most sandbox providers (E2B, Daytona, etc.) already secure networking at the infrastructure layer.
});
session.onEvent((event) => { If you expose the server publicly, use `--token "$SANDBOX_TOKEN"` to require authentication:
console.log(event.sender, event.payload);
});
const result = await session.prompt([ ```bash
{ type: "text", text: "Summarize the repository and suggest next steps." }, sandbox-agent server --token "$SANDBOX_TOKEN" --host 0.0.0.0 --port 2468
]); ```
console.log(result.stopReason); Then pass the token when connecting:
```
</CodeGroup> <Tabs>
<Tab title="TypeScript">
```typescript
import { SandboxAgent } from "sandbox-agent";
See [Agent Sessions](/agent-sessions) for the full sessions API. const sdk = await SandboxAgent.connect({
baseUrl: "http://your-server:2468",
token: process.env.SANDBOX_TOKEN,
});
```
</Tab>
<Tab title="curl">
```bash
curl "http://your-server:2468/v1/health" \
-H "Authorization: Bearer $SANDBOX_TOKEN"
```
</Tab>
<Tab title="CLI">
```bash
sandbox-agent --token "$SANDBOX_TOKEN" api agents list \
--endpoint http://your-server:2468
```
</Tab>
</Tabs>
</Accordion>
<Accordion title="CORS">
If you're calling the server from a browser, see the [CORS configuration guide](/cors).
</Accordion>
</AccordionGroup>
</Step> </Step>
<Step title="Clean up"> <Step title="Install agents (optional)">
```typescript To preinstall agents:
await client.destroySandbox(); // tears down the sandbox and disconnects
```bash
sandbox-agent install-agent --all
``` ```
Use `client.dispose()` instead to disconnect without destroying the sandbox (for reconnecting later). If agents are not installed up front, they are lazily installed when creating a session.
</Step> </Step>
<Step title="Inspect with the UI"> <Step title="Install desktop dependencies (optional, Linux only)">
Open the Inspector at `/ui/` on your server (e.g. `http://localhost:2468/ui/`) to view sessions and events in a GUI. If you want to use `/v1/desktop/*`, install the desktop runtime packages first:
```bash
sandbox-agent install desktop --yes
```
Then use `GET /v1/desktop/status` or `sdk.getDesktopStatus()` to verify the runtime is ready before calling desktop screenshot or input APIs.
</Step>
<Step title="Create a session">
```typescript
import { SandboxAgent } from "sandbox-agent";
const sdk = await SandboxAgent.connect({
baseUrl: "http://127.0.0.1:2468",
});
const session = await sdk.createSession({
agent: "claude",
sessionInit: {
cwd: "/",
mcpServers: [],
},
});
console.log(session.id);
```
</Step>
<Step title="Send a message">
```typescript
const result = await session.prompt([
{ type: "text", text: "Summarize the repository and suggest next steps." },
]);
console.log(result.stopReason);
```
</Step>
<Step title="Read events">
```typescript
const off = session.onEvent((event) => {
console.log(event.sender, event.payload);
});
const page = await sdk.getEvents({
sessionId: session.id,
limit: 50,
});
console.log(page.items.length);
off();
```
</Step>
<Step title="Test with Inspector">
Open the Inspector UI at `/ui/` on your server (for example, `http://localhost:2468/ui/`) to inspect sessions and events in a GUI.
<Frame> <Frame>
<img src="/images/inspector.png" alt="Sandbox Agent Inspector" /> <img src="/images/inspector.png" alt="Sandbox Agent Inspector" />
@ -372,44 +291,16 @@ icon: "rocket"
</Step> </Step>
</Steps> </Steps>
## Full example
```typescript
import { SandboxAgent } from "sandbox-agent";
import { e2b } from "sandbox-agent/e2b";
const client = await SandboxAgent.start({
sandbox: e2b({
create: {
envs: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY },
},
}),
});
try {
const session = await client.createSession({ agent: "claude" });
session.onEvent((event) => {
console.log(`[${event.sender}]`, JSON.stringify(event.payload));
});
const result = await session.prompt([
{ type: "text", text: "Write a function that checks if a number is prime." },
]);
console.log("Done:", result.stopReason);
} finally {
await client.destroySandbox();
}
```
## Next steps ## Next steps
<CardGroup cols={2}> <CardGroup cols={3}>
<Card title="SDK Overview" icon="compass" href="/sdk-overview"> <Card title="Session Persistence" icon="database" href="/session-persistence">
Full TypeScript SDK API surface. Configure in-memory, Rivet Actor state, IndexedDB, SQLite, and Postgres persistence.
</Card> </Card>
<Card title="Deploy to a Sandbox" icon="box" href="/deploy/local"> <Card title="Deploy to a Sandbox" icon="box" href="/deploy/local">
Deploy to E2B, Daytona, Docker, Vercel, or Cloudflare. Deploy your agent to E2B, Daytona, Docker, Vercel, or Cloudflare.
</Card>
<Card title="SDK Overview" icon="compass" href="/sdk-overview">
Use the latest TypeScript SDK API.
</Card> </Card>
</CardGroup> </CardGroup>

View file

@ -17,7 +17,7 @@ Current exports:
## Install ## Install
```bash ```bash
npm install @sandbox-agent/react@0.3.x npm install @sandbox-agent/react@0.4.x
``` ```
## Full example ## Full example

View file

@ -11,12 +11,12 @@ The TypeScript SDK is centered on `sandbox-agent` and its `SandboxAgent` class.
<Tabs> <Tabs>
<Tab title="npm"> <Tab title="npm">
```bash ```bash
npm install sandbox-agent@0.3.x npm install sandbox-agent@0.4.x
``` ```
</Tab> </Tab>
<Tab title="bun"> <Tab title="bun">
```bash ```bash
bun add sandbox-agent@0.3.x bun add sandbox-agent@0.4.x
# Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()). # Allow Bun to run postinstall scripts for native binaries (required for SandboxAgent.start()).
bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64 bun pm trust @sandbox-agent/cli-linux-x64 @sandbox-agent/cli-linux-arm64 @sandbox-agent/cli-darwin-arm64 @sandbox-agent/cli-darwin-x64 @sandbox-agent/cli-win32-x64
``` ```
@ -26,7 +26,7 @@ The TypeScript SDK is centered on `sandbox-agent` and its `SandboxAgent` class.
## Optional React components ## Optional React components
```bash ```bash
npm install @sandbox-agent/react@0.3.x npm install @sandbox-agent/react@0.4.x
``` ```
## Create a client ## Create a client
@ -87,7 +87,7 @@ const sdk = await SandboxAgent.start({
// sdk.sandboxId — prefixed provider ID (e.g. "local/127.0.0.1:2468") // sdk.sandboxId — prefixed provider ID (e.g. "local/127.0.0.1:2468")
await sdk.destroySandbox(); // tears down sandbox + disposes client await sdk.destroySandbox(); // provider-defined cleanup + disposes client
``` ```
`SandboxAgent.start(...)` requires a `sandbox` provider. Built-in providers: `SandboxAgent.start(...)` requires a `sandbox` provider. Built-in providers:
@ -101,7 +101,7 @@ await sdk.destroySandbox(); // tears down sandbox + disposes client
| `sandbox-agent/vercel` | Vercel Sandbox | | `sandbox-agent/vercel` | Vercel Sandbox |
| `sandbox-agent/cloudflare` | Cloudflare Sandbox | | `sandbox-agent/cloudflare` | Cloudflare Sandbox |
Use `sdk.dispose()` to disconnect without destroying the sandbox, or `sdk.destroySandbox()` to tear down both. Use `sdk.dispose()` to disconnect without changing sandbox state, `sdk.pauseSandbox()` for graceful suspension when supported, or `sdk.killSandbox()` for permanent deletion.
## Session flow ## Session flow
@ -196,6 +196,44 @@ const writeResult = await sdk.writeFsFile({ path: "./hello.txt" }, "hello");
console.log(health.status, agents.agents.length, entries.length, writeResult.path); console.log(health.status, agents.agents.length, entries.length, writeResult.path);
``` ```
## Desktop API
The SDK also wraps the desktop host/runtime HTTP API.
Install desktop dependencies first on Linux hosts:
```bash
sandbox-agent install desktop --yes
```
Then query status, surface remediation if needed, and start the runtime:
```ts
const status = await sdk.getDesktopStatus();
if (status.state === "install_required") {
console.log(status.installCommand);
}
const started = await sdk.startDesktop({
width: 1440,
height: 900,
dpi: 96,
});
const screenshot = await sdk.takeDesktopScreenshot();
const displayInfo = await sdk.getDesktopDisplayInfo();
await sdk.moveDesktopMouse({ x: 400, y: 300 });
await sdk.clickDesktop({ x: 400, y: 300, button: "left", clickCount: 1 });
await sdk.typeDesktopText({ text: "hello world", delayMs: 10 });
await sdk.pressDesktopKey({ key: "ctrl+l" });
await sdk.stopDesktop();
```
Screenshot helpers return `Uint8Array` PNG bytes. The SDK does not attempt to install OS packages remotely; callers should surface `missingDependencies` and `installCommand` from `getDesktopStatus()`.
## Error handling ## Error handling
```ts ```ts

View file

@ -1,388 +0,0 @@
---
title: "Session Transcript Schema"
description: "Universal event schema for session transcripts across all agents."
---
Each coding agent outputs events in its own native format. The sandbox-agent converts these into a universal event schema, giving you a consistent session transcript regardless of which agent you use.
The schema is defined in [OpenAPI format](https://github.com/rivet-dev/sandbox-agent/blob/main/docs/openapi.json). See the [HTTP API Reference](/api-reference) for endpoint documentation.
## Coverage Matrix
This table shows which agent features appear in the universal event stream. All agents retain their full native feature coverage—this only reflects what's normalized into the schema.
| Feature | Claude | Codex | OpenCode | Amp | Pi (RPC) |
|--------------------|:------:|:-----:|:------------:|:------------:|:------------:|
| Stability | Stable | Stable| Experimental | Experimental | Experimental |
| Text Messages | ✓ | ✓ | ✓ | ✓ | ✓ |
| Tool Calls | ✓ | ✓ | ✓ | ✓ | ✓ |
| Tool Results | ✓ | ✓ | ✓ | ✓ | ✓ |
| Questions (HITL) | ✓ | | ✓ | | |
| Permissions (HITL) | ✓ | ✓ | ✓ | - | |
| Images | - | ✓ | ✓ | - | ✓ |
| File Attachments | - | ✓ | ✓ | - | |
| Session Lifecycle | - | ✓ | ✓ | - | |
| Error Events | - | ✓ | ✓ | ✓ | ✓ |
| Reasoning/Thinking | - | ✓ | - | - | ✓ |
| Command Execution | - | ✓ | - | - | |
| File Changes | - | ✓ | - | - | |
| MCP Tools | ✓ | ✓ | ✓ | ✓ | |
| Streaming Deltas | ✓ | ✓ | ✓ | - | ✓ |
| Variants | | ✓ | ✓ | ✓ | ✓ |
Agents: [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) · [Codex](https://github.com/openai/codex) · [OpenCode](https://github.com/opencode-ai/opencode) · [Amp](https://ampcode.com) · [Pi](https://buildwithpi.ai/pi-cli)
- ✓ = Appears in session events
- \- = Agent supports natively, schema conversion coming soon
- (blank) = Not supported by agent
- Pi runtime model is router-managed per-session RPC (`pi --mode rpc`); it does not use generic subprocess streaming.
<AccordionGroup>
<Accordion title="Text Messages">
Basic message exchange between user and assistant.
</Accordion>
<Accordion title="Tool Calls & Results">
Visibility into tool invocations (file reads, command execution, etc.) and their results. When not natively supported, tool activity is embedded in message content.
</Accordion>
<Accordion title="Questions (HITL)">
Interactive questions the agent asks the user. Emits `question.requested` and `question.resolved` events.
</Accordion>
<Accordion title="Permissions (HITL)">
Permission requests for sensitive operations. Emits `permission.requested` and `permission.resolved` events.
</Accordion>
<Accordion title="Images">
Support for image attachments in messages.
</Accordion>
<Accordion title="File Attachments">
Support for file attachments in messages.
</Accordion>
<Accordion title="Session Lifecycle">
Native `session.started` and `session.ended` events. When not supported, the daemon emits synthetic lifecycle events.
</Accordion>
<Accordion title="Error Events">
Structured error events for runtime failures.
</Accordion>
<Accordion title="Reasoning/Thinking">
Extended thinking or reasoning content with visibility controls.
</Accordion>
<Accordion title="Command Execution">
Detailed command execution events with stdout/stderr.
</Accordion>
<Accordion title="File Changes">
Structured file modification events with diffs.
</Accordion>
<Accordion title="MCP Tools">
Model Context Protocol tool support.
</Accordion>
<Accordion title="Streaming Deltas">
Native streaming of content deltas. When not supported, the daemon emits a single synthetic delta before `item.completed`.
</Accordion>
<Accordion title="Variants">
Model variants such as reasoning effort or depth. Agents may expose different variant sets per model.
</Accordion>
</AccordionGroup>
Want support for another agent? [Open an issue](https://github.com/rivet-dev/sandbox-agent/issues/new) to request it.
## UniversalEvent
Every event from the API is wrapped in a `UniversalEvent` envelope.
| Field | Type | Description |
|-------|------|-------------|
| `event_id` | string | Unique identifier for this event |
| `sequence` | integer | Monotonic sequence number within the session (starts at 1) |
| `time` | string | RFC3339 timestamp |
| `session_id` | string | Daemon-generated session identifier |
| `native_session_id` | string? | Provider-native session/thread identifier (e.g., Codex `threadId`, OpenCode `sessionID`) |
| `source` | string | Event origin: `agent` (native) or `daemon` (synthetic) |
| `synthetic` | boolean | Whether this event was generated by the daemon to fill gaps |
| `type` | string | Event type (see [Event Types](#event-types)) |
| `data` | object | Event-specific payload |
| `raw` | any? | Original provider payload (only when `include_raw=true`) |
```json
{
"event_id": "evt_abc123",
"sequence": 1,
"time": "2025-01-28T12:00:00Z",
"session_id": "my-session",
"native_session_id": "thread_xyz",
"source": "agent",
"synthetic": false,
"type": "item.completed",
"data": { ... }
}
```
## Event Types
### Session Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `session.started` | Session has started | `{ metadata?: any }` |
| `session.ended` | Session has ended | `{ reason, terminated_by, message?, exit_code?, stderr? }` |
### Turn Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `turn.started` | Turn has started | `{ phase: "started", turn_id?, metadata? }` |
| `turn.ended` | Turn has ended | `{ phase: "ended", turn_id?, metadata? }` |
**SessionEndedData**
| Field | Type | Values |
|-------|------|--------|
| `reason` | string | `completed`, `error`, `terminated` |
| `terminated_by` | string | `agent`, `daemon` |
| `message` | string? | Error message (only present when reason is `error`) |
| `exit_code` | int? | Process exit code (only present when reason is `error`) |
| `stderr` | StderrOutput? | Structured stderr output (only present when reason is `error`) |
**StderrOutput**
| Field | Type | Description |
|-------|------|-------------|
| `head` | string? | First 20 lines of stderr (if truncated) or full stderr (if not truncated) |
| `tail` | string? | Last 50 lines of stderr (only present if truncated) |
| `truncated` | boolean | Whether the output was truncated |
| `total_lines` | int? | Total number of lines in stderr |
### Item Lifecycle
| Type | Description | Data |
|------|-------------|------|
| `item.started` | Item creation | `{ item }` |
| `item.delta` | Streaming content delta | `{ item_id, native_item_id?, delta }` |
| `item.completed` | Item finalized | `{ item }` |
Items follow a consistent lifecycle: `item.started` → `item.delta` (0 or more) → `item.completed`.
### HITL (Human-in-the-Loop)
| Type | Description | Data |
|------|-------------|------|
| `permission.requested` | Permission request pending | `{ permission_id, action, status, metadata? }` |
| `permission.resolved` | Permission decision recorded | `{ permission_id, action, status, metadata? }` |
| `question.requested` | Question pending user input | `{ question_id, prompt, options, status }` |
| `question.resolved` | Question answered or rejected | `{ question_id, prompt, options, status, response? }` |
**PermissionEventData**
| Field | Type | Description |
|-------|------|-------------|
| `permission_id` | string | Identifier for the permission request |
| `action` | string | What the agent wants to do |
| `status` | string | `requested`, `accept`, `accept_for_session`, `reject` |
| `metadata` | any? | Additional context |
**QuestionEventData**
| Field | Type | Description |
|-------|------|-------------|
| `question_id` | string | Identifier for the question |
| `prompt` | string | Question text |
| `options` | string[] | Available answer options |
| `status` | string | `requested`, `answered`, `rejected` |
| `response` | string? | Selected answer (when resolved) |
### Errors
| Type | Description | Data |
|------|-------------|------|
| `error` | Runtime error | `{ message, code?, details? }` |
| `agent.unparsed` | Parse failure | `{ error, location, raw_hash? }` |
The `agent.unparsed` event indicates the daemon failed to parse an agent payload. This should be treated as a bug.
## UniversalItem
Items represent discrete units of content within a session.
| Field | Type | Description |
|-------|------|-------------|
| `item_id` | string | Daemon-generated identifier |
| `native_item_id` | string? | Provider-native item/message identifier |
| `parent_id` | string? | Parent item ID (e.g., tool call/result parented to a message) |
| `kind` | string | Item category (see below) |
| `role` | string? | Actor role for message items |
| `status` | string | Lifecycle status |
| `content` | ContentPart[] | Ordered list of content parts |
### ItemKind
| Value | Description |
|-------|-------------|
| `message` | User or assistant message |
| `tool_call` | Tool invocation |
| `tool_result` | Tool execution result |
| `system` | System message |
| `status` | Status update |
| `unknown` | Unrecognized item type |
### ItemRole
| Value | Description |
|-------|-------------|
| `user` | User message |
| `assistant` | Assistant response |
| `system` | System prompt |
| `tool` | Tool-related message |
### ItemStatus
| Value | Description |
|-------|-------------|
| `in_progress` | Item is streaming or pending |
| `completed` | Item is finalized |
| `failed` | Item execution failed |
## Content Parts
The `content` array contains typed parts that make up an item's payload.
### text
Plain text content.
```json
{ "type": "text", "text": "Hello, world!" }
```
### json
Structured JSON content.
```json
{ "type": "json", "json": { "key": "value" } }
```
### tool_call
Tool invocation.
| Field | Type | Description |
|-------|------|-------------|
| `name` | string | Tool name |
| `arguments` | string | JSON-encoded arguments |
| `call_id` | string | Unique call identifier |
```json
{
"type": "tool_call",
"name": "read_file",
"arguments": "{\"path\": \"/src/main.ts\"}",
"call_id": "call_abc123"
}
```
### tool_result
Tool execution result.
| Field | Type | Description |
|-------|------|-------------|
| `call_id` | string | Matching call identifier |
| `output` | string | Tool output |
```json
{
"type": "tool_result",
"call_id": "call_abc123",
"output": "File contents here..."
}
```
### file_ref
File reference with optional diff.
| Field | Type | Description |
|-------|------|-------------|
| `path` | string | File path |
| `action` | string | `read`, `write`, `patch` |
| `diff` | string? | Unified diff (for patches) |
```json
{
"type": "file_ref",
"path": "/src/main.ts",
"action": "write",
"diff": "@@ -1,3 +1,4 @@\n+import { foo } from 'bar';"
}
```
### image
Image reference.
| Field | Type | Description |
|-------|------|-------------|
| `path` | string | Image file path |
| `mime` | string? | MIME type |
```json
{ "type": "image", "path": "/tmp/screenshot.png", "mime": "image/png" }
```
### reasoning
Model reasoning/thinking content.
| Field | Type | Description |
|-------|------|-------------|
| `text` | string | Reasoning text |
| `visibility` | string | `public` or `private` |
```json
{ "type": "reasoning", "text": "Let me think about this...", "visibility": "public" }
```
### status
Status indicator.
| Field | Type | Description |
|-------|------|-------------|
| `label` | string | Status label |
| `detail` | string? | Additional detail |
```json
{ "type": "status", "label": "Running tests", "detail": "3 of 10 passed" }
```
## Source & Synthetics
### EventSource
The `source` field indicates who emitted the event:
| Value | Description |
|-------|-------------|
| `agent` | Native event from the agent |
| `daemon` | Synthetic event generated by the daemon |
### Synthetic Events
The daemon emits synthetic events (`synthetic: true`, `source: "daemon"`) to provide a consistent event stream across all agents. Common synthetics:
| Synthetic | When |
|-----------|------|
| `session.started` | Agent doesn't emit explicit session start |
| `session.ended` | Agent doesn't emit explicit session end |
| `turn.started` | Agent doesn't emit explicit turn start |
| `turn.ended` | Agent doesn't emit explicit turn end |
| `item.started` | Agent doesn't emit item start events |
| `item.delta` | Agent doesn't stream deltas natively |
| `question.*` | Claude Code plan mode (from ExitPlanMode tool) |
### Raw Payloads
Pass `include_raw=true` to event endpoints to receive the original agent payload in the `raw` field. Useful for debugging or accessing agent-specific data not in the universal schema.
```typescript
const events = await client.getEvents("my-session", { includeRaw: true });
// events[0].raw contains the original agent payload
```

View file

@ -20,7 +20,6 @@ body {
color: var(--sa-text); color: var(--sa-text);
} }
/*
a { a {
color: var(--sa-primary); color: var(--sa-primary);
} }
@ -41,6 +40,13 @@ select {
color: var(--sa-text); color: var(--sa-text);
} }
code,
pre {
background-color: var(--sa-card);
border: 1px solid var(--sa-border);
color: var(--sa-text);
}
.card, .card,
.mintlify-card, .mintlify-card,
.docs-card { .docs-card {
@ -64,4 +70,3 @@ select {
.alert-danger { .alert-danger {
border-color: var(--sa-danger); border-color: var(--sa-danger);
} }
*/

View file

@ -29,25 +29,6 @@ Verify the agent is installed:
ls -la ~/.local/share/sandbox-agent/bin/ ls -la ~/.local/share/sandbox-agent/bin/
``` ```
### 4. Binary libc mismatch (musl vs glibc)
Claude Code binaries are available in both musl and glibc variants. If you see errors like:
```
cannot execute: required file not found
Error loading shared library libstdc++.so.6: No such file or directory
```
This means the wrong binary variant was downloaded.
**For sandbox-agent 0.2.0+**: Platform detection is automatic. The correct binary (musl or glibc) is downloaded based on the runtime environment.
**For sandbox-agent 0.1.x**: Use Alpine Linux which has native musl support:
```dockerfile
FROM alpine:latest
RUN apk add --no-cache curl ca-certificates libstdc++ libgcc bash
```
## Daytona Network Restrictions ## Daytona Network Restrictions

View file

@ -1,5 +1,5 @@
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex

View file

@ -1,5 +1,5 @@
FROM node:22-bookworm-slim FROM node:22-bookworm-slim
RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/* RUN apt-get update && apt-get install -y curl ca-certificates && rm -rf /var/lib/apt/lists/*
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
RUN sandbox-agent install-agent claude RUN sandbox-agent install-agent claude
RUN sandbox-agent install-agent codex RUN sandbox-agent install-agent codex

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -1,7 +1,7 @@
FROM cloudflare/sandbox:0.7.0 FROM cloudflare/sandbox:0.7.0
# Install sandbox-agent # Install sandbox-agent
RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.4.x/install.sh | sh
# Pre-install agents # Pre-install agents
RUN sandbox-agent install-agent claude && \ RUN sandbox-agent install-agent claude && \

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -0,0 +1,33 @@
import { SandboxAgent } from "sandbox-agent";
import { daytona } from "sandbox-agent/daytona";
/**
 * Gathers the agent API keys that are set in the current process environment.
 *
 * Only `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` are looked at; a variable that
 * is unset or empty is left out of the result.
 *
 * @returns A new record mapping each non-empty variable name to its value.
 */
function collectEnvVars(): Record<string, string> {
  const forwarded: Record<string, string> = {};
  for (const name of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
    const value = process.env[name];
    if (value) {
      forwarded[name] = value;
    }
  }
  return forwarded;
}
/**
 * Derives the sandbox-agent base URL from an inspector UI URL by removing the
 * trailing `/ui` path segment.
 *
 * The trailing slash is optional, so `.../ui/` and `.../ui` both reduce to the
 * same base URL. Input that does not end in that segment is returned unchanged.
 *
 * @param inspectorUrl - URL of the inspector UI.
 * @returns The URL with a trailing `/ui` or `/ui/` removed.
 */
function inspectorUrlToBaseUrl(inspectorUrl: string): string {
  // `\/?` tolerates a missing trailing slash; the leading `\/` keeps paths
  // such as `/gui` from matching.
  return inspectorUrl.replace(/\/ui\/?$/, "");
}
/**
 * Starts a sandbox-agent client backed by the Daytona provider.
 *
 * The sandbox is created with the environment variables returned by
 * `collectEnvVars()`. The resolved object carries the base URL derived from
 * the client's inspector URL and a `cleanup` callback that calls
 * `client.killSandbox()`.
 *
 * The return type also allows `token` and `extraHeaders`, but this
 * implementation never sets them.
 *
 * @returns The base URL and a cleanup callback for the started sandbox.
 */
export async function setupDaytonaSandboxAgent(): Promise<{
  baseUrl: string;
  token?: string;
  extraHeaders?: Record<string, string>;
  cleanup: () => Promise<void>;
}> {
  const envVars = collectEnvVars();
  const provider = daytona({ create: { envVars } });
  const client = await SandboxAgent.start({ sandbox: provider });

  const baseUrl = inspectorUrlToBaseUrl(client.inspectorUrl);
  const cleanup = async (): Promise<void> => {
    await client.killSandbox();
  };

  return { baseUrl, cleanup };
}

View file

@ -16,7 +16,6 @@ console.log(`UI: ${client.inspectorUrl}`);
const session = await client.createSession({ const session = await client.createSession({
agent: detectAgent(), agent: detectAgent(),
cwd: "/home/daytona",
}); });
session.onEvent((event) => { session.onEvent((event) => {

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

34
examples/e2b/src/e2b.ts Normal file
View file

@ -0,0 +1,34 @@
import { SandboxAgent } from "sandbox-agent";
import { e2b } from "sandbox-agent/e2b";
/**
 * Builds the environment map handed to the E2B sandbox.
 *
 * `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` are copied from `process.env` when
 * they hold a non-empty value; otherwise they are omitted.
 *
 * @returns A new record with the API keys that are currently set.
 */
function collectEnvVars(): Record<string, string> {
  const { ANTHROPIC_API_KEY: anthropicKey, OPENAI_API_KEY: openAiKey } = process.env;
  const forwarded: Record<string, string> = {};
  if (anthropicKey) {
    forwarded.ANTHROPIC_API_KEY = anthropicKey;
  }
  if (openAiKey) {
    forwarded.OPENAI_API_KEY = openAiKey;
  }
  return forwarded;
}
/**
 * Converts an inspector UI URL into the sandbox-agent base URL by dropping
 * the trailing `/ui` path segment.
 *
 * Both `.../ui/` and `.../ui` are accepted. A URL that does not end in that
 * segment is returned as-is.
 *
 * @param inspectorUrl - URL of the inspector UI.
 * @returns The URL without a trailing `/ui` or `/ui/`.
 */
function inspectorUrlToBaseUrl(inspectorUrl: string): string {
  // The optional `\/?` handles a URL whose trailing slash is missing.
  return inspectorUrl.replace(/\/ui\/?$/, "");
}
/**
 * Starts a sandbox-agent client backed by the E2B provider.
 *
 * The provider receives the value of `E2B_TEMPLATE` (possibly `undefined`) as
 * its `template` and the result of `collectEnvVars()` as the sandbox
 * environment. The resolved object carries the base URL derived from the
 * client's inspector URL and a `cleanup` callback that calls
 * `client.killSandbox()`.
 *
 * The return type also allows `token`, but this implementation never sets it.
 *
 * @returns The base URL and a cleanup callback for the started sandbox.
 */
export async function setupE2BSandboxAgent(): Promise<{
  baseUrl: string;
  token?: string;
  cleanup: () => Promise<void>;
}> {
  const template = process.env.E2B_TEMPLATE;
  const envs = collectEnvVars();
  const provider = e2b({ template, create: { envs } });
  const client = await SandboxAgent.start({ sandbox: provider });

  const baseUrl = inspectorUrlToBaseUrl(client.inspectorUrl);
  const cleanup = async (): Promise<void> => {
    await client.killSandbox();
  };

  return { baseUrl, cleanup };
}

View file

@ -5,15 +5,15 @@ import { detectAgent } from "@sandbox-agent/example-shared";
const envs: Record<string, string> = {}; const envs: Record<string, string> = {};
if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY;
const template = process.env.E2B_TEMPLATE;
const client = await SandboxAgent.start({ const client = await SandboxAgent.start({
// ✨ NEW ✨ // ✨ NEW ✨
sandbox: e2b({ create: { envs } }), sandbox: e2b({ template, create: { envs } }),
}); });
const session = await client.createSession({ const session = await client.createSession({
agent: detectAgent(), agent: detectAgent(),
cwd: "/home/user",
}); });
session.onEvent((event) => { session.onEvent((event) => {

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -1,7 +1,8 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022"], "lib": ["ES2022", "DOM"],
"types": ["node"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -1,13 +1,15 @@
{ {
"compilerOptions": { "compilerOptions": {
"target": "ES2022", "target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext", "module": "ESNext",
"moduleResolution": "Bundler", "moduleResolution": "Bundler",
"allowImportingTsExtensions": true, "allowImportingTsExtensions": true,
"noEmit": true, "noEmit": true,
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true "skipLibCheck": true,
"types": ["node"]
}, },
"include": ["src"] "include": ["src"]
} }

View file

@ -9,7 +9,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url));
const REPO_ROOT = path.resolve(__dirname, "..", "..", ".."); const REPO_ROOT = path.resolve(__dirname, "..", "..", "..");
/** Pre-built Docker image with all agents installed. */ /** Pre-built Docker image with all agents installed. */
export const FULL_IMAGE = "rivetdev/sandbox-agent:0.4.0-rc.3-full"; export const FULL_IMAGE = "rivetdev/sandbox-agent:0.4.2-full";
export interface DockerSandboxOptions { export interface DockerSandboxOptions {
/** Container port used by sandbox-agent inside Docker. */ /** Container port used by sandbox-agent inside Docker. */

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

View file

@ -0,0 +1,20 @@
{
"name": "@sandbox-agent/example-sprites",
"private": true,
"type": "module",
"scripts": {
"start": "tsx src/index.ts",
"typecheck": "tsc --noEmit"
},
"dependencies": {
"@fly/sprites": "latest",
"@sandbox-agent/example-shared": "workspace:*",
"sandbox-agent": "workspace:*"
},
"devDependencies": {
"@types/node": "latest",
"tsx": "latest",
"typescript": "latest",
"vitest": "^3.0.0"
}
}

View file

@ -0,0 +1,21 @@
import { SandboxAgent } from "sandbox-agent";
import { sprites } from "sandbox-agent/sprites";
// Forward whichever agent API keys are set (and non-empty) to the sandbox.
const env: Record<string, string> = {};
for (const name of ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"]) {
  const value = process.env[name];
  if (value) {
    env[name] = value;
  }
}

// The token is taken from the first of these variables that is defined.
const token = process.env.SPRITES_API_KEY ?? process.env.SPRITE_TOKEN ?? process.env.SPRITES_TOKEN;

const client = await SandboxAgent.start({
  sandbox: sprites({ token, env }),
});

console.log(`UI: ${client.inspectorUrl}`);
const health = await client.getHealth();
console.log(health);

// On the first Ctrl+C, destroy the sandbox and then exit with status 0.
const shutdown = async (): Promise<void> => {
  await client.destroySandbox();
  process.exit(0);
};
process.once("SIGINT", shutdown);

View file

@ -0,0 +1,34 @@
import { describe, it, expect } from "vitest";
import { SandboxAgent } from "sandbox-agent";
import { sprites } from "sandbox-agent/sprites";
// Resolve the Sprites token once so the skip decision and the token handed to
// the provider always agree. `||` is used rather than `??` so that an empty
// string (e.g. a blank entry in a .env file) falls through to the next
// variable instead of being passed on as the token.
const spritesToken = process.env.SPRITES_API_KEY || process.env.SPRITE_TOKEN || process.env.SPRITES_TOKEN;
const shouldRun = Boolean(spritesToken);

// Per-test timeout in milliseconds; falls back to 300_000 when
// SANDBOX_TEST_TIMEOUT_MS is unset, non-numeric, or zero.
const timeoutMs = Number.parseInt(process.env.SANDBOX_TEST_TIMEOUT_MS || "", 10) || 300_000;

// Skip the test when no token is configured.
const testFn = shouldRun ? it : it.skip;

describe("sprites provider", () => {
  testFn(
    "starts sandbox-agent and responds to /v1/health",
    async () => {
      // Forward agent API keys to the sandbox only when they are set.
      const env: Record<string, string> = {};
      if (process.env.ANTHROPIC_API_KEY) env.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
      if (process.env.OPENAI_API_KEY) env.OPENAI_API_KEY = process.env.OPENAI_API_KEY;

      const sdk = await SandboxAgent.start({
        sandbox: sprites({
          token: spritesToken,
          env,
        }),
      });

      try {
        const health = await sdk.getHealth();
        expect(health.status).toBe("ok");
      } finally {
        // Always destroy the sandbox, even when the assertion fails.
        await sdk.destroySandbox();
      }
    },
    timeoutMs,
  );
});

View file

@ -0,0 +1,17 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": ["ES2022", "DOM"],
"module": "ESNext",
"moduleResolution": "Bundler",
"allowImportingTsExtensions": true,
"noEmit": true,
"esModuleInterop": true,
"strict": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"types": ["node"]
},
"include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"]
}

View file

@ -19,7 +19,6 @@ console.log(`UI: ${client.inspectorUrl}`);
const session = await client.createSession({ const session = await client.createSession({
agent: detectAgent(), agent: detectAgent(),
cwd: "/home/vercel-sandbox",
}); });
session.onEvent((event) => { session.onEvent((event) => {

View file

@ -0,0 +1,35 @@
import { SandboxAgent } from "sandbox-agent";
import { vercel } from "sandbox-agent/vercel";
/**
 * Selects the agent API keys to pass into the Vercel sandbox.
 *
 * `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` are included only when
 * `process.env` holds a non-empty value for them.
 *
 * @returns A new record of the API keys that are currently set.
 */
function collectEnvVars(): Record<string, string> {
  return Object.fromEntries(
    ["ANTHROPIC_API_KEY", "OPENAI_API_KEY"].flatMap((name): [string, string][] => {
      const value = process.env[name];
      return value ? [[name, value]] : [];
    }),
  );
}
/**
 * Strips the trailing `/ui` path segment from an inspector UI URL to obtain
 * the sandbox-agent base URL.
 *
 * The segment is removed with or without a trailing slash. A URL that does
 * not end in it is returned unchanged.
 *
 * @param inspectorUrl - URL of the inspector UI.
 * @returns The URL with a trailing `/ui` or `/ui/` removed.
 */
function inspectorUrlToBaseUrl(inspectorUrl: string): string {
  // `\/?` makes the final slash optional so `.../ui` is handled like `.../ui/`.
  return inspectorUrl.replace(/\/ui\/?$/, "");
}
/**
 * Starts a sandbox-agent client backed by the Vercel provider.
 *
 * The sandbox is created with the `node24` runtime and the environment
 * returned by `collectEnvVars()`. The resolved object carries the base URL
 * derived from the client's inspector URL and a `cleanup` callback that calls
 * `client.killSandbox()`.
 *
 * The return type also allows `token`, but this implementation never sets it.
 *
 * @returns The base URL and a cleanup callback for the started sandbox.
 */
export async function setupVercelSandboxAgent(): Promise<{
  baseUrl: string;
  token?: string;
  cleanup: () => Promise<void>;
}> {
  const createOptions = {
    runtime: "node24",
    env: collectEnvVars(),
  };
  const provider = vercel({ create: createOptions });
  const client = await SandboxAgent.start({ sandbox: provider });

  const baseUrl = inspectorUrlToBaseUrl(client.inspectorUrl);
  const cleanup = async (): Promise<void> => {
    await client.killSandbox();
  };

  return { baseUrl, cleanup };
}

View file

@ -9,7 +9,8 @@
"esModuleInterop": true, "esModuleInterop": true,
"strict": true, "strict": true,
"skipLibCheck": true, "skipLibCheck": true,
"resolveJsonModule": true "resolveJsonModule": true,
"types": ["node"]
}, },
"include": ["src/**/*"], "include": ["src/**/*"],
"exclude": ["node_modules", "**/*.test.ts"] "exclude": ["node_modules", "**/*.test.ts"]

179
foundry/AGENT-HANDOFF.md Normal file
View file

@ -0,0 +1,179 @@
# Foundry Agent Handoff
## Baseline
- Repo: `rivet-dev/sandbox-agent`
- Branch: `columbus-v2`
- Last pushed commit: `3174fe73` (`feat(foundry): checkpoint actor and workspace refactor`)
- Progress/spec tracker: [FOUNDRY-CHANGES.md](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/FOUNDRY-CHANGES.md)
## What is already landed
These spec slices are already implemented and pushed:
- Item `1`: backend actor rename `auth-user` -> `user`
- Item `2`: Better Auth mapping comments
- Item `5`: task raw SQL cleanup into migrations
- Item `6`: `history` -> `audit-log`
- Item `7`: default model moved to user-scoped app state
- Item `20`: admin action prefixing
- Item `23`: dead `getTaskEnriched` / `enrichTaskRecord` removal
- Item `25`: `Workbench` -> `Workspace` rename across backend/shared/client/frontend
- Item `26`: branch rename deleted
- Organization realtime was already collapsed to full-snapshot `organizationUpdated`
- Task realtime was already aligned to `taskUpdated`
## Known blocker
Spec item `3` is only partially done. The singleton constraint for the Better Auth `user` table is still blocked.
- File: [foundry/packages/backend/src/actors/user/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/user/db/schema.ts)
- Reason: Better Auth still depends on external string `user.id`, so a literal singleton `CHECK (id = 1)` on that table is not a safe mechanical change.
## Important current state
There are uncommitted edits on top of the pushed checkpoint. Another agent should start from the current worktree, not just `origin/columbus-v2`.
Current dirty files:
- [foundry/packages/backend/src/actors/github-data/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/github-data/index.ts)
- [foundry/packages/backend/src/actors/organization/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/actions.ts)
- [foundry/packages/backend/src/actors/repository/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/repository/actions.ts)
- [foundry/packages/backend/src/actors/task/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workspace.ts)
- [foundry/packages/client/src/mock/backend-client.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/client/src/mock/backend-client.ts)
These files are the current hot path for the unfinished structural work.
## What is partially in place but not finished
### User-owned task UI state
The user actor already has the schema and CRUD surface for per-user task/session UI state:
- [foundry/packages/backend/src/actors/user/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/user/db/schema.ts)
`user_task_state`
- [foundry/packages/backend/src/actors/user/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/user/index.ts)
`getTaskState`, `upsertTaskState`, `deleteTaskState`
But the task actor and UI are still reading/writing the old task-global fields:
- [foundry/packages/backend/src/actors/task/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/db/schema.ts)
still contains `task_runtime.active_session_id` and session `unread` / `draft_*`
- [foundry/packages/backend/src/actors/task/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workspace.ts)
still derives unread/draft/active-session from task-local rows
- [foundry/packages/frontend/src/components/mock-layout.tsx](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/frontend/src/components/mock-layout.tsx)
still treats `activeSessionId` as frontend-local and uses task-level unread/draft state
So items `21`, `22`, `24`, and part of `19` are only half-done.
### Coordinator ownership
The current architecture still violates the intended coordinator pattern:
- Organization still owns `taskLookup` and `taskSummaries`
- [foundry/packages/backend/src/actors/organization/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/db/schema.ts)
- Organization still resolves `taskId -> repoId`
- [foundry/packages/backend/src/actors/organization/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/actions.ts)
- Task still pushes summary updates to organization instead of repository
- [foundry/packages/backend/src/actors/task/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workspace.ts)
- Repository still does not own a `tasks` projection table yet
- [foundry/packages/backend/src/actors/repository/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/repository/db/schema.ts)
So items `9`, `13`, and `15` are still open.
### Queue-only mutations
Task actor workspace commands already go through queue sends. Other actors still do not fully follow the queue-only mutation rule:
- [foundry/packages/backend/src/actors/user/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/user/index.ts)
- [foundry/packages/backend/src/actors/github-data/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/github-data/index.ts)
- [foundry/packages/backend/src/actors/organization/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/actions.ts)
- [foundry/packages/backend/src/actors/organization/app-shell.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/app-shell.ts)
So items `4`, `10`, and `11` are still open.
### Dynamic model/agent data
The frontend/client still hardcode model groups:
- [foundry/packages/frontend/src/components/mock-layout/view-model.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/frontend/src/components/mock-layout/view-model.ts)
- [foundry/packages/client/src/workspace-model.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/client/src/workspace-model.ts)
- [foundry/packages/shared/src/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/shared/src/workspace.ts)
`WorkspaceModelId` is still a hardcoded union
The repo already has the API source of truth available through the TypeScript SDK:
- [sdks/typescript/src/client.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/sdks/typescript/src/client.ts)
`SandboxAgent.listAgents({ config: true })`
- [server/packages/sandbox-agent/src/router.rs](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/server/packages/sandbox-agent/src/router.rs)
`/v1/agents`
- [server/packages/sandbox-agent/src/router/support.rs](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/server/packages/sandbox-agent/src/router/support.rs)
`fallback_config_options`
So item `8` is still open.
### GitHub sync chunking/progress
GitHub data sync is still a delete-and-replace flow:
- [foundry/packages/backend/src/actors/github-data/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/github-data/index.ts)
`replaceRepositories`, `replaceBranches`, `replaceMembers`, `replacePullRequests`, and full-sync flow
- [foundry/packages/backend/src/actors/github-data/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/github-data/db/schema.ts)
no generation/progress columns yet
- [foundry/packages/shared/src/app-shell.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/shared/src/app-shell.ts)
no structured sync progress field yet
So item `16` is still open.
## Recommended next order
If another agent picks this up, this is the safest order:
1. Finish items `21`, `22`, `24`, `19` together.
Reason: user-owned task UI state is already half-wired, and task schema cleanup depends on the same files.
2. Finish items `9`, `13`, `15` together.
Reason: coordinator ownership, repo-owned task projections, and PR/task unification are the same refactor seam.
3. Finish item `16`.
Reason: GitHub sync chunking is mostly isolated to `github-data` plus app-shell/shared snapshot wiring.
4. Finish item `8`.
Reason: dynamic model/agent data is largely independent once user default model is already user-scoped.
5. Finish items `4`, `10`, `11`, `12`, `18`, and the final event audit.
6. Do item `17` last.
## Concrete file hotspots for the next agent
Backend:
- [foundry/packages/backend/src/actors/task/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workspace.ts)
- [foundry/packages/backend/src/actors/task/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/db/schema.ts)
- [foundry/packages/backend/src/actors/task/workflow/common.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workflow/common.ts)
- [foundry/packages/backend/src/actors/task/workflow/commands.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workflow/commands.ts)
- [foundry/packages/backend/src/actors/task/workflow/init.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/task/workflow/init.ts)
- [foundry/packages/backend/src/actors/repository/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/repository/actions.ts)
- [foundry/packages/backend/src/actors/repository/db/schema.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/repository/db/schema.ts)
- [foundry/packages/backend/src/actors/organization/actions.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/organization/actions.ts)
- [foundry/packages/backend/src/actors/github-data/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/github-data/index.ts)
- [foundry/packages/backend/src/actors/user/index.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/backend/src/actors/user/index.ts)
Shared/client/frontend:
- [foundry/packages/shared/src/workspace.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/shared/src/workspace.ts)
- [foundry/packages/shared/src/contracts.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/shared/src/contracts.ts)
- [foundry/packages/shared/src/app-shell.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/shared/src/app-shell.ts)
- [foundry/packages/client/src/backend-client.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/client/src/backend-client.ts)
- [foundry/packages/client/src/workspace-model.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/client/src/workspace-model.ts)
- [foundry/packages/frontend/src/components/mock-layout.tsx](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/frontend/src/components/mock-layout.tsx)
- [foundry/packages/frontend/src/components/mock-layout/view-model.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/frontend/src/components/mock-layout/view-model.ts)
- [foundry/packages/frontend/src/features/tasks/status.ts](/Users/nathan/conductor/workspaces/sandbox-agent/columbus-v1/foundry/packages/frontend/src/features/tasks/status.ts)
## Notes that matter
- The pushed checkpoint is useful, but it is not the full current state. There are uncommitted edits in the hot-path backend files listed above.
- The current tree already contains a partially added `user_task_state` path. Do not duplicate that work; finish the migration by removing the old task-owned fields and rewiring readers/writers.
- The current task actor still reads mutable fields from `c.state` such as `repoRemote`, `branchName`, `title`, `task`, `sandboxProviderId`, and `agentType`. That is part of item `19`.
- The current frontend still synthesizes PR-only rows into fake tasks. That should go away as part of repo-owned task projection / PR unification.

View file

@ -56,6 +56,41 @@ Use `pnpm` workspaces and Turborepo.
- mock frontend changes: `just foundry-mock` or restart with `just foundry-mock-down && just foundry-mock` - mock frontend changes: `just foundry-mock` or restart with `just foundry-mock-down && just foundry-mock`
- local frontend-only work outside Docker: restart `pnpm --filter @sandbox-agent/foundry-frontend dev` or `just foundry-dev-mock` as appropriate - local frontend-only work outside Docker: restart `pnpm --filter @sandbox-agent/foundry-frontend dev` or `just foundry-dev-mock` as appropriate
- The backend does **not** hot reload. Bun's `--hot` flag causes the server to re-bind on a different port (e.g. 6421 instead of 6420), breaking all client connections while the container still exposes the original port. After backend code changes, restart the backend container: `just foundry-dev-down && just foundry-dev`. - The backend does **not** hot reload. Bun's `--hot` flag causes the server to re-bind on a different port (e.g. 6421 instead of 6420), breaking all client connections while the container still exposes the original port. After backend code changes, restart the backend container: `just foundry-dev-down && just foundry-dev`.
- The dev server has debug logging enabled by default (`RIVET_LOG_LEVEL=debug`, `FOUNDRY_LOG_LEVEL=debug`) via `compose.dev.yaml`. Error stacks and timestamps are also enabled.
- The frontend client uses JSON encoding for RivetKit in development (`import.meta.env.DEV`) for easier debugging. Production uses the default encoding.
## Foundry Base Sandbox Image
Local Docker sandboxes use the `rivetdev/sandbox-agent:foundry-base-latest` image by default. This image extends the sandbox-agent runtime with sudo, git, neovim, gh, node, bun, chromium, and agent-browser.
- **Dockerfile:** `docker/foundry-base.Dockerfile` (builds sandbox-agent from source, x86_64 only)
- **Publish script:** `scripts/publish-foundry-base.sh` (builds and pushes to Docker Hub `rivetdev/sandbox-agent`)
- **Tags:** `foundry-base-<YYYYMMDD>T<HHMMSS>Z` (timestamped) + `foundry-base-latest` (rolling)
- **Build from repo root:** `./foundry/scripts/publish-foundry-base.sh` (or `--dry-run` to skip push)
- **Override image in dev:** set `HF_LOCAL_SANDBOX_IMAGE` in `foundry/.env` or environment. The env var is passed through `compose.dev.yaml` to the backend.
- **Resolution order:** `config.sandboxProviders.local.image` (config.toml) > `HF_LOCAL_SANDBOX_IMAGE` (env var) > `DEFAULT_LOCAL_SANDBOX_IMAGE` constant in `packages/backend/src/actors/sandbox/index.ts`.
- The image must be built with `--platform linux/amd64`. The Rust build is memory-intensive; Docker Desktop needs at least 8GB RAM allocated.
- When updating the base image contents (new system packages, agent versions), rebuild and push with the publish script, then update the `foundry-base-latest` tag.
## Production GitHub App + OAuth App
Foundry uses two separate GitHub entities in production:
- **OAuth App** (`GITHUB_CLIENT_ID` / `GITHUB_CLIENT_SECRET`) — handles "Sign in with GitHub" via Better Auth. This is a standard OAuth App.
- **GitHub App** (`GITHUB_APP_ID` / `GITHUB_APP_CLIENT_ID` / `GITHUB_APP_CLIENT_SECRET` / `GITHUB_APP_PRIVATE_KEY`) — handles webhooks, installation tokens for repo access, and GitHub API sync (repos, PRs). Must be manually installed on each org.
Key env vars and where they connect:
- `GITHUB_REDIRECT_URI` — OAuth callback, must point to `https://api.sandboxagent.dev/v1/auth/callback/github`
- `GITHUB_WEBHOOK_SECRET` — must match the secret configured on the GitHub App's Webhook settings page exactly. Mismatches cause silent 500s on webhook delivery (signature verification fails inside the actor, surfaced as a generic RivetKit `internal_error`).
- `BETTER_AUTH_URL` — must be the **API** URL (`https://api.sandboxagent.dev`), not the frontend URL. Better Auth uses this internally for sign-out and session management calls.
- `APP_URL` — the **frontend** URL (`https://foundry.sandboxagent.dev`).
Troubleshooting:
- **"GitHub App not installed"** — The GitHub App must be manually installed on each org. Sign-in does not auto-install it. Go to the GitHub App settings → Install App tab. The sign-in flow can only detect existing installations, not create them.
- **Webhooks not arriving** — Check the GitHub App → Advanced tab for delivery history. If deliveries show 500, the webhook secret likely doesn't match `GITHUB_WEBHOOK_SECRET`. Test with: `echo -n '{"test":true}' | openssl dgst -sha256 -hmac "$SECRET"` and curl the endpoint with the computed signature.
- **Deleting all actors wipes GitHub App installation state.** After a full actor reset, you must trigger a webhook (e.g. redeliver from GitHub App Advanced tab, or re-install the app) to repopulate installation records.
## Railway Logs ## Railway Logs
@ -73,13 +108,14 @@ Use `pnpm` workspaces and Turborepo.
- All backend interaction (actor calls, metadata/health checks, backend HTTP endpoint access) must go through the dedicated client library in `packages/client`. - All backend interaction (actor calls, metadata/health checks, backend HTTP endpoint access) must go through the dedicated client library in `packages/client`.
- Outside `packages/client`, do not call backend endpoints directly (for example `fetch(.../v1/rivet...)`), except in black-box E2E tests that intentionally exercise raw transport behavior. - Outside `packages/client`, do not call backend endpoints directly (for example `fetch(.../v1/rivet...)`), except in black-box E2E tests that intentionally exercise raw transport behavior.
- GUI state should update in realtime (no manual refresh buttons). Prefer RivetKit push reactivity and actor-driven events; do not add polling/refetch for normal product flows. - GUI state should update in realtime (no manual refresh buttons). Prefer RivetKit push reactivity and actor-driven events; do not add polling/refetch for normal product flows.
- Keep the mock workbench types and mock client in `packages/shared` + `packages/client` up to date with the frontend contract. The mock is the UI testing reference implementation while backend functionality catches up. - Keep the mock workspace types and mock client in `packages/shared` + `packages/client` up to date with the frontend contract. The mock is the UI testing reference implementation while backend functionality catches up.
- Keep frontend route/state coverage current in code and tests; there is no separate page-inventory doc to maintain. - Keep frontend route/state coverage current in code and tests; there is no separate page-inventory doc to maintain.
- If Foundry uses a shared component from `@sandbox-agent/react`, make changes in `sdks/react` instead of copying or forking that component into Foundry. - If Foundry uses a shared component from `@sandbox-agent/react`, make changes in `sdks/react` instead of copying or forking that component into Foundry.
- When changing shared React components in `sdks/react` for Foundry, verify they still work in the Sandbox Agent Inspector before finishing. - When changing shared React components in `sdks/react` for Foundry, verify they still work in the Sandbox Agent Inspector before finishing.
- When making UI changes, verify the live flow with `agent-browser`, take screenshots of the updated UI, and offer to open those screenshots in Preview when you finish. - When making UI changes, verify the live flow with the Chrome DevTools MCP or `agent-browser`, take screenshots of the updated UI, and offer to open those screenshots in Preview when you finish.
- When asked for screenshots, capture all relevant affected screens and modal states, not just a single viewport. Include empty, populated, success, and blocked/error states when they are part of the changed flow. - When asked for screenshots, capture all relevant affected screens and modal states, not just a single viewport. Include empty, populated, success, and blocked/error states when they are part of the changed flow.
- If a screenshot catches a transition frame, blank modal, or otherwise misleading state, retake it before reporting it. - If a screenshot catches a transition frame, blank modal, or otherwise misleading state, retake it before reporting it.
- When verifying UI in the browser, attempt to sign in by navigating to `/signin` and clicking "Continue with GitHub". If the browser lands on the GitHub login page (github.com/login) and you don't have credentials, stop and ask the user to complete the sign-in. Do not assume the session is invalid just because you see the Foundry sign-in page — always attempt the OAuth flow first.
## Realtime Data Architecture ## Realtime Data Architecture
@ -99,7 +135,7 @@ Do not use polling (`refetchInterval`), empty "go re-fetch" broadcast events, or
- **Organization actor** materializes sidebar-level data in its own SQLite: repo catalog, task summaries (title, status, branch, PR, updatedAt), repo summaries (overview/branch state), and session summaries (id, name, status, unread, model — no transcript). Task actors push summary changes to the organization actor when they mutate. The organization actor broadcasts the updated entity to connected clients. `getOrganizationSummary` reads from local tables only — no fan-out to child actors. - **Organization actor** materializes sidebar-level data in its own SQLite: repo catalog, task summaries (title, status, branch, PR, updatedAt), repo summaries (overview/branch state), and session summaries (id, name, status, unread, model — no transcript). Task actors push summary changes to the organization actor when they mutate. The organization actor broadcasts the updated entity to connected clients. `getOrganizationSummary` reads from local tables only — no fan-out to child actors.
- **Task actor** materializes its own detail state (session summaries, sandbox info, diffs, file tree). `getTaskDetail` reads from the task actor's own SQLite. The task actor broadcasts updates directly to clients connected to it. - **Task actor** materializes its own detail state (session summaries, sandbox info, diffs, file tree). `getTaskDetail` reads from the task actor's own SQLite. The task actor broadcasts updates directly to clients connected to it.
- **Session data** lives on the task actor but is a separate subscription topic. The task topic includes `sessions_summary` (list without content). The `session` topic provides full transcript and draft state. Clients subscribe to the `session` topic for whichever session is active, and filter `sessionUpdated` events by session ID (ignoring events for other sessions on the same actor). - **Session data** lives on the task actor but is a separate subscription topic. The task topic includes `sessions_summary` (list without content). The `session` topic provides full transcript and draft state. Clients subscribe to the `session` topic for whichever session is active, and filter `sessionUpdated` events by session ID (ignoring events for other sessions on the same actor).
- The expensive fan-out (querying every repository/task actor) only exists as a background reconciliation/rebuild path, never on the hot read path. - There is no fan-out on the read path. The organization actor owns all task summaries locally.
### Subscription manager ### Subscription manager
@ -133,6 +169,14 @@ The client subscribes to `app` always, `organization` when entering an organizat
- Backend mutations that affect sidebar data (task title, status, branch, PR state) must push the updated summary to the parent organization actor, which broadcasts to organization subscribers. - Backend mutations that affect sidebar data (task title, status, branch, PR state) must push the updated summary to the parent organization actor, which broadcasts to organization subscribers.
- Comment architecture-related code: add doc comments explaining the materialized state pattern, why deltas flow the way they do, and the relationship between parent/child actor broadcasts. New contributors should understand the data flow from comments alone. - Comment architecture-related code: add doc comments explaining the materialized state pattern, why deltas flow the way they do, and the relationship between parent/child actor broadcasts. New contributors should understand the data flow from comments alone.
## Sandbox Architecture
- Structurally, the system supports multiple sandboxes per task, but in practice there is exactly one active sandbox per task. Design features assuming one sandbox per task. If multi-sandbox is needed in the future, extend at that time.
- Each task has a **primary user** (owner) whose GitHub OAuth credentials are injected into the sandbox for git operations. The owner swaps when a different user sends a message. See `.context/proposal-task-owner-git-auth.md` for the full design.
- **Security: OAuth token scope.** The user's GitHub OAuth token has `repo` scope, granting full control of all private repositories the user has access to. When the user is the active task owner, their token is injected into the sandbox. This means the agent can read/write ANY repo the user has access to, not just the task's target repo. This is the standard trade-off for OAuth-based git integrations (same as GitHub Codespaces, Gitpod). The user consents to `repo` scope at sign-in time. Credential files in the sandbox are `chmod 600` and overwritten on owner swap.
- All git operations in the sandbox must be auto-authenticated. Never configure git to prompt for credentials (no interactive `GIT_ASKPASS` prompts). Use a credential store file that is pre-populated with the active owner's token.
- All git operation errors (push 401, clone failure, branch protection rejection) must surface in the UI with actionable context. Never silently swallow git errors.
## Git State Policy ## Git State Policy
- The backend stores zero git state. No local clones, no refs, no working trees, and no git-spice. - The backend stores zero git state. No local clones, no refs, no working trees, and no git-spice.
@ -141,6 +185,15 @@ The client subscribes to `app` always, `organization` when entering an organizat
- Do not add backend git clone paths, `git fetch`, `git for-each-ref`, or direct backend git CLI calls. If you need git data, either read stored GitHub metadata or run the command inside a sandbox. - Do not add backend git clone paths, `git fetch`, `git for-each-ref`, or direct backend git CLI calls. If you need git data, either read stored GitHub metadata or run the command inside a sandbox.
- The `BackendDriver` has no `GitDriver` or `StackDriver`. Only `GithubDriver` and `TmuxDriver` remain. - The `BackendDriver` has no `GitDriver` or `StackDriver`. Only `GithubDriver` and `TmuxDriver` remain.
## React Hook Dependency Safety
- **Never use unstable references as `useEffect`/`useMemo`/`useCallback` dependencies.** React compares dependencies by reference, not value. Expressions like `?? []`, `?? {}`, `.map(...)`, `.filter(...)`, or object/array literals create new references every render, causing infinite re-render loops when used as dependencies.
- If the upstream value may be `undefined`/`null` and you need a fallback, either:
- Use the raw upstream value as the dependency and apply the fallback inside the effect body: `useEffect(() => { doThing(value ?? []); }, [value]);`
- Derive a stable primitive key: `const key = JSON.stringify(value ?? []);` then depend on `key`
- Memoize: `const stable = useMemo(() => value ?? [], [value]);`
- When reviewing code, treat any `?? []`, `?? {}`, or inline `.map()/.filter()` in a dependency array as a bug.
## UI System ## UI System
- Foundry's base UI system is `BaseUI` with `Styletron`, plus Foundry-specific theme/tokens on top. Treat that as the default UI foundation. - Foundry's base UI system is `BaseUI` with `Styletron`, plus Foundry-specific theme/tokens on top. Treat that as the default UI foundation.
@ -165,6 +218,7 @@ The client subscribes to `app` always, `organization` when entering an organizat
- If the system reaches an unexpected state, raise an explicit error with actionable context. - If the system reaches an unexpected state, raise an explicit error with actionable context.
- Do not fail silently, swallow errors, or auto-ignore inconsistent data. - Do not fail silently, swallow errors, or auto-ignore inconsistent data.
- Prefer fail-fast behavior over hidden degradation when correctness is uncertain. - Prefer fail-fast behavior over hidden degradation when correctness is uncertain.
- **Never use bare `catch {}` or `catch { }` blocks.** Every catch must at minimum log the error with `logActorWarning` or `console.warn`. Silent catches hide bugs and make debugging impossible. If a catch is intentionally degrading (e.g. returning empty data when a sandbox is expired), it must still log so operators can see what happened. Use `catch (error) { logActorWarning(..., { error: resolveErrorMessage(error) }); }` or equivalent.
## RivetKit Dependency Policy ## RivetKit Dependency Policy
@ -178,16 +232,6 @@ For all Rivet/RivetKit implementation:
- Example: the `task` actor instance already represents `(organizationId, repoId, taskId)`, so its SQLite tables should not need those columns for primary keys. - Example: the `task` actor instance already represents `(organizationId, repoId, taskId)`, so its SQLite tables should not need those columns for primary keys.
3. Do not use backend-global SQLite singletons; database access must go through actor `db` providers (`c.db`). 3. Do not use backend-global SQLite singletons; database access must go through actor `db` providers (`c.db`).
4. The default dependency source for RivetKit is the published `rivetkit` package so monorepo installs and CI remain self-contained. 4. The default dependency source for RivetKit is the published `rivetkit` package so monorepo installs and CI remain self-contained.
5. When working on coordinated RivetKit changes, you may temporarily relink to a local checkout instead of the published package.
- Dedicated local checkout for this repo: `/Users/nathan/conductor/workspaces/task/rivet-checkout`
- Preferred local link target: `../rivet-checkout/rivetkit-typescript/packages/rivetkit`
- Sub-packages (`@rivetkit/sqlite-vfs`, etc.) resolve transitively from the RivetKit monorepo when using the local checkout.
6. Before using a local checkout, build RivetKit in the rivet repo:
```bash
cd ../rivet-checkout/rivetkit-typescript
pnpm install
pnpm build -F rivetkit
```
## Rivet Routing ## Rivet Routing
@ -205,8 +249,9 @@ For all Rivet/RivetKit implementation:
- Do not add custom backend REST endpoints (no `/v1/*` shim layer). - Do not add custom backend REST endpoints (no `/v1/*` shim layer).
- We own the sandbox-agent project; treat sandbox-agent defects as first-party bugs and fix them instead of working around them. - We own the sandbox-agent project; treat sandbox-agent defects as first-party bugs and fix them instead of working around them.
- Keep strict single-writer ownership: each table/row has exactly one actor writer. - Keep strict single-writer ownership: each table/row has exactly one actor writer.
- Parent actors (`organization`, `repository`, `task`, `history`, `sandbox-instance`) use command-only loops with no timeout. - Parent actors (`organization`, `task`, `sandbox-instance`) use command-only loops with no timeout.
- Periodic syncing lives in dedicated child actors with one timeout cadence each. - Periodic syncing lives in dedicated child actors with one timeout cadence each.
- **Task actors must be created lazily** — never during sync or bulk operations. PR sync writes virtual entries to the org's local `taskIndex`/`taskSummaries` tables. The task actor is created on first user interaction via `getOrCreate`. See `packages/backend/CLAUDE.md` "Lazy Task Actor Creation" for details.
- Do not build blocking flows that wait on external systems to become ready or complete. Prefer push-based progression driven by actor messages, events, webhooks, or queue/workflow state changes. - Do not build blocking flows that wait on external systems to become ready or complete. Prefer push-based progression driven by actor messages, events, webhooks, or queue/workflow state changes.
- Use workflows/background commands for any repo sync, sandbox provisioning, agent install, branch restack/rebase, or other multi-step external work. Do not keep user-facing actions/requests open while that work runs. - Use workflows/background commands for any repo sync, sandbox provisioning, agent install, branch restack/rebase, or other multi-step external work. Do not keep user-facing actions/requests open while that work runs.
- `send` policy: always `await` the `send(...)` call itself so enqueue failures surface immediately, but default to `wait: false`. - `send` policy: always `await` the `send(...)` call itself so enqueue failures surface immediately, but default to `wait: false`.
@ -227,8 +272,8 @@ Action handlers must return fast. The pattern:
Examples: Examples:
- `createTask` → `wait: true` (returns `{ taskId }`), then enqueue provisioning with `wait: false`. Client sees task appear immediately with pending status, observes `ready` via organization events. - `createTask` → `wait: true` (returns `{ taskId }`), then enqueue provisioning with `wait: false`. Client sees task appear immediately with pending status, observes `ready` via organization events.
- `sendWorkbenchMessage` → validate session is `ready` (throw if not), enqueue with `wait: false`. Client observes session transition to `running` → `idle` via session events. - `sendWorkspaceMessage` → validate session is `ready` (throw if not), enqueue with `wait: false`. Client observes session transition to `running` → `idle` via session events.
- `createWorkbenchSession` → `wait: true` (returns `{ tabId }`), enqueue sandbox provisioning with `wait: false`. Client observes `pending_provision` → `ready` via task events. - `createWorkspaceSession` → `wait: true` (returns `{ sessionId }`), enqueue sandbox provisioning with `wait: false`. Client observes `pending_provision` → `ready` via task events.
Never use `wait: true` for operations that depend on external readiness, sandbox I/O, agent responses, git network operations, polling loops, or long-running queue drains. Never hold an action open while waiting for an external system to become ready — that is a polling/retry loop in disguise. Never use `wait: true` for operations that depend on external readiness, sandbox I/O, agent responses, git network operations, polling loops, or long-running queue drains. Never hold an action open while waiting for an external system to become ready — that is a polling/retry loop in disguise.
@ -240,11 +285,11 @@ All `wait: true` sends must have an explicit `timeout`. Maximum timeout for any
### Task creation: resolve metadata before creating the actor ### Task creation: resolve metadata before creating the actor
When creating a task, all deterministic metadata (title, branch name) must be resolved synchronously in the parent actor (repository) *before* the task actor is created. The task actor must never be created with null `branchName` or `title`. When creating a task, all deterministic metadata (title, branch name) must be resolved synchronously in the organization actor *before* the task actor is created. The task actor must never be created with null `branchName` or `title`.
- Title is derived from the task description via `deriveFallbackTitle()` — pure string manipulation, no external I/O. - Title is derived from the task description via `deriveFallbackTitle()` — pure string manipulation, no external I/O.
- Branch name is derived from the title via `sanitizeBranchName()` + conflict checking against the repository's task index. - Branch name is derived from the title via `sanitizeBranchName()` + conflict checking against the repository's task index.
- The repository actor already has the task index and GitHub-backed default branch metadata. Resolve the branch name there without local git fetches. - The organization actor owns the task index and reads GitHub-backed default branch metadata from the github-data actor. Resolve the branch name there without local git fetches.
- Do not defer naming to a background provision workflow. Do not poll for names to become available. - Do not defer naming to a background provision workflow. Do not poll for names to become available.
- The `onBranch` path (attaching to an existing branch) and the new-task path should both produce a fully-named task record on return. - The `onBranch` path (attaching to an existing branch) and the new-task path should both produce a fully-named task record on return.
- Actor handle policy: - Actor handle policy:
@ -320,9 +365,9 @@ Each entry must include:
- Friction/issue - Friction/issue
- Attempted fix/workaround and outcome - Attempted fix/workaround and outcome
## History Events ## Audit Log Events
Log notable workflow changes to `events` so `hf history` remains complete: Log notable workflow changes to `events` so the audit log remains complete:
- create - create
- attach - attach
@ -331,6 +376,8 @@ Log notable workflow changes to `events` so `hf history` remains complete:
- status transitions - status transitions
- PR state transitions - PR state transitions
When adding new task/workspace commands, always add a corresponding audit log event.
## Validation After Changes ## Validation After Changes
Always run and fix failures: Always run and fix failures:

1456
foundry/FOUNDRY-CHANGES.md Normal file

File diff suppressed because it is too large Load diff

View file

@ -44,6 +44,7 @@ services:
STRIPE_WEBHOOK_SECRET: "${STRIPE_WEBHOOK_SECRET:-}" STRIPE_WEBHOOK_SECRET: "${STRIPE_WEBHOOK_SECRET:-}"
STRIPE_PRICE_TEAM: "${STRIPE_PRICE_TEAM:-}" STRIPE_PRICE_TEAM: "${STRIPE_PRICE_TEAM:-}"
FOUNDRY_SANDBOX_PROVIDER: "${FOUNDRY_SANDBOX_PROVIDER:-local}" FOUNDRY_SANDBOX_PROVIDER: "${FOUNDRY_SANDBOX_PROVIDER:-local}"
HF_LOCAL_SANDBOX_IMAGE: "${HF_LOCAL_SANDBOX_IMAGE:-rivetdev/sandbox-agent:foundry-base-latest}"
E2B_API_KEY: "${E2B_API_KEY:-}" E2B_API_KEY: "${E2B_API_KEY:-}"
E2B_TEMPLATE: "${E2B_TEMPLATE:-}" E2B_TEMPLATE: "${E2B_TEMPLATE:-}"
HF_E2B_TEMPLATE: "${HF_E2B_TEMPLATE:-${E2B_TEMPLATE:-}}" HF_E2B_TEMPLATE: "${HF_E2B_TEMPLATE:-${E2B_TEMPLATE:-}}"
@ -56,8 +57,6 @@ services:
- "7741:7741" - "7741:7741"
volumes: volumes:
- "..:/app" - "..:/app"
# The linked RivetKit checkout resolves from Foundry packages to /task/rivet-checkout in-container.
- "../../../task/rivet-checkout:/task/rivet-checkout:ro"
# Reuse the host Codex auth profile for local sandbox-agent Codex sessions in dev. # Reuse the host Codex auth profile for local sandbox-agent Codex sessions in dev.
- "${HOME}/.codex:/root/.codex" - "${HOME}/.codex:/root/.codex"
- "/var/run/docker.sock:/var/run/docker.sock" - "/var/run/docker.sock:/var/run/docker.sock"
@ -86,7 +85,6 @@ services:
- "..:/app" - "..:/app"
# Ensure logs in .foundry/ persist on the host even if we change source mounts later. # Ensure logs in .foundry/ persist on the host even if we change source mounts later.
- "./.foundry:/app/foundry/.foundry" - "./.foundry:/app/foundry/.foundry"
- "../../../task/rivet-checkout:/task/rivet-checkout:ro"
# Use Linux-native repo dependencies inside the container instead of host node_modules. # Use Linux-native repo dependencies inside the container instead of host node_modules.
- "foundry_node_modules:/app/node_modules" - "foundry_node_modules:/app/node_modules"
- "foundry_client_node_modules:/app/foundry/packages/client/node_modules" - "foundry_client_node_modules:/app/foundry/packages/client/node_modules"

View file

@ -15,7 +15,6 @@ services:
volumes: volumes:
- "..:/app" - "..:/app"
- "./.foundry:/app/foundry/.foundry" - "./.foundry:/app/foundry/.foundry"
- "../../../task/rivet-checkout:/task/rivet-checkout:ro"
- "mock_node_modules:/app/node_modules" - "mock_node_modules:/app/node_modules"
- "mock_client_node_modules:/app/foundry/packages/client/node_modules" - "mock_client_node_modules:/app/foundry/packages/client/node_modules"
- "mock_frontend_node_modules:/app/foundry/packages/frontend/node_modules" - "mock_frontend_node_modules:/app/foundry/packages/frontend/node_modules"

View file

@ -19,6 +19,7 @@ RUN pnpm --filter @sandbox-agent/foundry-backend deploy --prod /out
FROM oven/bun:1.2 AS runtime FROM oven/bun:1.2 AS runtime
ENV NODE_ENV=production ENV NODE_ENV=production
ENV HOME=/home/task ENV HOME=/home/task
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
WORKDIR /app WORKDIR /app
RUN apt-get update \ RUN apt-get update \
&& apt-get install -y --no-install-recommends \ && apt-get install -y --no-install-recommends \
@ -31,6 +32,8 @@ RUN addgroup --system --gid 1001 task \
&& adduser --system --uid 1001 --home /home/task --ingroup task task \ && adduser --system --uid 1001 --home /home/task --ingroup task task \
&& mkdir -p /home/task \ && mkdir -p /home/task \
&& chown -R task:task /home/task /app && chown -R task:task /home/task /app
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
COPY --from=build /out ./ COPY --from=build /out ./
USER task USER task
EXPOSE 7741 EXPOSE 7741

View file

@ -21,6 +21,9 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION
ENV PATH="/root/.local/bin:${PATH}" ENV PATH="/root/.local/bin:${PATH}"
ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent"
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
WORKDIR /app WORKDIR /app

View file

@ -20,11 +20,13 @@ RUN curl -fsSL "https://releases.rivet.dev/sandbox-agent/${SANDBOX_AGENT_VERSION
ENV PATH="/root/.local/bin:${PATH}" ENV PATH="/root/.local/bin:${PATH}"
ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent" ENV SANDBOX_AGENT_BIN="/root/.local/bin/sandbox-agent"
ENV RIVET_RUNNER_VERSION_FILE=/etc/foundry/rivet-runner-version
RUN mkdir -p /etc/foundry \
&& date +%s > /etc/foundry/rivet-runner-version
WORKDIR /workspace/quebec WORKDIR /workspace/quebec
COPY quebec /workspace/quebec COPY quebec /workspace/quebec
COPY rivet-checkout /workspace/rivet-checkout
RUN pnpm install --frozen-lockfile RUN pnpm install --frozen-lockfile
RUN pnpm --filter @sandbox-agent/foundry-shared build RUN pnpm --filter @sandbox-agent/foundry-shared build

View file

@ -0,0 +1,190 @@
# syntax=docker/dockerfile:1.10.0
#
# Foundry base sandbox image.
#
# Builds sandbox-agent from source (reusing the upstream Dockerfile.full build
# stages) and layers Foundry-specific tooling on top: sudo, git, neovim, gh,
# node, bun, chromium, and agent-browser.
#
# Build:
# docker build --platform linux/amd64 \
# -f foundry/docker/foundry-base.Dockerfile \
# -t rivetdev/sandbox-agent:foundry-base-<timestamp> .
#
# Must be invoked from the repository root so the COPY . picks up the full
# source tree for the Rust + inspector build stages.
# ============================================================================
# Build inspector frontend
# ============================================================================
# Stage "inspector-build": installs the inspector's workspace dependencies,
# builds the SDK packages with tsup, then bundles the inspector frontend with
# Vite. The builder stage copies only frontend/packages/inspector/dist from here.
FROM --platform=linux/amd64 node:22-alpine AS inspector-build
WORKDIR /app
# NOTE(review): pnpm is installed without a version pin here — confirm whether
# it should match the pnpm version pinned by the other Foundry images.
RUN npm install -g pnpm
# Copy only the workspace manifests first so the dependency-install layer below
# stays cached when just source files change.
COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./
COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/
COPY sdks/cli-shared/package.json ./sdks/cli-shared/
COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/
COPY sdks/react/package.json ./sdks/react/
COPY sdks/typescript/package.json ./sdks/typescript/
# The trailing "..." in the filter selects the inspector package together with
# its workspace dependencies.
RUN pnpm install --filter @sandbox-agent/inspector...
# Now bring in the sources needed to build the SDK packages.
COPY docs/openapi.json ./docs/
COPY sdks/cli-shared ./sdks/cli-shared
COPY sdks/acp-http-client ./sdks/acp-http-client
COPY sdks/react ./sdks/react
COPY sdks/typescript ./sdks/typescript
# Build each SDK package in its own layer.
# NOTE(review): the order presumably follows the packages' dependency order — confirm before reordering.
RUN cd sdks/cli-shared && pnpm exec tsup
RUN cd sdks/acp-http-client && pnpm exec tsup
# NOTE(review): SKIP_OPENAPI_GEN=1 presumably disables OpenAPI code generation
# during the TypeScript SDK build — confirm against that package's build config.
RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup
RUN cd sdks/react && pnpm exec tsup
# Inspector sources are copied last so SDK build layers are reused when only the
# inspector changes.
COPY frontend/packages/inspector ./frontend/packages/inspector
RUN cd frontend/packages/inspector && pnpm exec vite build
# ============================================================================
# AMD64 Builder - sandbox-agent static binary
# ============================================================================
# Stage "builder": cross-compiles sandbox-agent for x86_64-unknown-linux-musl
# with crt-static enabled and leaves the resulting binary at /sandbox-agent for
# the runtime stage to copy.
FROM --platform=linux/amd64 rust:1.88.0 AS builder
ENV DEBIAN_FRONTEND=noninteractive
# Native build prerequisites: musl tooling, LLVM/clang 14, OpenSSL headers, and
# download utilities used by the steps below.
RUN apt-get update && apt-get install -y \
musl-tools \
musl-dev \
llvm-14-dev \
libclang-14-dev \
clang-14 \
libssl-dev \
pkg-config \
ca-certificates \
g++ \
g++-multilib \
git \
curl \
wget && \
rm -rf /var/lib/apt/lists/*
# Install the musl cross toolchain under /opt and add the matching Rust target.
# NOTE(review): the toolchain is fetched from the "latest" release URL, so the
# compiler version is not pinned and may change between builds.
RUN wget -q https://github.com/cross-tools/musl-cross/releases/latest/download/x86_64-unknown-linux-musl.tar.xz && \
tar -xf x86_64-unknown-linux-musl.tar.xz -C /opt/ && \
rm x86_64-unknown-linux-musl.tar.xz && \
rustup target add x86_64-unknown-linux-musl
# Route C/C++ compilation, archiving, and linking for the musl target through
# the cross toolchain, and point libclang consumers at LLVM 14.
ENV PATH="/opt/x86_64-unknown-linux-musl/bin:$PATH" \
LIBCLANG_PATH=/usr/lib/llvm-14/lib \
CLANG_PATH=/usr/bin/clang-14 \
CC_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-gcc \
CXX_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-g++ \
AR_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-ar \
CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=x86_64-unknown-linux-musl-gcc \
CARGO_INCREMENTAL=0 \
CARGO_NET_GIT_FETCH_WITH_CLI=true
# Build OpenSSL from source as static libraries (no-shared) installed under
# /musl; install_sw installs the software only, without documentation.
# NOTE(review): the tarball is downloaded without checksum verification, and the
# OpenSSL 1.1.1 series is past upstream end-of-life — confirm this pin is intentional.
ENV SSL_VER=1.1.1w
RUN wget https://www.openssl.org/source/openssl-$SSL_VER.tar.gz && \
tar -xzf openssl-$SSL_VER.tar.gz && \
cd openssl-$SSL_VER && \
./Configure no-shared no-async --prefix=/musl --openssldir=/musl/ssl linux-x86_64 && \
make -j$(nproc) && \
make install_sw && \
cd .. && \
rm -rf openssl-$SSL_VER*
# Point the Rust build at the /musl OpenSSL install and request a fully static
# binary (crt-static plus static libgcc).
ENV OPENSSL_DIR=/musl \
OPENSSL_INCLUDE_DIR=/musl/include \
OPENSSL_LIB_DIR=/musl/lib \
PKG_CONFIG_ALLOW_CROSS=1 \
RUSTFLAGS="-C target-feature=+crt-static -C link-arg=-static-libgcc"
WORKDIR /build
# Copies the whole build context, which is why the image must be built from the
# repository root.
COPY . .
# Overlay the inspector bundle produced by the inspector-build stage.
COPY --from=inspector-build /app/frontend/packages/inspector/dist ./frontend/packages/inspector/dist
# /build/target is a cache mount, so its contents are not committed to the image
# layer; the binary is copied to /sandbox-agent inside the same RUN so the
# runtime stage can COPY it. -j4 caps cargo at four parallel jobs.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/local/cargo/git \
--mount=type=cache,target=/build/target \
cargo build -p sandbox-agent --release --target x86_64-unknown-linux-musl -j4 && \
cp target/x86_64-unknown-linux-musl/release/sandbox-agent /sandbox-agent
# ============================================================================
# Runtime - Foundry base sandbox image
# ============================================================================
# Final stage: Debian bookworm-slim with Node 22, plus system tooling, Chromium,
# the GitHub CLI, Bun, and the sandbox-agent binary from the builder stage. It
# runs the sandbox-agent server as the non-root "sandbox" user.
FROM --platform=linux/amd64 node:22-bookworm-slim
ENV DEBIAN_FRONTEND=noninteractive
# --- System packages --------------------------------------------------------
RUN apt-get update && apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
git \
gnupg \
neovim \
sudo \
unzip \
wget \
# Chromium and its runtime deps
chromium \
fonts-liberation \
libasound2 \
libatk-bridge2.0-0 \
libatk1.0-0 \
libcups2 \
libdbus-1-3 \
libdrm2 \
libgbm1 \
libgtk-3-0 \
libnspr4 \
libnss3 \
libx11-xcb1 \
libxcomposite1 \
libxdamage1 \
libxrandr2 \
xdg-utils \
&& rm -rf /var/lib/apt/lists/*
# --- GitHub CLI (gh) -------------------------------------------------------
# Registers the GitHub CLI apt repository (signing keyring + sources entry) and
# installs gh from it.
RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \
| dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg \
&& chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \
> /etc/apt/sources.list.d/github-cli.list \
&& apt-get update && apt-get install -y gh \
&& rm -rf /var/lib/apt/lists/*
# --- Bun --------------------------------------------------------------------
# The install script runs as root and places bun under /root/.bun; the binary is
# moved to /usr/local/bin so it is on PATH for the non-root user, and bunx is
# provided as a symlink to the same binary.
# NOTE(review): the installer is piped from the network without a version pin.
RUN curl -fsSL https://bun.sh/install | bash \
&& mv /root/.bun/bin/bun /usr/local/bin/bun \
&& ln -sf /usr/local/bin/bun /usr/local/bin/bunx \
&& rm -rf /root/.bun
# --- sandbox-agent binary (from local build) --------------------------------
COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
RUN chmod +x /usr/local/bin/sandbox-agent
# --- sandbox user with passwordless sudo ------------------------------------
RUN useradd -m -s /bin/bash sandbox \
&& echo "sandbox ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/sandbox \
&& chmod 0440 /etc/sudoers.d/sandbox
# Everything below runs as the sandbox user, including the container's process.
USER sandbox
WORKDIR /home/sandbox
# Point Chromium/Playwright at the system binary
ENV CHROME_PATH=/usr/bin/chromium
ENV CHROMIUM_PATH=/usr/bin/chromium
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# --- Install all sandbox-agent agents + agent-browser -----------------------
# Runs after USER sandbox, so the agent install executes as the sandbox user.
# NOTE(review): presumably this places agents under /home/sandbox — confirm the install location.
RUN sandbox-agent install-agent --all
# The global npm install goes through sudo because the build is no longer running as root.
RUN sudo npm install -g agent-browser
# The exposed port matches the --port passed in CMD.
EXPOSE 2468
ENTRYPOINT ["sandbox-agent"]
CMD ["server", "--host", "0.0.0.0", "--port", "2468"]

View file

@ -8,4 +8,4 @@ RUN npm install -g pnpm@10.28.2
WORKDIR /app WORKDIR /app
CMD ["bash", "-lc", "pnpm install --force --frozen-lockfile --filter @sandbox-agent/foundry-frontend... && cd foundry/packages/frontend && exec pnpm vite --host 0.0.0.0 --port 4173"] CMD ["bash", "-lc", "pnpm install --frozen-lockfile --filter @sandbox-agent/foundry-frontend... && cd foundry/packages/frontend && exec pnpm vite --host 0.0.0.0 --port 4173"]

View file

@ -7,7 +7,6 @@ RUN npm install -g pnpm@10.28.2
WORKDIR /workspace/quebec WORKDIR /workspace/quebec
COPY quebec /workspace/quebec COPY quebec /workspace/quebec
COPY rivet-checkout /workspace/rivet-checkout
RUN pnpm install --frozen-lockfile RUN pnpm install --frozen-lockfile
RUN pnpm --filter @sandbox-agent/foundry-shared build RUN pnpm --filter @sandbox-agent/foundry-shared build

View file

@ -5,14 +5,12 @@
Keep the backend actor tree aligned with this shape unless we explicitly decide to change it: Keep the backend actor tree aligned with this shape unless we explicitly decide to change it:
```text ```text
OrganizationActor OrganizationActor (direct coordinator for tasks)
├─ HistoryActor(organization-scoped global feed) ├─ AuditLogActor (organization-scoped global feed)
├─ GithubDataActor ├─ GithubDataActor
├─ RepositoryActor(repo) ├─ TaskActor(task)
│ └─ TaskActor(task) │ ├─ taskSessions → session metadata/transcripts
│ ├─ TaskSessionActor(session) × N │ └─ taskSandboxes → sandbox instance index
│ │ └─ SessionStatusSyncActor(session) × 0..1
│ └─ Task-local workbench state
└─ SandboxInstanceActor(sandboxProviderId, sandboxId) × N └─ SandboxInstanceActor(sandboxProviderId, sandboxId) × N
``` ```
@ -28,53 +26,173 @@ Children push updates **up** to their direct coordinator only. Coordinators broa
### Coordinator hierarchy and index tables ### Coordinator hierarchy and index tables
```text ```text
OrganizationActor (coordinator for repos + auth users) OrganizationActor (coordinator for tasks + auth users)
│ Index tables: │ Index tables:
│ ├─ repos → RepositoryActor index (repo catalog) │ ├─ taskIndex → TaskActor index (taskId → repoId + branchName)
│ ├─ taskLookup → TaskActor index (taskId → repoId routing) │ ├─ taskSummaries → TaskActor materialized sidebar projection
│ ├─ taskSummaries → TaskActor index (materialized sidebar projection) │ ├─ authSessionIndex → UserActor index (session token → userId)
│ ├─ authSessionIndex → AuthUserActor index (session token → userId) │ ├─ authEmailIndex → UserActor index (email → userId)
│ ├─ authEmailIndex → AuthUserActor index (email → userId) │ └─ authAccountIndex → UserActor index (OAuth account → userId)
│ └─ authAccountIndex → AuthUserActor index (OAuth account → userId)
├─ RepositoryActor (coordinator for tasks) ├─ TaskActor (coordinator for sessions + sandboxes)
│ │ │ │
│ │ Index tables: │ │ Index tables:
│ │ └─ taskIndex → TaskActor index (taskId → branchName) │ │ ├─ taskWorkspaceSessions → Session index (session metadata + transcript)
│ │ └─ taskSandboxes → SandboxInstanceActor index (sandbox history)
│ │ │ │
│ └─ TaskActor (coordinator for sessions + sandboxes) │ └─ SandboxInstanceActor (leaf)
│ │
│ │ Index tables:
│ │ ├─ taskWorkbenchSessions → Session index (session metadata, transcript, draft)
│ │ └─ taskSandboxes → SandboxInstanceActor index (sandbox history)
│ │
│ └─ SandboxInstanceActor (leaf)
├─ HistoryActor (organization-scoped audit log, not a coordinator) ├─ AuditLogActor (organization-scoped audit log, not a coordinator)
└─ GithubDataActor (GitHub API cache, not a coordinator) └─ GithubDataActor (GitHub API cache, not a coordinator)
``` ```
When adding a new index table, annotate it in the schema file with a doc comment identifying it as a coordinator index and which child actor it indexes (see existing examples). When adding a new index table, annotate it in the schema file with a doc comment identifying it as a coordinator index and which child actor it indexes (see existing examples).
## GitHub Sync Data Model
The GithubDataActor syncs **repositories** and **pull requests** from GitHub, not branches. We only need repos (to know which repos exist and their metadata) and PRs (to lazily populate virtual tasks in the sidebar). Branch data is not synced because we only create tasks from PRs or fresh user-initiated creation, never from bare branches. Generated branch names for new tasks are treated as unique enough to skip conflict detection against remote branches.
Tasks are either:
1. **Created fresh** by the user (no PR yet, branch name generated from task description)
2. **Lazily populated from pull requests** during PR sync (virtual task entries in org tables, no actor spawned)
## Lazy Task Actor Creation — CRITICAL
**Task actors must NEVER be created during GitHub sync or bulk operations.** Creating hundreds of task actors simultaneously causes OOM crashes. An org can have 200+ PRs; spawning an actor per PR kills the process.
### The two creation points
There are **two** primary places that create a task actor (other user-initiated paths that may materialize a virtual task are listed under "The rule" below):
1. **`createTaskMutation`** in `task-mutations.ts` — the backend path that creates brand-new tasks via `getOrCreateTask`. Triggered by explicit user action ("New Task" button). One actor at a time.
2. **`backend-client.ts` client helper** — calls `client.task.getOrCreate(...)`. This is the lazy materialization point: when a user clicks a virtual task in the sidebar, the client creates the actor, and it self-initializes in `getCurrentRecord()` (`workflow/common.ts`) by reading branch/title from the org's `getTaskIndexEntry` action.
### The rule
**Never use `getOrCreateTask` inside a sync loop, webhook handler, or any bulk operation.** That's what caused the OOM — 186 actors spawned simultaneously during PR sync.
`getOrCreateTask` IS allowed in:
- `createTaskMutation` — explicit user "New Task" action
- `requireWorkspaceTask` — user-initiated actions (createSession, sendMessage, etc.) that may hit a virtual task
- `getTask` action on the org — called by sandbox actor and client, needs to materialize virtual tasks
- `backend-client.ts` client helper — lazy materialization when user views a task
### Virtual tasks (PR-driven)
During PR sync, `refreshTaskSummaryForBranchMutation` is called for every changed PR (via github-data's `emitPullRequestChangeEvents`). It writes **virtual task entries** to the org actor's local `taskIndex` + `taskSummaries` tables only. No task actor is spawned. No cross-actor calls to task actors.
When the user interacts with a virtual task (clicks it, creates a session):
1. Client or org actor calls `getOrCreate` on the task actor key → actor is created with empty DB
2. Any action on the actor calls `getCurrentRecord()` → sees empty DB → reads branch/title from org's `getTaskIndexEntry` → calls `initBootstrapDbActivity` + `initCompleteActivity` → task is now real
### Call sites to watch
- `refreshTaskSummaryForBranchMutation` — called in bulk during sync. Must ONLY write to org local tables. Never create task actors or call task actor actions.
- `emitPullRequestChangeEvents` in github-data — iterates all changed PRs. Must remain fire-and-forget with no actor fan-out.
## Queue vs Action Decision Framework
The default is a direct action. Use a queue only if the answer to one or more of these questions is **yes**.
Actions are pure RPCs with no DB overhead on send — fast, but if the call fails the operation is lost. Queues persist the message to the database on send, guaranteeing it will be processed even if the target actor is busy, slow, or recovering. The tradeoff: queues add write overhead and serialize processing.
### 1. Does this operation coordinate multi-step work?
Does it involve external I/O (sandbox API, GitHub API, agent process management) or state machine transitions where interleaving would corrupt state? This is different from database-level serialization — a simple read-then-write on SQLite can use a transaction. The queue is for ordering operations that span DB writes + external I/O.
**Queue examples:**
- `workspace.send_message` — sends to sandbox agent, writes session status, does owner-swap. Multi-step with external I/O.
- `push` / `sync` / `merge` — git operations in sandbox that must not interleave.
- `createTask` — read-then-write across task index + actor creation. Returns result, so `wait: true`.
**Action examples:**
- `billing.stripe_customer.apply` — single column upsert, no external I/O.
- `workspace.update_draft` — writes draft text, no coordination with sandbox ops.
- `workspace.rename_task` — updates title column, queue handlers don't touch title.
### 2. Must this message be processed no matter what?
Is this a cross-actor fire-and-forget where the caller won't retry and data loss is unacceptable? A queue persists the message — if the target is down, it waits. An action RPC that fails is gone.
**Queue examples:**
- `audit.append` — caller must never be affected by audit failures, and audit entries must not be lost.
- `applyTaskSummaryUpdate` — task actor pushes summary to org and moves on. Won't retry if org is busy.
- `refreshTaskSummaryForBranch` — webhook-driven, won't be redelivered for the same event.
**Action examples:**
- `billing.invoice.upsert` — Stripe retries handle failures externally. No durability need on our side.
- `workspace.mark_unread` — UI convenience state. Acceptable to lose on transient failure.
- `github.webhook_receipt.record` — timestamp columns with no downstream effects.
### Once on a queue: wait or fire-and-forget?
If the caller needs a return value, use `wait: true`. If the UI updates via push events, use `wait: false`.
Full migration plan: `QUEUE_TO_ACTION_MIGRATION.md`.
## Ownership Rules ## Ownership Rules
- `OrganizationActor` is the organization coordinator and lookup/index owner. - `OrganizationActor` is the organization coordinator, direct coordinator for tasks, and lookup/index owner. It owns the task index, task summaries, and repo catalog.
- `HistoryActor` is organization-scoped. There is one organization-level history feed. - `AuditLogActor` is organization-scoped. There is one organization-level audit log feed.
- `RepositoryActor` is the repo coordinator and owns repo-local caches/indexes.
- `TaskActor` is one branch. Treat `1 task = 1 branch` once branch assignment is finalized. - `TaskActor` is one branch. Treat `1 task = 1 branch` once branch assignment is finalized.
- `TaskActor` can have many sessions. - `TaskActor` can have many sessions.
- `TaskActor` can reference many sandbox instances historically, but should have only one active sandbox/session at a time. - `TaskActor` can reference many sandbox instances historically, but should have only one active sandbox/session at a time.
- Session unread state and draft prompts are backend-owned workbench state, not frontend-local state. - Session unread state and draft prompts are backend-owned workspace state, not frontend-local state.
- Branch rename is a real git operation, not just metadata. - Branch names are immutable after task creation. Do not implement branch-rename flows.
- `SandboxInstanceActor` stays separate from `TaskActor`; tasks/sessions reference it by identity. - `SandboxInstanceActor` stays separate from `TaskActor`; tasks/sessions reference it by identity.
- The backend stores no local git state. No clones, no refs, no working trees, and no git-spice. Repository metadata comes from GitHub API data and webhook events. Any working-tree git operation runs inside a sandbox via `executeInSandbox()`. - The backend stores no local git state. No clones, no refs, no working trees, and no git-spice. Repository metadata comes from GitHub API data and webhook events. Any working-tree git operation runs inside a sandbox via `executeInSandbox()`.
- When a backend request path must aggregate multiple independent actor calls or reads, prefer bounded parallelism over sequential fan-out when correctness permits. Do not serialize independent work by default. - When a backend request path must aggregate multiple independent actor calls or reads, prefer bounded parallelism over sequential fan-out when correctness permits. Do not serialize independent work by default.
- Only a coordinator creates/destroys its children. Do not create child actors from outside the coordinator. - Only a coordinator creates/destroys its children. Do not create child actors from outside the coordinator.
- Children push state changes up to their direct coordinator only — never skip levels (e.g., task pushes to repo, not directly to org, unless org is the direct coordinator for that index). - Children push state changes up to their direct coordinator only. Task actors push summary updates directly to the organization actor.
- Read paths must use the coordinator's local index tables. Do not fan out to child actors on the hot read path. - Read paths must use the coordinator's local index tables. Do not fan out to child actors on the hot read path.
- Never build "enriched" read actions that chain through multiple actors (e.g., coordinator → child actor → sibling actor). If data from multiple actors is needed for a read, it should already be materialized in the coordinator's index tables via push updates. If it's not there, fix the write path to push it — do not add a fan-out read path. - Never build "enriched" read actions that chain through multiple actors (e.g., coordinator → child actor → sibling actor). If data from multiple actors is needed for a read, it should already be materialized in the coordinator's index tables via push updates. If it's not there, fix the write path to push it — do not add a fan-out read path.
## Drizzle Migration Maintenance
After changing any actor's `db/schema.ts`, you **must** regenerate the corresponding migration so the runtime creates the tables that match the schema. Forgetting this step causes `no such table` errors at runtime.
1. **Generate a new drizzle migration.** Run from `packages/backend`:
```bash
npx drizzle-kit generate --config=./src/actors/<actor>/db/drizzle.config.ts
```
If the interactive prompt is unavailable (e.g. in a non-TTY), manually create a new `.sql` file under `./src/actors/<actor>/db/drizzle/` and add the corresponding entry to `meta/_journal.json`.
2. **Regenerate the compiled `migrations.ts`.** Run from the foundry root:
```bash
npx tsx packages/backend/src/actors/_scripts/generate-actor-migrations.ts
```
3. **Verify insert/upsert calls.** Every column with `.notNull()` (and no `.default(...)`) must be provided a value in all `insert()` and `onConflictDoUpdate()` calls. Missing a NOT NULL column causes a runtime constraint violation, not a type error.
4. **Nuke RivetKit state in dev** after migration changes to start fresh:
```bash
docker compose -f compose.dev.yaml down
docker volume rm foundry_foundry_rivetkit_storage
docker compose -f compose.dev.yaml up -d
```
Actors with drizzle migrations: `organization`, `audit-log`, `task`. Other actors (`user`, `github-data`) use inline migrations without drizzle.
## Workflow Step Nesting — FORBIDDEN
**Never call `c.step()` / `ctx.step()` from inside another step's `run` callback.** RivetKit workflow steps cannot be nested. Doing so causes the runtime error: *"Cannot start a new workflow entry while another is in progress."*
This means:
- Functions called from within a step `run` callback must NOT use `c.step()`, `c.loop()`, `c.sleep()`, or `c.queue.next()`.
- If a mutation function needs to be called both from a step and standalone, it must only do plain DB/API work — no workflow primitives. The workflow step wrapping belongs in the workflow file, not in the mutation.
- Helper wrappers that conditionally call `c.step()` (like a `runSyncStep` pattern) are dangerous — if the caller is already inside a step, the nested `c.step()` will crash at runtime with no compile-time warning.
**Rule of thumb:** Workflow primitives (`step`, `loop`, `sleep`, `queue.next`) may only appear at the top level of a workflow function or inside a `loop` callback — never inside a step's `run`.
## SQLite Constraints
- Single-row tables must use an integer primary key with `CHECK (id = 1)` to enforce the singleton invariant at the database level.
- Follow the task actor pattern for metadata/profile rows and keep the fixed row id in code as `1`, not a string sentinel.
## Multiplayer Correctness ## Multiplayer Correctness
Per-user UI state must live on the user actor, not on shared task/session actors. This is critical for multiplayer — multiple users may view the same task simultaneously with different active sessions, unread states, and in-progress drafts. Per-user UI state must live on the user actor, not on shared task/session actors. This is critical for multiplayer — multiple users may view the same task simultaneously with different active sessions, unread states, and in-progress drafts.
@ -85,6 +203,133 @@ Per-user UI state must live on the user actor, not on shared task/session actors
Do not store per-user preferences, selections, or ephemeral UI state on shared actors. If a field's value should differ between two users looking at the same task, it belongs on the user actor.
## Audit Log Maintenance
Every new action or command handler that represents a user-visible or workflow-significant event must append to the audit log actor. The audit log must remain a comprehensive record of significant operations.
## Debugging Actors
### RivetKit Inspector UI
The RivetKit inspector UI at `http://localhost:6420/ui/` is the most reliable way to debug actor state in local development. The inspector HTTP API (`/inspector/workflow-history`) has a known bug where it returns an empty history (`{"history":{}}`) even when the workflow has entries — always cross-check with the UI.
**Useful inspector URL pattern:**
```
http://localhost:6420/ui/?u=http%3A%2F%2F127.0.0.1%3A6420&ns=default&r=default&n=[%22<actor-name>%22]&actorId=<actor-id>&tab=<tab>
```
Tabs: `workflow`, `database`, `state`, `queue`, `connections`, `metadata`.
**To find actor IDs:**
```bash
curl -s 'http://127.0.0.1:6420/actors?name=organization'
```
**To query actor DB via bun (inside container):**
```bash
docker compose -f compose.dev.yaml exec -T backend bun -e '
var Database = require("bun:sqlite");
var db = new Database("/root/.local/share/foundry/rivetkit/databases/<actor-id>.db", { readonly: true });
console.log(JSON.stringify(db.query("SELECT name FROM sqlite_master WHERE type=?").all("table")));
'
```
**To call actor actions via inspector:**
```bash
curl -s -X POST 'http://127.0.0.1:6420/gateway/<actor-id>/inspector/action/<actionName>' \
-H 'Content-Type: application/json' -d '{"args":[{}]}'
```
### Known inspector API bugs
- `GET /inspector/workflow-history` may return `{"history":{}}` even when workflow has run. Use the UI's Workflow tab instead.
- `GET /inspector/queue` is reliable for checking pending messages.
- `GET /inspector/state` is reliable for checking actor state.
## Inbox & Notification System
The user actor owns two per-user systems: a **task feed** (sidebar ordering) and **notifications** (discrete events). These are distinct concepts that share a common "bump" mechanism.
### Core distinction: bumps vs. notifications
A **bump** updates the task's position in the user's sidebar feed. A **notification** is a discrete event entry shown in the notification panel. Every notification also triggers a bump, but not every bump creates a notification.
| Event | Bumps task? | Creates notification? |
|-------|-------------|----------------------|
| User sends a message | Yes | No |
| User opens/clicks a task | Yes | No |
| User creates a session | Yes | No |
| Agent finishes responding | Yes | Yes |
| PR review requested | Yes | Yes |
| PR merged | Yes | Yes |
| PR comment added | Yes | Yes |
| Agent error/needs input | Yes | Yes |
### Recipient resolution
Notifications and bumps go to the **task owner** only. Each task has exactly one owner at a time (the user who last sent a message or explicitly took ownership). If two users act on the same task at nearly the same time, whoever acted last ends up as the owner. This race is acceptable — it rarely makes sense for two users to work on the same task simultaneously, and ownership only changes as the result of an explicit user action.
The system supports multiplayer (multiple users can view the same task), but the notification/bump target is always the single current owner. Each user has their own independent notification and unread state on their own user actor.
### Tables (on user actor)
Two new tables:
- **`userTaskFeed`** — one row per task. Tracks `bumpedAtMs` and `bumpReason` for sidebar sort order. Does NOT denormalize task content (title, repo, etc.) — the frontend queries the org actor for task content and uses the feed only for ordering/filtering.
- **`userNotifications`** — discrete notification entries with `type`, `message`, `read` state, and optional `sessionId`. Retention: notifications are retained for a configurable number of days after being marked read, then cleaned up.
### Queue commands (user actor workflow)
- `user.bump_task` — upserts `userTaskFeed` row, no notification created. Used for user-initiated actions (send message, open task, create session).
- `user.notify` — inserts `userNotifications` row AND upserts `userTaskFeed` (auto-bump). Used for system events (agent finished, PR review requested).
- `user.mark_read` — marks notifications read for a given `(taskId, sessionId?)`. Also updates `userTaskState.unread` for the session.
### Data flow
Task actor (or org actor) resolves the current task owner, then sends to the owner's user actor queue:
1. `user.notify(...)` for notification-worthy events (auto-bumps the feed)
2. `user.bump_task(...)` for non-notification bumps (send message, open task)
The user actor processes the queue message, writes to its local tables, and broadcasts a `userFeedUpdated` event to connected clients.
### Sidebar architecture change
The left sidebar changes from showing the repo/PR tree to showing **recent tasks** ordered by `userTaskFeed.bumpedAtMs`. Two new buttons at the top of the sidebar:
- **All Repositories** — navigates to a page showing the current repo + PR list (preserving existing functionality)
- **Notifications** — navigates to a page showing the full notification list
The sidebar reads from two sources:
- **User actor** (`userTaskFeed`) — provides sort order and "which tasks are relevant to this user"
- **Org actor** (`taskSummaries`) — provides task content (title, status, branch, PR state, session summaries)
The frontend merges these: org snapshot gives task data, user feed gives sort order. Uses the existing subscription system (`useSubscription`) for both initial state fetch and streaming updates.
### `updatedAtMs` column semantics
The org actor's `taskSummaries.updatedAtMs` and the user actor's `userTaskFeed.bumpedAtMs` serve different purposes:
- `taskSummaries.updatedAtMs` — updated by task actor push. Reflects the last time the task's global state changed (any mutation, any user). Used for "All Repositories" / "All Tasks" views.
- `userTaskFeed.bumpedAtMs` — updated by bump/notify commands. Reflects the last time this specific user's attention was drawn to this task. Used for the per-user sidebar sort.
Add doc comments on both columns clarifying the update source.
### Unread semantics
Each user has independent unread state. The existing `userTaskState` table tracks per-`(taskId, sessionId)` unread state. When the user clicks a session:
1. `userTaskState.unread` is set to 0 for that session
2. All `userNotifications` rows matching `(taskId, sessionId)` are marked `read = 1`
These two unread systems must stay in sync via the `user.mark_read` queue command.
## Better Auth: Actions, Not Queues
All Better Auth adapter operations (verification CRUD, session/email/account index mutations, and user-actor auth record mutations) are exposed as **actions**, not queue commands. This is an intentional exception to the normal pattern of using queues for mutations.
**Why:** The org actor's workflow queue is shared with GitHub sync, webhook processing, task mutations, and billing — 20+ queue names processed sequentially. During the OAuth callback, Better Auth needs to read/write verification records and upsert session/account indexes. If any long-running queue handler (e.g., a GitHub sync step) is ahead in the queue, auth operations time out (10s), `expectQueueResponse` throws a regular `Error`, and Better Auth's `parseState` catches it as a non-`StateError` → redirects to `?error=please_restart_the_process`.
**Why it's safe:** Auth operations are simple SQLite reads/writes scoped to a single actor instance with no cross-actor side effects. They don't need workflow replay semantics or sequential ordering guarantees relative to other queue commands.
**Rule:** Never move Better Auth operations back to queue commands. If new auth-related mutations are added, expose them as actions on the relevant actor.
## Maintenance
- Keep this file up to date whenever actor ownership, hierarchy, or lifecycle responsibilities change.

View file

@ -2,4 +2,4 @@ import { db } from "rivetkit/db/drizzle";
import * as schema from "./schema.js"; import * as schema from "./schema.js";
import migrations from "./migrations.js"; import migrations from "./migrations.js";
export const authUserDb = db({ schema, migrations }); export const auditLogDb = db({ schema, migrations });

View file

@ -0,0 +1,6 @@
import { defineConfig } from "rivetkit/db/drizzle";
// Drizzle configuration for the audit-log actor's database.
const auditLogDrizzleConfig = {
  // Output location for the drizzle migration artifacts of this actor.
  out: "./src/actors/audit-log/db/drizzle",
  // Schema module the configuration points at.
  schema: "./src/actors/audit-log/db/schema.ts",
};

export default defineConfig(auditLogDrizzleConfig);

View file

@ -0,0 +1 @@
ALTER TABLE `events` ADD COLUMN `repo_id` text;

View file

@ -1,48 +1,31 @@
{ {
"version": "6", "version": "6",
"dialect": "sqlite", "dialect": "sqlite",
"id": "6ffd6acb-e737-46ee-a8fe-fcfddcdd6ea9", "id": "a1b2c3d4-0001-4000-8000-000000000001",
"prevId": "00000000-0000-0000-0000-000000000000", "prevId": "e592c829-141f-4740-88b7-09cf957a4405",
"tables": { "tables": {
"repo_meta": { "events": {
"name": "repo_meta", "name": "events",
"columns": { "columns": {
"id": { "id": {
"name": "id", "name": "id",
"type": "integer", "type": "integer",
"primaryKey": true, "primaryKey": true,
"notNull": true, "notNull": true,
"autoincrement": false "autoincrement": true
}, },
"remote_url": { "repo_id": {
"name": "remote_url", "name": "repo_id",
"type": "text", "type": "text",
"primaryKey": false, "primaryKey": false,
"notNull": true, "notNull": false,
"autoincrement": false "autoincrement": false
}, },
"updated_at": {
"name": "updated_at",
"type": "integer",
"primaryKey": false,
"notNull": true,
"autoincrement": false
}
},
"indexes": {},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"checkConstraints": {}
},
"task_index": {
"name": "task_index",
"columns": {
"task_id": { "task_id": {
"name": "task_id", "name": "task_id",
"type": "text", "type": "text",
"primaryKey": true, "primaryKey": false,
"notNull": true, "notNull": false,
"autoincrement": false "autoincrement": false
}, },
"branch_name": { "branch_name": {
@ -52,15 +35,22 @@
"notNull": false, "notNull": false,
"autoincrement": false "autoincrement": false
}, },
"created_at": { "kind": {
"name": "created_at", "name": "kind",
"type": "integer", "type": "text",
"primaryKey": false, "primaryKey": false,
"notNull": true, "notNull": true,
"autoincrement": false "autoincrement": false
}, },
"updated_at": { "payload_json": {
"name": "updated_at", "name": "payload_json",
"type": "text",
"primaryKey": false,
"notNull": true,
"autoincrement": false
},
"created_at": {
"name": "created_at",
"type": "integer", "type": "integer",
"primaryKey": false, "primaryKey": false,
"notNull": true, "notNull": true,

View file

@ -8,6 +8,13 @@
"when": 1773376223815, "when": 1773376223815,
"tag": "0000_fluffy_kid_colt", "tag": "0000_fluffy_kid_colt",
"breakpoints": true "breakpoints": true
},
{
"idx": 1,
"version": "6",
"when": 1773376223816,
"tag": "0001_add_repo_id",
"breakpoints": true
} }
] ]
} }

View file

@ -10,6 +10,12 @@ const journal = {
tag: "0000_fluffy_kid_colt", tag: "0000_fluffy_kid_colt",
breakpoints: true, breakpoints: true,
}, },
{
idx: 1,
when: 1773376223816,
tag: "0001_add_repo_id",
breakpoints: true,
},
], ],
} as const; } as const;
@ -24,6 +30,8 @@ export default {
\`payload_json\` text NOT NULL, \`payload_json\` text NOT NULL,
\`created_at\` integer NOT NULL \`created_at\` integer NOT NULL
); );
`,
m0001: `ALTER TABLE \`events\` ADD COLUMN \`repo_id\` text;
`, `,
} as const, } as const,
}; };

View file

@ -2,10 +2,11 @@ import { integer, sqliteTable, text } from "rivetkit/db/drizzle";
export const events = sqliteTable("events", { export const events = sqliteTable("events", {
id: integer("id").primaryKey({ autoIncrement: true }), id: integer("id").primaryKey({ autoIncrement: true }),
repoId: text("repo_id"),
taskId: text("task_id"), taskId: text("task_id"),
branchName: text("branch_name"), branchName: text("branch_name"),
kind: text("kind").notNull(), kind: text("kind").notNull(),
// Structured by the history event kind definitions in application code. // Structured by the audit-log event kind definitions in application code.
payloadJson: text("payload_json").notNull(), payloadJson: text("payload_json").notNull(),
createdAt: integer("created_at").notNull(), createdAt: integer("created_at").notNull(),
}); });

View file

@ -0,0 +1,180 @@
// @ts-nocheck
import { and, desc, eq } from "drizzle-orm";
import { actor, queue } from "rivetkit";
import { workflow, Loop } from "rivetkit/workflow";
import type { AuditLogEvent } from "@sandbox-agent/foundry-shared";
import { selfAuditLog } from "../handles.js";
import { logActorWarning, resolveErrorMessage } from "../logging.js";
import { auditLogDb } from "./db/db.js";
import { events } from "./db/schema.js";
/** Input supplied when an audit-log actor is created. */
export interface AuditLogInput {
  /** Organization id copied into the actor state by `createState` and stamped onto every listed event. */
  organizationId: string;
}
/** Body of the `auditLog.command.append` queue message: one event to record. */
export interface AppendAuditLogCommand {
  /** Event kind; stored in the non-null `kind` column. */
  kind: string;
  /** Optional repository the event relates to; stored as NULL when omitted. */
  repoId?: string;
  /** Optional task the event relates to; stored as NULL when omitted. */
  taskId?: string;
  /** Optional branch the event relates to; stored as NULL when omitted. */
  branchName?: string;
  /** Structured event data; serialized with `JSON.stringify` into `payload_json`. */
  payload: Record<string, unknown>;
}
/** Optional filters for the `list` action; every provided filter is AND-ed together. */
export interface ListAuditLogParams {
  /** Only return events whose `repo_id` equals this value. */
  repoId?: string;
  /** Only return events whose `branch_name` equals this value. */
  branch?: string;
  /** Only return events whose `task_id` equals this value. */
  taskId?: string;
  /** Maximum number of events to return; `list` falls back to 100 when omitted. */
  limit?: number;
}
// ---------------------------------------------------------------------------
// Queue names
// ---------------------------------------------------------------------------
/** Every queue name the audit-log workflow loop listens on. */
const AUDIT_LOG_QUEUE_NAMES = ["auditLog.command.append"] as const;

/** Union of the literal queue names listed in `AUDIT_LOG_QUEUE_NAMES`. */
type AuditLogQueueName = (typeof AUDIT_LOG_QUEUE_NAMES)[number];

/**
 * Identity helper that returns the queue name it is given.
 *
 * Its only effect is at compile time: the parameter type restricts callers to
 * names that exist in `AUDIT_LOG_QUEUE_NAMES`.
 */
function auditLogWorkflowQueueName(name: AuditLogQueueName): AuditLogQueueName {
  const queueName = name;
  return queueName;
}
// ---------------------------------------------------------------------------
// Mutation functions
// ---------------------------------------------------------------------------
/**
 * Inserts one row into the `events` table for the given append command.
 *
 * Plain database work only — it uses no workflow primitives, so it can be
 * called from inside a workflow step's `run` callback.
 *
 * @param c - Context exposing the actor database as `c.db`.
 * @param body - The event to record.
 * @returns `{ ok: true }` once the insert has run.
 */
async function appendMutation(c: any, body: AppendAuditLogCommand): Promise<{ ok: true }> {
  const { kind, repoId, taskId, branchName, payload } = body;

  const row = {
    // Optional references are stored as NULL rather than undefined.
    repoId: repoId ?? null,
    taskId: taskId ?? null,
    branchName: branchName ?? null,
    kind,
    payloadJson: JSON.stringify(payload),
    createdAt: Date.now(),
  };

  await c.db.insert(events).values(row).run();
  return { ok: true };
}
// ---------------------------------------------------------------------------
// Workflow command loop
// ---------------------------------------------------------------------------
/** Shape of a workflow command handler: receives the loop context and the raw message body. */
type AuditLogWorkflowHandler = (loopCtx: any, body: any) => Promise<any>;

/** Dispatch table mapping each queue name to the mutation that handles it. */
const AUDIT_LOG_COMMAND_HANDLERS: Record<AuditLogQueueName, AuditLogWorkflowHandler> = {
  "auditLog.command.append": (loopCtx, body) => appendMutation(loopCtx, body),
};
/**
 * Workflow entry point: a loop that takes the next audit-log queue message,
 * runs its handler inside a workflow step, and completes the message with the
 * handler's result or with an `{ error }` payload.
 *
 * Every iteration ends with `Loop.continue(undefined)`.
 */
async function runAuditLogWorkflow(ctx: any): Promise<void> {
  await ctx.loop("audit-log-command-loop", async (loopCtx: any) => {
    const msg = await loopCtx.queue.next("next-audit-log-command", {
      names: [...AUDIT_LOG_QUEUE_NAMES],
      completable: true,
    });

    if (msg) {
      // Best-effort error reply: a failure to complete the message is ignored.
      const replyWithError = (text: string) => msg.complete({ error: text }).catch(() => {});

      const handler = AUDIT_LOG_COMMAND_HANDLERS[msg.name as AuditLogQueueName];
      if (handler) {
        try {
          // Wrap in a step so c.state and c.db are accessible inside mutation functions.
          const result = await loopCtx.step({
            name: msg.name,
            timeout: 60_000,
            run: async () => handler(loopCtx, msg.body),
          });
          await msg.complete(result);
        } catch (error) {
          const message = resolveErrorMessage(error);
          logActorWarning("auditLog", "audit-log workflow command failed", {
            command: msg.name,
            error: message,
          });
          await replyWithError(message);
        }
      } else {
        logActorWarning("auditLog", "unknown audit-log command", { command: msg.name });
        await replyWithError(`Unknown command: ${msg.name}`);
      }
    }

    return Loop.continue(undefined);
  });
}
// ---------------------------------------------------------------------------
// Actor definition
// ---------------------------------------------------------------------------
/**
 * Organization-scoped audit log. One per org, not one per repo.
 *
 * The org is the coordinator for all tasks across repos, and we frequently need
 * to query the full audit trail across repos (e.g. org-wide activity feed,
 * compliance). A per-repo audit log would require fan-out reads every time.
 * Keeping it org-scoped gives us a single queryable feed with optional repoId
 * filtering when callers want a narrower view.
 */
export const auditLog = actor({
  db: auditLogDb,
  // One queue per workflow command name.
  queues: Object.fromEntries(AUDIT_LOG_QUEUE_NAMES.map((name) => [name, queue()])),
  options: {
    name: "Audit Log",
    icon: "database",
  },
  createState: (_c, input: AuditLogInput) => ({
    organizationId: input.organizationId,
  }),
  actions: {
    /**
     * Mutation — self-send to queue for workflow history.
     *
     * The message is sent with `wait: false`, so the returned `{ ok: true }`
     * only means the command was enqueued, not that the row has been written.
     */
    async append(c: any, body: AppendAuditLogCommand): Promise<{ ok: true }> {
      const self = selfAuditLog(c);
      await self.send(auditLogWorkflowQueueName("auditLog.command.append"), body, { wait: false });
      return { ok: true };
    },

    /**
     * Read — direct action (no queue).
     *
     * Returns events newest first, optionally filtered by repo, task and/or
     * branch (filters are AND-ed).
     *
     * @param params - Optional filters and `limit`. A missing, non-finite or
     *   negative `limit` falls back to 100; fractional limits are floored.
     */
    async list(c, params?: ListAuditLogParams): Promise<AuditLogEvent[]> {
      const whereParts = [];
      if (params?.repoId) {
        whereParts.push(eq(events.repoId, params.repoId));
      }
      if (params?.taskId) {
        whereParts.push(eq(events.taskId, params.taskId));
      }
      if (params?.branch) {
        whereParts.push(eq(events.branchName, params.branch));
      }

      // Never forward a negative/NaN/fractional limit to SQL: in SQLite a
      // negative LIMIT means "no limit", which would turn a bad argument into
      // an unbounded read.
      const requestedLimit = params?.limit;
      const limit =
        typeof requestedLimit === "number" && Number.isFinite(requestedLimit) && requestedLimit >= 0
          ? Math.floor(requestedLimit)
          : 100;

      const base = c.db
        .select({
          id: events.id,
          repoId: events.repoId,
          taskId: events.taskId,
          branchName: events.branchName,
          kind: events.kind,
          payloadJson: events.payloadJson,
          createdAt: events.createdAt,
        })
        .from(events);

      const rows = await (whereParts.length > 0 ? base.where(and(...whereParts)) : base)
        // `createdAt` has millisecond resolution, so several events can share
        // a timestamp; the autoincrement id breaks ties deterministically.
        .orderBy(desc(events.createdAt), desc(events.id))
        .limit(limit)
        .all();

      return rows.map((row) => ({
        ...row,
        organizationId: c.state.organizationId,
        repoId: row.repoId ?? null,
      }));
    },
  },
  run: workflow(runAuditLogWorkflow),
});

View file

@ -1,70 +0,0 @@
import { integer, sqliteTable, text, uniqueIndex } from "drizzle-orm/sqlite-core";
/** Drizzle definition of the SQLite `user` table, keyed by a text `id`. */
export const authUsers = sqliteTable("user", {
id: text("id").notNull().primaryKey(),
name: text("name").notNull(),
email: text("email").notNull(),
// Stored as an integer column. NOTE(review): presumably a 0/1 flag — confirm against the writers.
emailVerified: integer("email_verified").notNull(),
// Nullable: no `.notNull()` on this column.
image: text("image"),
// Integer timestamps; the unit is not visible in this file — TODO confirm (likely epoch milliseconds).
createdAt: integer("created_at").notNull(),
updatedAt: integer("updated_at").notNull(),
});
/** Drizzle definition of the SQLite `session` table, keyed by a text `id`. */
export const authSessions = sqliteTable(
"session",
{
id: text("id").notNull().primaryKey(),
token: text("token").notNull(),
// Presumably the id of the owning `user` row; no foreign-key constraint is declared here.
userId: text("user_id").notNull(),
expiresAt: integer("expires_at").notNull(),
// Nullable client metadata columns.
ipAddress: text("ip_address"),
userAgent: text("user_agent"),
createdAt: integer("created_at").notNull(),
updatedAt: integer("updated_at").notNull(),
},
(table) => ({
// Unique index on `token`: at most one session row per token value.
tokenIdx: uniqueIndex("session_token_idx").on(table.token),
}),
);
/** Drizzle definition of the SQLite `account` table, keyed by a text `id`. */
export const authAccounts = sqliteTable(
"account",
{
id: text("id").notNull().primaryKey(),
accountId: text("account_id").notNull(),
providerId: text("provider_id").notNull(),
// Presumably the id of the owning `user` row; no foreign-key constraint is declared here.
userId: text("user_id").notNull(),
// The credential/token columns below are all nullable.
accessToken: text("access_token"),
refreshToken: text("refresh_token"),
idToken: text("id_token"),
accessTokenExpiresAt: integer("access_token_expires_at"),
refreshTokenExpiresAt: integer("refresh_token_expires_at"),
scope: text("scope"),
password: text("password"),
createdAt: integer("created_at").notNull(),
updatedAt: integer("updated_at").notNull(),
},
(table) => ({
// Unique index on (`provider_id`, `account_id`): one row per provider/account pair.
providerAccountIdx: uniqueIndex("account_provider_account_idx").on(table.providerId, table.accountId),
}),
);
/** Drizzle definition of the SQLite `user_profiles` table, keyed by `user_id` (one row per user id). */
export const userProfiles = sqliteTable("user_profiles", {
userId: text("user_id").notNull().primaryKey(),
githubAccountId: text("github_account_id"),
githubLogin: text("github_login"),
roleLabel: text("role_label").notNull(),
// Text column; per its name it holds JSON. NOTE(review): presumably a JSON array of organization ids — confirm against writers.
eligibleOrganizationIdsJson: text("eligible_organization_ids_json").notNull(),
starterRepoStatus: text("starter_repo_status").notNull(),
// Nullable integer timestamps.
starterRepoStarredAt: integer("starter_repo_starred_at"),
starterRepoSkippedAt: integer("starter_repo_skipped_at"),
createdAt: integer("created_at").notNull(),
updatedAt: integer("updated_at").notNull(),
});
/** Drizzle definition of the SQLite `session_state` table, keyed by `session_id` (one row per session id). */
export const sessionState = sqliteTable("session_state", {
sessionId: text("session_id").notNull().primaryKey(),
// Nullable: a session may have no active organization recorded.
activeOrganizationId: text("active_organization_id"),
createdAt: integer("created_at").notNull(),
updatedAt: integer("updated_at").notNull(),
});

View file

@ -1,353 +0,0 @@
import { and, asc, count as sqlCount, desc, eq, gt, gte, inArray, isNotNull, isNull, like, lt, lte, ne, notInArray, or } from "drizzle-orm";
import { actor } from "rivetkit";
import { authUserDb } from "./db/db.js";
import { authAccounts, authSessions, authUsers, sessionState, userProfiles } from "./db/schema.js";
/** Maps the model names accepted by the auth-user actions to their Drizzle tables. */
const tables = {
  user: authUsers,
  session: authSessions,
  account: authAccounts,
  userProfiles,
  sessionState,
} as const;

/**
 * Resolves a model name to its Drizzle table.
 *
 * @param model - One of the keys of `tables`.
 * @returns The matching table (typed `any` so callers can index columns dynamically).
 * @throws Error when `model` is not a key of `tables`.
 */
function tableFor(model: string) {
  // Own-property check: a plain `tables[model]` lookup would also resolve
  // inherited Object.prototype members such as "constructor" or "toString",
  // letting those names slip past the guard.
  if (!Object.prototype.hasOwnProperty.call(tables, model)) {
    throw new Error(`Unsupported auth user model: ${model}`);
  }
  return tables[model as keyof typeof tables] as any;
}
/**
 * Looks up a column on a table object by field name.
 *
 * @param table - Table object whose own properties are its columns.
 * @param field - Property name of the column to resolve.
 * @returns The column stored under `field`.
 * @throws Error when `table` has no own, truthy property named `field`.
 */
function columnFor(table: any, field: string) {
  // Only own properties count: inherited members like "constructor" or
  // "toString" are truthy on every object and must not be treated as columns.
  const hasOwnField = table != null && Object.prototype.hasOwnProperty.call(table, field);
  const column = hasOwnField ? table[field] : undefined;
  if (!column) {
    throw new Error(`Unsupported auth user field: ${field}`);
  }
  return column;
}
/**
 * Converts a where-clause value into the form used for comparisons.
 *
 * - `Date` instances become their `getTime()` number.
 * - Arrays are normalized element by element (recursively).
 * - Anything else is returned unchanged.
 */
function normalizeValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    const normalized: unknown[] = [];
    for (const entry of value) {
      normalized.push(normalizeValue(entry));
    }
    return normalized;
  }
  return value instanceof Date ? value.getTime() : value;
}
/**
 * Translates one where-clause (`{ field, operator, value }`) into a Drizzle
 * filter expression on `table`.
 *
 * `null` values map to `IS NULL` / `IS NOT NULL` for `eq` / `ne`. A missing or
 * unrecognised operator is treated as `eq`.
 *
 * NOTE(review): for `contains` / `starts_with` / `ends_with` the value is
 * interpolated into the LIKE pattern as-is, so `%` and `_` inside the value
 * act as wildcards.
 *
 * @throws Error (from `columnFor`) when `clause.field` is not a column of `table`.
 */
function clauseToExpr(table: any, clause: any) {
  const column = columnFor(table, clause.field);
  const value = normalizeValue(clause.value);

  // Lazily derived views of the value used by the list and LIKE operators.
  const asList = (): any[] => (Array.isArray(value) ? (value as any[]) : [value as any]);
  const asText = (): string => String(value ?? "");

  const operator = clause.operator;
  if (operator === "ne") {
    return value === null ? isNotNull(column) : ne(column, value as any);
  }
  if (operator === "lt") {
    return lt(column, value as any);
  }
  if (operator === "lte") {
    return lte(column, value as any);
  }
  if (operator === "gt") {
    return gt(column, value as any);
  }
  if (operator === "gte") {
    return gte(column, value as any);
  }
  if (operator === "in") {
    return inArray(column, asList());
  }
  if (operator === "not_in") {
    return notInArray(column, asList());
  }
  if (operator === "contains") {
    return like(column, `%${asText()}%`);
  }
  if (operator === "starts_with") {
    return like(column, `${asText()}%`);
  }
  if (operator === "ends_with") {
    return like(column, `%${asText()}`);
  }

  // "eq" and every other operator value.
  return value === null ? isNull(column) : eq(column, value as any);
}
/**
 * Folds a list of where-clauses into a single filter expression.
 *
 * Clauses are combined left to right: each clause is joined to the expression
 * built so far with OR when its own `connector` is `"OR"`, otherwise with AND.
 *
 * @returns The combined expression, or `undefined` when there are no clauses.
 */
function buildWhere(table: any, where: any[] | undefined) {
  if (!where?.length) {
    return undefined;
  }

  const [first, ...rest] = where;
  return rest.reduce<any>((combined, clause) => {
    const next = clauseToExpr(table, clause);
    return clause.connector === "OR" ? or(combined, next) : and(combined, next);
  }, clauseToExpr(table, first));
}
/**
 * Attaches joined records to a single row.
 *
 * - `session` / `account` rows with `join.user`: adds `user` (the matching
 *   user row, or `null`).
 * - `user` rows with `join.account`: adds `account` (array of the user's
 *   account rows).
 *
 * Returns `row` itself (synchronously) when `row` or `join` is falsy; in every
 * other case a promise is returned, resolving to `row` unchanged when no join
 * applies.
 */
function applyJoinToRow(c: any, model: string, row: any, join: any) {
  if (!row || !join) {
    return row;
  }

  // Sessions and accounts both reference their user through `userId`.
  const wantsUser = (model === "session" || model === "account") && join.user;
  if (wantsUser) {
    const userQuery = c.db.select().from(authUsers).where(eq(authUsers.id, row.userId)).get();
    return userQuery.then((user: any) => ({ ...row, user: user ?? null }));
  }

  const wantsAccounts = model === "user" && join.account;
  if (wantsAccounts) {
    const accountsQuery = c.db.select().from(authAccounts).where(eq(authAccounts.userId, row.id)).all();
    return accountsQuery.then((accounts: any[]) => ({ ...row, account: accounts }));
  }

  return Promise.resolve(row);
}
/**
 * Batch version of the single-row join: attaches joined records to many rows
 * using one lookup query instead of one per row.
 *
 * - `session` / `account` rows with `join.user`: adds `user` (row or `null`).
 * - `user` rows with `join.account`: adds `account` (array, possibly empty).
 *
 * Returns `rows` unchanged when there is no join, no rows, or no applicable
 * join for the model.
 */
async function applyJoinToRows(c: any, model: string, rows: any[], join: any) {
  if (!join || rows.length === 0) {
    return rows;
  }

  const attachesUser = (model === "session" || model === "account") && join.user;
  if (attachesUser) {
    // Distinct, truthy user ids referenced by the rows.
    const userIds = Array.from(new Set(rows.map((row) => row.userId).filter(Boolean)));
    let users: any[] = [];
    if (userIds.length > 0) {
      users = await c.db.select().from(authUsers).where(inArray(authUsers.id, userIds)).all();
    }
    const usersById = new Map(users.map((user: any) => [user.id, user]));
    return rows.map((row) => ({ ...row, user: usersById.get(row.userId) ?? null }));
  }

  if (model === "user" && join.account) {
    const userIds = rows.map((row) => row.id);
    let accounts: any[] = [];
    if (userIds.length > 0) {
      accounts = await c.db.select().from(authAccounts).where(inArray(authAccounts.userId, userIds)).all();
    }

    // Group the fetched accounts by the user they belong to.
    const accountsByUserId = new Map<string, any[]>();
    for (const account of accounts) {
      const bucket = accountsByUserId.get(account.userId) ?? [];
      bucket.push(account);
      accountsByUserId.set(account.userId, bucket);
    }
    return rows.map((row) => ({ ...row, account: accountsByUserId.get(row.id) ?? [] }));
  }

  return rows;
}
/**
 * Auth-user actor: a generic record store over the `user`, `session`,
 * `account`, `userProfiles` and `sessionState` tables, plus a few app-specific
 * helpers. All operations are exposed as actions.
 *
 * Generic actions take a `model` (resolved with `tableFor`) and a list of
 * where-clauses (combined with `buildWhere`).
 */
export const authUser = actor({
  db: authUserDb,
  options: {
    name: "Auth User",
    icon: "shield",
    actionTimeout: 60_000,
  },
  createState: (_c, input: { userId: string }) => ({
    userId: input.userId,
  }),
  actions: {
    /**
     * Inserts `data` into the model's table and returns the stored row,
     * re-read by its `id` column.
     * @throws Error when the model is unsupported or its table has no `id` column.
     */
    async createAuthRecord(c, input: { model: string; data: Record<string, unknown> }) {
      const table = tableFor(input.model);
      await c.db
        .insert(table)
        .values(input.data as any)
        .run();
      return await c.db
        .select()
        .from(table)
        .where(eq(columnFor(table, "id"), input.data.id as any))
        .get();
    },

    /** Returns the first row matching `where` (or the first row of the table when `where` is empty), with optional joins applied; `null` when nothing matches. */
    async findOneAuthRecord(c, input: { model: string; where: any[]; join?: any }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      const row = predicate ? await c.db.select().from(table).where(predicate).get() : await c.db.select().from(table).get();
      return await applyJoinToRow(c, input.model, row ?? null, input.join);
    },

    /** Returns all rows matching `where`, honoring optional sort, limit, offset and joins. */
    async findManyAuthRecords(c, input: { model: string; where?: any[]; limit?: number; offset?: number; sortBy?: any; join?: any }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      let query: any = c.db.select().from(table);
      if (predicate) {
        query = query.where(predicate);
      }
      if (input.sortBy?.field) {
        const column = columnFor(table, input.sortBy.field);
        // Any direction other than "asc" sorts descending.
        query = query.orderBy(input.sortBy.direction === "asc" ? asc(column) : desc(column));
      }
      if (typeof input.limit === "number") {
        query = query.limit(input.limit);
      }
      if (typeof input.offset === "number") {
        query = query.offset(input.offset);
      }
      const rows = await query.all();
      return await applyJoinToRows(c, input.model, rows, input.join);
    },

    /**
     * Applies `update` to the rows matching `where` and returns one matching row.
     *
     * NOTE(review): the row is re-read with the SAME predicate after the
     * update, so when `update` changes a column used in `where` the re-read
     * finds nothing and this returns `undefined` even though the row was
     * updated.
     *
     * @throws Error when `where` is empty.
     */
    async updateAuthRecord(c, input: { model: string; where: any[]; update: Record<string, unknown> }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      if (!predicate) {
        throw new Error("updateAuthRecord requires a where clause");
      }
      await c.db
        .update(table)
        .set(input.update as any)
        .where(predicate)
        .run();
      return await c.db.select().from(table).where(predicate).get();
    },

    /**
     * Applies `update` to every row matching `where`.
     * @returns The number of rows that matched (and were therefore updated).
     * @throws Error when `where` is empty.
     */
    async updateManyAuthRecords(c, input: { model: string; where: any[]; update: Record<string, unknown> }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      if (!predicate) {
        throw new Error("updateManyAuthRecords requires a where clause");
      }
      // Count BEFORE writing: if the update changes a column used by the where
      // clause, the predicate no longer matches afterwards and a post-update
      // count would under-report the affected rows.
      const matched = await c.db.select({ value: sqlCount() }).from(table).where(predicate).get();
      await c.db
        .update(table)
        .set(input.update as any)
        .where(predicate)
        .run();
      return matched?.value ?? 0;
    },

    /**
     * Deletes the rows matching `where`.
     * @throws Error when `where` is empty.
     */
    async deleteAuthRecord(c, input: { model: string; where: any[] }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      if (!predicate) {
        throw new Error("deleteAuthRecord requires a where clause");
      }
      await c.db.delete(table).where(predicate).run();
    },

    /**
     * Deletes every row matching `where`.
     * @returns The number of rows that matched before the delete.
     * @throws Error when `where` is empty.
     */
    async deleteManyAuthRecords(c, input: { model: string; where: any[] }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      if (!predicate) {
        throw new Error("deleteManyAuthRecords requires a where clause");
      }
      // Count in SQL instead of loading every matching row just to measure it.
      const matched = await c.db.select({ value: sqlCount() }).from(table).where(predicate).get();
      await c.db.delete(table).where(predicate).run();
      return matched?.value ?? 0;
    },

    /** Counts the rows matching `where`, or all rows of the table when `where` is empty. */
    async countAuthRecords(c, input: { model: string; where?: any[] }) {
      const table = tableFor(input.model);
      const predicate = buildWhere(table, input.where);
      const row = predicate
        ? await c.db.select({ value: sqlCount() }).from(table).where(predicate).get()
        : await c.db.select({ value: sqlCount() }).from(table).get();
      return row?.value ?? 0;
    },

    /**
     * Loads everything the app needs for a session in one call: the session,
     * its user, the user's profile, the per-session state and the user's accounts.
     * @returns `null` when the session does not exist.
     */
    async getAppAuthState(c, input: { sessionId: string }) {
      const session = await c.db.select().from(authSessions).where(eq(authSessions.id, input.sessionId)).get();
      if (!session) {
        return null;
      }
      // The four lookups are independent, so they are issued together.
      const [user, profile, currentSessionState, accounts] = await Promise.all([
        c.db.select().from(authUsers).where(eq(authUsers.id, session.userId)).get(),
        c.db.select().from(userProfiles).where(eq(userProfiles.userId, session.userId)).get(),
        c.db.select().from(sessionState).where(eq(sessionState.sessionId, input.sessionId)).get(),
        c.db.select().from(authAccounts).where(eq(authAccounts.userId, session.userId)).all(),
      ]);
      return {
        session,
        user,
        profile: profile ?? null,
        sessionState: currentSessionState ?? null,
        accounts,
      };
    },

    /**
     * Creates the user's profile row or patches the existing one.
     *
     * On insert, omitted fields get defaults (`"GitHub user"`, `"[]"`,
     * `"pending"`, `null`). On conflict, only fields explicitly present in
     * `patch` (i.e. not `undefined`) are overwritten; `updatedAt` is always bumped.
     *
     * @returns The stored profile row.
     */
    async upsertUserProfile(
      c,
      input: {
        userId: string;
        patch: {
          githubAccountId?: string | null;
          githubLogin?: string | null;
          roleLabel?: string;
          eligibleOrganizationIdsJson?: string;
          starterRepoStatus?: string;
          starterRepoStarredAt?: number | null;
          starterRepoSkippedAt?: number | null;
        };
      },
    ) {
      const now = Date.now();
      await c.db
        .insert(userProfiles)
        .values({
          userId: input.userId,
          githubAccountId: input.patch.githubAccountId ?? null,
          githubLogin: input.patch.githubLogin ?? null,
          roleLabel: input.patch.roleLabel ?? "GitHub user",
          eligibleOrganizationIdsJson: input.patch.eligibleOrganizationIdsJson ?? "[]",
          starterRepoStatus: input.patch.starterRepoStatus ?? "pending",
          starterRepoStarredAt: input.patch.starterRepoStarredAt ?? null,
          starterRepoSkippedAt: input.patch.starterRepoSkippedAt ?? null,
          createdAt: now,
          updatedAt: now,
        })
        .onConflictDoUpdate({
          target: userProfiles.userId,
          set: {
            // `!== undefined` (not `??`) so an explicit null clears the column.
            ...(input.patch.githubAccountId !== undefined ? { githubAccountId: input.patch.githubAccountId } : {}),
            ...(input.patch.githubLogin !== undefined ? { githubLogin: input.patch.githubLogin } : {}),
            ...(input.patch.roleLabel !== undefined ? { roleLabel: input.patch.roleLabel } : {}),
            ...(input.patch.eligibleOrganizationIdsJson !== undefined ? { eligibleOrganizationIdsJson: input.patch.eligibleOrganizationIdsJson } : {}),
            ...(input.patch.starterRepoStatus !== undefined ? { starterRepoStatus: input.patch.starterRepoStatus } : {}),
            ...(input.patch.starterRepoStarredAt !== undefined ? { starterRepoStarredAt: input.patch.starterRepoStarredAt } : {}),
            ...(input.patch.starterRepoSkippedAt !== undefined ? { starterRepoSkippedAt: input.patch.starterRepoSkippedAt } : {}),
            updatedAt: now,
          },
        })
        .run();
      return await c.db.select().from(userProfiles).where(eq(userProfiles.userId, input.userId)).get();
    },

    /**
     * Creates or updates the per-session state row, setting its active organization.
     * @returns The stored session-state row.
     */
    async upsertSessionState(c, input: { sessionId: string; activeOrganizationId: string | null }) {
      const now = Date.now();
      await c.db
        .insert(sessionState)
        .values({
          sessionId: input.sessionId,
          activeOrganizationId: input.activeOrganizationId,
          createdAt: now,
          updatedAt: now,
        })
        .onConflictDoUpdate({
          target: sessionState.sessionId,
          set: {
            activeOrganizationId: input.activeOrganizationId,
            updatedAt: now,
          },
        })
        .run();
      return await c.db.select().from(sessionState).where(eq(sessionState.sessionId, input.sessionId)).get();
    },
  },
});

View file

@ -1,104 +0,0 @@
import type { TaskStatus, SandboxProviderId } from "@sandbox-agent/foundry-shared";
/** Fields common to every event in this module: the organization and repository it belongs to. */
export interface RepoScopedEvent {
  organizationId: string;
  repoId: string;
}

/** Fields common to every event that concerns a single task. */
export interface TaskScopedEvent extends RepoScopedEvent {
  taskId: string;
}

/** Task-creation event: carries the sandbox provider, branch name and title. */
export interface TaskCreatedEvent extends TaskScopedEvent {
  sandboxProviderId: SandboxProviderId;
  branchName: string;
  title: string;
}

/** Task status event: carries a `TaskStatus` and a message. */
export interface TaskStatusEvent extends TaskScopedEvent {
  status: TaskStatus;
  message: string;
}

/** Repository snapshot event; not tied to a task, so it carries no `taskId`. */
export interface RepositorySnapshotEvent extends RepoScopedEvent {
  updatedAt: number;
}

/** Agent-started event for a session of a task. */
export interface AgentStartedEvent extends TaskScopedEvent {
  sessionId: string;
}

/** Agent-idle event for a session of a task. */
export interface AgentIdleEvent extends TaskScopedEvent {
  sessionId: string;
}

/** Agent-error event: carries an error message (no session id). */
export interface AgentErrorEvent extends TaskScopedEvent {
  message: string;
}

/** Pull-request-created event: carries the PR number and URL. */
export interface PrCreatedEvent extends TaskScopedEvent {
  prNumber: number;
  url: string;
}

/** Pull-request-closed event; `merged` records whether it was merged. */
export interface PrClosedEvent extends TaskScopedEvent {
  prNumber: number;
  merged: boolean;
}

/** Pull-request review event: carries the reviewer and a status string. */
export interface PrReviewEvent extends TaskScopedEvent {
  prNumber: number;
  reviewer: string;
  status: string;
}

/** CI status change event for a pull request. */
export interface CiStatusChangedEvent extends TaskScopedEvent {
  prNumber: number;
  status: string;
}

/** Names of the task steps reported by `TaskStepEvent`. */
export type TaskStepName = "auto_commit" | "push" | "pr_submit";

/** Status values a task step can report. */
export type TaskStepStatus = "started" | "completed" | "skipped" | "failed";

/** Task step event: carries the step name, its status and a message. */
export interface TaskStepEvent extends TaskScopedEvent {
  step: TaskStepName;
  status: TaskStepStatus;
  message: string;
}

/** Branch-switched event: carries a branch name. */
export interface BranchSwitchedEvent extends TaskScopedEvent {
  branchName: string;
}

/** Session-attached event for a session of a task. */
export interface SessionAttachedEvent extends TaskScopedEvent {
  sessionId: string;
}

View file

@ -18,6 +18,18 @@ const journal = {
tag: "0002_github_branches", tag: "0002_github_branches",
breakpoints: true, breakpoints: true,
}, },
{
idx: 3,
when: 1773907200000,
tag: "0003_sync_progress",
breakpoints: true,
},
{
idx: 4,
when: 1773993600000,
tag: "0004_drop_github_branches",
breakpoints: true,
},
], ],
} as const; } as const;
@ -32,7 +44,8 @@ export default {
\`installation_id\` integer, \`installation_id\` integer,
\`last_sync_label\` text NOT NULL, \`last_sync_label\` text NOT NULL,
\`last_sync_at\` integer, \`last_sync_at\` integer,
\`updated_at\` integer NOT NULL \`updated_at\` integer NOT NULL,
CONSTRAINT \`github_meta_singleton_id_check\` CHECK(\`id\` = 1)
); );
--> statement-breakpoint --> statement-breakpoint
CREATE TABLE \`github_repositories\` ( CREATE TABLE \`github_repositories\` (
@ -78,6 +91,24 @@ CREATE TABLE \`github_pull_requests\` (
\`commit_sha\` text NOT NULL, \`commit_sha\` text NOT NULL,
\`updated_at\` integer NOT NULL \`updated_at\` integer NOT NULL
); );
`,
m0003: `ALTER TABLE \`github_meta\` ADD \`sync_generation\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_meta\` ADD \`sync_phase\` text;
--> statement-breakpoint
ALTER TABLE \`github_meta\` ADD \`processed_repository_count\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_meta\` ADD \`total_repository_count\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_repositories\` ADD \`sync_generation\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_members\` ADD \`sync_generation\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_pull_requests\` ADD \`sync_generation\` integer NOT NULL DEFAULT 0;
--> statement-breakpoint
ALTER TABLE \`github_branches\` ADD \`sync_generation\` integer NOT NULL DEFAULT 0;
`,
m0004: `DROP TABLE IF EXISTS \`github_branches\`;
`, `,
} as const, } as const,
}; };

View file

@ -1,15 +1,24 @@
import { integer, sqliteTable, text } from "rivetkit/db/drizzle"; import { check, integer, sqliteTable, text } from "rivetkit/db/drizzle";
import { sql } from "drizzle-orm";
export const githubMeta = sqliteTable("github_meta", { export const githubMeta = sqliteTable(
id: integer("id").primaryKey(), "github_meta",
connectedAccount: text("connected_account").notNull(), {
installationStatus: text("installation_status").notNull(), id: integer("id").primaryKey(),
syncStatus: text("sync_status").notNull(), connectedAccount: text("connected_account").notNull(),
installationId: integer("installation_id"), installationStatus: text("installation_status").notNull(),
lastSyncLabel: text("last_sync_label").notNull(), syncStatus: text("sync_status").notNull(),
lastSyncAt: integer("last_sync_at"), installationId: integer("installation_id"),
updatedAt: integer("updated_at").notNull(), lastSyncLabel: text("last_sync_label").notNull(),
}); lastSyncAt: integer("last_sync_at"),
syncGeneration: integer("sync_generation").notNull(),
syncPhase: text("sync_phase"),
processedRepositoryCount: integer("processed_repository_count").notNull(),
totalRepositoryCount: integer("total_repository_count").notNull(),
updatedAt: integer("updated_at").notNull(),
},
(table) => [check("github_meta_singleton_id_check", sql`${table.id} = 1`)],
);
export const githubRepositories = sqliteTable("github_repositories", { export const githubRepositories = sqliteTable("github_repositories", {
repoId: text("repo_id").notNull().primaryKey(), repoId: text("repo_id").notNull().primaryKey(),
@ -17,14 +26,7 @@ export const githubRepositories = sqliteTable("github_repositories", {
cloneUrl: text("clone_url").notNull(), cloneUrl: text("clone_url").notNull(),
private: integer("private").notNull(), private: integer("private").notNull(),
defaultBranch: text("default_branch").notNull(), defaultBranch: text("default_branch").notNull(),
updatedAt: integer("updated_at").notNull(), syncGeneration: integer("sync_generation").notNull(),
});
export const githubBranches = sqliteTable("github_branches", {
branchId: text("branch_id").notNull().primaryKey(),
repoId: text("repo_id").notNull(),
branchName: text("branch_name").notNull(),
commitSha: text("commit_sha").notNull(),
updatedAt: integer("updated_at").notNull(), updatedAt: integer("updated_at").notNull(),
}); });
@ -35,6 +37,7 @@ export const githubMembers = sqliteTable("github_members", {
email: text("email"), email: text("email"),
role: text("role"), role: text("role"),
state: text("state").notNull(), state: text("state").notNull(),
syncGeneration: integer("sync_generation").notNull(),
updatedAt: integer("updated_at").notNull(), updatedAt: integer("updated_at").notNull(),
}); });
@ -51,5 +54,6 @@ export const githubPullRequests = sqliteTable("github_pull_requests", {
baseRefName: text("base_ref_name").notNull(), baseRefName: text("base_ref_name").notNull(),
authorLogin: text("author_login"), authorLogin: text("author_login"),
isDraft: integer("is_draft").notNull(), isDraft: integer("is_draft").notNull(),
syncGeneration: integer("sync_generation").notNull(),
updatedAt: integer("updated_at").notNull(), updatedAt: integer("updated_at").notNull(),
}); });

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,73 @@
// @ts-nocheck
import { logActorWarning, resolveErrorMessage } from "../logging.js";
// Loaded lazily through a dynamic import to break a circular dependency:
// index.ts imports workflow.ts, while workflow.ts needs functions that
// index.ts exports.
async function getIndexModule() {
  const indexModule = await import("./index.js");
  return indexModule;
}
/** Names of the queues that the github-data command loop iterates over. */
export const GITHUB_DATA_QUEUE_NAMES = [
  "githubData.command.syncRepos",
  "githubData.command.handlePullRequestWebhook",
  "githubData.command.clearState",
] as const;

/** Union of the literal queue names listed in `GITHUB_DATA_QUEUE_NAMES`. */
export type GithubDataQueueName = (typeof GITHUB_DATA_QUEUE_NAMES)[number];

/**
 * Identity helper: returns `name` unchanged. Routing a queue name through it
 * makes the compiler check the literal against `GithubDataQueueName`.
 */
export function githubDataWorkflowQueueName(name: GithubDataQueueName): GithubDataQueueName {
  const queueName: GithubDataQueueName = name;
  return queueName;
}
/**
 * Plain run handler (no workflow engine). Drains the queue using `c.queue.iter()`
 * with completable messages. This avoids the RivetKit bug where actors created
 * from another actor's workflow context never start their `run: workflow(...)`.
 *
 * Each message is answered through `msg.complete(...)`: `{ ok: true }` on
 * success, `{ error: message }` when the command throws or the queue name is
 * not recognized. Errors thrown while handling a message are caught so the
 * loop moves on to the next message.
 *
 * @param c Actor context. Only `c.queue.iter(...)` is used directly; the
 *   context is otherwise handed to the command handlers loaded from `./index.js`.
 */
export async function runGithubDataCommandLoop(c: any): Promise<void> {
  for await (const msg of c.queue.iter({ names: [...GITHUB_DATA_QUEUE_NAMES], completable: true })) {
    try {
      if (msg.name === "githubData.command.syncRepos") {
        // Wrapped in an object so that even a thrown `undefined` registers as a failure.
        let syncFailure: { error: unknown } | undefined;
        try {
          const { runFullSync } = await getIndexModule();
          await runFullSync(c, msg.body);
        } catch (error) {
          syncFailure = { error };
        }

        if (syncFailure === undefined) {
          // Acknowledge outside the sync try/catch: if delivering the reply
          // fails, the sync itself still succeeded and must not be recorded as
          // failed through fullSyncError. The generic catch below handles it.
          await msg.complete({ ok: true });
          continue;
        }

        const { fullSyncError } = await getIndexModule();
        try {
          await fullSyncError(c, syncFailure.error);
        } catch (recordError) {
          // Best effort: failing to record the error must not stop us from
          // replying to the sender, but it should not vanish silently either.
          logActorWarning("githubData", "failed to record full sync error", {
            queueName: msg.name,
            error: resolveErrorMessage(recordError),
          });
        }
        const message = syncFailure.error instanceof Error ? syncFailure.error.message : String(syncFailure.error);
        await msg.complete({ error: message }).catch(() => {});
        continue;
      }

      if (msg.name === "githubData.command.handlePullRequestWebhook") {
        const { handlePullRequestWebhookMutation } = await getIndexModule();
        await handlePullRequestWebhookMutation(c, msg.body);
        await msg.complete({ ok: true });
        continue;
      }

      if (msg.name === "githubData.command.clearState") {
        const { clearStateMutation } = await getIndexModule();
        await clearStateMutation(c, msg.body);
        await msg.complete({ ok: true });
        continue;
      }

      logActorWarning("githubData", "unknown queue message", { queueName: msg.name });
      await msg.complete({ error: `Unknown command: ${msg.name}` });
    } catch (error) {
      const message = resolveErrorMessage(error);
      logActorWarning("githubData", "github-data command failed", {
        queueName: msg.name,
        error: message,
      });
      // The message may already be completed (or completion itself may be what
      // failed), so a second failure here is deliberately ignored.
      await msg.complete({ error: message }).catch(() => {});
    }
  }
}

View file

@ -1,4 +1,4 @@
import { authUserKey, githubDataKey, historyKey, organizationKey, repositoryKey, taskKey, taskSandboxKey } from "./keys.js"; import { auditLogKey, githubDataKey, organizationKey, taskKey, taskSandboxKey, userKey } from "./keys.js";
export function actorClient(c: any) { export function actorClient(c: any) {
return c.client(); return c.client();
@ -10,28 +10,14 @@ export async function getOrCreateOrganization(c: any, organizationId: string) {
}); });
} }
export async function getOrCreateAuthUser(c: any, userId: string) { export async function getOrCreateUser(c: any, userId: string) {
return await actorClient(c).authUser.getOrCreate(authUserKey(userId), { return await actorClient(c).user.getOrCreate(userKey(userId), {
createWithInput: { userId }, createWithInput: { userId },
}); });
} }
export function getAuthUser(c: any, userId: string) { export function getUser(c: any, userId: string) {
return actorClient(c).authUser.get(authUserKey(userId)); return actorClient(c).user.get(userKey(userId));
}
/**
 * Resolves the repository actor addressed by `repositoryKey(organizationId, repoId)`
 * through `getOrCreate`, passing the three identifiers as `createWithInput`.
 */
export async function getOrCreateRepository(c: any, organizationId: string, repoId: string, remoteUrl: string) {
  const repositories = actorClient(c).repository;
  const handle = await repositories.getOrCreate(repositoryKey(organizationId, repoId), {
    createWithInput: { organizationId, repoId, remoteUrl },
  });
  return handle;
}
export function getRepository(c: any, organizationId: string, repoId: string) {
return actorClient(c).repository.get(repositoryKey(organizationId, repoId));
} }
export function getTask(c: any, organizationId: string, repoId: string, taskId: string) { export function getTask(c: any, organizationId: string, repoId: string, taskId: string) {
@ -44,11 +30,10 @@ export async function getOrCreateTask(c: any, organizationId: string, repoId: st
}); });
} }
export async function getOrCreateHistory(c: any, organizationId: string, repoId: string) { export async function getOrCreateAuditLog(c: any, organizationId: string) {
return await actorClient(c).history.getOrCreate(historyKey(organizationId, repoId), { return await actorClient(c).auditLog.getOrCreate(auditLogKey(organizationId), {
createWithInput: { createWithInput: {
organizationId, organizationId,
repoId,
}, },
}); });
} }
@ -75,8 +60,8 @@ export async function getOrCreateTaskSandbox(c: any, organizationId: string, san
}); });
} }
export function selfHistory(c: any) { export function selfAuditLog(c: any) {
return actorClient(c).history.getForId(c.actorId); return actorClient(c).auditLog.getForId(c.actorId);
} }
export function selfTask(c: any) { export function selfTask(c: any) {
@ -87,14 +72,14 @@ export function selfOrganization(c: any) {
return actorClient(c).organization.getForId(c.actorId); return actorClient(c).organization.getForId(c.actorId);
} }
export function selfRepository(c: any) { export function selfUser(c: any) {
return actorClient(c).repository.getForId(c.actorId); return actorClient(c).user.getForId(c.actorId);
}
export function selfAuthUser(c: any) {
return actorClient(c).authUser.getForId(c.actorId);
} }
export function selfGithubData(c: any) { export function selfGithubData(c: any) {
return actorClient(c).githubData.getForId(c.actorId); return actorClient(c).githubData.getForId(c.actorId);
} }
/** Returns the `taskSandbox` handle looked up by the calling actor's own `c.actorId`. */
export function selfTaskSandbox(c: any) {
  const client = actorClient(c);
  return client.taskSandbox.getForId(c.actorId);
}

View file

@ -1,6 +0,0 @@
import { defineConfig } from "rivetkit/db/drizzle";
// Drizzle config for the history actor's database: the schema is read from
// src/actors/history/db/schema.ts and `out` points at src/actors/history/db/drizzle
// (presumably where generated migrations are written — confirm against rivetkit's defineConfig).
export default defineConfig({
out: "./src/actors/history/db/drizzle",
schema: "./src/actors/history/db/schema.ts",
});

View file

@ -1,115 +0,0 @@
// @ts-nocheck
import { and, desc, eq } from "drizzle-orm";
import { actor, queue } from "rivetkit";
import { Loop, workflow } from "rivetkit/workflow";
import type { HistoryEvent } from "@sandbox-agent/foundry-shared";
import { selfHistory } from "../handles.js";
import { historyDb } from "./db/db.js";
import { events } from "./db/schema.js";
/** Creation input for the history actor; both fields are copied into actor state by `createState`. */
export interface HistoryInput {
organizationId: string;
repoId: string;
}
/**
 * Body of a "history.command.append" queue message. `appendHistoryRow` stores
 * `taskId` and `branchName` as null when omitted and JSON-serializes `payload`.
 */
export interface AppendHistoryCommand {
kind: string;
taskId?: string;
branchName?: string;
payload: Record<string, unknown>;
}
/**
 * Optional filters for the `list` action: `taskId` and `branch` are matched by
 * equality when set, and `limit` falls back to 100 when not provided.
 */
export interface ListHistoryParams {
branch?: string;
taskId?: string;
limit?: number;
}
// Queue names the history workflow loop waits on.
const HISTORY_QUEUE_NAMES = ["history.command.append"] as const;
/**
 * Inserts one row into the `events` table for the given append command.
 * Missing `taskId` / `branchName` are stored as null, the payload is stored as
 * a JSON string, and `createdAt` is the current `Date.now()` value.
 */
async function appendHistoryRow(loopCtx: any, body: AppendHistoryCommand): Promise<void> {
  const createdAt = Date.now();
  const insertion = loopCtx.db.insert(events);
  await insertion
    .values({
      taskId: body.taskId ?? null,
      branchName: body.branchName ?? null,
      kind: body.kind,
      payloadJson: JSON.stringify(body.payload),
      createdAt,
    })
    .run();
}
/**
 * Workflow body for the history actor: a loop named "history-command-loop"
 * that takes the next completable message from the history queues, appends a
 * row for "history.command.append" messages inside the "append-history-row"
 * step, completes the message with `{ ok: true }`, and always continues.
 */
async function runHistoryWorkflow(ctx: any): Promise<void> {
  const handleNextCommand = async (loopCtx: any) => {
    const msg = await loopCtx.queue.next("next-history-command", {
      names: [...HISTORY_QUEUE_NAMES],
      completable: true,
    });

    // No message, or a name other than append: nothing to do on this iteration.
    if (msg && msg.name === "history.command.append") {
      await loopCtx.step("append-history-row", async () => appendHistoryRow(loopCtx, msg.body as AppendHistoryCommand));
      await msg.complete({ ok: true });
    }

    return Loop.continue(undefined);
  };

  await ctx.loop("history-command-loop", handleNextCommand);
}
/**
 * History actor: stores events in its own database and serves them back.
 *
 * - `append` sends the command to this actor's own "history.command.append"
 *   queue and waits (up to 15 s) for it to be handled by the workflow loop.
 * - `list` reads events newest-first, optionally filtered by task id and/or
 *   branch name, limited to `params.limit` (default 100), and tags every row
 *   with the actor's `organizationId` and `repoId`.
 */
export const history = actor({
  db: historyDb,
  queues: {
    "history.command.append": queue(),
  },
  options: {
    name: "History",
    icon: "database",
  },
  createState: (_c, input: HistoryInput) => {
    const { organizationId, repoId } = input;
    return { organizationId, repoId };
  },
  actions: {
    async append(c, command: AppendHistoryCommand): Promise<void> {
      await selfHistory(c).send("history.command.append", command, { wait: true, timeout: 15_000 });
    },

    async list(c, params?: ListHistoryParams): Promise<HistoryEvent[]> {
      // Only the filters that were actually supplied become WHERE conditions.
      const filters = [
        ...(params?.taskId ? [eq(events.taskId, params.taskId)] : []),
        ...(params?.branch ? [eq(events.branchName, params.branch)] : []),
      ];

      const selection = c.db
        .select({
          id: events.id,
          taskId: events.taskId,
          branchName: events.branchName,
          kind: events.kind,
          payloadJson: events.payloadJson,
          createdAt: events.createdAt,
        })
        .from(events);
      const scoped = filters.length === 0 ? selection : selection.where(and(...filters));

      const maxRows = params?.limit ?? 100;
      const rows = await scoped.orderBy(desc(events.createdAt)).limit(maxRows).all();

      return rows.map((row) => ({
        ...row,
        organizationId: c.state.organizationId,
        repoId: c.state.repoId,
      }));
    },
  },
  run: workflow(runHistoryWorkflow),
});

View file

@ -1,43 +1,38 @@
import { authUser } from "./auth-user/index.js"; import { user } from "./user/index.js";
import { setup } from "rivetkit"; import { setup } from "rivetkit";
import { githubData } from "./github-data/index.js"; import { githubData } from "./github-data/index.js";
import { task } from "./task/index.js"; import { task } from "./task/index.js";
import { history } from "./history/index.js"; import { auditLog } from "./audit-log/index.js";
import { repository } from "./repository/index.js";
import { taskSandbox } from "./sandbox/index.js"; import { taskSandbox } from "./sandbox/index.js";
import { organization } from "./organization/index.js"; import { organization } from "./organization/index.js";
import { logger } from "../logging.js"; import { logger } from "../logging.js";
import { resolveRunnerVersion } from "../config/runner-version.js";
const RUNNER_VERSION = Math.floor(Date.now() / 1000); const runnerVersion = resolveRunnerVersion();
export const registry = setup({ export const registry = setup({
serverless: { serverless: {
basePath: "/v1/rivet", basePath: "/v1/rivet",
}, },
runner: { runner: { version: runnerVersion },
version: RUNNER_VERSION,
},
logging: { logging: {
baseLogger: logger, baseLogger: logger,
}, },
use: { use: {
authUser, user,
organization, organization,
repository,
task, task,
taskSandbox, taskSandbox,
history, auditLog,
githubData, githubData,
}, },
}); });
export * from "./context.js"; export * from "./context.js";
export * from "./events.js"; export * from "./audit-log/index.js";
export * from "./auth-user/index.js"; export * from "./user/index.js";
export * from "./github-data/index.js"; export * from "./github-data/index.js";
export * from "./task/index.js"; export * from "./task/index.js";
export * from "./history/index.js";
export * from "./keys.js"; export * from "./keys.js";
export * from "./repository/index.js";
export * from "./sandbox/index.js"; export * from "./sandbox/index.js";
export * from "./organization/index.js"; export * from "./organization/index.js";

View file

@ -4,24 +4,21 @@ export function organizationKey(organizationId: string): ActorKey {
return ["org", organizationId]; return ["org", organizationId];
} }
export function authUserKey(userId: string): ActorKey { export function userKey(userId: string): ActorKey {
return ["org", "app", "user", userId]; return ["org", "app", "user", userId];
} }
/** Builds the actor key `["org", organizationId, "repository", repoId]` for a repository. */
export function repositoryKey(organizationId: string, repoId: string): ActorKey {
  const key: ActorKey = ["org", organizationId, "repository", repoId];
  return key;
}
export function taskKey(organizationId: string, repoId: string, taskId: string): ActorKey { export function taskKey(organizationId: string, repoId: string, taskId: string): ActorKey {
return ["org", organizationId, "repository", repoId, "task", taskId]; return ["org", organizationId, "task", repoId, taskId];
} }
export function taskSandboxKey(organizationId: string, sandboxId: string): ActorKey { export function taskSandboxKey(organizationId: string, sandboxId: string): ActorKey {
return ["org", organizationId, "sandbox", sandboxId]; return ["org", organizationId, "sandbox", sandboxId];
} }
export function historyKey(organizationId: string, repoId: string): ActorKey { /** One audit log per org (not per repo) — see audit-log/index.ts for rationale. */
return ["org", organizationId, "repository", repoId, "history"]; export function auditLogKey(organizationId: string): ActorKey {
return ["org", organizationId, "audit-log"];
} }
export function githubDataKey(organizationId: string): ActorKey { export function githubDataKey(organizationId: string): ActorKey {

View file

@ -22,6 +22,16 @@ export function resolveErrorStack(error: unknown): string | undefined {
return undefined; return undefined;
} }
/**
 * Emits an info-level log entry through the shared `logger`.
 *
 * @param scope Stored under the `scope` field of the log entry.
 * @param message Passed to the logger as the log message.
 * @param context Optional extra fields merged into the entry after `scope`,
 *   so a `scope` key inside `context` takes precedence.
 */
export function logActorInfo(scope: string, message: string, context?: Record<string, unknown>): void {
  const fields = { scope, ...(context ?? {}) };
  logger.info(fields, message);
}
export function logActorWarning(scope: string, message: string, context?: Record<string, unknown>): void { export function logActorWarning(scope: string, message: string, context?: Record<string, unknown>): void {
logger.warn( logger.warn(
{ {

Some files were not shown because too many files have changed in this diff Show more