diff --git a/.dockerignore b/.dockerignore index cb03545..4ba2cf3 100644 --- a/.dockerignore +++ b/.dockerignore @@ -17,7 +17,7 @@ coverage/ # Environment .env .env.* -.openhandoff/ +.foundry/ # IDE .idea/ diff --git a/.env.development.example b/.env.development.example new file mode 100644 index 0000000..0ae0f58 --- /dev/null +++ b/.env.development.example @@ -0,0 +1,34 @@ +# Foundry local development environment. +# Copy ~/misc/the-foundry.env to .env in the repo root to populate secrets. +# .env is gitignored — never commit it. The source of truth is ~/misc/the-foundry.env. +# +# Docker Compose (just foundry-dev) and the justfile (set dotenv-load := true) +# both read .env automatically. + +APP_URL=http://localhost:4173 +BETTER_AUTH_URL=http://localhost:4173 +BETTER_AUTH_SECRET=sandbox-agent-foundry-development-only-change-me +GITHUB_REDIRECT_URI=http://localhost:4173/v1/auth/callback/github + +# Fill these in when enabling live GitHub OAuth. +GITHUB_CLIENT_ID= +GITHUB_CLIENT_SECRET= + +# Fill these in when enabling GitHub App-backed org installation and repo import. +GITHUB_APP_ID= +GITHUB_APP_CLIENT_ID= +GITHUB_APP_CLIENT_SECRET= +# Store PEM material as a quoted single-line value with \n escapes. +GITHUB_APP_PRIVATE_KEY= +# Webhook secret for verifying GitHub webhook payloads. +# Use smee.io for local development: https://smee.io/new +GITHUB_WEBHOOK_SECRET= +# Required for local GitHub webhook forwarding in compose.dev. +SMEE_URL= +SMEE_TARGET=http://backend:7741/v1/webhooks/github + +# Fill these in when enabling live Stripe billing. 
+STRIPE_SECRET_KEY= +STRIPE_PUBLISHABLE_KEY= +STRIPE_WEBHOOK_SECRET= +STRIPE_PRICE_TEAM= diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 476ed12..85f828d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -11,6 +11,8 @@ jobs: runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: dtolnay/rust-toolchain@stable with: components: rustfmt, clippy @@ -21,6 +23,35 @@ jobs: node-version: 20 cache: pnpm - run: pnpm install + - name: Run formatter hooks + shell: bash + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + git fetch origin "${{ github.base_ref }}" --depth=1 + diff_range="origin/${{ github.base_ref }}...HEAD" + elif [ "${{ github.event_name }}" = "push" ] && [ "${{ github.event.before }}" != "0000000000000000000000000000000000000000" ]; then + diff_range="${{ github.event.before }}...${{ github.sha }}" + else + diff_range="HEAD^...HEAD" + fi + + mapfile -t changed_files < <( + git diff --name-only --diff-filter=ACMR "$diff_range" \ + | grep -E '\.(cjs|cts|js|jsx|json|jsonc|mjs|mts|rs|ts|tsx)$' \ + || true + ) + + if [ ${#changed_files[@]} -eq 0 ]; then + echo "No formatter-managed files changed." 
+ exit 0 + fi + + args=() + for file in "${changed_files[@]}"; do + args+=(--file "$file") + done + + pnpm exec lefthook run pre-commit --no-stage-fixed --fail-on-changes "${args[@]}" - run: npm install -g tsx - name: Run checks run: ./scripts/release/main.ts --version 0.0.0 --only-steps run-ci-checks diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 102f612..34fb64a 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -180,10 +180,20 @@ jobs: include: - platform: linux/arm64 runner: depot-ubuntu-24.04-arm-8 - arch_suffix: -arm64 + tag_suffix: -arm64 + dockerfile: docker/runtime/Dockerfile - platform: linux/amd64 runner: depot-ubuntu-24.04-8 - arch_suffix: -amd64 + tag_suffix: -amd64 + dockerfile: docker/runtime/Dockerfile + - platform: linux/arm64 + runner: depot-ubuntu-24.04-arm-8 + tag_suffix: -full-arm64 + dockerfile: docker/runtime/Dockerfile.full + - platform: linux/amd64 + runner: depot-ubuntu-24.04-8 + tag_suffix: -full-amd64 + dockerfile: docker/runtime/Dockerfile.full runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v4 @@ -205,8 +215,8 @@ jobs: with: context: . 
push: true - tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }} - file: docker/runtime/Dockerfile + tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.tag_suffix }} + file: ${{ matrix.dockerfile }} platforms: ${{ matrix.platform }} build-args: | TARGETARCH=${{ contains(matrix.platform, 'arm64') && 'arm64' || 'amd64' }} diff --git a/.gitignore b/.gitignore index da6874a..7b6c859 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,11 @@ Cargo.lock # Example temp files .tmp-upload/ *.db -.openhandoff/ +.foundry/ # CLI binaries (downloaded during npm publish) sdks/cli/platforms/*/bin/ + +# Foundry desktop app build artifacts +foundry/packages/desktop/frontend-dist/ +foundry/packages/desktop/src-tauri/sidecars/ diff --git a/.mcp.json b/.mcp.json index cc04a2b..7bae219 100644 --- a/.mcp.json +++ b/.mcp.json @@ -1,10 +1,8 @@ { "mcpServers": { "everything": { - "args": [ - "@modelcontextprotocol/server-everything" - ], + "args": ["@modelcontextprotocol/server-everything"], "command": "npx" } } -} \ No newline at end of file +} diff --git a/CLAUDE.md b/CLAUDE.md index 866a3f1..624602a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,38 +1,5 @@ # Instructions -## ACP v1 Baseline - -- v1 is ACP-native. -- `/v1/*` is removed and returns `410 Gone` (`application/problem+json`). -- `/opencode/*` is disabled during ACP core phases and returns `503`. -- Prompt/session traffic is ACP JSON-RPC over streamable HTTP on `/v1/rpc`: - - `POST /v1/rpc` - - `GET /v1/rpc` (SSE) - - `DELETE /v1/rpc` -- Control-plane endpoints: - - `GET /v1/health` - - `GET /v1/agents` - - `POST /v1/agents/{agent}/install` -- Binary filesystem transfer endpoints (intentionally HTTP, not ACP extension methods): - - `GET /v1/fs/file` - - `PUT /v1/fs/file` - - `POST /v1/fs/upload-batch` -- Sandbox Agent ACP extension method naming: - - Custom ACP methods use `_sandboxagent/...` (not `_sandboxagent/v1/...`). 
- - Session detach method is `_sandboxagent/session/detach`. - -## API Scope - -- ACP is the primary protocol for agent/session behavior and all functionality that talks directly to the agent. -- ACP extensions may be used for gaps (for example `skills`, `models`, and related metadata), but the default is that agent-facing behavior is implemented by the agent through ACP. -- Custom HTTP APIs are for non-agent/session platform services (for example filesystem, terminals, and other host/runtime capabilities). -- Filesystem and terminal APIs remain Sandbox Agent-specific HTTP contracts and are not ACP. -- Keep `GET /v1/fs/file`, `PUT /v1/fs/file`, and `POST /v1/fs/upload-batch` on HTTP: - - These are Sandbox Agent host/runtime operations with cross-agent-consistent behavior. - - They may involve very large binary transfers that ACP JSON-RPC envelopes are not suited to stream. - - This is intentionally separate from ACP native `fs/read_text_file` and `fs/write_text_file`. - - ACP extension variants may exist in parallel, but SDK defaults should prefer HTTP for these binary transfer operations. - ## Naming and Ownership - This repository/product is **Sandbox Agent**. @@ -41,46 +8,23 @@ - Canonical extension namespace/domain string is `sandboxagent.dev` (no hyphen). - Canonical custom ACP extension method prefix is `_sandboxagent/...` (no hyphen). -## Architecture (Brief) +## Docs Terminology -- HTTP contract and problem/error mapping: `server/packages/sandbox-agent/src/router.rs` -- ACP client runtime and agent process bridge: `server/packages/sandbox-agent/src/acp_runtime/mod.rs` -- Agent/native + ACP agent process install and lazy install: `server/packages/agent-management/` -- Inspector UI served at `/ui/` and bound to ACP over HTTP from `frontend/packages/inspector/` +- Never mention "ACP" in user-facing docs (`docs/**/*.mdx`) except in docs that are specifically about ACP itself (e.g. `docs/acp-http-client.mdx`). 
+- Never expose underlying protocol method names (e.g. `session/request_permission`, `session/create`, `_sandboxagent/session/detach`) in non-ACP docs. Describe the behavior in user-facing terms instead. +- Do not describe the underlying protocol implementation in docs. Only document the SDK surface (methods, types, options). ACP protocol details belong exclusively in ACP-specific pages. +- Do not use em dashes (`—`) in docs. Use commas, periods, or parentheses instead. -## TypeScript SDK Architecture +### Docs Source Of Truth (HTTP/CLI) -- TypeScript clients are split into: - - `acp-http-client`: protocol-pure ACP-over-HTTP (`/v1/acp`) with no Sandbox-specific HTTP helpers. - - `sandbox-agent`: `SandboxAgent` SDK wrapper that combines ACP session operations with Sandbox control-plane and filesystem helpers. -- `SandboxAgent` entry points are `SandboxAgent.connect(...)` and `SandboxAgent.start(...)`. -- Stable Sandbox session methods are `createSession`, `resumeSession`, `resumeOrCreateSession`, `destroySession`, `sendSessionMethod`, `onSessionEvent`, `setSessionMode`, `setSessionModel`, `setSessionThoughtLevel`, `setSessionConfigOption`, `getSessionConfigOptions`, and `getSessionModes`. -- `Session` helpers are `prompt(...)`, `send(...)`, `onEvent(...)`, `setMode(...)`, `setModel(...)`, `setThoughtLevel(...)`, `setConfigOption(...)`, `getConfigOptions()`, and `getModes()`. -- Cleanup is `sdk.dispose()`. - -### Docs Source Of Truth - -- For TypeScript docs/examples, source of truth is implementation in: - - `sdks/typescript/src/client.ts` - - `sdks/typescript/src/index.ts` - - `sdks/acp-http-client/src/index.ts` -- Do not document TypeScript APIs unless they are exported and implemented in those files. 
- For HTTP/CLI docs/examples, source of truth is: - `server/packages/sandbox-agent/src/router.rs` - `server/packages/sandbox-agent/src/cli.rs` - Keep docs aligned to implemented endpoints/commands only (for example ACP under `/v1/acp`, not legacy `/v1/sessions` APIs). -## Source Documents - -- `~/misc/acp-docs/schema/schema.json` -- `~/misc/acp-docs/schema/meta.json` -- `research/acp/spec.md` -- `research/acp/v1-schema-to-acp-mapping.md` -- `research/acp/friction.md` -- `research/acp/todo.md` - ## Change Tracking +- If the user asks to "push" changes, treat that as permission to commit and push all current workspace changes, not a hand-picked subset, unless the user explicitly scopes the push. - Keep CLI subcommands and HTTP endpoints in sync. - Update `docs/cli.mdx` when CLI behavior changes. - Regenerate `docs/openapi.json` when HTTP contracts change. @@ -88,14 +32,6 @@ - Append blockers/decisions to `research/acp/friction.md` during ACP work. - `docs/agent-capabilities.mdx` lists models/modes/thought levels per agent. Update it when adding a new agent or changing `fallback_config_options`. If its "Last updated" date is >2 weeks old, re-run `cd scripts/agent-configs && npx tsx dump.ts` and update the doc to match. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`). - Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier. -- TypeScript SDK tests should run against a real running server/runtime over real `/v1` HTTP APIs, typically using the real `mock` agent for deterministic behavior. -- Do not use Vitest fetch/transport mocks to simulate server functionality in TypeScript SDK tests. 
- -## Docker Examples (Dev Testing) - -- When manually testing bleeding-edge (unreleased) versions of sandbox-agent in `examples/`, use `SANDBOX_AGENT_DEV=1` with the Docker-based examples. -- This triggers `examples/shared/Dockerfile.dev` which builds the server binary from local source and packages it into the Docker image. -- Example: `SANDBOX_AGENT_DEV=1 pnpm --filter @sandbox-agent/example-mcp start` ## Install Version References @@ -121,7 +57,7 @@ - `.claude/commands/post-release-testing.md` - `examples/cloudflare/Dockerfile` - `examples/daytona/src/index.ts` - - `examples/daytona/src/daytona-with-snapshot.ts` + - `examples/shared/src/docker.ts` - `examples/docker/src/index.ts` - `examples/e2b/src/index.ts` - `examples/vercel/src/index.ts` diff --git a/Cargo.toml b/Cargo.toml index 95f13c7..c353c2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [workspace] resolver = "2" members = ["server/packages/*", "gigacode"] +exclude = ["factory/packages/desktop/src-tauri", "foundry/packages/desktop/src-tauri"] [workspace.package] -version = "0.3.0" +version = "0.3.2" edition = "2021" authors = [ "Rivet Gaming, LLC " ] license = "Apache-2.0" @@ -12,13 +13,13 @@ description = "Universal API for automatic coding agents in sandboxes. 
Supports [workspace.dependencies] # Internal crates -sandbox-agent = { version = "0.3.0", path = "server/packages/sandbox-agent" } -sandbox-agent-error = { version = "0.3.0", path = "server/packages/error" } -sandbox-agent-agent-management = { version = "0.3.0", path = "server/packages/agent-management" } -sandbox-agent-agent-credentials = { version = "0.3.0", path = "server/packages/agent-credentials" } -sandbox-agent-opencode-adapter = { version = "0.3.0", path = "server/packages/opencode-adapter" } -sandbox-agent-opencode-server-manager = { version = "0.3.0", path = "server/packages/opencode-server-manager" } -acp-http-adapter = { version = "0.3.0", path = "server/packages/acp-http-adapter" } +sandbox-agent = { version = "0.3.2", path = "server/packages/sandbox-agent" } +sandbox-agent-error = { version = "0.3.2", path = "server/packages/error" } +sandbox-agent-agent-management = { version = "0.3.2", path = "server/packages/agent-management" } +sandbox-agent-agent-credentials = { version = "0.3.2", path = "server/packages/agent-credentials" } +sandbox-agent-opencode-adapter = { version = "0.3.2", path = "server/packages/opencode-adapter" } +sandbox-agent-opencode-server-manager = { version = "0.3.2", path = "server/packages/opencode-server-manager" } +acp-http-adapter = { version = "0.3.2", path = "server/packages/acp-http-adapter" } # Serialization serde = { version = "1.0", features = ["derive"] } diff --git a/README.md b/README.md index 535e5ad..eb427d7 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,6 @@ const agents = await client.listAgents(); await client.createSession("demo", { agent: "codex", agentMode: "default", - permissionMode: "plan", }); await client.postMessage("demo", { message: "Hello from the SDK." }); @@ -128,9 +127,7 @@ for await (const event of client.streamEvents("demo", { offset: 0 })) { } ``` -`permissionMode: "acceptEdits"` passes through to Claude, auto-approves file changes for Codex, and is treated as `default` for other agents. 
- -[SDK documentation](https://sandboxagent.dev/docs/sdks/typescript) — [Building a Chat UI](https://sandboxagent.dev/docs/building-chat-ui) — [Managing Sessions](https://sandboxagent.dev/docs/manage-sessions) +[SDK documentation](https://sandboxagent.dev/docs/sdks/typescript) — [Managing Sessions](https://sandboxagent.dev/docs/manage-sessions) ### HTTP Server @@ -146,10 +143,7 @@ sandbox-agent server --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468 Optional: preinstall agent binaries (no server required; they will be installed lazily on first use if you skip this): ```bash -sandbox-agent install-agent claude -sandbox-agent install-agent codex -sandbox-agent install-agent opencode -sandbox-agent install-agent amp +sandbox-agent install-agent --all ``` To disable auth locally: @@ -283,7 +277,7 @@ Coding agents expect interactive terminals with proper TTY handling. SSH with pi - **Storage of sessions on disk**: Sessions are already stored by the respective coding agents on disk. It's assumed that the consumer is streaming data from this machine to an external storage, such as Postgres, ClickHouse, or Rivet. - **Direct LLM wrappers**: Use the [Vercel AI SDK](https://ai-sdk.dev/docs/introduction) if you want to implement your own agent from scratch. - **Git Repo Management**: Just use git commands or the features provided by your sandbox provider of choice. -- **Sandbox Provider API**: Sandbox providers have many nuanced differences in their API, it does not make sense for us to try to provide a custom layer. Instead, we opt to provide guides that let you integrate this project with sandbox providers. +- **Sandbox Provider API**: Sandbox providers have many nuanced differences in their API, it does not make sense for us to try to provide a custom layer. Instead, we opt to provide guides that let you integrate this repository with sandbox providers. 
## Roadmap diff --git a/biome.json b/biome.json new file mode 100644 index 0000000..4a8bd54 --- /dev/null +++ b/biome.json @@ -0,0 +1,7 @@ +{ + "$schema": "./node_modules/@biomejs/biome/configuration_schema.json", + "formatter": { + "indentStyle": "space", + "lineWidth": 160 + } +} diff --git a/docker/runtime/Dockerfile b/docker/runtime/Dockerfile index 27b9560..bdd1a16 100644 --- a/docker/runtime/Dockerfile +++ b/docker/runtime/Dockerfile @@ -167,4 +167,4 @@ WORKDIR /home/sandbox EXPOSE 2468 ENTRYPOINT ["sandbox-agent"] -CMD ["--host", "0.0.0.0", "--port", "2468"] +CMD ["server", "--host", "0.0.0.0", "--port", "2468"] diff --git a/docker/runtime/Dockerfile.full b/docker/runtime/Dockerfile.full new file mode 100644 index 0000000..beb1664 --- /dev/null +++ b/docker/runtime/Dockerfile.full @@ -0,0 +1,162 @@ +# syntax=docker/dockerfile:1.10.0 + +# ============================================================================ +# Build inspector frontend +# ============================================================================ +FROM node:22-alpine AS inspector-build +WORKDIR /app +RUN npm install -g pnpm + +COPY package.json pnpm-lock.yaml pnpm-workspace.yaml ./ +COPY frontend/packages/inspector/package.json ./frontend/packages/inspector/ +COPY sdks/cli-shared/package.json ./sdks/cli-shared/ +COPY sdks/acp-http-client/package.json ./sdks/acp-http-client/ +COPY sdks/persist-indexeddb/package.json ./sdks/persist-indexeddb/ +COPY sdks/react/package.json ./sdks/react/ +COPY sdks/typescript/package.json ./sdks/typescript/ + +RUN pnpm install --filter @sandbox-agent/inspector... 
+ +COPY docs/openapi.json ./docs/ +COPY sdks/cli-shared ./sdks/cli-shared +COPY sdks/acp-http-client ./sdks/acp-http-client +COPY sdks/persist-indexeddb ./sdks/persist-indexeddb +COPY sdks/react ./sdks/react +COPY sdks/typescript ./sdks/typescript + +RUN cd sdks/cli-shared && pnpm exec tsup +RUN cd sdks/acp-http-client && pnpm exec tsup +RUN cd sdks/typescript && SKIP_OPENAPI_GEN=1 pnpm exec tsup +RUN cd sdks/persist-indexeddb && pnpm exec tsup +RUN cd sdks/react && pnpm exec tsup + +COPY frontend/packages/inspector ./frontend/packages/inspector +RUN cd frontend/packages/inspector && pnpm exec vite build + +# ============================================================================ +# AMD64 Builder - Uses cross-tools musl toolchain +# ============================================================================ +FROM --platform=linux/amd64 rust:1.88.0 AS builder-amd64 + +ENV DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install -y \ + musl-tools \ + musl-dev \ + llvm-14-dev \ + libclang-14-dev \ + clang-14 \ + libssl-dev \ + pkg-config \ + ca-certificates \ + g++ \ + g++-multilib \ + git \ + curl \ + wget && \ + rm -rf /var/lib/apt/lists/* + +RUN wget -q https://github.com/cross-tools/musl-cross/releases/latest/download/x86_64-unknown-linux-musl.tar.xz && \ + tar -xf x86_64-unknown-linux-musl.tar.xz -C /opt/ && \ + rm x86_64-unknown-linux-musl.tar.xz && \ + rustup target add x86_64-unknown-linux-musl + +ENV PATH="/opt/x86_64-unknown-linux-musl/bin:$PATH" \ + LIBCLANG_PATH=/usr/lib/llvm-14/lib \ + CLANG_PATH=/usr/bin/clang-14 \ + CC_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-gcc \ + CXX_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-g++ \ + AR_x86_64_unknown_linux_musl=x86_64-unknown-linux-musl-ar \ + CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=x86_64-unknown-linux-musl-gcc \ + CARGO_INCREMENTAL=0 \ + CARGO_NET_GIT_FETCH_WITH_CLI=true + +ENV SSL_VER=1.1.1w +RUN wget https://www.openssl.org/source/openssl-$SSL_VER.tar.gz && \ + tar 
-xzf openssl-$SSL_VER.tar.gz && \ + cd openssl-$SSL_VER && \ + ./Configure no-shared no-async --prefix=/musl --openssldir=/musl/ssl linux-x86_64 && \ + make -j$(nproc) && \ + make install_sw && \ + cd .. && \ + rm -rf openssl-$SSL_VER* + +ENV OPENSSL_DIR=/musl \ + OPENSSL_INCLUDE_DIR=/musl/include \ + OPENSSL_LIB_DIR=/musl/lib \ + PKG_CONFIG_ALLOW_CROSS=1 \ + RUSTFLAGS="-C target-feature=+crt-static -C link-arg=-static-libgcc" + +WORKDIR /build +COPY . . + +COPY --from=inspector-build /app/frontend/packages/inspector/dist ./frontend/packages/inspector/dist + +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/target \ + cargo build -p sandbox-agent --release --target x86_64-unknown-linux-musl && \ + cp target/x86_64-unknown-linux-musl/release/sandbox-agent /sandbox-agent + +# ============================================================================ +# ARM64 Builder - Uses Alpine with native musl +# ============================================================================ +FROM --platform=linux/arm64 rust:1.88-alpine AS builder-arm64 + +RUN apk add --no-cache \ + musl-dev \ + clang \ + llvm-dev \ + openssl-dev \ + openssl-libs-static \ + pkgconfig \ + git \ + curl \ + build-base + +RUN rustup target add aarch64-unknown-linux-musl + +ENV CARGO_INCREMENTAL=0 \ + CARGO_NET_GIT_FETCH_WITH_CLI=true \ + RUSTFLAGS="-C target-feature=+crt-static" + +WORKDIR /build +COPY . . 
+ +COPY --from=inspector-build /app/frontend/packages/inspector/dist ./frontend/packages/inspector/dist + +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/target \ + cargo build -p sandbox-agent --release --target aarch64-unknown-linux-musl && \ + cp target/aarch64-unknown-linux-musl/release/sandbox-agent /sandbox-agent + +# ============================================================================ +# Select the appropriate builder based on target architecture +# ============================================================================ +ARG TARGETARCH +FROM builder-${TARGETARCH} AS builder + +# Runtime stage - full image with all supported agents preinstalled +FROM node:22-bookworm-slim + +RUN apt-get update && apt-get install -y \ + bash \ + ca-certificates \ + curl \ + git && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent +RUN chmod +x /usr/local/bin/sandbox-agent + +RUN useradd -m -s /bin/bash sandbox +USER sandbox +WORKDIR /home/sandbox + +RUN sandbox-agent install-agent --all + +EXPOSE 2468 + +ENTRYPOINT ["sandbox-agent"] +CMD ["server", "--host", "0.0.0.0", "--port", "2468"] diff --git a/docs/agent-sessions.mdx b/docs/agent-sessions.mdx index a224acd..cf56e9c 100644 --- a/docs/agent-sessions.mdx +++ b/docs/agent-sessions.mdx @@ -125,9 +125,45 @@ for (const opt of options) { await session.setConfigOption("some-agent-option", "value"); ``` +## Handle permission requests + +For agents that request tool-use permissions, register a permission listener and reply with `once`, `always`, or `reject`: + +```ts +const session = await sdk.createSession({ + agent: "claude", + mode: "default", +}); + +session.onPermissionRequest((request) => { + console.log(request.toolCall.title, request.availableReplies); + void session.respondPermission(request.id, "once"); +}); + +await session.prompt([ + { type: "text", text: "Create 
./permission-example.txt with the text hello." }, +]); +``` + + +### Auto-approving permissions + +To auto-approve all permission requests, respond with `"once"` or `"always"` in your listener: + +```ts +session.onPermissionRequest((request) => { + void session.respondPermission(request.id, "always"); +}); +``` + +See `examples/permissions/src/index.ts` for a complete permissions example that works with Claude and Codex. + + +Some agents like Claude allow configuring permission behavior through modes (e.g. `bypassPermissions`, `acceptEdits`). We recommend leaving the mode as `default` and handling permission decisions explicitly in `onPermissionRequest` instead. + + ## Destroy a session ```ts await sdk.destroySession(session.id); ``` - diff --git a/docs/attachments.mdx b/docs/attachments.mdx index dcfb412..8deac05 100644 --- a/docs/attachments.mdx +++ b/docs/attachments.mdx @@ -58,4 +58,4 @@ Use the filesystem API to upload files, then include file references in prompt c - Use absolute file URIs in `resource_link` blocks. - If `mimeType` is omitted, the agent/runtime may infer a default. -- Support for non-text resources depends on each agent's ACP prompt capabilities. +- Support for non-text resources depends on each agent's prompt capabilities. diff --git a/docs/building-chat-ui.mdx b/docs/building-chat-ui.mdx deleted file mode 100644 index da706ff..0000000 --- a/docs/building-chat-ui.mdx +++ /dev/null @@ -1,370 +0,0 @@ ---- -title: "Building a Chat UI" -description: "Build a chat interface using the universal event stream." 
-icon: "comments" ---- - -## Setup - -### List agents - -```ts -const { agents } = await client.listAgents(); - -// Each agent exposes feature coverage via `capabilities` to determine what UI to show -const claude = agents.find((a) => a.id === "claude"); -if (claude?.capabilities.permissions) { - // Show permission approval UI -} -if (claude?.capabilities.questions) { - // Show question response UI -} -``` - -### Create a session - -```ts -const sessionId = `session-${crypto.randomUUID()}`; - -await client.createSession(sessionId, { - agent: "claude", - agentMode: "code", // Optional: agent-specific mode - permissionMode: "default", // Optional: "default" | "plan" | "bypass" | "acceptEdits" (Claude: accept edits; Codex: auto-approve file changes; others: default) - model: "claude-sonnet-4", // Optional: model override -}); -``` - -### Send a message - -```ts -await client.postMessage(sessionId, { message: "Hello, world!" }); -``` - -### Stream events - -Three options for receiving events: - -```ts -// Option 1: SSE (recommended for real-time UI) -const stream = client.streamEvents(sessionId, { offset: 0 }); -for await (const event of stream) { - handleEvent(event); -} - -// Option 2: Polling -const { events, hasMore } = await client.getEvents(sessionId, { offset: 0 }); -events.forEach(handleEvent); - -// Option 3: Turn streaming (send + stream in one call) -const stream = client.streamTurn(sessionId, { message: "Hello" }); -for await (const event of stream) { - handleEvent(event); -} -``` - -Use `offset` to track the last seen `sequence` number and resume from where you left off. 
- ---- - -## Handling Events - -### Bare minimum - -Handle item lifecycle plus turn lifecycle to render a basic chat: - -```ts -type ItemState = { - item: UniversalItem; - deltas: string[]; -}; - -const items = new Map(); -let turnInProgress = false; - -function handleEvent(event: UniversalEvent) { - switch (event.type) { - case "turn.started": { - turnInProgress = true; - break; - } - - case "turn.ended": { - turnInProgress = false; - break; - } - - case "item.started": { - const { item } = event.data as ItemEventData; - items.set(item.item_id, { item, deltas: [] }); - break; - } - - case "item.delta": { - const { item_id, delta } = event.data as ItemDeltaData; - const state = items.get(item_id); - if (state) { - state.deltas.push(delta); - } - break; - } - - case "item.completed": { - const { item } = event.data as ItemEventData; - const state = items.get(item.item_id); - if (state) { - state.item = item; - state.deltas = []; // Clear deltas, use final content - } - break; - } - } -} -``` - -When rendering: -- Use `turnInProgress` for turn-level UI state (disable send button, show global "Agent is responding", etc.). -- Use `item.status === "in_progress"` for per-item streaming state. - -```ts -function renderItem(state: ItemState) { - const { item, deltas } = state; - const isItemLoading = item.status === "in_progress"; - - // For streaming text, combine item content with accumulated deltas - const text = item.content - .filter((p) => p.type === "text") - .map((p) => p.text) - .join(""); - const streamedText = text + deltas.join(""); - - return { - content: streamedText, - isItemLoading, - isTurnLoading: turnInProgress, - role: item.role, - kind: item.kind, - }; -} -``` - -### Extra events - -Handle these for a complete implementation: - -```ts -function handleEvent(event: UniversalEvent) { - switch (event.type) { - // ... bare minimum events above ... 
- - case "session.started": { - // Session is ready - break; - } - - case "session.ended": { - const { reason, terminated_by } = event.data as SessionEndedData; - // Disable input, show end reason - // reason: "completed" | "error" | "terminated" - // terminated_by: "agent" | "daemon" - break; - } - - case "error": { - const { message, code } = event.data as ErrorData; - // Display error to user - break; - } - - case "agent.unparsed": { - const { error, location } = event.data as AgentUnparsedData; - // Parsing failure - treat as bug in development - console.error(`Parse error at ${location}: ${error}`); - break; - } - } -} -``` - -### Content parts - -Each item has `content` parts. Render based on `type`: - -```ts -function renderContentPart(part: ContentPart) { - switch (part.type) { - case "text": - return {part.text}; - - case "tool_call": - return ; - - case "tool_result": - return ; - - case "file_ref": - return ; - - case "reasoning": - return {part.text}; - - case "status": - return ; - - case "image": - return ; - } -} -``` - ---- - -## Handling Permissions - -When `permission.requested` arrives, show an approval UI: - -```ts -const pendingPermissions = new Map(); - -function handleEvent(event: UniversalEvent) { - if (event.type === "permission.requested") { - const data = event.data as PermissionEventData; - pendingPermissions.set(data.permission_id, data); - } - - if (event.type === "permission.resolved") { - const data = event.data as PermissionEventData; - pendingPermissions.delete(data.permission_id); - } -} - -// User clicks approve/deny -async function replyPermission(id: string, reply: "once" | "always" | "reject") { - await client.replyPermission(sessionId, id, { reply }); - pendingPermissions.delete(id); -} -``` - -Render permission requests: - -```ts -function PermissionRequest({ data }: { data: PermissionEventData }) { - return ( -
-

Allow: {data.action}

- - - -
- ); -} -``` - ---- - -## Handling Questions - -When `question.requested` arrives, show a selection UI: - -```ts -const pendingQuestions = new Map(); - -function handleEvent(event: UniversalEvent) { - if (event.type === "question.requested") { - const data = event.data as QuestionEventData; - pendingQuestions.set(data.question_id, data); - } - - if (event.type === "question.resolved") { - const data = event.data as QuestionEventData; - pendingQuestions.delete(data.question_id); - } -} - -// User selects answer(s) -async function answerQuestion(id: string, answers: string[][]) { - await client.replyQuestion(sessionId, id, { answers }); - pendingQuestions.delete(id); -} - -async function rejectQuestion(id: string) { - await client.rejectQuestion(sessionId, id); - pendingQuestions.delete(id); -} -``` - -Render question requests: - -```ts -function QuestionRequest({ data }: { data: QuestionEventData }) { - const [selected, setSelected] = useState([]); - - return ( -
-

{data.prompt}

- {data.options.map((option) => ( - - ))} - - -
- ); -} -``` - ---- - -## Testing with Mock Agent - -The `mock` agent lets you test UI behaviors without external credentials: - -```ts -await client.createSession("test-session", { agent: "mock" }); -``` - -Send `help` to see available commands: - -| Command | Tests | -|---------|-------| -| `help` | Lists all commands | -| `demo` | Full UI coverage sequence with markers | -| `markdown` | Streaming markdown rendering | -| `tool` | Tool call + result with file refs | -| `status` | Status item updates | -| `image` | Image content part | -| `permission` | Permission request flow | -| `question` | Question request flow | -| `error` | Error + unparsed events | -| `end` | Session ended event | -| `echo ` | Echo text as assistant message | - -Any unrecognized text is echoed back as an assistant message. - ---- - -## Reference Implementation - -The [Inspector UI](https://github.com/rivet-dev/sandbox-agent/blob/main/frontend/packages/inspector/src/App.tsx) -is a complete reference showing session management, event rendering, and HITL flows. diff --git a/docs/cli.mdx b/docs/cli.mdx index 8d2e36e..6177fb3 100644 --- a/docs/cli.mdx +++ b/docs/cli.mdx @@ -39,20 +39,24 @@ Notes: ## install-agent -Install or reinstall a single agent. +Install or reinstall a single agent, or every supported agent with `--all`. 
```bash -sandbox-agent install-agent <agent> [OPTIONS] +sandbox-agent install-agent [<agent>] [OPTIONS] ``` | Option | Description | |--------|-------------| +| `--all` | Install every supported agent | | `-r, --reinstall` | Force reinstall | -| `--agent-version <version>` | Override agent package version | -| `--agent-process-version <version>` | Override agent process version | +| `--agent-version <version>` | Override agent package version (conflicts with `--all`) | +| `--agent-process-version <version>` | Override agent process version (conflicts with `--all`) | + +Examples: ```bash sandbox-agent install-agent claude --reinstall +sandbox-agent install-agent --all ``` ### Custom Pi implementation path @@ -214,7 +218,7 @@ sandbox-agent api agents list #### api agents report -Emit a JSON report of available models, modes, and thought levels for every agent. Calls `GET /v1/agents?config=true` and groups each agent's config options by category. +Emit a JSON report of available models, modes, and thought levels for every agent, grouped by category. ```bash sandbox-agent api agents report --endpoint http://127.0.0.1:2468 | jq . diff --git a/docs/credentials.mdx b/docs/credentials.mdx deleted file mode 100644 index d014921..0000000 --- a/docs/credentials.mdx +++ /dev/null @@ -1,115 +0,0 @@ ---- -title: "Credentials" -description: "How Sandbox Agent discovers and uses provider credentials." ---- - -Sandbox Agent discovers API credentials from environment variables and local agent config files. -These credentials are passed through to underlying agent runtimes. - -## Credential sources - -Credentials are discovered in priority order. 
- -### Environment variables (highest priority) - -API keys first: - -| Variable | Provider | -|----------|----------| -| `ANTHROPIC_API_KEY` | Anthropic | -| `CLAUDE_API_KEY` | Anthropic fallback | -| `OPENAI_API_KEY` | OpenAI | -| `CODEX_API_KEY` | OpenAI fallback | - -OAuth tokens (used when OAuth extraction is enabled): - -| Variable | Provider | -|----------|----------| -| `CLAUDE_CODE_OAUTH_TOKEN` | Anthropic | -| `ANTHROPIC_AUTH_TOKEN` | Anthropic fallback | - -### Agent config files - -| Agent | Config path | Provider | -|-------|-------------|----------| -| Amp | `~/.amp/config.json` | Anthropic | -| Claude Code | `~/.claude.json`, `~/.claude/.credentials.json` | Anthropic | -| Codex | `~/.codex/auth.json` | OpenAI | -| OpenCode | `~/.local/share/opencode/auth.json` | Anthropic/OpenAI | - -## Provider requirements by agent - -| Agent | Required provider | -|-------|-------------------| -| Claude Code | Anthropic | -| Amp | Anthropic | -| Codex | OpenAI | -| OpenCode | Anthropic or OpenAI | -| Mock | None | - -## Error handling behavior - -Credential extraction is best-effort: - -- Missing or malformed files are skipped. -- Discovery continues to later sources. -- Missing credentials mark providers unavailable instead of failing server startup. - -When prompting, Sandbox Agent does not pre-validate provider credentials. Agent-native authentication errors surface through session events/output. - -## Checking credential status - -### API - -`GET /v1/agents` includes `credentialsAvailable` per agent. - -```json -{ - "agents": [ - { - "id": "claude", - "installed": true, - "credentialsAvailable": true - }, - { - "id": "codex", - "installed": true, - "credentialsAvailable": false - } - ] -} -``` - -### TypeScript SDK - -```typescript -const result = await sdk.listAgents(); - -for (const agent of result.agents) { - console.log(`${agent.id}: ${agent.credentialsAvailable ? 
"authenticated" : "no credentials"}`); -} -``` - -## Passing credentials explicitly - -Set environment variables before starting Sandbox Agent: - -```bash -export ANTHROPIC_API_KEY=sk-ant-... -export OPENAI_API_KEY=sk-... -sandbox-agent daemon start -``` - -Or with SDK-managed local spawn: - -```typescript -import { SandboxAgent } from "sandbox-agent"; - -const sdk = await SandboxAgent.start({ - spawn: { - env: { - ANTHROPIC_API_KEY: process.env.MY_ANTHROPIC_KEY, - }, - }, -}); -``` diff --git a/docs/deploy/cloudflare.mdx b/docs/deploy/cloudflare.mdx index 0dc1d1f..deca490 100644 --- a/docs/deploy/cloudflare.mdx +++ b/docs/deploy/cloudflare.mdx @@ -115,8 +115,8 @@ This keeps all Sandbox Agent calls inside the Cloudflare sandbox routing path an ## Troubleshooting streaming updates If you only receive: -- outbound `session/prompt` -- final `{ stopReason: "end_turn" }` +- the outbound prompt request +- the final `{ stopReason: "end_turn" }` response then the streamed update channel dropped. In Cloudflare sandbox paths, this is typically caused by forwarding `AbortSignal` from SDK fetch init into `containerFetch(...)`. diff --git a/docs/deploy/docker.mdx b/docs/deploy/docker.mdx index 988382a..030ddc9 100644 --- a/docs/deploy/docker.mdx +++ b/docs/deploy/docker.mdx @@ -9,18 +9,18 @@ Docker is not recommended for production isolation of untrusted workloads. 
Use d ## Quick start -Run Sandbox Agent with agents pre-installed: +Run the published full image with all supported agents pre-installed: ```bash docker run --rm -p 3000:3000 \ -e ANTHROPIC_API_KEY="$ANTHROPIC_API_KEY" \ -e OPENAI_API_KEY="$OPENAI_API_KEY" \ - alpine:latest sh -c "\ - apk add --no-cache curl ca-certificates libstdc++ libgcc bash nodejs npm && \ - curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ - sandbox-agent server --no-token --host 0.0.0.0 --port 3000" + rivetdev/sandbox-agent:0.3.1-full \ + server --no-token --host 0.0.0.0 --port 3000 ``` +The `0.3.1-full` tag pins the exact version. The moving `full` tag is also published for contributors who want the latest full image. + ## TypeScript with dockerode ```typescript @@ -31,14 +31,8 @@ const docker = new Docker(); const PORT = 3000; const container = await docker.createContainer({ - Image: "node:22-bookworm-slim", - Cmd: ["sh", "-c", [ - "apt-get update", - "DEBIAN_FRONTEND=noninteractive apt-get install -y curl ca-certificates bash libstdc++6", - "rm -rf /var/lib/apt/lists/*", - "curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh", - `sandbox-agent server --no-token --host 0.0.0.0 --port ${PORT}`, - ].join(" && ")], + Image: "rivetdev/sandbox-agent:0.3.1-full", + Cmd: ["server", "--no-token", "--host", "0.0.0.0", "--port", `${PORT}`], Env: [ `ANTHROPIC_API_KEY=${process.env.ANTHROPIC_API_KEY}`, `OPENAI_API_KEY=${process.env.OPENAI_API_KEY}`, @@ -60,6 +54,29 @@ const session = await sdk.createSession({ agent: "codex" }); await session.prompt([{ type: "text", text: "Summarize this repository." 
}]); ``` +## Building a custom image with everything preinstalled + +If you need to extend your own base image, install Sandbox Agent and preinstall every supported agent in one step: + +```dockerfile +FROM node:22-bookworm-slim + +RUN apt-get update && apt-get install -y --no-install-recommends \ + bash ca-certificates curl git && \ + rm -rf /var/lib/apt/lists/* + +RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.3.x/install.sh | sh && \ + sandbox-agent install-agent --all + +RUN useradd -m -s /bin/bash sandbox +USER sandbox +WORKDIR /home/sandbox + +EXPOSE 2468 +ENTRYPOINT ["sandbox-agent"] +CMD ["server", "--host", "0.0.0.0", "--port", "2468"] +``` + ## Building from source ```bash diff --git a/docs/deploy/foundry-self-hosting.mdx b/docs/deploy/foundry-self-hosting.mdx new file mode 100644 index 0000000..8fd43ae --- /dev/null +++ b/docs/deploy/foundry-self-hosting.mdx @@ -0,0 +1,155 @@ +--- +title: "Foundry Self-Hosting" +description: "Environment, credentials, and deployment setup for Sandbox Agent Foundry auth, GitHub, and billing." +--- + +This guide documents the deployment contract for the Foundry product surface: app auth, GitHub onboarding, repository import, and billing. + +It also covers the local-development bootstrap that uses `.env.development` only when `NODE_ENV=development`. + +## Local Development + +For backend local development, the Foundry backend now supports a development-only dotenv bootstrap: + +- It loads `.env.development.local` and `.env.development` +- It does this **only** when `NODE_ENV=development` +- It does **not** load dotenv files in production + +The example file lives at [`/.env.development.example`](https://github.com/rivet-dev/sandbox-agent/blob/main/.env.development.example). + +To use it locally: + +```bash +cp .env.development.example .env.development +``` + +Run the backend with: + +```bash +just foundry-backend-start +``` + +That recipe sets `NODE_ENV=development`, which enables the dotenv loader. 
+ +### Local Defaults + +These values can be safely defaulted for local development: + +- `APP_URL=http://localhost:4173` +- `BETTER_AUTH_URL=http://localhost:7741` +- `BETTER_AUTH_SECRET=sandbox-agent-foundry-development-only-change-me` +- `GITHUB_REDIRECT_URI=http://localhost:7741/v1/auth/callback/github` + +These should be treated as development-only values. + +## Production Environment + +For production or self-hosting, set these as real environment variables in your deployment platform. Do not rely on dotenv file loading. + +### App/Auth + +| Variable | Required | Notes | +|---|---:|---| +| `APP_URL` | Yes | Public frontend origin | +| `BETTER_AUTH_URL` | Yes | Public auth base URL | +| `BETTER_AUTH_SECRET` | Yes | Strong random secret for auth/session signing | + +### GitHub OAuth + +| Variable | Required | Notes | +|---|---:|---| +| `GITHUB_CLIENT_ID` | Yes | GitHub OAuth app client id | +| `GITHUB_CLIENT_SECRET` | Yes | GitHub OAuth app client secret | +| `GITHUB_REDIRECT_URI` | Yes | GitHub OAuth callback URL | + +Use GitHub OAuth for: + +- user sign-in +- user identity +- org selection +- access to the signed-in user’s GitHub context + +## GitHub App + +If your Foundry deployment uses GitHub App-backed organization install and repo import, also configure: + +| Variable | Required | Notes | +|---|---:|---| +| `GITHUB_APP_ID` | Yes | GitHub App id | +| `GITHUB_APP_CLIENT_ID` | Yes | GitHub App client id | +| `GITHUB_APP_CLIENT_SECRET` | Yes | GitHub App client secret | +| `GITHUB_APP_PRIVATE_KEY` | Yes | PEM private key for installation auth | + +For `.env.development` and `.env.development.local`, store `GITHUB_APP_PRIVATE_KEY` as a quoted single-line value with `\n` escapes instead of raw multi-line PEM text. 
+ +Recommended GitHub App permissions: + +- Repository `Metadata: Read` +- Repository `Contents: Read & Write` +- Repository `Pull requests: Read & Write` +- Repository `Checks: Read` +- Repository `Commit statuses: Read` + +Set the webhook URL to `https://<your-backend-host>/v1/webhooks/github` and generate a webhook secret. Store the secret as `GITHUB_WEBHOOK_SECRET`. + +This is required, not optional. Foundry depends on GitHub App webhook delivery for installation lifecycle changes, repo access changes, and ongoing repo / pull request sync. If the GitHub App is not installed for the workspace, or webhook delivery is misconfigured, Foundry will remain in an install / reconnect state and core GitHub-backed functionality will not work correctly. + +Recommended webhook subscriptions: + +- `installation` +- `installation_repositories` +- `pull_request` +- `pull_request_review` +- `pull_request_review_comment` +- `push` +- `create` +- `delete` +- `check_suite` +- `check_run` +- `status` + +Use the GitHub App for: + +- installation/reconnect state +- org repo import +- repository sync +- PR creation and updates + +Use GitHub OAuth for: + +- who the user is +- which orgs they can choose + +## Stripe + +For live billing, configure: + +| Variable | Required | Notes | +|---|---:|---| +| `STRIPE_SECRET_KEY` | Yes | Server-side Stripe secret key | +| `STRIPE_PUBLISHABLE_KEY` | Yes | Client-side Stripe publishable key | +| `STRIPE_WEBHOOK_SECRET` | Yes | Signing secret for billing webhooks | +| `STRIPE_PRICE_TEAM` | Yes | Stripe price id for the Team plan checkout session | + +Stripe should own: + +- hosted checkout +- billing portal +- subscription status +- invoice history +- webhook-driven state sync + +## Mock Invariant + +Foundry’s mock client path should continue to work end to end even when the real auth/GitHub/Stripe path exists. 
+ +That includes: + +- sign-in +- org selection/import +- settings +- billing UI +- workspace/task/session flow +- seat accrual + +Use mock mode for deterministic UI review and local product development. Use the real env-backed path for integration and self-hosting. diff --git a/docs/deploy/modal.mdx b/docs/deploy/modal.mdx new file mode 100644 index 0000000..cb081b0 --- /dev/null +++ b/docs/deploy/modal.mdx @@ -0,0 +1,97 @@ +--- +title: "Modal" +description: "Deploy Sandbox Agent inside a Modal sandbox." +--- + +## Prerequisites + +- `MODAL_TOKEN_ID` and `MODAL_TOKEN_SECRET` from [modal.com/settings](https://modal.com/settings) +- `ANTHROPIC_API_KEY` or `OPENAI_API_KEY` + +## TypeScript example + +```typescript +import { ModalClient } from "modal"; +import { SandboxAgent } from "sandbox-agent"; + +const modal = new ModalClient(); +const app = await modal.apps.fromName("sandbox-agent", { createIfMissing: true }); + +const image = modal.images + .fromRegistry("ubuntu:22.04") + .dockerfileCommands([ + "RUN apt-get update && apt-get install -y curl ca-certificates", + "RUN curl -fsSL https://releases.rivet.dev/sandbox-agent/0.2.x/install.sh | sh", + ]); + +const envs: Record = {}; +if (process.env.ANTHROPIC_API_KEY) envs.ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +if (process.env.OPENAI_API_KEY) envs.OPENAI_API_KEY = process.env.OPENAI_API_KEY; + +const secrets = Object.keys(envs).length > 0 + ? 
[await modal.secrets.fromObject(envs)] + : []; + +const sb = await modal.sandboxes.create(app, image, { + encryptedPorts: [3000], + secrets, +}); + +const exec = async (cmd: string) => { + const p = await sb.exec(["bash", "-c", cmd], { stdout: "pipe", stderr: "pipe" }); + const exitCode = await p.wait(); + if (exitCode !== 0) { + const stderr = await p.stderr.readText(); + throw new Error(`Command failed (exit ${exitCode}): ${cmd}\n${stderr}`); + } +}; + +await exec("sandbox-agent install-agent claude"); +await exec("sandbox-agent install-agent codex"); + +await sb.exec( + ["bash", "-c", "sandbox-agent server --no-token --host 0.0.0.0 --port 3000 &"], +); + +const tunnels = await sb.tunnels(); +const baseUrl = tunnels[3000].url; + +const sdk = await SandboxAgent.connect({ baseUrl }); + +const session = await sdk.createSession({ agent: "claude" }); +const off = session.onEvent((event) => { + console.log(event.sender, event.payload); +}); + +await session.prompt([{ type: "text", text: "Summarize this repository" }]); +off(); + +await sb.terminate(); +``` + +## Faster cold starts + +Modal caches image layers, so the `dockerfileCommands` that install `curl` and `sandbox-agent` only run on the first build. Subsequent sandbox creates reuse the cached image. + +## Running the test + +The example includes a health-check test. First, build the SDK: + +```bash +pnpm --filter sandbox-agent build +``` + +Then run the test with your Modal credentials: + +```bash +MODAL_TOKEN_ID= MODAL_TOKEN_SECRET= npx vitest run +``` + +Run from `examples/modal/`. The test will skip if credentials are not set. + +## Notes + +- Modal sandboxes use [gVisor](https://gvisor.dev/) for strong isolation. +- Ports are exposed via encrypted tunnels (`encryptedPorts`). Use `sb.tunnels()` to get the public HTTPS URL. +- Environment variables (API keys) are passed as Modal [Secrets](https://modal.com/docs/guide/secrets) rather than plain env vars for security. 
+- Always call `sb.terminate()` when done to avoid leaking sandbox resources. diff --git a/docs/docs.json b/docs/docs.json index 2d57276..9ba082c 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -1,131 +1,119 @@ { - "$schema": "https://mintlify.com/docs.json", - "theme": "willow", - "name": "Sandbox Agent SDK", - "appearance": { - "default": "dark", - "strict": true - }, - "colors": { - "primary": "#ff4f00", - "light": "#ff4f00", - "dark": "#ff4f00" - }, - "favicon": "/favicon.svg", - "logo": { - "light": "/logo/light.svg", - "dark": "/logo/dark.svg" - }, - "integrations": { - "posthog": { - "apiKey": "phc_6kfTNEAVw7rn1LA51cO3D69FefbKupSWFaM7OUgEpEo", - "apiHost": "https://ph.rivet.gg", - "sessionRecording": true - } - }, - "navbar": { - "links": [ - { - "label": "Gigacode", - "icon": "terminal", - "href": "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode" - }, - { - "label": "Discord", - "icon": "discord", - "href": "https://discord.gg/auCecybynK" - }, - { - "type": "github", - "href": "https://github.com/rivet-dev/sandbox-agent" - } - ] - }, - "navigation": { - "tabs": [ - { - "tab": "Documentation", - "pages": [ - { - "group": "Getting started", - "pages": [ - "quickstart", - "sdk-overview", - "react-components", - { - "group": "Deploy", - "icon": "server", - "pages": [ - "deploy/local", - "deploy/computesdk", - "deploy/e2b", - "deploy/daytona", - "deploy/vercel", - "deploy/cloudflare", - "deploy/docker", - "deploy/boxlite" - ] - } - ] - }, - { - "group": "Agent", - "pages": [ - "agent-sessions", - "attachments", - "skills-config", - "mcp-config", - "custom-tools" - ] - }, - { - "group": "System", - "pages": ["file-system", "processes"] - }, - { - "group": "Orchestration", - "pages": [ - "architecture", - "session-persistence", - "observability", - "multiplayer", - "security" - ] - }, - { - "group": "Reference", - "pages": [ - "agent-capabilities", - "cli", - "inspector", - "opencode-compatibility", - { - "group": "More", - "pages": [ - 
"credentials", - "daemon", - "cors", - "session-restoration", - "telemetry", - { - "group": "AI", - "pages": ["ai/skill", "ai/llms-txt"] - } - ] - } - ] - } - ] - }, - { - "tab": "HTTP API", - "pages": [ - { - "group": "HTTP Reference", - "openapi": "openapi.json" - } - ] - } - ] - } + "$schema": "https://mintlify.com/docs.json", + "theme": "willow", + "name": "Sandbox Agent SDK", + "appearance": { + "default": "dark", + "strict": true + }, + "colors": { + "primary": "#ff4f00", + "light": "#ff4f00", + "dark": "#ff4f00" + }, + "favicon": "/favicon.svg", + "logo": { + "light": "/logo/light.svg", + "dark": "/logo/dark.svg" + }, + "integrations": { + "posthog": { + "apiKey": "phc_6kfTNEAVw7rn1LA51cO3D69FefbKupSWFaM7OUgEpEo", + "apiHost": "https://ph.rivet.gg", + "sessionRecording": true + } + }, + "navbar": { + "links": [ + { + "label": "Gigacode", + "icon": "terminal", + "href": "https://github.com/rivet-dev/sandbox-agent/tree/main/gigacode" + }, + { + "label": "Discord", + "icon": "discord", + "href": "https://discord.gg/auCecybynK" + }, + { + "type": "github", + "href": "https://github.com/rivet-dev/sandbox-agent" + } + ] + }, + "navigation": { + "tabs": [ + { + "tab": "Documentation", + "pages": [ + { + "group": "Getting started", + "pages": [ + "quickstart", + "sdk-overview", + "llm-credentials", + "react-components", + { + "group": "Deploy", + "icon": "server", + "pages": [ + "deploy/local", + "deploy/computesdk", + "deploy/e2b", + "deploy/daytona", + "deploy/vercel", + "deploy/cloudflare", + "deploy/docker", + "deploy/boxlite" + ] + } + ] + }, + { + "group": "Agent", + "pages": ["agent-sessions", "attachments", "skills-config", "mcp-config", "custom-tools"] + }, + { + "group": "System", + "pages": ["file-system", "processes"] + }, + { + "group": "Orchestration", + "pages": ["architecture", "session-persistence", "observability", "multiplayer", "security"] + }, + { + "group": "Reference", + "pages": [ + "agent-capabilities", + "cli", + "inspector", + 
"opencode-compatibility", + { + "group": "More", + "pages": [ + "daemon", + "cors", + "session-restoration", + "telemetry", + { + "group": "AI", + "pages": ["ai/skill", "ai/llms-txt"] + } + ] + } + ] + } + ] + }, + { + "tab": "HTTP API", + "pages": [ + { + "group": "HTTP Reference", + "openapi": "openapi.json" + } + ] + } + ] + } } diff --git a/docs/inspector.mdx b/docs/inspector.mdx index 06318b2..cc5f3d0 100644 --- a/docs/inspector.mdx +++ b/docs/inspector.mdx @@ -34,6 +34,7 @@ console.log(url); - Event JSON inspector - Prompt testing - Request/response debugging +- Interactive permission prompts (approve, always-allow, or reject tool-use requests) - Process management (create, stop, kill, delete, view logs) - Interactive PTY terminal for tty processes - One-shot command execution diff --git a/docs/llm-credentials.mdx b/docs/llm-credentials.mdx new file mode 100644 index 0000000..e771740 --- /dev/null +++ b/docs/llm-credentials.mdx @@ -0,0 +1,250 @@ +--- +title: "LLM Credentials" +description: "Strategies for providing LLM provider credentials to agents." +icon: "key" +--- + +Sandbox Agent needs LLM provider credentials (Anthropic, OpenAI, etc.) to run agent sessions. + +## Configuration + +Pass credentials via `spawn.env` when starting a sandbox. Each call to `SandboxAgent.start()` can use different credentials: + +```typescript +import { SandboxAgent } from "sandbox-agent"; + +const sdk = await SandboxAgent.start({ + spawn: { + env: { + ANTHROPIC_API_KEY: "sk-ant-...", + OPENAI_API_KEY: "sk-...", + }, + }, +}); +``` + +Each agent requires credentials from a specific provider. 
Sandbox Agent checks environment variables (including those passed via `spawn.env`) and host config files: + +| Agent | Provider | Environment variables | Config files | +|-------|----------|----------------------|--------------| +| Claude Code | Anthropic | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | `~/.claude.json`, `~/.claude/.credentials.json` | +| Amp | Anthropic | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | `~/.amp/config.json` | +| Codex | OpenAI | `OPENAI_API_KEY`, `CODEX_API_KEY` | `~/.codex/auth.json` | +| OpenCode | Anthropic or OpenAI | `ANTHROPIC_API_KEY`, `OPENAI_API_KEY` | `~/.local/share/opencode/auth.json` | +| Mock | None | - | - | + +## Credential strategies + +LLM credentials are passed into the sandbox as environment variables. The agent and everything inside the sandbox has access to the token, so it's important to choose the right strategy for how you provision and scope these credentials. + +| Strategy | Who pays | Cost attribution | Best for | +|----------|----------|-----------------|----------| +| **Per-tenant gateway** (recommended) | Your organization, billed back per tenant | Per-tenant keys with budgets | Multi-tenant SaaS, usage-based billing | +| **Bring your own key** | Each user (usage-based) | Per-user by default | Dev environments, internal tools | +| **Shared API key** | Your organization | None (single bill) | Single-tenant apps, internal platforms | +| **Personal subscription** | Each user (existing subscription) | Per-user by default | Local dev, internal tools where users have Claude or Codex subscriptions | + +### Per-tenant gateway (recommended) + +Route LLM traffic through a gateway that mints per-tenant API keys, each with its own spend tracking and budget limits. + +```mermaid +graph LR + B[Your Backend] -->|tenant key| S[Sandbox] + S -->|LLM requests| G[Gateway] + G -->|scoped key| P[LLM Provider] +``` + +Your backend issues a scoped key per tenant, then passes it to the sandbox. 
This is the typical pattern when using sandbox providers (E2B, Daytona, Docker). + +```typescript expandable +import { SandboxAgent } from "sandbox-agent"; + +async function createTenantSandbox(tenantId: string) { + // Issue a scoped key for this tenant via OpenRouter + const res = await fetch("https://openrouter.ai/api/v1/keys", { + method: "POST", + headers: { + Authorization: `Bearer ${process.env.OPENROUTER_PROVISIONING_KEY}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + name: `tenant-${tenantId}`, + limit: 50, + limitResetType: "monthly", + }), + }); + const { key } = await res.json(); + + // Start a sandbox with the tenant's scoped key + const sdk = await SandboxAgent.start({ + spawn: { + env: { + OPENAI_API_KEY: key, // OpenRouter uses OpenAI-compatible endpoints + }, + }, + }); + + const session = await sdk.createSession({ + agent: "claude", + sessionInit: { cwd: "/workspace" }, + }); + + return { sdk, session }; +} +``` + +#### Security + +Recommended for multi-tenant applications. Each tenant gets a scoped key with its own budget, so exfiltration only exposes that tenant's allowance. + +#### Use cases + +- **Multi-tenant SaaS**: per-tenant spend tracking and budget limits +- **Production apps**: exposed to end users who need isolated credentials +- **Usage-based billing**: each tenant pays for their own consumption + +#### Choosing a gateway + + + + + +Managed service, zero infrastructure. [OpenRouter](https://openrouter.ai/docs/features/provisioning-api-keys) provides per-tenant API keys with spend tracking and budget limits via their Provisioning API. Pass the tenant key to Sandbox Agent as `OPENAI_API_KEY` (OpenRouter uses OpenAI-compatible endpoints). 
+ +```bash +# Create a key for a tenant with a $50/month budget +curl https://openrouter.ai/api/v1/keys \ + -H "Authorization: Bearer $PROVISIONING_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "tenant-acme", + "limit": 50, + "limitResetType": "monthly" + }' +``` + +Easiest to set up but not open-source. See [OpenRouter pricing](https://openrouter.ai/docs/framework/pricing) for details. + + + + + +Self-hosted, open-source (MIT). [LiteLLM](https://github.com/BerriAI/litellm) is an OpenAI-compatible proxy with hierarchical budgets (org, team, user, key), virtual keys, and spend tracking. Requires Python + PostgreSQL. + +```bash +# Create a team (tenant) with a $500 budget +curl http://litellm:4000/team/new \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "team_alias": "tenant-acme", + "max_budget": 500 + }' + +# Generate a key for that team +curl http://litellm:4000/key/generate \ + -H "Authorization: Bearer $LITELLM_MASTER_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "team_id": "team-abc123", + "max_budget": 100 + }' +``` + +Full control with no vendor lock-in. Organization-level features require an enterprise license. + + + + + +Self-hosted, open-source (Apache 2.0). [Portkey](https://github.com/Portkey-AI/gateway) is a lightweight OpenAI-compatible gateway supporting 200+ providers. Single binary, no database required. Create virtual keys with per-tenant budget limits and pass them to Sandbox Agent. + +Lightest operational footprint of the self-hosted options. Observability and analytics require the managed platform or your own tooling. + + + + + +To bill tenants for LLM usage, use [Stripe token billing](https://docs.stripe.com/billing/token-billing) (integrates natively with OpenRouter) or query your gateway's spend API and feed usage into your billing system. + +### Bring your own key + +Each user provides their own API key. 
Users are billed directly by the LLM provider with no additional infrastructure needed. + +Pass the user's key via `spawn.env`: + +```typescript +const sdk = await SandboxAgent.start({ + spawn: { + env: { + ANTHROPIC_API_KEY: userProvidedKey, + }, + }, +}); +``` + +#### Security + +API keys are typically long-lived. The key is visible to the agent and anything running inside the sandbox, so exfiltration is possible. This is usually acceptable for developer-facing tools where the user owns the key. + +#### Use cases + +- **Developer tools**: each user manages their own API key +- **Internal platforms**: users already have LLM provider accounts +- **Per-user billing**: no extra infrastructure needed + +### Shared credentials + +A single organization-wide API key is used for all sessions. All token usage appears on one bill with no per-user or per-tenant cost attribution. + +```typescript +const sdk = await SandboxAgent.start({ + spawn: { + env: { + ANTHROPIC_API_KEY: process.env.ORG_ANTHROPIC_KEY!, + OPENAI_API_KEY: process.env.ORG_OPENAI_KEY!, + }, + }, +}); +``` + +If you need to track or limit spend per tenant, use a per-tenant gateway instead. + +#### Security + +Not recommended for anything other than internal tooling. A single exfiltrated key exposes your organization's entire LLM budget. If you need org-paid credentials for external users, use a per-tenant gateway with scoped keys instead. + +#### Use cases + +- **Single-tenant apps**: small number of users, one bill +- **Prototyping**: cost attribution not needed yet +- **Simplicity over security**: acceptable when exfiltration risk is low + +### Personal subscription + +If the user is signed into Claude Code or Codex on the host machine, Sandbox Agent automatically picks up their OAuth tokens. No configuration is needed. + +#### Remote sandboxes + +Extract credentials locally and pass them to a remote sandbox via `spawn.env`: + +```bash +$ sandbox-agent credentials extract-env +ANTHROPIC_API_KEY=sk-ant-... 
+CLAUDE_API_KEY=sk-ant-... +OPENAI_API_KEY=sk-... +CODEX_API_KEY=sk-... +``` + +Use `-e` to prefix with `export` for shell sourcing. + +#### Security + +Personal subscriptions use OAuth tokens with a limited lifespan. These are the same credentials used when running an agent normally on the host. If a token is exfiltrated from the sandbox, the exposure window is short. + +#### Use cases + +- **Local development**: users are already signed into Claude Code or Codex +- **Internal tools**: every user has their own subscription +- **Prototyping**: no key management needed \ No newline at end of file diff --git a/docs/manage-sessions.mdx b/docs/manage-sessions.mdx index 7d51538..c39dd04 100644 --- a/docs/manage-sessions.mdx +++ b/docs/manage-sessions.mdx @@ -6,8 +6,6 @@ icon: "database" Sandbox Agent stores sessions in memory only. When the server restarts or the sandbox is destroyed, all session data is lost. It's your responsibility to persist events to your own database. -See the [Building a Chat UI](/building-chat-ui) guide for understanding session lifecycle events like `session.started` and `session.ended`. - ## Recommended approach 1. Store events to your database as they arrive @@ -18,11 +16,11 @@ This prevents duplicate writes and lets you recover from disconnects. ## Receiving Events -Two ways to receive events: SSE streaming (recommended) or polling. +Two ways to receive events: streaming (recommended) or polling. ### Streaming -Use SSE for real-time events with automatic reconnection support. +Use streaming for real-time events with automatic reconnection support. 
```typescript import { SandboxAgentClient } from "sandbox-agent"; @@ -44,7 +42,7 @@ for await (const event of client.streamEvents("my-session", { offset })) { ### Polling -If you can't use SSE streaming, poll the events endpoint: +If you can't use streaming, poll the events endpoint: ```typescript const lastEvent = await db.getLastEvent("my-session"); @@ -244,7 +242,7 @@ const events = await redis.lrange(`session:${sessionId}`, offset, -1); ## Handling disconnects -The SSE stream may disconnect due to network issues. Handle reconnection gracefully: +The event stream may disconnect due to network issues. Handle reconnection gracefully: ```typescript async function streamWithRetry(sessionId: string) { diff --git a/docs/openapi.json b/docs/openapi.json index b399f74..f2bd640 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10,7 +10,7 @@ "license": { "name": "Apache-2.0" }, - "version": "0.3.0" + "version": "0.3.2" }, "servers": [ { @@ -20,9 +20,7 @@ "paths": { "/v1/acp": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_acp_servers", "responses": { "200": { @@ -40,9 +38,7 @@ }, "/v1/acp/{server_id}": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_acp", "parameters": [ { @@ -92,9 +88,7 @@ } }, "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "post_v1_acp", "parameters": [ { @@ -204,9 +198,7 @@ } }, "delete": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "delete_v1_acp", "parameters": [ { @@ -228,9 +220,7 @@ }, "/v1/agents": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_agents", "parameters": [ { @@ -280,9 +270,7 @@ }, "/v1/agents/{agent}": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_agent", "parameters": [ { @@ -351,9 +339,7 @@ }, "/v1/agents/{agent}/install": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "post_v1_agent_install", "parameters": [ { @@ -412,9 +398,7 @@ }, "/v1/config/mcp": { 
"get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_config_mcp", "parameters": [ { @@ -460,9 +444,7 @@ } }, "put": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "put_v1_config_mcp", "parameters": [ { @@ -501,9 +483,7 @@ } }, "delete": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "delete_v1_config_mcp", "parameters": [ { @@ -534,9 +514,7 @@ }, "/v1/config/skills": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_config_skills", "parameters": [ { @@ -582,9 +560,7 @@ } }, "put": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "put_v1_config_skills", "parameters": [ { @@ -623,9 +599,7 @@ } }, "delete": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "delete_v1_config_skills", "parameters": [ { @@ -656,9 +630,7 @@ }, "/v1/fs/entries": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_fs_entries", "parameters": [ { @@ -691,9 +663,7 @@ }, "/v1/fs/entry": { "delete": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "delete_v1_fs_entry", "parameters": [ { @@ -732,9 +702,7 @@ }, "/v1/fs/file": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_fs_file", "parameters": [ { @@ -754,9 +722,7 @@ } }, "put": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "put_v1_fs_file", "parameters": [ { @@ -796,9 +762,7 @@ }, "/v1/fs/mkdir": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "post_v1_fs_mkdir", "parameters": [ { @@ -827,9 +791,7 @@ }, "/v1/fs/move": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "post_v1_fs_move", "requestBody": { "content": { @@ -857,9 +819,7 @@ }, "/v1/fs/stat": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_fs_stat", "parameters": [ { @@ -888,9 +848,7 @@ }, "/v1/fs/upload-batch": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "post_v1_fs_upload_batch", "parameters": [ { @@ -931,9 +889,7 @@ }, 
"/v1/health": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "get_v1_health", "responses": { "200": { @@ -951,9 +907,7 @@ }, "/v1/processes": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "List all managed processes.", "description": "Returns a list of all processes (running and exited) currently tracked\nby the runtime, sorted by process ID.", "operationId": "get_v1_processes", @@ -981,9 +935,7 @@ } }, "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Create a long-lived managed process.", "description": "Spawns a new process with the given command and arguments. Supports both\npipe-based and PTY (tty) modes. Returns the process descriptor on success.", "operationId": "post_v1_processes", @@ -1043,9 +995,7 @@ }, "/v1/processes/config": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Get process runtime configuration.", "description": "Returns the current runtime configuration for the process management API,\nincluding limits for concurrency, timeouts, and buffer sizes.", "operationId": "get_v1_processes_config", @@ -1073,9 +1023,7 @@ } }, "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Update process runtime configuration.", "description": "Replaces the runtime configuration for the process management API.\nValidates that all values are non-zero and clamps default timeout to max.", "operationId": "post_v1_processes_config", @@ -1125,9 +1073,7 @@ }, "/v1/processes/run": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Run a one-shot command.", "description": "Executes a command to completion and returns its stdout, stderr, exit code,\nand duration. 
Supports configurable timeout and output size limits.", "operationId": "post_v1_processes_run", @@ -1177,9 +1123,7 @@ }, "/v1/processes/{id}": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Get a single process by ID.", "description": "Returns the current state of a managed process including its status,\nPID, exit code, and creation/exit timestamps.", "operationId": "get_v1_process", @@ -1228,9 +1172,7 @@ } }, "delete": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Delete a process record.", "description": "Removes a stopped process from the runtime. Returns 409 if the process\nis still running; stop or kill it first.", "operationId": "delete_v1_process", @@ -1284,9 +1226,7 @@ }, "/v1/processes/{id}/input": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Write input to a process.", "description": "Sends data to a process's stdin (pipe mode) or PTY writer (tty mode).\nData can be encoded as base64, utf8, or text. Returns 413 if the decoded\npayload exceeds the configured `maxInputBytesPerRequest` limit.", "operationId": "post_v1_process_input", @@ -1367,9 +1307,7 @@ }, "/v1/processes/{id}/kill": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Send SIGKILL to a process.", "description": "Sends SIGKILL to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", "operationId": "post_v1_process_kill", @@ -1432,9 +1370,7 @@ }, "/v1/processes/{id}/logs": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Fetch process logs.", "description": "Returns buffered log entries for a process. Supports filtering by stream\ntype, tail count, and sequence-based resumption. 
When `follow=true`,\nreturns an SSE stream that replays buffered entries then streams live output.", "operationId": "get_v1_process_logs", @@ -1532,9 +1468,7 @@ }, "/v1/processes/{id}/stop": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Send SIGTERM to a process.", "description": "Sends SIGTERM to the process and optionally waits up to `waitMs`\nmilliseconds for the process to exit before returning.", "operationId": "post_v1_process_stop", @@ -1597,9 +1531,7 @@ }, "/v1/processes/{id}/terminal/resize": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Resize a process terminal.", "description": "Sets the PTY window size (columns and rows) for a tty-mode process and\nsends SIGWINCH so the child process can adapt.", "operationId": "post_v1_process_terminal_resize", @@ -1680,9 +1612,7 @@ }, "/v1/processes/{id}/terminal/ws": { "get": { - "tags": [ - "v1" - ], + "tags": ["v1"], "summary": "Open an interactive WebSocket terminal session.", "description": "Upgrades the connection to a WebSocket for bidirectional PTY I/O. Accepts\n`access_token` query param for browser-based auth (WebSocket API cannot\nsend custom headers). 
Streams raw PTY output as binary frames and accepts\nJSON control frames for input, resize, and close.", "operationId": "get_v1_process_terminal_ws", @@ -1759,9 +1689,7 @@ "schemas": { "AcpEnvelope": { "type": "object", - "required": [ - "jsonrpc" - ], + "required": ["jsonrpc"], "properties": { "error": { "nullable": true @@ -1795,11 +1723,7 @@ }, "AcpServerInfo": { "type": "object", - "required": [ - "serverId", - "agent", - "createdAtMs" - ], + "required": ["serverId", "agent", "createdAtMs"], "properties": { "agent": { "type": "string" @@ -1815,9 +1739,7 @@ }, "AcpServerListResponse": { "type": "object", - "required": [ - "servers" - ], + "required": ["servers"], "properties": { "servers": { "type": "array", @@ -1908,12 +1830,7 @@ }, "AgentInfo": { "type": "object", - "required": [ - "id", - "installed", - "credentialsAvailable", - "capabilities" - ], + "required": ["id", "installed", "credentialsAvailable", "capabilities"], "properties": { "capabilities": { "$ref": "#/components/schemas/AgentCapabilities" @@ -1956,11 +1873,7 @@ }, "AgentInstallArtifact": { "type": "object", - "required": [ - "kind", - "path", - "source" - ], + "required": ["kind", "path", "source"], "properties": { "kind": { "type": "string" @@ -1996,10 +1909,7 @@ }, "AgentInstallResponse": { "type": "object", - "required": [ - "already_installed", - "artifacts" - ], + "required": ["already_installed", "artifacts"], "properties": { "already_installed": { "type": "boolean" @@ -2014,9 +1924,7 @@ }, "AgentListResponse": { "type": "object", - "required": [ - "agents" - ], + "required": ["agents"], "properties": { "agents": { "type": "array", @@ -2049,9 +1957,7 @@ }, "FsActionResponse": { "type": "object", - "required": [ - "path" - ], + "required": ["path"], "properties": { "path": { "type": "string" @@ -2060,9 +1966,7 @@ }, "FsDeleteQuery": { "type": "object", - "required": [ - "path" - ], + "required": ["path"], "properties": { "path": { "type": "string" @@ -2084,12 +1988,7 @@ }, "FsEntry": { 
"type": "object", - "required": [ - "name", - "path", - "entryType", - "size" - ], + "required": ["name", "path", "entryType", "size"], "properties": { "entryType": { "$ref": "#/components/schemas/FsEntryType" @@ -2113,17 +2012,11 @@ }, "FsEntryType": { "type": "string", - "enum": [ - "file", - "directory" - ] + "enum": ["file", "directory"] }, "FsMoveRequest": { "type": "object", - "required": [ - "from", - "to" - ], + "required": ["from", "to"], "properties": { "from": { "type": "string" @@ -2139,10 +2032,7 @@ }, "FsMoveResponse": { "type": "object", - "required": [ - "from", - "to" - ], + "required": ["from", "to"], "properties": { "from": { "type": "string" @@ -2154,9 +2044,7 @@ }, "FsPathQuery": { "type": "object", - "required": [ - "path" - ], + "required": ["path"], "properties": { "path": { "type": "string" @@ -2165,11 +2053,7 @@ }, "FsStat": { "type": "object", - "required": [ - "path", - "entryType", - "size" - ], + "required": ["path", "entryType", "size"], "properties": { "entryType": { "$ref": "#/components/schemas/FsEntryType" @@ -2199,10 +2083,7 @@ }, "FsUploadBatchResponse": { "type": "object", - "required": [ - "paths", - "truncated" - ], + "required": ["paths", "truncated"], "properties": { "paths": { "type": "array", @@ -2217,10 +2098,7 @@ }, "FsWriteResponse": { "type": "object", - "required": [ - "path", - "bytesWritten" - ], + "required": ["path", "bytesWritten"], "properties": { "bytesWritten": { "type": "integer", @@ -2234,9 +2112,7 @@ }, "HealthResponse": { "type": "object", - "required": [ - "status" - ], + "required": ["status"], "properties": { "status": { "type": "string" @@ -2245,10 +2121,7 @@ }, "McpConfigQuery": { "type": "object", - "required": [ - "directory", - "mcpName" - ], + "required": ["directory", "mcpName"], "properties": { "directory": { "type": "string" @@ -2262,10 +2135,7 @@ "oneOf": [ { "type": "object", - "required": [ - "command", - "type" - ], + "required": ["command", "type"], "properties": { "args": { "type": 
"array", @@ -2299,18 +2169,13 @@ }, "type": { "type": "string", - "enum": [ - "local" - ] + "enum": ["local"] } } }, { "type": "object", - "required": [ - "url", - "type" - ], + "required": ["url", "type"], "properties": { "bearerTokenEnvVar": { "type": "string", @@ -2358,9 +2223,7 @@ }, "type": { "type": "string", - "enum": [ - "remote" - ] + "enum": ["remote"] }, "url": { "type": "string" @@ -2374,11 +2237,7 @@ }, "ProblemDetails": { "type": "object", - "required": [ - "type", - "title", - "status" - ], + "required": ["type", "title", "status"], "properties": { "detail": { "type": "string", @@ -2404,14 +2263,7 @@ }, "ProcessConfig": { "type": "object", - "required": [ - "maxConcurrentProcesses", - "defaultRunTimeoutMs", - "maxRunTimeoutMs", - "maxOutputBytes", - "maxLogBytesPerProcess", - "maxInputBytesPerRequest" - ], + "required": ["maxConcurrentProcesses", "defaultRunTimeoutMs", "maxRunTimeoutMs", "maxOutputBytes", "maxLogBytesPerProcess", "maxInputBytesPerRequest"], "properties": { "defaultRunTimeoutMs": { "type": "integer", @@ -2443,9 +2295,7 @@ }, "ProcessCreateRequest": { "type": "object", - "required": [ - "command" - ], + "required": ["command"], "properties": { "args": { "type": "array", @@ -2476,15 +2326,7 @@ }, "ProcessInfo": { "type": "object", - "required": [ - "id", - "command", - "args", - "tty", - "interactive", - "status", - "createdAtMs" - ], + "required": ["id", "command", "args", "tty", "interactive", "status", "createdAtMs"], "properties": { "args": { "type": "array", @@ -2535,9 +2377,7 @@ }, "ProcessInputRequest": { "type": "object", - "required": [ - "data" - ], + "required": ["data"], "properties": { "data": { "type": "string" @@ -2550,9 +2390,7 @@ }, "ProcessInputResponse": { "type": "object", - "required": [ - "bytesWritten" - ], + "required": ["bytesWritten"], "properties": { "bytesWritten": { "type": "integer", @@ -2562,9 +2400,7 @@ }, "ProcessListResponse": { "type": "object", - "required": [ - "processes" - ], + "required": 
["processes"], "properties": { "processes": { "type": "array", @@ -2576,13 +2412,7 @@ }, "ProcessLogEntry": { "type": "object", - "required": [ - "sequence", - "stream", - "timestampMs", - "data", - "encoding" - ], + "required": ["sequence", "stream", "timestampMs", "data", "encoding"], "properties": { "data": { "type": "string" @@ -2634,11 +2464,7 @@ }, "ProcessLogsResponse": { "type": "object", - "required": [ - "processId", - "stream", - "entries" - ], + "required": ["processId", "stream", "entries"], "properties": { "entries": { "type": "array", @@ -2656,18 +2482,11 @@ }, "ProcessLogsStream": { "type": "string", - "enum": [ - "stdout", - "stderr", - "combined", - "pty" - ] + "enum": ["stdout", "stderr", "combined", "pty"] }, "ProcessRunRequest": { "type": "object", - "required": [ - "command" - ], + "required": ["command"], "properties": { "args": { "type": "array", @@ -2703,14 +2522,7 @@ }, "ProcessRunResponse": { "type": "object", - "required": [ - "timedOut", - "stdout", - "stderr", - "stdoutTruncated", - "stderrTruncated", - "durationMs" - ], + "required": ["timedOut", "stdout", "stderr", "stdoutTruncated", "stderrTruncated", "durationMs"], "properties": { "durationMs": { "type": "integer", @@ -2752,17 +2564,11 @@ }, "ProcessState": { "type": "string", - "enum": [ - "running", - "exited" - ] + "enum": ["running", "exited"] }, "ProcessTerminalResizeRequest": { "type": "object", - "required": [ - "cols", - "rows" - ], + "required": ["cols", "rows"], "properties": { "cols": { "type": "integer", @@ -2778,10 +2584,7 @@ }, "ProcessTerminalResizeResponse": { "type": "object", - "required": [ - "cols", - "rows" - ], + "required": ["cols", "rows"], "properties": { "cols": { "type": "integer", @@ -2797,16 +2600,11 @@ }, "ServerStatus": { "type": "string", - "enum": [ - "running", - "stopped" - ] + "enum": ["running", "stopped"] }, "ServerStatusInfo": { "type": "object", - "required": [ - "status" - ], + "required": ["status"], "properties": { "status": { "$ref": 
"#/components/schemas/ServerStatus" @@ -2821,10 +2619,7 @@ }, "SkillSource": { "type": "object", - "required": [ - "type", - "source" - ], + "required": ["type", "source"], "properties": { "ref": { "type": "string", @@ -2851,9 +2646,7 @@ }, "SkillsConfig": { "type": "object", - "required": [ - "sources" - ], + "required": ["sources"], "properties": { "sources": { "type": "array", @@ -2865,10 +2658,7 @@ }, "SkillsConfigQuery": { "type": "object", - "required": [ - "directory", - "skillName" - ], + "required": ["directory", "skillName"], "properties": { "directory": { "type": "string" @@ -2886,4 +2676,4 @@ "description": "ACP proxy v1 API" } ] -} \ No newline at end of file +} diff --git a/docs/processes.mdx b/docs/processes.mdx index 45c246c..282c0f1 100644 --- a/docs/processes.mdx +++ b/docs/processes.mdx @@ -9,7 +9,7 @@ The process API supports: - **One-shot execution** — run a command to completion and capture stdout, stderr, and exit code - **Managed processes** — spawn, list, stop, kill, and delete long-lived processes -- **Log streaming** — fetch buffered logs or follow live output via SSE +- **Log streaming** — fetch buffered logs or follow live output - **Terminals** — full PTY support with bidirectional WebSocket I/O - **Configurable limits** — control concurrency, timeouts, and buffer sizes per runtime @@ -155,7 +155,7 @@ curl "http://127.0.0.1:2468/v1/processes/proc_1/logs?tail=50&stream=combined" ``` -### Follow logs via SSE +### Follow logs Stream log entries in real time. The subscription replays buffered entries first, then streams new output as it arrives. diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 25f7d66..caf2c21 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -61,9 +61,11 @@ icon: "rocket" ```bash - docker run -e ANTHROPIC_API_KEY="sk-ant-..." \ + docker run -p 2468:2468 \ + -e ANTHROPIC_API_KEY="sk-ant-..." \ -e OPENAI_API_KEY="sk-..." 
\ - your-image + rivetdev/sandbox-agent:0.3.1-full \ + server --no-token --host 0.0.0.0 --port 2468 ``` @@ -75,6 +77,9 @@ icon: "rocket" Use the `mock` agent for SDK and integration testing without provider credentials. + + For per-tenant token tracking, budget enforcement, or usage-based billing, see [LLM Credentials](/llm-credentials) for gateway options like OpenRouter, LiteLLM, and Portkey. + @@ -217,12 +222,7 @@ icon: "rocket" To preinstall agents: ```bash - sandbox-agent install-agent claude - sandbox-agent install-agent codex - sandbox-agent install-agent opencode - sandbox-agent install-agent amp - sandbox-agent install-agent pi - sandbox-agent install-agent cursor + sandbox-agent install-agent --all ``` If agents are not installed up front, they are lazily installed when creating a session. diff --git a/docs/react-components.mdx b/docs/react-components.mdx index e37e2a3..93183b2 100644 --- a/docs/react-components.mdx +++ b/docs/react-components.mdx @@ -6,6 +6,14 @@ icon: "react" `@sandbox-agent/react` exposes small React components built on top of the `sandbox-agent` SDK. +Current exports: + +- `AgentConversation` for a combined transcript + composer surface +- `ProcessTerminal` for attaching to a running tty process +- `AgentTranscript` for rendering session/message timelines without bundling any styles +- `ChatComposer` for a reusable prompt input/send surface +- `useTranscriptVirtualizer` for wiring large transcript lists to a scroll container + ## Install ```bash @@ -101,3 +109,137 @@ export default function TerminalPane() { - `onExit`, `onError`: optional lifecycle callbacks See [Processes](/processes) for the lower-level terminal APIs. + +## Headless transcript + +`AgentTranscript` is intentionally unstyled. 
It follows the common headless React pattern used by libraries like Radix, Headless UI, and React Aria: behavior lives in the component, while styling stays in your app through `className`, slot-level `classNames`, and `data-*` state attributes on the rendered DOM. + +```tsx TranscriptPane.tsx +import { + AgentTranscript, + type AgentTranscriptClassNames, + type TranscriptEntry, +} from "@sandbox-agent/react"; + +const transcriptClasses: Partial = { + root: "transcript", + message: "transcript-message", + messageContent: "transcript-message-content", + toolGroupContainer: "transcript-tools", + toolGroupHeader: "transcript-tools-header", + toolItem: "transcript-tool-item", + toolItemHeader: "transcript-tool-item-header", + toolItemBody: "transcript-tool-item-body", + divider: "transcript-divider", + dividerText: "transcript-divider-text", + error: "transcript-error", +}; + +export function TranscriptPane({ entries }: { entries: TranscriptEntry[] }) { + return ( +
{entry.text}
} + renderInlinePendingIndicator={() => ...} + renderToolGroupIcon={() => Events} + renderChevron={(expanded) => {expanded ? "Hide" : "Show"}} + /> + ); +} +``` + +```css +.transcript { + display: grid; + gap: 12px; +} + +.transcript [data-slot="message"][data-variant="user"] .transcript-message-content { + background: #161616; + color: white; +} + +.transcript [data-slot="message"][data-variant="assistant"] .transcript-message-content { + background: #f4f4f0; + color: #161616; +} + +.transcript [data-slot="tool-item"][data-failed="true"] { + border-color: #d33; +} + +.transcript [data-slot="tool-item-header"][data-expanded="true"] { + background: rgba(0, 0, 0, 0.06); +} +``` + +`AgentTranscript` accepts `TranscriptEntry[]`, which matches the Inspector timeline shape: + +- `message` entries render user/assistant text +- `tool` entries render expandable tool input/output sections +- `reasoning` entries render expandable reasoning blocks +- `meta` entries render status rows or expandable metadata details + +Useful props: + +- `className`: root class hook +- `classNames`: slot-level class hooks for styling from outside the package +- `scrollRef` + `virtualize`: opt into TanStack Virtual against an external scroll container +- `renderMessageText`: custom text or markdown renderer +- `renderToolItemIcon`, `renderToolGroupIcon`, `renderChevron`, `renderEventLinkContent`: presentation overrides +- `renderInlinePendingIndicator`, `renderThinkingState`: loading/thinking UI overrides +- `isDividerEntry`, `canOpenEvent`, `getToolGroupSummary`: behavior overrides for grouping and labels + +## Transcript virtualization hook + +`useTranscriptVirtualizer` exposes the same TanStack Virtual behavior used by `AgentTranscript` when `virtualize` is enabled. 
+ +- Pass the grouped transcript rows you want to virtualize +- Pass a `scrollRef` that points at the actual scrollable element +- Use it when you need transcript-aware virtualization outside the stock `AgentTranscript` renderer + +## Composer and conversation + +`ChatComposer` is the headless message input. `AgentConversation` composes `AgentTranscript` and `ChatComposer` so apps can reuse the transcript/composer pairing without pulling in Inspector session chrome. + +```tsx ConversationPane.tsx +import { AgentConversation, type TranscriptEntry } from "@sandbox-agent/react"; + +export function ConversationPane({ + entries, + message, + onMessageChange, + onSubmit, +}: { + entries: TranscriptEntry[]; + message: string; + onMessageChange: (value: string) => void; + onSubmit: () => void; +}) { + return ( + Start the conversation.} + transcriptProps={{ + renderMessageText: (entry) =>
{entry.text}
, + }} + composerProps={{ + message, + onMessageChange, + onSubmit, + placeholder: "Send a message...", + }} + /> + ); +} +``` + +Useful `ChatComposer` props: + +- `className` and `classNames` for external styling +- `inputRef` to manage focus or autoresize from the consumer +- `textareaProps` for lower-level textarea behavior +- `allowEmptySubmit` when the submit action is valid without draft text, such as a stop button + +Use `transcriptProps` and `composerProps` when you want the shared composition but still need custom rendering or behavior. Use `transcriptClassNames` and `composerClassNames` when you want styling hooks for each subcomponent. diff --git a/docs/sdk-overview.mdx b/docs/sdk-overview.mdx index 5bd2a50..fc4aee1 100644 --- a/docs/sdk-overview.mdx +++ b/docs/sdk-overview.mdx @@ -138,6 +138,19 @@ const options = await session.getConfigOptions(); const modes = await session.getModes(); ``` +Handle permission requests from agents that ask before executing tools: + +```ts +const claude = await sdk.createSession({ + agent: "claude", + mode: "default", +}); + +claude.onPermissionRequest((request) => { + void claude.respondPermission(request.id, "once"); +}); +``` + See [Agent Sessions](/agent-sessions) for full details on config options and error handling. 
## Events @@ -209,6 +222,10 @@ Parameters: - `baseUrl` (required unless `fetch` is provided): Sandbox Agent server URL - `token` (optional): Bearer token for authenticated servers - `headers` (optional): Additional request headers -- `fetch` (optional): Custom fetch implementation used by SDK HTTP and ACP calls -- `waitForHealth` (optional, defaults to enabled): waits for `/v1/health` before HTTP helpers and ACP session setup proceed; pass `false` to disable or `{ timeoutMs }` to bound the wait +- `fetch` (optional): Custom fetch implementation used by SDK HTTP and session calls +- `waitForHealth` (optional, defaults to enabled): waits for `/v1/health` before HTTP helpers and session setup proceed; pass `false` to disable or `{ timeoutMs }` to bound the wait - `signal` (optional): aborts the startup `/v1/health` wait used by `connect()` + +## LLM credentials + +Sandbox Agent supports personal API keys, shared organization keys, and per-tenant gateway keys with budget enforcement. See [LLM Credentials](/llm-credentials) for setup details. 
diff --git a/examples/boxlite/src/index.ts b/examples/boxlite/src/index.ts index c2401be..bdcd53a 100644 --- a/examples/boxlite/src/index.ts +++ b/examples/boxlite/src/index.ts @@ -11,17 +11,14 @@ setupImage(); console.log("Creating BoxLite sandbox..."); const box = new SimpleBox({ - rootfsPath: OCI_DIR, - env, - ports: [{ hostPort: 3000, guestPort: 3000 }], - diskSizeGb: 4, + rootfsPath: OCI_DIR, + env, + ports: [{ hostPort: 3000, guestPort: 3000 }], + diskSizeGb: 4, }); console.log("Starting server..."); -const result = await box.exec( - "sh", "-c", - "nohup sandbox-agent server --no-token --host 0.0.0.0 --port 3000 >/tmp/sandbox-agent.log 2>&1 &", -); +const result = await box.exec("sh", "-c", "nohup sandbox-agent server --no-token --host 0.0.0.0 --port 3000 >/tmp/sandbox-agent.log 2>&1 &"); if (result.exitCode !== 0) throw new Error(`Failed to start server: ${result.stderr}`); const baseUrl = "http://localhost:3000"; @@ -36,9 +33,9 @@ console.log(" Press Ctrl+C to stop."); const keepAlive = setInterval(() => {}, 60_000); const cleanup = async () => { - clearInterval(keepAlive); - await box.stop(); - process.exit(0); + clearInterval(keepAlive); + await box.stop(); + process.exit(0); }; process.once("SIGINT", cleanup); process.once("SIGTERM", cleanup); diff --git a/examples/boxlite/src/setup-image.ts b/examples/boxlite/src/setup-image.ts index 25b157e..9c15c99 100644 --- a/examples/boxlite/src/setup-image.ts +++ b/examples/boxlite/src/setup-image.ts @@ -5,12 +5,12 @@ export const DOCKER_IMAGE = "sandbox-agent-boxlite"; export const OCI_DIR = new URL("../oci-image", import.meta.url).pathname; export function setupImage() { - console.log(`Building image "${DOCKER_IMAGE}" (cached after first run)...`); - execSync(`docker build -t ${DOCKER_IMAGE} ${new URL("..", import.meta.url).pathname}`, { stdio: "inherit" }); + console.log(`Building image "${DOCKER_IMAGE}" (cached after first run)...`); + execSync(`docker build -t ${DOCKER_IMAGE} ${new URL("..", 
import.meta.url).pathname}`, { stdio: "inherit" }); - if (!existsSync(`${OCI_DIR}/oci-layout`)) { - console.log("Exporting to OCI layout..."); - mkdirSync(OCI_DIR, { recursive: true }); - execSync(`docker save ${DOCKER_IMAGE} | tar -xf - -C ${OCI_DIR}`, { stdio: "inherit" }); - } + if (!existsSync(`${OCI_DIR}/oci-layout`)) { + console.log("Exporting to OCI layout..."); + mkdirSync(OCI_DIR, { recursive: true }); + execSync(`docker save ${DOCKER_IMAGE} | tar -xf - -C ${OCI_DIR}`, { stdio: "inherit" }); + } } diff --git a/examples/cloudflare/frontend/App.tsx b/examples/cloudflare/frontend/App.tsx index 812f83d..499fc63 100644 --- a/examples/cloudflare/frontend/App.tsx +++ b/examples/cloudflare/frontend/App.tsx @@ -71,7 +71,7 @@ export function App() { if (event.type === "permission.requested") { const data = event.data as PermissionEventData; log(`[Auto-approved] ${data.action}`); - await client.replyPermission(sessionIdRef.current, data.permission_id, { reply: "once" }); + await client.respondPermission(sessionIdRef.current, data.permission_id, { reply: "once" }); } // Reject questions (don't support interactive input) @@ -128,7 +128,7 @@ export function App() { console.error("Event stream error:", err); } }, - [log] + [log], ); const send = useCallback(async () => { @@ -162,12 +162,7 @@ export function App() {
-
- - - ) : ( - - { - void copyMessage(message); - }} - thinkingTimerLabel={thinkingTimerLabel} - /> - - )} - {!isTerminal && promptTab ? ( - updateDraft(value, attachments)} - onSend={sendMessage} - onStop={stopAgent} - onRemoveAttachment={removeAttachment} - onChangeModel={changeModel} - onSetDefaultModel={setDefaultModel} - /> - ) : null} - - ); -}); - -interface MockLayoutProps { - workspaceId: string; - selectedHandoffId?: string | null; - selectedSessionId?: string | null; -} - -export function MockLayout({ workspaceId, selectedHandoffId, selectedSessionId }: MockLayoutProps) { - const navigate = useNavigate(); - const viewModel = useSyncExternalStore( - handoffWorkbenchClient.subscribe.bind(handoffWorkbenchClient), - handoffWorkbenchClient.getSnapshot.bind(handoffWorkbenchClient), - handoffWorkbenchClient.getSnapshot.bind(handoffWorkbenchClient), - ); - const handoffs = viewModel.handoffs ?? []; - const projects = viewModel.projects ?? []; - const [activeTabIdByHandoff, setActiveTabIdByHandoff] = useState>({}); - const [lastAgentTabIdByHandoff, setLastAgentTabIdByHandoff] = useState>({}); - const [openDiffsByHandoff, setOpenDiffsByHandoff] = useState>({}); - - const activeHandoff = useMemo( - () => handoffs.find((handoff) => handoff.id === selectedHandoffId) ?? handoffs[0] ?? null, - [handoffs, selectedHandoffId], - ); - - useEffect(() => { - if (activeHandoff) { - return; - } - - const fallbackHandoffId = handoffs[0]?.id; - if (!fallbackHandoffId) { - return; - } - - const fallbackHandoff = handoffs.find((handoff) => handoff.id === fallbackHandoffId) ?? null; - - void navigate({ - to: "/workspaces/$workspaceId/handoffs/$handoffId", - params: { - workspaceId, - handoffId: fallbackHandoffId, - }, - search: { sessionId: fallbackHandoff?.tabs[0]?.id ?? undefined }, - replace: true, - }); - }, [activeHandoff, handoffs, navigate, workspaceId]); - - const openDiffs = activeHandoff ? 
sanitizeOpenDiffs(activeHandoff, openDiffsByHandoff[activeHandoff.id]) : []; - const lastAgentTabId = activeHandoff ? sanitizeLastAgentTabId(activeHandoff, lastAgentTabIdByHandoff[activeHandoff.id]) : null; - const activeTabId = activeHandoff - ? sanitizeActiveTabId(activeHandoff, activeTabIdByHandoff[activeHandoff.id], openDiffs, lastAgentTabId) - : null; - - const syncRouteSession = useCallback( - (handoffId: string, sessionId: string | null, replace = false) => { - void navigate({ - to: "/workspaces/$workspaceId/handoffs/$handoffId", - params: { - workspaceId, - handoffId, - }, - search: { sessionId: sessionId ?? undefined }, - ...(replace ? { replace: true } : {}), - }); - }, - [navigate, workspaceId], - ); - - useEffect(() => { - if (!activeHandoff) { - return; - } - - const resolvedRouteSessionId = sanitizeLastAgentTabId(activeHandoff, selectedSessionId); - if (!resolvedRouteSessionId) { - return; - } - - if (selectedSessionId !== resolvedRouteSessionId) { - syncRouteSession(activeHandoff.id, resolvedRouteSessionId, true); - return; - } - - if (lastAgentTabIdByHandoff[activeHandoff.id] === resolvedRouteSessionId) { - return; - } - - setLastAgentTabIdByHandoff((current) => ({ - ...current, - [activeHandoff.id]: resolvedRouteSessionId, - })); - setActiveTabIdByHandoff((current) => { - const currentActive = current[activeHandoff.id]; - if (currentActive && isDiffTab(currentActive)) { - return current; - } - - return { - ...current, - [activeHandoff.id]: resolvedRouteSessionId, - }; - }); - }, [activeHandoff, lastAgentTabIdByHandoff, selectedSessionId, syncRouteSession]); - - const createHandoff = useCallback(() => { - void (async () => { - const repoId = activeHandoff?.repoId ?? viewModel.repos[0]?.id ?? 
""; - if (!repoId) { - throw new Error("Cannot create a handoff without an available repo"); - } - - const task = window.prompt("Describe the handoff task", "Investigate and implement the requested change"); - if (!task) { - return; - } - - const title = window.prompt("Optional handoff title", "")?.trim() || undefined; - const branch = window.prompt("Optional branch name", "")?.trim() || undefined; - const { handoffId, tabId } = await handoffWorkbenchClient.createHandoff({ - repoId, - task, - model: "gpt-4o", - ...(title ? { title } : {}), - ...(branch ? { branch } : {}), - }); - await navigate({ - to: "/workspaces/$workspaceId/handoffs/$handoffId", - params: { - workspaceId, - handoffId, - }, - search: { sessionId: tabId ?? undefined }, - }); - })(); - }, [activeHandoff?.repoId, navigate, viewModel.repos, workspaceId]); - - const openDiffTab = useCallback( - (path: string) => { - if (!activeHandoff) { - throw new Error("Cannot open a diff tab without an active handoff"); - } - setOpenDiffsByHandoff((current) => { - const existing = sanitizeOpenDiffs(activeHandoff, current[activeHandoff.id]); - if (existing.includes(path)) { - return current; - } - - return { - ...current, - [activeHandoff.id]: [...existing, path], - }; - }); - setActiveTabIdByHandoff((current) => ({ - ...current, - [activeHandoff.id]: diffTabId(path), - })); - }, - [activeHandoff], - ); - - const selectHandoff = useCallback( - (id: string) => { - const handoff = handoffs.find((candidate) => candidate.id === id) ?? null; - void navigate({ - to: "/workspaces/$workspaceId/handoffs/$handoffId", - params: { - workspaceId, - handoffId: id, - }, - search: { sessionId: handoff?.tabs[0]?.id ?? 
undefined }, - }); - }, - [handoffs, navigate, workspaceId], - ); - - const markHandoffUnread = useCallback((id: string) => { - void handoffWorkbenchClient.markHandoffUnread({ handoffId: id }); - }, []); - - const renameHandoff = useCallback( - (id: string) => { - const currentHandoff = handoffs.find((handoff) => handoff.id === id); - if (!currentHandoff) { - throw new Error(`Unable to rename missing handoff ${id}`); - } - - const nextTitle = window.prompt("Rename handoff", currentHandoff.title); - if (nextTitle === null) { - return; - } - - const trimmedTitle = nextTitle.trim(); - if (!trimmedTitle) { - return; - } - - void handoffWorkbenchClient.renameHandoff({ handoffId: id, value: trimmedTitle }); - }, - [handoffs], - ); - - const renameBranch = useCallback( - (id: string) => { - const currentHandoff = handoffs.find((handoff) => handoff.id === id); - if (!currentHandoff) { - throw new Error(`Unable to rename missing handoff ${id}`); - } - - const nextBranch = window.prompt("Rename branch", currentHandoff.branch ?? 
""); - if (nextBranch === null) { - return; - } - - const trimmedBranch = nextBranch.trim(); - if (!trimmedBranch) { - return; - } - - void handoffWorkbenchClient.renameBranch({ handoffId: id, value: trimmedBranch }); - }, - [handoffs], - ); - - const archiveHandoff = useCallback(() => { - if (!activeHandoff) { - throw new Error("Cannot archive without an active handoff"); - } - void handoffWorkbenchClient.archiveHandoff({ handoffId: activeHandoff.id }); - }, [activeHandoff]); - - const publishPr = useCallback(() => { - if (!activeHandoff) { - throw new Error("Cannot publish PR without an active handoff"); - } - void handoffWorkbenchClient.publishPr({ handoffId: activeHandoff.id }); - }, [activeHandoff]); - - const revertFile = useCallback( - (path: string) => { - if (!activeHandoff) { - throw new Error("Cannot revert a file without an active handoff"); - } - setOpenDiffsByHandoff((current) => ({ - ...current, - [activeHandoff.id]: sanitizeOpenDiffs(activeHandoff, current[activeHandoff.id]).filter((candidate) => candidate !== path), - })); - setActiveTabIdByHandoff((current) => ({ - ...current, - [activeHandoff.id]: - current[activeHandoff.id] === diffTabId(path) - ? sanitizeLastAgentTabId(activeHandoff, lastAgentTabIdByHandoff[activeHandoff.id]) - : current[activeHandoff.id] ?? null, - })); - - void handoffWorkbenchClient.revertFile({ - handoffId: activeHandoff.id, - path, - }); - }, - [activeHandoff, lastAgentTabIdByHandoff], - ); - - if (!activeHandoff) { - return ( - - - - -
-
-

Create your first handoff

-

- {viewModel.repos.length > 0 - ? "Start from the sidebar to create a handoff on the first available repo." - : "No repos are available in this workspace yet."} -

- -
-
-
-
- -
- ); - } - - return ( - - - { - setActiveTabIdByHandoff((current) => ({ ...current, [activeHandoff.id]: tabId })); - }} - onSetLastAgentTabId={(tabId) => { - setLastAgentTabIdByHandoff((current) => ({ ...current, [activeHandoff.id]: tabId })); - }} - onSetOpenDiffs={(paths) => { - setOpenDiffsByHandoff((current) => ({ ...current, [activeHandoff.id]: paths })); - }} - /> - - - ); -} diff --git a/factory/packages/frontend/src/components/mock-layout/history-minimap.tsx b/factory/packages/frontend/src/components/mock-layout/history-minimap.tsx deleted file mode 100644 index 83c8904..0000000 --- a/factory/packages/frontend/src/components/mock-layout/history-minimap.tsx +++ /dev/null @@ -1,134 +0,0 @@ -import { memo, useEffect, useState } from "react"; -import { useStyletron } from "baseui"; -import { LabelXSmall } from "baseui/typography"; - -import { formatMessageTimestamp, type HistoryEvent } from "./view-model"; - -export const HistoryMinimap = memo(function HistoryMinimap({ - events, - onSelect, -}: { - events: HistoryEvent[]; - onSelect: (event: HistoryEvent) => void; -}) { - const [css, theme] = useStyletron(); - const [open, setOpen] = useState(false); - const [activeEventId, setActiveEventId] = useState(events[events.length - 1]?.id ?? null); - - useEffect(() => { - if (!events.some((event) => event.id === activeEventId)) { - setActiveEventId(events[events.length - 1]?.id ?? null); - } - }, [activeEventId, events]); - - if (events.length === 0) { - return null; - } - - return ( -
setOpen(true)} - onMouseLeave={() => setOpen(false)} - > - {open ? ( -
-
- - Handoff Events - - {events.length} -
-
- {events.map((event) => { - const isActive = event.id === activeEventId; - return ( - - ); - })} -
-
- ) : null} - -
- {events.map((event) => { - const isActive = event.id === activeEventId; - return ( -
- ); - })} -
-
- ); -}); diff --git a/factory/packages/frontend/src/components/mock-layout/message-list.tsx b/factory/packages/frontend/src/components/mock-layout/message-list.tsx deleted file mode 100644 index baf758f..0000000 --- a/factory/packages/frontend/src/components/mock-layout/message-list.tsx +++ /dev/null @@ -1,197 +0,0 @@ -import { memo, type MutableRefObject, type Ref } from "react"; -import { useStyletron } from "baseui"; -import { LabelSmall, LabelXSmall } from "baseui/typography"; -import { Copy } from "lucide-react"; - -import { HistoryMinimap } from "./history-minimap"; -import { SpinnerDot } from "./ui"; -import { buildDisplayMessages, formatMessageDuration, formatMessageTimestamp, type AgentTab, type HistoryEvent, type Message } from "./view-model"; - -export const MessageList = memo(function MessageList({ - tab, - scrollRef, - messageRefs, - historyEvents, - onSelectHistoryEvent, - copiedMessageId, - onCopyMessage, - thinkingTimerLabel, -}: { - tab: AgentTab | null | undefined; - scrollRef: Ref; - messageRefs: MutableRefObject>; - historyEvents: HistoryEvent[]; - onSelectHistoryEvent: (event: HistoryEvent) => void; - copiedMessageId: string | null; - onCopyMessage: (message: Message) => void; - thinkingTimerLabel: string | null; -}) { - const [css, theme] = useStyletron(); - const messages = buildDisplayMessages(tab); - - return ( - <> - {historyEvents.length > 0 ? : null} -
- {tab && messages.length === 0 ? ( -
- - {!tab.created ? "Choose an agent and model, then send your first message" : "No messages yet in this session"} - -
- ) : null} - {messages.map((message) => { - const isUser = message.sender === "client"; - const isCopied = copiedMessageId === message.id; - const messageTimestamp = formatMessageTimestamp(message.createdAtMs); - const displayFooter = isUser - ? messageTimestamp - : message.durationMs - ? `${messageTimestamp} • Took ${formatMessageDuration(message.durationMs)}` - : null; - - return ( -
{ - if (node) { - messageRefs.current.set(message.id, node); - } else { - messageRefs.current.delete(message.id); - } - }} - className={css({ display: "flex", justifyContent: isUser ? "flex-end" : "flex-start" })} - > -
-
-
- {message.text} -
-
-
- {displayFooter ? ( - - {displayFooter} - - ) : null} - -
-
-
- ); - })} - {tab && tab.status === "running" && messages.length > 0 ? ( -
- - - Agent is thinking - {thinkingTimerLabel ? ( - - {thinkingTimerLabel} - - ) : null} - -
- ) : null} -
- - ); -}); diff --git a/factory/packages/frontend/src/components/mock-layout/prompt-composer.tsx b/factory/packages/frontend/src/components/mock-layout/prompt-composer.tsx deleted file mode 100644 index f3cdcd2..0000000 --- a/factory/packages/frontend/src/components/mock-layout/prompt-composer.tsx +++ /dev/null @@ -1,181 +0,0 @@ -import { memo, type Ref } from "react"; -import { useStyletron } from "baseui"; -import { ArrowUpFromLine, FileCode, Square, X } from "lucide-react"; - -import { ModelPicker } from "./model-picker"; -import { PROMPT_TEXTAREA_MIN_HEIGHT, PROMPT_TEXTAREA_MAX_HEIGHT } from "./ui"; -import { fileName, type LineAttachment, type ModelId } from "./view-model"; - -export const PromptComposer = memo(function PromptComposer({ - draft, - textareaRef, - placeholder, - attachments, - defaultModel, - model, - isRunning, - onDraftChange, - onSend, - onStop, - onRemoveAttachment, - onChangeModel, - onSetDefaultModel, -}: { - draft: string; - textareaRef: Ref; - placeholder: string; - attachments: LineAttachment[]; - defaultModel: ModelId; - model: ModelId; - isRunning: boolean; - onDraftChange: (value: string) => void; - onSend: () => void; - onStop: () => void; - onRemoveAttachment: (id: string) => void; - onChangeModel: (model: ModelId) => void; - onSetDefaultModel: (model: ModelId) => void; -}) { - const [css, theme] = useStyletron(); - - return ( -
- {attachments.length > 0 ? ( -
- {attachments.map((attachment) => ( -
- - - {fileName(attachment.filePath)}:{attachment.lineNumber} - - onRemoveAttachment(attachment.id)} - /> -
- ))} -
- ) : null} -
-