fix: add agent_server_logs module import to lib.rs

This commit is contained in:
Nathan Flurry 2026-01-27 17:24:42 -08:00
parent be7aecb362
commit 7a5bb2b8b0
87 changed files with 2438 additions and 1671 deletions

View file

@ -15,15 +15,19 @@ on:
defaults:
run:
# Enable fail-fast behavior
shell: bash -e {0}
env:
# Disable incremental compilation for faster from-scratch builds
CARGO_INCREMENTAL: 0
jobs:
setup:
name: "Setup"
runs-on: ubuntu-24.04
permissions:
contents: write
steps:
- uses: actions/checkout@v4
with:
@ -38,19 +42,28 @@ jobs:
node-version: 20
cache: pnpm
- name: Install tsx
run: npm install -g tsx
- name: Run setup phase
- name: Setup
env:
R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
run: |
CMD="./scripts/release/main.ts --version '${{ inputs.version }}' --phase setup-ci"
# Install dependencies
pnpm install
# Install tsx globally
npm install -g tsx
# Build command based on inputs
CMD="./scripts/release/main.ts --version \"${{ github.event.inputs.version }}\" --phase setup-ci"
if [ "${{ inputs.latest }}" != "true" ]; then
CMD="$CMD --no-latest"
fi
eval "$CMD"
binaries:
name: "Build Binaries"
name: "Build & Upload Binaries"
needs: [setup]
strategy:
matrix:
@ -97,15 +110,34 @@ jobs:
docker/release/build.sh ${{ matrix.target }}
ls -la dist/
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: binary-${{ matrix.target }}
path: dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}
- name: Upload to R2
env:
AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
run: |
# Install AWS CLI
sudo apt-get update
sudo apt-get install -y unzip curl
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install --update
COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
# Upload to commit directory for later promotion
aws s3 cp \
"${BINARY_PATH}" \
"s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
--region auto \
--endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
--checksum-algorithm CRC32
complete:
name: "Complete"
needs: [setup, binaries]
if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }}
runs-on: ubuntu-24.04
steps:
- uses: actions/checkout@v4
@ -122,37 +154,31 @@ jobs:
registry-url: "https://registry.npmjs.org"
cache: pnpm
- name: Install tsx
run: npm install -g tsx
- name: Install AWS CLI
run: |
sudo apt-get update
sudo apt-get install -y unzip curl
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
sudo ./aws/install --update
- name: Download binaries
uses: actions/download-artifact@v4
with:
path: dist/
pattern: binary-*
merge-multiple: true
- name: List downloaded binaries
run: ls -la dist/
- name: Publish & upload artifacts
- name: Complete
env:
# https://cli.github.com/manual/gh_help_environment
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
run: |
CMD="./scripts/release/main.ts --version '${{ inputs.version }}' --phase complete-ci --no-validate-git"
# Authenticate with NPM
cat << EOF > ~/.npmrc
//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}
EOF
# Install dependencies
pnpm install
# Install tsx globally
npm install -g tsx
# Build command based on inputs
CMD="./scripts/release/main.ts --version \"${{ github.event.inputs.version }}\" --phase complete-ci --no-validate-git"
if [ "${{ inputs.latest }}" != "true" ]; then
CMD="$CMD --no-latest"
fi
eval "$CMD"

View file

@ -6,7 +6,18 @@ Documentation lives in `docs/` (Mintlify). Start with:
- `docs/quickstart.mdx` to run the daemon
- `docs/http-api.mdx` and `docs/cli.mdx` for API references
Quickstart (local dev):
## Development Setup
### Prerequisites
- Rust (latest stable)
- Node.js 20+
- pnpm 9+
- [just](https://github.com/casey/just) (optional, but recommended)
### Quickstart
Run the agent locally:
```bash
sandbox-agent --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468
@ -26,5 +37,112 @@ Run the web console (includes all dependencies):
```bash
pnpm dev -F @sandbox-agent/inspector
# or
just dev
```
### Common Commands
```bash
# Run checks (cargo check, fmt, typecheck)
just check
# Run tests
just test
# Format code
just fmt
# Build the agent
just build
```
## Releasing
Releases are managed through a release script that handles version bumps, artifact uploads, npm/crates.io publishing, and GitHub releases.
### Prerequisites
1. Install dependencies in the release script directory:
```bash
cd scripts/release && pnpm install && cd ../..
```
2. Ensure you have the following configured:
- `gh` CLI authenticated
- npm authenticated (`npm login`)
- `CARGO_REGISTRY_TOKEN` for crates.io (or run `cargo login`)
- R2 credentials: `R2_RELEASES_ACCESS_KEY_ID` and `R2_RELEASES_SECRET_ACCESS_KEY`
(or 1Password CLI for local dev)
### Release Commands
```bash
# Release with automatic patch bump
just release --patch
# Release with minor bump
just release --minor
# Release with specific version
just release --version 0.2.0
# Release a pre-release
just release --version 0.2.0-rc.1 --no-latest
```
### Release Flow
The release process has four phases:
**1. setup-local** (runs locally via `just release`):
- Confirms release details with user
- Runs local checks (cargo check, fmt, typecheck)
- Updates version numbers across all packages
- Generates artifacts (OpenAPI spec, TypeScript SDK)
- Commits and pushes changes
- Triggers the GitHub Actions release workflow
**2. setup-ci** (runs in CI):
- Runs full test suite (Rust + TypeScript)
- Builds TypeScript SDK and uploads to R2 at `sandbox-agent/{commit}/typescript/`
**3. binaries** (runs in CI after setup-ci succeeds, one parallel job per target):
- Builds binaries for all platforms via Docker cross-compilation
- Uploads binaries to R2 at `sandbox-agent/{commit}/binaries/`
**4. complete-ci** (runs in CI after setup + binaries):
- Publishes crates to crates.io
- Publishes npm packages (SDK + CLI)
- Promotes artifacts from `{commit}/` to `{version}/` (S3-to-S3 copy)
- Creates git tag and pushes
- Creates GitHub release with auto-generated notes
### Manual Steps
To run specific steps manually:
```bash
# Run only local checks
cd scripts/release && pnpm exec tsx ./main.ts --version 0.1.0 --only-steps run-local-checks
# Build binaries locally
just release-build-all
```
## Project Structure
```
sandbox-daemon/
├── server/packages/ # Rust crates
│ ├── sandbox-agent/ # Main agent binary
│ ├── agent-schema/ # Agent-specific schemas (Claude, Codex, etc.)
│ └── ...
├── sdks/
│ ├── typescript/ # TypeScript SDK (npm: sandbox-agent)
│ └── cli/ # CLI wrapper (npm: @sandbox-agent/cli)
├── frontend/packages/
│ └── inspector/ # Web console UI
├── docs/ # Mintlify documentation
└── scripts/release/ # Release automation
```

4
docs/deploy/index.mdx Normal file
View file

@ -0,0 +1,4 @@
---
sidebarTitle: Overview
---

View file

@ -42,7 +42,7 @@
"universal-api",
"frontend",
"building-chat-ui",
"persisting-chat-logs"
"manage-session-state"
]
},
{

View file

@ -1,9 +1,9 @@
---
title: "Persisting Chat Logs"
description: "Persist event streams so you can resume sessions and keep durable chat history."
title: "Manage Session State"
description: "TODO"
---
Persisting chat logs is easiest when you treat the event stream as the source of truth.
TODO
## Recommended approach

View file

@ -1,42 +1,48 @@
set dotenv-load := true
# =============================================================================
# Release
# =============================================================================
[group('release')]
release *ARGS:
cd scripts/release && pnpm exec tsx ./main.ts --phase setup-local {{ ARGS }}
# Build a single target via Docker
[group('release')]
release-build target="x86_64-unknown-linux-musl":
./docker/release/build.sh {{target}}
# Build all release binaries
[group('release')]
release-build-all:
./docker/release/build.sh x86_64-unknown-linux-musl
./docker/release/build.sh x86_64-pc-windows-gnu
./docker/release/build.sh x86_64-apple-darwin
./docker/release/build.sh aarch64-apple-darwin
# Upload binaries from dist/ (requires AWS creds + aws cli)
release-upload-binaries version latest="auto":
{{~ if latest == "auto" ~}}
npx tsx scripts/release/main.ts --version {{version}} --upload-binaries
{{~ else if latest == "true" ~}}
npx tsx scripts/release/main.ts --version {{version}} --latest --upload-binaries
{{~ else if latest == "false" ~}}
npx tsx scripts/release/main.ts --version {{version}} --no-latest --upload-binaries
{{~ else ~}}
@echo "latest must be auto|true|false" && exit 1
{{~ endif ~}}
# =============================================================================
# Development
# =============================================================================
# Upload TypeScript artifacts + install.sh
release-upload-artifacts version latest="auto":
{{~ if latest == "auto" ~}}
npx tsx scripts/release/main.ts --version {{version}} --upload-typescript --upload-install
{{~ else if latest == "true" ~}}
npx tsx scripts/release/main.ts --version {{version}} --latest --upload-typescript --upload-install
{{~ else if latest == "false" ~}}
npx tsx scripts/release/main.ts --version {{version}} --no-latest --upload-typescript --upload-install
{{~ else ~}}
@echo "latest must be auto|true|false" && exit 1
{{~ endif ~}}
[group('dev')]
dev:
pnpm dev -F @sandbox-agent/inspector
# Full local release test: build all, then upload binaries + artifacts
release-test version latest="auto":
just release-build-all
just release-upload-binaries {{version}} {{latest}}
just release-upload-artifacts {{version}} {{latest}}
[group('dev')]
build:
cargo build -p sandbox-agent
[group('dev')]
test:
cargo test --all-targets
[group('dev')]
check:
cargo check --all-targets
cargo fmt --all -- --check
pnpm run typecheck
[group('dev')]
fmt:
cargo fmt --all

231
pnpm-lock.yaml generated
View file

@ -53,10 +53,10 @@ importers:
dependencies:
'@anthropic-ai/claude-code':
specifier: latest
version: 2.1.19
version: 2.1.20
'@openai/codex':
specifier: latest
version: 0.89.0
version: 0.92.0
cheerio:
specifier: ^1.0.0
version: 1.2.0
@ -105,6 +105,34 @@ importers:
specifier: ^4.19.0
version: 4.21.0
scripts/release:
dependencies:
commander:
specifier: ^12.1.0
version: 12.1.0
execa:
specifier: ^9.5.0
version: 9.6.1
glob:
specifier: ^10.3.10
version: 10.5.0
semver:
specifier: ^7.6.0
version: 7.7.3
devDependencies:
'@types/node':
specifier: ^22.0.0
version: 22.19.7
'@types/semver':
specifier: ^7.5.8
version: 7.7.1
tsx:
specifier: ^4.19.0
version: 4.21.0
typescript:
specifier: ^5.9.3
version: 5.9.3
sdks/cli:
devDependencies:
vitest:
@ -139,8 +167,8 @@ importers:
packages:
'@anthropic-ai/claude-code@2.1.19':
resolution: {integrity: sha512-/bUlQuX/6nKr1Zqfi/9Q6xf7WonUBk72ZfKKENU4WVrIFWqTv/0JJsoW/dHol9QBNHvyfKIeBbYu4avHNRAnuQ==}
'@anthropic-ai/claude-code@2.1.20':
resolution: {integrity: sha512-5r9OEF5TTmkhOKWtJ9RYqdn/vchwQWABO3dvgZVXftqlBZV/IiKjHVISu0dKtqWzByLBolchwePrhY68ul0QrA==}
engines: {node: '>=18.0.0'}
hasBin: true
@ -652,11 +680,15 @@ packages:
resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
engines: {node: '>= 8'}
'@openai/codex@0.89.0':
resolution: {integrity: sha512-mIX0FLKTT26sWqLcpwb2GvRI89snDNvUbgTxEtrPMP/wXRtYasTLROY0UBL1qLHVrm532mU4RLepNITqBPvAOQ==}
'@openai/codex@0.92.0':
resolution: {integrity: sha512-DR9A2QlJDtEpMwqUGMIztTCzzCYTVrM7rqG3XuMVURnQ4b7XrScmY5RnSUuUZ/ga7wDTqw0BTmVzPurm4NX3Tw==}
engines: {node: '>=16'}
hasBin: true
'@pkgjs/parseargs@0.11.0':
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
engines: {node: '>=14'}
'@rolldown/pluginutils@1.0.0-beta.27':
resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==}
@ -785,6 +817,13 @@ packages:
cpu: [x64]
os: [win32]
'@sec-ant/readable-stream@0.4.1':
resolution: {integrity: sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg==}
'@sindresorhus/merge-streams@4.0.0':
resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==}
engines: {node: '>=18'}
'@types/babel__core@7.20.5':
resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==}
@ -896,6 +935,9 @@ packages:
resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==}
engines: {node: '>=12'}
balanced-match@1.0.2:
resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==}
baseline-browser-mapping@2.9.18:
resolution: {integrity: sha512-e23vBV1ZLfjb9apvfPk4rHVu2ry6RIr2Wfs+O324okSidrX7pTAnEJPCh/O5BtRlr7QtZI7ktOP3vsqr7Z5XoA==}
hasBin: true
@ -903,6 +945,9 @@ packages:
boolbase@1.0.0:
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
brace-expansion@2.0.2:
resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==}
braces@3.0.3:
resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==}
engines: {node: '>=8'}
@ -955,6 +1000,10 @@ packages:
color-name@1.1.4:
resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==}
commander@12.1.0:
resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==}
engines: {node: '>=18'}
commander@13.1.0:
resolution: {integrity: sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==}
engines: {node: '>=18'}
@ -1060,6 +1109,10 @@ packages:
estree-walker@3.0.3:
resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
execa@9.6.1:
resolution: {integrity: sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA==}
engines: {node: ^18.19.0 || >=20.5.0}
expect-type@1.3.0:
resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==}
engines: {node: '>=12.0.0'}
@ -1080,6 +1133,10 @@ packages:
picomatch:
optional: true
figures@6.1.0:
resolution: {integrity: sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg==}
engines: {node: '>=18'}
fill-range@7.1.1:
resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
engines: {node: '>=8'}
@ -1100,6 +1157,10 @@ packages:
resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==}
engines: {node: '>=6.9.0'}
get-stream@9.0.1:
resolution: {integrity: sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==}
engines: {node: '>=18'}
get-tsconfig@4.13.0:
resolution: {integrity: sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==}
@ -1107,6 +1168,10 @@ packages:
resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
engines: {node: '>= 6'}
glob@10.5.0:
resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==}
hasBin: true
glob@11.1.0:
resolution: {integrity: sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==}
engines: {node: 20 || >=22}
@ -1115,6 +1180,10 @@ packages:
htmlparser2@10.1.0:
resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==}
human-signals@8.0.1:
resolution: {integrity: sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ==}
engines: {node: '>=18.18.0'}
iconv-lite@0.6.3:
resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
engines: {node: '>=0.10.0'}
@ -1135,9 +1204,24 @@ packages:
resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
engines: {node: '>=0.12.0'}
is-plain-obj@4.1.0:
resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==}
engines: {node: '>=12'}
is-stream@4.0.1:
resolution: {integrity: sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==}
engines: {node: '>=18'}
is-unicode-supported@2.1.0:
resolution: {integrity: sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ==}
engines: {node: '>=18'}
isexe@2.0.0:
resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
jackspeak@3.4.3:
resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==}
jackspeak@4.1.1:
resolution: {integrity: sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==}
engines: {node: 20 || >=22}
@ -1184,6 +1268,9 @@ packages:
loupe@3.2.1:
resolution: {integrity: sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==}
lru-cache@10.4.3:
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
lru-cache@11.2.4:
resolution: {integrity: sha512-B5Y16Jr9LB9dHVkh6ZevG+vAbOsNOYCX+sXvFWFu7B3Iz5mijW3zdbMyhsh8ANd2mSWBYdJgnqi+mL7/LrOPYg==}
engines: {node: 20 || >=22}
@ -1211,6 +1298,10 @@ packages:
resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
engines: {node: 20 || >=22}
minimatch@9.0.5:
resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==}
engines: {node: '>=16 || 14 >=14.17'}
minipass@7.1.2:
resolution: {integrity: sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==}
engines: {node: '>=16 || 14 >=14.17'}
@ -1240,6 +1331,10 @@ packages:
resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
engines: {node: '>=0.10.0'}
npm-run-path@6.0.0:
resolution: {integrity: sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==}
engines: {node: '>=18'}
nth-check@2.1.1:
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
@ -1254,6 +1349,10 @@ packages:
package-json-from-dist@1.0.1:
resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==}
parse-ms@4.0.0:
resolution: {integrity: sha512-TXfryirbmq34y8QBwgqCVLi+8oA3oWx2eAnSn62ITyEhEYaWRlVZ2DvMM9eZbMs/RfxPu/PK/aBLyGj4IrqMHw==}
engines: {node: '>=18'}
parse5-htmlparser2-tree-adapter@7.1.0:
resolution: {integrity: sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==}
@ -1267,6 +1366,14 @@ packages:
resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==}
engines: {node: '>=8'}
path-key@4.0.0:
resolution: {integrity: sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==}
engines: {node: '>=12'}
path-scurry@1.11.1:
resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==}
engines: {node: '>=16 || 14 >=14.18'}
path-scurry@2.0.1:
resolution: {integrity: sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA==}
engines: {node: 20 || >=22}
@ -1318,6 +1425,10 @@ packages:
resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==}
engines: {node: ^10 || ^12 || >=14}
pretty-ms@9.3.0:
resolution: {integrity: sha512-gjVS5hOP+M3wMm5nmNOucbIrqudzs9v/57bWRHQWLYklXqoXKrVfYW2W9+glfGsqtPgpiz5WwyEEB+ksXIx3gQ==}
engines: {node: '>=18'}
queue-microtask@1.2.3:
resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
@ -1421,6 +1532,10 @@ packages:
resolution: {integrity: sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==}
engines: {node: '>=12'}
strip-final-newline@4.0.0:
resolution: {integrity: sha512-aulFJcD6YK8V1G7iRB5tigAP4TsHBZZrOV8pjV++zdUwmeV8uzbY7yn6h9MswN62adStNZFuCIx4haBnRuMDaw==}
engines: {node: '>=18'}
strip-literal@3.1.0:
resolution: {integrity: sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==}
@ -1562,6 +1677,10 @@ packages:
resolution: {integrity: sha512-Gpq0iNm5M6cQWlyHQv9MV+uOj1jWk7LpkoE5vSp/7zjb4zMdAcUD+VL5y0nH4p9EbUklq00eVIIX/XcDHzu5xg==}
engines: {node: '>=20.18.1'}
unicorn-magic@0.3.0:
resolution: {integrity: sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==}
engines: {node: '>=18'}
update-browserslist-db@1.2.3:
resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==}
hasBin: true
@ -1670,9 +1789,13 @@ packages:
resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==}
engines: {node: '>=12'}
yoctocolors@2.1.2:
resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==}
engines: {node: '>=18'}
snapshots:
'@anthropic-ai/claude-code@2.1.19':
'@anthropic-ai/claude-code@2.1.20':
optionalDependencies:
'@img/sharp-darwin-arm64': 0.33.5
'@img/sharp-darwin-x64': 0.33.5
@ -2053,7 +2176,10 @@ snapshots:
'@nodelib/fs.scandir': 2.1.5
fastq: 1.20.1
'@openai/codex@0.89.0': {}
'@openai/codex@0.92.0': {}
'@pkgjs/parseargs@0.11.0':
optional: true
'@rolldown/pluginutils@1.0.0-beta.27': {}
@ -2132,6 +2258,10 @@ snapshots:
'@rollup/rollup-win32-x64-msvc@4.56.0':
optional: true
'@sec-ant/readable-stream@0.4.1': {}
'@sindresorhus/merge-streams@4.0.0': {}
'@types/babel__core@7.20.5':
dependencies:
'@babel/parser': 7.28.6
@ -2255,10 +2385,16 @@ snapshots:
assertion-error@2.0.1: {}
balanced-match@1.0.2: {}
baseline-browser-mapping@2.9.18: {}
boolbase@1.0.0: {}
brace-expansion@2.0.2:
dependencies:
balanced-match: 1.0.2
braces@3.0.3:
dependencies:
fill-range: 7.1.1
@ -2325,6 +2461,8 @@ snapshots:
color-name@1.1.4: {}
commander@12.1.0: {}
commander@13.1.0: {}
commander@4.1.1: {}
@ -2459,6 +2597,21 @@ snapshots:
dependencies:
'@types/estree': 1.0.8
execa@9.6.1:
dependencies:
'@sindresorhus/merge-streams': 4.0.0
cross-spawn: 7.0.6
figures: 6.1.0
get-stream: 9.0.1
human-signals: 8.0.1
is-plain-obj: 4.1.0
is-stream: 4.0.1
npm-run-path: 6.0.0
pretty-ms: 9.3.0
signal-exit: 4.1.0
strip-final-newline: 4.0.0
yoctocolors: 2.1.2
expect-type@1.3.0: {}
fast-glob@3.3.3:
@ -2477,6 +2630,10 @@ snapshots:
optionalDependencies:
picomatch: 4.0.3
figures@6.1.0:
dependencies:
is-unicode-supported: 2.1.0
fill-range@7.1.1:
dependencies:
to-regex-range: 5.0.1
@ -2497,6 +2654,11 @@ snapshots:
gensync@1.0.0-beta.2: {}
get-stream@9.0.1:
dependencies:
'@sec-ant/readable-stream': 0.4.1
is-stream: 4.0.1
get-tsconfig@4.13.0:
dependencies:
resolve-pkg-maps: 1.0.0
@ -2505,6 +2667,15 @@ snapshots:
dependencies:
is-glob: 4.0.3
glob@10.5.0:
dependencies:
foreground-child: 3.3.1
jackspeak: 3.4.3
minimatch: 9.0.5
minipass: 7.1.2
package-json-from-dist: 1.0.1
path-scurry: 1.11.1
glob@11.1.0:
dependencies:
foreground-child: 3.3.1
@ -2521,6 +2692,8 @@ snapshots:
domutils: 3.2.2
entities: 7.0.1
human-signals@8.0.1: {}
iconv-lite@0.6.3:
dependencies:
safer-buffer: 2.1.2
@ -2535,8 +2708,20 @@ snapshots:
is-number@7.0.0: {}
is-plain-obj@4.1.0: {}
is-stream@4.0.1: {}
is-unicode-supported@2.1.0: {}
isexe@2.0.0: {}
jackspeak@3.4.3:
dependencies:
'@isaacs/cliui': 8.0.2
optionalDependencies:
'@pkgjs/parseargs': 0.11.0
jackspeak@4.1.1:
dependencies:
'@isaacs/cliui': 8.0.2
@ -2567,6 +2752,8 @@ snapshots:
loupe@3.2.1: {}
lru-cache@10.4.3: {}
lru-cache@11.2.4: {}
lru-cache@5.1.1:
@ -2592,6 +2779,10 @@ snapshots:
dependencies:
'@isaacs/brace-expansion': 5.0.0
minimatch@9.0.5:
dependencies:
brace-expansion: 2.0.2
minipass@7.1.2: {}
minizlib@3.1.0:
@ -2619,6 +2810,11 @@ snapshots:
normalize-path@3.0.0: {}
npm-run-path@6.0.0:
dependencies:
path-key: 4.0.0
unicorn-magic: 0.3.0
nth-check@2.1.1:
dependencies:
boolbase: 1.0.0
@ -2636,6 +2832,8 @@ snapshots:
package-json-from-dist@1.0.1: {}
parse-ms@4.0.0: {}
parse5-htmlparser2-tree-adapter@7.1.0:
dependencies:
domhandler: 5.0.3
@ -2651,6 +2849,13 @@ snapshots:
path-key@3.1.1: {}
path-key@4.0.0: {}
path-scurry@1.11.1:
dependencies:
lru-cache: 10.4.3
minipass: 7.1.2
path-scurry@2.0.1:
dependencies:
lru-cache: 11.2.4
@ -2687,6 +2892,10 @@ snapshots:
picocolors: 1.1.1
source-map-js: 1.2.1
pretty-ms@9.3.0:
dependencies:
parse-ms: 4.0.0
queue-microtask@1.2.3: {}
react-dom@18.3.1(react@18.3.1):
@ -2794,6 +3003,8 @@ snapshots:
dependencies:
ansi-regex: 6.2.2
strip-final-newline@4.0.0: {}
strip-literal@3.1.0:
dependencies:
js-tokens: 9.0.1
@ -2936,6 +3147,8 @@ snapshots:
undici@7.19.1: {}
unicorn-magic@0.3.0: {}
update-browserslist-db@1.2.3(browserslist@4.28.1):
dependencies:
browserslist: 4.28.1
@ -3039,3 +3252,5 @@ snapshots:
yallist@5.0.0: {}
yargs-parser@21.1.1: {}
yoctocolors@2.1.2: {}

View file

@ -5,3 +5,4 @@ packages:
- "sdks/cli/platforms/*"
- "resources/agent-schemas"
- "resources/vercel-ai-sdk-schemas"
- "scripts/release"

View file

@ -1,12 +1,27 @@
# Server Testing
## Test placement
Place all new tests under `server/packages/**/tests/` (or a package-specific `tests/` folder). Avoid inline tests inside source files unless there is no viable alternative.
## Test locations (overview)
- Sandbox-agent integration tests live under `server/packages/sandbox-agent/tests/`:
- Agent flow coverage in `agent-flows/`
- Agent management coverage in `agent-management/`
- Shared server manager coverage in `server-manager/`
- HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
- UI coverage in `ui/`
- Shared helpers in `common/`
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
## Snapshot tests
The HTTP/SSE snapshot suite lives in:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs`
The HTTP/SSE snapshot suite entrypoint lives in:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
Snapshots are written to:
- `server/packages/sandbox-agent/tests/snapshots/`
- `server/packages/sandbox-agent/tests/http/snapshots/`
## Agent selection
@ -47,9 +62,20 @@ Health checks run in a blocking thread to avoid Tokio runtime drop errors inside
## Snapshot stability
To keep snapshots deterministic:
- Use the mock agent as the **master** event sequence; all other agents must match its behavior 1:1.
- Snapshots should compare a **canonical event skeleton** (event order matters) with strict ordering across:
- `item.started` → `item.delta` → `item.completed`
- presence/absence of `session.ended`
- permission/question request and resolution flows
- Scrub non-deterministic fields from snapshots:
- IDs, timestamps, native IDs
- text content, tool inputs/outputs, provider-specific metadata
- `source` and `synthetic` flags (these are implementation details)
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
- Event streams are truncated after the first assistant or error event.
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
- Unknown events are preserved as `kind: unknown` (raw payload in universal schema).
- Prefer snapshot-based event skeleton assertions over manual event-order assertions in tests.
## Typical commands

View file

@ -25,87 +25,3 @@ pub mod amp {
//! AMP Code SDK types.
include!(concat!(env!("OUT_DIR"), "/amp.rs"));
}
// Inline serialization smoke tests for the generated agent schema types.
// Compiled only for test builds.
#[cfg(test)]
mod tests {
// Bring the parent module's items into scope for the tests below.
use super::*;
// Serializes a Claude `BashInput` to JSON, checks the command text appears in
// the output, then deserializes it back and checks the command is unchanged.
#[test]
fn test_claude_bash_input() {
let input = claude::BashInput {
command: "ls -la".to_string(),
timeout: Some(5000.0),
working_directory: None,
};
let json = serde_json::to_string(&input).unwrap();
assert!(json.contains("ls -la"));
let parsed: claude::BashInput = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.command, "ls -la");
}
// Serializes a Codex `ItemCompleted` server notification and checks that the
// JSON contains the strings "item/completed", "agentMessage" and the message text.
#[test]
fn test_codex_server_notification() {
// Test ItemCompletedNotification with AgentMessage
let notification = codex::ServerNotification::ItemCompleted(
codex::ItemCompletedNotification {
item: codex::ThreadItem::AgentMessage {
id: "msg-123".to_string(),
text: "Hello from Codex".to_string(),
},
thread_id: "thread-123".to_string(),
turn_id: "turn-456".to_string(),
}
);
let json = serde_json::to_string(&notification).unwrap();
assert!(json.contains("item/completed"));
assert!(json.contains("Hello from Codex"));
assert!(json.contains("agentMessage"));
}
// Serializes two `ThreadItem` variants and checks that each JSON output
// contains the expected camelCase variant name and a payload string.
#[test]
fn test_codex_thread_item_variants() {
// Test UserMessage variant
let user_msg = codex::ThreadItem::UserMessage {
content: vec![codex::UserInput::Text {
text: "Hello".to_string(),
text_elements: vec![],
}],
id: "user-1".to_string(),
};
let json = serde_json::to_string(&user_msg).unwrap();
assert!(json.contains("userMessage"));
assert!(json.contains("Hello"));
// Test CommandExecution variant
let cmd = codex::ThreadItem::CommandExecution {
aggregated_output: Some("output".to_string()),
command: "ls -la".to_string(),
command_actions: vec![],
cwd: "/tmp".to_string(),
duration_ms: Some(100),
exit_code: Some(0),
id: "cmd-1".to_string(),
process_id: None,
status: codex::CommandExecutionStatus::Completed,
};
let json = serde_json::to_string(&cmd).unwrap();
assert!(json.contains("commandExecution"));
assert!(json.contains("ls -la"));
}
// Serializes an AMP `Message` with the `User` role and checks that the JSON
// contains "user" and the message content.
#[test]
fn test_amp_message() {
let msg = amp::Message {
role: amp::MessageRole::User,
content: "Hello".to_string(),
tool_calls: vec![],
};
let json = serde_json::to_string(&msg).unwrap();
assert!(json.contains("user"));
assert!(json.contains("Hello"));
}
}

View file

@ -0,0 +1,77 @@
use sandbox_agent_extracted_agent_schemas::{amp, claude, codex};
/// Round-trips a Claude `BashInput` through JSON and verifies the command
/// string is present in the encoded form and intact after decoding.
#[test]
fn test_claude_bash_input() {
    let original = claude::BashInput {
        command: String::from("ls -la"),
        timeout: Some(5000.0),
        working_directory: None,
    };

    // Encode, then make sure the command text is visible in the raw JSON.
    let encoded = serde_json::to_string(&original).unwrap();
    assert!(encoded.contains("ls -la"));

    // Decode back into the typed struct and compare the command field.
    let decoded = serde_json::from_str::<claude::BashInput>(&encoded).unwrap();
    assert_eq!(decoded.command, "ls -la");
}
/// Serializes a Codex `ItemCompleted` notification wrapping an agent message
/// and verifies the method name, variant tag and message text appear in the JSON.
#[test]
fn test_codex_server_notification() {
    // Build the payload inside-out so each layer is named.
    let agent_message = codex::ThreadItem::AgentMessage {
        id: String::from("msg-123"),
        text: String::from("Hello from Codex"),
    };
    let completed = codex::ItemCompletedNotification {
        item: agent_message,
        thread_id: String::from("thread-123"),
        turn_id: String::from("turn-456"),
    };
    let notification = codex::ServerNotification::ItemCompleted(completed);

    let encoded = serde_json::to_string(&notification).unwrap();
    assert!(encoded.contains("item/completed"));
    assert!(encoded.contains("Hello from Codex"));
    assert!(encoded.contains("agentMessage"));
}
/// Serializes the `UserMessage` and `CommandExecution` variants of
/// `codex::ThreadItem` and verifies each JSON output carries its camelCase
/// variant name plus a recognizable payload string.
#[test]
fn test_codex_thread_item_variants() {
    // UserMessage variant with a single text input.
    let text_input = codex::UserInput::Text {
        text: String::from("Hello"),
        text_elements: Vec::new(),
    };
    let user_message = codex::ThreadItem::UserMessage {
        content: vec![text_input],
        id: String::from("user-1"),
    };
    let user_json = serde_json::to_string(&user_message).unwrap();
    assert!(user_json.contains("userMessage"));
    assert!(user_json.contains("Hello"));

    // CommandExecution variant describing a completed command.
    let execution = codex::ThreadItem::CommandExecution {
        aggregated_output: Some(String::from("output")),
        command: String::from("ls -la"),
        command_actions: Vec::new(),
        cwd: String::from("/tmp"),
        duration_ms: Some(100),
        exit_code: Some(0),
        id: String::from("cmd-1"),
        process_id: None,
        status: codex::CommandExecutionStatus::Completed,
    };
    let execution_json = serde_json::to_string(&execution).unwrap();
    assert!(execution_json.contains("commandExecution"));
    assert!(execution_json.contains("ls -la"));
}
/// Serializes a user-role `amp::Message` and checks that the resulting JSON
/// contains the role name and the message body.
#[test]
fn test_amp_message() {
    let user_message = amp::Message {
        role: amp::MessageRole::User,
        content: String::from("Hello"),
        tool_calls: Vec::new(),
    };

    let serialized = serde_json::to_string(&user_message).unwrap();

    assert!(serialized.contains("user"));
    assert!(serialized.contains("Hello"));
}

View file

@ -32,9 +32,13 @@ tracing.workspace = true
tracing-logfmt.workspace = true
tracing-subscriber.workspace = true
include_dir.workspace = true
tempfile = { workspace = true, optional = true }
[dev-dependencies]
http-body-util.workspace = true
insta.workspace = true
tempfile.workspace = true
tower.workspace = true
tempfile.workspace = true
[features]
test-utils = ["tempfile"]

View file

@ -1,6 +1,7 @@
//! Sandbox agent core utilities.
pub mod credentials;
mod agent_server_logs;
pub mod router;
pub mod telemetry;
pub mod ui;

View file

@ -1,6 +1,7 @@
use std::collections::HashMap;
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use clap::{Args, Parser, Subcommand};
use reqwest::blocking::Client as HttpClient;
@ -16,7 +17,7 @@ use sandbox_agent::router::{
};
use sandbox_agent::telemetry;
use sandbox_agent::router::{AgentListResponse, AgentModesResponse, CreateSessionResponse, EventsResponse};
use sandbox_agent::router::build_router;
use sandbox_agent::router::{build_router_with_state, shutdown_servers};
use sandbox_agent::ui;
use serde::Serialize;
use serde_json::Value;
@ -352,8 +353,8 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> {
let agent_manager =
AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?;
let state = AppState::new(auth, agent_manager);
let mut router = build_router(state);
let state = Arc::new(AppState::new(auth, agent_manager));
let (mut router, state) = build_router_with_state(state);
if let Some(cors) = build_cors_layer(server)? {
router = router.layer(cors);
@ -384,7 +385,12 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> {
} else {
tracing::info!("inspector ui not embedded; set SANDBOX_AGENT_SKIP_INSPECTOR=1 to skip embedding during builds");
}
let shutdown_state = state.clone();
axum::serve(listener, router)
.with_graceful_shutdown(async move {
let _ = tokio::signal::ctrl_c().await;
shutdown_servers(&shutdown_state).await;
})
.await
.map_err(|err| CliError::Server(err.to_string()))
})

View file

@ -1,3 +1,4 @@
#[path = "../common/mod.rs"]
mod common;
use common::*;
@ -29,8 +30,6 @@ async fn agent_basic_reply() {
"no events collected for {}",
config.agent.as_str()
);
expect_basic_sequence(&events);
let caps = capabilities
.get(config.agent.as_str())
.expect("capabilities missing");

View file

@ -1,3 +1,4 @@
#[path = "../common/mod.rs"]
mod common;
use common::*;

View file

@ -1,3 +1,4 @@
#[path = "../common/mod.rs"]
mod common;
use common::*;

View file

@ -0,0 +1,6 @@
mod agent_basic_reply;
mod agent_multi_turn;
mod agent_permission_flow;
mod agent_question_flow;
mod agent_termination;
mod agent_tool_flow;

View file

@ -0,0 +1 @@
mod agents;

View file

@ -0,0 +1,2 @@
#[path = "agent-flows/mod.rs"]
mod agent_flows;

View file

@ -0,0 +1,2 @@
#[path = "agent-management/mod.rs"]
mod agent_management;

View file

@ -272,38 +272,6 @@ pub fn find_assistant_message_item(events: &[Value]) -> Option<String> {
})
}
pub fn event_sequence(event: &Value) -> Option<u64> {
event.get("sequence").and_then(Value::as_u64)
}
/// Finds the sequence number of the first event of `event_type` that refers
/// to `item_id`.
///
/// For `"item.delta"` events the id is read from `data.item_id`; for every
/// other event type it is read from `data.item.item_id`. Events that lack the
/// expected fields, or whose matching event has no `sequence`, are skipped.
pub fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option<u64> {
    events
        .iter()
        .filter(|event| event.get("type").and_then(Value::as_str) == Some(event_type))
        .find_map(|event| {
            let data = event.get("data")?;
            // Delta events carry the id directly on `data`; the rest nest it under `item`.
            let id_holder = if event_type == "item.delta" {
                data
            } else {
                data.get("item")?
            };
            let candidate = id_holder.get("item_id")?.as_str()?;
            if candidate != item_id {
                return None;
            }
            event_sequence(event)
        })
}
pub fn find_permission_id(events: &[Value]) -> Option<String> {
events.iter().find_map(|event| {
if event.get("type").and_then(Value::as_str) != Some("permission.requested") {
@ -372,17 +340,3 @@ pub fn has_tool_result(events: &[Value]) -> bool {
item.get("kind").and_then(Value::as_str) == Some("tool_result")
})
}
/// Asserts the minimal event ordering for a basic assistant reply.
///
/// Panics unless a `session.started` event exists and the assistant message
/// item has `item.started`, `item.delta` and `item.completed` events whose
/// sequence numbers are strictly increasing in that order.
pub fn expect_basic_sequence(events: &[Value]) {
    assert!(has_event_type(events, "session.started"), "session.started missing");

    let item_id = find_assistant_message_item(events).expect("assistant message missing");
    let seq_of = |event_type: &str| find_item_event_seq(events, event_type, &item_id);

    let started_seq = seq_of("item.started").expect("item.started missing");
    // Deltas are required on purpose: this validates the synthetic delta behavior.
    let delta_seq = seq_of("item.delta").expect("item.delta missing");
    let completed_seq = seq_of("item.completed").expect("item.completed missing");

    assert!(started_seq < delta_seq, "item.started must precede delta");
    assert!(delta_seq < completed_seq, "delta must precede completion");
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1016
expression: snapshot_status(status)
---
status: 204

View file

@ -0,0 +1,12 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1034
expression: normalize_agent_modes(&modes)
---
modes:
- description: true
id: build
name: Build
- description: true
id: plan
name: Plan

View file

@ -0,0 +1,7 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1053
expression: normalize_create_session(&created)
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1078
expression: snapshot_status(status)
---
status: 204

View file

@ -0,0 +1,17 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1119
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started

View file

@ -1,19 +1,15 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1025
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1112
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
@ -22,16 +18,12 @@ expression: normalize_events(&permission_events)
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
@ -40,6 +32,4 @@ expression: normalize_events(&permission_events)
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1152
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -0,0 +1,35 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1236
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1276
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -0,0 +1,35 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1174
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1214
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,6 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1259
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1351
expression: snapshot
---
session_a:
@ -23,16 +23,16 @@ session_a:
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.delta
- item:
content_types:
@ -64,16 +64,16 @@ session_b:
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.delta
- item:
content_types:

View file

@ -0,0 +1,67 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1344
expression: snapshot
---
session_a:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed
session_b:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,6 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 742
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 811
expression: normalized
---
- metadata: true
@ -22,16 +22,16 @@ expression: normalized
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.delta
- item:
content_types:

View file

@ -0,0 +1,35 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 804
expression: normalized
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,6 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 775
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
@ -22,16 +22,16 @@ expression: normalized
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
source: agent
synthetic: false
type: item.delta
- item:
content_types:

View file

@ -1,5 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
@ -62,30 +63,11 @@ expression: normalized
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
content_types: []
kind: message
role: assistant
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 10
source: agent
synthetic: false
type: item.completed

View file

@ -0,0 +1,35 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 841
expression: normalized
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,136 @@
use std::sync::Arc;
use sandbox_agent::router::test_utils::{exit_status, spawn_sleep_process, TestHarness};
use sandbox_agent_agent_management::agents::AgentId;
use sandbox_agent_universal_agent_schema::SessionEndReason;
use tokio::time::{timeout, Duration};
/// Polls the shared child handle until the process has exited or the slot is
/// empty, checking up to 20 times with a 50 ms pause after each unsuccessful
/// check. Returns silently if the process is still running after that.
async fn wait_for_exit(child: &Arc<std::sync::Mutex<Option<std::process::Child>>>) {
    for _attempt in 0..20 {
        // The guard is a temporary of this statement, so the lock is released
        // before the `.await` below.
        let finished = child
            .lock()
            .expect("child lock")
            .as_mut()
            .map_or(true, |process| matches!(process.try_wait(), Ok(Some(_))));
        if finished {
            return;
        }
        tokio::time::sleep(Duration::from_millis(50)).await;
    }
}
/// Registering a session must create both the session mapping and the
/// native-id mapping; unregistering must remove both again.
#[tokio::test]
async fn register_and_unregister_sessions() {
    let harness = TestHarness::new().await;

    harness
        .register_session(AgentId::Codex, "sess-1", Some("thread-1"))
        .await;

    let mapped = harness.has_session_mapping(AgentId::Codex, "sess-1").await;
    assert!(mapped);
    let native = harness.native_mapping(AgentId::Codex, "thread-1").await;
    assert_eq!(native.as_deref(), Some("sess-1"));

    harness
        .unregister_session(AgentId::Codex, "sess-1", Some("thread-1"))
        .await;

    let still_mapped = harness.has_session_mapping(AgentId::Codex, "sess-1").await;
    assert!(!still_mapped);
    let native_after = harness.native_mapping(AgentId::Codex, "thread-1").await;
    assert!(native_after.is_none());
}
/// After `shutdown`, the Codex server must report `Stopped` and the spawned
/// sleep process must have exited.
#[tokio::test]
async fn shutdown_marks_servers_stopped_and_kills_child() {
    let harness = TestHarness::new().await;
    let child = harness
        .insert_stdio_server(AgentId::Codex, Some(spawn_sleep_process()), 0)
        .await;

    harness.shutdown().await;

    let status = harness.server_status(AgentId::Codex).await;
    assert!(matches!(
        status,
        Some(sandbox_agent::router::ServerStatus::Stopped)
    ));

    wait_for_exit(&child).await;

    // An empty slot counts as "not exited" here, so the child handle must
    // still be present and report an exit status.
    let exited = child
        .lock()
        .expect("child lock")
        .as_mut()
        .map_or(false, |process| matches!(process.try_wait(), Ok(Some(_))));
    assert!(exited);
}
#[tokio::test]
async fn handle_process_exit_marks_error_and_ends_sessions() {
let harness = TestHarness::new().await;
harness
.insert_session("sess-1", AgentId::Codex, Some("thread-1"))
.await;
harness
.register_session(AgentId::Codex, "sess-1", Some("thread-1"))
.await;
harness
.insert_stdio_server(AgentId::Codex, None, 1)
.await;
harness
.handle_process_exit(AgentId::Codex, 1, exit_status(7))
.await;
assert!(matches!(
harness.server_status(AgentId::Codex).await,
Some(sandbox_agent::router::ServerStatus::Error)
));
assert!(
harness
.server_last_error(AgentId::Codex)
.await
.unwrap_or_default()
.contains("exited")
);
assert!(harness.session_ended("sess-1").await);
assert!(matches!(
harness.session_end_reason("sess-1").await,
Some(SessionEndReason::Error)
));
}
/// With a restart notifier installed, a process exit reported for the Mock
/// HTTP server must send that agent's id on the channel within 200 ms.
#[tokio::test]
async fn auto_restart_notifier_emits_signal() {
    let harness = TestHarness::new().await;
    let (notify_tx, mut notify_rx) = tokio::sync::mpsc::unbounded_channel();
    harness.set_restart_notifier(notify_tx).await;
    harness.insert_http_server(AgentId::Mock, 2).await;

    harness
        .handle_process_exit(AgentId::Mock, 2, exit_status(2))
        .await;

    let deadline = Duration::from_millis(200);
    let received = timeout(deadline, notify_rx.recv())
        .await
        .expect("timeout");
    assert_eq!(received, Some(AgentId::Mock));
}

View file

@ -0,0 +1 @@
mod agent_server_manager;

View file

@ -0,0 +1,2 @@
#[path = "server-manager/mod.rs"]
mod server_manager;

View file

@ -0,0 +1,2 @@
#[path = "ui/mod.rs"]
mod ui;

View file

@ -0,0 +1 @@
mod inspector_ui;

View file

@ -7,3 +7,6 @@
- [x] Add inspector UI mode for turn stream and wire send flow.
- [x] Refresh docs for new endpoint and UI mode.
- [x] Add Docker/Vercel/Daytona/E2B examples with basic prompt scripts and tests.
- [x] Add unified AgentServerManager for shared agent servers (Codex/OpenCode).
- [x] Expose server status details in agent list API (uptime/restarts/last error/base URL).
- [ ] Regenerate TypeScript SDK from updated OpenAPI (blocked: Node/pnpm not available in env).