diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7796b42..3353830 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -15,15 +15,19 @@ on: defaults: run: + # Enable fail-fast behavior shell: bash -e {0} env: + # Disable incremental compilation for faster from-scratch builds CARGO_INCREMENTAL: 0 jobs: setup: name: "Setup" runs-on: ubuntu-24.04 + permissions: + contents: write steps: - uses: actions/checkout@v4 with: @@ -38,19 +42,28 @@ jobs: node-version: 20 cache: pnpm - - name: Install tsx - run: npm install -g tsx - - - name: Run setup phase + - name: Setup + env: + R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} + R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} run: | - CMD="./scripts/release/main.ts --version '${{ inputs.version }}' --phase setup-ci" + # Install dependencies + pnpm install + + # Install tsx globally + npm install -g tsx + + # Build command based on inputs + CMD="./scripts/release/main.ts --version \"${{ github.event.inputs.version }}\" --phase setup-ci" + if [ "${{ inputs.latest }}" != "true" ]; then CMD="$CMD --no-latest" fi + eval "$CMD" binaries: - name: "Build Binaries" + name: "Build & Upload Binaries" needs: [setup] strategy: matrix: @@ -97,15 +110,34 @@ jobs: docker/release/build.sh ${{ matrix.target }} ls -la dist/ - - name: Upload artifact - uses: actions/upload-artifact@v4 - with: - name: binary-${{ matrix.target }} - path: dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }} + - name: Upload to R2 + env: + AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} + run: | + # Install AWS CLI + sudo apt-get update + sudo apt-get install -y unzip curl + + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" + unzip awscliv2.zip + sudo ./aws/install --update + + COMMIT_SHA_SHORT="${GITHUB_SHA::7}" + 
BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" + + # Upload to commit directory for later promotion + aws s3 cp \ + "${BINARY_PATH}" \ + "s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \ + --region auto \ + --endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \ + --checksum-algorithm CRC32 complete: name: "Complete" needs: [setup, binaries] + if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }} runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -122,37 +154,31 @@ jobs: registry-url: "https://registry.npmjs.org" cache: pnpm - - name: Install tsx - run: npm install -g tsx - - - name: Install AWS CLI - run: | - sudo apt-get update - sudo apt-get install -y unzip curl - - curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" - unzip awscliv2.zip - sudo ./aws/install --update - - - name: Download binaries - uses: actions/download-artifact@v4 - with: - path: dist/ - pattern: binary-* - merge-multiple: true - - - name: List downloaded binaries - run: ls -la dist/ - - - name: Publish & upload artifacts + - name: Complete env: + # https://cli.github.com/manual/gh_help_environment + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} run: | - CMD="./scripts/release/main.ts --version '${{ inputs.version }}' --phase complete-ci --no-validate-git" + # Authenticate with NPM + cat << EOF > ~/.npmrc + //registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }} + EOF + + # Install dependencies + pnpm install + + # Install tsx globally + npm install -g tsx + + # Build command based on inputs + CMD="./scripts/release/main.ts --version \"${{ 
github.event.inputs.version }}\" --phase complete-ci --no-validate-git" + if [ "${{ inputs.latest }}" != "true" ]; then CMD="$CMD --no-latest" fi + eval "$CMD" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d12ca50..be30073 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,7 +6,18 @@ Documentation lives in `docs/` (Mintlify). Start with: - `docs/quickstart.mdx` to run the daemon - `docs/http-api.mdx` and `docs/cli.mdx` for API references -Quickstart (local dev): +## Development Setup + +### Prerequisites + +- Rust (latest stable) +- Node.js 20+ +- pnpm 9+ +- [just](https://github.com/casey/just) (optional, but recommended) + +### Quickstart + +Run the agent locally: ```bash sandbox-agent --token "$SANDBOX_TOKEN" --host 127.0.0.1 --port 2468 @@ -26,5 +37,112 @@ Run the web console (includes all dependencies): ```bash pnpm dev -F @sandbox-agent/inspector +# or +just dev ``` +### Common Commands + +```bash +# Run checks (cargo check, fmt, typecheck) +just check + +# Run tests +just test + +# Format code +just fmt + +# Build the agent +just build +``` + +## Releasing + +Releases are managed through a release script that handles version bumps, artifact uploads, npm/crates.io publishing, and GitHub releases. + +### Prerequisites + +1. Install dependencies in the release script directory: + ```bash + cd scripts/release && pnpm install && cd ../.. + ``` + +2. 
Ensure you have the following configured: + - `gh` CLI authenticated + - npm authenticated (`npm login`) + - `CARGO_REGISTRY_TOKEN` for crates.io (or run `cargo login`) + - R2 credentials: `R2_RELEASES_ACCESS_KEY_ID` and `R2_RELEASES_SECRET_ACCESS_KEY` + (or 1Password CLI for local dev) + +### Release Commands + +```bash +# Release with automatic patch bump +just release --patch + +# Release with minor bump +just release --minor + +# Release with specific version +just release --version 0.2.0 + +# Release a pre-release +just release --version 0.2.0-rc.1 --no-latest +``` + +### Release Flow + +The release process has four phases: + +**1. setup-local** (runs locally via `just release`): +- Confirms release details with user +- Runs local checks (cargo check, fmt, typecheck) +- Updates version numbers across all packages +- Generates artifacts (OpenAPI spec, TypeScript SDK) +- Commits and pushes changes +- Triggers the GitHub Actions release workflow + +**2. setup-ci** (runs in CI): +- Runs full test suite (Rust + TypeScript) +- Builds TypeScript SDK and uploads to R2 at `sandbox-agent/{commit}/typescript/` + +**3. binaries** (runs in CI after setup-ci completes): +- Builds binaries for all platforms via Docker cross-compilation +- Uploads binaries to R2 at `sandbox-agent/{commit}/binaries/` + +**4.
complete-ci** (runs in CI after setup + binaries): +- Publishes crates to crates.io +- Publishes npm packages (SDK + CLI) +- Promotes artifacts from `{commit}/` to `{version}/` (S3-to-S3 copy) +- Creates git tag and pushes +- Creates GitHub release with auto-generated notes + +### Manual Steps + +To run specific steps manually: + +```bash +# Run only local checks +cd scripts/release && pnpm exec tsx ./main.ts --version 0.1.0 --only-steps run-local-checks + +# Build binaries locally +just release-build-all +``` + +## Project Structure + +``` +sandbox-daemon/ +├── server/packages/ # Rust crates +│ ├── sandbox-agent/ # Main agent binary +│ ├── agent-schema/ # Agent-specific schemas (Claude, Codex, etc.) +│ └── ... +├── sdks/ +│ ├── typescript/ # TypeScript SDK (npm: sandbox-agent) +│ └── cli/ # CLI wrapper (npm: @sandbox-agent/cli) +├── frontend/packages/ +│ └── inspector/ # Web console UI +├── docs/ # Mintlify documentation +└── scripts/release/ # Release automation +``` diff --git a/docs/deploy/index.mdx b/docs/deploy/index.mdx new file mode 100644 index 0000000..aa4b1b3 --- /dev/null +++ b/docs/deploy/index.mdx @@ -0,0 +1,4 @@ +--- +sidebarTitle: Overview +--- + diff --git a/docs/docs.json b/docs/docs.json index e4e2e52..bbf3174 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -42,7 +42,7 @@ "universal-api", "frontend", "building-chat-ui", - "persisting-chat-logs" + "manage-session-state" ] }, { diff --git a/docs/persisting-chat-logs.mdx b/docs/manage-sessions.mdx similarity index 77% rename from docs/persisting-chat-logs.mdx rename to docs/manage-sessions.mdx index 94b1195..6424a72 100644 --- a/docs/persisting-chat-logs.mdx +++ b/docs/manage-sessions.mdx @@ -1,9 +1,9 @@ --- -title: "Persisting Chat Logs" -description: "Persist event streams so you can resume sessions and keep durable chat history." +title: "Manage Session State" +description: "TODO" --- -Persisting chat logs is easiest when you treat the event stream as the source of truth. 
+TODO ## Recommended approach diff --git a/justfile b/justfile index 7a76c29..d426bb4 100644 --- a/justfile +++ b/justfile @@ -1,42 +1,48 @@ set dotenv-load := true +# ============================================================================= +# Release +# ============================================================================= + +[group('release')] +release *ARGS: + cd scripts/release && pnpm exec tsx ./main.ts --phase setup-local {{ ARGS }} + # Build a single target via Docker +[group('release')] release-build target="x86_64-unknown-linux-musl": - ./docker/release/build.sh {{target}} + ./docker/release/build.sh {{target}} # Build all release binaries +[group('release')] release-build-all: - ./docker/release/build.sh x86_64-unknown-linux-musl - ./docker/release/build.sh x86_64-pc-windows-gnu - ./docker/release/build.sh x86_64-apple-darwin - ./docker/release/build.sh aarch64-apple-darwin + ./docker/release/build.sh x86_64-unknown-linux-musl + ./docker/release/build.sh x86_64-pc-windows-gnu + ./docker/release/build.sh x86_64-apple-darwin + ./docker/release/build.sh aarch64-apple-darwin -# Upload binaries from dist/ (requires AWS creds + aws cli) -release-upload-binaries version latest="auto": - {{~ if latest == "auto" ~}} - npx tsx scripts/release/main.ts --version {{version}} --upload-binaries - {{~ else if latest == "true" ~}} - npx tsx scripts/release/main.ts --version {{version}} --latest --upload-binaries - {{~ else if latest == "false" ~}} - npx tsx scripts/release/main.ts --version {{version}} --no-latest --upload-binaries - {{~ else ~}} - @echo "latest must be auto|true|false" && exit 1 - {{~ endif ~}} +# ============================================================================= +# Development +# ============================================================================= -# Upload TypeScript artifacts + install.sh -release-upload-artifacts version latest="auto": - {{~ if latest == "auto" ~}} - npx tsx scripts/release/main.ts --version 
{{version}} --upload-typescript --upload-install - {{~ else if latest == "true" ~}} - npx tsx scripts/release/main.ts --version {{version}} --latest --upload-typescript --upload-install - {{~ else if latest == "false" ~}} - npx tsx scripts/release/main.ts --version {{version}} --no-latest --upload-typescript --upload-install - {{~ else ~}} - @echo "latest must be auto|true|false" && exit 1 - {{~ endif ~}} +[group('dev')] +dev: + pnpm dev -F @sandbox-agent/inspector -# Full local release test: build all, then upload binaries + artifacts -release-test version latest="auto": - just release-build-all - just release-upload-binaries {{version}} {{latest}} - just release-upload-artifacts {{version}} {{latest}} +[group('dev')] +build: + cargo build -p sandbox-agent + +[group('dev')] +test: + cargo test --all-targets + +[group('dev')] +check: + cargo check --all-targets + cargo fmt --all -- --check + pnpm run typecheck + +[group('dev')] +fmt: + cargo fmt --all diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index bb23b61..a0b2856 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -53,10 +53,10 @@ importers: dependencies: '@anthropic-ai/claude-code': specifier: latest - version: 2.1.19 + version: 2.1.20 '@openai/codex': specifier: latest - version: 0.89.0 + version: 0.92.0 cheerio: specifier: ^1.0.0 version: 1.2.0 @@ -105,6 +105,34 @@ importers: specifier: ^4.19.0 version: 4.21.0 + scripts/release: + dependencies: + commander: + specifier: ^12.1.0 + version: 12.1.0 + execa: + specifier: ^9.5.0 + version: 9.6.1 + glob: + specifier: ^10.3.10 + version: 10.5.0 + semver: + specifier: ^7.6.0 + version: 7.7.3 + devDependencies: + '@types/node': + specifier: ^22.0.0 + version: 22.19.7 + '@types/semver': + specifier: ^7.5.8 + version: 7.7.1 + tsx: + specifier: ^4.19.0 + version: 4.21.0 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + sdks/cli: devDependencies: vitest: @@ -139,8 +167,8 @@ importers: packages: - '@anthropic-ai/claude-code@2.1.19': - resolution: {integrity: 
sha512-/bUlQuX/6nKr1Zqfi/9Q6xf7WonUBk72ZfKKENU4WVrIFWqTv/0JJsoW/dHol9QBNHvyfKIeBbYu4avHNRAnuQ==} + '@anthropic-ai/claude-code@2.1.20': + resolution: {integrity: sha512-5r9OEF5TTmkhOKWtJ9RYqdn/vchwQWABO3dvgZVXftqlBZV/IiKjHVISu0dKtqWzByLBolchwePrhY68ul0QrA==} engines: {node: '>=18.0.0'} hasBin: true @@ -652,11 +680,15 @@ packages: resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==} engines: {node: '>= 8'} - '@openai/codex@0.89.0': - resolution: {integrity: sha512-mIX0FLKTT26sWqLcpwb2GvRI89snDNvUbgTxEtrPMP/wXRtYasTLROY0UBL1qLHVrm532mU4RLepNITqBPvAOQ==} + '@openai/codex@0.92.0': + resolution: {integrity: sha512-DR9A2QlJDtEpMwqUGMIztTCzzCYTVrM7rqG3XuMVURnQ4b7XrScmY5RnSUuUZ/ga7wDTqw0BTmVzPurm4NX3Tw==} engines: {node: '>=16'} hasBin: true + '@pkgjs/parseargs@0.11.0': + resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==} + engines: {node: '>=14'} + '@rolldown/pluginutils@1.0.0-beta.27': resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==} @@ -785,6 +817,13 @@ packages: cpu: [x64] os: [win32] + '@sec-ant/readable-stream@0.4.1': + resolution: {integrity: sha512-831qok9r2t8AlxLko40y2ebgSDhenenCatLVeW/uBtnHPyhHOvG0C7TvfgecV+wHzIm5KUICgzmVpWS+IMEAeg==} + + '@sindresorhus/merge-streams@4.0.0': + resolution: {integrity: sha512-tlqY9xq5ukxTUZBmoOp+m61cqwQD5pHJtFY3Mn8CA8ps6yghLH/Hw8UPdqg4OLmFW3IFlcXnQNmo/dh8HzXYIQ==} + engines: {node: '>=18'} + '@types/babel__core@7.20.5': resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==} @@ -896,6 +935,9 @@ packages: resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} engines: {node: '>=12'} + balanced-match@1.0.2: + resolution: {integrity: 
sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + baseline-browser-mapping@2.9.18: resolution: {integrity: sha512-e23vBV1ZLfjb9apvfPk4rHVu2ry6RIr2Wfs+O324okSidrX7pTAnEJPCh/O5BtRlr7QtZI7ktOP3vsqr7Z5XoA==} hasBin: true @@ -903,6 +945,9 @@ packages: boolbase@1.0.0: resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + brace-expansion@2.0.2: + resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} + braces@3.0.3: resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==} engines: {node: '>=8'} @@ -955,6 +1000,10 @@ packages: color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + commander@12.1.0: + resolution: {integrity: sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==} + engines: {node: '>=18'} + commander@13.1.0: resolution: {integrity: sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==} engines: {node: '>=18'} @@ -1060,6 +1109,10 @@ packages: estree-walker@3.0.3: resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + execa@9.6.1: + resolution: {integrity: sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA==} + engines: {node: ^18.19.0 || >=20.5.0} + expect-type@1.3.0: resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==} engines: {node: '>=12.0.0'} @@ -1080,6 +1133,10 @@ packages: picomatch: optional: true + figures@6.1.0: + resolution: {integrity: sha512-d+l3qxjSesT4V7v2fh+QnmFnUWv9lSpjarhShNTgBOfA0ttejbQUAlHLitbjkoRiDulW0OPoQPYIGhIC8ohejg==} + engines: {node: '>=18'} + 
fill-range@7.1.1: resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} engines: {node: '>=8'} @@ -1100,6 +1157,10 @@ packages: resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} engines: {node: '>=6.9.0'} + get-stream@9.0.1: + resolution: {integrity: sha512-kVCxPF3vQM/N0B1PmoqVUqgHP+EeVjmZSQn+1oCRPxd2P21P2F19lIgbR3HBosbB1PUhOAoctJnfEn2GbN2eZA==} + engines: {node: '>=18'} + get-tsconfig@4.13.0: resolution: {integrity: sha512-1VKTZJCwBrvbd+Wn3AOgQP/2Av+TfTCOlE4AcRJE72W1ksZXbAx8PPBR9RzgTeSPzlPMHrbANMH3LbltH73wxQ==} @@ -1107,6 +1168,10 @@ packages: resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==} engines: {node: '>= 6'} + glob@10.5.0: + resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==} + hasBin: true + glob@11.1.0: resolution: {integrity: sha512-vuNwKSaKiqm7g0THUBu2x7ckSs3XJLXE+2ssL7/MfTGPLLcrJQ/4Uq1CjPTtO5cCIiRxqvN6Twy1qOwhL0Xjcw==} engines: {node: 20 || >=22} @@ -1115,6 +1180,10 @@ packages: htmlparser2@10.1.0: resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==} + human-signals@8.0.1: + resolution: {integrity: sha512-eKCa6bwnJhvxj14kZk5NCPc6Hb6BdsU9DZcOnmQKSnO1VKrfV0zCvtttPZUsBvjmNDn8rpcJfpwSYnHBjc95MQ==} + engines: {node: '>=18.18.0'} + iconv-lite@0.6.3: resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} engines: {node: '>=0.10.0'} @@ -1135,9 +1204,24 @@ packages: resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} engines: {node: '>=0.12.0'} + is-plain-obj@4.1.0: + resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==} + 
engines: {node: '>=12'} + + is-stream@4.0.1: + resolution: {integrity: sha512-Dnz92NInDqYckGEUJv689RbRiTSEHCQ7wOVeALbkOz999YpqT46yMRIGtSNl2iCL1waAZSx40+h59NV/EwzV/A==} + engines: {node: '>=18'} + + is-unicode-supported@2.1.0: + resolution: {integrity: sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ==} + engines: {node: '>=18'} + isexe@2.0.0: resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + jackspeak@3.4.3: + resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==} + jackspeak@4.1.1: resolution: {integrity: sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==} engines: {node: 20 || >=22} @@ -1184,6 +1268,9 @@ packages: loupe@3.2.1: resolution: {integrity: sha512-CdzqowRJCeLU72bHvWqwRBBlLcMEtIvGrlvef74kMnV2AolS9Y8xUv1I0U/MNAWMhBlKIoyuEgoJ0t/bbwHbLQ==} + lru-cache@10.4.3: + resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==} + lru-cache@11.2.4: resolution: {integrity: sha512-B5Y16Jr9LB9dHVkh6ZevG+vAbOsNOYCX+sXvFWFu7B3Iz5mijW3zdbMyhsh8ANd2mSWBYdJgnqi+mL7/LrOPYg==} engines: {node: 20 || >=22} @@ -1211,6 +1298,10 @@ packages: resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==} engines: {node: 20 || >=22} + minimatch@9.0.5: + resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} + engines: {node: '>=16 || 14 >=14.17'} + minipass@7.1.2: resolution: {integrity: sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==} engines: {node: '>=16 || 14 >=14.17'} @@ -1240,6 +1331,10 @@ packages: resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} 
engines: {node: '>=0.10.0'} + npm-run-path@6.0.0: + resolution: {integrity: sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==} + engines: {node: '>=18'} + nth-check@2.1.1: resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} @@ -1254,6 +1349,10 @@ packages: package-json-from-dist@1.0.1: resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==} + parse-ms@4.0.0: + resolution: {integrity: sha512-TXfryirbmq34y8QBwgqCVLi+8oA3oWx2eAnSn62ITyEhEYaWRlVZ2DvMM9eZbMs/RfxPu/PK/aBLyGj4IrqMHw==} + engines: {node: '>=18'} + parse5-htmlparser2-tree-adapter@7.1.0: resolution: {integrity: sha512-ruw5xyKs6lrpo9x9rCZqZZnIUntICjQAd0Wsmp396Ul9lN/h+ifgVV1x1gZHi8euej6wTfpqX8j+BFQxF0NS/g==} @@ -1267,6 +1366,14 @@ packages: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} + path-key@4.0.0: + resolution: {integrity: sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==} + engines: {node: '>=12'} + + path-scurry@1.11.1: + resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==} + engines: {node: '>=16 || 14 >=14.18'} + path-scurry@2.0.1: resolution: {integrity: sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA==} engines: {node: 20 || >=22} @@ -1318,6 +1425,10 @@ packages: resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} engines: {node: ^10 || ^12 || >=14} + pretty-ms@9.3.0: + resolution: {integrity: sha512-gjVS5hOP+M3wMm5nmNOucbIrqudzs9v/57bWRHQWLYklXqoXKrVfYW2W9+glfGsqtPgpiz5WwyEEB+ksXIx3gQ==} + engines: {node: '>=18'} + queue-microtask@1.2.3: resolution: {integrity: 
sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==} @@ -1421,6 +1532,10 @@ packages: resolution: {integrity: sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA==} engines: {node: '>=12'} + strip-final-newline@4.0.0: + resolution: {integrity: sha512-aulFJcD6YK8V1G7iRB5tigAP4TsHBZZrOV8pjV++zdUwmeV8uzbY7yn6h9MswN62adStNZFuCIx4haBnRuMDaw==} + engines: {node: '>=18'} + strip-literal@3.1.0: resolution: {integrity: sha512-8r3mkIM/2+PpjHoOtiAW8Rg3jJLHaV7xPwG+YRGrv6FP0wwk/toTpATxWYOW0BKdWwl82VT2tFYi5DlROa0Mxg==} @@ -1562,6 +1677,10 @@ packages: resolution: {integrity: sha512-Gpq0iNm5M6cQWlyHQv9MV+uOj1jWk7LpkoE5vSp/7zjb4zMdAcUD+VL5y0nH4p9EbUklq00eVIIX/XcDHzu5xg==} engines: {node: '>=20.18.1'} + unicorn-magic@0.3.0: + resolution: {integrity: sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==} + engines: {node: '>=18'} + update-browserslist-db@1.2.3: resolution: {integrity: sha512-Js0m9cx+qOgDxo0eMiFGEueWztz+d4+M3rGlmKPT+T4IS/jP4ylw3Nwpu6cpTTP8R1MAC1kF4VbdLt3ARf209w==} hasBin: true @@ -1670,9 +1789,13 @@ packages: resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} engines: {node: '>=12'} + yoctocolors@2.1.2: + resolution: {integrity: sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug==} + engines: {node: '>=18'} + snapshots: - '@anthropic-ai/claude-code@2.1.19': + '@anthropic-ai/claude-code@2.1.20': optionalDependencies: '@img/sharp-darwin-arm64': 0.33.5 '@img/sharp-darwin-x64': 0.33.5 @@ -2053,7 +2176,10 @@ snapshots: '@nodelib/fs.scandir': 2.1.5 fastq: 1.20.1 - '@openai/codex@0.89.0': {} + '@openai/codex@0.92.0': {} + + '@pkgjs/parseargs@0.11.0': + optional: true '@rolldown/pluginutils@1.0.0-beta.27': {} @@ -2132,6 +2258,10 @@ snapshots: '@rollup/rollup-win32-x64-msvc@4.56.0': optional: true + '@sec-ant/readable-stream@0.4.1': {} + + 
'@sindresorhus/merge-streams@4.0.0': {} + '@types/babel__core@7.20.5': dependencies: '@babel/parser': 7.28.6 @@ -2255,10 +2385,16 @@ snapshots: assertion-error@2.0.1: {} + balanced-match@1.0.2: {} + baseline-browser-mapping@2.9.18: {} boolbase@1.0.0: {} + brace-expansion@2.0.2: + dependencies: + balanced-match: 1.0.2 + braces@3.0.3: dependencies: fill-range: 7.1.1 @@ -2325,6 +2461,8 @@ snapshots: color-name@1.1.4: {} + commander@12.1.0: {} + commander@13.1.0: {} commander@4.1.1: {} @@ -2459,6 +2597,21 @@ snapshots: dependencies: '@types/estree': 1.0.8 + execa@9.6.1: + dependencies: + '@sindresorhus/merge-streams': 4.0.0 + cross-spawn: 7.0.6 + figures: 6.1.0 + get-stream: 9.0.1 + human-signals: 8.0.1 + is-plain-obj: 4.1.0 + is-stream: 4.0.1 + npm-run-path: 6.0.0 + pretty-ms: 9.3.0 + signal-exit: 4.1.0 + strip-final-newline: 4.0.0 + yoctocolors: 2.1.2 + expect-type@1.3.0: {} fast-glob@3.3.3: @@ -2477,6 +2630,10 @@ snapshots: optionalDependencies: picomatch: 4.0.3 + figures@6.1.0: + dependencies: + is-unicode-supported: 2.1.0 + fill-range@7.1.1: dependencies: to-regex-range: 5.0.1 @@ -2497,6 +2654,11 @@ snapshots: gensync@1.0.0-beta.2: {} + get-stream@9.0.1: + dependencies: + '@sec-ant/readable-stream': 0.4.1 + is-stream: 4.0.1 + get-tsconfig@4.13.0: dependencies: resolve-pkg-maps: 1.0.0 @@ -2505,6 +2667,15 @@ snapshots: dependencies: is-glob: 4.0.3 + glob@10.5.0: + dependencies: + foreground-child: 3.3.1 + jackspeak: 3.4.3 + minimatch: 9.0.5 + minipass: 7.1.2 + package-json-from-dist: 1.0.1 + path-scurry: 1.11.1 + glob@11.1.0: dependencies: foreground-child: 3.3.1 @@ -2521,6 +2692,8 @@ snapshots: domutils: 3.2.2 entities: 7.0.1 + human-signals@8.0.1: {} + iconv-lite@0.6.3: dependencies: safer-buffer: 2.1.2 @@ -2535,8 +2708,20 @@ snapshots: is-number@7.0.0: {} + is-plain-obj@4.1.0: {} + + is-stream@4.0.1: {} + + is-unicode-supported@2.1.0: {} + isexe@2.0.0: {} + jackspeak@3.4.3: + dependencies: + '@isaacs/cliui': 8.0.2 + optionalDependencies: + '@pkgjs/parseargs': 
0.11.0 + jackspeak@4.1.1: dependencies: '@isaacs/cliui': 8.0.2 @@ -2567,6 +2752,8 @@ snapshots: loupe@3.2.1: {} + lru-cache@10.4.3: {} + lru-cache@11.2.4: {} lru-cache@5.1.1: @@ -2592,6 +2779,10 @@ snapshots: dependencies: '@isaacs/brace-expansion': 5.0.0 + minimatch@9.0.5: + dependencies: + brace-expansion: 2.0.2 + minipass@7.1.2: {} minizlib@3.1.0: @@ -2619,6 +2810,11 @@ snapshots: normalize-path@3.0.0: {} + npm-run-path@6.0.0: + dependencies: + path-key: 4.0.0 + unicorn-magic: 0.3.0 + nth-check@2.1.1: dependencies: boolbase: 1.0.0 @@ -2636,6 +2832,8 @@ snapshots: package-json-from-dist@1.0.1: {} + parse-ms@4.0.0: {} + parse5-htmlparser2-tree-adapter@7.1.0: dependencies: domhandler: 5.0.3 @@ -2651,6 +2849,13 @@ snapshots: path-key@3.1.1: {} + path-key@4.0.0: {} + + path-scurry@1.11.1: + dependencies: + lru-cache: 10.4.3 + minipass: 7.1.2 + path-scurry@2.0.1: dependencies: lru-cache: 11.2.4 @@ -2687,6 +2892,10 @@ snapshots: picocolors: 1.1.1 source-map-js: 1.2.1 + pretty-ms@9.3.0: + dependencies: + parse-ms: 4.0.0 + queue-microtask@1.2.3: {} react-dom@18.3.1(react@18.3.1): @@ -2794,6 +3003,8 @@ snapshots: dependencies: ansi-regex: 6.2.2 + strip-final-newline@4.0.0: {} + strip-literal@3.1.0: dependencies: js-tokens: 9.0.1 @@ -2936,6 +3147,8 @@ snapshots: undici@7.19.1: {} + unicorn-magic@0.3.0: {} + update-browserslist-db@1.2.3(browserslist@4.28.1): dependencies: browserslist: 4.28.1 @@ -3039,3 +3252,5 @@ snapshots: yallist@5.0.0: {} yargs-parser@21.1.1: {} + + yoctocolors@2.1.2: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index d4d687a..83eba22 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -5,3 +5,4 @@ packages: - "sdks/cli/platforms/*" - "resources/agent-schemas" - "resources/vercel-ai-sdk-schemas" + - "scripts/release" diff --git a/server/CLAUDE.md b/server/CLAUDE.md index fa4023f..1167240 100644 --- a/server/CLAUDE.md +++ b/server/CLAUDE.md @@ -1,12 +1,27 @@ # Server Testing +## Test placement + +Place all new tests under 
`server/packages/**/tests/` (or a package-specific `tests/` folder). Avoid inline tests inside source files unless there is no viable alternative. + +## Test locations (overview) + +- Sandbox-agent integration tests live under `server/packages/sandbox-agent/tests/`: + - Agent flow coverage in `agent-flows/` + - Agent management coverage in `agent-management/` + - Shared server manager coverage in `server-manager/` + - HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`) + - UI coverage in `ui/` + - Shared helpers in `common/` +- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/` + ## Snapshot tests -The HTTP/SSE snapshot suite lives in: -- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` +The HTTP/SSE snapshot suite entrypoint lives in: +- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`) Snapshots are written to: -- `server/packages/sandbox-agent/tests/snapshots/` +- `server/packages/sandbox-agent/tests/http/snapshots/` ## Agent selection @@ -47,9 +62,20 @@ Health checks run in a blocking thread to avoid Tokio runtime drop errors inside ## Snapshot stability To keep snapshots deterministic: +- Use the mock agent as the **master** event sequence; all other agents must match its behavior 1:1. +- Snapshots should compare a **canonical event skeleton** (event order matters) with strict ordering across: + - `item.started` → `item.delta` → `item.completed` + - presence/absence of `session.ended` + - permission/question request and resolution flows +- Scrub non-deterministic fields from snapshots: + - IDs, timestamps, native IDs + - text content, tool inputs/outputs, provider-specific metadata + - `source` and `synthetic` flags (these are implementation details) +- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly. 
- Event streams are truncated after the first assistant or error event. - Permission flow snapshots are truncated after the permission request (or first assistant) event. - Unknown events are preserved as `kind: unknown` (raw payload in universal schema). +- Prefer snapshot-based event skeleton assertions over manual event-order assertions in tests. ## Typical commands diff --git a/server/packages/extracted-agent-schemas/src/lib.rs b/server/packages/extracted-agent-schemas/src/lib.rs index d4f6069..f72a064 100644 --- a/server/packages/extracted-agent-schemas/src/lib.rs +++ b/server/packages/extracted-agent-schemas/src/lib.rs @@ -25,87 +25,3 @@ pub mod amp { //! AMP Code SDK types. include!(concat!(env!("OUT_DIR"), "/amp.rs")); } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_claude_bash_input() { - let input = claude::BashInput { - command: "ls -la".to_string(), - timeout: Some(5000.0), - working_directory: None, - }; - - let json = serde_json::to_string(&input).unwrap(); - assert!(json.contains("ls -la")); - - let parsed: claude::BashInput = serde_json::from_str(&json).unwrap(); - assert_eq!(parsed.command, "ls -la"); - } - - #[test] - fn test_codex_server_notification() { - // Test ItemCompletedNotification with AgentMessage - let notification = codex::ServerNotification::ItemCompleted( - codex::ItemCompletedNotification { - item: codex::ThreadItem::AgentMessage { - id: "msg-123".to_string(), - text: "Hello from Codex".to_string(), - }, - thread_id: "thread-123".to_string(), - turn_id: "turn-456".to_string(), - } - ); - - let json = serde_json::to_string(¬ification).unwrap(); - assert!(json.contains("item/completed")); - assert!(json.contains("Hello from Codex")); - assert!(json.contains("agentMessage")); - } - - #[test] - fn test_codex_thread_item_variants() { - // Test UserMessage variant - let user_msg = codex::ThreadItem::UserMessage { - content: vec![codex::UserInput::Text { - text: "Hello".to_string(), - text_elements: vec![], - }], - 
id: "user-1".to_string(), - }; - let json = serde_json::to_string(&user_msg).unwrap(); - assert!(json.contains("userMessage")); - assert!(json.contains("Hello")); - - // Test CommandExecution variant - let cmd = codex::ThreadItem::CommandExecution { - aggregated_output: Some("output".to_string()), - command: "ls -la".to_string(), - command_actions: vec![], - cwd: "/tmp".to_string(), - duration_ms: Some(100), - exit_code: Some(0), - id: "cmd-1".to_string(), - process_id: None, - status: codex::CommandExecutionStatus::Completed, - }; - let json = serde_json::to_string(&cmd).unwrap(); - assert!(json.contains("commandExecution")); - assert!(json.contains("ls -la")); - } - - #[test] - fn test_amp_message() { - let msg = amp::Message { - role: amp::MessageRole::User, - content: "Hello".to_string(), - tool_calls: vec![], - }; - - let json = serde_json::to_string(&msg).unwrap(); - assert!(json.contains("user")); - assert!(json.contains("Hello")); - } -} diff --git a/server/packages/extracted-agent-schemas/tests/schema_roundtrip.rs b/server/packages/extracted-agent-schemas/tests/schema_roundtrip.rs new file mode 100644 index 0000000..7c992b0 --- /dev/null +++ b/server/packages/extracted-agent-schemas/tests/schema_roundtrip.rs @@ -0,0 +1,77 @@ +use sandbox_agent_extracted_agent_schemas::{amp, claude, codex}; + +#[test] +fn test_claude_bash_input() { + let input = claude::BashInput { + command: "ls -la".to_string(), + timeout: Some(5000.0), + working_directory: None, + }; + + let json = serde_json::to_string(&input).unwrap(); + assert!(json.contains("ls -la")); + + let parsed: claude::BashInput = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.command, "ls -la"); +} + +#[test] +fn test_codex_server_notification() { + let notification = codex::ServerNotification::ItemCompleted( + codex::ItemCompletedNotification { + item: codex::ThreadItem::AgentMessage { + id: "msg-123".to_string(), + text: "Hello from Codex".to_string(), + }, + thread_id: "thread-123".to_string(), 
+ turn_id: "turn-456".to_string(), + }, + ); + + let json = serde_json::to_string(¬ification).unwrap(); + assert!(json.contains("item/completed")); + assert!(json.contains("Hello from Codex")); + assert!(json.contains("agentMessage")); +} + +#[test] +fn test_codex_thread_item_variants() { + let user_msg = codex::ThreadItem::UserMessage { + content: vec![codex::UserInput::Text { + text: "Hello".to_string(), + text_elements: vec![], + }], + id: "user-1".to_string(), + }; + let json = serde_json::to_string(&user_msg).unwrap(); + assert!(json.contains("userMessage")); + assert!(json.contains("Hello")); + + let cmd = codex::ThreadItem::CommandExecution { + aggregated_output: Some("output".to_string()), + command: "ls -la".to_string(), + command_actions: vec![], + cwd: "/tmp".to_string(), + duration_ms: Some(100), + exit_code: Some(0), + id: "cmd-1".to_string(), + process_id: None, + status: codex::CommandExecutionStatus::Completed, + }; + let json = serde_json::to_string(&cmd).unwrap(); + assert!(json.contains("commandExecution")); + assert!(json.contains("ls -la")); +} + +#[test] +fn test_amp_message() { + let msg = amp::Message { + role: amp::MessageRole::User, + content: "Hello".to_string(), + tool_calls: vec![], + }; + + let json = serde_json::to_string(&msg).unwrap(); + assert!(json.contains("user")); + assert!(json.contains("Hello")); +} diff --git a/server/packages/sandbox-agent/Cargo.toml b/server/packages/sandbox-agent/Cargo.toml index da2e88f..b08cabb 100644 --- a/server/packages/sandbox-agent/Cargo.toml +++ b/server/packages/sandbox-agent/Cargo.toml @@ -32,9 +32,13 @@ tracing.workspace = true tracing-logfmt.workspace = true tracing-subscriber.workspace = true include_dir.workspace = true +tempfile = { workspace = true, optional = true } [dev-dependencies] http-body-util.workspace = true insta.workspace = true -tempfile.workspace = true tower.workspace = true +tempfile.workspace = true + +[features] +test-utils = ["tempfile"] diff --git 
a/server/packages/sandbox-agent/src/lib.rs b/server/packages/sandbox-agent/src/lib.rs index bab1209..1767f73 100644 --- a/server/packages/sandbox-agent/src/lib.rs +++ b/server/packages/sandbox-agent/src/lib.rs @@ -1,6 +1,7 @@ //! Sandbox agent core utilities. pub mod credentials; +mod agent_server_logs; pub mod router; pub mod telemetry; pub mod ui; diff --git a/server/packages/sandbox-agent/src/main.rs b/server/packages/sandbox-agent/src/main.rs index 972c697..fea4e6a 100644 --- a/server/packages/sandbox-agent/src/main.rs +++ b/server/packages/sandbox-agent/src/main.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::io::Write; use std::path::PathBuf; +use std::sync::Arc; use clap::{Args, Parser, Subcommand}; use reqwest::blocking::Client as HttpClient; @@ -16,7 +17,7 @@ use sandbox_agent::router::{ }; use sandbox_agent::telemetry; use sandbox_agent::router::{AgentListResponse, AgentModesResponse, CreateSessionResponse, EventsResponse}; -use sandbox_agent::router::build_router; +use sandbox_agent::router::{build_router_with_state, shutdown_servers}; use sandbox_agent::ui; use serde::Serialize; use serde_json::Value; @@ -352,8 +353,8 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> { let agent_manager = AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?; - let state = AppState::new(auth, agent_manager); - let mut router = build_router(state); + let state = Arc::new(AppState::new(auth, agent_manager)); + let (mut router, state) = build_router_with_state(state); if let Some(cors) = build_cors_layer(server)? 
{ router = router.layer(cors); @@ -384,7 +385,12 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> { } else { tracing::info!("inspector ui not embedded; set SANDBOX_AGENT_SKIP_INSPECTOR=1 to skip embedding during builds"); } + let shutdown_state = state.clone(); axum::serve(listener, router) + .with_graceful_shutdown(async move { + let _ = tokio::signal::ctrl_c().await; + shutdown_servers(&shutdown_state).await; + }) .await .map_err(|err| CliError::Server(err.to_string())) }) diff --git a/server/packages/sandbox-agent/tests/agent_basic_reply.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_basic_reply.rs similarity index 97% rename from server/packages/sandbox-agent/tests/agent_basic_reply.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_basic_reply.rs index 7136341..2b97ad5 100644 --- a/server/packages/sandbox-agent/tests/agent_basic_reply.rs +++ b/server/packages/sandbox-agent/tests/agent-flows/agent_basic_reply.rs @@ -1,3 +1,4 @@ +#[path = "../common/mod.rs"] mod common; use common::*; @@ -29,8 +30,6 @@ async fn agent_basic_reply() { "no events collected for {}", config.agent.as_str() ); - expect_basic_sequence(&events); - let caps = capabilities .get(config.agent.as_str()) .expect("capabilities missing"); diff --git a/server/packages/sandbox-agent/tests/agent_multi_turn.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_multi_turn.rs similarity index 100% rename from server/packages/sandbox-agent/tests/agent_multi_turn.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_multi_turn.rs diff --git a/server/packages/sandbox-agent/tests/agent_permission_flow.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_permission_flow.rs similarity index 98% rename from server/packages/sandbox-agent/tests/agent_permission_flow.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_permission_flow.rs index 5047305..4698d0c 100644 --- 
a/server/packages/sandbox-agent/tests/agent_permission_flow.rs +++ b/server/packages/sandbox-agent/tests/agent-flows/agent_permission_flow.rs @@ -1,3 +1,4 @@ +#[path = "../common/mod.rs"] mod common; use common::*; diff --git a/server/packages/sandbox-agent/tests/agent_question_flow.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_question_flow.rs similarity index 98% rename from server/packages/sandbox-agent/tests/agent_question_flow.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_question_flow.rs index 0c85aae..e5a85b7 100644 --- a/server/packages/sandbox-agent/tests/agent_question_flow.rs +++ b/server/packages/sandbox-agent/tests/agent-flows/agent_question_flow.rs @@ -1,3 +1,4 @@ +#[path = "../common/mod.rs"] mod common; use common::*; diff --git a/server/packages/sandbox-agent/tests/agent_termination.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_termination.rs similarity index 98% rename from server/packages/sandbox-agent/tests/agent_termination.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_termination.rs index 809baa1..961197a 100644 --- a/server/packages/sandbox-agent/tests/agent_termination.rs +++ b/server/packages/sandbox-agent/tests/agent-flows/agent_termination.rs @@ -1,3 +1,4 @@ +#[path = "../common/mod.rs"] mod common; use common::*; diff --git a/server/packages/sandbox-agent/tests/agent_tool_flow.rs b/server/packages/sandbox-agent/tests/agent-flows/agent_tool_flow.rs similarity index 99% rename from server/packages/sandbox-agent/tests/agent_tool_flow.rs rename to server/packages/sandbox-agent/tests/agent-flows/agent_tool_flow.rs index 297306b..3674c34 100644 --- a/server/packages/sandbox-agent/tests/agent_tool_flow.rs +++ b/server/packages/sandbox-agent/tests/agent-flows/agent_tool_flow.rs @@ -1,3 +1,4 @@ +#[path = "../common/mod.rs"] mod common; use common::*; diff --git a/server/packages/sandbox-agent/tests/agent-flows/mod.rs b/server/packages/sandbox-agent/tests/agent-flows/mod.rs new 
file mode 100644 index 0000000..ea87461 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent-flows/mod.rs @@ -0,0 +1,6 @@ +mod agent_basic_reply; +mod agent_multi_turn; +mod agent_permission_flow; +mod agent_question_flow; +mod agent_termination; +mod agent_tool_flow; diff --git a/server/packages/sandbox-agent/tests/agents.rs b/server/packages/sandbox-agent/tests/agent-management/agents.rs similarity index 100% rename from server/packages/sandbox-agent/tests/agents.rs rename to server/packages/sandbox-agent/tests/agent-management/agents.rs diff --git a/server/packages/sandbox-agent/tests/agent-management/mod.rs b/server/packages/sandbox-agent/tests/agent-management/mod.rs new file mode 100644 index 0000000..fcf8db0 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent-management/mod.rs @@ -0,0 +1 @@ +mod agents; diff --git a/server/packages/sandbox-agent/tests/agent_flows.rs b/server/packages/sandbox-agent/tests/agent_flows.rs new file mode 100644 index 0000000..bd40f63 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_flows.rs @@ -0,0 +1,2 @@ +#[path = "agent-flows/mod.rs"] +mod agent_flows; diff --git a/server/packages/sandbox-agent/tests/agent_management.rs b/server/packages/sandbox-agent/tests/agent_management.rs new file mode 100644 index 0000000..c2a9157 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_management.rs @@ -0,0 +1,2 @@ +#[path = "agent-management/mod.rs"] +mod agent_management; diff --git a/server/packages/sandbox-agent/tests/common/mod.rs b/server/packages/sandbox-agent/tests/common/mod.rs index 9c74437..1d2be09 100644 --- a/server/packages/sandbox-agent/tests/common/mod.rs +++ b/server/packages/sandbox-agent/tests/common/mod.rs @@ -272,38 +272,6 @@ pub fn find_assistant_message_item(events: &[Value]) -> Option { }) } -pub fn event_sequence(event: &Value) -> Option { - event.get("sequence").and_then(Value::as_u64) -} - -pub fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> 
Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some(event_type) { - return None; - } - match event_type { - "item.delta" => { - let data = event.get("data")?; - let id = data.get("item_id")?.as_str()?; - if id == item_id { - event_sequence(event) - } else { - None - } - } - _ => { - let item = event.get("data")?.get("item")?; - let id = item.get("item_id")?.as_str()?; - if id == item_id { - event_sequence(event) - } else { - None - } - } - } - }) -} - pub fn find_permission_id(events: &[Value]) -> Option { events.iter().find_map(|event| { if event.get("type").and_then(Value::as_str) != Some("permission.requested") { @@ -372,17 +340,3 @@ pub fn has_tool_result(events: &[Value]) -> bool { item.get("kind").and_then(Value::as_str) == Some("tool_result") }) } - -pub fn expect_basic_sequence(events: &[Value]) { - assert!(has_event_type(events, "session.started"), "session.started missing"); - let item_id = find_assistant_message_item(events).expect("assistant message missing"); - let started_seq = find_item_event_seq(events, "item.started", &item_id) - .expect("item.started missing"); - // Intentionally require deltas here to validate our synthetic delta behavior. 
- let delta_seq = find_item_event_seq(events, "item.delta", &item_id) - .expect("item.delta missing"); - let completed_seq = find_item_event_seq(events, "item.completed", &item_id) - .expect("item.completed missing"); - assert!(started_seq < delta_seq, "item.started must precede delta"); - assert!(delta_seq < completed_seq, "delta must precede completion"); -} diff --git a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs new file mode 100644 index 0000000..5cbfbee --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs @@ -0,0 +1,1388 @@ +use std::collections::BTreeMap; +use std::time::{Duration, Instant}; + +use axum::body::{Body, Bytes}; +use axum::http::{header, HeaderMap, HeaderValue, Method, Request, StatusCode}; +use axum::Router; +use futures::StreamExt; +use http_body_util::BodyExt; +use serde_json::{json, Map, Value}; +use tempfile::TempDir; + +use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; +use sandbox_agent_agent_management::testing::{test_agents_from_env, TestAgentConfig}; +use sandbox_agent_agent_credentials::ExtractedCredentials; +use sandbox_agent::router::{build_router, AppState, AuthConfig}; +use tower::util::ServiceExt; +use tower_http::cors::CorsLayer; + +const PROMPT: &str = "Reply with exactly the single word OK."; +const PERMISSION_PROMPT: &str = "List files in the current directory using available tools."; +const QUESTION_PROMPT: &str = + "Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. 
Do not answer yourself."; + +struct TestApp { + app: Router, + _install_dir: TempDir, +} + +impl TestApp { + fn new() -> Self { + Self::new_with_auth(AuthConfig::disabled()) + } + + fn new_with_auth(auth: AuthConfig) -> Self { + Self::new_with_auth_and_cors(auth, None) + } + + fn new_with_auth_and_cors(auth: AuthConfig, cors: Option) -> Self { + let install_dir = tempfile::tempdir().expect("create temp install dir"); + let manager = AgentManager::new(install_dir.path()) + .expect("create agent manager"); + let state = AppState::new(auth, manager); + let mut app = build_router(state); + if let Some(cors) = cors { + app = app.layer(cors); + } + Self { + app, + _install_dir: install_dir, + } + } +} + +struct EnvGuard { + saved: BTreeMap>, +} + +impl Drop for EnvGuard { + fn drop(&mut self) { + for (key, value) in &self.saved { + match value { + Some(value) => std::env::set_var(key, value), + None => std::env::remove_var(key), + } + } + } +} + +fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard { + let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"]; + let mut saved = BTreeMap::new(); + for key in keys { + saved.insert(key.to_string(), std::env::var(key).ok()); + } + + match creds.anthropic.as_ref() { + Some(cred) => { + std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key); + std::env::set_var("CLAUDE_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("ANTHROPIC_API_KEY"); + std::env::remove_var("CLAUDE_API_KEY"); + } + } + + match creds.openai.as_ref() { + Some(cred) => { + std::env::set_var("OPENAI_API_KEY", &cred.api_key); + std::env::set_var("CODEX_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("OPENAI_API_KEY"); + std::env::remove_var("CODEX_API_KEY"); + } + } + + EnvGuard { saved } +} + +async fn send_json(app: &Router, method: Method, path: &str, body: Option) -> (StatusCode, Value) { + let mut builder = Request::builder().method(method).uri(path); + let body = if let Some(body) = 
body { + builder = builder.header("content-type", "application/json"); + Body::from(body.to_string()) + } else { + Body::empty() + }; + let request = builder.body(body).expect("request"); + let response = app + .clone() + .oneshot(request) + .await + .expect("request handled"); + let status = response.status(); + let bytes = response + .into_body() + .collect() + .await + .expect("read body") + .to_bytes(); + let value = if bytes.is_empty() { + Value::Null + } else { + serde_json::from_slice(&bytes).unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string())) + }; + (status, value) +} + +async fn send_request(app: &Router, request: Request) -> (StatusCode, HeaderMap, Bytes) { + let response = app + .clone() + .oneshot(request) + .await + .expect("request handled"); + let status = response.status(); + let headers = response.headers().clone(); + let bytes = response + .into_body() + .collect() + .await + .expect("read body") + .to_bytes(); + (status, headers, bytes) +} + +async fn send_json_request( + app: &Router, + request: Request, +) -> (StatusCode, HeaderMap, Value) { + let (status, headers, bytes) = send_request(app, request).await; + let value = if bytes.is_empty() { + Value::Null + } else { + serde_json::from_slice(&bytes) + .unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string())) + }; + (status, headers, value) +} + +async fn send_status(app: &Router, method: Method, path: &str, body: Option) -> StatusCode { + let (status, _) = send_json(app, method, path, body).await; + status +} + +async fn install_agent(app: &Router, agent: AgentId) { + let status = send_status( + app, + Method::POST, + &format!("/v1/agents/{}/install", agent.as_str()), + Some(json!({})), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "install {agent}"); +} + +/// Returns the default permission mode for tests. OpenCode only supports "default", +/// while other agents support "bypass" which skips tool approval. 
+fn test_permission_mode(agent: AgentId) -> &'static str { + match agent { + AgentId::Opencode => "default", + _ => "bypass", + } +} + +async fn create_session(app: &Router, agent: AgentId, session_id: &str, permission_mode: &str) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": agent.as_str(), + "permissionMode": permission_mode + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session {agent}"); +} + +async fn send_message(app: &Router, session_id: &str) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send message"); +} + +async fn poll_events_until( + app: &Router, + session_id: &str, + timeout: Duration, +) -> Vec { + let start = Instant::now(); + let mut offset = 0u64; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if should_stop(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + events +} + +async fn read_sse_events( + app: &Router, + session_id: &str, + timeout: Duration, +) -> Vec { + let request = Request::builder() + .method(Method::GET) + .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0")) + .body(Body::empty()) + .expect("sse request"); + let response = app + .clone() + .oneshot(request) + .await + .expect("sse response"); + 
assert_eq!(response.status(), StatusCode::OK, "sse status"); + + let mut stream = response.into_body().into_data_stream(); + let mut buffer = String::new(); + let mut events = Vec::new(); + let start = Instant::now(); + loop { + let remaining = match timeout.checked_sub(start.elapsed()) { + Some(remaining) if !remaining.is_zero() => remaining, + _ => break, + }; + let next = tokio::time::timeout(remaining, stream.next()).await; + let chunk: Bytes = match next { + Ok(Some(Ok(chunk))) => chunk, + Ok(Some(Err(_))) => break, + Ok(None) => break, + Err(_) => break, + }; + buffer.push_str(&String::from_utf8_lossy(&chunk)); + while let Some(idx) = buffer.find("\n\n") { + let block = buffer[..idx].to_string(); + buffer = buffer[idx + 2..].to_string(); + if let Some(event) = parse_sse_block(&block) { + events.push(event); + } + } + if should_stop(&events) { + break; + } + } + events +} + +async fn read_turn_stream_events( + app: &Router, + session_id: &str, + timeout: Duration, +) -> Vec { + let request = Request::builder() + .method(Method::POST) + .uri(format!("/v1/sessions/{session_id}/messages/stream")) + .header("content-type", "application/json") + .body(Body::from(json!({ "message": PROMPT }).to_string())) + .expect("turn stream request"); + let response = app + .clone() + .oneshot(request) + .await + .expect("turn stream response"); + assert_eq!(response.status(), StatusCode::OK, "turn stream status"); + + let mut stream = response.into_body().into_data_stream(); + let mut buffer = String::new(); + let mut events = Vec::new(); + let start = Instant::now(); + let mut ended = false; + loop { + let remaining = match timeout.checked_sub(start.elapsed()) { + Some(remaining) if !remaining.is_zero() => remaining, + _ => break, + }; + let next = tokio::time::timeout(remaining, stream.next()).await; + let chunk: Bytes = match next { + Ok(Some(Ok(chunk))) => chunk, + Ok(Some(Err(_))) => break, + Ok(None) => { + ended = true; + break; + } + Err(_) => break, + }; + 
buffer.push_str(&String::from_utf8_lossy(&chunk)); + while let Some(idx) = buffer.find("\n\n") { + let block = buffer[..idx].to_string(); + buffer = buffer[idx + 2..].to_string(); + if let Some(event) = parse_sse_block(&block) { + events.push(event); + } + } + } + assert!(ended, "turn stream did not close before timeout"); + events +} + +fn parse_sse_block(block: &str) -> Option { + let mut data_lines = Vec::new(); + for line in block.lines() { + if let Some(rest) = line.strip_prefix("data:") { + data_lines.push(rest.trim_start()); + } + } + if data_lines.is_empty() { + return None; + } + let data = data_lines.join("\n"); + serde_json::from_str(&data).ok() +} + +fn should_stop(events: &[Value]) -> bool { + events.iter().any(|event| is_assistant_message(event) || is_error_event(event)) +} + +fn is_assistant_message(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|event_type| event_type == "item.completed") + .unwrap_or(false) + && event + .get("data") + .and_then(|data| data.get("item")) + .and_then(|item| item.get("role")) + .and_then(Value::as_str) + .map(|role| role == "assistant") + .unwrap_or(false) +} + +fn is_error_event(event: &Value) -> bool { + matches!( + event.get("type").and_then(Value::as_str), + Some("error") | Some("agent.unparsed") + ) +} + +fn is_unparsed_event(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|value| value == "agent.unparsed") + .unwrap_or(false) +} + +fn is_permission_event(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|value| value == "permission.requested") + .unwrap_or(false) +} + +fn is_question_event(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|value| value == "question.requested") + .unwrap_or(false) +} + +fn truncate_permission_events(events: &[Value]) -> Vec { + if let Some(idx) = events.iter().position(is_permission_event) { + return events[..=idx].to_vec(); + } + if let Some(idx) = 
events.iter().position(is_assistant_message) { + return events[..=idx].to_vec(); + } + events.to_vec() +} + +fn truncate_question_events(events: &[Value]) -> Vec { + if let Some(idx) = events.iter().position(is_question_event) { + return events[..=idx].to_vec(); + } + if let Some(idx) = events.iter().position(is_assistant_message) { + return events[..=idx].to_vec(); + } + events.to_vec() +} + +fn normalize_events(events: &[Value]) -> Value { + assert!( + !events.iter().any(is_unparsed_event), + "agent.unparsed event encountered" + ); + let normalized = events + .iter() + .enumerate() + .map(|(idx, event)| normalize_event(event, idx + 1)) + .collect::>(); + Value::Array(normalized) +} + +fn truncate_after_first_stop(events: &[Value]) -> Vec { + if let Some(idx) = events + .iter() + .position(|event| is_assistant_message(event) || is_error_event(event)) + { + return events[..=idx].to_vec(); + } + events.to_vec() +} + +fn normalize_event(event: &Value, seq: usize) -> Value { + let mut map = Map::new(); + map.insert("seq".to_string(), Value::Number(seq.into())); + if let Some(event_type) = event.get("type").and_then(Value::as_str) { + map.insert("type".to_string(), Value::String(event_type.to_string())); + } + let data = event.get("data").unwrap_or(&Value::Null); + match event.get("type").and_then(Value::as_str).unwrap_or("") { + "session.started" => { + map.insert("session".to_string(), Value::String("started".to_string())); + if data.get("metadata").is_some() { + map.insert("metadata".to_string(), Value::Bool(true)); + } + } + "session.ended" => { + map.insert("session".to_string(), Value::String("ended".to_string())); + map.insert("ended".to_string(), normalize_session_end(data)); + } + "item.started" | "item.completed" => { + if let Some(item) = data.get("item") { + map.insert("item".to_string(), normalize_item(item)); + } + } + "item.delta" => { + let mut delta = Map::new(); + if data.get("item_id").is_some() { + delta.insert("item_id".to_string(), 
Value::String("".to_string())); + } + if data.get("native_item_id").is_some() { + delta.insert("native_item_id".to_string(), Value::String("".to_string())); + } + if data.get("delta").is_some() { + delta.insert("delta".to_string(), Value::String("".to_string())); + } + map.insert("delta".to_string(), Value::Object(delta)); + } + "permission.requested" | "permission.resolved" => { + map.insert("permission".to_string(), normalize_permission(data)); + } + "question.requested" | "question.resolved" => { + map.insert("question".to_string(), normalize_question(data)); + } + "error" => { + map.insert("error".to_string(), normalize_error(data)); + } + "agent.unparsed" => { + map.insert("unparsed".to_string(), Value::Bool(true)); + } + _ => {} + } + Value::Object(map) +} + +fn normalize_item(item: &Value) -> Value { + let mut map = Map::new(); + if let Some(kind) = item.get("kind").and_then(Value::as_str) { + map.insert("kind".to_string(), Value::String(kind.to_string())); + } + if let Some(role) = item.get("role").and_then(Value::as_str) { + map.insert("role".to_string(), Value::String(role.to_string())); + } + if let Some(status) = item.get("status").and_then(Value::as_str) { + map.insert("status".to_string(), Value::String(status.to_string())); + } + if let Some(content) = item.get("content").and_then(Value::as_array) { + let types = content + .iter() + .filter_map(|part| part.get("type").and_then(Value::as_str)) + .map(|value| Value::String(value.to_string())) + .collect::>(); + map.insert("content_types".to_string(), Value::Array(types)); + } + Value::Object(map) +} + +fn normalize_session_end(data: &Value) -> Value { + let mut map = Map::new(); + if let Some(reason) = data.get("reason").and_then(Value::as_str) { + map.insert("reason".to_string(), Value::String(reason.to_string())); + } + if let Some(terminated_by) = data.get("terminated_by").and_then(Value::as_str) { + map.insert("terminated_by".to_string(), Value::String(terminated_by.to_string())); + } + 
Value::Object(map) +} + +fn normalize_error(error: &Value) -> Value { + let mut map = Map::new(); + if let Some(code) = error.get("code").and_then(Value::as_str) { + map.insert("code".to_string(), Value::String(code.to_string())); + } + if let Some(message) = error.get("message").and_then(Value::as_str) { + map.insert("message".to_string(), Value::String(message.to_string())); + } + Value::Object(map) +} + +fn normalize_question(question: &Value) -> Value { + let mut map = Map::new(); + if question.get("question_id").is_some() { + map.insert("id".to_string(), Value::String("".to_string())); + } + if let Some(options) = question.get("options").and_then(Value::as_array) { + map.insert("options".to_string(), Value::Number(options.len().into())); + } + if let Some(status) = question.get("status").and_then(Value::as_str) { + map.insert("status".to_string(), Value::String(status.to_string())); + } + Value::Object(map) +} + +fn normalize_permission(permission: &Value) -> Value { + let mut map = Map::new(); + if permission.get("permission_id").is_some() { + map.insert("id".to_string(), Value::String("".to_string())); + } + if let Some(value) = permission.get("action").and_then(Value::as_str) { + map.insert("action".to_string(), Value::String(value.to_string())); + } + if let Some(status) = permission.get("status").and_then(Value::as_str) { + map.insert("status".to_string(), Value::String(status.to_string())); + } + Value::Object(map) +} + +fn normalize_agent_list(value: &Value) -> Value { + let agents = value + .get("agents") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + let mut normalized = Vec::new(); + for agent in agents { + let mut map = Map::new(); + if let Some(id) = agent.get("id").and_then(Value::as_str) { + map.insert("id".to_string(), Value::String(id.to_string())); + } + // Skip installed/version/path fields - they depend on local environment + // and make snapshots non-deterministic + normalized.push(Value::Object(map)); + } + 
normalized.sort_by(|a, b| { + a.get("id") + .and_then(Value::as_str) + .cmp(&b.get("id").and_then(Value::as_str)) + }); + json!({ "agents": normalized }) +} + +fn normalize_agent_modes(value: &Value) -> Value { + let modes = value + .get("modes") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + let mut normalized = Vec::new(); + for mode in modes { + let mut map = Map::new(); + if let Some(id) = mode.get("id").and_then(Value::as_str) { + map.insert("id".to_string(), Value::String(id.to_string())); + } + if let Some(name) = mode.get("name").and_then(Value::as_str) { + map.insert("name".to_string(), Value::String(name.to_string())); + } + if mode.get("description").is_some() { + map.insert("description".to_string(), Value::Bool(true)); + } + normalized.push(Value::Object(map)); + } + normalized.sort_by(|a, b| { + a.get("id") + .and_then(Value::as_str) + .cmp(&b.get("id").and_then(Value::as_str)) + }); + json!({ "modes": normalized }) +} + +fn normalize_sessions(value: &Value) -> Value { + let sessions = value + .get("sessions") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + // For the global sessions list snapshot, we just verify the count and structure + // since the specific agents/sessions vary based on test configuration + json!({ + "sessionCount": sessions.len(), + "hasExpectedFields": sessions.iter().all(|s| { + s.get("sessionId").is_some() + && s.get("agent").is_some() + && s.get("agentMode").is_some() + && s.get("permissionMode").is_some() + && s.get("ended").is_some() + }) + }) +} + +fn normalize_create_session(value: &Value) -> Value { + let mut map = Map::new(); + if let Some(healthy) = value.get("healthy").and_then(Value::as_bool) { + map.insert("healthy".to_string(), Value::Bool(healthy)); + } + if value.get("nativeSessionId").is_some() { + map.insert("nativeSessionId".to_string(), Value::String("".to_string())); + } + if let Some(error) = value.get("error") { + map.insert("error".to_string(), error.clone()); + } + 
Value::Object(map) +} + +fn normalize_health(value: &Value) -> Value { + let mut map = Map::new(); + if let Some(status) = value.get("status").and_then(Value::as_str) { + map.insert("status".to_string(), Value::String(status.to_string())); + } + Value::Object(map) +} + +fn snapshot_status(status: StatusCode) -> Value { + json!({ "status": status.as_u16() }) +} + +fn snapshot_cors(status: StatusCode, headers: &HeaderMap) -> Value { + let mut map = Map::new(); + map.insert("status".to_string(), Value::Number(status.as_u16().into())); + for name in [ + header::ACCESS_CONTROL_ALLOW_ORIGIN, + header::ACCESS_CONTROL_ALLOW_METHODS, + header::ACCESS_CONTROL_ALLOW_HEADERS, + header::ACCESS_CONTROL_ALLOW_CREDENTIALS, + header::VARY, + ] { + if let Some(value) = headers.get(&name) { + map.insert( + name.as_str().to_string(), + Value::String(value.to_str().unwrap_or("").to_string()), + ); + } + } + Value::Object(map) +} + +fn snapshot_name(prefix: &str, agent: Option) -> String { + match agent { + Some(agent) => format!("{prefix}_{}", agent.as_str()), + None => format!("{prefix}_global"), + } +} + + +async fn poll_events_until_match( + app: &Router, + session_id: &str, + timeout: Duration, + stop: F, +) -> Vec +where + F: Fn(&[Value]) -> bool, +{ + let start = Instant::now(); + let mut offset = 0u64; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if stop(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + 
events +} + +fn find_permission_id(events: &[Value]) -> Option { + events + .iter() + .find_map(|event| { + event + .get("type") + .and_then(Value::as_str) + .filter(|value| *value == "permission.requested") + .and_then(|_| event.get("data")) + .and_then(|data| data.get("permission_id")) + .and_then(Value::as_str) + .map(|id| id.to_string()) + }) +} + +fn find_question_id_and_answers(events: &[Value]) -> Option<(String, Vec>)> { + let question = events.iter().find_map(|event| { + let event_type = event.get("type").and_then(Value::as_str)?; + if event_type != "question.requested" { + return None; + } + event.get("data").cloned() + })?; + let id = question.get("question_id").and_then(Value::as_str)?.to_string(); + let options = question + .get("options") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + let mut answers = Vec::new(); + if let Some(option) = options.first().and_then(Value::as_str) { + answers.push(vec![option.to_string()]); + } else { + answers.push(Vec::new()); + } + Some((id, answers)) +} + +async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_id = format!("session-{}", config.agent.as_str()); + create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + send_message(app, &session_id).await; + + let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await; + let events = truncate_after_first_stop(&events); + assert!( + !events.is_empty(), + "no events collected for {}", + config.agent + ); + assert!( + should_stop(&events), + "timed out waiting for assistant/error event for {}", + config.agent + ); + let normalized = normalize_events(&events); + insta::with_settings!({ + snapshot_suffix => snapshot_name("http_events", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalized); + }); +} + +async fn run_sse_events_snapshot(app: &Router, 
config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_id = format!("sse-{}", config.agent.as_str()); + create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + + let sse_task = { + let app = app.clone(); + let session_id = session_id.clone(); + tokio::spawn(async move { + read_sse_events(&app, &session_id, Duration::from_secs(120)).await + }) + }; + + send_message(app, &session_id).await; + + let events = sse_task.await.expect("sse task"); + let events = truncate_after_first_stop(&events); + assert!( + !events.is_empty(), + "no sse events collected for {}", + config.agent + ); + assert!( + should_stop(&events), + "timed out waiting for assistant/error event for {}", + config.agent + ); + let normalized = normalize_events(&events); + insta::with_settings!({ + snapshot_suffix => snapshot_name("sse_events", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalized); + }); +} + +async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_id = format!("turn-{}", config.agent.as_str()); + create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + + let events = read_turn_stream_events(app, &session_id, Duration::from_secs(120)).await; + let events = truncate_after_first_stop(&events); + assert!( + !events.is_empty(), + "no turn stream events collected for {}", + config.agent + ); + assert!( + should_stop(&events), + "timed out waiting for assistant/error event for {}", + config.agent + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auth_snapshots() { + let token = "test-token"; + let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string())); + + let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await; + 
assert_eq!(status, StatusCode::OK, "health should be public"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_health_public", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": normalize_health(&payload), + })); + }); + + let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await; + assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_missing_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + + let request = Request::builder() + .method(Method::GET) + .uri("/v1/agents") + .header(header::AUTHORIZATION, "Bearer wrong-token") + .body(Body::empty()) + .expect("auth invalid request"); + let (status, _headers, payload) = send_json_request(&app.app, request).await; + assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_invalid_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + + let request = Request::builder() + .method(Method::GET) + .uri("/v1/agents") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .body(Body::empty()) + .expect("auth valid request"); + let (status, _headers, payload) = send_json_request(&app.app, request).await; + assert_eq!(status, StatusCode::OK, "valid token should allow request"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_valid_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": normalize_agent_list(&payload), + })); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn cors_snapshots() { + let cors = CorsLayer::new() + .allow_origin(vec![HeaderValue::from_static("http://example.com")]) + .allow_methods([Method::GET, 
Method::POST]) + .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]) + .allow_credentials(true); + let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors)); + + let preflight = Request::builder() + .method(Method::OPTIONS) + .uri("/v1/health") + .header(header::ORIGIN, "http://example.com") + .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET") + .header( + header::ACCESS_CONTROL_REQUEST_HEADERS, + "authorization,content-type", + ) + .body(Body::empty()) + .expect("cors preflight request"); + let (status, headers, _payload) = send_request(&app.app, preflight).await; + insta::with_settings!({ + snapshot_suffix => snapshot_name("cors_preflight", None), + }, { + insta::assert_yaml_snapshot!(snapshot_cors(status, &headers)); + }); + + let actual = Request::builder() + .method(Method::GET) + .uri("/v1/health") + .header(header::ORIGIN, "http://example.com") + .body(Body::empty()) + .expect("cors actual request"); + let (status, headers, payload) = send_json_request(&app.app, actual).await; + assert_eq!(status, StatusCode::OK, "cors actual request should succeed"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("cors_actual", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "cors": snapshot_cors(status, &headers), + "payload": normalize_health(&payload), + })); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn api_endpoints_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + + let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await; + assert_eq!(status, StatusCode::OK, "health status"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("health", None), + }, { + insta::assert_yaml_snapshot!(normalize_health(&health)); + }); + + // List agents (just verify the API returns correct agent IDs, not install state) + let (status, agents) = send_json(&app.app, Method::GET, 
"/v1/agents", None).await; + assert_eq!(status, StatusCode::OK, "agents list"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agents_list", None), + }, { + insta::assert_yaml_snapshot!(normalize_agent_list(&agents)); + }); + + // Install agents (ensure they're available for subsequent tests) + for config in &configs { + let _guard = apply_credentials(&config.credentials); + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/agents/{}/install", config.agent.as_str()), + Some(json!({})), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "install agent"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agent_install", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } + + let mut session_ids = Vec::new(); + for config in &configs { + let _guard = apply_credentials(&config.credentials); + let (status, modes) = send_json( + &app.app, + Method::GET, + &format!("/v1/agents/{}/modes", config.agent.as_str()), + None, + ) + .await; + assert_eq!(status, StatusCode::OK, "agent modes"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalize_agent_modes(&modes)); + }); + + let session_id = format!("snapshot-{}", config.agent.as_str()); + let permission_mode = test_permission_mode(config.agent); + let (status, created) = send_json( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": config.agent.as_str(), + "permissionMode": permission_mode + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("create_session", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalize_create_session(&created)); + }); + session_ids.push((config.agent, session_id)); + } + + let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await; + 
assert_eq!(status, StatusCode::OK, "list sessions"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("sessions_list", None), + }, { + insta::assert_yaml_snapshot!(normalize_sessions(&sessions)); + }); + + for (agent, session_id) in &session_ids { + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send message"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("send_message", Some(*agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn approval_flow_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + + for config in &configs { + // OpenCode doesn't support "plan" permission mode required for approval flows + if config.agent == AgentId::Opencode { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let permission_session = format!("perm-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &permission_session, "plan").await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{permission_session}/messages"), + Some(json!({ "message": PERMISSION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); + + let permission_events = poll_events_until_match( + &app.app, + &permission_session, + Duration::from_secs(120), + |events| find_permission_id(events).is_some() || should_stop(events), + ) + .await; + let permission_events = truncate_permission_events(&permission_events); + insta::with_settings!({ + snapshot_suffix => snapshot_name("permission_events", Some(config.agent)), + }, { + 
insta::assert_yaml_snapshot!(normalize_events(&permission_events)); + }); + + if let Some(permission_id) = find_permission_id(&permission_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/{permission_id}/reply" + ), + Some(json!({ "reply": "once" })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/missing-permission/reply" + ), + Some(json!({ "reply": "once" })), + ) + .await; + assert!(!status.is_success(), "missing permission id should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } + + let question_reply_session = format!("question-reply-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reply_session, "plan").await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{question_reply_session}/messages"), + Some(json!({ "message": QUESTION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); + + let question_events = poll_events_until_match( + &app.app, + &question_reply_session, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), + ) + .await; + let question_events = truncate_question_events(&question_events); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalize_events(&question_events)); + }); + + if let 
Some((question_id, answers)) = find_question_id_and_answers(&question_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" + ), + Some(json!({ "answers": answers })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reply", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/missing-question/reply" + ), + Some(json!({ "answers": [] })), + ) + .await; + assert!(!status.is_success(), "missing question id should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } + + let question_reject_session = format!("question-reject-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reject_session, "plan").await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{question_reject_session}/messages"), + Some(json!({ "message": QUESTION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); + + let reject_events = poll_events_until_match( + &app.app, + &question_reject_session, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), + ) + .await; + let reject_events = truncate_question_events(&reject_events); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalize_events(&reject_events)); + }); + + if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { + let 
status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" + ), + None, + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reject", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/missing-question/reject" + ), + None, + ) + .await; + assert!(!status.is_success(), "missing question id reject should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn http_events_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. 
+ // See: https://github.com/opencode-ai/opencode/issues/XXX + if config.agent == AgentId::Opencode { + continue; + } + run_http_events_snapshot(&app.app, config).await; + } +} + +async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_a = format!("concurrent-a-{}", config.agent.as_str()); + let session_b = format!("concurrent-b-{}", config.agent.as_str()); + let perm_mode = test_permission_mode(config.agent); + create_session(app, config.agent, &session_a, perm_mode).await; + create_session(app, config.agent, &session_b, perm_mode).await; + + let app_a = app.clone(); + let app_b = app.clone(); + let send_a = send_message(&app_a, &session_a); + let send_b = send_message(&app_b, &session_b); + tokio::join!(send_a, send_b); + + let app_a = app.clone(); + let app_b = app.clone(); + let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120)); + let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120)); + let (events_a, events_b) = tokio::join!(poll_a, poll_b); + let events_a = truncate_after_first_stop(&events_a); + let events_b = truncate_after_first_stop(&events_b); + + assert!( + !events_a.is_empty(), + "no events collected for concurrent session a {}", + config.agent + ); + assert!( + !events_b.is_empty(), + "no events collected for concurrent session b {}", + config.agent + ); + assert!( + should_stop(&events_a), + "timed out waiting for assistant/error event for concurrent session a {}", + config.agent + ); + assert!( + should_stop(&events_b), + "timed out waiting for assistant/error event for concurrent session b {}", + config.agent + ); + + let snapshot = json!({ + "session_a": normalize_events(&events_a), + "session_b": normalize_events(&events_b), + }); + insta::with_settings!({ + snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)), + }, { + 
insta::assert_yaml_snapshot!(snapshot); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn sse_events_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. + // See: https://github.com/opencode-ai/opencode/issues/XXX + if config.agent == AgentId::Opencode { + continue; + } + run_sse_events_snapshot(&app.app, config).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn turn_stream_route() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. + // See: https://github.com/opencode-ai/opencode/issues/XXX + if config.agent == AgentId::Opencode { + continue; + } + run_turn_stream_check(&app.app, config).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn concurrency_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. 
+ // See: https://github.com/opencode-ai/opencode/issues/XXX + if config.agent == AgentId::Opencode { + continue; + } + run_concurrency_snapshot(&app.app, config).await; + } +} diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap new file mode 100644 index 0000000..bffc4ac --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap @@ -0,0 +1,6 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1016 +expression: snapshot_status(status) +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap new file mode 100644 index 0000000..d858ef4 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap @@ -0,0 +1,12 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 
1034 +expression: normalize_agent_modes(&modes) +--- +modes: + - description: true + id: build + name: Build + - description: true + id: plan + name: Plan diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap new file mode 100644 index 0000000..f0bd98a --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap @@ -0,0 +1,7 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1053 +expression: normalize_create_session(&created) +--- +healthy: true +nativeSessionId: "" diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap rename to 
server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap new file mode 100644 index 0000000..0ce7ff9 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap @@ -0,0 +1,6 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1078 +expression: snapshot_status(status) +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap new file mode 100644 index 0000000..1b31317 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap @@ -0,0 +1,17 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1119 +expression: normalize_events(&permission_events) +--- +- metadata: true + seq: 1 + session: started + source: daemon + synthetic: true + type: session.started +- metadata: true + seq: 2 + session: started + source: agent + synthetic: false + type: session.started diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap similarity index 68% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap index e318618..b27511c 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap @@ -1,19 +1,15 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1025 +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1112 expression: normalize_events(&permission_events) --- - metadata: true seq: 1 session: started - source: daemon - synthetic: true type: session.started - metadata: true seq: 2 session: started - source: agent - synthetic: false type: session.started - item: content_types: @@ -22,16 +18,12 @@ expression: normalize_events(&permission_events) role: assistant status: in_progress seq: 3 - source: daemon - synthetic: true type: item.started - delta: delta: "" item_id: "" native_item_id: "" seq: 4 - source: daemon - synthetic: true type: item.delta - 
item: content_types: @@ -40,6 +32,4 @@ expression: normalize_events(&permission_events) role: assistant status: completed seq: 5 - source: agent - synthetic: false type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap new file mode 100644 index 0000000..de6549e --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap @@ -0,0 +1,11 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1152 +expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" 
+--- +payload: + detail: "invalid request: unknown permission id: missing-permission" + status: 400 + title: Invalid Request + type: "urn:sandbox-agent:error:invalid_request" +status: 400 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap new file mode 100644 index 0000000..84dd20e --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap @@ -0,0 +1,35 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1236 +expression: normalize_events(&reject_events) +--- +- metadata: true + seq: 1 + session: started + 
type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap new file mode 100644 index 0000000..075fe0f --- /dev/null +++ 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap @@ -0,0 +1,11 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1276 +expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" +--- +payload: + detail: "invalid request: unknown question id: missing-question" + status: 400 + title: Invalid Request + type: "urn:sandbox-agent:error:invalid_request" +status: 400 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap new file mode 100644 index 0000000..e525eb2 --- /dev/null +++ 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap @@ -0,0 +1,35 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1174 +expression: normalize_events(&question_events) +--- +- metadata: true + seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap diff --git 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap new file mode 100644 index 0000000..2c65fd0 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap @@ -0,0 +1,11 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1214 +expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" +--- +payload: + detail: "invalid request: unknown question id: missing-question" + status: 400 + title: Invalid Request + type: "urn:sandbox-agent:error:invalid_request" +status: 400 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap diff --git 
a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap similarity index 85% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap index 3047e92..a6fdd2f 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap @@ -1,6 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1259 +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1351 expression: snapshot --- session_a: @@ -23,16 +23,16 @@ session_a: role: assistant status: in_progress seq: 3 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.started - delta: delta: "" item_id: "" native_item_id: "" seq: 4 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.delta - item: content_types: @@ -64,16 +64,16 @@ session_b: role: assistant status: in_progress seq: 3 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.started - delta: delta: "" item_id: "" native_item_id: "" seq: 4 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.delta - item: content_types: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap 
similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap new file mode 100644 index 0000000..f9abaa0 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap @@ -0,0 +1,67 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 1344 +expression: snapshot +--- +session_a: + - metadata: true + seq: 1 + session: started + type: session.started + - metadata: true + seq: 2 + session: started + type: session.started + - item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta + - item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed +session_b: + - metadata: true + seq: 1 + session: started + type: session.started + - metadata: true + seq: 2 + session: started + type: session.started + - item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta + - item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap similarity index 80% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap index 19d0fb3..7ad3222 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap @@ -1,6 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 742 +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 811 expression: normalized --- - metadata: true @@ -22,16 +22,16 @@ expression: normalized role: assistant status: in_progress seq: 3 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.started - delta: delta: "" item_id: "" native_item_id: "" seq: 4 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.delta - item: content_types: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap new file mode 100644 index 0000000..1686c1e --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap @@ -0,0 +1,35 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 804 +expression: normalized +--- +- metadata: true + seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap similarity index 80% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap index 4c732b3..48235e5 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap @@ -1,6 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 775 +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 848 expression: normalized --- - metadata: true @@ -22,16 +22,16 @@ expression: normalized role: 
assistant status: in_progress seq: 3 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.started - delta: delta: "" item_id: "" native_item_id: "" seq: 4 - source: daemon - synthetic: true + source: agent + synthetic: false type: item.delta - item: content_types: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap similarity index 73% rename from server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap index 99d8675..dc82798 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap @@ -1,5 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 848 expression: normalized --- - metadata: true @@ -62,30 +63,11 @@ expression: normalized synthetic: false type: item.started - item: - content_types: - - status - kind: status - role: system + content_types: [] + kind: message + role: assistant status: completed seq: 8 source: agent synthetic: false type: item.completed -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 10 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap new file mode 100644 index 0000000..add0b00 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap @@ -0,0 +1,35 @@ +--- +source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +assertion_line: 841 +expression: normalized +--- +- metadata: true + seq: 1 + session: started + type: session.started +- metadata: true + seq: 2 + session: started + type: session.started +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 3 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 4 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 5 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs index a4a376a..b0ce134 100644 --- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs @@ -1,1395 +1 @@ -use std::collections::BTreeMap; -use std::time::{Duration, Instant}; - -use axum::body::{Body, Bytes}; -use axum::http::{header, HeaderMap, HeaderValue, Method, Request, StatusCode}; -use axum::Router; -use futures::StreamExt; -use http_body_util::BodyExt; -use serde_json::{json, Map, Value}; -use tempfile::TempDir; - -use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; -use sandbox_agent_agent_management::testing::{test_agents_from_env, TestAgentConfig}; -use sandbox_agent_agent_credentials::ExtractedCredentials; -use sandbox_agent::router::{build_router, AppState, AuthConfig}; -use tower::util::ServiceExt; -use tower_http::cors::CorsLayer; - -const PROMPT: &str = "Reply with exactly the single word OK."; -const PERMISSION_PROMPT: &str = "List files in 
the current directory using available tools."; -const QUESTION_PROMPT: &str = - "Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself."; - -struct TestApp { - app: Router, - _install_dir: TempDir, -} - -impl TestApp { - fn new() -> Self { - Self::new_with_auth(AuthConfig::disabled()) - } - - fn new_with_auth(auth: AuthConfig) -> Self { - Self::new_with_auth_and_cors(auth, None) - } - - fn new_with_auth_and_cors(auth: AuthConfig, cors: Option) -> Self { - let install_dir = tempfile::tempdir().expect("create temp install dir"); - let manager = AgentManager::new(install_dir.path()) - .expect("create agent manager"); - let state = AppState::new(auth, manager); - let mut app = build_router(state); - if let Some(cors) = cors { - app = app.layer(cors); - } - Self { - app, - _install_dir: install_dir, - } - } -} - -struct EnvGuard { - saved: BTreeMap>, -} - -impl Drop for EnvGuard { - fn drop(&mut self) { - for (key, value) in &self.saved { - match value { - Some(value) => std::env::set_var(key, value), - None => std::env::remove_var(key), - } - } - } -} - -fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard { - let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"]; - let mut saved = BTreeMap::new(); - for key in keys { - saved.insert(key.to_string(), std::env::var(key).ok()); - } - - match creds.anthropic.as_ref() { - Some(cred) => { - std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key); - std::env::set_var("CLAUDE_API_KEY", &cred.api_key); - } - None => { - std::env::remove_var("ANTHROPIC_API_KEY"); - std::env::remove_var("CLAUDE_API_KEY"); - } - } - - match creds.openai.as_ref() { - Some(cred) => { - std::env::set_var("OPENAI_API_KEY", &cred.api_key); - std::env::set_var("CODEX_API_KEY", &cred.api_key); - } - None => { - std::env::remove_var("OPENAI_API_KEY"); - std::env::remove_var("CODEX_API_KEY"); - } - } - - EnvGuard { saved } -} - -async fn send_json(app: &Router, 
method: Method, path: &str, body: Option) -> (StatusCode, Value) { - let mut builder = Request::builder().method(method).uri(path); - let body = if let Some(body) = body { - builder = builder.header("content-type", "application/json"); - Body::from(body.to_string()) - } else { - Body::empty() - }; - let request = builder.body(body).expect("request"); - let response = app - .clone() - .oneshot(request) - .await - .expect("request handled"); - let status = response.status(); - let bytes = response - .into_body() - .collect() - .await - .expect("read body") - .to_bytes(); - let value = if bytes.is_empty() { - Value::Null - } else { - serde_json::from_slice(&bytes).unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string())) - }; - (status, value) -} - -async fn send_request(app: &Router, request: Request) -> (StatusCode, HeaderMap, Bytes) { - let response = app - .clone() - .oneshot(request) - .await - .expect("request handled"); - let status = response.status(); - let headers = response.headers().clone(); - let bytes = response - .into_body() - .collect() - .await - .expect("read body") - .to_bytes(); - (status, headers, bytes) -} - -async fn send_json_request( - app: &Router, - request: Request, -) -> (StatusCode, HeaderMap, Value) { - let (status, headers, bytes) = send_request(app, request).await; - let value = if bytes.is_empty() { - Value::Null - } else { - serde_json::from_slice(&bytes) - .unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string())) - }; - (status, headers, value) -} - -async fn send_status(app: &Router, method: Method, path: &str, body: Option) -> StatusCode { - let (status, _) = send_json(app, method, path, body).await; - status -} - -async fn install_agent(app: &Router, agent: AgentId) { - let status = send_status( - app, - Method::POST, - &format!("/v1/agents/{}/install", agent.as_str()), - Some(json!({})), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "install {agent}"); -} - -/// Returns the default 
permission mode for tests. OpenCode only supports "default", -/// while other agents support "bypass" which skips tool approval. -fn test_permission_mode(agent: AgentId) -> &'static str { - match agent { - AgentId::Opencode => "default", - _ => "bypass", - } -} - -async fn create_session(app: &Router, agent: AgentId, session_id: &str, permission_mode: &str) { - let status = send_status( - app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(json!({ - "agent": agent.as_str(), - "permissionMode": permission_mode - })), - ) - .await; - assert_eq!(status, StatusCode::OK, "create session {agent}"); -} - -async fn send_message(app: &Router, session_id: &str) { - let status = send_status( - app, - Method::POST, - &format!("/v1/sessions/{session_id}/messages"), - Some(json!({ "message": PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send message"); -} - -async fn poll_events_until( - app: &Router, - session_id: &str, - timeout: Duration, -) -> Vec { - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if should_stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events -} - -async fn read_sse_events( - app: &Router, - session_id: &str, - timeout: Duration, -) -> Vec { - let request = Request::builder() - .method(Method::GET) - .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0")) - .body(Body::empty()) - 
.expect("sse request"); - let response = app - .clone() - .oneshot(request) - .await - .expect("sse response"); - assert_eq!(response.status(), StatusCode::OK, "sse status"); - - let mut stream = response.into_body().into_data_stream(); - let mut buffer = String::new(); - let mut events = Vec::new(); - let start = Instant::now(); - loop { - let remaining = match timeout.checked_sub(start.elapsed()) { - Some(remaining) if !remaining.is_zero() => remaining, - _ => break, - }; - let next = tokio::time::timeout(remaining, stream.next()).await; - let chunk: Bytes = match next { - Ok(Some(Ok(chunk))) => chunk, - Ok(Some(Err(_))) => break, - Ok(None) => break, - Err(_) => break, - }; - buffer.push_str(&String::from_utf8_lossy(&chunk)); - while let Some(idx) = buffer.find("\n\n") { - let block = buffer[..idx].to_string(); - buffer = buffer[idx + 2..].to_string(); - if let Some(event) = parse_sse_block(&block) { - events.push(event); - } - } - if should_stop(&events) { - break; - } - } - events -} - -async fn read_turn_stream_events( - app: &Router, - session_id: &str, - timeout: Duration, -) -> Vec { - let request = Request::builder() - .method(Method::POST) - .uri(format!("/v1/sessions/{session_id}/messages/stream")) - .header("content-type", "application/json") - .body(Body::from(json!({ "message": PROMPT }).to_string())) - .expect("turn stream request"); - let response = app - .clone() - .oneshot(request) - .await - .expect("turn stream response"); - assert_eq!(response.status(), StatusCode::OK, "turn stream status"); - - let mut stream = response.into_body().into_data_stream(); - let mut buffer = String::new(); - let mut events = Vec::new(); - let start = Instant::now(); - let mut ended = false; - loop { - let remaining = match timeout.checked_sub(start.elapsed()) { - Some(remaining) if !remaining.is_zero() => remaining, - _ => break, - }; - let next = tokio::time::timeout(remaining, stream.next()).await; - let chunk: Bytes = match next { - Ok(Some(Ok(chunk))) => 
chunk, - Ok(Some(Err(_))) => break, - Ok(None) => { - ended = true; - break; - } - Err(_) => break, - }; - buffer.push_str(&String::from_utf8_lossy(&chunk)); - while let Some(idx) = buffer.find("\n\n") { - let block = buffer[..idx].to_string(); - buffer = buffer[idx + 2..].to_string(); - if let Some(event) = parse_sse_block(&block) { - events.push(event); - } - } - } - assert!(ended, "turn stream did not close before timeout"); - events -} - -fn parse_sse_block(block: &str) -> Option { - let mut data_lines = Vec::new(); - for line in block.lines() { - if let Some(rest) = line.strip_prefix("data:") { - data_lines.push(rest.trim_start()); - } - } - if data_lines.is_empty() { - return None; - } - let data = data_lines.join("\n"); - serde_json::from_str(&data).ok() -} - -fn should_stop(events: &[Value]) -> bool { - events.iter().any(|event| is_assistant_message(event) || is_error_event(event)) -} - -fn is_assistant_message(event: &Value) -> bool { - event - .get("type") - .and_then(Value::as_str) - .map(|event_type| event_type == "item.completed") - .unwrap_or(false) - && event - .get("data") - .and_then(|data| data.get("item")) - .and_then(|item| item.get("role")) - .and_then(Value::as_str) - .map(|role| role == "assistant") - .unwrap_or(false) -} - -fn is_error_event(event: &Value) -> bool { - matches!( - event.get("type").and_then(Value::as_str), - Some("error") | Some("agent.unparsed") - ) -} - -fn is_unparsed_event(event: &Value) -> bool { - event - .get("type") - .and_then(Value::as_str) - .map(|value| value == "agent.unparsed") - .unwrap_or(false) -} - -fn is_permission_event(event: &Value) -> bool { - event - .get("type") - .and_then(Value::as_str) - .map(|value| value == "permission.requested") - .unwrap_or(false) -} - -fn is_question_event(event: &Value) -> bool { - event - .get("type") - .and_then(Value::as_str) - .map(|value| value == "question.requested") - .unwrap_or(false) -} - -fn truncate_permission_events(events: &[Value]) -> Vec { - if let Some(idx) 
= events.iter().position(is_permission_event) { - return events[..=idx].to_vec(); - } - if let Some(idx) = events.iter().position(is_assistant_message) { - return events[..=idx].to_vec(); - } - events.to_vec() -} - -fn truncate_question_events(events: &[Value]) -> Vec { - if let Some(idx) = events.iter().position(is_question_event) { - return events[..=idx].to_vec(); - } - if let Some(idx) = events.iter().position(is_assistant_message) { - return events[..=idx].to_vec(); - } - events.to_vec() -} - -fn normalize_events(events: &[Value]) -> Value { - assert!( - !events.iter().any(is_unparsed_event), - "agent.unparsed event encountered" - ); - let normalized = events - .iter() - .enumerate() - .map(|(idx, event)| normalize_event(event, idx + 1)) - .collect::>(); - Value::Array(normalized) -} - -fn truncate_after_first_stop(events: &[Value]) -> Vec { - if let Some(idx) = events - .iter() - .position(|event| is_assistant_message(event) || is_error_event(event)) - { - return events[..=idx].to_vec(); - } - events.to_vec() -} - -fn normalize_event(event: &Value, seq: usize) -> Value { - let mut map = Map::new(); - map.insert("seq".to_string(), Value::Number(seq.into())); - if let Some(event_type) = event.get("type").and_then(Value::as_str) { - map.insert("type".to_string(), Value::String(event_type.to_string())); - } - if let Some(source) = event.get("source").and_then(Value::as_str) { - map.insert("source".to_string(), Value::String(source.to_string())); - } - if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) { - map.insert("synthetic".to_string(), Value::Bool(synthetic)); - } - - let data = event.get("data").unwrap_or(&Value::Null); - match event.get("type").and_then(Value::as_str).unwrap_or("") { - "session.started" => { - map.insert("session".to_string(), Value::String("started".to_string())); - if data.get("metadata").is_some() { - map.insert("metadata".to_string(), Value::Bool(true)); - } - } - "session.ended" => { - 
map.insert("session".to_string(), Value::String("ended".to_string())); - map.insert("ended".to_string(), normalize_session_end(data)); - } - "item.started" | "item.completed" => { - if let Some(item) = data.get("item") { - map.insert("item".to_string(), normalize_item(item)); - } - } - "item.delta" => { - let mut delta = Map::new(); - if data.get("item_id").is_some() { - delta.insert("item_id".to_string(), Value::String("".to_string())); - } - if data.get("native_item_id").is_some() { - delta.insert("native_item_id".to_string(), Value::String("".to_string())); - } - if data.get("delta").is_some() { - delta.insert("delta".to_string(), Value::String("".to_string())); - } - map.insert("delta".to_string(), Value::Object(delta)); - } - "permission.requested" | "permission.resolved" => { - map.insert("permission".to_string(), normalize_permission(data)); - } - "question.requested" | "question.resolved" => { - map.insert("question".to_string(), normalize_question(data)); - } - "error" => { - map.insert("error".to_string(), normalize_error(data)); - } - "agent.unparsed" => { - map.insert("unparsed".to_string(), Value::Bool(true)); - } - _ => {} - } - Value::Object(map) -} - -fn normalize_item(item: &Value) -> Value { - let mut map = Map::new(); - if let Some(kind) = item.get("kind").and_then(Value::as_str) { - map.insert("kind".to_string(), Value::String(kind.to_string())); - } - if let Some(role) = item.get("role").and_then(Value::as_str) { - map.insert("role".to_string(), Value::String(role.to_string())); - } - if let Some(status) = item.get("status").and_then(Value::as_str) { - map.insert("status".to_string(), Value::String(status.to_string())); - } - if let Some(content) = item.get("content").and_then(Value::as_array) { - let types = content - .iter() - .filter_map(|part| part.get("type").and_then(Value::as_str)) - .map(|value| Value::String(value.to_string())) - .collect::>(); - map.insert("content_types".to_string(), Value::Array(types)); - } - Value::Object(map) -} 
- -fn normalize_session_end(data: &Value) -> Value { - let mut map = Map::new(); - if let Some(reason) = data.get("reason").and_then(Value::as_str) { - map.insert("reason".to_string(), Value::String(reason.to_string())); - } - if let Some(terminated_by) = data.get("terminated_by").and_then(Value::as_str) { - map.insert("terminated_by".to_string(), Value::String(terminated_by.to_string())); - } - Value::Object(map) -} - -fn normalize_error(error: &Value) -> Value { - let mut map = Map::new(); - if let Some(code) = error.get("code").and_then(Value::as_str) { - map.insert("code".to_string(), Value::String(code.to_string())); - } - if let Some(message) = error.get("message").and_then(Value::as_str) { - map.insert("message".to_string(), Value::String(message.to_string())); - } - Value::Object(map) -} - -fn normalize_question(question: &Value) -> Value { - let mut map = Map::new(); - if question.get("question_id").is_some() { - map.insert("id".to_string(), Value::String("".to_string())); - } - if let Some(options) = question.get("options").and_then(Value::as_array) { - map.insert("options".to_string(), Value::Number(options.len().into())); - } - if let Some(status) = question.get("status").and_then(Value::as_str) { - map.insert("status".to_string(), Value::String(status.to_string())); - } - Value::Object(map) -} - -fn normalize_permission(permission: &Value) -> Value { - let mut map = Map::new(); - if permission.get("permission_id").is_some() { - map.insert("id".to_string(), Value::String("".to_string())); - } - if let Some(value) = permission.get("action").and_then(Value::as_str) { - map.insert("action".to_string(), Value::String(value.to_string())); - } - if let Some(status) = permission.get("status").and_then(Value::as_str) { - map.insert("status".to_string(), Value::String(status.to_string())); - } - Value::Object(map) -} - -fn normalize_agent_list(value: &Value) -> Value { - let agents = value - .get("agents") - .and_then(Value::as_array) - .cloned() - 
.unwrap_or_default(); - let mut normalized = Vec::new(); - for agent in agents { - let mut map = Map::new(); - if let Some(id) = agent.get("id").and_then(Value::as_str) { - map.insert("id".to_string(), Value::String(id.to_string())); - } - // Skip installed/version/path fields - they depend on local environment - // and make snapshots non-deterministic - normalized.push(Value::Object(map)); - } - normalized.sort_by(|a, b| { - a.get("id") - .and_then(Value::as_str) - .cmp(&b.get("id").and_then(Value::as_str)) - }); - json!({ "agents": normalized }) -} - -fn normalize_agent_modes(value: &Value) -> Value { - let modes = value - .get("modes") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - let mut normalized = Vec::new(); - for mode in modes { - let mut map = Map::new(); - if let Some(id) = mode.get("id").and_then(Value::as_str) { - map.insert("id".to_string(), Value::String(id.to_string())); - } - if let Some(name) = mode.get("name").and_then(Value::as_str) { - map.insert("name".to_string(), Value::String(name.to_string())); - } - if mode.get("description").is_some() { - map.insert("description".to_string(), Value::Bool(true)); - } - normalized.push(Value::Object(map)); - } - normalized.sort_by(|a, b| { - a.get("id") - .and_then(Value::as_str) - .cmp(&b.get("id").and_then(Value::as_str)) - }); - json!({ "modes": normalized }) -} - -fn normalize_sessions(value: &Value) -> Value { - let sessions = value - .get("sessions") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - // For the global sessions list snapshot, we just verify the count and structure - // since the specific agents/sessions vary based on test configuration - json!({ - "sessionCount": sessions.len(), - "hasExpectedFields": sessions.iter().all(|s| { - s.get("sessionId").is_some() - && s.get("agent").is_some() - && s.get("agentMode").is_some() - && s.get("permissionMode").is_some() - && s.get("ended").is_some() - }) - }) -} - -fn normalize_create_session(value: &Value) -> 
Value { - let mut map = Map::new(); - if let Some(healthy) = value.get("healthy").and_then(Value::as_bool) { - map.insert("healthy".to_string(), Value::Bool(healthy)); - } - if value.get("nativeSessionId").is_some() { - map.insert("nativeSessionId".to_string(), Value::String("".to_string())); - } - if let Some(error) = value.get("error") { - map.insert("error".to_string(), error.clone()); - } - Value::Object(map) -} - -fn normalize_health(value: &Value) -> Value { - let mut map = Map::new(); - if let Some(status) = value.get("status").and_then(Value::as_str) { - map.insert("status".to_string(), Value::String(status.to_string())); - } - Value::Object(map) -} - -fn snapshot_status(status: StatusCode) -> Value { - json!({ "status": status.as_u16() }) -} - -fn snapshot_cors(status: StatusCode, headers: &HeaderMap) -> Value { - let mut map = Map::new(); - map.insert("status".to_string(), Value::Number(status.as_u16().into())); - for name in [ - header::ACCESS_CONTROL_ALLOW_ORIGIN, - header::ACCESS_CONTROL_ALLOW_METHODS, - header::ACCESS_CONTROL_ALLOW_HEADERS, - header::ACCESS_CONTROL_ALLOW_CREDENTIALS, - header::VARY, - ] { - if let Some(value) = headers.get(&name) { - map.insert( - name.as_str().to_string(), - Value::String(value.to_str().unwrap_or("").to_string()), - ); - } - } - Value::Object(map) -} - -fn snapshot_name(prefix: &str, agent: Option) -> String { - match agent { - Some(agent) => format!("{prefix}_{}", agent.as_str()), - None => format!("{prefix}_global"), - } -} - - -async fn poll_events_until_match( - app: &Router, - session_id: &str, - timeout: Duration, - stop: F, -) -> Vec -where - F: Fn(&[Value]) -> bool, -{ - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let 
new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events -} - -fn find_permission_id(events: &[Value]) -> Option { - events - .iter() - .find_map(|event| { - event - .get("type") - .and_then(Value::as_str) - .filter(|value| *value == "permission.requested") - .and_then(|_| event.get("data")) - .and_then(|data| data.get("permission_id")) - .and_then(Value::as_str) - .map(|id| id.to_string()) - }) -} - -fn find_question_id_and_answers(events: &[Value]) -> Option<(String, Vec>)> { - let question = events.iter().find_map(|event| { - let event_type = event.get("type").and_then(Value::as_str)?; - if event_type != "question.requested" { - return None; - } - event.get("data").cloned() - })?; - let id = question.get("question_id").and_then(Value::as_str)?.to_string(); - let options = question - .get("options") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - let mut answers = Vec::new(); - if let Some(option) = options.first().and_then(Value::as_str) { - answers.push(vec![option.to_string()]); - } else { - answers.push(Vec::new()); - } - Some((id, answers)) -} - -async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) { - let _guard = apply_credentials(&config.credentials); - install_agent(app, config.agent).await; - - let session_id = format!("session-{}", config.agent.as_str()); - create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; - send_message(app, &session_id).await; - - let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await; - let events = truncate_after_first_stop(&events); - assert!( - !events.is_empty(), - "no events 
collected for {}", - config.agent - ); - assert!( - should_stop(&events), - "timed out waiting for assistant/error event for {}", - config.agent - ); - let normalized = normalize_events(&events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("http_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalized); - }); -} - -async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) { - let _guard = apply_credentials(&config.credentials); - install_agent(app, config.agent).await; - - let session_id = format!("sse-{}", config.agent.as_str()); - create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; - - let sse_task = { - let app = app.clone(); - let session_id = session_id.clone(); - tokio::spawn(async move { - read_sse_events(&app, &session_id, Duration::from_secs(120)).await - }) - }; - - send_message(app, &session_id).await; - - let events = sse_task.await.expect("sse task"); - let events = truncate_after_first_stop(&events); - assert!( - !events.is_empty(), - "no sse events collected for {}", - config.agent - ); - assert!( - should_stop(&events), - "timed out waiting for assistant/error event for {}", - config.agent - ); - let normalized = normalize_events(&events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("sse_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalized); - }); -} - -async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) { - let _guard = apply_credentials(&config.credentials); - install_agent(app, config.agent).await; - - let session_id = format!("turn-{}", config.agent.as_str()); - create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; - - let events = read_turn_stream_events(app, &session_id, Duration::from_secs(120)).await; - let events = truncate_after_first_stop(&events); - assert!( - !events.is_empty(), - "no turn stream events collected for {}", - config.agent - ); - 
assert!( - should_stop(&events), - "timed out waiting for assistant/error event for {}", - config.agent - ); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn auth_snapshots() { - let token = "test-token"; - let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string())); - - let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await; - assert_eq!(status, StatusCode::OK, "health should be public"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_health_public", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": normalize_health(&payload), - })); - }); - - let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await; - assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_missing_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - - let request = Request::builder() - .method(Method::GET) - .uri("/v1/agents") - .header(header::AUTHORIZATION, "Bearer wrong-token") - .body(Body::empty()) - .expect("auth invalid request"); - let (status, _headers, payload) = send_json_request(&app.app, request).await; - assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_invalid_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - - let request = Request::builder() - .method(Method::GET) - .uri("/v1/agents") - .header(header::AUTHORIZATION, format!("Bearer {token}")) - .body(Body::empty()) - .expect("auth valid request"); - let (status, _headers, payload) = send_json_request(&app.app, request).await; - assert_eq!(status, StatusCode::OK, "valid token should allow request"); - insta::with_settings!({ - snapshot_suffix => 
snapshot_name("auth_valid_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": normalize_agent_list(&payload), - })); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn cors_snapshots() { - let cors = CorsLayer::new() - .allow_origin(vec![HeaderValue::from_static("http://example.com")]) - .allow_methods([Method::GET, Method::POST]) - .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]) - .allow_credentials(true); - let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors)); - - let preflight = Request::builder() - .method(Method::OPTIONS) - .uri("/v1/health") - .header(header::ORIGIN, "http://example.com") - .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET") - .header( - header::ACCESS_CONTROL_REQUEST_HEADERS, - "authorization,content-type", - ) - .body(Body::empty()) - .expect("cors preflight request"); - let (status, headers, _payload) = send_request(&app.app, preflight).await; - insta::with_settings!({ - snapshot_suffix => snapshot_name("cors_preflight", None), - }, { - insta::assert_yaml_snapshot!(snapshot_cors(status, &headers)); - }); - - let actual = Request::builder() - .method(Method::GET) - .uri("/v1/health") - .header(header::ORIGIN, "http://example.com") - .body(Body::empty()) - .expect("cors actual request"); - let (status, headers, payload) = send_json_request(&app.app, actual).await; - assert_eq!(status, StatusCode::OK, "cors actual request should succeed"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("cors_actual", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "cors": snapshot_cors(status, &headers), - "payload": normalize_health(&payload), - })); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn api_endpoints_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - - let (status, health) = 
send_json(&app.app, Method::GET, "/v1/health", None).await; - assert_eq!(status, StatusCode::OK, "health status"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("health", None), - }, { - insta::assert_yaml_snapshot!(normalize_health(&health)); - }); - - // List agents (just verify the API returns correct agent IDs, not install state) - let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await; - assert_eq!(status, StatusCode::OK, "agents list"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agents_list", None), - }, { - insta::assert_yaml_snapshot!(normalize_agent_list(&agents)); - }); - - // Install agents (ensure they're available for subsequent tests) - for config in &configs { - let _guard = apply_credentials(&config.credentials); - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/agents/{}/install", config.agent.as_str()), - Some(json!({})), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "install agent"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agent_install", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } - - let mut session_ids = Vec::new(); - for config in &configs { - let _guard = apply_credentials(&config.credentials); - let (status, modes) = send_json( - &app.app, - Method::GET, - &format!("/v1/agents/{}/modes", config.agent.as_str()), - None, - ) - .await; - assert_eq!(status, StatusCode::OK, "agent modes"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_agent_modes(&modes)); - }); - - let session_id = format!("snapshot-{}", config.agent.as_str()); - let permission_mode = test_permission_mode(config.agent); - let (status, created) = send_json( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(json!({ - "agent": config.agent.as_str(), - "permissionMode": permission_mode - })), 
- ) - .await; - assert_eq!(status, StatusCode::OK, "create session"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("create_session", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_create_session(&created)); - }); - session_ids.push((config.agent, session_id)); - } - - let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await; - assert_eq!(status, StatusCode::OK, "list sessions"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("sessions_list", None), - }, { - insta::assert_yaml_snapshot!(normalize_sessions(&sessions)); - }); - - for (agent, session_id) in &session_ids { - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/messages"), - Some(json!({ "message": PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send message"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("send_message", Some(*agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn approval_flow_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - - for config in &configs { - // OpenCode doesn't support "plan" permission mode required for approval flows - if config.agent == AgentId::Opencode { - continue; - } - - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let permission_session = format!("perm-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &permission_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{permission_session}/messages"), - Some(json!({ "message": PERMISSION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); - - let permission_events = poll_events_until_match( - 
&app.app, - &permission_session, - Duration::from_secs(120), - |events| find_permission_id(events).is_some() || should_stop(events), - ) - .await; - let permission_events = truncate_permission_events(&permission_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&permission_events)); - }); - - if let Some(permission_id) = find_permission_id(&permission_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{permission_session}/permissions/{permission_id}/reply" - ), - Some(json!({ "reply": "once" })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{permission_session}/permissions/missing-permission/reply" - ), - Some(json!({ "reply": "once" })), - ) - .await; - assert!(!status.is_success(), "missing permission id should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - - let question_reply_session = format!("question-reply-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reply_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{question_reply_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); - - let question_events = poll_events_until_match( - &app.app, - &question_reply_session, - Duration::from_secs(120), - |events| 
find_question_id_and_answers(events).is_some() || should_stop(events), - ) - .await; - let question_events = truncate_question_events(&question_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&question_events)); - }); - - if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" - ), - Some(json!({ "answers": answers })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/missing-question/reply" - ), - Some(json!({ "answers": [] })), - ) - .await; - assert!(!status.is_success(), "missing question id should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - - let question_reject_session = format!("question-reject-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reject_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{question_reject_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); - - let reject_events = poll_events_until_match( - &app.app, - &question_reject_session, - Duration::from_secs(120), - |events| find_question_id_and_answers(events).is_some() || should_stop(events), - 
) - .await; - let reject_events = truncate_question_events(&reject_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&reject_events)); - }); - - if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" - ), - None, - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/missing-question/reject" - ), - None, - ) - .await; - assert!(!status.is_success(), "missing question id reject should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn http_events_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. 
- // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_http_events_snapshot(&app.app, config).await; - } -} - -async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { - let _guard = apply_credentials(&config.credentials); - install_agent(app, config.agent).await; - - let session_a = format!("concurrent-a-{}", config.agent.as_str()); - let session_b = format!("concurrent-b-{}", config.agent.as_str()); - let perm_mode = test_permission_mode(config.agent); - create_session(app, config.agent, &session_a, perm_mode).await; - create_session(app, config.agent, &session_b, perm_mode).await; - - let app_a = app.clone(); - let app_b = app.clone(); - let send_a = send_message(&app_a, &session_a); - let send_b = send_message(&app_b, &session_b); - tokio::join!(send_a, send_b); - - let app_a = app.clone(); - let app_b = app.clone(); - let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120)); - let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120)); - let (events_a, events_b) = tokio::join!(poll_a, poll_b); - let events_a = truncate_after_first_stop(&events_a); - let events_b = truncate_after_first_stop(&events_b); - - assert!( - !events_a.is_empty(), - "no events collected for concurrent session a {}", - config.agent - ); - assert!( - !events_b.is_empty(), - "no events collected for concurrent session b {}", - config.agent - ); - assert!( - should_stop(&events_a), - "timed out waiting for assistant/error event for concurrent session a {}", - config.agent - ); - assert!( - should_stop(&events_b), - "timed out waiting for assistant/error event for concurrent session b {}", - config.agent - ); - - let snapshot = json!({ - "session_a": normalize_events(&events_a), - "session_b": normalize_events(&events_b), - }); - insta::with_settings!({ - snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)), - }, { - 
insta::assert_yaml_snapshot!(snapshot); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn sse_events_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. - // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_sse_events_snapshot(&app.app, config).await; - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn turn_stream_route() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. - // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_turn_stream_check(&app.app, config).await; - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn concurrency_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. 
- // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_concurrency_snapshot(&app.app, config).await; - } -} +include!("http/http_sse_snapshots.rs"); diff --git a/server/packages/sandbox-agent/tests/server-manager/agent_server_manager.rs b/server/packages/sandbox-agent/tests/server-manager/agent_server_manager.rs new file mode 100644 index 0000000..7ef46a8 --- /dev/null +++ b/server/packages/sandbox-agent/tests/server-manager/agent_server_manager.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use sandbox_agent::router::test_utils::{exit_status, spawn_sleep_process, TestHarness}; +use sandbox_agent_agent_management::agents::AgentId; +use sandbox_agent_universal_agent_schema::SessionEndReason; +use tokio::time::{timeout, Duration}; + +async fn wait_for_exit(child: &Arc<std::sync::Mutex<Option<std::process::Child>>>) { + for _ in 0..20 { + let done = { + let mut guard = child.lock().expect("child lock"); + match guard.as_mut() { + Some(child) => child.try_wait().ok().flatten().is_some(), + None => true, + } + }; + if done { + return; + } + tokio::time::sleep(Duration::from_millis(50)).await; + } +} + +#[tokio::test] +async fn register_and_unregister_sessions() { + let harness = TestHarness::new().await; + harness + .register_session(AgentId::Codex, "sess-1", Some("thread-1")) + .await; + + assert!( + harness + .has_session_mapping(AgentId::Codex, "sess-1") + .await + ); + assert_eq!( + harness + .native_mapping(AgentId::Codex, "thread-1") + .await + .as_deref(), + Some("sess-1") + ); + + harness + .unregister_session(AgentId::Codex, "sess-1", Some("thread-1")) + .await; + + assert!( + !harness + .has_session_mapping(AgentId::Codex, "sess-1") + .await + ); + assert!( + harness + .native_mapping(AgentId::Codex, "thread-1") + .await + .is_none() + ); +} + +#[tokio::test] +async fn shutdown_marks_servers_stopped_and_kills_child() { + let harness = TestHarness::new().await; + let child = harness + .insert_stdio_server(AgentId::Codex,
Some(spawn_sleep_process()), 0) + .await; + + harness.shutdown().await; + + assert!(matches!( + harness.server_status(AgentId::Codex).await, + Some(sandbox_agent::router::ServerStatus::Stopped) + )); + + wait_for_exit(&child).await; + let exited = { + let mut guard = child.lock().expect("child lock"); + guard + .as_mut() + .and_then(|child| child.try_wait().ok().flatten()) + .is_some() + }; + assert!(exited); +} + +#[tokio::test] +async fn handle_process_exit_marks_error_and_ends_sessions() { + let harness = TestHarness::new().await; + harness + .insert_session("sess-1", AgentId::Codex, Some("thread-1")) + .await; + harness + .register_session(AgentId::Codex, "sess-1", Some("thread-1")) + .await; + harness + .insert_stdio_server(AgentId::Codex, None, 1) + .await; + + harness + .handle_process_exit(AgentId::Codex, 1, exit_status(7)) + .await; + + assert!(matches!( + harness.server_status(AgentId::Codex).await, + Some(sandbox_agent::router::ServerStatus::Error) + )); + assert!( + harness + .server_last_error(AgentId::Codex) + .await + .unwrap_or_default() + .contains("exited") + ); + assert!(harness.session_ended("sess-1").await); + assert!(matches!( + harness.session_end_reason("sess-1").await, + Some(SessionEndReason::Error) + )); +} + +#[tokio::test] +async fn auto_restart_notifier_emits_signal() { + let harness = TestHarness::new().await; + let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); + harness.set_restart_notifier(tx).await; + harness.insert_http_server(AgentId::Mock, 2).await; + + harness + .handle_process_exit(AgentId::Mock, 2, exit_status(2)) + .await; + + let received = timeout(Duration::from_millis(200), rx.recv()) + .await + .expect("timeout"); + assert_eq!(received, Some(AgentId::Mock)); +} diff --git a/server/packages/sandbox-agent/tests/server-manager/mod.rs b/server/packages/sandbox-agent/tests/server-manager/mod.rs new file mode 100644 index 0000000..5dbab28 --- /dev/null +++ b/server/packages/sandbox-agent/tests/server-manager/mod.rs @@ 
-0,0 +1 @@ +mod agent_server_manager; diff --git a/server/packages/sandbox-agent/tests/server_manager.rs b/server/packages/sandbox-agent/tests/server_manager.rs new file mode 100644 index 0000000..eed6085 --- /dev/null +++ b/server/packages/sandbox-agent/tests/server_manager.rs @@ -0,0 +1,2 @@ +#[path = "server-manager/mod.rs"] +mod server_manager; diff --git a/server/packages/sandbox-agent/tests/ui.rs b/server/packages/sandbox-agent/tests/ui.rs new file mode 100644 index 0000000..f04f341 --- /dev/null +++ b/server/packages/sandbox-agent/tests/ui.rs @@ -0,0 +1,2 @@ +#[path = "ui/mod.rs"] +mod ui; diff --git a/server/packages/sandbox-agent/tests/inspector_ui.rs b/server/packages/sandbox-agent/tests/ui/inspector_ui.rs similarity index 100% rename from server/packages/sandbox-agent/tests/inspector_ui.rs rename to server/packages/sandbox-agent/tests/ui/inspector_ui.rs diff --git a/server/packages/sandbox-agent/tests/ui/mod.rs b/server/packages/sandbox-agent/tests/ui/mod.rs new file mode 100644 index 0000000..838b756 --- /dev/null +++ b/server/packages/sandbox-agent/tests/ui/mod.rs @@ -0,0 +1 @@ +mod inspector_ui; diff --git a/todo.md b/todo.md index 9f077af..6986730 100644 --- a/todo.md +++ b/todo.md @@ -7,3 +7,6 @@ - [x] Add inspector UI mode for turn stream and wire send flow. - [x] Refresh docs for new endpoint and UI mode. - [x] Add Docker/Vercel/Daytona/E2B examples with basic prompt scripts and tests. +- [x] Add unified AgentServerManager for shared agent servers (Codex/OpenCode). +- [x] Expose server status details in agent list API (uptime/restarts/last error/base URL). +- [ ] Regenerate TypeScript SDK from updated OpenAPI (blocked: Node/pnpm not available in env).