From b49776145b221190ba9dce14acbe869bfbf31eef Mon Sep 17 00:00:00 2001
From: Nathan Flurry <git@nathanflurry.com>
Date: Tue, 27 Jan 2026 19:29:54 -0800
Subject: [PATCH] fix: add docker-setup action, runtime Dockerfile, and align
 release workflow

- Add .github/actions/docker-setup composite action (from rivet)
- Add docker/runtime/Dockerfile for Docker image builds
- Update release.yaml to match rivet patterns:
  - Use corepack enable instead of pnpm/action-setup
  - Add reuse_engine_version input
  - Add Docker job with Depot runners
  - Use --no-frozen-lockfile for pnpm install
  - Add id-token permission for setup job
---
 .github/actions/docker-setup/action.yaml      |  31 +
 .github/workflows/release.yaml                | 122 ++-
 Cargo.toml                                    |   4 +-
 docker/runtime/Dockerfile                     |  51 ++
 docs/building-chat-ui.mdx                     |   1 +
 docs/openapi.json                             |   7 +-
 examples/daytona/daytona.ts                   |   2 +
 examples/docker/docker.ts                     |   2 +
 examples/e2b/e2b.ts                           |   2 +
 examples/shared/sandbox-agent-client.ts       |  21 +
 examples/vercel/vercel-sandbox.ts             |   2 +
 server/CLAUDE.md                              |  27 +-
 server/packages/sandbox-agent/src/router.rs   |   6 +
 .../http_sse_snapshots.rs => common/http.rs}  | 804 +++++-------------
 .../tests/http/agent_endpoints.rs             | 165 ++++
 ...oints_snapshots@agent_install_claude.snap} |   0
 ...points_snapshots@agent_install_codex.snap} |   0
 ...dpoints_snapshots@agent_install_mock.snap} |   0
 ...nts_snapshots@agent_install_opencode.snap} |   0
 ...dpoints_snapshots@agent_modes_claude.snap} |   0
 ...ndpoints_snapshots@agent_modes_codex.snap} |   0
 ...endpoints_snapshots@agent_modes_mock.snap} |   0
 ...oints_snapshots@agent_modes_opencode.snap} |   0
 ...dpoints_snapshots@agents_list_global.snap} |   0
 ...nt_endpoints_snapshots@health_global.snap} |   0
 ..._snapshots@auth_health_public_global.snap} |   0
 ..._snapshots@auth_invalid_token_global.snap} |   0
 ..._snapshots@auth_missing_token_global.snap} |   0
 ...th_snapshots@auth_valid_token_global.snap} |   0
 ...s__cors_snapshots@cors_actual_global.snap} |   0
 ...cors_snapshots@cors_preflight_global.snap} |   0
 ...oints_snapshots@create_session_claude.snap |   6 -
 ...points_snapshots@create_session_codex.snap |   6 -
 ...dpoints_snapshots@create_session_mock.snap |   7 -
 ...nts_snapshots@create_session_opencode.snap |   6 -
 ...dpoints_snapshots@send_message_claude.snap |   6 -
 ...ndpoints_snapshots@send_message_codex.snap |   6 -
 ...endpoints_snapshots@send_message_mock.snap |   6 -
 ...oints_snapshots@send_message_opencode.snap |   5 -
 ...points_snapshots@sessions_list_global.snap |   6 -
 ...ow_snapshots@permission_events_claude.snap |  17 -
 ...low_snapshots@permission_events_codex.snap | 131 ---
 ...flow_snapshots@permission_events_mock.snap |  35 -
 ...pshots@permission_reply_missing_codex.snap |  11 -
 ...apshots@permission_reply_missing_mock.snap |  11 -
 ...apshots@question_reject_events_claude.snap |  45 -
 ...napshots@question_reject_events_codex.snap | 331 -------
 ...snapshots@question_reject_events_mock.snap |  35 -
 ...pshots@question_reject_missing_claude.snap |  11 -
 ...apshots@question_reject_missing_codex.snap |  11 -
 ...napshots@question_reject_missing_mock.snap |  11 -
 ...napshots@question_reply_events_claude.snap |  45 -
 ...snapshots@question_reply_events_codex.snap | 315 -------
 ..._snapshots@question_reply_events_mock.snap |  35 -
 ...snapshots@question_reply_missing_mock.snap |  11 -
 ...ncy_snapshot@concurrency_events_codex.snap | 201 -----
 ...ency_snapshot@concurrency_events_mock.snap |  67 --
 ...ttp_events_snapshot@http_events_codex.snap | 171 ----
 ...sse_events_snapshot@sse_events_claude.snap |  45 -
 ..._sse_events_snapshot@sse_events_codex.snap |  73 --
 ...n_sse_events_snapshot@sse_events_mock.snap |  35 -
 .../sandbox-agent/tests/http_endpoints.rs     |   2 +
 .../sandbox-agent/tests/http_sse_snapshots.rs |   1 -
 .../packages/sandbox-agent/tests/sessions.rs  |   2 +
 .../sandbox-agent/tests/sessions/mod.rs       |   5 +
 .../tests/sessions/permissions.rs             |  88 ++
 .../sandbox-agent/tests/sessions/questions.rs | 145 ++++
 .../sandbox-agent/tests/sessions/reasoning.rs |  56 ++
 .../tests/sessions/session_lifecycle.rs       | 192 +++++
 ...ssion_snapshot@permission_events_mock.snap |  48 ++
 ...apshot@permission_reply_missing_mock.snap} |   5 +-
 ..._snapshot@question_reject_events_mock.snap |  48 ++
 ...napshot@question_reject_missing_mock.snap} |   5 +-
 ...n_snapshot@question_reply_events_mock.snap |  48 ++
 ...snapshot@question_reply_missing_mock.snap} |   5 +-
 ...ion_snapshot@concurrency_events_mock.snap} |  89 +-
 ..._session_snapshot@create_session_mock.snap |   6 +
 ...rt_session_snapshot@send_message_mock.snap |   5 +
 ...t_session_snapshot@sessions_list_mock.snap |   6 +
 ...ttp_events_snapshot@http_events_mock.snap} |  45 +-
 ..._sse_events_snapshot@sse_events_mock.snap} |  35 +-
 .../sandbox-agent/tests/sessions/status.rs    |  61 ++
 82 files changed, 1415 insertions(+), 2430 deletions(-)
 create mode 100644 .github/actions/docker-setup/action.yaml
 create mode 100644 docker/runtime/Dockerfile
 rename server/packages/sandbox-agent/tests/{http/http_sse_snapshots.rs => common/http.rs} (53%)
 create mode 100644 server/packages/sandbox-agent/tests/http/agent_endpoints.rs
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap => agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@health_global.snap => agent_endpoints__agent_endpoints_snapshots@health_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_health_public_global.snap => agent_endpoints__auth_snapshots@auth_health_public_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap => agent_endpoints__auth_snapshots@auth_invalid_token_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap => agent_endpoints__auth_snapshots@auth_missing_token_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap => agent_endpoints__auth_snapshots@auth_valid_token_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__cors_snapshots@cors_actual_global.snap => agent_endpoints__cors_snapshots@cors_actual_global.snap} (100%)
 rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__cors_snapshots@cors_preflight_global.snap => agent_endpoints__cors_snapshots@cors_preflight_global.snap} (100%)
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap
 delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap
 create mode 100644 server/packages/sandbox-agent/tests/http_endpoints.rs
 delete mode 100644 server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/mod.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/permissions.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/questions.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/reasoning.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap => sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap} (53%)
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap => sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap} (53%)
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap => sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap} (53%)
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap => sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap} (51%)
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap
 create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap => sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap} (50%)
 rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap => sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap} (50%)
 create mode 100644 server/packages/sandbox-agent/tests/sessions/status.rs

diff --git a/.github/actions/docker-setup/action.yaml b/.github/actions/docker-setup/action.yaml
new file mode 100644
index 0000000..d07ec5a
--- /dev/null
+++ b/.github/actions/docker-setup/action.yaml
@@ -0,0 +1,31 @@
+name: 'Docker Setup'
+description: 'Set up Docker Buildx and log in to Docker Hub'
+inputs:
+  docker_username:
+    description: 'Docker Hub username'
+    required: true
+  docker_password:
+    description: 'Docker Hub password'
+    required: true
+  github_token:
+    description: 'GitHub token'
+    required: true
+runs:
+  using: 'composite'
+  steps:
+    - name: Set up Docker Buildx
+      uses: docker/setup-buildx-action@v3
+
+    - name: Log in to Docker Hub
+      uses: docker/login-action@v3
+      with:
+        username: ${{ inputs.docker_username }}
+        password: ${{ inputs.docker_password }}
+
+    # This will be used as a secret to authenticate with Git repo pulls
+    - name: Create .netrc file
+      run: |
+        echo "machine github.com" > ${{ runner.temp }}/netrc
+        echo "login x-access-token" >> ${{ runner.temp }}/netrc
+        echo "password ${{ inputs.github_token }}" >> ${{ runner.temp }}/netrc
+      shell: bash
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 3353830..09e39c2 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -4,14 +4,18 @@ on:
   workflow_dispatch:
     inputs:
       version:
-        description: "Version (e.g. 0.1.0 or v0.1.0)"
+        description: 'Version'
         required: true
         type: string
       latest:
-        description: "Latest"
+        description: 'Latest'
         required: true
         type: boolean
         default: true
+      reuse_engine_version:
+        description: 'Reuse artifacts from this version (skips building)'
+        required: false
+        type: string
 
 defaults:
   run:
@@ -27,7 +31,10 @@ jobs:
     name: "Setup"
     runs-on: ubuntu-24.04
     permissions:
+      # Allow pushing to GitHub
       contents: write
+      # Allow requesting an OIDC token (id-token) for authentication
+      id-token: write
     steps:
       - uses: actions/checkout@v4
         with:
@@ -35,20 +42,29 @@ jobs:
 
       - uses: dtolnay/rust-toolchain@stable
 
-      - uses: pnpm/action-setup@v4
-
       - uses: actions/setup-node@v4
         with:
           node-version: 20
-          cache: pnpm
+
+      - run: corepack enable
 
       - name: Setup
         env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
           R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
         run: |
+          # Configure Git
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Authenticate with NPM
+          cat << EOF > ~/.npmrc
+          //registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}
+          EOF
+
           # Install dependencies
-          pnpm install
+          pnpm install --no-frozen-lockfile
 
           # Install tsx globally
           npm install -g tsx
@@ -60,54 +76,57 @@ jobs:
             CMD="$CMD --no-latest"
           fi
 
+          if [ -n "${{ inputs.reuse_engine_version }}" ]; then
+            CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
+          fi
+
           eval "$CMD"
 
   binaries:
     name: "Build & Upload Binaries"
     needs: [setup]
+    if: ${{ !inputs.reuse_engine_version }}
     strategy:
       matrix:
         include:
           - platform: linux
+            runner: depot-ubuntu-24.04-8
             target: x86_64-unknown-linux-musl
             binary_ext: ""
             arch: x86_64
           - platform: windows
+            runner: depot-ubuntu-24.04-8
             target: x86_64-pc-windows-gnu
             binary_ext: ".exe"
             arch: x86_64
           - platform: macos
+            runner: depot-ubuntu-24.04-8
             target: x86_64-apple-darwin
             binary_ext: ""
             arch: x86_64
           - platform: macos
+            runner: depot-ubuntu-24.04-8
             target: aarch64-apple-darwin
             binary_ext: ""
             arch: aarch64
-    runs-on: ubuntu-24.04
+    runs-on: ${{ matrix.runner }}
     steps:
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
 
-      - uses: pnpm/action-setup@v4
-
-      - uses: actions/setup-node@v4
-        with:
-          node-version: 20
-          cache: pnpm
-
-      - name: Build inspector frontend
-        run: |
-          pnpm install
-          SANDBOX_AGENT_SKIP_INSPECTOR=1 pnpm --filter @sandbox-agent/inspector build
-
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
       - name: Build binary
         run: |
+          # Use Docker BuildKit
+          export DOCKER_BUILDKIT=1
+
+          # Build the binary using our Dockerfile
           docker/release/build.sh ${{ matrix.target }}
+
+          # Make sure dist directory exists and binary is there
           ls -la dist/
 
       - name: Upload to R2
@@ -115,10 +134,11 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
         run: |
-          # Install AWS CLI
+          # Install dependencies for AWS CLI
           sudo apt-get update
           sudo apt-get install -y unzip curl
 
+          # Install AWS CLI
           curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
           unzip awscliv2.zip
           sudo ./aws/install --update
@@ -126,7 +146,7 @@ jobs:
           COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
           BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
 
-          # Upload to commit directory for later promotion
+          # Must specify --checksum-algorithm for compatibility with R2
           aws s3 cp \
             "${BINARY_PATH}" \
             "s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
@@ -134,10 +154,48 @@ jobs:
             --endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
             --checksum-algorithm CRC32
 
+  docker:
+    name: "Build & Push Docker Images"
+    needs: [setup]
+    if: ${{ !inputs.reuse_engine_version }}
+    strategy:
+      matrix:
+        include:
+          - platform: linux/arm64
+            runner: depot-ubuntu-24.04-arm-8
+            arch_suffix: -arm64
+          - platform: linux/amd64
+            runner: depot-ubuntu-24.04-8
+            arch_suffix: -amd64
+    runs-on: ${{ matrix.runner }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set outputs
+        id: vars
+        run: echo "sha_short=${GITHUB_SHA::7}" >> $GITHUB_OUTPUT
+
+      - uses: ./.github/actions/docker-setup
+        with:
+          docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
+          docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build & Push
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }}
+          file: docker/runtime/Dockerfile
+          platforms: ${{ matrix.platform }}
+
   complete:
     name: "Complete"
-    needs: [setup, binaries]
-    if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }}
+    needs: [setup, docker, binaries]
+    if: ${{ always() && !cancelled() && needs.setup.result == 'success' && (needs.docker.result == 'success' || needs.docker.result == 'skipped') && (needs.binaries.result == 'success' || needs.binaries.result == 'skipped') }}
     runs-on: ubuntu-24.04
     steps:
       - uses: actions/checkout@v4
@@ -146,17 +204,21 @@ jobs:
 
       - uses: dtolnay/rust-toolchain@stable
 
-      - uses: pnpm/action-setup@v4
-
       - uses: actions/setup-node@v4
         with:
           node-version: 20
           registry-url: "https://registry.npmjs.org"
-          cache: pnpm
+
+      - run: corepack enable
+
+      - uses: ./.github/actions/docker-setup
+        with:
+          docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
+          docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Complete
         env:
-          # https://cli.github.com/manual/gh_help_environment
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
           NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@@ -169,7 +231,7 @@ jobs:
           EOF
 
           # Install dependencies
-          pnpm install
+          pnpm install --no-frozen-lockfile
 
           # Install tsx globally
           npm install -g tsx
@@ -181,4 +243,8 @@ jobs:
             CMD="$CMD --no-latest"
           fi
 
+          if [ -n "${{ inputs.reuse_engine_version }}" ]; then
+            CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
+          fi
+
           eval "$CMD"
diff --git a/Cargo.toml b/Cargo.toml
index 114ae4d..9338a05 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,10 +5,10 @@ members = ["server/packages/*"]
 [workspace.package]
 version = "0.1.0"
 edition = "2021"
-authors = ["Sandbox Agent Contributors"]
+authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
 license = "Apache-2.0"
 repository = "https://github.com/rivet-dev/sandbox-agent"
-description = "Universal agent API for AI coding assistants"
+description = "Universal API for automatic coding agents in sandboxes. Supports Claude Code, Codex, OpenCode, and Amp."
 
 [workspace.dependencies]
 # Internal crates
diff --git a/docker/runtime/Dockerfile b/docker/runtime/Dockerfile
new file mode 100644
index 0000000..0e71c2f
--- /dev/null
+++ b/docker/runtime/Dockerfile
@@ -0,0 +1,51 @@
+# syntax=docker/dockerfile:1.10.0
+
+# Build stage - compile the binary
+FROM rust:1.88.0 AS builder
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -y \
+    musl-tools \
+    musl-dev \
+    pkg-config \
+    ca-certificates \
+    git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN rustup target add x86_64-unknown-linux-musl
+
+WORKDIR /build
+COPY . .
+
+# Build static binary
+RUN --mount=type=cache,target=/usr/local/cargo/registry \
+    --mount=type=cache,target=/usr/local/cargo/git \
+    --mount=type=cache,target=/build/target \
+    SANDBOX_AGENT_SKIP_INSPECTOR=1 \
+    RUSTFLAGS="-C target-feature=+crt-static" \
+    cargo build -p sandbox-agent --release --target x86_64-unknown-linux-musl && \
+    cp target/x86_64-unknown-linux-musl/release/sandbox-agent /sandbox-agent
+
+# Runtime stage - minimal image
+FROM debian:bookworm-slim
+
+RUN apt-get update && apt-get install -y \
+    ca-certificates \
+    curl \
+    git && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy the binary from builder
+COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
+RUN chmod +x /usr/local/bin/sandbox-agent
+
+# Create non-root user
+RUN useradd -m -s /bin/bash sandbox
+USER sandbox
+WORKDIR /home/sandbox
+
+EXPOSE 2468
+
+ENTRYPOINT ["sandbox-agent"]
+CMD ["--host", "0.0.0.0", "--port", "2468"]
diff --git a/docs/building-chat-ui.mdx b/docs/building-chat-ui.mdx
index a2bd9b5..80363f9 100644
--- a/docs/building-chat-ui.mdx
+++ b/docs/building-chat-ui.mdx
@@ -21,6 +21,7 @@ Capabilities tell you which features are supported for the selected agent:
 - `tool_calls` and `tool_results` indicate tool execution events.
 - `questions` and `permissions` indicate HITL flows.
 - `plan_mode` indicates that the agent supports plan-only execution.
+- `reasoning` and `status` indicate that the agent can emit reasoning/status content parts.
 
 Use these to enable or disable UI affordances (tool panels, approval buttons, etc.).
 
diff --git a/docs/openapi.json b/docs/openapi.json
index f10b073..7bd9301 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -4,7 +4,8 @@
     "title": "sandbox-agent",
     "description": "",
     "contact": {
-      "name": "Sandbox Agent Contributors"
+      "name": "Rivet Gaming, LLC",
+      "email": "developer@rivet.gg"
     },
     "license": {
       "name": "Apache-2.0"
@@ -662,6 +663,7 @@
           "sessionLifecycle",
           "errorEvents",
           "reasoning",
+          "status",
           "commandExecution",
           "fileChanges",
           "mcpTools",
@@ -706,6 +708,9 @@
             "type": "boolean",
             "description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)"
           },
+          "status": {
+            "type": "boolean"
+          },
           "streamingDeltas": {
             "type": "boolean"
           },
diff --git a/examples/daytona/daytona.ts b/examples/daytona/daytona.ts
index 45e0070..4c23149 100644
--- a/examples/daytona/daytona.ts
+++ b/examples/daytona/daytona.ts
@@ -2,6 +2,7 @@ import { Daytona } from "@daytonaio/sdk";
 import { pathToFileURL } from "node:url";
 import {
   ensureUrl,
+  logInspectorUrl,
   runPrompt,
   waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@@ -39,6 +40,7 @@ export async function setupDaytonaSandboxAgent(): Promise<{
 
   const baseUrl = ensureUrl(preview.url);
   await waitForHealth({ baseUrl, token, extraHeaders });
+  logInspectorUrl({ baseUrl, token });
 
   const cleanup = async () => {
     try {
diff --git a/examples/docker/docker.ts b/examples/docker/docker.ts
index e41e776..1626b34 100644
--- a/examples/docker/docker.ts
+++ b/examples/docker/docker.ts
@@ -2,6 +2,7 @@ import Docker from "dockerode";
 import { pathToFileURL } from "node:url";
 import {
   ensureUrl,
+  logInspectorUrl,
   runPrompt,
   waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@@ -83,6 +84,7 @@ export async function setupDockerSandboxAgent(): Promise<{
 
   const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`);
   await waitForHealth({ baseUrl, token });
+  logInspectorUrl({ baseUrl, token });
 
   const cleanup = async () => {
     try {
diff --git a/examples/e2b/e2b.ts b/examples/e2b/e2b.ts
index f2c32e2..9d0fcce 100644
--- a/examples/e2b/e2b.ts
+++ b/examples/e2b/e2b.ts
@@ -2,6 +2,7 @@ import { Sandbox } from "@e2b/code-interpreter";
 import { pathToFileURL } from "node:url";
 import {
   ensureUrl,
+  logInspectorUrl,
   runPrompt,
   waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@@ -45,6 +46,7 @@ export async function setupE2BSandboxAgent(): Promise<{
 
   const baseUrl = ensureUrl(sandbox.getHost(port));
   await waitForHealth({ baseUrl, token });
+  logInspectorUrl({ baseUrl, token });
 
   const cleanup = async () => {
     try {
diff --git a/examples/shared/sandbox-agent-client.ts b/examples/shared/sandbox-agent-client.ts
index 4dd9b26..9f5423a 100644
--- a/examples/shared/sandbox-agent-client.ts
+++ b/examples/shared/sandbox-agent-client.ts
@@ -16,6 +16,27 @@ export function ensureUrl(rawUrl: string): string {
   return `https://${rawUrl}`;
 }
 
+const INSPECTOR_URL = "https://inspect.sandboxagent.dev";
+
+export function buildInspectorUrl({
+  baseUrl,
+  token,
+}: {
+  baseUrl: string;
+  token?: string;
+}): string {
+  const normalized = normalizeBaseUrl(ensureUrl(baseUrl));
+  const params = new URLSearchParams({ url: normalized });
+  if (token) {
+    params.set("token", token);
+  }
+  return `${INSPECTOR_URL}?${params.toString()}`;
+}
+
+export function logInspectorUrl({ baseUrl, token }: { baseUrl: string; token?: string }): void {
+  console.log(`Inspector: ${buildInspectorUrl({ baseUrl, token })}`);
+}
+
 type HeaderOptions = {
   token?: string;
   extraHeaders?: Record<string, string>;
diff --git a/examples/vercel/vercel-sandbox.ts b/examples/vercel/vercel-sandbox.ts
index 4295cb8..5175578 100644
--- a/examples/vercel/vercel-sandbox.ts
+++ b/examples/vercel/vercel-sandbox.ts
@@ -2,6 +2,7 @@ import { Sandbox } from "@vercel/sandbox";
 import { pathToFileURL } from "node:url";
 import {
   ensureUrl,
+  logInspectorUrl,
   runPrompt,
   waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@@ -61,6 +62,7 @@ export async function setupVercelSandboxAgent(): Promise<{
 
   const baseUrl = ensureUrl(sandbox.domain(port));
   await waitForHealth({ baseUrl, token });
+  logInspectorUrl({ baseUrl, token });
 
   const cleanup = async () => {
     try {
diff --git a/server/CLAUDE.md b/server/CLAUDE.md
index 6de8a1e..be03de6 100644
--- a/server/CLAUDE.md
+++ b/server/CLAUDE.md
@@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te
   - Agent flow coverage in `agent-flows/`
   - Agent management coverage in `agent-management/`
   - Shared server manager coverage in `server-manager/`
-  - HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
+  - HTTP endpoint snapshot coverage in `http/` (snapshots in `http/snapshots/`)
+  - Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`)
   - UI coverage in `ui/`
   - Shared helpers in `common/`
 - Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
 
 ## Snapshot tests
 
-The HTTP/SSE snapshot suite entrypoint lives in:
-- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
+HTTP endpoint snapshot entrypoint:
+- `server/packages/sandbox-agent/tests/http_endpoints.rs`
+
+Session snapshot entrypoint:
+- `server/packages/sandbox-agent/tests/sessions.rs`
 
 Snapshots are written to:
-- `server/packages/sandbox-agent/tests/http/snapshots/`
+- `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots)
+- `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots)
 
 ## Agent selection
 
@@ -71,6 +76,7 @@ To keep snapshots deterministic:
   - IDs, timestamps, native IDs
   - text content, tool inputs/outputs, provider-specific metadata
   - `source` and `synthetic` flags (these are implementation details)
+- Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across agents; validate those content types separately in their capability-specific tests.
 - The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
 - Event streams are truncated after the first assistant or error event.
 - Permission flow snapshots are truncated after the permission request (or first assistant) event.
@@ -81,14 +87,19 @@ To keep snapshots deterministic:
 
 ## Typical commands
 
-Run only Claude snapshots:
+Run only Claude session snapshots:
 ```
-SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots
+SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions
 ```
 
-Run all detected agents:
+Run session snapshots for all detected agents:
 ```
-cargo test -p sandbox-agent --test http_sse_snapshots
+cargo test -p sandbox-agent --test sessions
+```
+
+Run HTTP endpoint snapshots:
+```
+cargo test -p sandbox-agent --test http_endpoints
 ```
 
 ## Universal Schema
diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs
index c1ab3c7..12e8be0 100644
--- a/server/packages/sandbox-agent/src/router.rs
+++ b/server/packages/sandbox-agent/src/router.rs
@@ -2913,6 +2913,7 @@ pub struct AgentCapabilities {
     pub session_lifecycle: bool,
     pub error_events: bool,
     pub reasoning: bool,
+    pub status: bool,
     pub command_execution: bool,
     pub file_changes: bool,
     pub mcp_tools: bool,
@@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
             session_lifecycle: false,
             error_events: false,
             reasoning: false,
+            status: false,
             command_execution: false,
             file_changes: false,
             mcp_tools: false,
@@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
             session_lifecycle: true,
             error_events: true,
             reasoning: true,
+            status: true,
             command_execution: true,
             file_changes: true,
             mcp_tools: true,
@@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
             session_lifecycle: true,
             error_events: true,
             reasoning: false,
+            status: true,
             command_execution: false,
             file_changes: false,
             mcp_tools: false,
@@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
             session_lifecycle: false,
             error_events: true,
             reasoning: false,
+            status: false,
             command_execution: false,
             file_changes: false,
             mcp_tools: false,
@@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
             session_lifecycle: true,
             error_events: true,
             reasoning: true,
+            status: true,
             command_execution: true,
             file_changes: true,
             mcp_tools: true,
diff --git a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/common/http.rs
similarity index 53%
rename from server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
rename to server/packages/sandbox-agent/tests/common/http.rs
index 75fdd9e..d719a03 100644
--- a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
+++ b/server/packages/sandbox-agent/tests/common/http.rs
@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::time::{Duration, Instant};
 
 use axum::body::{Body, Bytes};
@@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) {
     assert_eq!(status, StatusCode::NO_CONTENT, "send message");
 }
 
-async fn poll_events_until(
-    app: &Router,
-    session_id: &str,
-    timeout: Duration,
-) -> Vec<Value> {
-    let start = Instant::now();
-    let mut offset = 0u64;
-    let mut events = Vec::new();
-    while start.elapsed() < timeout {
-        let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
-        let (status, payload) = send_json(app, Method::GET, &path, None).await;
-        assert_eq!(status, StatusCode::OK, "poll events");
-        let new_events = payload
-            .get("events")
-            .and_then(Value::as_array)
-            .cloned()
-            .unwrap_or_default();
-        if !new_events.is_empty() {
-            if let Some(last) = new_events
-                .last()
-                .and_then(|event| event.get("sequence"))
-                .and_then(Value::as_u64)
-            {
-                offset = last;
-            }
-            events.extend(new_events);
-            if should_stop(&events) {
-                break;
-            }
-        }
-        tokio::time::sleep(Duration::from_millis(800)).await;
-    }
-    events
+/// Requests a single page of events for `session_id` (`offset={offset}`, `limit=200`).
+///
+/// Panics unless the response status is 200 OK. Returns the events together with the
+/// last event's `sequence`, or the incoming `offset` when no such value is present.
+async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
+    let uri = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
+    let (status, body) = send_json(app, Method::GET, &uri, None).await;
+    assert_eq!(status, StatusCode::OK, "poll events");
+    let page = match body.get("events").and_then(Value::as_array) {
+        Some(items) => items.clone(),
+        None => Vec::new(),
+    };
+    let last_sequence = page.last().and_then(|event| event.get("sequence"));
+    let next_offset = last_sequence.and_then(Value::as_u64).unwrap_or(offset);
+    (page, next_offset)
 }
 
-async fn read_sse_events(
+/// Reads past events already emitted for `session_id` and returns the resulting offset.
+/// Retries every 200ms while nothing has been seen (offset still 0); stops at the first
+/// empty page after progress, or once `timeout` has elapsed.
+async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
+    let started = Instant::now();
+    let mut offset = 0u64;
+    while started.elapsed() < timeout {
+        let (batch, next_offset) = fetch_events_once(app, session_id, offset).await;
+        if !batch.is_empty() {
+            offset = next_offset;
+            continue;
+        }
+        if offset != 0 {
+            break;
+        }
+        tokio::time::sleep(Duration::from_millis(200)).await;
+    }
+    offset
+}
+
+async fn poll_events_until_from(
     app: &Router,
     session_id: &str,
+    offset: u64, // offset at which polling starts
+    timeout: Duration, // upper bound on the total polling time
+) -> Vec<Value> {
+    poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await // stop predicate: `should_stop`
+}
+
+async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
+    poll_events_until_from(app, session_id, 0, timeout).await // `poll_events_until_from` with offset 0
+}
+
+async fn read_sse_events_from(
+    app: &Router,
+    session_id: &str,
+    offset: u64,
     timeout: Duration,
 ) -> Vec<Value> {
     let request = Request::builder()
         .method(Method::GET)
-        .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
+        .uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
         .body(Body::empty())
         .expect("sse request");
     let response = app
@@ -291,6 +307,10 @@ async fn read_sse_events(
     events
 }
 
+async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
+    read_sse_events_from(app, session_id, 0, timeout).await // `read_sse_events_from` with offset 0
+}
+
 async fn read_turn_stream_events(
     app: &Router,
     session_id: &str,
@@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value {
         !events.iter().any(is_unparsed_event),
         "agent.unparsed event encountered"
     );
-    let normalized = events
+    let scrubbed = scrub_events(events);
+    let normalized = scrubbed
         .iter()
         .enumerate()
         .map(|(idx, event)| normalize_event(event, idx + 1))
@@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value {
     Value::Array(normalized)
 }
 
+/// Filters out events for items flagged by `should_scrub_item`, along with their deltas.
+///
+/// `item.started` / `item.completed` events carrying a flagged item are dropped and the
+/// item's ids are remembered via `record_item_ids`. A later `item.delta` is dropped when
+/// its `item_id` or `native_item_id` is one of the remembered ids. All other events are
+/// cloned into the result in their original order.
+fn scrub_events(events: &[Value]) -> Vec<Value> {
+    let mut scrubbed_ids: HashSet<String> = HashSet::new();
+    let mut kept = Vec::new();
+
+    for event in events {
+        let data = event.get("data");
+        let skip = match event.get("type").and_then(Value::as_str) {
+            Some("item.started" | "item.completed") => {
+                match data.and_then(|data| data.get("item")) {
+                    Some(item) if should_scrub_item(item) => {
+                        // Remember the ids so this item's deltas can be dropped as well.
+                        record_item_ids(item, &mut scrubbed_ids);
+                        true
+                    }
+                    _ => false,
+                }
+            }
+            Some("item.delta") => ["item_id", "native_item_id"].iter().any(|key| {
+                data.and_then(|data| data.get(*key))
+                    .and_then(Value::as_str)
+                    .is_some_and(|id| scrubbed_ids.contains(id))
+            }),
+            // Events with any other (or no) type are always kept.
+            _ => false,
+        };
+        if !skip {
+            kept.push(event.clone());
+        }
+    }
+
+    kept
+}
+
+/// Returns true for items whose `kind` is `"status"`, and for items with at least one
+/// `"reasoning"` content part and no parts other than `"reasoning"` / `"status"`.
+fn should_scrub_item(item: &Value) -> bool {
+    if item.get("kind").and_then(Value::as_str) == Some("status") {
+        return true;
+    }
+    let mut saw_reasoning = false;
+    for content_type in item_content_types(item) {
+        match content_type.as_str() {
+            "reasoning" => saw_reasoning = true,
+            "status" => {}
+            _ => return false,
+        }
+    }
+    saw_reasoning
+}
+
+/// Inserts the item's `item_id` and `native_item_id` into `ids` when they are strings.
+fn record_item_ids(item: &Value, ids: &mut HashSet<String>) {
+    let found = ["item_id", "native_item_id"]
+        .iter()
+        .filter_map(|key| item.get(*key).and_then(Value::as_str))
+        .map(str::to_owned);
+    ids.extend(found);
+}
+
 fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
     if let Some(idx) = events
         .iter()
@@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value {
     if let Some(event_type) = event.get("type").and_then(Value::as_str) {
         map.insert("type".to_string(), Value::String(event_type.to_string()));
     }
-    if let Some(source) = event.get("source").and_then(Value::as_str) {
-        map.insert("source".to_string(), Value::String(source.to_string()));
-    }
-    if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) {
-        map.insert("synthetic".to_string(), Value::Bool(synthetic));
-    }
     let data = event.get("data").unwrap_or(&Value::Null);
     match event.get("type").and_then(Value::as_str).unwrap_or("") {
         "session.started" => {
@@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value {
         let types = content
             .iter()
             .filter_map(|part| part.get("type").and_then(Value::as_str))
+            .filter(|value| *value != "reasoning" && *value != "status")
             .map(|value| Value::String(value.to_string()))
             .collect::<Vec<_>>();
         map.insert("content_types".to_string(), Value::Array(types));
@@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value {
     Value::Object(map)
 }
 
+/// Collects the string `type` of every entry in the item's `content` array, in order;
+/// empty when `content` is absent or not an array.
+fn item_content_types(item: &Value) -> Vec<String> {
+    let Some(parts) = item.get("content").and_then(Value::as_array) else {
+        return Vec::new();
+    };
+    parts
+        .iter()
+        .filter_map(|part| part.get("type").and_then(Value::as_str))
+        .map(str::to_owned)
+        .collect()
+}
+
+/// Content types of the item at `data.item` in `event`; empty when there is no such item.
+fn event_content_types(event: &Value) -> Vec<String> {
+    match event.get("data").and_then(|data| data.get("item")) {
+        Some(item) => item_content_types(item),
+        None => Vec::new(),
+    }
+}
+
+/// Reports whether the item at `data.item` in `event` has `kind` equal to `"status"`.
+fn event_is_status_item(event: &Value) -> bool {
+    let kind = event
+        .get("data")
+        .and_then(|data| data.get("item"))
+        .and_then(|item| item.get("kind"));
+    kind.and_then(Value::as_str) == Some("status")
+}
+
+/// True when any event's item lists `content_type` among its content types.
+fn events_have_content_type(events: &[Value], content_type: &str) -> bool {
+    let mut per_event = events.iter().map(event_content_types);
+    per_event.any(|types| types.iter().any(|found| found == content_type))
+}
+
 fn normalize_session_end(data: &Value) -> Value {
     let mut map = Map::new();
     if let Some(reason) = data.get("reason").and_then(Value::as_str) {
@@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
 }
 
 
+/// Polls session events starting at `offset`, sleeping 800ms between requests, and
+/// accumulates them until `stop` returns true for the accumulated slice or `timeout`
+/// elapses. `stop` is only consulted after a non-empty page; returns the accumulation.
+async fn poll_events_until_match_from<F: Fn(&[Value]) -> bool>(
+    app: &Router,
+    session_id: &str,
+    mut offset: u64,
+    timeout: Duration,
+    stop: F,
+) -> Vec<Value> {
+    let started = Instant::now();
+    let mut collected = Vec::new();
+    while started.elapsed() < timeout {
+        let (batch, next_offset) = fetch_events_once(app, session_id, offset).await;
+        let got_new = !batch.is_empty();
+        if got_new {
+            offset = next_offset;
+            collected.extend(batch);
+        }
+        if got_new && stop(&collected) {
+            break;
+        }
+        tokio::time::sleep(Duration::from_millis(800)).await;
+    }
+    collected
+}
+
 async fn poll_events_until_match<F>(
     app: &Router,
     session_id: &str,
@@ -726,34 +870,7 @@ async fn poll_events_until_match<F>(
 where
     F: Fn(&[Value]) -> bool,
 {
-    let start = Instant::now();
-    let mut offset = 0u64;
-    let mut events = Vec::new();
-    while start.elapsed() < timeout {
-        let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
-        let (status, payload) = send_json(app, Method::GET, &path, None).await;
-        assert_eq!(status, StatusCode::OK, "poll events");
-        let new_events = payload
-            .get("events")
-            .and_then(Value::as_array)
-            .cloned()
-            .unwrap_or_default();
-        if !new_events.is_empty() {
-            if let Some(last) = new_events
-                .last()
-                .and_then(|event| event.get("sequence"))
-                .and_then(Value::as_u64)
-            {
-                offset = last;
-            }
-            events.extend(new_events);
-            if stop(&events) {
-                break;
-            }
-        }
-        tokio::time::sleep(Duration::from_millis(800)).await;
-    }
-    events
+    poll_events_until_match_from(app, session_id, 0, timeout, stop).await
 }
 
 fn find_permission_id(events: &[Value]) -> Option<String> {
@@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
 
     let session_id = format!("session-{}", config.agent.as_str());
     create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
+    let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
     send_message(app, &session_id).await;
 
-    let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
+    let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
     let events = truncate_after_first_stop(&events);
     assert!(
         !events.is_empty(),
@@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
     );
     let normalized = normalize_events(&events);
     insta::with_settings!({
-        snapshot_suffix => snapshot_name("http_events", Some(config.agent)),
+        snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)),
+        snapshot_path => "../sessions/snapshots",
     }, {
         insta::assert_yaml_snapshot!(normalized);
     });
@@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
 
     let session_id = format!("sse-{}", config.agent.as_str());
     create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
+    let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
 
     let sse_task = {
         let app = app.clone();
         let session_id = session_id.clone();
+        let offset = offset;
         tokio::spawn(async move {
-            read_sse_events(&app, &session_id, Duration::from_secs(120)).await
+            read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
         })
     };
 
@@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
     );
     let normalized = normalize_events(&events);
     insta::with_settings!({
-        snapshot_suffix => snapshot_name("sse_events", Some(config.agent)),
+        snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)),
+        snapshot_path => "../sessions/snapshots",
     }, {
         insta::assert_yaml_snapshot!(normalized);
     });
@@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
         config.agent
     );
 }
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn auth_snapshots() {
-    let token = "test-token";
-    let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
-
-    let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
-    assert_eq!(status, StatusCode::OK, "health should be public");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("auth_health_public", None),
-    }, {
-        insta::assert_yaml_snapshot!(json!({
-            "status": status.as_u16(),
-            "payload": normalize_health(&payload),
-        }));
-    });
-
-    let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
-    assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("auth_missing_token", None),
-    }, {
-        insta::assert_yaml_snapshot!(json!({
-            "status": status.as_u16(),
-            "payload": payload,
-        }));
-    });
-
-    let request = Request::builder()
-        .method(Method::GET)
-        .uri("/v1/agents")
-        .header(header::AUTHORIZATION, "Bearer wrong-token")
-        .body(Body::empty())
-        .expect("auth invalid request");
-    let (status, _headers, payload) = send_json_request(&app.app, request).await;
-    assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("auth_invalid_token", None),
-    }, {
-        insta::assert_yaml_snapshot!(json!({
-            "status": status.as_u16(),
-            "payload": payload,
-        }));
-    });
-
-    let request = Request::builder()
-        .method(Method::GET)
-        .uri("/v1/agents")
-        .header(header::AUTHORIZATION, format!("Bearer {token}"))
-        .body(Body::empty())
-        .expect("auth valid request");
-    let (status, _headers, payload) = send_json_request(&app.app, request).await;
-    assert_eq!(status, StatusCode::OK, "valid token should allow request");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("auth_valid_token", None),
-    }, {
-        insta::assert_yaml_snapshot!(json!({
-            "status": status.as_u16(),
-            "payload": normalize_agent_list(&payload),
-        }));
-    });
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn cors_snapshots() {
-    let cors = CorsLayer::new()
-        .allow_origin(vec![HeaderValue::from_static("http://example.com")])
-        .allow_methods([Method::GET, Method::POST])
-        .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
-        .allow_credentials(true);
-    let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
-
-    let preflight = Request::builder()
-        .method(Method::OPTIONS)
-        .uri("/v1/health")
-        .header(header::ORIGIN, "http://example.com")
-        .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
-        .header(
-            header::ACCESS_CONTROL_REQUEST_HEADERS,
-            "authorization,content-type",
-        )
-        .body(Body::empty())
-        .expect("cors preflight request");
-    let (status, headers, _payload) = send_request(&app.app, preflight).await;
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("cors_preflight", None),
-    }, {
-        insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
-    });
-
-    let actual = Request::builder()
-        .method(Method::GET)
-        .uri("/v1/health")
-        .header(header::ORIGIN, "http://example.com")
-        .body(Body::empty())
-        .expect("cors actual request");
-    let (status, headers, payload) = send_json_request(&app.app, actual).await;
-    assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("cors_actual", None),
-    }, {
-        insta::assert_yaml_snapshot!(json!({
-            "cors": snapshot_cors(status, &headers),
-            "payload": normalize_health(&payload),
-        }));
-    });
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn api_endpoints_snapshots() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-
-    let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
-    assert_eq!(status, StatusCode::OK, "health status");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("health", None),
-    }, {
-        insta::assert_yaml_snapshot!(normalize_health(&health));
-    });
-
-    // List agents (just verify the API returns correct agent IDs, not install state)
-    let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
-    assert_eq!(status, StatusCode::OK, "agents list");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("agents_list", None),
-    }, {
-        insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
-    });
-
-    // Install agents (ensure they're available for subsequent tests)
-    for config in &configs {
-        let _guard = apply_credentials(&config.credentials);
-        let status = send_status(
-            &app.app,
-            Method::POST,
-            &format!("/v1/agents/{}/install", config.agent.as_str()),
-            Some(json!({})),
-        )
-        .await;
-        assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
-        insta::with_settings!({
-            snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
-        }, {
-            insta::assert_yaml_snapshot!(snapshot_status(status));
-        });
-    }
-
-    let mut session_ids = Vec::new();
-    for config in &configs {
-        let _guard = apply_credentials(&config.credentials);
-        let (status, modes) = send_json(
-            &app.app,
-            Method::GET,
-            &format!("/v1/agents/{}/modes", config.agent.as_str()),
-            None,
-        )
-        .await;
-        assert_eq!(status, StatusCode::OK, "agent modes");
-        insta::with_settings!({
-            snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
-        }, {
-            insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
-        });
-
-        let session_id = format!("snapshot-{}", config.agent.as_str());
-        let permission_mode = test_permission_mode(config.agent);
-        let (status, created) = send_json(
-            &app.app,
-            Method::POST,
-            &format!("/v1/sessions/{session_id}"),
-            Some(json!({
-                "agent": config.agent.as_str(),
-                "permissionMode": permission_mode
-            })),
-        )
-        .await;
-        assert_eq!(status, StatusCode::OK, "create session");
-        insta::with_settings!({
-            snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
-        }, {
-            insta::assert_yaml_snapshot!(normalize_create_session(&created));
-        });
-        session_ids.push((config.agent, session_id));
-    }
-
-    let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
-    assert_eq!(status, StatusCode::OK, "list sessions");
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("sessions_list", None),
-    }, {
-        insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
-    });
-
-    for (agent, session_id) in &session_ids {
-        let status = send_status(
-            &app.app,
-            Method::POST,
-            &format!("/v1/sessions/{session_id}/messages"),
-            Some(json!({ "message": PROMPT })),
-        )
-        .await;
-        assert_eq!(status, StatusCode::NO_CONTENT, "send message");
-        insta::with_settings!({
-            snapshot_suffix => snapshot_name("send_message", Some(*agent)),
-        }, {
-            insta::assert_yaml_snapshot!(snapshot_status(status));
-        });
-    }
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn approval_flow_snapshots() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-    let capabilities = fetch_capabilities(&app.app).await;
-
-    for config in &configs {
-        // OpenCode doesn't support "plan" permission mode required for approval flows
-        if config.agent == AgentId::Opencode {
-            continue;
-        }
-        let caps = capabilities
-            .get(config.agent.as_str())
-            .expect("capabilities missing");
-
-        let _guard = apply_credentials(&config.credentials);
-        install_agent(&app.app, config.agent).await;
-
-        if caps.plan_mode && caps.permissions {
-            let permission_session = format!("perm-{}", config.agent.as_str());
-            create_session(&app.app, config.agent, &permission_session, "plan").await;
-            let status = send_status(
-                &app.app,
-                Method::POST,
-                &format!("/v1/sessions/{permission_session}/messages"),
-                Some(json!({ "message": PERMISSION_PROMPT })),
-            )
-            .await;
-            assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
-
-            let permission_events = poll_events_until_match(
-                &app.app,
-                &permission_session,
-                Duration::from_secs(120),
-                |events| find_permission_id(events).is_some() || should_stop(events),
-            )
-            .await;
-            let permission_events = truncate_permission_events(&permission_events);
-            insta::with_settings!({
-                snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
-            }, {
-                insta::assert_yaml_snapshot!(normalize_events(&permission_events));
-            });
-
-            if let Some(permission_id) = find_permission_id(&permission_events) {
-                let status = send_status(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
-                    ),
-                    Some(json!({ "reply": "once" })),
-                )
-                .await;
-                assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(snapshot_status(status));
-                });
-            } else {
-                let (status, payload) = send_json(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{permission_session}/permissions/missing-permission/reply"
-                    ),
-                    Some(json!({ "reply": "once" })),
-                )
-                .await;
-                assert!(!status.is_success(), "missing permission id should error");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(json!({
-                        "status": status.as_u16(),
-                        "payload": payload,
-                    }));
-                });
-            }
-        }
-
-        if caps.questions {
-            let question_reply_session = format!("question-reply-{}", config.agent.as_str());
-            create_session(&app.app, config.agent, &question_reply_session, "plan").await;
-            let status = send_status(
-                &app.app,
-                Method::POST,
-                &format!("/v1/sessions/{question_reply_session}/messages"),
-                Some(json!({ "message": QUESTION_PROMPT })),
-            )
-            .await;
-            assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
-
-            let question_events = poll_events_until_match(
-                &app.app,
-                &question_reply_session,
-                Duration::from_secs(120),
-                |events| find_question_id_and_answers(events).is_some() || should_stop(events),
-            )
-            .await;
-            let question_events = truncate_question_events(&question_events);
-            insta::with_settings!({
-                snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
-            }, {
-                insta::assert_yaml_snapshot!(normalize_events(&question_events));
-            });
-
-            if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
-                let status = send_status(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
-                    ),
-                    Some(json!({ "answers": answers })),
-                )
-                .await;
-                assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(snapshot_status(status));
-                });
-            } else {
-                let (status, payload) = send_json(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{question_reply_session}/questions/missing-question/reply"
-                    ),
-                    Some(json!({ "answers": [] })),
-                )
-                .await;
-                assert!(!status.is_success(), "missing question id should error");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(json!({
-                        "status": status.as_u16(),
-                        "payload": payload,
-                    }));
-                });
-            }
-
-            let question_reject_session = format!("question-reject-{}", config.agent.as_str());
-            create_session(&app.app, config.agent, &question_reject_session, "plan").await;
-            let status = send_status(
-                &app.app,
-                Method::POST,
-                &format!("/v1/sessions/{question_reject_session}/messages"),
-                Some(json!({ "message": QUESTION_PROMPT })),
-            )
-            .await;
-            assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
-
-            let reject_events = poll_events_until_match(
-                &app.app,
-                &question_reject_session,
-                Duration::from_secs(120),
-                |events| find_question_id_and_answers(events).is_some() || should_stop(events),
-            )
-            .await;
-            let reject_events = truncate_question_events(&reject_events);
-            insta::with_settings!({
-                snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
-            }, {
-                insta::assert_yaml_snapshot!(normalize_events(&reject_events));
-            });
-
-            if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
-                let status = send_status(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
-                    ),
-                    None,
-                )
-                .await;
-                assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(snapshot_status(status));
-                });
-            } else {
-                let (status, payload) = send_json(
-                    &app.app,
-                    Method::POST,
-                    &format!(
-                        "/v1/sessions/{question_reject_session}/questions/missing-question/reject"
-                    ),
-                    None,
-                )
-                .await;
-                assert!(!status.is_success(), "missing question id reject should error");
-                insta::with_settings!({
-                    snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
-                }, {
-                    insta::assert_yaml_snapshot!(json!({
-                        "status": status.as_u16(),
-                        "payload": payload,
-                    }));
-                });
-            }
-        }
-    }
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn http_events_snapshots() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-    for config in &configs {
-        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
-        // See: https://github.com/opencode-ai/opencode/issues/XXX
-        if config.agent == AgentId::Opencode {
-            continue;
-        }
-        run_http_events_snapshot(&app.app, config).await;
-    }
-}
-
-async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
-    let _guard = apply_credentials(&config.credentials);
-    install_agent(app, config.agent).await;
-
-    let session_a = format!("concurrent-a-{}", config.agent.as_str());
-    let session_b = format!("concurrent-b-{}", config.agent.as_str());
-    let perm_mode = test_permission_mode(config.agent);
-    create_session(app, config.agent, &session_a, perm_mode).await;
-    create_session(app, config.agent, &session_b, perm_mode).await;
-
-    let app_a = app.clone();
-    let app_b = app.clone();
-    let send_a = send_message(&app_a, &session_a);
-    let send_b = send_message(&app_b, &session_b);
-    tokio::join!(send_a, send_b);
-
-    let app_a = app.clone();
-    let app_b = app.clone();
-    let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
-    let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
-    let (events_a, events_b) = tokio::join!(poll_a, poll_b);
-    let events_a = truncate_after_first_stop(&events_a);
-    let events_b = truncate_after_first_stop(&events_b);
-
-    assert!(
-        !events_a.is_empty(),
-        "no events collected for concurrent session a {}",
-        config.agent
-    );
-    assert!(
-        !events_b.is_empty(),
-        "no events collected for concurrent session b {}",
-        config.agent
-    );
-    assert!(
-        should_stop(&events_a),
-        "timed out waiting for assistant/error event for concurrent session a {}",
-        config.agent
-    );
-    assert!(
-        should_stop(&events_b),
-        "timed out waiting for assistant/error event for concurrent session b {}",
-        config.agent
-    );
-
-    let snapshot = json!({
-        "session_a": normalize_events(&events_a),
-        "session_b": normalize_events(&events_b),
-    });
-    insta::with_settings!({
-        snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
-    }, {
-        insta::assert_yaml_snapshot!(snapshot);
-    });
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn sse_events_snapshots() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-    for config in &configs {
-        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
-        // See: https://github.com/opencode-ai/opencode/issues/XXX
-        if config.agent == AgentId::Opencode {
-            continue;
-        }
-        run_sse_events_snapshot(&app.app, config).await;
-    }
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn turn_stream_route() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-    for config in &configs {
-        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
-        // See: https://github.com/opencode-ai/opencode/issues/XXX
-        if config.agent == AgentId::Opencode {
-            continue;
-        }
-        run_turn_stream_check(&app.app, config).await;
-    }
-}
-
-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn concurrency_snapshots() {
-    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
-    let app = TestApp::new();
-    for config in &configs {
-        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
-        // See: https://github.com/opencode-ai/opencode/issues/XXX
-        if config.agent == AgentId::Opencode {
-            continue;
-        }
-        run_concurrency_snapshot(&app.app, config).await;
-    }
-}
diff --git a/server/packages/sandbox-agent/tests/http/agent_endpoints.rs b/server/packages/sandbox-agent/tests/http/agent_endpoints.rs
new file mode 100644
index 0000000..f195205
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/http/agent_endpoints.rs
@@ -0,0 +1,165 @@
+// Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/.
+include!("../common/http.rs");
+/// Snapshots bearer-token auth: `/v1/health` is public, `/v1/agents` requires the token.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auth_snapshots() {
+    let token = "test-token";
+    let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
+    // No token is passed here: the health check must still answer 200.
+    let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
+    assert_eq!(status, StatusCode::OK, "health should be public");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("auth_health_public", None),
+    }, {
+        insta::assert_yaml_snapshot!(json!({
+            "status": status.as_u16(),
+            "payload": normalize_health(&payload),
+        }));
+    });
+    // No token is passed on a protected route: expect 401.
+    let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
+    assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("auth_missing_token", None),
+    }, {
+        insta::assert_yaml_snapshot!(json!({
+            "status": status.as_u16(),
+            "payload": payload,
+        }));
+    });
+    // Bearer token that differs from the configured one: expect 401.
+    let request = Request::builder()
+        .method(Method::GET)
+        .uri("/v1/agents")
+        .header(header::AUTHORIZATION, "Bearer wrong-token")
+        .body(Body::empty())
+        .expect("auth invalid request");
+    let (status, _headers, payload) = send_json_request(&app.app, request).await;
+    assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("auth_invalid_token", None),
+    }, {
+        insta::assert_yaml_snapshot!(json!({
+            "status": status.as_u16(),
+            "payload": payload,
+        }));
+    });
+    // Bearer token matching the configured one: expect 200 and the normalized agent list.
+    let request = Request::builder()
+        .method(Method::GET)
+        .uri("/v1/agents")
+        .header(header::AUTHORIZATION, format!("Bearer {token}"))
+        .body(Body::empty())
+        .expect("auth valid request");
+    let (status, _headers, payload) = send_json_request(&app.app, request).await;
+    assert_eq!(status, StatusCode::OK, "valid token should succeed");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("auth_valid_token", None),
+    }, {
+        insta::assert_yaml_snapshot!(json!({
+            "status": status.as_u16(),
+            "payload": normalize_agent_list(&payload),
+        }));
+    });
+}
+/// Snapshots the CORS headers returned for a preflight and for a GET carrying an `Origin`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn cors_snapshots() {
+    let cors = CorsLayer::new()
+        .allow_origin("http://example.com".parse::<HeaderValue>().unwrap())
+        .allow_methods([Method::GET, Method::POST])
+        .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]);
+    let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
+    // Preflight from the allowed origin; its status is only snapshotted, never asserted.
+    let preflight = Request::builder()
+        .method(Method::OPTIONS)
+        .uri("/v1/agents")
+        .header(header::ORIGIN, "http://example.com")
+        .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
+        .header(
+            header::ACCESS_CONTROL_REQUEST_HEADERS,
+            "authorization,content-type",
+        )
+        .body(Body::empty())
+        .expect("cors preflight request");
+    let (status, headers, _payload) = send_request(&app.app, preflight).await;
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("cors_preflight", None),
+    }, {
+        insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
+    });
+    // Actual GET carrying the allowed Origin header; it must return 200.
+    let actual = Request::builder()
+        .method(Method::GET)
+        .uri("/v1/health")
+        .header(header::ORIGIN, "http://example.com")
+        .body(Body::empty())
+        .expect("cors actual request");
+    let (status, headers, payload) = send_json_request(&app.app, actual).await;
+    assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("cors_actual", None),
+    }, {
+        insta::assert_yaml_snapshot!(json!({
+            "cors": snapshot_cors(status, &headers),
+            "payload": normalize_health(&payload),
+        }));
+    });
+}
+/// Snapshots health, the agent list, and install/modes responses for each configured agent.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn agent_endpoints_snapshots() {
+    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    let app = TestApp::new();
+    // Health check against the app built with `TestApp::new()`; must return 200.
+    let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
+    assert_eq!(status, StatusCode::OK, "health status");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("health", None),
+    }, {
+        insta::assert_yaml_snapshot!(normalize_health(&health));
+    });
+
+    // List agents (verify IDs only; install state is environment-dependent).
+    let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
+    assert_eq!(status, StatusCode::OK, "agents list");
+    insta::with_settings!({
+        snapshot_suffix => snapshot_name("agents_list", None),
+    }, {
+        insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
+    });
+    // First pass: install every configured agent; each install must return 204.
+    for config in &configs {
+        let _guard = apply_credentials(&config.credentials);
+        let status = send_status(
+            &app.app,
+            Method::POST,
+            &format!("/v1/agents/{}/install", config.agent.as_str()),
+            Some(json!({})),
+        )
+        .await;
+        assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
+        insta::with_settings!({
+            snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
+        }, {
+            insta::assert_yaml_snapshot!(snapshot_status(status));
+        });
+    }
+    // Second pass: query each agent's modes; each request must return 200.
+    for config in &configs {
+        let _guard = apply_credentials(&config.credentials);
+        let (status, modes) = send_json(
+            &app.app,
+            Method::GET,
+            &format!("/v1/agents/{}/modes", config.agent.as_str()),
+            None,
+        )
+        .await;
+        assert_eq!(status, StatusCode::OK, "agent modes");
+        insta::with_settings!({
+            snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
+        }, {
+            insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
+        });
+    }
+}
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@health_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@health_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_health_public_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_health_public_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_invalid_token_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_invalid_token_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_missing_token_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_missing_token_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_valid_token_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_valid_token_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_actual_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_actual_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_preflight_global.snap
similarity index 100%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap
rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_preflight_global.snap
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap
deleted file mode 100644
index c9e259a..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 918
-expression: normalize_create_session(&created)
----
-healthy: true
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap
deleted file mode 100644
index 25be48c..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_create_session(&created)
----
-healthy: true
-nativeSessionId: "<redacted>"
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap
deleted file mode 100644
index f0bd98a..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap
+++ /dev/null
@@ -1,7 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1053
-expression: normalize_create_session(&created)
----
-healthy: true
-nativeSessionId: "<redacted>"
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap
deleted file mode 100644
index 31bd8a7..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_create_session(&created)
----
-agentSessionId: "<redacted>"
-healthy: true
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap
deleted file mode 100644
index 636137e..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 943
-expression: snapshot_status(status)
----
-status: 204
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap
deleted file mode 100644
index af9bc66..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 959
-expression: snapshot_status(status)
----
-status: 204
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap
deleted file mode 100644
index 0ce7ff9..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1078
-expression: snapshot_status(status)
----
-status: 204
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap
deleted file mode 100644
index 74ac4c1..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap
+++ /dev/null
@@ -1,5 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: snapshot_status(status)
----
-status: 204
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap
deleted file mode 100644
index 5a87a3e..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap
+++ /dev/null
@@ -1,6 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_sessions(&sessions)
----
-hasExpectedFields: true
-sessionCount: 1
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
deleted file mode 100644
index 1b31317..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
+++ /dev/null
@@ -1,17 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1119
-expression: normalize_events(&permission_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap
deleted file mode 100644
index 46f9648..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap
+++ /dev/null
@@ -1,131 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_events(&permission_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: in_progress
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 5
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: completed
-  seq: 6
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 7
-  source: agent
-  synthetic: false
-  type: item.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 8
-  source: agent
-  synthetic: false
-  type: item.completed
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 9
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 10
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 11
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 12
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 13
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 14
-  source: agent
-  synthetic: false
-  type: item.delta
-- item:
-    content_types:
-      - reasoning
-    kind: message
-    role: assistant
-    status: completed
-  seq: 15
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap
deleted file mode 100644
index b27511c..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap
+++ /dev/null
@@ -1,35 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1112
-expression: normalize_events(&permission_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap
deleted file mode 100644
index fafb7c8..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1017
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown permission id: missing-permission"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap
deleted file mode 100644
index de6549e..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1152
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown permission id: missing-permission"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
deleted file mode 100644
index 8cb0493..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
+++ /dev/null
@@ -1,45 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1151
-expression: normalize_events(&reject_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  source: daemon
-  synthetic: true
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap
deleted file mode 100644
index 53eb2e3..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap
+++ /dev/null
@@ -1,331 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_events(&reject_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: in_progress
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 5
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: completed
-  seq: 6
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 7
-  source: agent
-  synthetic: false
-  type: item.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 8
-  source: agent
-  synthetic: false
-  type: item.completed
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 9
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 10
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 11
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 12
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 13
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 14
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 15
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 16
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 17
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 18
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 19
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 20
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 21
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 22
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 23
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 24
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 25
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 26
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 27
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 28
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 29
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 30
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 31
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 32
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 33
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 34
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 35
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 36
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 37
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 38
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 39
-  source: agent
-  synthetic: false
-  type: item.delta
-- item:
-    content_types:
-      - reasoning
-    kind: message
-    role: assistant
-    status: completed
-  seq: 40
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap
deleted file mode 100644
index 84dd20e..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap
+++ /dev/null
@@ -1,35 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1236
-expression: normalize_events(&reject_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap
deleted file mode 100644
index df61c32..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1151
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown question id: missing-question"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap
deleted file mode 100644
index 6c6dbae..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1139
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown question id: missing-question"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap
deleted file mode 100644
index 075fe0f..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1276
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown question id: missing-question"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
deleted file mode 100644
index 90cd95f..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
+++ /dev/null
@@ -1,45 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1109
-expression: normalize_events(&question_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  source: daemon
-  synthetic: true
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap
deleted file mode 100644
index a1b3098..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap
+++ /dev/null
@@ -1,315 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalize_events(&question_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: in_progress
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 5
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: completed
-  seq: 6
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 7
-  source: agent
-  synthetic: false
-  type: item.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 8
-  source: agent
-  synthetic: false
-  type: item.completed
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 9
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 10
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 11
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 12
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 13
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 14
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 15
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 16
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 17
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 18
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 19
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 20
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 21
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 22
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 23
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 24
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 25
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 26
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 27
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 28
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 29
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 30
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 31
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 32
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 33
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 34
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 35
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 36
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 37
-  source: agent
-  synthetic: false
-  type: item.delta
-- item:
-    content_types:
-      - reasoning
-    kind: message
-    role: assistant
-    status: completed
-  seq: 38
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap
deleted file mode 100644
index e525eb2..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap
+++ /dev/null
@@ -1,35 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1174
-expression: normalize_events(&question_events)
----
-- metadata: true
-  seq: 1
-  session: started
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap
deleted file mode 100644
index 2c65fd0..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap
+++ /dev/null
@@ -1,11 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1214
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
----
-payload:
-  detail: "invalid request: unknown question id: missing-question"
-  status: 400
-  title: Invalid Request
-  type: "urn:sandbox-agent:error:invalid_request"
-status: 400
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap
deleted file mode 100644
index b0ecfb4..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap
+++ /dev/null
@@ -1,201 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: snapshot
----
-session_a:
-  - metadata: true
-    seq: 1
-    session: started
-    source: daemon
-    synthetic: true
-    type: session.started
-  - metadata: true
-    seq: 2
-    session: started
-    source: agent
-    synthetic: false
-    type: session.started
-  - item:
-      content_types:
-        - status
-      kind: status
-      role: system
-      status: completed
-    seq: 3
-    source: agent
-    synthetic: false
-    type: item.completed
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: user
-      status: in_progress
-    seq: 4
-    source: agent
-    synthetic: false
-    type: item.started
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 5
-    source: daemon
-    synthetic: true
-    type: item.delta
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: user
-      status: completed
-    seq: 6
-    source: agent
-    synthetic: false
-    type: item.completed
-  - item:
-      content_types: []
-      kind: message
-      role: assistant
-      status: in_progress
-    seq: 7
-    source: agent
-    synthetic: false
-    type: item.started
-  - item:
-      content_types: []
-      kind: message
-      role: assistant
-      status: completed
-    seq: 8
-    source: agent
-    synthetic: false
-    type: item.completed
-session_b:
-  - metadata: true
-    seq: 1
-    session: started
-    source: daemon
-    synthetic: true
-    type: session.started
-  - metadata: true
-    seq: 2
-    session: started
-    source: agent
-    synthetic: false
-    type: session.started
-  - item:
-      content_types:
-        - status
-      kind: status
-      role: system
-      status: completed
-    seq: 3
-    source: agent
-    synthetic: false
-    type: item.completed
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: user
-      status: in_progress
-    seq: 4
-    source: agent
-    synthetic: false
-    type: item.started
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 5
-    source: daemon
-    synthetic: true
-    type: item.delta
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: user
-      status: completed
-    seq: 6
-    source: agent
-    synthetic: false
-    type: item.completed
-  - item:
-      content_types: []
-      kind: message
-      role: assistant
-      status: in_progress
-    seq: 7
-    source: agent
-    synthetic: false
-    type: item.started
-  - item:
-      content_types:
-        - status
-      kind: status
-      role: system
-      status: completed
-    seq: 8
-    source: agent
-    synthetic: false
-    type: item.completed
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 9
-    source: agent
-    synthetic: false
-    type: item.delta
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 10
-    source: agent
-    synthetic: false
-    type: item.delta
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 11
-    source: agent
-    synthetic: false
-    type: item.delta
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 12
-    source: agent
-    synthetic: false
-    type: item.delta
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 13
-    source: agent
-    synthetic: false
-    type: item.delta
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 14
-    source: agent
-    synthetic: false
-    type: item.delta
-  - item:
-      content_types:
-        - reasoning
-      kind: message
-      role: assistant
-      status: completed
-    seq: 15
-    source: agent
-    synthetic: false
-    type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap
deleted file mode 100644
index f9abaa0..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap
+++ /dev/null
@@ -1,67 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1344
-expression: snapshot
----
-session_a:
-  - metadata: true
-    seq: 1
-    session: started
-    type: session.started
-  - metadata: true
-    seq: 2
-    session: started
-    type: session.started
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: assistant
-      status: in_progress
-    seq: 3
-    type: item.started
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 4
-    type: item.delta
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: assistant
-      status: completed
-    seq: 5
-    type: item.completed
-session_b:
-  - metadata: true
-    seq: 1
-    session: started
-    type: session.started
-  - metadata: true
-    seq: 2
-    session: started
-    type: session.started
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: assistant
-      status: in_progress
-    seq: 3
-    type: item.started
-  - delta:
-      delta: "<redacted>"
-      item_id: "<redacted>"
-      native_item_id: "<redacted>"
-    seq: 4
-    type: item.delta
-  - item:
-      content_types:
-        - text
-      kind: message
-      role: assistant
-      status: completed
-    seq: 5
-    type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap
deleted file mode 100644
index 46d5eb1..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap
+++ /dev/null
@@ -1,171 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-expression: normalized
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: in_progress
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 5
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: completed
-  seq: 6
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 7
-  source: agent
-  synthetic: false
-  type: item.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 8
-  source: agent
-  synthetic: false
-  type: item.completed
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 9
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 10
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 11
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 12
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 13
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 14
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 15
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 16
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 17
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 18
-  source: agent
-  synthetic: false
-  type: item.delta
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 19
-  source: agent
-  synthetic: false
-  type: item.delta
-- item:
-    content_types:
-      - reasoning
-    kind: message
-    role: assistant
-    status: completed
-  seq: 20
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
deleted file mode 100644
index 48235e5..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
+++ /dev/null
@@ -1,45 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 848
-expression: normalized
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap
deleted file mode 100644
index dc82798..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap
+++ /dev/null
@@ -1,73 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 848
-expression: normalized
----
-- metadata: true
-  seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
-- item:
-    content_types:
-      - status
-    kind: status
-    role: system
-    status: completed
-  seq: 3
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: in_progress
-  seq: 4
-  source: agent
-  synthetic: false
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 5
-  source: daemon
-  synthetic: true
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: user
-    status: completed
-  seq: 6
-  source: agent
-  synthetic: false
-  type: item.completed
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 7
-  source: agent
-  synthetic: false
-  type: item.started
-- item:
-    content_types: []
-    kind: message
-    role: assistant
-    status: completed
-  seq: 8
-  source: agent
-  synthetic: false
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap
deleted file mode 100644
index add0b00..0000000
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap
+++ /dev/null
@@ -1,35 +0,0 @@
----
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 841
-expression: normalized
----
-- metadata: true
-  seq: 1
-  session: started
-  type: session.started
-- metadata: true
-  seq: 2
-  session: started
-  type: session.started
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: in_progress
-  seq: 3
-  type: item.started
-- delta:
-    delta: "<redacted>"
-    item_id: "<redacted>"
-    native_item_id: "<redacted>"
-  seq: 4
-  type: item.delta
-- item:
-    content_types:
-      - text
-    kind: message
-    role: assistant
-    status: completed
-  seq: 5
-  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http_endpoints.rs b/server/packages/sandbox-agent/tests/http_endpoints.rs
new file mode 100644
index 0000000..a443a95
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/http_endpoints.rs
@@ -0,0 +1,2 @@
+#[path = "http/agent_endpoints.rs"]
+mod agent_endpoints;
diff --git a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
deleted file mode 100644
index b0ce134..0000000
--- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+++ /dev/null
@@ -1 +0,0 @@
-include!("http/http_sse_snapshots.rs");
diff --git a/server/packages/sandbox-agent/tests/sessions.rs b/server/packages/sandbox-agent/tests/sessions.rs
new file mode 100644
index 0000000..6abcb74
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions.rs
@@ -0,0 +1,2 @@
+#[path = "sessions/mod.rs"]
+mod sessions;
diff --git a/server/packages/sandbox-agent/tests/sessions/mod.rs b/server/packages/sandbox-agent/tests/sessions/mod.rs
new file mode 100644
index 0000000..6bc2a16
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/mod.rs
@@ -0,0 +1,5 @@
+mod session_lifecycle;
+mod permissions;
+mod questions;
+mod reasoning;
+mod status;
diff --git a/server/packages/sandbox-agent/tests/sessions/permissions.rs b/server/packages/sandbox-agent/tests/sessions/permissions.rs
new file mode 100644
index 0000000..34dcc07
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/permissions.rs
@@ -0,0 +1,88 @@
+// Permission flow snapshots compare every agent to the mock baseline.
+include!("../common/http.rs");
+
+fn session_snapshot_suffix(prefix: &str) -> String {
+    snapshot_name(prefix, Some(AgentId::Mock)) // mock agent, whichever agent is under test
+}
+
+/// Asserts `value` as a YAML snapshot whose suffix is `session_snapshot_suffix(prefix)`.
+fn assert_session_snapshot(prefix: &str, value: Value) {
+    let suffix = session_snapshot_suffix(prefix);
+    insta::with_settings!({ snapshot_suffix => suffix }, {
+        insta::assert_yaml_snapshot!(value);
+    });
+}
+
+/// Drives the permission prompt/reply flow for every configured agent.
+///
+/// Agents whose capabilities lack `plan_mode` or `permissions` are skipped. When no permission
+/// id is observed, a made-up id is posted instead and the error response is snapshotted.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn permission_flow_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+
+    for cfg in &agents {
+        let harness = TestApp::new();
+        let router = &harness.app;
+        let capability_map = fetch_capabilities(router).await;
+        let supported = capability_map
+            .get(cfg.agent.as_str())
+            .expect("capabilities missing");
+        if !supported.plan_mode || !supported.permissions {
+            continue;
+        }
+
+        // Keep the credentials guard bound for the remainder of this iteration.
+        let _guard = apply_credentials(&cfg.credentials);
+        install_agent(router, cfg.agent).await;
+
+        // Open a plan-mode session and record the event offset before prompting.
+        let session_id = format!("perm-{}", cfg.agent.as_str());
+        create_session(router, cfg.agent, &session_id, "plan").await;
+        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;
+
+        let messages_path = format!("/v1/sessions/{session_id}/messages");
+        let prompt_body = json!({ "message": PERMISSION_PROMPT });
+        let sent = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
+        assert_eq!(sent, StatusCode::NO_CONTENT, "send permission prompt");
+
+        // Poll from that offset until a permission id appears or `should_stop` matches.
+        let observed = poll_events_until_match_from(
+            router,
+            &session_id,
+            start_offset,
+            Duration::from_secs(120),
+            |events| find_permission_id(events).is_some() || should_stop(events),
+        )
+        .await;
+        let observed = truncate_permission_events(&observed);
+        assert_session_snapshot("permission_events", normalize_events(&observed));
+
+        match find_permission_id(&observed) {
+            // A permission was requested: reply "once" and snapshot the status.
+            Some(permission_id) => {
+                let reply_path =
+                    format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply");
+                let reply_body = json!({ "reply": "once" });
+                let replied =
+                    send_status(router, Method::POST, &reply_path, Some(reply_body)).await;
+                assert_eq!(replied, StatusCode::NO_CONTENT, "reply permission");
+                assert_session_snapshot("permission_reply", snapshot_status(replied));
+            }
+            // No permission observed: replying to a made-up id must fail.
+            None => {
+                let missing_path =
+                    format!("/v1/sessions/{session_id}/permissions/missing-permission/reply");
+                let reply_body = json!({ "reply": "once" });
+                let (code, payload) =
+                    send_json(router, Method::POST, &missing_path, Some(reply_body)).await;
+                assert!(!code.is_success(), "missing permission id should error");
+                let summary = json!({
+                    "status": code.as_u16(),
+                    "payload": payload,
+                });
+                assert_session_snapshot("permission_reply_missing", summary);
+            }
+        }
+    }
+}
diff --git a/server/packages/sandbox-agent/tests/sessions/questions.rs b/server/packages/sandbox-agent/tests/sessions/questions.rs
new file mode 100644
index 0000000..9f5b55e
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/questions.rs
@@ -0,0 +1,145 @@
+// Question flow snapshots compare every agent to the mock baseline.
+include!("../common/http.rs");
+
+fn session_snapshot_suffix(prefix: &str) -> String {
+    snapshot_name(prefix, Some(AgentId::Mock)) // mock agent, whichever agent is under test
+}
+
+/// Asserts `value` as a YAML snapshot whose suffix is `session_snapshot_suffix(prefix)`.
+fn assert_session_snapshot(prefix: &str, value: Value) {
+    let suffix = session_snapshot_suffix(prefix);
+    insta::with_settings!({ snapshot_suffix => suffix }, {
+        insta::assert_yaml_snapshot!(value);
+    });
+}
+
+/// Drives the question reply and question reject flows for every configured agent.
+///
+/// Agents whose capabilities lack `questions` are skipped. Each flow opens its own plan-mode
+/// session, sends `QUESTION_PROMPT`, and snapshots the truncated, normalized events. When a
+/// question id is found it is replied to (or rejected); otherwise the same endpoint is hit
+/// with a made-up id and the error response is snapshotted instead.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn question_flow_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+
+    for cfg in &agents {
+        let harness = TestApp::new();
+        let router = &harness.app;
+
+        // Skip agents that do not report the `questions` capability.
+        let capability_map = fetch_capabilities(router).await;
+        let supported = capability_map
+            .get(cfg.agent.as_str())
+            .expect("capabilities missing");
+        if !supported.questions {
+            continue;
+        }
+
+        // Keep the credentials guard bound for the remainder of this iteration.
+        let _guard = apply_credentials(&cfg.credentials);
+        install_agent(router, cfg.agent).await;
+
+        // Reply flow: a dedicated plan-mode session whose question gets answered.
+        let reply_session = format!("question-reply-{}", cfg.agent.as_str());
+        create_session(router, cfg.agent, &reply_session, "plan").await;
+        let reply_offset = drain_events(router, &reply_session, Duration::from_secs(6)).await;
+
+        let sent = send_status(
+            router,
+            Method::POST,
+            &format!("/v1/sessions/{reply_session}/messages"),
+            Some(json!({ "message": QUESTION_PROMPT })),
+        )
+        .await;
+        assert_eq!(sent, StatusCode::NO_CONTENT, "send question prompt");
+
+        // Poll from the drained offset until a question shows up or `should_stop` matches.
+        let reply_events = poll_events_until_match_from(
+            router,
+            &reply_session,
+            reply_offset,
+            Duration::from_secs(120),
+            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
+        )
+        .await;
+        let reply_events = truncate_question_events(&reply_events);
+        assert_session_snapshot("question_reply_events", normalize_events(&reply_events));
+
+        match find_question_id_and_answers(&reply_events) {
+            // A question was asked: send back the answers found with it.
+            Some((question_id, answers)) => {
+                let reply_path =
+                    format!("/v1/sessions/{reply_session}/questions/{question_id}/reply");
+                let reply_body = json!({ "answers": answers });
+                let replied =
+                    send_status(router, Method::POST, &reply_path, Some(reply_body)).await;
+                assert_eq!(replied, StatusCode::NO_CONTENT, "reply question");
+                assert_session_snapshot("question_reply", snapshot_status(replied));
+            }
+            // No question observed: replying to a made-up id must fail.
+            None => {
+                let missing_path =
+                    format!("/v1/sessions/{reply_session}/questions/missing-question/reply");
+                let reply_body = json!({ "answers": [] });
+                let (code, payload) =
+                    send_json(router, Method::POST, &missing_path, Some(reply_body)).await;
+                assert!(!code.is_success(), "missing question id should error");
+                let summary = json!({
+                    "status": code.as_u16(),
+                    "payload": payload,
+                });
+                assert_session_snapshot("question_reply_missing", summary);
+            }
+        }
+
+        // Reject flow: the same prompt on a fresh session, rejected instead of answered.
+        let reject_session = format!("question-reject-{}", cfg.agent.as_str());
+        create_session(router, cfg.agent, &reject_session, "plan").await;
+        let reject_offset = drain_events(router, &reject_session, Duration::from_secs(6)).await;
+
+        let sent = send_status(
+            router,
+            Method::POST,
+            &format!("/v1/sessions/{reject_session}/messages"),
+            Some(json!({ "message": QUESTION_PROMPT })),
+        )
+        .await;
+        assert_eq!(sent, StatusCode::NO_CONTENT, "send question prompt reject");
+
+        // Same polling condition as the reply flow, on the second session.
+        let reject_events = poll_events_until_match_from(
+            router,
+            &reject_session,
+            reject_offset,
+            Duration::from_secs(120),
+            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
+        )
+        .await;
+        let reject_events = truncate_question_events(&reject_events);
+        assert_session_snapshot("question_reject_events", normalize_events(&reject_events));
+
+        match find_question_id_and_answers(&reject_events) {
+            // A question was asked: reject it (no request body) and snapshot the status.
+            Some((question_id, _)) => {
+                let reject_path =
+                    format!("/v1/sessions/{reject_session}/questions/{question_id}/reject");
+                let rejected = send_status(router, Method::POST, &reject_path, None).await;
+                assert_eq!(rejected, StatusCode::NO_CONTENT, "reject question");
+                assert_session_snapshot("question_reject", snapshot_status(rejected));
+            }
+            // No question observed: rejecting a made-up id must fail.
+            None => {
+                let missing_path =
+                    format!("/v1/sessions/{reject_session}/questions/missing-question/reject");
+                let (code, payload) = send_json(router, Method::POST, &missing_path, None).await;
+                assert!(!code.is_success(), "missing question id reject should error");
+                let summary = json!({
+                    "status": code.as_u16(),
+                    "payload": payload,
+                });
+                assert_session_snapshot("question_reject_missing", summary);
+            }
+        }
+    }
+}
diff --git a/server/packages/sandbox-agent/tests/sessions/reasoning.rs b/server/packages/sandbox-agent/tests/sessions/reasoning.rs
new file mode 100644
index 0000000..6994d06
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/reasoning.rs
@@ -0,0 +1,56 @@
+// Reasoning capability checks are isolated from baseline snapshots.
+include!("../common/http.rs");
+
+/// Returns the prompt for the reasoning test: `"demo"` for the mock agent, a request otherwise.
+fn reasoning_prompt(agent: AgentId) -> &'static str {
+    if agent != AgentId::Mock {
+        return "Answer briefly and include your reasoning.";
+    }
+    "demo"
+}
+
+/// Checks that every agent reporting the `reasoning` capability emits `reasoning` content.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn reasoning_events_present() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    for cfg in &agents {
+        let harness = TestApp::new();
+        let router = &harness.app;
+        let capability_map = fetch_capabilities(router).await;
+        let supported = capability_map
+            .get(cfg.agent.as_str())
+            .expect("capabilities missing");
+        if !supported.reasoning {
+            continue;
+        }
+
+        let _guard = apply_credentials(&cfg.credentials);
+        install_agent(router, cfg.agent).await;
+
+        let session_id = format!("reasoning-{}", cfg.agent.as_str());
+        let mode = test_permission_mode(cfg.agent);
+        create_session(router, cfg.agent, &session_id, mode).await;
+        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;
+        let messages_path = format!("/v1/sessions/{session_id}/messages");
+        let prompt_body = json!({ "message": reasoning_prompt(cfg.agent) });
+        let sent = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
+        assert_eq!(sent, StatusCode::NO_CONTENT, "send reasoning prompt");
+
+        // Stop polling once reasoning content or an error event has been seen.
+        let observed = poll_events_until_match_from(
+            router,
+            &session_id,
+            start_offset,
+            Duration::from_secs(120),
+            |events| {
+                events_have_content_type(events, "reasoning") || events.iter().any(is_error_event)
+            },
+        )
+        .await;
+        assert!(
+            events_have_content_type(&observed, "reasoning"),
+            "expected reasoning content for {}",
+            cfg.agent
+        );
+    }
+}
diff --git a/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
new file mode 100644
index 0000000..ed14e76
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
@@ -0,0 +1,192 @@
+// Session lifecycle and streaming snapshots use the mock baseline as the single source of truth.
+include!("../common/http.rs");
+
+fn session_snapshot_suffix(prefix: &str) -> String {
+    snapshot_name(prefix, Some(AgentId::Mock)) // mock agent, whichever agent is under test
+}
+
+/// Asserts `value` as a YAML snapshot whose suffix is `session_snapshot_suffix(prefix)`.
+fn assert_session_snapshot(prefix: &str, value: Value) {
+    let suffix = session_snapshot_suffix(prefix);
+    insta::with_settings!({ snapshot_suffix => suffix }, {
+        insta::assert_yaml_snapshot!(value);
+    });
+}
+
+/// Snapshots the create-session, list-sessions, and send-message responses.
+///
+/// Runs once per configured agent; agents without `session_lifecycle` are skipped.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn session_endpoints_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+
+    for cfg in &agents {
+        let harness = TestApp::new();
+        let router = &harness.app;
+        let capability_map = fetch_capabilities(router).await;
+        let supported = capability_map
+            .get(cfg.agent.as_str())
+            .expect("capabilities missing");
+        if !supported.session_lifecycle {
+            continue;
+        }
+
+        let _guard = apply_credentials(&cfg.credentials);
+        install_agent(router, cfg.agent).await;
+
+        // Create the session using the agent's test permission mode.
+        let session_id = format!("snapshot-{}", cfg.agent.as_str());
+        let session_path = format!("/v1/sessions/{session_id}");
+        let mode = test_permission_mode(cfg.agent);
+        let create_body = json!({
+            "agent": cfg.agent.as_str(),
+            "permissionMode": mode
+        });
+        let (code, created) =
+            send_json(router, Method::POST, &session_path, Some(create_body)).await;
+        assert_eq!(code, StatusCode::OK, "create session");
+        assert_session_snapshot("create_session", normalize_create_session(&created));
+
+        // List sessions on the same router.
+        let (code, listing) = send_json(router, Method::GET, "/v1/sessions", None).await;
+        assert_eq!(code, StatusCode::OK, "list sessions");
+        assert_session_snapshot("sessions_list", normalize_sessions(&listing));
+
+        // Post `PROMPT` as a message; only the response status is snapshotted.
+        let messages_path = format!("/v1/sessions/{session_id}/messages");
+        let prompt_body = json!({ "message": PROMPT });
+        let sent = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
+        assert_eq!(sent, StatusCode::NO_CONTENT, "send message");
+        assert_session_snapshot("send_message", snapshot_status(sent));
+    }
+}
+
+/// Runs `run_http_events_snapshot` for each lifecycle-capable agent except OpenCode.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn http_events_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    for agent_config in &agents {
+        // OpenCode's embedded bun hangs when installing plugins, blocking event streaming.
+        if agent_config.agent == AgentId::Opencode {
+            continue;
+        }
+        let test_app = TestApp::new();
+        let capability_map = fetch_capabilities(&test_app.app).await;
+        let supports_lifecycle = capability_map
+            .get(agent_config.agent.as_str())
+            .expect("capabilities missing")
+            .session_lifecycle;
+        if supports_lifecycle {
+            run_http_events_snapshot(&test_app.app, agent_config).await;
+        }
+    }
+}
+
+/// Runs `run_sse_events_snapshot` for each lifecycle-capable agent except OpenCode.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn sse_events_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    for agent_config in &agents {
+        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
+        if agent_config.agent == AgentId::Opencode {
+            continue;
+        }
+        let test_app = TestApp::new();
+        let capability_map = fetch_capabilities(&test_app.app).await;
+        let supports_lifecycle = capability_map
+            .get(agent_config.agent.as_str())
+            .expect("capabilities missing")
+            .session_lifecycle;
+        if supports_lifecycle {
+            run_sse_events_snapshot(&test_app.app, agent_config).await;
+        }
+    }
+}
+
+/// Runs `run_concurrency_snapshot` for each agent that reports `session_lifecycle`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn concurrency_snapshots() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    for agent_config in &agents {
+        let test_app = TestApp::new();
+        let capability_map = fetch_capabilities(&test_app.app).await;
+        let supports_lifecycle = capability_map
+            .get(agent_config.agent.as_str())
+            .expect("capabilities missing")
+            .session_lifecycle;
+        if supports_lifecycle {
+            run_concurrency_snapshot(&test_app.app, agent_config).await;
+        }
+    }
+}
+
+/// Runs `run_turn_stream_check` for each agent that reports `session_lifecycle`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn turn_stream_route() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+    for agent_config in &agents {
+        let test_app = TestApp::new();
+        let capability_map = fetch_capabilities(&test_app.app).await;
+        let supports_lifecycle = capability_map
+            .get(agent_config.agent.as_str())
+            .expect("capabilities missing")
+            .session_lifecycle;
+        if supports_lifecycle {
+            run_turn_stream_check(&test_app.app, agent_config).await;
+        }
+    }
+}
+
+/// Runs two sessions for `config.agent` side by side and snapshots both event streams.
+///
+/// Both sessions are created and drained first; the messages are then sent concurrently and
+/// the events polled concurrently, each poll being handed a 120 second duration.
+async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
+    let _guard = apply_credentials(&config.credentials);
+    install_agent(app, config.agent).await;
+
+    let session_a = format!("concurrent-a-{}", config.agent.as_str());
+    let session_b = format!("concurrent-b-{}", config.agent.as_str());
+    let perm_mode = test_permission_mode(config.agent);
+    create_session(app, config.agent, &session_a, perm_mode).await;
+    create_session(app, config.agent, &session_b, perm_mode).await;
+    let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
+    let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
+
+    // `app` is already a shared borrow, so both futures can use it without cloning the router.
+    tokio::join!(send_message(app, &session_a), send_message(app, &session_b));
+
+    let (events_a, events_b) = tokio::join!(
+        poll_events_until_from(app, &session_a, offset_a, Duration::from_secs(120)),
+        poll_events_until_from(app, &session_b, offset_b, Duration::from_secs(120)),
+    );
+    let events_a = truncate_after_first_stop(&events_a);
+    let events_b = truncate_after_first_stop(&events_b);
+
+    assert!(
+        !events_a.is_empty(),
+        "no events collected for concurrent session a {}",
+        config.agent
+    );
+    assert!(
+        !events_b.is_empty(),
+        "no events collected for concurrent session b {}",
+        config.agent
+    );
+    assert!(
+        should_stop(&events_a),
+        "timed out waiting for assistant/error event for concurrent session a {}",
+        config.agent
+    );
+    assert!(
+        should_stop(&events_b),
+        "timed out waiting for assistant/error event for concurrent session b {}",
+        config.agent
+    );
+
+    let snapshot = json!({
+        "session_a": normalize_events(&events_a),
+        "session_b": normalize_events(&events_b),
+    });
+    assert_session_snapshot("concurrency_events", snapshot);
+}
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap
new file mode 100644
index 0000000..b9828b2
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap
@@ -0,0 +1,48 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/permissions.rs
+expression: value
+---
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: in_progress
+  seq: 1
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 2
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: completed
+  seq: 3
+  type: item.completed
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: in_progress
+  seq: 4
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 5
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: completed
+  seq: 6
+  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap
similarity index 53%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap
index 006333a..dcaa414 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap
@@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1011
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
+source: server/packages/sandbox-agent/tests/sessions/permissions.rs
+expression: value
 ---
 payload:
   detail: "invalid request: unknown permission id: missing-permission"
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap
new file mode 100644
index 0000000..35e0f56
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap
@@ -0,0 +1,48 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
+expression: value
+---
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: in_progress
+  seq: 1
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 2
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: completed
+  seq: 3
+  type: item.completed
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: in_progress
+  seq: 4
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 5
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: completed
+  seq: 6
+  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap
similarity index 53%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap
index 8585cd4..5a484f7 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap
@@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1078
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
+expression: value
 ---
 payload:
   detail: "invalid request: unknown question id: missing-question"
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap
new file mode 100644
index 0000000..35e0f56
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap
@@ -0,0 +1,48 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
+expression: value
+---
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: in_progress
+  seq: 1
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 2
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: completed
+  seq: 3
+  type: item.completed
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: in_progress
+  seq: 4
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
+  seq: 5
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: assistant
+    status: completed
+  seq: 6
+  type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap
similarity index 53%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap
index c3dac9d..5a484f7 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap
@@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1072
-expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
+expression: value
 ---
 payload:
   detail: "invalid request: unknown question id: missing-question"
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap
similarity index 51%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap
index a6fdd2f..d2ed9f3 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap
@@ -1,38 +1,43 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 1351
-expression: snapshot
+source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
+expression: value
 ---
 session_a:
-  - metadata: true
+  - item:
+      content_types:
+        - text
+      kind: message
+      role: user
+      status: in_progress
     seq: 1
-    session: started
-    source: daemon
-    synthetic: true
-    type: session.started
-  - metadata: true
+    type: item.started
+  - delta:
+      delta: "<redacted>"
+      item_id: "<redacted>"
+      native_item_id: "<redacted>"
     seq: 2
-    session: started
-    source: agent
-    synthetic: false
-    type: session.started
+    type: item.delta
+  - item:
+      content_types:
+        - text
+      kind: message
+      role: user
+      status: completed
+    seq: 3
+    type: item.completed
   - item:
       content_types:
         - text
       kind: message
       role: assistant
       status: in_progress
-    seq: 3
-    source: agent
-    synthetic: false
+    seq: 4
     type: item.started
   - delta:
       delta: "<redacted>"
       item_id: "<redacted>"
       native_item_id: "<redacted>"
-    seq: 4
-    source: agent
-    synthetic: false
+    seq: 5
     type: item.delta
   - item:
       content_types:
@@ -40,40 +45,44 @@ session_a:
       kind: message
       role: assistant
       status: completed
-    seq: 5
-    source: agent
-    synthetic: false
+    seq: 6
     type: item.completed
 session_b:
-  - metadata: true
+  - item:
+      content_types:
+        - text
+      kind: message
+      role: user
+      status: in_progress
     seq: 1
-    session: started
-    source: daemon
-    synthetic: true
-    type: session.started
-  - metadata: true
+    type: item.started
+  - delta:
+      delta: "<redacted>"
+      item_id: "<redacted>"
+      native_item_id: "<redacted>"
     seq: 2
-    session: started
-    source: agent
-    synthetic: false
-    type: session.started
+    type: item.delta
+  - item:
+      content_types:
+        - text
+      kind: message
+      role: user
+      status: completed
+    seq: 3
+    type: item.completed
   - item:
       content_types:
         - text
       kind: message
       role: assistant
       status: in_progress
-    seq: 3
-    source: agent
-    synthetic: false
+    seq: 4
     type: item.started
   - delta:
       delta: "<redacted>"
       item_id: "<redacted>"
       native_item_id: "<redacted>"
-    seq: 4
-    source: agent
-    synthetic: false
+    seq: 5
     type: item.delta
   - item:
       content_types:
@@ -81,7 +90,5 @@ session_b:
       kind: message
       role: assistant
       status: completed
-    seq: 5
-    source: agent
-    synthetic: false
+    seq: 6
     type: item.completed
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap
new file mode 100644
index 0000000..8a578ee
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap
@@ -0,0 +1,6 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
+expression: value
+---
+healthy: true
+nativeSessionId: "<redacted>"
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap
new file mode 100644
index 0000000..b735d66
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap
@@ -0,0 +1,5 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
+expression: value
+---
+status: 204
diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap
new file mode 100644
index 0000000..fba833a
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap
@@ -0,0 +1,6 @@
+---
+source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
+expression: value
+---
+hasExpectedFields: true
+sessionCount: 1
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap
similarity index 50%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap
index 7ad3222..0a6a9d0 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap
@@ -1,37 +1,42 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 811
+source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
 expression: normalized
 ---
-- metadata: true
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: in_progress
   seq: 1
-  session: started
-  source: daemon
-  synthetic: true
-  type: session.started
-- metadata: true
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
   seq: 2
-  session: started
-  source: agent
-  synthetic: false
-  type: session.started
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: completed
+  seq: 3
+  type: item.completed
 - item:
     content_types:
       - text
     kind: message
     role: assistant
     status: in_progress
-  seq: 3
-  source: agent
-  synthetic: false
+  seq: 4
   type: item.started
 - delta:
     delta: "<redacted>"
     item_id: "<redacted>"
     native_item_id: "<redacted>"
-  seq: 4
-  source: agent
-  synthetic: false
+  seq: 5
   type: item.delta
 - item:
     content_types:
@@ -39,7 +44,5 @@ expression: normalized
     kind: message
     role: assistant
     status: completed
-  seq: 5
-  source: agent
-  synthetic: false
+  seq: 6
   type: item.completed
diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap
similarity index 50%
rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap
rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap
index 1686c1e..0a6a9d0 100644
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap
@@ -1,29 +1,42 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
-assertion_line: 804
+source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
 expression: normalized
 ---
-- metadata: true
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: in_progress
   seq: 1
-  session: started
-  type: session.started
-- metadata: true
+  type: item.started
+- delta:
+    delta: "<redacted>"
+    item_id: "<redacted>"
+    native_item_id: "<redacted>"
   seq: 2
-  session: started
-  type: session.started
+  type: item.delta
+- item:
+    content_types:
+      - text
+    kind: message
+    role: user
+    status: completed
+  seq: 3
+  type: item.completed
 - item:
     content_types:
       - text
     kind: message
     role: assistant
     status: in_progress
-  seq: 3
+  seq: 4
   type: item.started
 - delta:
     delta: "<redacted>"
     item_id: "<redacted>"
     native_item_id: "<redacted>"
-  seq: 4
+  seq: 5
   type: item.delta
 - item:
     content_types:
@@ -31,5 +44,5 @@ expression: normalized
     kind: message
     role: assistant
     status: completed
-  seq: 5
+  seq: 6
   type: item.completed
diff --git a/server/packages/sandbox-agent/tests/sessions/status.rs b/server/packages/sandbox-agent/tests/sessions/status.rs
new file mode 100644
index 0000000..c2e0389
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/sessions/status.rs
@@ -0,0 +1,61 @@
+// Status capability checks are isolated from baseline snapshots.
+include!("../common/http.rs");
+
+/// Picks the prompt text used to request a status update from `agent`.
+fn status_prompt(agent: AgentId) -> &'static str {
+    if agent != AgentId::Mock {
+        return "Provide a short status update.";
+    }
+    "status"
+}
+
+/// True if an event passes `event_is_status_item` or "status" content is reported.
+fn events_have_status(events: &[Value]) -> bool {
+    events.iter().any(event_is_status_item) || events_have_content_type(events, "status")
+}
+
+/// Verifies status reporting for every agent returned by `test_agents_from_env`.
+///
+/// Agents whose capabilities lack `status` are skipped; the rest are sent a status
+/// prompt and must produce events accepted by `events_have_status`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn status_events_present() {
+    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
+
+    for entry in &agents {
+        let agent = entry.agent;
+        let test_app = TestApp::new();
+        let router = &test_app.app;
+
+        // Capability gate: nothing to check for agents without `status`.
+        let all_caps = fetch_capabilities(router).await;
+        let agent_caps = all_caps.get(agent.as_str()).expect("capabilities missing");
+        if !agent_caps.status {
+            continue;
+        }
+
+        let _guard = apply_credentials(&entry.credentials);
+        install_agent(router, agent).await;
+
+        let session_id = format!("status-{}", agent.as_str());
+        create_session(router, agent, &session_id, test_permission_mode(agent)).await;
+        // Polling below resumes from the offset returned by `drain_events`.
+        let offset = drain_events(router, &session_id, Duration::from_secs(6)).await;
+
+        let path = format!("/v1/sessions/{session_id}/messages");
+        let body = json!({ "message": status_prompt(agent) });
+        let response_status = send_status(router, Method::POST, &path, Some(body)).await;
+        assert_eq!(response_status, StatusCode::NO_CONTENT, "send status prompt");
+
+        // Stop polling on status events, or as soon as `is_error_event` matches.
+        let seen = poll_events_until_match_from(
+            router,
+            &session_id,
+            offset,
+            Duration::from_secs(120),
+            |batch| events_have_status(batch) || batch.iter().any(is_error_event),
+        )
+        .await;
+        assert!(events_have_status(&seen), "expected status events for {}", agent);
+    }
+}