fix: add docker-setup action, runtime Dockerfile, and align release workflow

- Add .github/actions/docker-setup composite action (from rivet) - Add docker/runtime/Dockerfile for Docker image builds - Update release.yaml to match rivet patterns: - Use corepack enable instead of pnpm/action-setup - Add reuse_engine_version input - Add Docker job with Depot runners - Use --no-frozen-lockfile for pnpm install - Add id-token permission for setup job
2026-04-15 05:02:11 +00:00 · 2026-01-27 19:29:54 -08:00 · 2026-01-27 19:29:54 -08:00 · b49776145b
commit b49776145b
parent f05389307a
82 changed files with 1415 additions and 2430 deletions
--- a/.github/actions/docker-setup/action.yaml
+++ b/.github/actions/docker-setup/action.yaml
@ -0,0 +1,31 @@
 # Composite action: prepares Docker Buildx, logs in to Docker Hub, and writes a
 # netrc file (under the runner temp directory) holding the GitHub token.
 name: 'Docker Setup'
 description: 'Set up Docker Buildx and log in to Docker Hub'
 inputs:
  docker_username:
    description: 'Docker Hub username'
    required: true
  docker_password:
    description: 'Docker Hub password'
    required: true
  github_token:
    description: 'GitHub token'
    required: true
 runs:
  using: 'composite'
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ inputs.docker_username }}
        password: ${{ inputs.docker_password }}
    # This will be used as a secret to authenticate with Git repo pulls
    - name: Create .netrc file
      env:
        # Hand the token to the script through the environment instead of
        # expanding it into the script text, so its value is never parsed as shell.
        NETRC_GITHUB_TOKEN: ${{ inputs.github_token }}
      run: |
        echo "machine github.com" > ${{ runner.temp }}/netrc
        echo "login x-access-token" >> ${{ runner.temp }}/netrc
        echo "password ${NETRC_GITHUB_TOKEN}" >> ${{ runner.temp }}/netrc
      shell: bash
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@ -4,14 +4,18 @@ on:
  workflow_dispatch:
    inputs:
      version:
-        description: "Version (e.g. 0.1.0 or v0.1.0)"
+        description: 'Version'
        required: true
        type: string
      latest:
-        description: "Latest"
+        description: 'Latest'
        required: true
        type: boolean
        default: true
      reuse_engine_version:
        description: 'Reuse artifacts from this version (skips building)'
        required: false
        type: string
 defaults:
  run:
@ -27,7 +31,10 @@ jobs:
    name: "Setup"
    runs-on: ubuntu-24.04
    permissions:
      # Allow pushing to GitHub
      contents: write
      # Allows authentication
      id-token: write
    steps:
      - uses: actions/checkout@v4
        with:
@ -35,20 +42,29 @@ jobs:
      - uses: dtolnay/rust-toolchain@stable
      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
-          cache: pnpm
+
      - run: corepack enable
      - name: Setup
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
          R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
        run: |
          # Configure Git
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          # Authenticate with NPM
          cat << EOF > ~/.npmrc
          //registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}
          EOF
          # Install dependencies
-          pnpm install
+          pnpm install --no-frozen-lockfile
          # Install tsx globally
          npm install -g tsx
@ -60,54 +76,57 @@ jobs:
            CMD="$CMD --no-latest"
          fi
          if [ -n "${{ inputs.reuse_engine_version }}" ]; then
            CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
          fi
          eval "$CMD"
  binaries:
    name: "Build & Upload Binaries"
    needs: [setup]
    if: ${{ !inputs.reuse_engine_version }}
    strategy:
      matrix:
        include:
          - platform: linux
            runner: depot-ubuntu-24.04-8
            target: x86_64-unknown-linux-musl
            binary_ext: ""
            arch: x86_64
          - platform: windows
            runner: depot-ubuntu-24.04-8
            target: x86_64-pc-windows-gnu
            binary_ext: ".exe"
            arch: x86_64
          - platform: macos
            runner: depot-ubuntu-24.04-8
            target: x86_64-apple-darwin
            binary_ext: ""
            arch: x86_64
          - platform: macos
            runner: depot-ubuntu-24.04-8
            target: aarch64-apple-darwin
            binary_ext: ""
            arch: aarch64
-    runs-on: ubuntu-24.04
+    runs-on: ${{ matrix.runner }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: pnpm
      - name: Build inspector frontend
        run: |
          pnpm install
          SANDBOX_AGENT_SKIP_INSPECTOR=1 pnpm --filter @sandbox-agent/inspector build
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build binary
        run: |
          # Use Docker BuildKit
          export DOCKER_BUILDKIT=1
          # Build the binary using our Dockerfile
          docker/release/build.sh ${{ matrix.target }}
          # Make sure dist directory exists and binary is there
          ls -la dist/
      - name: Upload to R2
@ -115,10 +134,11 @@ jobs:
          AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
        run: |
-          # Install AWS CLI
+          # Install dependencies for AWS CLI
          sudo apt-get update
          sudo apt-get install -y unzip curl
          # Install AWS CLI
          curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
          unzip awscliv2.zip
          sudo ./aws/install --update
@ -126,7 +146,7 @@ jobs:
          COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
          BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
-          # Upload to commit directory for later promotion
+          # Must specify --checksum-algorithm for compatibility with R2
          aws s3 cp \
            "${BINARY_PATH}" \
            "s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
@ -134,10 +154,48 @@ jobs:
            --endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
            --checksum-algorithm CRC32
  docker:
    name: "Build & Push Docker Images"
    needs: [setup]
    if: ${{ !inputs.reuse_engine_version }}
    strategy:
      matrix:
        include:
          - platform: linux/arm64
            runner: depot-ubuntu-24.04-arm-8
            arch_suffix: -arm64
          - platform: linux/amd64
            runner: depot-ubuntu-24.04-8
            arch_suffix: -amd64
    runs-on: ${{ matrix.runner }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set outputs
        id: vars
        run: echo "sha_short=${GITHUB_SHA::7}" >> $GITHUB_OUTPUT
      - uses: ./.github/actions/docker-setup
        with:
          docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
          docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
      - name: Build & Push
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }}
          file: docker/runtime/Dockerfile
          platforms: ${{ matrix.platform }}
  complete:
    name: "Complete"
-    needs: [setup, binaries]
+    needs: [setup, docker, binaries]
-    if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }}
+    if: ${{ always() && !cancelled() && needs.setup.result == 'success' && (needs.docker.result == 'success' || needs.docker.result == 'skipped') && (needs.binaries.result == 'success' || needs.binaries.result == 'skipped') }}
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
@ -146,17 +204,21 @@ jobs:
      - uses: dtolnay/rust-toolchain@stable
      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          registry-url: "https://registry.npmjs.org"
-          cache: pnpm
+
      - run: corepack enable
      - uses: ./.github/actions/docker-setup
        with:
          docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
          docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
      - name: Complete
        env:
          # https://cli.github.com/manual/gh_help_environment
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@ -169,7 +231,7 @@ jobs:
          EOF
          # Install dependencies
-          pnpm install
+          pnpm install --no-frozen-lockfile
          # Install tsx globally
          npm install -g tsx
@ -181,4 +243,8 @@ jobs:
            CMD="$CMD --no-latest"
          fi
          if [ -n "${{ inputs.reuse_engine_version }}" ]; then
            CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
          fi
          eval "$CMD"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -5,10 +5,10 @@ members = ["server/packages/*"]
 [workspace.package]
 version = "0.1.0"
 edition = "2021"
-authors = ["Sandbox Agent Contributors"]
+authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
 license = "Apache-2.0"
 repository = "https://github.com/rivet-dev/sandbox-agent"
-description = "Universal agent API for AI coding assistants"
+description = "Universal API for automatic coding agents in sandboxes. Supports Claude Code, Codex, OpenCode, and Amp."
 [workspace.dependencies]
 # Internal crates
--- a/docker/runtime/Dockerfile
+++ b/docker/runtime/Dockerfile
@ -0,0 +1,51 @@
 # syntax=docker/dockerfile:1.10.0
 # Build stage - compile the binary
 FROM rust:1.88.0 AS builder
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update && apt-get install -y \
    musl-tools \
    musl-dev \
    pkg-config \
    ca-certificates \
    git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
 # Pick the musl target that matches the architecture this stage runs on
 # (x86_64 or aarch64), so the image can be built for both linux/amd64 and
 # linux/arm64 instead of always producing an x86_64 binary.
 RUN rustup target add "$(uname -m)-unknown-linux-musl"
 WORKDIR /build
 COPY . .
 # Build static binary
 # The target directory is a cache mount, so the binary is copied out of it
 # within the same RUN step to make it part of the image layer.
 RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/usr/local/cargo/git \
    --mount=type=cache,target=/build/target \
    RUST_TARGET="$(uname -m)-unknown-linux-musl" && \
    SANDBOX_AGENT_SKIP_INSPECTOR=1 \
    RUSTFLAGS="-C target-feature=+crt-static" \
    cargo build -p sandbox-agent --release --target "${RUST_TARGET}" && \
    cp "target/${RUST_TARGET}/release/sandbox-agent" /sandbox-agent
 # Runtime stage - minimal image
 FROM debian:bookworm-slim
 RUN apt-get update && apt-get install -y \
    ca-certificates \
    curl \
    git && \
    rm -rf /var/lib/apt/lists/*
 # Copy the binary from builder
 COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
 RUN chmod +x /usr/local/bin/sandbox-agent
 # Create non-root user
 RUN useradd -m -s /bin/bash sandbox
 USER sandbox
 WORKDIR /home/sandbox
 EXPOSE 2468
 ENTRYPOINT ["sandbox-agent"]
 CMD ["--host", "0.0.0.0", "--port", "2468"]
--- a/docs/building-chat-ui.mdx
+++ b/docs/building-chat-ui.mdx
@ -21,6 +21,7 @@ Capabilities tell you which features are supported for the selected agent:
 - `tool_calls` and `tool_results` indicate tool execution events.
 - `questions` and `permissions` indicate HITL flows.
 - `plan_mode` indicates that the agent supports plan-only execution.
 - `reasoning` and `status` indicate that the agent can emit reasoning/status content parts.
 Use these to enable or disable UI affordances (tool panels, approval buttons, etc.).
--- a/docs/openapi.json
+++ b/docs/openapi.json
@ -4,7 +4,8 @@
    "title": "sandbox-agent",
    "description": "",
    "contact": {
-      "name": "Sandbox Agent Contributors"
+      "name": "Rivet Gaming, LLC",
      "email": "developer@rivet.gg"
    },
    "license": {
      "name": "Apache-2.0"
@ -662,6 +663,7 @@
          "sessionLifecycle",
          "errorEvents",
          "reasoning",
          "status",
          "commandExecution",
          "fileChanges",
          "mcpTools",
@ -706,6 +708,9 @@
            "type": "boolean",
            "description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)"
          },
          "status": {
            "type": "boolean"
          },
          "streamingDeltas": {
            "type": "boolean"
          },
--- a/examples/daytona/daytona.ts
+++ b/examples/daytona/daytona.ts
@ -2,6 +2,7 @@ import { Daytona } from "@daytonaio/sdk";
 import { pathToFileURL } from "node:url";
 import {
  ensureUrl,
  logInspectorUrl,
  runPrompt,
  waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@ -39,6 +40,7 @@ export async function setupDaytonaSandboxAgent(): Promise<{
  const baseUrl = ensureUrl(preview.url);
  await waitForHealth({ baseUrl, token, extraHeaders });
  logInspectorUrl({ baseUrl, token });
  const cleanup = async () => {
    try {
--- a/examples/docker/docker.ts
+++ b/examples/docker/docker.ts
@ -2,6 +2,7 @@ import Docker from "dockerode";
 import { pathToFileURL } from "node:url";
 import {
  ensureUrl,
  logInspectorUrl,
  runPrompt,
  waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@ -83,6 +84,7 @@ export async function setupDockerSandboxAgent(): Promise<{
  const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`);
  await waitForHealth({ baseUrl, token });
  logInspectorUrl({ baseUrl, token });
  const cleanup = async () => {
    try {
--- a/examples/e2b/e2b.ts
+++ b/examples/e2b/e2b.ts
@ -2,6 +2,7 @@ import { Sandbox } from "@e2b/code-interpreter";
 import { pathToFileURL } from "node:url";
 import {
  ensureUrl,
  logInspectorUrl,
  runPrompt,
  waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@ -45,6 +46,7 @@ export async function setupE2BSandboxAgent(): Promise<{
  const baseUrl = ensureUrl(sandbox.getHost(port));
  await waitForHealth({ baseUrl, token });
  logInspectorUrl({ baseUrl, token });
  const cleanup = async () => {
    try {
--- a/examples/shared/sandbox-agent-client.ts
+++ b/examples/shared/sandbox-agent-client.ts
@ -16,6 +16,27 @@ export function ensureUrl(rawUrl: string): string {
  return `https://${rawUrl}`;
 }
 const INSPECTOR_URL = "https://inspect.sandboxagent.dev";
 /**
  * Builds the inspector link for a sandbox-agent instance.
  *
  * The base URL is passed through `ensureUrl` and `normalizeBaseUrl` and sent
  * as the `url` query parameter; `token` is added as `token` only when truthy.
  */
 export function buildInspectorUrl({
  baseUrl,
  token,
 }: {
  baseUrl: string;
  token?: string;
 }): string {
  const query = new URLSearchParams();
  query.set("url", normalizeBaseUrl(ensureUrl(baseUrl)));
  if (token) {
    query.set("token", token);
  }
  return INSPECTOR_URL + "?" + query.toString();
 }
 /**
  * Logs the inspector URL for the given sandbox-agent endpoint via `console.log`,
  * prefixed with "Inspector: ". The URL comes from `buildInspectorUrl`, so it
  * includes the token as a query parameter when one is supplied.
  */
 export function logInspectorUrl({ baseUrl, token }: { baseUrl: string; token?: string }): void {
  console.log(`Inspector: ${buildInspectorUrl({ baseUrl, token })}`);
 }
 type HeaderOptions = {
  token?: string;
  extraHeaders?: Record<string, string>;
--- a/examples/vercel/vercel-sandbox.ts
+++ b/examples/vercel/vercel-sandbox.ts
@ -2,6 +2,7 @@ import { Sandbox } from "@vercel/sandbox";
 import { pathToFileURL } from "node:url";
 import {
  ensureUrl,
  logInspectorUrl,
  runPrompt,
  waitForHealth,
 } from "../shared/sandbox-agent-client.ts";
@ -61,6 +62,7 @@ export async function setupVercelSandboxAgent(): Promise<{
  const baseUrl = ensureUrl(sandbox.domain(port));
  await waitForHealth({ baseUrl, token });
  logInspectorUrl({ baseUrl, token });
  const cleanup = async () => {
    try {
--- a/server/CLAUDE.md
+++ b/server/CLAUDE.md
@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te
  - Agent flow coverage in `agent-flows/`
  - Agent management coverage in `agent-management/`
  - Shared server manager coverage in `server-manager/`
-  - HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
+  - HTTP endpoint snapshots in `http/` (snapshots in `http/snapshots/`)
  - Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`)
  - UI coverage in `ui/`
  - Shared helpers in `common/`
 - Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
 ## Snapshot tests
-The HTTP/SSE snapshot suite entrypoint lives in:
+HTTP endpoint snapshot entrypoint:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
+- `server/packages/sandbox-agent/tests/http_endpoints.rs`
 Session snapshot entrypoint:
 - `server/packages/sandbox-agent/tests/sessions.rs`
 Snapshots are written to:
- `server/packages/sandbox-agent/tests/http/snapshots/`
+- `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots)
 - `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots)
 ## Agent selection
@ -71,6 +76,7 @@ To keep snapshots deterministic:
  - IDs, timestamps, native IDs
  - text content, tool inputs/outputs, provider-specific metadata
  - `source` and `synthetic` flags (these are implementation details)
 - Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across agents; validate those content types separately in their capability-specific tests.
 - The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
 - Event streams are truncated after the first assistant or error event.
 - Permission flow snapshots are truncated after the permission request (or first assistant) event.
@ -81,14 +87,19 @@ To keep snapshots deterministic:
 ## Typical commands
-Run only Claude snapshots:
+Run only Claude session snapshots:
 ```
-SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots
+SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions
 ```
-Run all detected agents:
+Run all detected session snapshots:
 ```
-cargo test -p sandbox-agent --test http_sse_snapshots
+cargo test -p sandbox-agent --test sessions
 ```
 Run HTTP endpoint snapshots:
 ```
 cargo test -p sandbox-agent --test http_endpoints
 ```
 ## Universal Schema
--- a/server/packages/sandbox-agent/src/router.rs
+++ b/server/packages/sandbox-agent/src/router.rs
@ -2913,6 +2913,7 @@ pub struct AgentCapabilities {
    pub session_lifecycle: bool,
    pub error_events: bool,
    pub reasoning: bool,
    pub status: bool,
    pub command_execution: bool,
    pub file_changes: bool,
    pub mcp_tools: bool,
@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
            session_lifecycle: false,
            error_events: false,
            reasoning: false,
            status: false,
            command_execution: false,
            file_changes: false,
            mcp_tools: false,
@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
            session_lifecycle: true,
            error_events: true,
            reasoning: true,
            status: true,
            command_execution: true,
            file_changes: true,
            mcp_tools: true,
@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
            session_lifecycle: true,
            error_events: true,
            reasoning: false,
            status: true,
            command_execution: false,
            file_changes: false,
            mcp_tools: false,
@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
            session_lifecycle: false,
            error_events: true,
            reasoning: false,
            status: false,
            command_execution: false,
            file_changes: false,
            mcp_tools: false,
@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
            session_lifecycle: true,
            error_events: true,
            reasoning: true,
            status: true,
            command_execution: true,
            file_changes: true,
            mcp_tools: true,
--- a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
+++ b/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::time::{Duration, Instant};
 use axum::body::{Body, Bytes};
@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) {
    assert_eq!(status, StatusCode::NO_CONTENT, "send message");
 }
-async fn poll_events_until(
+async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
-    app: &Router,
+    let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
-    session_id: &str,
+    let (status, payload) = send_json(app, Method::GET, &path, None).await;
-    timeout: Duration,
+    assert_eq!(status, StatusCode::OK, "poll events");
-) -> Vec<Value> {
+    let new_events = payload
-    let start = Instant::now();
+        .get("events")
-    let mut offset = 0u64;
+        .and_then(Value::as_array)
-    let mut events = Vec::new();
+        .cloned()
-    while start.elapsed() < timeout {
+        .unwrap_or_default();
-        let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
+    let new_offset = new_events
-        let (status, payload) = send_json(app, Method::GET, &path, None).await;
+        .last()
-        assert_eq!(status, StatusCode::OK, "poll events");
+        .and_then(|event| event.get("sequence"))
-        let new_events = payload
+        .and_then(Value::as_u64)
-            .get("events")
+        .unwrap_or(offset);
-            .and_then(Value::as_array)
+    (new_events, new_offset)
            .cloned()
            .unwrap_or_default();
        if !new_events.is_empty() {
            if let Some(last) = new_events
                .last()
                .and_then(|event| event.get("sequence"))
                .and_then(Value::as_u64)
            {
                offset = last;
            }
            events.extend(new_events);
            if should_stop(&events) {
                break;
            }
        }
        tokio::time::sleep(Duration::from_millis(800)).await;
    }
    events
 }
-async fn read_sse_events(
+async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
    let start = Instant::now();
    let mut offset = 0u64;
    loop {
        if start.elapsed() >= timeout {
            break;
        }
        let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
        if new_events.is_empty() {
            if offset == 0 {
                tokio::time::sleep(Duration::from_millis(200)).await;
                continue;
            }
            break;
        }
        offset = new_offset;
    }
    offset
 }
 /// Polls session events starting at `offset`, delegating to
 /// `poll_events_until_match_from` with `should_stop` as the stop predicate.
 ///
 /// Returns the events collected before the predicate matched or `timeout` elapsed.
 async fn poll_events_until_from(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
 ) -> Vec<Value> {
    poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
 }
 /// Convenience wrapper around `poll_events_until_from` that starts at offset 0.
 async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    poll_events_until_from(app, session_id, 0, timeout).await
 }
 async fn read_sse_events_from(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
 ) -> Vec<Value> {
    let request = Request::builder()
        .method(Method::GET)
-        .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
+        .uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
        .body(Body::empty())
        .expect("sse request");
    let response = app
@ -291,6 +307,10 @@ async fn read_sse_events(
    events
 }
 /// Convenience wrapper around `read_sse_events_from` that starts at offset 0.
 async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    read_sse_events_from(app, session_id, 0, timeout).await
 }
 async fn read_turn_stream_events(
    app: &Router,
    session_id: &str,
@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value {
        !events.iter().any(is_unparsed_event),
        "agent.unparsed event encountered"
    );
-    let normalized = events
+    let scrubbed = scrub_events(events);
    let normalized = scrubbed
        .iter()
        .enumerate()
        .map(|(idx, event)| normalize_event(event, idx + 1))
@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value {
    Value::Array(normalized)
 }
 /// Returns a copy of `events` with scrubbable items removed.
 ///
 /// `item.started` / `item.completed` events whose item satisfies
 /// `should_scrub_item` are dropped and their ids remembered; later
 /// `item.delta` events that reference a remembered `item_id` or
 /// `native_item_id` are dropped as well. All other events are cloned through
 /// in their original order.
 fn scrub_events(events: &[Value]) -> Vec<Value> {
    let mut scrubbed_ids: HashSet<String> = HashSet::new();
    let mut kept = Vec::new();
    for event in events {
        let data = event.get("data");
        let drop_event = match event.get("type").and_then(Value::as_str).unwrap_or("") {
            "item.started" | "item.completed" => match data.and_then(|d| d.get("item")) {
                Some(item) if should_scrub_item(item) => {
                    record_item_ids(item, &mut scrubbed_ids);
                    true
                }
                // Missing item payloads and non-scrubbable items pass through.
                _ => false,
            },
            "item.delta" => ["item_id", "native_item_id"].iter().any(|key| {
                data.and_then(|d| d.get(*key))
                    .and_then(Value::as_str)
                    .is_some_and(|id| scrubbed_ids.contains(id))
            }),
            _ => false,
        };
        if !drop_event {
            kept.push(event.clone());
        }
    }
    kept
 }
 /// Decides whether an item should be removed from normalized snapshots.
 ///
 /// An item is scrubbed when its `kind` is `"status"`, or when its content
 /// includes at least one `"reasoning"` part and nothing other than
 /// `"reasoning"` / `"status"` parts.
 fn should_scrub_item(item: &Value) -> bool {
    let is_status_kind = item.get("kind").and_then(Value::as_str) == Some("status");
    if is_status_kind {
        return true;
    }
    let content_types = item_content_types(item);
    let has_reasoning = content_types.iter().any(|t| t == "reasoning");
    let only_scrubbable = content_types
        .iter()
        .all(|t| t == "reasoning" || t == "status");
    has_reasoning && only_scrubbable
 }
 /// Inserts the item's `item_id` and `native_item_id` into `ids`, skipping
 /// whichever of the two is absent or not a string.
 fn record_item_ids(item: &Value, ids: &mut HashSet<String>) {
    ids.extend(
        ["item_id", "native_item_id"]
            .iter()
            .filter_map(|key| item.get(*key).and_then(Value::as_str))
            .map(str::to_string),
    );
 }
 fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
    if let Some(idx) = events
        .iter()
@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value {
    if let Some(event_type) = event.get("type").and_then(Value::as_str) {
        map.insert("type".to_string(), Value::String(event_type.to_string()));
    }
    if let Some(source) = event.get("source").and_then(Value::as_str) {
        map.insert("source".to_string(), Value::String(source.to_string()));
    }
    if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) {
        map.insert("synthetic".to_string(), Value::Bool(synthetic));
    }
    let data = event.get("data").unwrap_or(&Value::Null);
    match event.get("type").and_then(Value::as_str).unwrap_or("") {
        "session.started" => {
@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value {
        let types = content
            .iter()
            .filter_map(|part| part.get("type").and_then(Value::as_str))
            .filter(|value| *value != "reasoning" && *value != "status")
            .map(|value| Value::String(value.to_string()))
            .collect::<Vec<_>>();
        map.insert("content_types".to_string(), Value::Array(types));
@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value {
    Value::Object(map)
 }
 /// Collects the string `type` of every part in the item's `content` array.
 ///
 /// Returns an empty list when `content` is missing or not an array; parts
 /// without a string `type` are skipped.
 fn item_content_types(item: &Value) -> Vec<String> {
    let Some(parts) = item.get("content").and_then(Value::as_array) else {
        return Vec::new();
    };
    parts
        .iter()
        .filter_map(|part| part.get("type").and_then(Value::as_str))
        .map(str::to_owned)
        .collect()
 }
 fn event_content_types(event: &Value) -> Vec<String> {
    event
        .get("data")
        .and_then(|data| data.get("item"))
        .map(item_content_types)
        .unwrap_or_default()
 }
 /// Reports whether the event's `data.item.kind` is the string `"status"`.
 fn event_is_status_item(event: &Value) -> bool {
    let kind = event
        .get("data")
        .and_then(|data| data.get("item"))
        .and_then(|item| item.get("kind"))
        .and_then(Value::as_str);
    matches!(kind, Some("status"))
 }
 /// Reports whether any event's item contains a content part of `content_type`.
 fn events_have_content_type(events: &[Value], content_type: &str) -> bool {
    for event in events {
        if event_content_types(event)
            .iter()
            .any(|found| found == content_type)
        {
            return true;
        }
    }
    false
 }
 fn normalize_session_end(data: &Value) -> Value {
    let mut map = Map::new();
    if let Some(reason) = data.get("reason").and_then(Value::as_str) {
@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
 }
 /// Repeatedly fetches session events starting at `offset` until `stop`
 /// accepts the accumulated events or `timeout` has elapsed.
 ///
 /// Each non-empty batch advances the offset and is appended to the result
 /// before `stop` is evaluated. The loop sleeps 800 ms after every fetch that
 /// did not end it. Returns everything collected, even if `stop` never matched.
 async fn poll_events_until_match_from<F>(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
    stop: F,
 ) -> Vec<Value>
 where
    F: Fn(&[Value]) -> bool,
 {
    let started_at = Instant::now();
    let mut cursor = offset;
    let mut collected = Vec::new();
    loop {
        if started_at.elapsed() >= timeout {
            break;
        }
        let (batch, next_cursor) = fetch_events_once(app, session_id, cursor).await;
        if !batch.is_empty() {
            cursor = next_cursor;
            collected.extend(batch);
            if stop(&collected) {
                break;
            }
        }
        tokio::time::sleep(Duration::from_millis(800)).await;
    }
    collected
 }
 async fn poll_events_until_match<F>(
    app: &Router,
    session_id: &str,
@ -726,34 +870,7 @@ async fn poll_events_until_match<F>(
 where
    F: Fn(&[Value]) -> bool,
 {
-    let start = Instant::now();
+    poll_events_until_match_from(app, session_id, 0, timeout, stop).await
    let mut offset = 0u64;
    let mut events = Vec::new();
    while start.elapsed() < timeout {
        let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
        let (status, payload) = send_json(app, Method::GET, &path, None).await;
        assert_eq!(status, StatusCode::OK, "poll events");
        let new_events = payload
            .get("events")
            .and_then(Value::as_array)
            .cloned()
            .unwrap_or_default();
        if !new_events.is_empty() {
            if let Some(last) = new_events
                .last()
                .and_then(|event| event.get("sequence"))
                .and_then(Value::as_u64)
            {
                offset = last;
            }
            events.extend(new_events);
            if stop(&events) {
                break;
            }
        }
        tokio::time::sleep(Duration::from_millis(800)).await;
    }
    events
 }
 fn find_permission_id(events: &[Value]) -> Option<String> {
@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
    let session_id = format!("session-{}", config.agent.as_str());
    create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
    let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
    send_message(app, &session_id).await;
-    let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
+    let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
    let events = truncate_after_first_stop(&events);
    assert!(
        !events.is_empty(),
@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
    );
    let normalized = normalize_events(&events);
    insta::with_settings!({
-        snapshot_suffix => snapshot_name("http_events", Some(config.agent)),
+        snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)),
        snapshot_path => "../sessions/snapshots",
    }, {
        insta::assert_yaml_snapshot!(normalized);
    });
@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
    let session_id = format!("sse-{}", config.agent.as_str());
    create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
    let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
    let sse_task = {
        let app = app.clone();
        let session_id = session_id.clone();
        let offset = offset;
        tokio::spawn(async move {
-            read_sse_events(&app, &session_id, Duration::from_secs(120)).await
+            read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
        })
    };
@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
    );
    let normalized = normalize_events(&events);
    insta::with_settings!({
-        snapshot_suffix => snapshot_name("sse_events", Some(config.agent)),
+        snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)),
        snapshot_path => "../sessions/snapshots",
    }, {
        insta::assert_yaml_snapshot!(normalized);
    });
@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
        config.agent
    );
 }
 /// Snapshots how a token-protected app answers four requests: an unauthenticated
 /// health check, an agents listing with no token, one with a wrong token, and one
 /// with the configured token.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auth_snapshots() {
    let token = "test-token";
    let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
    // Health is requested without credentials and must still return 200.
    let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
    assert_eq!(status, StatusCode::OK, "health should be public");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_health_public", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": normalize_health(&payload),
        }));
    });
    // Same app, protected route, no Authorization header: expect 401.
    let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
    assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_missing_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": payload,
        }));
    });
    // A bearer token that differs from the configured one: expect 401.
    let request = Request::builder()
        .method(Method::GET)
        .uri("/v1/agents")
        .header(header::AUTHORIZATION, "Bearer wrong-token")
        .body(Body::empty())
        .expect("auth invalid request");
    let (status, _headers, payload) = send_json_request(&app.app, request).await;
    assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_invalid_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": payload,
        }));
    });
    // The configured token as a bearer credential: expect 200 and an agent list.
    let request = Request::builder()
        .method(Method::GET)
        .uri("/v1/agents")
        .header(header::AUTHORIZATION, format!("Bearer {token}"))
        .body(Body::empty())
        .expect("auth valid request");
    let (status, _headers, payload) = send_json_request(&app.app, request).await;
    assert_eq!(status, StatusCode::OK, "valid token should allow request");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_valid_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": normalize_agent_list(&payload),
        }));
    });
 }
 /// Snapshots the CORS response headers for a preflight and an actual request
 /// against an app built with an explicit `CorsLayer` and auth disabled.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn cors_snapshots() {
    // Policy under test: one allowed origin, GET/POST, two request headers,
    // credentials allowed.
    let cors = CorsLayer::new()
        .allow_origin(vec![HeaderValue::from_static("http://example.com")])
        .allow_methods([Method::GET, Method::POST])
        .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
        .allow_credentials(true);
    let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
    // Preflight: OPTIONS from the allowed origin asking for GET with both headers.
    let preflight = Request::builder()
        .method(Method::OPTIONS)
        .uri("/v1/health")
        .header(header::ORIGIN, "http://example.com")
        .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
        .header(
            header::ACCESS_CONTROL_REQUEST_HEADERS,
            "authorization,content-type",
        )
        .body(Body::empty())
        .expect("cors preflight request");
    // The preflight body is ignored; only status and headers are snapshotted.
    let (status, headers, _payload) = send_request(&app.app, preflight).await;
    insta::with_settings!({
        snapshot_suffix => snapshot_name("cors_preflight", None),
    }, {
        insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
    });
    // Actual request: a plain GET carrying the same Origin header.
    let actual = Request::builder()
        .method(Method::GET)
        .uri("/v1/health")
        .header(header::ORIGIN, "http://example.com")
        .body(Body::empty())
        .expect("cors actual request");
    let (status, headers, payload) = send_json_request(&app.app, actual).await;
    assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("cors_actual", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "cors": snapshot_cors(status, &headers),
            "payload": normalize_health(&payload),
        }));
    });
 }
 /// Walks the main REST endpoints in order and snapshots each response:
 /// health, agent list, per-agent install, per-agent modes, session creation,
 /// session list, and sending a message to every created session.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn api_endpoints_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
    assert_eq!(status, StatusCode::OK, "health status");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("health", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_health(&health));
    });
    // List agents (just verify the API returns correct agent IDs, not install state)
    let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
    assert_eq!(status, StatusCode::OK, "agents list");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("agents_list", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
    });
    // Install agents (ensure they're available for subsequent tests)
    for config in &configs {
        // The guard is held for the rest of this iteration.
        // NOTE(review): presumably it restores credentials on drop — confirm in `apply_credentials`.
        let _guard = apply_credentials(&config.credentials);
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/agents/{}/install", config.agent.as_str()),
            Some(json!({})),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(snapshot_status(status));
        });
    }
    // Collected as (agent, session id) pairs so the message loop below can reuse them.
    let mut session_ids = Vec::new();
    for config in &configs {
        let _guard = apply_credentials(&config.credentials);
        let (status, modes) = send_json(
            &app.app,
            Method::GET,
            &format!("/v1/agents/{}/modes", config.agent.as_str()),
            None,
        )
        .await;
        assert_eq!(status, StatusCode::OK, "agent modes");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
        });
        // One session per agent, named after the agent.
        let session_id = format!("snapshot-{}", config.agent.as_str());
        let permission_mode = test_permission_mode(config.agent);
        let (status, created) = send_json(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}"),
            Some(json!({
                "agent": config.agent.as_str(),
                "permissionMode": permission_mode
            })),
        )
        .await;
        assert_eq!(status, StatusCode::OK, "create session");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(normalize_create_session(&created));
        });
        session_ids.push((config.agent, session_id));
    }
    // Listing happens after all sessions exist, so the snapshot covers every agent.
    let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
    assert_eq!(status, StatusCode::OK, "list sessions");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("sessions_list", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
    });
    // Only the 204 status of the send is checked here; resulting events are not read.
    for (agent, session_id) in &session_ids {
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}/messages"),
            Some(json!({ "message": PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send message");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("send_message", Some(*agent)),
        }, {
            insta::assert_yaml_snapshot!(snapshot_status(status));
        });
    }
 }
 /// Snapshots the permission and question approval flows for every configured
 /// agent except OpenCode.
 ///
 /// Per agent, depending on its reported capabilities:
 /// - `plan_mode && permissions`: send `PERMISSION_PROMPT`, snapshot the events,
 ///   then reply to the permission request (or to a deliberately missing id).
 /// - `questions`: send `QUESTION_PROMPT` in two separate sessions, one that
 ///   replies to the question and one that rejects it (again falling back to a
 ///   missing id when no question was observed).
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration or if an
 /// agent has no entry in the fetched capabilities.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn approval_flow_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    let capabilities = fetch_capabilities(&app.app).await;
    for config in &configs {
        // OpenCode doesn't support "plan" permission mode required for approval flows
        if config.agent == AgentId::Opencode {
            continue;
        }
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;
        // --- Permission flow -------------------------------------------------
        if caps.plan_mode && caps.permissions {
            let permission_session = format!("perm-{}", config.agent.as_str());
            create_session(&app.app, config.agent, &permission_session, "plan").await;
            let status = send_status(
                &app.app,
                Method::POST,
                &format!("/v1/sessions/{permission_session}/messages"),
                Some(json!({ "message": PERMISSION_PROMPT })),
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
            // Poll until a permission id shows up or `should_stop` accepts the events,
            // bounded by the 120 s timeout passed in.
            let permission_events = poll_events_until_match(
                &app.app,
                &permission_session,
                Duration::from_secs(120),
                |events| find_permission_id(events).is_some() || should_stop(events),
            )
            .await;
            let permission_events = truncate_permission_events(&permission_events);
            insta::with_settings!({
                snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
            }, {
                insta::assert_yaml_snapshot!(normalize_events(&permission_events));
            });
            if let Some(permission_id) = find_permission_id(&permission_events) {
                // A permission request was observed: approve it once.
                let status = send_status(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
                    ),
                    Some(json!({ "reply": "once" })),
                )
                .await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(snapshot_status(status));
                });
            } else {
                // No permission request was seen: exercise the error path with a
                // made-up id and snapshot the failure response instead.
                let (status, payload) = send_json(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{permission_session}/permissions/missing-permission/reply"
                    ),
                    Some(json!({ "reply": "once" })),
                )
                .await;
                assert!(!status.is_success(), "missing permission id should error");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(json!({
                        "status": status.as_u16(),
                        "payload": payload,
                    }));
                });
            }
        }
        // --- Question flow ---------------------------------------------------
        // NOTE(review): these sessions are created in "plan" mode but this branch
        // only checks `caps.questions`, not `caps.plan_mode` — confirm that is intended.
        if caps.questions {
            // First session: answer the question.
            let question_reply_session = format!("question-reply-{}", config.agent.as_str());
            create_session(&app.app, config.agent, &question_reply_session, "plan").await;
            let status = send_status(
                &app.app,
                Method::POST,
                &format!("/v1/sessions/{question_reply_session}/messages"),
                Some(json!({ "message": QUESTION_PROMPT })),
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
            let question_events = poll_events_until_match(
                &app.app,
                &question_reply_session,
                Duration::from_secs(120),
                |events| find_question_id_and_answers(events).is_some() || should_stop(events),
            )
            .await;
            let question_events = truncate_question_events(&question_events);
            insta::with_settings!({
                snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
            }, {
                insta::assert_yaml_snapshot!(normalize_events(&question_events));
            });
            if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
                // Reply with the answers extracted from the question event itself.
                let status = send_status(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
                    ),
                    Some(json!({ "answers": answers })),
                )
                .await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(snapshot_status(status));
                });
            } else {
                // No question observed: reply to a made-up id and expect a failure.
                let (status, payload) = send_json(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{question_reply_session}/questions/missing-question/reply"
                    ),
                    Some(json!({ "answers": [] })),
                )
                .await;
                assert!(!status.is_success(), "missing question id should error");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(json!({
                        "status": status.as_u16(),
                        "payload": payload,
                    }));
                });
            }
            // Second session: same prompt, but the question is rejected.
            let question_reject_session = format!("question-reject-{}", config.agent.as_str());
            create_session(&app.app, config.agent, &question_reject_session, "plan").await;
            let status = send_status(
                &app.app,
                Method::POST,
                &format!("/v1/sessions/{question_reject_session}/messages"),
                Some(json!({ "message": QUESTION_PROMPT })),
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
            let reject_events = poll_events_until_match(
                &app.app,
                &question_reject_session,
                Duration::from_secs(120),
                |events| find_question_id_and_answers(events).is_some() || should_stop(events),
            )
            .await;
            let reject_events = truncate_question_events(&reject_events);
            insta::with_settings!({
                snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
            }, {
                insta::assert_yaml_snapshot!(normalize_events(&reject_events));
            });
            if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
                // The reject endpoint is called without a request body.
                let status = send_status(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
                    ),
                    None,
                )
                .await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(snapshot_status(status));
                });
            } else {
                // No question observed: reject a made-up id and expect a failure.
                let (status, payload) = send_json(
                    &app.app,
                    Method::POST,
                    &format!(
                        "/v1/sessions/{question_reject_session}/questions/missing-question/reject"
                    ),
                    None,
                )
                .await;
                assert!(!status.is_success(), "missing question id reject should error");
                insta::with_settings!({
                    snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
                }, {
                    insta::assert_yaml_snapshot!(json!({
                        "status": status.as_u16(),
                        "payload": payload,
                    }));
                });
            }
        }
    }
 }
 /// Runs the HTTP events snapshot once per configured agent, skipping OpenCode.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn http_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode is left out: its embedded bun hangs when installing plugins, which
    // blocks SSE event streaming.
    // See: https://github.com/opencode-ai/opencode/issues/XXX (issue number is a placeholder)
    let runnable = agent_configs
        .iter()
        .filter(|candidate| !matches!(candidate.agent, AgentId::Opencode));
    for candidate in runnable {
        run_http_events_snapshot(&test_app.app, candidate).await;
    }
 }
 /// Drives two sessions of the same agent concurrently and snapshots both event streams.
 ///
 /// Steps, in order:
 /// 1. Apply the agent's credentials and install the agent.
 /// 2. Create sessions `concurrent-a-<agent>` and `concurrent-b-<agent>` with the
 ///    permission mode returned by `test_permission_mode`.
 /// 3. Send the message to both sessions concurrently, then poll both concurrently
 ///    (each poll bounded by a 120 s timeout).
 /// 4. Truncate each stream after its first stop event, assert that both are
 ///    non-empty and satisfy `should_stop`, and snapshot the normalized pair.
 ///
 /// Panics (failing the test) when either session yields no events or never
 /// satisfies `should_stop` within the timeout.
 async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
    let _guard = apply_credentials(&config.credentials);
    install_agent(app, config.agent).await;
    let session_a = format!("concurrent-a-{}", config.agent.as_str());
    let session_b = format!("concurrent-b-{}", config.agent.as_str());
    let perm_mode = test_permission_mode(config.agent);
    create_session(app, config.agent, &session_a, perm_mode).await;
    create_session(app, config.agent, &session_b, perm_mode).await;
    // `app` is already a shared reference, so both futures borrow it directly;
    // cloning the router per future is unnecessary.
    let send_a = send_message(app, &session_a);
    let send_b = send_message(app, &session_b);
    tokio::join!(send_a, send_b);
    let poll_a = poll_events_until(app, &session_a, Duration::from_secs(120));
    let poll_b = poll_events_until(app, &session_b, Duration::from_secs(120));
    let (events_a, events_b) = tokio::join!(poll_a, poll_b);
    let events_a = truncate_after_first_stop(&events_a);
    let events_b = truncate_after_first_stop(&events_b);
    assert!(
        !events_a.is_empty(),
        "no events collected for concurrent session a {}",
        config.agent
    );
    assert!(
        !events_b.is_empty(),
        "no events collected for concurrent session b {}",
        config.agent
    );
    assert!(
        should_stop(&events_a),
        "timed out waiting for assistant/error event for concurrent session a {}",
        config.agent
    );
    assert!(
        should_stop(&events_b),
        "timed out waiting for assistant/error event for concurrent session b {}",
        config.agent
    );
    // Both streams go into a single snapshot keyed by session.
    let snapshot = json!({
        "session_a": normalize_events(&events_a),
        "session_b": normalize_events(&events_b),
    });
    insta::with_settings!({
        snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
    }, {
        insta::assert_yaml_snapshot!(snapshot);
    });
 }
 /// Runs the SSE events snapshot once per configured agent, skipping OpenCode.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn sse_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode is left out: its embedded bun hangs when installing plugins, which
    // blocks SSE event streaming.
    // See: https://github.com/opencode-ai/opencode/issues/XXX (issue number is a placeholder)
    let runnable = agent_configs
        .iter()
        .filter(|candidate| !matches!(candidate.agent, AgentId::Opencode));
    for candidate in runnable {
        run_sse_events_snapshot(&test_app.app, candidate).await;
    }
 }
 /// Runs the turn-stream check once per configured agent, skipping OpenCode.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn turn_stream_route() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode is left out: its embedded bun hangs when installing plugins, which
    // blocks SSE event streaming.
    // See: https://github.com/opencode-ai/opencode/issues/XXX (issue number is a placeholder)
    let runnable = agent_configs
        .iter()
        .filter(|candidate| !matches!(candidate.agent, AgentId::Opencode));
    for candidate in runnable {
        run_turn_stream_check(&test_app.app, candidate).await;
    }
 }
 /// Runs the two-session concurrency snapshot once per configured agent,
 /// skipping OpenCode.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn concurrency_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode is left out: its embedded bun hangs when installing plugins, which
    // blocks SSE event streaming.
    // See: https://github.com/opencode-ai/opencode/issues/XXX (issue number is a placeholder)
    let runnable = agent_configs
        .iter()
        .filter(|candidate| !matches!(candidate.agent, AgentId::Opencode));
    for candidate in runnable {
        run_concurrency_snapshot(&test_app.app, candidate).await;
    }
 }
--- a/server/packages/sandbox-agent/tests/http/agent_endpoints.rs
+++ b/server/packages/sandbox-agent/tests/http/agent_endpoints.rs
@ -0,0 +1,165 @@
 // Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/.
 include!("../common/http.rs");
 /// Snapshots how a token-protected app answers four requests: an unauthenticated
 /// health check, an agents listing with no token, one with a wrong token, and one
 /// with the configured token.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auth_snapshots() {
    let token = "test-token";
    let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
    // Health is requested without credentials and must still return 200.
    let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
    assert_eq!(status, StatusCode::OK, "health should be public");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_health_public", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": normalize_health(&payload),
        }));
    });
    // Same app, protected route, no Authorization header: expect 401.
    let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
    assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_missing_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": payload,
        }));
    });
    // A bearer token that differs from the configured one: expect 401.
    let request = Request::builder()
        .method(Method::GET)
        .uri("/v1/agents")
        .header(header::AUTHORIZATION, "Bearer wrong-token")
        .body(Body::empty())
        .expect("auth invalid request");
    let (status, _headers, payload) = send_json_request(&app.app, request).await;
    assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_invalid_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": payload,
        }));
    });
    // The configured token as a bearer credential: expect 200 and an agent list.
    let request = Request::builder()
        .method(Method::GET)
        .uri("/v1/agents")
        .header(header::AUTHORIZATION, format!("Bearer {token}"))
        .body(Body::empty())
        .expect("auth valid request");
    let (status, _headers, payload) = send_json_request(&app.app, request).await;
    assert_eq!(status, StatusCode::OK, "valid token should succeed");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("auth_valid_token", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "status": status.as_u16(),
            "payload": normalize_agent_list(&payload),
        }));
    });
 }
 /// Snapshots the CORS response headers for a preflight against `/v1/agents` and
 /// an actual request against `/v1/health`, using an app built with an explicit
 /// `CorsLayer` and auth disabled.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn cors_snapshots() {
    // Policy under test: one allowed origin, GET/POST, two request headers.
    // The origin literal is a valid header value, so the `unwrap` is not expected to fail.
    let cors = CorsLayer::new()
        .allow_origin("http://example.com".parse::<HeaderValue>().unwrap())
        .allow_methods([Method::GET, Method::POST])
        .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]);
    let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
    // Preflight: OPTIONS from the allowed origin asking for GET with both headers.
    let preflight = Request::builder()
        .method(Method::OPTIONS)
        .uri("/v1/agents")
        .header(header::ORIGIN, "http://example.com")
        .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
        .header(
            header::ACCESS_CONTROL_REQUEST_HEADERS,
            "authorization,content-type",
        )
        .body(Body::empty())
        .expect("cors preflight request");
    // The preflight body is ignored; only status and headers are snapshotted.
    let (status, headers, _payload) = send_request(&app.app, preflight).await;
    insta::with_settings!({
        snapshot_suffix => snapshot_name("cors_preflight", None),
    }, {
        insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
    });
    // Actual request: a plain GET carrying the same Origin header.
    let actual = Request::builder()
        .method(Method::GET)
        .uri("/v1/health")
        .header(header::ORIGIN, "http://example.com")
        .body(Body::empty())
        .expect("cors actual request");
    let (status, headers, payload) = send_json_request(&app.app, actual).await;
    assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("cors_actual", None),
    }, {
        insta::assert_yaml_snapshot!(json!({
            "cors": snapshot_cors(status, &headers),
            "payload": normalize_health(&payload),
        }));
    });
 }
 /// Snapshots the agent-facing endpoints in order: health, agent list, then for
 /// every configured agent the install endpoint followed by the modes endpoint.
 ///
 /// Panics if `test_agents_from_env` yields no usable configuration.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn agent_endpoints_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
    assert_eq!(status, StatusCode::OK, "health status");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("health", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_health(&health));
    });
    // List agents (verify IDs only; install state is environment-dependent).
    let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
    assert_eq!(status, StatusCode::OK, "agents list");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("agents_list", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
    });
    // Install every agent first; the modes loop below runs only after all installs.
    for config in &configs {
        // The guard is held for the rest of this iteration.
        // NOTE(review): presumably it restores credentials on drop — confirm in `apply_credentials`.
        let _guard = apply_credentials(&config.credentials);
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/agents/{}/install", config.agent.as_str()),
            Some(json!({})),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(snapshot_status(status));
        });
    }
    // Query and snapshot the modes each agent reports.
    for config in &configs {
        let _guard = apply_credentials(&config.credentials);
        let (status, modes) = send_json(
            &app.app,
            Method::GET,
            &format!("/v1/agents/{}/modes", config.agent.as_str()),
            None,
        )
        .await;
        assert_eq!(status, StatusCode::OK, "agent modes");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
        });
    }
 }
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 918
 expression: normalize_create_session(&created)
 ---
 healthy: true
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_create_session(&created)
 ---
 healthy: true
 nativeSessionId: "<redacted>"
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap
@ -1,7 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1053
 expression: normalize_create_session(&created)
 ---
 healthy: true
 nativeSessionId: "<redacted>"
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_create_session(&created)
 ---
 agentSessionId: "<redacted>"
 healthy: true
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 943
 expression: snapshot_status(status)
 ---
 status: 204
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 959
 expression: snapshot_status(status)
 ---
 status: 204
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1078
 expression: snapshot_status(status)
 ---
 status: 204
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap
@ -1,5 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: snapshot_status(status)
 ---
 status: 204
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap
@ -1,6 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_sessions(&sessions)
 ---
 hasExpectedFields: true
 sessionCount: 1
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
@ -1,17 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1119
 expression: normalize_events(&permission_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap
@ -1,131 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_events(&permission_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 3
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 6
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types: []
    kind: message
    role: assistant
    status: in_progress
  seq: 7
  source: agent
  synthetic: false
  type: item.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 8
  source: agent
  synthetic: false
  type: item.completed
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 9
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 10
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 11
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 12
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 13
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 14
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
      - reasoning
    kind: message
    role: assistant
    status: completed
  seq: 15
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap
@ -1,35 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1112
 expression: normalize_events(&permission_events)
 ---
 - metadata: true
  seq: 1
  session: started
  type: session.started
 - metadata: true
  seq: 2
  session: started
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 1017
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown permission id: missing-permission"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1152
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown permission id: missing-permission"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
@ -1,45 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 1151
 expression: normalize_events(&reject_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  source: daemon
  synthetic: true
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap
@ -1,331 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_events(&reject_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 3
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 6
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types: []
    kind: message
    role: assistant
    status: in_progress
  seq: 7
  source: agent
  synthetic: false
  type: item.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 8
  source: agent
  synthetic: false
  type: item.completed
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 9
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 10
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 11
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 12
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 13
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 14
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 15
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 16
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 17
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 18
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 19
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 20
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 21
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 22
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 23
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 24
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 25
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 26
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 27
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 28
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 29
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 30
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 31
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 32
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 33
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 34
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 35
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 36
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 37
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 38
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 39
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
      - reasoning
    kind: message
    role: assistant
    status: completed
  seq: 40
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap
@ -1,35 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1236
 expression: normalize_events(&reject_events)
 ---
 - metadata: true
  seq: 1
  session: started
  type: session.started
 - metadata: true
  seq: 2
  session: started
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 1151
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 1139
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1276
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
@ -1,45 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 assertion_line: 1109
 expression: normalize_events(&question_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  source: daemon
  synthetic: true
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap
@ -1,315 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalize_events(&question_events)
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 3
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 6
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types: []
    kind: message
    role: assistant
    status: in_progress
  seq: 7
  source: agent
  synthetic: false
  type: item.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 8
  source: agent
  synthetic: false
  type: item.completed
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 9
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 10
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 11
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 12
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 13
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 14
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 15
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 16
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 17
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 18
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 19
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 20
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 21
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 22
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 23
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 24
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 25
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 26
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 27
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 28
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 29
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 30
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 31
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 32
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 33
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 34
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 35
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 36
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 37
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
      - reasoning
    kind: message
    role: assistant
    status: completed
  seq: 38
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap
@ -1,35 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1174
 expression: normalize_events(&question_events)
 ---
 - metadata: true
  seq: 1
  session: started
  type: session.started
 - metadata: true
  seq: 2
  session: started
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap
@ -1,11 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1214
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
  status: 400
  title: Invalid Request
  type: "urn:sandbox-agent:error:invalid_request"
 status: 400
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap
@ -1,201 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: snapshot
 ---
 session_a:
  - metadata: true
    seq: 1
    session: started
    source: daemon
    synthetic: true
    type: session.started
  - metadata: true
    seq: 2
    session: started
    source: agent
    synthetic: false
    type: session.started
  - item:
      content_types:
        - status
      kind: status
      role: system
      status: completed
    seq: 3
    source: agent
    synthetic: false
    type: item.completed
  - item:
      content_types:
        - text
      kind: message
      role: user
      status: in_progress
    seq: 4
    source: agent
    synthetic: false
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 5
    source: daemon
    synthetic: true
    type: item.delta
  - item:
      content_types:
        - text
      kind: message
      role: user
      status: completed
    seq: 6
    source: agent
    synthetic: false
    type: item.completed
  - item:
      content_types: []
      kind: message
      role: assistant
      status: in_progress
    seq: 7
    source: agent
    synthetic: false
    type: item.started
  - item:
      content_types: []
      kind: message
      role: assistant
      status: completed
    seq: 8
    source: agent
    synthetic: false
    type: item.completed
 session_b:
  - metadata: true
    seq: 1
    session: started
    source: daemon
    synthetic: true
    type: session.started
  - metadata: true
    seq: 2
    session: started
    source: agent
    synthetic: false
    type: session.started
  - item:
      content_types:
        - status
      kind: status
      role: system
      status: completed
    seq: 3
    source: agent
    synthetic: false
    type: item.completed
  - item:
      content_types:
        - text
      kind: message
      role: user
      status: in_progress
    seq: 4
    source: agent
    synthetic: false
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 5
    source: daemon
    synthetic: true
    type: item.delta
  - item:
      content_types:
        - text
      kind: message
      role: user
      status: completed
    seq: 6
    source: agent
    synthetic: false
    type: item.completed
  - item:
      content_types: []
      kind: message
      role: assistant
      status: in_progress
    seq: 7
    source: agent
    synthetic: false
    type: item.started
  - item:
      content_types:
        - status
      kind: status
      role: system
      status: completed
    seq: 8
    source: agent
    synthetic: false
    type: item.completed
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 9
    source: agent
    synthetic: false
    type: item.delta
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 10
    source: agent
    synthetic: false
    type: item.delta
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 11
    source: agent
    synthetic: false
    type: item.delta
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 12
    source: agent
    synthetic: false
    type: item.delta
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 13
    source: agent
    synthetic: false
    type: item.delta
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 14
    source: agent
    synthetic: false
    type: item.delta
  - item:
      content_types:
        - reasoning
      kind: message
      role: assistant
      status: completed
    seq: 15
    source: agent
    synthetic: false
    type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap
@ -1,67 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 1344
 expression: snapshot
 ---
 session_a:
  - metadata: true
    seq: 1
    session: started
    type: session.started
  - metadata: true
    seq: 2
    session: started
    type: session.started
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: in_progress
    seq: 3
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 4
    type: item.delta
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: completed
    seq: 5
    type: item.completed
 session_b:
  - metadata: true
    seq: 1
    session: started
    type: session.started
  - metadata: true
    seq: 2
    session: started
    type: session.started
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: in_progress
    seq: 3
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
    seq: 4
    type: item.delta
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: completed
    seq: 5
    type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap
@ -1,171 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
 expression: normalized
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 3
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 6
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types: []
    kind: message
    role: assistant
    status: in_progress
  seq: 7
  source: agent
  synthetic: false
  type: item.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 8
  source: agent
  synthetic: false
  type: item.completed
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 9
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 10
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 11
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 12
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 13
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 14
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 15
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 16
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 17
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 18
  source: agent
  synthetic: false
  type: item.delta
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 19
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
      - reasoning
    kind: message
    role: assistant
    status: completed
  seq: 20
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
@ -1,45 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 848
 expression: normalized
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap
@ -1,73 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 848
 expression: normalized
 ---
 - metadata: true
  seq: 1
  session: started
  source: daemon
  synthetic: true
  type: session.started
 - metadata: true
  seq: 2
  session: started
  source: agent
  synthetic: false
  type: session.started
 - item:
    content_types:
      - status
    kind: status
    role: system
    status: completed
  seq: 3
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  source: daemon
  synthetic: true
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 6
  source: agent
  synthetic: false
  type: item.completed
 - item:
    content_types: []
    kind: message
    role: assistant
    status: in_progress
  seq: 7
  source: agent
  synthetic: false
  type: item.started
 - item:
    content_types: []
    kind: message
    role: assistant
    status: completed
  seq: 8
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap
@ -1,35 +0,0 @@
 ---
 source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
 assertion_line: 841
 expression: normalized
 ---
 - metadata: true
  seq: 1
  session: started
  type: session.started
 - metadata: true
  seq: 2
  session: started
  type: session.started
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 3
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 4
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 5
  type: item.completed
--- a/server/packages/sandbox-agent/tests/http_endpoints.rs
+++ b/server/packages/sandbox-agent/tests/http_endpoints.rs
@ -0,0 +1,2 @@
 #[path = "http/agent_endpoints.rs"]
 mod agent_endpoints;
--- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
@ -1 +0,0 @@
 include!("http/http_sse_snapshots.rs");
--- a/server/packages/sandbox-agent/tests/sessions.rs
+++ b/server/packages/sandbox-agent/tests/sessions.rs
@ -0,0 +1,2 @@
 #[path = "sessions/mod.rs"]
 mod sessions;
--- a/server/packages/sandbox-agent/tests/sessions/mod.rs
+++ b/server/packages/sandbox-agent/tests/sessions/mod.rs
@ -0,0 +1,5 @@
 mod session_lifecycle;
 mod permissions;
 mod questions;
 mod reasoning;
 mod status;
--- a/server/packages/sandbox-agent/tests/sessions/permissions.rs
+++ b/server/packages/sandbox-agent/tests/sessions/permissions.rs
@ -0,0 +1,88 @@
 // Permission flow snapshots compare every agent to the mock baseline.
 include!("../common/http.rs");
 /// Returns the snapshot suffix for `prefix`, as produced by `snapshot_name`
 /// when it is given the mock agent.
 fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
 }
 fn assert_session_snapshot(prefix: &str, value: Value) {
    insta::with_settings!({
        snapshot_suffix => session_snapshot_suffix(prefix),
    }, {
        insta::assert_yaml_snapshot!(value);
    });
 }
 /// Exercises the permission flow for each configured agent and snapshots the outcome.
 ///
 /// Agents whose reported capabilities lack either `plan_mode` or `permissions` are
 /// skipped. For the others the test creates a session (passing `"plan"` to
 /// `create_session`), sends `PERMISSION_PROMPT`, and polls events until
 /// `find_permission_id` yields an id or `should_stop` returns true. When an id was
 /// found it is answered with `"once"`; otherwise a reply to the placeholder id
 /// `missing-permission` must be rejected.
 ///
 /// # Panics
 ///
 /// Panics if `test_agents_from_env` fails, if an agent has no capabilities entry,
 /// or if any status assertion or snapshot comparison fails.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn permission_flow_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for config in &configs {
        // A fresh TestApp is built for every agent.
        let app = TestApp::new();
        let capabilities = fetch_capabilities(&app.app).await;
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        // Both capabilities are required; otherwise this agent is skipped.
        if !(caps.plan_mode && caps.permissions) {
            continue;
        }
        // Bound to a named variable so the returned value lives until the end of this iteration.
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;
        let permission_session = format!("perm-{}", config.agent.as_str());
        create_session(&app.app, config.agent, &permission_session, "plan").await;
        // The value returned here is used below as the starting point for polling.
        let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await;
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{permission_session}/messages"),
            Some(json!({ "message": PERMISSION_PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
        // Poll until a permission id shows up or the stop condition is met.
        let permission_events = poll_events_until_match_from(
            &app.app,
            &permission_session,
            offset,
            Duration::from_secs(120),
            |events| find_permission_id(events).is_some() || should_stop(events),
        )
        .await;
        let permission_events = truncate_permission_events(&permission_events);
        assert_session_snapshot("permission_events", normalize_events(&permission_events));
        if let Some(permission_id) = find_permission_id(&permission_events) {
            // A permission request was observed: reply to it and expect 204.
            let status = send_status(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
                ),
                Some(json!({ "reply": "once" })),
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
            assert_session_snapshot("permission_reply", snapshot_status(status));
        } else {
            // No permission request was observed: replying to an unknown id must fail.
            let (status, payload) = send_json(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{permission_session}/permissions/missing-permission/reply"
                ),
                Some(json!({ "reply": "once" })),
            )
            .await;
            assert!(!status.is_success(), "missing permission id should error");
            assert_session_snapshot(
                "permission_reply_missing",
                json!({
                    "status": status.as_u16(),
                    "payload": payload,
                }),
            );
        }
    }
 }
--- a/server/packages/sandbox-agent/tests/sessions/questions.rs
+++ b/server/packages/sandbox-agent/tests/sessions/questions.rs
@ -0,0 +1,145 @@
 // Question flow snapshots compare every agent to the mock baseline.
 include!("../common/http.rs");
 /// Returns the snapshot suffix for `prefix`, as produced by `snapshot_name`
 /// when it is given the mock agent.
 fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
 }
 fn assert_session_snapshot(prefix: &str, value: Value) {
    insta::with_settings!({
        snapshot_suffix => session_snapshot_suffix(prefix),
    }, {
        insta::assert_yaml_snapshot!(value);
    });
 }
 /// Exercises the question reply and question reject flows for each configured agent.
 ///
 /// Agents whose reported capabilities lack `questions` are skipped. The test runs
 /// two independent sessions per agent, each created by passing `"plan"` to
 /// `create_session` and each prompted with `QUESTION_PROMPT`:
 ///
 /// 1. `question-reply-<agent>`: if a question is found it is answered with the
 ///    answers returned by `find_question_id_and_answers`; otherwise a reply to the
 ///    placeholder id `missing-question` must be rejected.
 /// 2. `question-reject-<agent>`: if a question is found it is rejected; otherwise a
 ///    reject of the placeholder id `missing-question` must fail.
 ///
 /// # Panics
 ///
 /// Panics if `test_agents_from_env` fails, if an agent has no capabilities entry,
 /// or if any status assertion or snapshot comparison fails.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn question_flow_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for config in &configs {
        // A fresh TestApp is built for every agent.
        let app = TestApp::new();
        let capabilities = fetch_capabilities(&app.app).await;
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        if !caps.questions {
            continue;
        }
        // Bound to a named variable so the returned value lives until the end of this iteration.
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;
        // --- Reply flow ---
        let question_reply_session = format!("question-reply-{}", config.agent.as_str());
        create_session(&app.app, config.agent, &question_reply_session, "plan").await;
        // The value returned here is used below as the starting point for polling.
        let reply_offset =
            drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await;
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{question_reply_session}/messages"),
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
        // Poll until a question shows up or the stop condition is met.
        let question_events = poll_events_until_match_from(
            &app.app,
            &question_reply_session,
            reply_offset,
            Duration::from_secs(120),
            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
        )
        .await;
        let question_events = truncate_question_events(&question_events);
        assert_session_snapshot("question_reply_events", normalize_events(&question_events));
        if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
            // A question was observed: answer it and expect 204.
            let status = send_status(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
                ),
                Some(json!({ "answers": answers })),
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
            assert_session_snapshot("question_reply", snapshot_status(status));
        } else {
            // No question was observed: replying to an unknown id must fail.
            let (status, payload) = send_json(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{question_reply_session}/questions/missing-question/reply"
                ),
                Some(json!({ "answers": [] })),
            )
            .await;
            assert!(!status.is_success(), "missing question id should error");
            assert_session_snapshot(
                "question_reply_missing",
                json!({
                    "status": status.as_u16(),
                    "payload": payload,
                }),
            );
        }
        // --- Reject flow (separate session, same prompt) ---
        let question_reject_session = format!("question-reject-{}", config.agent.as_str());
        create_session(&app.app, config.agent, &question_reject_session, "plan").await;
        let reject_offset =
            drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await;
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{question_reject_session}/messages"),
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
        let reject_events = poll_events_until_match_from(
            &app.app,
            &question_reject_session,
            reject_offset,
            Duration::from_secs(120),
            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
        )
        .await;
        let reject_events = truncate_question_events(&reject_events);
        assert_session_snapshot("question_reject_events", normalize_events(&reject_events));
        // Only the question id is needed to reject; the answers are ignored.
        if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
            let status = send_status(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
                ),
                None,
            )
            .await;
            assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
            assert_session_snapshot("question_reject", snapshot_status(status));
        } else {
            // No question was observed: rejecting an unknown id must fail.
            let (status, payload) = send_json(
                &app.app,
                Method::POST,
                &format!(
                    "/v1/sessions/{question_reject_session}/questions/missing-question/reject"
                ),
                None,
            )
            .await;
            assert!(!status.is_success(), "missing question id reject should error");
            assert_session_snapshot(
                "question_reject_missing",
                json!({
                    "status": status.as_u16(),
                    "payload": payload,
                }),
            );
        }
    }
 }
--- a/server/packages/sandbox-agent/tests/sessions/reasoning.rs
+++ b/server/packages/sandbox-agent/tests/sessions/reasoning.rs
@ -0,0 +1,56 @@
 // Reasoning capability checks are isolated from baseline snapshots.
 include!("../common/http.rs");
 /// Chooses the prompt text that the reasoning test sends to `agent`.
 ///
 /// The mock agent receives `"demo"`; every other agent receives a sentence
 /// asking for a brief answer that includes reasoning.
 fn reasoning_prompt(agent: AgentId) -> &'static str {
    if agent != AgentId::Mock {
        return "Answer briefly and include your reasoning.";
    }
    "demo"
 }
 /// Checks that each reasoning-capable agent emits an event with `"reasoning"` content.
 ///
 /// Agents whose reported capabilities lack `reasoning` are skipped. For the others
 /// the test creates a session, sends the prompt chosen by `reasoning_prompt`, and
 /// polls events until `events_have_content_type(events, "reasoning")` holds or any
 /// event satisfies `is_error_event`. The final assertion requires the reasoning
 /// content to be present, so stopping on an error event fails the test.
 ///
 /// # Panics
 ///
 /// Panics if `test_agents_from_env` fails, if an agent has no capabilities entry,
 /// if sending the prompt does not return 204, or if no reasoning content is seen.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn reasoning_events_present() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for config in &configs {
        // A fresh TestApp is built for every agent.
        let app = TestApp::new();
        let capabilities = fetch_capabilities(&app.app).await;
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        if !caps.reasoning {
            continue;
        }
        // Bound to a named variable so the returned value lives until the end of this iteration.
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;
        let session_id = format!("reasoning-{}", config.agent.as_str());
        create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
            .await;
        // The value returned here is used below as the starting point for polling.
        let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}/messages"),
            Some(json!({ "message": reasoning_prompt(config.agent) })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
        // Stop polling on the first reasoning content or on the first error event.
        let events = poll_events_until_match_from(
            &app.app,
            &session_id,
            offset,
            Duration::from_secs(120),
            |events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
        )
        .await;
        assert!(
            events_have_content_type(&events, "reasoning"),
            "expected reasoning content for {}",
            config.agent
        );
    }
 }
--- a/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
+++ b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
@ -0,0 +1,192 @@
 // Session lifecycle and streaming snapshots use the mock baseline as the single source of truth.
 include!("../common/http.rs");
 /// Returns the snapshot suffix for `prefix`, as produced by `snapshot_name`
 /// when it is given the mock agent.
 fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
 }
 fn assert_session_snapshot(prefix: &str, value: Value) {
    insta::with_settings!({
        snapshot_suffix => session_snapshot_suffix(prefix),
    }, {
        insta::assert_yaml_snapshot!(value);
    });
 }
 /// Snapshots the create-session, list-sessions, and send-message endpoints per agent.
 ///
 /// Agents whose reported capabilities lack `session_lifecycle` are skipped. For the
 /// others the test issues, in order:
 ///
 /// 1. `POST /v1/sessions/{session_id}` with the agent name and permission mode,
 ///    expecting 200 and snapshotting the normalized response as `create_session`.
 /// 2. `GET /v1/sessions`, expecting 200 and snapshotting the normalized list as
 ///    `sessions_list`.
 /// 3. `POST /v1/sessions/{session_id}/messages` with `PROMPT`, expecting 204 and
 ///    snapshotting the status as `send_message`.
 ///
 /// # Panics
 ///
 /// Panics if `test_agents_from_env` fails, if an agent has no capabilities entry,
 /// or if any status assertion or snapshot comparison fails.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn session_endpoints_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for config in &configs {
        // A fresh TestApp is built for every agent.
        let app = TestApp::new();
        let capabilities = fetch_capabilities(&app.app).await;
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        if !caps.session_lifecycle {
            continue;
        }
        // Bound to a named variable so the returned value lives until the end of this iteration.
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;
        let session_id = format!("snapshot-{}", config.agent.as_str());
        let permission_mode = test_permission_mode(config.agent);
        // Create the session directly through the HTTP route so the response body can be snapshotted.
        let (status, created) = send_json(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}"),
            Some(json!({
                "agent": config.agent.as_str(),
                "permissionMode": permission_mode
            })),
        )
        .await;
        assert_eq!(status, StatusCode::OK, "create session");
        assert_session_snapshot("create_session", normalize_create_session(&created));
        let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
        assert_eq!(status, StatusCode::OK, "list sessions");
        assert_session_snapshot("sessions_list", normalize_sessions(&sessions));
        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}/messages"),
            Some(json!({ "message": PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send message");
        assert_session_snapshot("send_message", snapshot_status(status));
    }
 }
 /// Calls `run_http_events_snapshot` for every configured agent that is not
 /// OpenCode and whose capabilities report `session_lifecycle`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn http_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agent_configs {
        // OpenCode's embedded bun hangs when installing plugins, blocking event streaming.
        let is_opencode = agent_config.agent == AgentId::Opencode;
        if is_opencode {
            continue;
        }
        let test_app = TestApp::new();
        let capability_map = fetch_capabilities(&test_app.app).await;
        let agent_caps = capability_map
            .get(agent_config.agent.as_str())
            .expect("capabilities missing");
        if agent_caps.session_lifecycle {
            run_http_events_snapshot(&test_app.app, agent_config).await;
        }
    }
 }
 /// Calls `run_sse_events_snapshot` for every configured agent that is not
 /// OpenCode and whose capabilities report `session_lifecycle`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn sse_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agent_configs {
        // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
        let is_opencode = agent_config.agent == AgentId::Opencode;
        if is_opencode {
            continue;
        }
        let test_app = TestApp::new();
        let capability_map = fetch_capabilities(&test_app.app).await;
        let agent_caps = capability_map
            .get(agent_config.agent.as_str())
            .expect("capabilities missing");
        if agent_caps.session_lifecycle {
            run_sse_events_snapshot(&test_app.app, agent_config).await;
        }
    }
 }
 /// Calls `run_concurrency_snapshot` for every configured agent whose
 /// capabilities report `session_lifecycle`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn concurrency_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let capability_map = fetch_capabilities(&test_app.app).await;
        let agent_caps = capability_map
            .get(agent_config.agent.as_str())
            .expect("capabilities missing");
        if agent_caps.session_lifecycle {
            run_concurrency_snapshot(&test_app.app, agent_config).await;
        }
    }
 }
 /// Calls `run_turn_stream_check` for every configured agent whose
 /// capabilities report `session_lifecycle`.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn turn_stream_route() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let capability_map = fetch_capabilities(&test_app.app).await;
        let agent_caps = capability_map
            .get(agent_config.agent.as_str())
            .expect("capabilities missing");
        if agent_caps.session_lifecycle {
            run_turn_stream_check(&test_app.app, agent_config).await;
        }
    }
 }
 /// Drives two sessions of the same agent side by side and snapshots both event streams.
 ///
 /// Installs `config.agent`, creates sessions `concurrent-a-<agent>` and
 /// `concurrent-b-<agent>`, sends a message to both with `tokio::join!`, then polls
 /// both with `tokio::join!`. Each event list is cut by `truncate_after_first_stop`,
 /// checked to be non-empty and to satisfy `should_stop`, and finally the normalized
 /// events of both sessions are snapshotted together as `concurrency_events`.
 ///
 /// # Panics
 ///
 /// Panics if either session collected no events, if `should_stop` is false for
 /// either session, or if the snapshot comparison fails.
 async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
    // Bound to a named variable so the returned value lives until the function returns.
    let _guard = apply_credentials(&config.credentials);
    install_agent(app, config.agent).await;
    let session_a = format!("concurrent-a-{}", config.agent.as_str());
    let session_b = format!("concurrent-b-{}", config.agent.as_str());
    let perm_mode = test_permission_mode(config.agent);
    create_session(app, config.agent, &session_a, perm_mode).await;
    create_session(app, config.agent, &session_b, perm_mode).await;
    // The returned offsets are used below as the starting points for polling.
    let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
    let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
    // Each future gets its own clone of the router.
    let app_a = app.clone();
    let app_b = app.clone();
    let send_a = send_message(&app_a, &session_a);
    let send_b = send_message(&app_b, &session_b);
    // Both sends are awaited together rather than one after the other.
    tokio::join!(send_a, send_b);
    let app_a = app.clone();
    let app_b = app.clone();
    let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120));
    let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120));
    let (events_a, events_b) = tokio::join!(poll_a, poll_b);
    let events_a = truncate_after_first_stop(&events_a);
    let events_b = truncate_after_first_stop(&events_b);
    assert!(
        !events_a.is_empty(),
        "no events collected for concurrent session a {}",
        config.agent
    );
    assert!(
        !events_b.is_empty(),
        "no events collected for concurrent session b {}",
        config.agent
    );
    // If the stop condition is not met, the failure is reported as a timeout.
    assert!(
        should_stop(&events_a),
        "timed out waiting for assistant/error event for concurrent session a {}",
        config.agent
    );
    assert!(
        should_stop(&events_b),
        "timed out waiting for assistant/error event for concurrent session b {}",
        config.agent
    );
    let snapshot = json!({
        "session_a": normalize_events(&events_a),
        "session_b": normalize_events(&events_b),
    });
    assert_session_snapshot("concurrency_events", snapshot);
 }
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionspermissions__assert_session_snapshot@permission_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionspermissions__assert_session_snapshot@permission_events_mock.snap
@ -0,0 +1,48 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/permissions.rs
 expression: value
 ---
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 1
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 2
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 3
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 4
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 6
  type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionspermissions__assert_session_snapshot@permission_reply_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionspermissions__assert_session_snapshot@permission_reply_missing_mock.snap
@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/permissions.rs
-assertion_line: 1011
+expression: value
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown permission id: missing-permission"
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reject_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reject_events_mock.snap
@ -0,0 +1,48 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/questions.rs
 expression: value
 ---
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 1
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 2
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 3
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 4
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 6
  type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reject_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reject_missing_mock.snap
@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
-assertion_line: 1078
+expression: value
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reply_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reply_events_mock.snap
@ -0,0 +1,48 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/questions.rs
 expression: value
 ---
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 1
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 2
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 3
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
  seq: 4
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 5
  type: item.delta
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: completed
  seq: 6
  type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reply_missing_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionsquestions__assert_session_snapshot@question_reply_missing_mock.snap
@ -1,7 +1,6 @@
 ---
-source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/questions.rs
-assertion_line: 1072
+expression: value
 expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
 ---
 payload:
  detail: "invalid request: unknown question id: missing-question"
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@concurrency_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@concurrency_events_mock.snap
@ -1,38 +1,43 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
-assertion_line: 1351
+expression: value
 expression: snapshot
 ---
 session_a:
-  - metadata: true
+  - item:
      content_types:
        - text
      kind: message
      role: user
      status: in_progress
    seq: 1
-    session: started
+    type: item.started
-    source: daemon
+  - delta:
-    synthetic: true
+      delta: "<redacted>"
-    type: session.started
+      item_id: "<redacted>"
-  - metadata: true
+      native_item_id: "<redacted>"
    seq: 2
-    session: started
+    type: item.delta
-    source: agent
+  - item:
-    synthetic: false
+      content_types:
-    type: session.started
+        - text
      kind: message
      role: user
      status: completed
    seq: 3
    type: item.completed
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: in_progress
-    seq: 3
+    seq: 4
    source: agent
    synthetic: false
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
-    seq: 4
+    seq: 5
    source: agent
    synthetic: false
    type: item.delta
  - item:
      content_types:
@ -40,40 +45,44 @@ session_a:
      kind: message
      role: assistant
      status: completed
-    seq: 5
+    seq: 6
    source: agent
    synthetic: false
    type: item.completed
 session_b:
-  - metadata: true
+  - item:
      content_types:
        - text
      kind: message
      role: user
      status: in_progress
    seq: 1
-    session: started
+    type: item.started
-    source: daemon
+  - delta:
-    synthetic: true
+      delta: "<redacted>"
-    type: session.started
+      item_id: "<redacted>"
-  - metadata: true
+      native_item_id: "<redacted>"
    seq: 2
-    session: started
+    type: item.delta
-    source: agent
+  - item:
-    synthetic: false
+      content_types:
-    type: session.started
+        - text
      kind: message
      role: user
      status: completed
    seq: 3
    type: item.completed
  - item:
      content_types:
        - text
      kind: message
      role: assistant
      status: in_progress
-    seq: 3
+    seq: 4
    source: agent
    synthetic: false
    type: item.started
  - delta:
      delta: "<redacted>"
      item_id: "<redacted>"
      native_item_id: "<redacted>"
-    seq: 4
+    seq: 5
    source: agent
    synthetic: false
    type: item.delta
  - item:
      content_types:
@ -81,7 +90,5 @@ session_b:
      kind: message
      role: assistant
      status: completed
-    seq: 5
+    seq: 6
    source: agent
    synthetic: false
    type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@create_session_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@create_session_mock.snap
@ -0,0 +1,6 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
 expression: value
 ---
 healthy: true
 nativeSessionId: "<redacted>"
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@send_message_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@send_message_mock.snap
@ -0,0 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
 expression: value
 ---
 status: 204
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@sessions_list_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__assert_session_snapshot@sessions_list_mock.snap
@ -0,0 +1,6 @@
 ---
 source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
 expression: value
 ---
 hasExpectedFields: true
 sessionCount: 1
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__run_http_events_snapshot@http_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__run_http_events_snapshot@http_events_mock.snap
@ -1,37 +1,42 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
 assertion_line: 811
 expression: normalized
 ---
- metadata: true
+- item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 1
-  session: started
+  type: item.started
-  source: daemon
+- delta:
-  synthetic: true
+    delta: "<redacted>"
-  type: session.started
+    item_id: "<redacted>"
- metadata: true
+    native_item_id: "<redacted>"
  seq: 2
-  session: started
+  type: item.delta
-  source: agent
+- item:
-  synthetic: false
+    content_types:
-  type: session.started
+      - text
    kind: message
    role: user
    status: completed
  seq: 3
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
-  seq: 3
+  seq: 4
  source: agent
  synthetic: false
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
-  seq: 4
+  seq: 5
  source: agent
  synthetic: false
  type: item.delta
 - item:
    content_types:
@ -39,7 +44,5 @@ expression: normalized
    kind: message
    role: assistant
    status: completed
-  seq: 5
+  seq: 6
  source: agent
  synthetic: false
  type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__run_sse_events_snapshot@sse_events_mock.snap
+++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessionssessionssession_lifecycle__run_sse_events_snapshot@sse_events_mock.snap
@ -1,29 +1,42 @@
 ---
-source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
+source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
 assertion_line: 804
 expression: normalized
 ---
- metadata: true
+- item:
    content_types:
      - text
    kind: message
    role: user
    status: in_progress
  seq: 1
-  session: started
+  type: item.started
-  type: session.started
+- delta:
- metadata: true
+    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
  seq: 2
-  session: started
+  type: item.delta
-  type: session.started
+- item:
    content_types:
      - text
    kind: message
    role: user
    status: completed
  seq: 3
  type: item.completed
 - item:
    content_types:
      - text
    kind: message
    role: assistant
    status: in_progress
-  seq: 3
+  seq: 4
  type: item.started
 - delta:
    delta: "<redacted>"
    item_id: "<redacted>"
    native_item_id: "<redacted>"
-  seq: 4
+  seq: 5
  type: item.delta
 - item:
    content_types:
@ -31,5 +44,5 @@ expression: normalized
    kind: message
    role: assistant
    status: completed
-  seq: 5
+  seq: 6
  type: item.completed
--- a/server/packages/sandbox-agent/tests/sessions/status.rs
+++ b/server/packages/sandbox-agent/tests/sessions/status.rs
@ -0,0 +1,61 @@
 // Status capability checks are isolated from baseline snapshots.
 include!("../common/http.rs");
 /// Picks the prompt text that the status test sends to `agent`.
 ///
 /// `AgentId::Mock` receives the literal `"status"`; every other agent
 /// receives a natural-language request for a short status update.
 fn status_prompt(agent: AgentId) -> &'static str {
    // Guard clause: anything that is not the mock agent gets the prose prompt.
    if agent != AgentId::Mock {
        return "Provide a short status update.";
    }
    "status"
 }
 /// Reports whether `events` carries a status signal.
 ///
 /// Returns `true` when at least one event satisfies `event_is_status_item`;
 /// otherwise falls back to `events_have_content_type(events, "status")`.
 fn events_have_status(events: &[Value]) -> bool {
    let has_status_item = events.iter().any(|evt| event_is_status_item(evt));
    if has_status_item {
        return true;
    }
    // Only consulted when no individual event matched above.
    events_have_content_type(events, "status")
 }
 /// Checks that every configured agent whose capabilities report `status`
 /// produces status events after being sent the status prompt.
 ///
 /// Runs on a multi-threaded Tokio runtime with two worker threads. Agents
 /// whose capabilities do not report `status` are skipped. The test fails if
 /// the message POST does not return `204 No Content`, or if polling finishes
 /// without `events_have_status` holding for the collected events.
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn status_events_present() {
    // Fails the test up front when no agent configuration can be obtained.
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for config in &configs {
        // A fresh TestApp is created for each agent configuration.
        let app = TestApp::new();
        let capabilities = fetch_capabilities(&app.app).await;
        let caps = capabilities
            .get(config.agent.as_str())
            .expect("capabilities missing");
        // Nothing to verify for agents that do not advertise status support.
        if !caps.status {
            continue;
        }
        // The returned guard is held until the end of this loop iteration.
        // NOTE(review): presumably it restores the previous credentials on drop; confirm.
        let _guard = apply_credentials(&config.credentials);
        install_agent(&app.app, config.agent).await;

        let session_id = format!("status-{}", config.agent.as_str());
        create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
            .await;
        // Drain for up to 6 seconds before sending the prompt; the returned
        // offset is the starting point handed to the poll below.
        let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;

        let status = send_status(
            &app.app,
            Method::POST,
            &format!("/v1/sessions/{session_id}/messages"),
            Some(json!({ "message": status_prompt(config.agent) })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");

        // Poll from `offset` with a 120-second limit. The predicate is satisfied
        // by status events or by any error event, so an agent error ends the
        // wait early and is then caught by the assertion below.
        let events = poll_events_until_match_from(
            &app.app,
            &session_id,
            offset,
            Duration::from_secs(120),
            |events| events_have_status(events) || events.iter().any(is_error_event),
        )
        .await;
        assert!(
            events_have_status(&events),
            "expected status events for {}",
            config.agent
        );
    }
 }
		`@ -0,0 +1,2 @@`
							`#[path = "http/agent_endpoints.rs"]`
							`mod agent_endpoints;`
		`@ -0,0 +1,2 @@`
							`#[path = "sessions/mod.rs"]`
							`mod sessions;`