From b49776145b221190ba9dce14acbe869bfbf31eef Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 27 Jan 2026 19:29:54 -0800 Subject: [PATCH] fix: add docker-setup action, runtime Dockerfile, and align release workflow - Add .github/actions/docker-setup composite action (from rivet) - Add docker/runtime/Dockerfile for Docker image builds - Update release.yaml to match rivet patterns: - Use corepack enable instead of pnpm/action-setup - Add reuse_engine_version input - Add Docker job with Depot runners - Use --no-frozen-lockfile for pnpm install - Add id-token permission for setup job --- .github/actions/docker-setup/action.yaml | 31 + .github/workflows/release.yaml | 122 ++- Cargo.toml | 4 +- docker/runtime/Dockerfile | 51 ++ docs/building-chat-ui.mdx | 1 + docs/openapi.json | 7 +- examples/daytona/daytona.ts | 2 + examples/docker/docker.ts | 2 + examples/e2b/e2b.ts | 2 + examples/shared/sandbox-agent-client.ts | 21 + examples/vercel/vercel-sandbox.ts | 2 + server/CLAUDE.md | 27 +- server/packages/sandbox-agent/src/router.rs | 6 + .../http_sse_snapshots.rs => common/http.rs} | 804 +++++------------- .../tests/http/agent_endpoints.rs | 165 ++++ ...oints_snapshots@agent_install_claude.snap} | 0 ...points_snapshots@agent_install_codex.snap} | 0 ...dpoints_snapshots@agent_install_mock.snap} | 0 ...nts_snapshots@agent_install_opencode.snap} | 0 ...dpoints_snapshots@agent_modes_claude.snap} | 0 ...ndpoints_snapshots@agent_modes_codex.snap} | 0 ...endpoints_snapshots@agent_modes_mock.snap} | 0 ...oints_snapshots@agent_modes_opencode.snap} | 0 ...dpoints_snapshots@agents_list_global.snap} | 0 ...nt_endpoints_snapshots@health_global.snap} | 0 ..._snapshots@auth_health_public_global.snap} | 0 ..._snapshots@auth_invalid_token_global.snap} | 0 ..._snapshots@auth_missing_token_global.snap} | 0 ...th_snapshots@auth_valid_token_global.snap} | 0 ...s__cors_snapshots@cors_actual_global.snap} | 0 ...cors_snapshots@cors_preflight_global.snap} | 0 
...oints_snapshots@create_session_claude.snap | 6 - ...points_snapshots@create_session_codex.snap | 6 - ...dpoints_snapshots@create_session_mock.snap | 7 - ...nts_snapshots@create_session_opencode.snap | 6 - ...dpoints_snapshots@send_message_claude.snap | 6 - ...ndpoints_snapshots@send_message_codex.snap | 6 - ...endpoints_snapshots@send_message_mock.snap | 6 - ...oints_snapshots@send_message_opencode.snap | 5 - ...points_snapshots@sessions_list_global.snap | 6 - ...ow_snapshots@permission_events_claude.snap | 17 - ...low_snapshots@permission_events_codex.snap | 131 --- ...flow_snapshots@permission_events_mock.snap | 35 - ...pshots@permission_reply_missing_codex.snap | 11 - ...apshots@permission_reply_missing_mock.snap | 11 - ...apshots@question_reject_events_claude.snap | 45 - ...napshots@question_reject_events_codex.snap | 331 ------- ...snapshots@question_reject_events_mock.snap | 35 - ...pshots@question_reject_missing_claude.snap | 11 - ...apshots@question_reject_missing_codex.snap | 11 - ...napshots@question_reject_missing_mock.snap | 11 - ...napshots@question_reply_events_claude.snap | 45 - ...snapshots@question_reply_events_codex.snap | 315 ------- ..._snapshots@question_reply_events_mock.snap | 35 - ...snapshots@question_reply_missing_mock.snap | 11 - ...ncy_snapshot@concurrency_events_codex.snap | 201 ----- ...ency_snapshot@concurrency_events_mock.snap | 67 -- ...ttp_events_snapshot@http_events_codex.snap | 171 ---- ...sse_events_snapshot@sse_events_claude.snap | 45 - ..._sse_events_snapshot@sse_events_codex.snap | 73 -- ...n_sse_events_snapshot@sse_events_mock.snap | 35 - .../sandbox-agent/tests/http_endpoints.rs | 2 + .../sandbox-agent/tests/http_sse_snapshots.rs | 1 - .../packages/sandbox-agent/tests/sessions.rs | 2 + .../sandbox-agent/tests/sessions/mod.rs | 5 + .../tests/sessions/permissions.rs | 88 ++ .../sandbox-agent/tests/sessions/questions.rs | 145 ++++ .../sandbox-agent/tests/sessions/reasoning.rs | 56 ++ .../tests/sessions/session_lifecycle.rs 
| 192 +++++ ...ssion_snapshot@permission_events_mock.snap | 48 ++ ...apshot@permission_reply_missing_mock.snap} | 5 +- ..._snapshot@question_reject_events_mock.snap | 48 ++ ...napshot@question_reject_missing_mock.snap} | 5 +- ...n_snapshot@question_reply_events_mock.snap | 48 ++ ...snapshot@question_reply_missing_mock.snap} | 5 +- ...ion_snapshot@concurrency_events_mock.snap} | 89 +- ..._session_snapshot@create_session_mock.snap | 6 + ...rt_session_snapshot@send_message_mock.snap | 5 + ...t_session_snapshot@sessions_list_mock.snap | 6 + ...ttp_events_snapshot@http_events_mock.snap} | 45 +- ..._sse_events_snapshot@sse_events_mock.snap} | 35 +- .../sandbox-agent/tests/sessions/status.rs | 61 ++ 82 files changed, 1415 insertions(+), 2430 deletions(-) create mode 100644 .github/actions/docker-setup/action.yaml create mode 100644 docker/runtime/Dockerfile rename server/packages/sandbox-agent/tests/{http/http_sse_snapshots.rs => common/http.rs} (53%) create mode 100644 server/packages/sandbox-agent/tests/http/agent_endpoints.rs rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap => agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap 
=> agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap => agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap => agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__api_endpoints_snapshots@health_global.snap => agent_endpoints__agent_endpoints_snapshots@health_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_health_public_global.snap => agent_endpoints__auth_snapshots@auth_health_public_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap => agent_endpoints__auth_snapshots@auth_invalid_token_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap => agent_endpoints__auth_snapshots@auth_missing_token_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap => agent_endpoints__auth_snapshots@auth_valid_token_global.snap} (100%) rename 
server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__cors_snapshots@cors_actual_global.snap => agent_endpoints__cors_snapshots@cors_actual_global.snap} (100%) rename server/packages/sandbox-agent/tests/http/snapshots/{http_sse_snapshots__cors_snapshots@cors_preflight_global.snap => agent_endpoints__cors_snapshots@cors_preflight_global.snap} (100%) delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap delete mode 100644 
server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap delete mode 100644 
server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap delete mode 100644 server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap create mode 100644 server/packages/sandbox-agent/tests/http_endpoints.rs delete mode 100644 server/packages/sandbox-agent/tests/http_sse_snapshots.rs create mode 100644 server/packages/sandbox-agent/tests/sessions.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/mod.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/permissions.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/questions.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/reasoning.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap => sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap} (53%) create mode 100644 
server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap => sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap} (53%) create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap => sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap} (53%) rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap => sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap} (51%) create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap create mode 100644 server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap => sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap} (50%) rename server/packages/sandbox-agent/tests/{http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap => 
sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap} (50%) create mode 100644 server/packages/sandbox-agent/tests/sessions/status.rs diff --git a/.github/actions/docker-setup/action.yaml b/.github/actions/docker-setup/action.yaml new file mode 100644 index 0000000..d07ec5a --- /dev/null +++ b/.github/actions/docker-setup/action.yaml @@ -0,0 +1,31 @@ +name: 'Docker Setup' +description: 'Set up Docker Buildx and log in to Docker Hub' +inputs: + docker_username: + description: 'Docker Hub username' + required: true + docker_password: + description: 'Docker Hub password' + required: true + github_token: + description: 'GitHub token' + required: true +runs: + using: 'composite' + steps: + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ inputs.docker_username }} + password: ${{ inputs.docker_password }} + + # This will be used as a secret to authenticate with Git repo pulls + - name: Create .netrc file + run: | + echo "machine github.com" > ${{ runner.temp }}/netrc + echo "login x-access-token" >> ${{ runner.temp }}/netrc + echo "password ${{ inputs.github_token }}" >> ${{ runner.temp }}/netrc + shell: bash diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 3353830..09e39c2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -4,14 +4,18 @@ on: workflow_dispatch: inputs: version: - description: "Version (e.g. 
0.1.0 or v0.1.0)" + description: 'Version' required: true type: string latest: - description: "Latest" + description: 'Latest' required: true type: boolean default: true + reuse_engine_version: + description: 'Reuse artifacts from this version (skips building)' + required: false + type: string defaults: run: @@ -27,7 +31,10 @@ jobs: name: "Setup" runs-on: ubuntu-24.04 permissions: + # Allow pushing to GitHub contents: write + # Allows authentication + id-token: write steps: - uses: actions/checkout@v4 with: @@ -35,20 +42,29 @@ jobs: - uses: dtolnay/rust-toolchain@stable - - uses: pnpm/action-setup@v4 - - uses: actions/setup-node@v4 with: node-version: 20 - cache: pnpm + + - run: corepack enable - name: Setup env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} run: | + # Configure Git + git config --global user.name "github-actions[bot]" + git config --global user.email "github-actions[bot]@users.noreply.github.com" + + # Authenticate with NPM + cat << EOF > ~/.npmrc + //registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }} + EOF + # Install dependencies - pnpm install + pnpm install --no-frozen-lockfile # Install tsx globally npm install -g tsx @@ -60,54 +76,57 @@ jobs: CMD="$CMD --no-latest" fi + if [ -n "${{ inputs.reuse_engine_version }}" ]; then + CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\"" + fi + eval "$CMD" binaries: name: "Build & Upload Binaries" needs: [setup] + if: ${{ !inputs.reuse_engine_version }} strategy: matrix: include: - platform: linux + runner: depot-ubuntu-24.04-8 target: x86_64-unknown-linux-musl binary_ext: "" arch: x86_64 - platform: windows + runner: depot-ubuntu-24.04-8 target: x86_64-pc-windows-gnu binary_ext: ".exe" arch: x86_64 - platform: macos + runner: depot-ubuntu-24.04-8 target: x86_64-apple-darwin binary_ext: "" arch: x86_64 - platform: macos + runner: 
depot-ubuntu-24.04-8 target: aarch64-apple-darwin binary_ext: "" arch: aarch64 - runs-on: ubuntu-24.04 + runs-on: ${{ matrix.runner }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: pnpm/action-setup@v4 - - - uses: actions/setup-node@v4 - with: - node-version: 20 - cache: pnpm - - - name: Build inspector frontend - run: | - pnpm install - SANDBOX_AGENT_SKIP_INSPECTOR=1 pnpm --filter @sandbox-agent/inspector build - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build binary run: | + # Use Docker BuildKit + export DOCKER_BUILDKIT=1 + + # Build the binary using our Dockerfile docker/release/build.sh ${{ matrix.target }} + + # Make sure dist directory exists and binary is there ls -la dist/ - name: Upload to R2 @@ -115,10 +134,11 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} run: | - # Install AWS CLI + # Install dependencies for AWS CLI sudo apt-get update sudo apt-get install -y unzip curl + # Install AWS CLI curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" unzip awscliv2.zip sudo ./aws/install --update @@ -126,7 +146,7 @@ jobs: COMMIT_SHA_SHORT="${GITHUB_SHA::7}" BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" - # Upload to commit directory for later promotion + # Must specify --checksum-algorithm for compatibility with R2 aws s3 cp \ "${BINARY_PATH}" \ "s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \ @@ -134,10 +154,48 @@ jobs: --endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \ --checksum-algorithm CRC32 + docker: + name: "Build & Push Docker Images" + needs: [setup] + if: ${{ !inputs.reuse_engine_version }} + strategy: + matrix: + include: + - platform: linux/arm64 + runner: depot-ubuntu-24.04-arm-8 + arch_suffix: -arm64 + - platform: linux/amd64 + runner: 
depot-ubuntu-24.04-8 + arch_suffix: -amd64 + runs-on: ${{ matrix.runner }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set outputs + id: vars + run: echo "sha_short=${GITHUB_SHA::7}" >> $GITHUB_OUTPUT + + - uses: ./.github/actions/docker-setup + with: + docker_username: ${{ secrets.DOCKER_CI_USERNAME }} + docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }} + github_token: ${{ secrets.GITHUB_TOKEN }} + + - name: Build & Push + uses: docker/build-push-action@v4 + with: + context: . + push: true + tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }} + file: docker/runtime/Dockerfile + platforms: ${{ matrix.platform }} + complete: name: "Complete" - needs: [setup, binaries] - if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }} + needs: [setup, docker, binaries] + if: ${{ always() && !cancelled() && needs.setup.result == 'success' && (needs.docker.result == 'success' || needs.docker.result == 'skipped') && (needs.binaries.result == 'success' || needs.binaries.result == 'skipped') }} runs-on: ubuntu-24.04 steps: - uses: actions/checkout@v4 @@ -146,17 +204,21 @@ jobs: - uses: dtolnay/rust-toolchain@stable - - uses: pnpm/action-setup@v4 - - uses: actions/setup-node@v4 with: node-version: 20 registry-url: "https://registry.npmjs.org" - cache: pnpm + + - run: corepack enable + + - uses: ./.github/actions/docker-setup + with: + docker_username: ${{ secrets.DOCKER_CI_USERNAME }} + docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }} + github_token: ${{ secrets.GITHUB_TOKEN }} - name: Complete env: - # https://cli.github.com/manual/gh_help_environment GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} @@ -169,7 +231,7 @@ jobs: EOF # Install dependencies - pnpm install + pnpm install --no-frozen-lockfile # Install tsx globally npm install -g tsx @@ -181,4 +243,8 @@ 
jobs: CMD="$CMD --no-latest" fi + if [ -n "${{ inputs.reuse_engine_version }}" ]; then + CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\"" + fi + eval "$CMD" diff --git a/Cargo.toml b/Cargo.toml index 114ae4d..9338a05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,10 +5,10 @@ members = ["server/packages/*"] [workspace.package] version = "0.1.0" edition = "2021" -authors = ["Sandbox Agent Contributors"] +authors = [ "Rivet Gaming, LLC " ] license = "Apache-2.0" repository = "https://github.com/rivet-dev/sandbox-agent" -description = "Universal agent API for AI coding assistants" +description = "Universal API for automatic coding agents in sandboxes. Supports Claude Code, Codex, OpenCode, and Amp." [workspace.dependencies] # Internal crates diff --git a/docker/runtime/Dockerfile b/docker/runtime/Dockerfile new file mode 100644 index 0000000..0e71c2f --- /dev/null +++ b/docker/runtime/Dockerfile @@ -0,0 +1,51 @@ +# syntax=docker/dockerfile:1.10.0 + +# Build stage - compile the binary +FROM rust:1.88.0 AS builder + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y \ + musl-tools \ + musl-dev \ + pkg-config \ + ca-certificates \ + git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN rustup target add x86_64-unknown-linux-musl + +WORKDIR /build +COPY . . 
+ +# Build static binary +RUN --mount=type=cache,target=/usr/local/cargo/registry \ + --mount=type=cache,target=/usr/local/cargo/git \ + --mount=type=cache,target=/build/target \ + SANDBOX_AGENT_SKIP_INSPECTOR=1 \ + RUSTFLAGS="-C target-feature=+crt-static" \ + cargo build -p sandbox-agent --release --target x86_64-unknown-linux-musl && \ + cp target/x86_64-unknown-linux-musl/release/sandbox-agent /sandbox-agent + +# Runtime stage - minimal image +FROM debian:bookworm-slim + +RUN apt-get update && apt-get install -y \ + ca-certificates \ + curl \ + git && \ + rm -rf /var/lib/apt/lists/* + +# Copy the binary from builder +COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent +RUN chmod +x /usr/local/bin/sandbox-agent + +# Create non-root user +RUN useradd -m -s /bin/bash sandbox +USER sandbox +WORKDIR /home/sandbox + +EXPOSE 2468 + +ENTRYPOINT ["sandbox-agent"] +CMD ["--host", "0.0.0.0", "--port", "2468"] diff --git a/docs/building-chat-ui.mdx b/docs/building-chat-ui.mdx index a2bd9b5..80363f9 100644 --- a/docs/building-chat-ui.mdx +++ b/docs/building-chat-ui.mdx @@ -21,6 +21,7 @@ Capabilities tell you which features are supported for the selected agent: - `tool_calls` and `tool_results` indicate tool execution events. - `questions` and `permissions` indicate HITL flows. - `plan_mode` indicates that the agent supports plan-only execution. +- `reasoning` and `status` indicate that the agent can emit reasoning/status content parts. Use these to enable or disable UI affordances (tool panels, approval buttons, etc.). 
diff --git a/docs/openapi.json b/docs/openapi.json index f10b073..7bd9301 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -4,7 +4,8 @@ "title": "sandbox-agent", "description": "", "contact": { - "name": "Sandbox Agent Contributors" + "name": "Rivet Gaming, LLC", + "email": "developer@rivet.gg" }, "license": { "name": "Apache-2.0" @@ -662,6 +663,7 @@ "sessionLifecycle", "errorEvents", "reasoning", + "status", "commandExecution", "fileChanges", "mcpTools", @@ -706,6 +708,9 @@ "type": "boolean", "description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)" }, + "status": { + "type": "boolean" + }, "streamingDeltas": { "type": "boolean" }, diff --git a/examples/daytona/daytona.ts b/examples/daytona/daytona.ts index 45e0070..4c23149 100644 --- a/examples/daytona/daytona.ts +++ b/examples/daytona/daytona.ts @@ -2,6 +2,7 @@ import { Daytona } from "@daytonaio/sdk"; import { pathToFileURL } from "node:url"; import { ensureUrl, + logInspectorUrl, runPrompt, waitForHealth, } from "../shared/sandbox-agent-client.ts"; @@ -39,6 +40,7 @@ export async function setupDaytonaSandboxAgent(): Promise<{ const baseUrl = ensureUrl(preview.url); await waitForHealth({ baseUrl, token, extraHeaders }); + logInspectorUrl({ baseUrl, token }); const cleanup = async () => { try { diff --git a/examples/docker/docker.ts b/examples/docker/docker.ts index e41e776..1626b34 100644 --- a/examples/docker/docker.ts +++ b/examples/docker/docker.ts @@ -2,6 +2,7 @@ import Docker from "dockerode"; import { pathToFileURL } from "node:url"; import { ensureUrl, + logInspectorUrl, runPrompt, waitForHealth, } from "../shared/sandbox-agent-client.ts"; @@ -83,6 +84,7 @@ export async function setupDockerSandboxAgent(): Promise<{ const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`); await waitForHealth({ baseUrl, token }); + logInspectorUrl({ baseUrl, token }); const cleanup = async () => { try { diff --git a/examples/e2b/e2b.ts b/examples/e2b/e2b.ts index 
f2c32e2..9d0fcce 100644 --- a/examples/e2b/e2b.ts +++ b/examples/e2b/e2b.ts @@ -2,6 +2,7 @@ import { Sandbox } from "@e2b/code-interpreter"; import { pathToFileURL } from "node:url"; import { ensureUrl, + logInspectorUrl, runPrompt, waitForHealth, } from "../shared/sandbox-agent-client.ts"; @@ -45,6 +46,7 @@ export async function setupE2BSandboxAgent(): Promise<{ const baseUrl = ensureUrl(sandbox.getHost(port)); await waitForHealth({ baseUrl, token }); + logInspectorUrl({ baseUrl, token }); const cleanup = async () => { try { diff --git a/examples/shared/sandbox-agent-client.ts b/examples/shared/sandbox-agent-client.ts index 4dd9b26..9f5423a 100644 --- a/examples/shared/sandbox-agent-client.ts +++ b/examples/shared/sandbox-agent-client.ts @@ -16,6 +16,27 @@ export function ensureUrl(rawUrl: string): string { return `https://${rawUrl}`; } +const INSPECTOR_URL = "https://inspect.sandboxagent.dev"; + +export function buildInspectorUrl({ + baseUrl, + token, +}: { + baseUrl: string; + token?: string; +}): string { + const normalized = normalizeBaseUrl(ensureUrl(baseUrl)); + const params = new URLSearchParams({ url: normalized }); + if (token) { + params.set("token", token); + } + return `${INSPECTOR_URL}?${params.toString()}`; +} + +export function logInspectorUrl({ baseUrl, token }: { baseUrl: string; token?: string }): void { + console.log(`Inspector: ${buildInspectorUrl({ baseUrl, token })}`); +} + type HeaderOptions = { token?: string; extraHeaders?: Record; diff --git a/examples/vercel/vercel-sandbox.ts b/examples/vercel/vercel-sandbox.ts index 4295cb8..5175578 100644 --- a/examples/vercel/vercel-sandbox.ts +++ b/examples/vercel/vercel-sandbox.ts @@ -2,6 +2,7 @@ import { Sandbox } from "@vercel/sandbox"; import { pathToFileURL } from "node:url"; import { ensureUrl, + logInspectorUrl, runPrompt, waitForHealth, } from "../shared/sandbox-agent-client.ts"; @@ -61,6 +62,7 @@ export async function setupVercelSandboxAgent(): Promise<{ const baseUrl = 
ensureUrl(sandbox.domain(port)); await waitForHealth({ baseUrl, token }); + logInspectorUrl({ baseUrl, token }); const cleanup = async () => { try { diff --git a/server/CLAUDE.md b/server/CLAUDE.md index 6de8a1e..be03de6 100644 --- a/server/CLAUDE.md +++ b/server/CLAUDE.md @@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te - Agent flow coverage in `agent-flows/` - Agent management coverage in `agent-management/` - Shared server manager coverage in `server-manager/` - - HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`) + - HTTP endpoint snapshots in `http/` (snapshots in `http/snapshots/`) + - Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`) - UI coverage in `ui/` - Shared helpers in `common/` - Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/` ## Snapshot tests -The HTTP/SSE snapshot suite entrypoint lives in: -- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`) +HTTP endpoint snapshot entrypoint: +- `server/packages/sandbox-agent/tests/http_endpoints.rs` + +Session snapshot entrypoint: +- `server/packages/sandbox-agent/tests/sessions.rs` Snapshots are written to: -- `server/packages/sandbox-agent/tests/http/snapshots/` +- `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots) +- `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots) ## Agent selection @@ -71,6 +76,7 @@ To keep snapshots deterministic: - IDs, timestamps, native IDs - text content, tool inputs/outputs, provider-specific metadata - `source` and `synthetic` flags (these are implementation details) +- Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across 
agents; validate those content types separately in their capability-specific tests. - The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly. - Event streams are truncated after the first assistant or error event. - Permission flow snapshots are truncated after the permission request (or first assistant) event. @@ -81,14 +87,19 @@ To keep snapshots deterministic: ## Typical commands -Run only Claude snapshots: +Run only Claude session snapshots: ``` -SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots +SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions ``` -Run all detected agents: +Run all detected session snapshots: ``` -cargo test -p sandbox-agent --test http_sse_snapshots +cargo test -p sandbox-agent --test sessions +``` + +Run HTTP endpoint snapshots: +``` +cargo test -p sandbox-agent --test http_endpoints ``` ## Universal Schema diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index c1ab3c7..12e8be0 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -2913,6 +2913,7 @@ pub struct AgentCapabilities { pub session_lifecycle: bool, pub error_events: bool, pub reasoning: bool, + pub status: bool, pub command_execution: bool, pub file_changes: bool, pub mcp_tools: bool, @@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { session_lifecycle: false, error_events: false, reasoning: false, + status: false, command_execution: false, file_changes: false, mcp_tools: false, @@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { session_lifecycle: true, error_events: true, reasoning: true, + status: true, command_execution: true, file_changes: true, mcp_tools: true, @@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { session_lifecycle: true, 
error_events: true, reasoning: false, + status: true, command_execution: false, file_changes: false, mcp_tools: false, @@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { session_lifecycle: false, error_events: true, reasoning: false, + status: false, command_execution: false, file_changes: false, mcp_tools: false, @@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { session_lifecycle: true, error_events: true, reasoning: true, + status: true, command_execution: true, file_changes: true, mcp_tools: true, diff --git a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/common/http.rs similarity index 53% rename from server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs rename to server/packages/sandbox-agent/tests/common/http.rs index 75fdd9e..d719a03 100644 --- a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/common/http.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, HashMap, HashSet}; use std::time::{Duration, Instant}; use axum::body::{Body, Bytes}; @@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) { assert_eq!(status, StatusCode::NO_CONTENT, "send message"); } -async fn poll_events_until( - app: &Router, - session_id: &str, - timeout: Duration, -) -> Vec { - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - 
.and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if should_stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events +async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec, u64) { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + let new_offset = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + .unwrap_or(offset); + (new_events, new_offset) } -async fn read_sse_events( +async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 { + let start = Instant::now(); + let mut offset = 0u64; + loop { + if start.elapsed() >= timeout { + break; + } + let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await; + if new_events.is_empty() { + if offset == 0 { + tokio::time::sleep(Duration::from_millis(200)).await; + continue; + } + break; + } + offset = new_offset; + } + offset +} + +async fn poll_events_until_from( app: &Router, session_id: &str, + offset: u64, + timeout: Duration, +) -> Vec { + poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await +} + +async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec { + poll_events_until_from(app, session_id, 0, timeout).await +} + +async fn read_sse_events_from( + app: &Router, + session_id: &str, + offset: u64, timeout: Duration, ) -> Vec { let request = Request::builder() .method(Method::GET) - .uri(format!("/v1/sessions/{session_id}/events/sse?offset=0")) + .uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}")) .body(Body::empty()) .expect("sse request"); let response = app @@ -291,6 +307,10 @@ async fn 
read_sse_events( events } +async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec { + read_sse_events_from(app, session_id, 0, timeout).await +} + async fn read_turn_stream_events( app: &Router, session_id: &str, @@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value { !events.iter().any(is_unparsed_event), "agent.unparsed event encountered" ); - let normalized = events + let scrubbed = scrub_events(events); + let normalized = scrubbed .iter() .enumerate() .map(|(idx, event)| normalize_event(event, idx + 1)) @@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value { Value::Array(normalized) } +fn scrub_events(events: &[Value]) -> Vec { + let mut scrub_ids = HashSet::new(); + let mut output = Vec::new(); + + for event in events { + let event_type = event.get("type").and_then(Value::as_str).unwrap_or(""); + match event_type { + "item.started" | "item.completed" => { + if let Some(item) = event.get("data").and_then(|data| data.get("item")) { + if should_scrub_item(item) { + record_item_ids(item, &mut scrub_ids); + continue; + } + } + output.push(event.clone()); + } + "item.delta" => { + let item_id = event + .get("data") + .and_then(|data| data.get("item_id")) + .and_then(Value::as_str); + let native_item_id = event + .get("data") + .and_then(|data| data.get("native_item_id")) + .and_then(Value::as_str); + if item_id.is_some_and(|id| scrub_ids.contains(id)) + || native_item_id.is_some_and(|id| scrub_ids.contains(id)) + { + continue; + } + output.push(event.clone()); + } + _ => output.push(event.clone()), + } + } + + output +} + +fn should_scrub_item(item: &Value) -> bool { + if item + .get("kind") + .and_then(Value::as_str) + .is_some_and(|kind| kind == "status") + { + return true; + } + + let types = item_content_types(item); + let filtered = types + .iter() + .filter(|value| value.as_str() != "reasoning" && value.as_str() != "status") + .collect::>(); + types.iter().any(|value| value == "reasoning") && 
filtered.is_empty() +} + +fn record_item_ids(item: &Value, ids: &mut HashSet) { + if let Some(id) = item.get("item_id").and_then(Value::as_str) { + ids.insert(id.to_string()); + } + if let Some(id) = item.get("native_item_id").and_then(Value::as_str) { + ids.insert(id.to_string()); + } +} + fn truncate_after_first_stop(events: &[Value]) -> Vec { if let Some(idx) = events .iter() @@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value { if let Some(event_type) = event.get("type").and_then(Value::as_str) { map.insert("type".to_string(), Value::String(event_type.to_string())); } - if let Some(source) = event.get("source").and_then(Value::as_str) { - map.insert("source".to_string(), Value::String(source.to_string())); - } - if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) { - map.insert("synthetic".to_string(), Value::Bool(synthetic)); - } let data = event.get("data").unwrap_or(&Value::Null); match event.get("type").and_then(Value::as_str).unwrap_or("") { "session.started" => { @@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value { let types = content .iter() .filter_map(|part| part.get("type").and_then(Value::as_str)) + .filter(|value| *value != "reasoning" && *value != "status") .map(|value| Value::String(value.to_string())) .collect::>(); map.insert("content_types".to_string(), Value::Array(types)); @@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value { Value::Object(map) } +fn item_content_types(item: &Value) -> Vec { + item.get("content") + .and_then(Value::as_array) + .map(|content| { + content + .iter() + .filter_map(|part| part.get("type").and_then(Value::as_str)) + .map(|value| value.to_string()) + .collect::>() + }) + .unwrap_or_default() +} + +fn event_content_types(event: &Value) -> Vec { + event + .get("data") + .and_then(|data| data.get("item")) + .map(item_content_types) + .unwrap_or_default() +} + +fn event_is_status_item(event: &Value) -> bool { + event + .get("data") + .and_then(|data| 
data.get("item")) + .and_then(|item| item.get("kind")) + .and_then(Value::as_str) + .is_some_and(|kind| kind == "status") +} + +fn events_have_content_type(events: &[Value], content_type: &str) -> bool { + events + .iter() + .any(|event| event_content_types(event).iter().any(|t| t == content_type)) +} + fn normalize_session_end(data: &Value) -> Value { let mut map = Map::new(); if let Some(reason) = data.get("reason").and_then(Value::as_str) { @@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option) -> String { } +async fn poll_events_until_match_from( + app: &Router, + session_id: &str, + offset: u64, + timeout: Duration, + stop: F, +) -> Vec +where + F: Fn(&[Value]) -> bool, +{ + let start = Instant::now(); + let mut offset = offset; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await; + if !new_events.is_empty() { + offset = new_offset; + events.extend(new_events); + if stop(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + events +} + async fn poll_events_until_match( app: &Router, session_id: &str, @@ -726,34 +870,7 @@ async fn poll_events_until_match( where F: Fn(&[Value]) -> bool, { - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events 
+ poll_events_until_match_from(app, session_id, 0, timeout, stop).await } fn find_permission_id(events: &[Value]) -> Option { @@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) { let session_id = format!("session-{}", config.agent.as_str()); create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + let offset = drain_events(app, &session_id, Duration::from_secs(6)).await; send_message(app, &session_id).await; - let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await; + let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await; let events = truncate_after_first_stop(&events); assert!( !events.is_empty(), @@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) { ); let normalized = normalize_events(&events); insta::with_settings!({ - snapshot_suffix => snapshot_name("http_events", Some(config.agent)), + snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)), + snapshot_path => "../sessions/snapshots", }, { insta::assert_yaml_snapshot!(normalized); }); @@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) { let session_id = format!("sse-{}", config.agent.as_str()); create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + let offset = drain_events(app, &session_id, Duration::from_secs(6)).await; let sse_task = { let app = app.clone(); let session_id = session_id.clone(); + let offset = offset; tokio::spawn(async move { - read_sse_events(&app, &session_id, Duration::from_secs(120)).await + read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await }) }; @@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) { ); let normalized = normalize_events(&events); insta::with_settings!({ - snapshot_suffix => snapshot_name("sse_events", Some(config.agent)), + 
snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)), + snapshot_path => "../sessions/snapshots", }, { insta::assert_yaml_snapshot!(normalized); }); @@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) { config.agent ); } - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn auth_snapshots() { - let token = "test-token"; - let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string())); - - let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await; - assert_eq!(status, StatusCode::OK, "health should be public"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_health_public", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": normalize_health(&payload), - })); - }); - - let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await; - assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_missing_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - - let request = Request::builder() - .method(Method::GET) - .uri("/v1/agents") - .header(header::AUTHORIZATION, "Bearer wrong-token") - .body(Body::empty()) - .expect("auth invalid request"); - let (status, _headers, payload) = send_json_request(&app.app, request).await; - assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_invalid_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - - let request = Request::builder() - .method(Method::GET) - .uri("/v1/agents") - .header(header::AUTHORIZATION, format!("Bearer {token}")) - .body(Body::empty()) - .expect("auth valid request"); - let (status, _headers, payload) = 
send_json_request(&app.app, request).await; - assert_eq!(status, StatusCode::OK, "valid token should allow request"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("auth_valid_token", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": normalize_agent_list(&payload), - })); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn cors_snapshots() { - let cors = CorsLayer::new() - .allow_origin(vec![HeaderValue::from_static("http://example.com")]) - .allow_methods([Method::GET, Method::POST]) - .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]) - .allow_credentials(true); - let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors)); - - let preflight = Request::builder() - .method(Method::OPTIONS) - .uri("/v1/health") - .header(header::ORIGIN, "http://example.com") - .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET") - .header( - header::ACCESS_CONTROL_REQUEST_HEADERS, - "authorization,content-type", - ) - .body(Body::empty()) - .expect("cors preflight request"); - let (status, headers, _payload) = send_request(&app.app, preflight).await; - insta::with_settings!({ - snapshot_suffix => snapshot_name("cors_preflight", None), - }, { - insta::assert_yaml_snapshot!(snapshot_cors(status, &headers)); - }); - - let actual = Request::builder() - .method(Method::GET) - .uri("/v1/health") - .header(header::ORIGIN, "http://example.com") - .body(Body::empty()) - .expect("cors actual request"); - let (status, headers, payload) = send_json_request(&app.app, actual).await; - assert_eq!(status, StatusCode::OK, "cors actual request should succeed"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("cors_actual", None), - }, { - insta::assert_yaml_snapshot!(json!({ - "cors": snapshot_cors(status, &headers), - "payload": normalize_health(&payload), - })); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn 
api_endpoints_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - - let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await; - assert_eq!(status, StatusCode::OK, "health status"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("health", None), - }, { - insta::assert_yaml_snapshot!(normalize_health(&health)); - }); - - // List agents (just verify the API returns correct agent IDs, not install state) - let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await; - assert_eq!(status, StatusCode::OK, "agents list"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agents_list", None), - }, { - insta::assert_yaml_snapshot!(normalize_agent_list(&agents)); - }); - - // Install agents (ensure they're available for subsequent tests) - for config in &configs { - let _guard = apply_credentials(&config.credentials); - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/agents/{}/install", config.agent.as_str()), - Some(json!({})), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "install agent"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agent_install", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } - - let mut session_ids = Vec::new(); - for config in &configs { - let _guard = apply_credentials(&config.credentials); - let (status, modes) = send_json( - &app.app, - Method::GET, - &format!("/v1/agents/{}/modes", config.agent.as_str()), - None, - ) - .await; - assert_eq!(status, StatusCode::OK, "agent modes"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_agent_modes(&modes)); - }); - - let session_id = format!("snapshot-{}", config.agent.as_str()); - let permission_mode = test_permission_mode(config.agent); - let (status, 
created) = send_json( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(json!({ - "agent": config.agent.as_str(), - "permissionMode": permission_mode - })), - ) - .await; - assert_eq!(status, StatusCode::OK, "create session"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("create_session", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_create_session(&created)); - }); - session_ids.push((config.agent, session_id)); - } - - let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await; - assert_eq!(status, StatusCode::OK, "list sessions"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("sessions_list", None), - }, { - insta::assert_yaml_snapshot!(normalize_sessions(&sessions)); - }); - - for (agent, session_id) in &session_ids { - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/messages"), - Some(json!({ "message": PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send message"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("send_message", Some(*agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn approval_flow_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - let capabilities = fetch_capabilities(&app.app).await; - - for config in &configs { - // OpenCode doesn't support "plan" permission mode required for approval flows - if config.agent == AgentId::Opencode { - continue; - } - let caps = capabilities - .get(config.agent.as_str()) - .expect("capabilities missing"); - - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - if caps.plan_mode && caps.permissions { - let permission_session = format!("perm-{}", config.agent.as_str()); - 
create_session(&app.app, config.agent, &permission_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{permission_session}/messages"), - Some(json!({ "message": PERMISSION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); - - let permission_events = poll_events_until_match( - &app.app, - &permission_session, - Duration::from_secs(120), - |events| find_permission_id(events).is_some() || should_stop(events), - ) - .await; - let permission_events = truncate_permission_events(&permission_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&permission_events)); - }); - - if let Some(permission_id) = find_permission_id(&permission_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{permission_session}/permissions/{permission_id}/reply" - ), - Some(json!({ "reply": "once" })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{permission_session}/permissions/missing-permission/reply" - ), - Some(json!({ "reply": "once" })), - ) - .await; - assert!(!status.is_success(), "missing permission id should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - } - - if caps.questions { - let question_reply_session = format!("question-reply-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reply_session, "plan").await; 
- let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{question_reply_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); - - let question_events = poll_events_until_match( - &app.app, - &question_reply_session, - Duration::from_secs(120), - |events| find_question_id_and_answers(events).is_some() || should_stop(events), - ) - .await; - let question_events = truncate_question_events(&question_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&question_events)); - }); - - if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" - ), - Some(json!({ "answers": answers })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/missing-question/reply" - ), - Some(json!({ "answers": [] })), - ) - .await; - assert!(!status.is_success(), "missing question id should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - - let question_reject_session = format!("question-reject-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reject_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - 
&format!("/v1/sessions/{question_reject_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); - - let reject_events = poll_events_until_match( - &app.app, - &question_reject_session, - Duration::from_secs(120), - |events| find_question_id_and_answers(events).is_some() || should_stop(events), - ) - .await; - let reject_events = truncate_question_events(&reject_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&reject_events)); - }); - - if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { - let status = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" - ), - None, - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/missing-question/reject" - ), - None, - ) - .await; - assert!(!status.is_success(), "missing question id reject should error"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); - }); - } - } - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn http_events_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. 
- // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_http_events_snapshot(&app.app, config).await; - } -} - -async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { - let _guard = apply_credentials(&config.credentials); - install_agent(app, config.agent).await; - - let session_a = format!("concurrent-a-{}", config.agent.as_str()); - let session_b = format!("concurrent-b-{}", config.agent.as_str()); - let perm_mode = test_permission_mode(config.agent); - create_session(app, config.agent, &session_a, perm_mode).await; - create_session(app, config.agent, &session_b, perm_mode).await; - - let app_a = app.clone(); - let app_b = app.clone(); - let send_a = send_message(&app_a, &session_a); - let send_b = send_message(&app_b, &session_b); - tokio::join!(send_a, send_b); - - let app_a = app.clone(); - let app_b = app.clone(); - let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120)); - let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120)); - let (events_a, events_b) = tokio::join!(poll_a, poll_b); - let events_a = truncate_after_first_stop(&events_a); - let events_b = truncate_after_first_stop(&events_b); - - assert!( - !events_a.is_empty(), - "no events collected for concurrent session a {}", - config.agent - ); - assert!( - !events_b.is_empty(), - "no events collected for concurrent session b {}", - config.agent - ); - assert!( - should_stop(&events_a), - "timed out waiting for assistant/error event for concurrent session a {}", - config.agent - ); - assert!( - should_stop(&events_b), - "timed out waiting for assistant/error event for concurrent session b {}", - config.agent - ); - - let snapshot = json!({ - "session_a": normalize_events(&events_a), - "session_b": normalize_events(&events_b), - }); - insta::with_settings!({ - snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)), - }, { - 
insta::assert_yaml_snapshot!(snapshot); - }); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn sse_events_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. - // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_sse_events_snapshot(&app.app, config).await; - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn turn_stream_route() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. - // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_turn_stream_check(&app.app, config).await; - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn concurrency_snapshots() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - for config in &configs { - // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. - // See: https://github.com/opencode-ai/opencode/issues/XXX - if config.agent == AgentId::Opencode { - continue; - } - run_concurrency_snapshot(&app.app, config).await; - } -} diff --git a/server/packages/sandbox-agent/tests/http/agent_endpoints.rs b/server/packages/sandbox-agent/tests/http/agent_endpoints.rs new file mode 100644 index 0000000..f195205 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http/agent_endpoints.rs @@ -0,0 +1,165 @@ +// Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/. 
+include!("../common/http.rs"); + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auth_snapshots() { + let token = "test-token"; + let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string())); + + let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await; + assert_eq!(status, StatusCode::OK, "health should be public"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_health_public", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": normalize_health(&payload), + })); + }); + + let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await; + assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_missing_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + + let request = Request::builder() + .method(Method::GET) + .uri("/v1/agents") + .header(header::AUTHORIZATION, "Bearer wrong-token") + .body(Body::empty()) + .expect("auth invalid request"); + let (status, _headers, payload) = send_json_request(&app.app, request).await; + assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_invalid_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + + let request = Request::builder() + .method(Method::GET) + .uri("/v1/agents") + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .body(Body::empty()) + .expect("auth valid request"); + let (status, _headers, payload) = send_json_request(&app.app, request).await; + assert_eq!(status, StatusCode::OK, "valid token should succeed"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("auth_valid_token", None), + }, { + insta::assert_yaml_snapshot!(json!({ + 
"status": status.as_u16(), + "payload": normalize_agent_list(&payload), + })); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn cors_snapshots() { + let cors = CorsLayer::new() + .allow_origin("http://example.com".parse::().unwrap()) + .allow_methods([Method::GET, Method::POST]) + .allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]); + let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors)); + + let preflight = Request::builder() + .method(Method::OPTIONS) + .uri("/v1/agents") + .header(header::ORIGIN, "http://example.com") + .header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET") + .header( + header::ACCESS_CONTROL_REQUEST_HEADERS, + "authorization,content-type", + ) + .body(Body::empty()) + .expect("cors preflight request"); + let (status, headers, _payload) = send_request(&app.app, preflight).await; + insta::with_settings!({ + snapshot_suffix => snapshot_name("cors_preflight", None), + }, { + insta::assert_yaml_snapshot!(snapshot_cors(status, &headers)); + }); + + let actual = Request::builder() + .method(Method::GET) + .uri("/v1/health") + .header(header::ORIGIN, "http://example.com") + .body(Body::empty()) + .expect("cors actual request"); + let (status, headers, payload) = send_json_request(&app.app, actual).await; + assert_eq!(status, StatusCode::OK, "cors actual request should succeed"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("cors_actual", None), + }, { + insta::assert_yaml_snapshot!(json!({ + "cors": snapshot_cors(status, &headers), + "payload": normalize_health(&payload), + })); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_endpoints_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + + let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await; + assert_eq!(status, StatusCode::OK, "health status"); + 
insta::with_settings!({ + snapshot_suffix => snapshot_name("health", None), + }, { + insta::assert_yaml_snapshot!(normalize_health(&health)); + }); + + // List agents (verify IDs only; install state is environment-dependent). + let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await; + assert_eq!(status, StatusCode::OK, "agents list"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agents_list", None), + }, { + insta::assert_yaml_snapshot!(normalize_agent_list(&agents)); + }); + + for config in &configs { + let _guard = apply_credentials(&config.credentials); + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/agents/{}/install", config.agent.as_str()), + Some(json!({})), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "install agent"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agent_install", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } + + for config in &configs { + let _guard = apply_credentials(&config.credentials); + let (status, modes) = send_json( + &app.app, + Method::GET, + &format!("/v1/agents/{}/modes", config.agent.as_str()), + None, + ) + .await; + assert_eq!(status, StatusCode::OK, "agent modes"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(normalize_agent_modes(&modes)); + }); + } +} diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_claude.snap diff 
--git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_mock.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_mock.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_install_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_install_opencode.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap similarity index 100% rename from 
server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_claude.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_claude.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_codex.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_codex.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_mock.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_mock.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agent_modes_opencode.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agent_modes_opencode.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@agents_list_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@health_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@health_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__agent_endpoints_snapshots@health_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_health_public_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_health_public_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_health_public_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_invalid_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_invalid_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_invalid_token_global.snap diff --git 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_missing_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_missing_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_missing_token_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_valid_token_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__auth_snapshots@auth_valid_token_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_actual_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_actual_global.snap rename to server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_actual_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_preflight_global.snap similarity index 100% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__cors_snapshots@cors_preflight_global.snap rename to 
server/packages/sandbox-agent/tests/http/snapshots/agent_endpoints__cors_snapshots@cors_preflight_global.snap diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap deleted file mode 100644 index c9e259a..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_claude.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 918 -expression: normalize_create_session(&created) ---- -healthy: true diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap deleted file mode 100644 index 25be48c..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_create_session(&created) ---- -healthy: true -nativeSessionId: "" diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap deleted file mode 100644 index f0bd98a..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_mock.snap +++ /dev/null @@ -1,7 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1053 -expression: normalize_create_session(&created) ---- -healthy: true 
-nativeSessionId: "" diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap deleted file mode 100644 index 31bd8a7..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_opencode.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_create_session(&created) ---- -agentSessionId: "" -healthy: true diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap deleted file mode 100644 index 636137e..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_claude.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 943 -expression: snapshot_status(status) ---- -status: 204 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap deleted file mode 100644 index af9bc66..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_codex.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 959 -expression: snapshot_status(status) ---- -status: 204 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap deleted file mode 100644 index 0ce7ff9..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_mock.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1078 -expression: snapshot_status(status) ---- -status: 204 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap deleted file mode 100644 index 74ac4c1..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@send_message_opencode.snap +++ /dev/null @@ -1,5 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: snapshot_status(status) ---- -status: 204 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap deleted file mode 100644 index 5a87a3e..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__api_endpoints_snapshots@sessions_list_global.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_sessions(&sessions) ---- -hasExpectedFields: true -sessionCount: 1 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap deleted file mode 100644 index 
1b31317..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap +++ /dev/null @@ -1,17 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1119 -expression: normalize_events(&permission_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap deleted file mode 100644 index 46f9648..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap +++ /dev/null @@ -1,131 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_events(&permission_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed -- item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed -- item: - content_types: [] - kind: message - role: assistant - 
status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 10 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 11 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 12 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 13 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 14 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 15 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap deleted file mode 100644 index b27511c..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_mock.snap +++ /dev/null @@ -1,35 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1112 -expression: normalize_events(&permission_events) ---- -- metadata: true - seq: 1 - session: started - type: session.started -- metadata: true - seq: 2 - session: started - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: 
item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap deleted file mode 100644 index fafb7c8..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_codex.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1017 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown permission id: missing-permission" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap deleted file mode 100644 index de6549e..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_mock.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1152 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown permission id: missing-permission" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap deleted file mode 100644 index 8cb0493..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1151 -expression: normalize_events(&reject_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - source: daemon - synthetic: true - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap deleted file mode 100644 index 53eb2e3..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap +++ /dev/null @@ -1,331 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_events(&reject_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true 
- seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed -- item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed -- item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 10 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 11 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 12 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 13 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 14 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 15 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 16 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 
17 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 18 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 19 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 20 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 21 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 22 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 23 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 24 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 25 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 26 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 27 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 28 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 29 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 30 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 31 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 32 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 33 - source: agent - synthetic: false - type: item.delta -- 
delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 34 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 35 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 36 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 37 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 38 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 39 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 40 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap deleted file mode 100644 index 84dd20e..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_mock.snap +++ /dev/null @@ -1,35 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1236 -expression: normalize_events(&reject_events) ---- -- metadata: true - seq: 1 - session: started - type: session.started -- metadata: true - seq: 2 - session: started - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed diff --git 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap deleted file mode 100644 index df61c32..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_claude.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1151 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown question id: missing-question" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap deleted file mode 100644 index 6c6dbae..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_codex.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1139 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown question id: missing-question" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap deleted file mode 100644 index 
075fe0f..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_missing_mock.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1276 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown question id: missing-question" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap deleted file mode 100644 index 90cd95f..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1109 -expression: normalize_events(&question_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - source: daemon - synthetic: true - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap 
b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap deleted file mode 100644 index a1b3098..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap +++ /dev/null @@ -1,315 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalize_events(&question_events) ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed -- item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed -- item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 10 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 11 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 12 - source: 
agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 13 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 14 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 15 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 16 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 17 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 18 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 19 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 20 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 21 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 22 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 23 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 24 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 25 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 26 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 27 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 28 - source: agent - synthetic: false - type: item.delta -- delta: - delta: 
"" - item_id: "" - native_item_id: "" - seq: 29 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 30 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 31 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 32 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 33 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 34 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 35 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 36 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 37 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 38 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap deleted file mode 100644 index e525eb2..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_mock.snap +++ /dev/null @@ -1,35 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1174 -expression: normalize_events(&question_events) ---- -- metadata: true - seq: 1 - session: started - type: session.started -- metadata: true - seq: 2 - session: started - type: 
session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap deleted file mode 100644 index 2c65fd0..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_mock.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1214 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" ---- -payload: - detail: "invalid request: unknown question id: missing-question" - status: 400 - title: Invalid Request - type: "urn:sandbox-agent:error:invalid_request" -status: 400 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap deleted file mode 100644 index b0ecfb4..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap +++ /dev/null @@ -1,201 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: snapshot ---- -session_a: - - metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started - - metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started - - item: 
- content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed - - item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta - - item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed - - item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started - - item: - content_types: [] - kind: message - role: assistant - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed -session_b: - - metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started - - metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started - - item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed - - item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta - - item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed - - item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started - - item: - content_types: - - status - kind: status - role: system - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed - - delta: - 
delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 10 - source: agent - synthetic: false - type: item.delta - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 11 - source: agent - synthetic: false - type: item.delta - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 12 - source: agent - synthetic: false - type: item.delta - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 13 - source: agent - synthetic: false - type: item.delta - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 14 - source: agent - synthetic: false - type: item.delta - - item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 15 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap deleted file mode 100644 index f9abaa0..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_mock.snap +++ /dev/null @@ -1,67 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1344 -expression: snapshot ---- -session_a: - - metadata: true - seq: 1 - session: started - type: session.started - - metadata: true - seq: 2 - session: started - type: session.started - - item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: item.started - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta - - item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed -session_b: - - metadata: true - 
seq: 1 - session: started - type: session.started - - metadata: true - seq: 2 - session: started - type: session.started - - item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: item.started - - delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta - - item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap deleted file mode 100644 index 46d5eb1..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap +++ /dev/null @@ -1,171 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -expression: normalized ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed -- item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed -- item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started -- item: - content_types: - - status - kind: status - 
role: system - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 9 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 10 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 11 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 12 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 13 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 14 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 15 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 16 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 17 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 18 - source: agent - synthetic: false - type: item.delta -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 19 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - reasoning - kind: message - role: assistant - status: completed - seq: 20 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap deleted file mode 100644 index 48235e5..0000000 --- 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap +++ /dev/null @@ -1,45 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 848 -expression: normalized ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - source: agent - synthetic: false - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - source: agent - synthetic: false - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap deleted file mode 100644 index dc82798..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap +++ /dev/null @@ -1,73 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 848 -expression: normalized ---- -- metadata: true - seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true - seq: 2 - session: started - source: agent - synthetic: false - type: session.started -- item: - content_types: - - status - kind: status - role: system - status: completed - seq: 3 - source: agent - synthetic: false - type: item.completed -- item: - content_types: - - text - kind: message - role: user - status: in_progress - seq: 4 - source: agent - synthetic: false - type: 
item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 5 - source: daemon - synthetic: true - type: item.delta -- item: - content_types: - - text - kind: message - role: user - status: completed - seq: 6 - source: agent - synthetic: false - type: item.completed -- item: - content_types: [] - kind: message - role: assistant - status: in_progress - seq: 7 - source: agent - synthetic: false - type: item.started -- item: - content_types: [] - kind: message - role: assistant - status: completed - seq: 8 - source: agent - synthetic: false - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap b/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap deleted file mode 100644 index add0b00..0000000 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_mock.snap +++ /dev/null @@ -1,35 +0,0 @@ ---- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 841 -expression: normalized ---- -- metadata: true - seq: 1 - session: started - type: session.started -- metadata: true - seq: 2 - session: started - type: session.started -- item: - content_types: - - text - kind: message - role: assistant - status: in_progress - seq: 3 - type: item.started -- delta: - delta: "" - item_id: "" - native_item_id: "" - seq: 4 - type: item.delta -- item: - content_types: - - text - kind: message - role: assistant - status: completed - seq: 5 - type: item.completed diff --git a/server/packages/sandbox-agent/tests/http_endpoints.rs b/server/packages/sandbox-agent/tests/http_endpoints.rs new file mode 100644 index 0000000..a443a95 --- /dev/null +++ b/server/packages/sandbox-agent/tests/http_endpoints.rs @@ -0,0 +1,2 @@ +#[path = "http/agent_endpoints.rs"] +mod agent_endpoints; diff --git 
a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs deleted file mode 100644 index b0ce134..0000000 --- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs +++ /dev/null @@ -1 +0,0 @@ -include!("http/http_sse_snapshots.rs"); diff --git a/server/packages/sandbox-agent/tests/sessions.rs b/server/packages/sandbox-agent/tests/sessions.rs new file mode 100644 index 0000000..6abcb74 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions.rs @@ -0,0 +1,2 @@ +#[path = "sessions/mod.rs"] +mod sessions; diff --git a/server/packages/sandbox-agent/tests/sessions/mod.rs b/server/packages/sandbox-agent/tests/sessions/mod.rs new file mode 100644 index 0000000..6bc2a16 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/mod.rs @@ -0,0 +1,5 @@ +mod session_lifecycle; +mod permissions; +mod questions; +mod reasoning; +mod status; diff --git a/server/packages/sandbox-agent/tests/sessions/permissions.rs b/server/packages/sandbox-agent/tests/sessions/permissions.rs new file mode 100644 index 0000000..34dcc07 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/permissions.rs @@ -0,0 +1,88 @@ +// Permission flow snapshots compare every agent to the mock baseline. 
+include!("../common/http.rs"); + +fn session_snapshot_suffix(prefix: &str) -> String { + snapshot_name(prefix, Some(AgentId::Mock)) +} + +fn assert_session_snapshot(prefix: &str, value: Value) { + insta::with_settings!({ + snapshot_suffix => session_snapshot_suffix(prefix), + }, { + insta::assert_yaml_snapshot!(value); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn permission_flow_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !(caps.plan_mode && caps.permissions) { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let permission_session = format!("perm-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &permission_session, "plan").await; + let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{permission_session}/messages"), + Some(json!({ "message": PERMISSION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); + + let permission_events = poll_events_until_match_from( + &app.app, + &permission_session, + offset, + Duration::from_secs(120), + |events| find_permission_id(events).is_some() || should_stop(events), + ) + .await; + let permission_events = truncate_permission_events(&permission_events); + assert_session_snapshot("permission_events", normalize_events(&permission_events)); + + if let Some(permission_id) = find_permission_id(&permission_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/{permission_id}/reply" + ), + 
Some(json!({ "reply": "once" })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); + assert_session_snapshot("permission_reply", snapshot_status(status)); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/missing-permission/reply" + ), + Some(json!({ "reply": "once" })), + ) + .await; + assert!(!status.is_success(), "missing permission id should error"); + assert_session_snapshot( + "permission_reply_missing", + json!({ + "status": status.as_u16(), + "payload": payload, + }), + ); + } + } +} diff --git a/server/packages/sandbox-agent/tests/sessions/questions.rs b/server/packages/sandbox-agent/tests/sessions/questions.rs new file mode 100644 index 0000000..9f5b55e --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/questions.rs @@ -0,0 +1,145 @@ +// Question flow snapshots compare every agent to the mock baseline. +include!("../common/http.rs"); + +fn session_snapshot_suffix(prefix: &str) -> String { + snapshot_name(prefix, Some(AgentId::Mock)) +} + +fn assert_session_snapshot(prefix: &str, value: Value) { + insta::with_settings!({ + snapshot_suffix => session_snapshot_suffix(prefix), + }, { + insta::assert_yaml_snapshot!(value); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn question_flow_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.questions { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let question_reply_session = format!("question-reply-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reply_session, 
"plan").await; + let reply_offset = + drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{question_reply_session}/messages"), + Some(json!({ "message": QUESTION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); + + let question_events = poll_events_until_match_from( + &app.app, + &question_reply_session, + reply_offset, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), + ) + .await; + let question_events = truncate_question_events(&question_events); + assert_session_snapshot("question_reply_events", normalize_events(&question_events)); + + if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" + ), + Some(json!({ "answers": answers })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); + assert_session_snapshot("question_reply", snapshot_status(status)); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/missing-question/reply" + ), + Some(json!({ "answers": [] })), + ) + .await; + assert!(!status.is_success(), "missing question id should error"); + assert_session_snapshot( + "question_reply_missing", + json!({ + "status": status.as_u16(), + "payload": payload, + }), + ); + } + + let question_reject_session = format!("question-reject-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reject_session, "plan").await; + let reject_offset = + drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{question_reject_session}/messages"), + 
Some(json!({ "message": QUESTION_PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); + + let reject_events = poll_events_until_match_from( + &app.app, + &question_reject_session, + reject_offset, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), + ) + .await; + let reject_events = truncate_question_events(&reject_events); + assert_session_snapshot("question_reject_events", normalize_events(&reject_events)); + + if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" + ), + None, + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); + assert_session_snapshot("question_reject", snapshot_status(status)); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/missing-question/reject" + ), + None, + ) + .await; + assert!(!status.is_success(), "missing question id reject should error"); + assert_session_snapshot( + "question_reject_missing", + json!({ + "status": status.as_u16(), + "payload": payload, + }), + ); + } + } +} diff --git a/server/packages/sandbox-agent/tests/sessions/reasoning.rs b/server/packages/sandbox-agent/tests/sessions/reasoning.rs new file mode 100644 index 0000000..6994d06 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/reasoning.rs @@ -0,0 +1,56 @@ +// Reasoning capability checks are isolated from baseline snapshots. +include!("../common/http.rs"); + +fn reasoning_prompt(agent: AgentId) -> &'static str { + if agent == AgentId::Mock { + "demo" + } else { + "Answer briefly and include your reasoning." 
+ } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn reasoning_events_present() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.reasoning { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("reasoning-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)) + .await; + let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": reasoning_prompt(config.agent) })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt"); + + let events = poll_events_until_match_from( + &app.app, + &session_id, + offset, + Duration::from_secs(120), + |events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event), + ) + .await; + assert!( + events_have_content_type(&events, "reasoning"), + "expected reasoning content for {}", + config.agent + ); + } +} diff --git a/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs new file mode 100644 index 0000000..ed14e76 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs @@ -0,0 +1,192 @@ +// Session lifecycle and streaming snapshots use the mock baseline as the single source of truth. 
+include!("../common/http.rs"); + +fn session_snapshot_suffix(prefix: &str) -> String { + snapshot_name(prefix, Some(AgentId::Mock)) +} + +fn assert_session_snapshot(prefix: &str, value: Value) { + insta::with_settings!({ + snapshot_suffix => session_snapshot_suffix(prefix), + }, { + insta::assert_yaml_snapshot!(value); + }); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn session_endpoints_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.session_lifecycle { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("snapshot-{}", config.agent.as_str()); + let permission_mode = test_permission_mode(config.agent); + let (status, created) = send_json( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": config.agent.as_str(), + "permissionMode": permission_mode + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session"); + assert_session_snapshot("create_session", normalize_create_session(&created)); + + let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await; + assert_eq!(status, StatusCode::OK, "list sessions"); + assert_session_snapshot("sessions_list", normalize_sessions(&sessions)); + + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": PROMPT })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send message"); + assert_session_snapshot("send_message", snapshot_status(status)); + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn http_events_snapshots() { + let configs = 
test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking event streaming. + if config.agent == AgentId::Opencode { + continue; + } + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.session_lifecycle { + continue; + } + run_http_events_snapshot(&app.app, config).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn sse_events_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. + if config.agent == AgentId::Opencode { + continue; + } + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.session_lifecycle { + continue; + } + run_sse_events_snapshot(&app.app, config).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn concurrency_snapshots() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.session_lifecycle { + continue; + } + run_concurrency_snapshot(&app.app, config).await; + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn turn_stream_route() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = 
fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.session_lifecycle { + continue; + } + run_turn_stream_check(&app.app, config).await; + } +} + +async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_a = format!("concurrent-a-{}", config.agent.as_str()); + let session_b = format!("concurrent-b-{}", config.agent.as_str()); + let perm_mode = test_permission_mode(config.agent); + create_session(app, config.agent, &session_a, perm_mode).await; + create_session(app, config.agent, &session_b, perm_mode).await; + let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await; + let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await; + + let app_a = app.clone(); + let app_b = app.clone(); + let send_a = send_message(&app_a, &session_a); + let send_b = send_message(&app_b, &session_b); + tokio::join!(send_a, send_b); + + let app_a = app.clone(); + let app_b = app.clone(); + let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120)); + let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120)); + let (events_a, events_b) = tokio::join!(poll_a, poll_b); + let events_a = truncate_after_first_stop(&events_a); + let events_b = truncate_after_first_stop(&events_b); + + assert!( + !events_a.is_empty(), + "no events collected for concurrent session a {}", + config.agent + ); + assert!( + !events_b.is_empty(), + "no events collected for concurrent session b {}", + config.agent + ); + assert!( + should_stop(&events_a), + "timed out waiting for assistant/error event for concurrent session a {}", + config.agent + ); + assert!( + should_stop(&events_b), + "timed out waiting for assistant/error event for concurrent session b {}", + config.agent + ); + + let snapshot = 
json!({ + "session_a": normalize_events(&events_a), + "session_b": normalize_events(&events_b), + }); + assert_session_snapshot("concurrency_events", snapshot); +} diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap new file mode 100644 index 0000000..b9828b2 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_events_mock.snap @@ -0,0 +1,48 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/permissions.rs +expression: value +--- +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 1 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 2 + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 4 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 5 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 6 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap similarity index 53% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap rename to 
server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap index 006333a..dcaa414 100644 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_reply_missing_claude.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__permissions__assert_session_snapshot@permission_reply_missing_mock.snap @@ -1,7 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1011 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" +source: server/packages/sandbox-agent/tests/sessions/permissions.rs +expression: value --- payload: detail: "invalid request: unknown permission id: missing-permission" diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap new file mode 100644 index 0000000..35e0f56 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_events_mock.snap @@ -0,0 +1,48 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value +--- +- item: + content_types: + - text + kind: message + role: user + status: in_progress + seq: 1 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 2 + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 4 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 5 + type: item.delta +- item: + content_types: + - 
text + kind: message + role: assistant + status: completed + seq: 6 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap similarity index 53% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap index 8585cd4..5a484f7 100644 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_codex.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reject_missing_mock.snap @@ -1,7 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1078 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value --- payload: detail: "invalid request: unknown question id: missing-question" diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap new file mode 100644 index 0000000..35e0f56 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_events_mock.snap @@ -0,0 +1,48 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value +--- +- item: + 
content_types: + - text + kind: message + role: user + status: in_progress + seq: 1 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 2 + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed +- item: + content_types: + - text + kind: message + role: assistant + status: in_progress + seq: 4 + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 5 + type: item.delta +- item: + content_types: + - text + kind: message + role: assistant + status: completed + seq: 6 + type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap similarity index 53% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap index c3dac9d..5a484f7 100644 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_missing_claude.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__questions__assert_session_snapshot@question_reply_missing_mock.snap @@ -1,7 +1,6 @@ --- -source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1072 -expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })" +source: server/packages/sandbox-agent/tests/sessions/questions.rs +expression: value --- payload: detail: "invalid request: unknown question id: missing-question" diff --git 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap similarity index 51% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap index a6fdd2f..d2ed9f3 100644 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@concurrency_events_mock.snap @@ -1,38 +1,43 @@ --- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 1351 -expression: snapshot +source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs +expression: value --- session_a: - - metadata: true + - item: + content_types: + - text + kind: message + role: user + status: in_progress seq: 1 - session: started - source: daemon - synthetic: true - type: session.started - - metadata: true + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" seq: 2 - session: started - source: agent - synthetic: false - type: session.started + type: item.delta + - item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed - item: content_types: - text kind: message role: assistant status: in_progress - seq: 3 - source: agent - synthetic: false + seq: 4 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 4 - source: agent - synthetic: false + seq: 5 type: item.delta - item: content_types: @@ -40,40 +45,44 @@ session_a: kind: 
message role: assistant status: completed - seq: 5 - source: agent - synthetic: false + seq: 6 type: item.completed session_b: - - metadata: true + - item: + content_types: + - text + kind: message + role: user + status: in_progress seq: 1 - session: started - source: daemon - synthetic: true - type: session.started - - metadata: true + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" seq: 2 - session: started - source: agent - synthetic: false - type: session.started + type: item.delta + - item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed - item: content_types: - text kind: message role: assistant status: in_progress - seq: 3 - source: agent - synthetic: false + seq: 4 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 4 - source: agent - synthetic: false + seq: 5 type: item.delta - item: content_types: @@ -81,7 +90,5 @@ session_b: kind: message role: assistant status: completed - seq: 5 - source: agent - synthetic: false + seq: 6 type: item.completed diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap new file mode 100644 index 0000000..8a578ee --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@create_session_mock.snap @@ -0,0 +1,6 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs +expression: value +--- +healthy: true +nativeSessionId: "" diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap 
b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap new file mode 100644 index 0000000..b735d66 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@send_message_mock.snap @@ -0,0 +1,5 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs +expression: value +--- +status: 204 diff --git a/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap new file mode 100644 index 0000000..fba833a --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__assert_session_snapshot@sessions_list_mock.snap @@ -0,0 +1,6 @@ +--- +source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs +expression: value +--- +hasExpectedFields: true +sessionCount: 1 diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap similarity index 50% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap index 7ad3222..0a6a9d0 100644 --- a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap +++ 
b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_http_events_snapshot@http_events_mock.snap @@ -1,37 +1,42 @@ --- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 811 +source: server/packages/sandbox-agent/tests/sessions/../common/http.rs expression: normalized --- -- metadata: true +- item: + content_types: + - text + kind: message + role: user + status: in_progress seq: 1 - session: started - source: daemon - synthetic: true - type: session.started -- metadata: true + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 2 - session: started - source: agent - synthetic: false - type: session.started + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed - item: content_types: - text kind: message role: assistant status: in_progress - seq: 3 - source: agent - synthetic: false + seq: 4 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 4 - source: agent - synthetic: false + seq: 5 type: item.delta - item: content_types: @@ -39,7 +44,5 @@ expression: normalized kind: message role: assistant status: completed - seq: 5 - source: agent - synthetic: false + seq: 6 type: item.completed diff --git a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap similarity index 50% rename from server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap rename to server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap index 1686c1e..0a6a9d0 100644 --- 
a/server/packages/sandbox-agent/tests/http/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_mock.snap +++ b/server/packages/sandbox-agent/tests/sessions/snapshots/sessions__sessions__session_lifecycle__run_sse_events_snapshot@sse_events_mock.snap @@ -1,29 +1,42 @@ --- -source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs -assertion_line: 804 +source: server/packages/sandbox-agent/tests/sessions/../common/http.rs expression: normalized --- -- metadata: true +- item: + content_types: + - text + kind: message + role: user + status: in_progress seq: 1 - session: started - type: session.started -- metadata: true + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 2 - session: started - type: session.started + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 3 + type: item.completed - item: content_types: - text kind: message role: assistant status: in_progress - seq: 3 + seq: 4 type: item.started - delta: delta: "" item_id: "" native_item_id: "" - seq: 4 + seq: 5 type: item.delta - item: content_types: @@ -31,5 +44,5 @@ expression: normalized kind: message role: assistant status: completed - seq: 5 + seq: 6 type: item.completed diff --git a/server/packages/sandbox-agent/tests/sessions/status.rs b/server/packages/sandbox-agent/tests/sessions/status.rs new file mode 100644 index 0000000..c2e0389 --- /dev/null +++ b/server/packages/sandbox-agent/tests/sessions/status.rs @@ -0,0 +1,61 @@ +// Status capability checks are isolated from baseline snapshots. +include!("../common/http.rs"); + +fn status_prompt(agent: AgentId) -> &'static str { + if agent == AgentId::Mock { + "status" + } else { + "Provide a short status update." 
+ } +} + +fn events_have_status(events: &[Value]) -> bool { + events.iter().any(|event| event_is_status_item(event)) + || events_have_content_type(events, "status") +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn status_events_present() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + + for config in &configs { + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.status { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("status-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)) + .await; + let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await; + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": status_prompt(config.agent) })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt"); + + let events = poll_events_until_match_from( + &app.app, + &session_id, + offset, + Duration::from_secs(120), + |events| events_have_status(events) || events.iter().any(is_error_event), + ) + .await; + assert!( + events_have_status(&events), + "expected status events for {}", + config.agent + ); + } +}