fix: add docker-setup action, runtime Dockerfile, and align release workflow

- Add .github/actions/docker-setup composite action (from rivet)
- Add docker/runtime/Dockerfile for Docker image builds
- Update release.yaml to match rivet patterns:
  - Use corepack enable instead of pnpm/action-setup
  - Add reuse_engine_version input
  - Add Docker job with Depot runners
  - Use --no-frozen-lockfile for pnpm install
  - Add id-token permission for setup job
This commit is contained in:
Nathan Flurry 2026-01-27 19:29:54 -08:00
parent f05389307a
commit b49776145b
82 changed files with 1415 additions and 2430 deletions

View file

@ -0,0 +1,31 @@
# Composite action: prepares a job for Docker image builds.
#   1. Installs Docker Buildx.
#   2. Logs in to Docker Hub with the supplied credentials.
#   3. Writes a netrc file under the runner temp directory containing the
#      GitHub token, so later steps can use it for authenticated Git pulls.
name: 'Docker Setup'
description: 'Set up Docker Buildx and log in to Docker Hub'
inputs:
  docker_username:
    description: 'Docker Hub username'
    required: true
  docker_password:
    description: 'Docker Hub password'
    required: true
  github_token:
    description: 'GitHub token'
    required: true
runs:
  using: 'composite'
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Log in to Docker Hub
      uses: docker/login-action@v3
      with:
        username: ${{ inputs.docker_username }}
        password: ${{ inputs.docker_password }}
    # This will be used as a secret to authenticate with Git repo pulls
    - name: Create .netrc file
      shell: bash
      env:
        # Pass values through the environment instead of interpolating
        # `${{ ... }}` into the script text, so the token is never parsed as
        # shell syntax.
        NETRC_PATH: ${{ runner.temp }}/netrc
        NETRC_GITHUB_TOKEN: ${{ inputs.github_token }}
      run: |
        # Credential file: create it readable by the current user only.
        umask 077
        {
          echo "machine github.com"
          echo "login x-access-token"
          echo "password ${NETRC_GITHUB_TOKEN}"
        } > "${NETRC_PATH}"

View file

@ -4,14 +4,18 @@ on:
workflow_dispatch: workflow_dispatch:
inputs: inputs:
version: version:
description: "Version (e.g. 0.1.0 or v0.1.0)" description: 'Version'
required: true required: true
type: string type: string
latest: latest:
description: "Latest" description: 'Latest'
required: true required: true
type: boolean type: boolean
default: true default: true
reuse_engine_version:
description: 'Reuse artifacts from this version (skips building)'
required: false
type: string
defaults: defaults:
run: run:
@ -27,7 +31,10 @@ jobs:
name: "Setup" name: "Setup"
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
permissions: permissions:
# Allow pushing to GitHub
contents: write contents: write
# Allows authentication
id-token: write
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@ -35,20 +42,29 @@ jobs:
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4 - uses: actions/setup-node@v4
with: with:
node-version: 20 node-version: 20
cache: pnpm
- run: corepack enable
- name: Setup - name: Setup
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
run: | run: |
# Configure Git
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
# Authenticate with NPM
cat << EOF > ~/.npmrc
//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}
EOF
# Install dependencies # Install dependencies
pnpm install pnpm install --no-frozen-lockfile
# Install tsx globally # Install tsx globally
npm install -g tsx npm install -g tsx
@ -60,54 +76,57 @@ jobs:
CMD="$CMD --no-latest" CMD="$CMD --no-latest"
fi fi
if [ -n "${{ inputs.reuse_engine_version }}" ]; then
CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
fi
eval "$CMD" eval "$CMD"
binaries: binaries:
name: "Build & Upload Binaries" name: "Build & Upload Binaries"
needs: [setup] needs: [setup]
if: ${{ !inputs.reuse_engine_version }}
strategy: strategy:
matrix: matrix:
include: include:
- platform: linux - platform: linux
runner: depot-ubuntu-24.04-8
target: x86_64-unknown-linux-musl target: x86_64-unknown-linux-musl
binary_ext: "" binary_ext: ""
arch: x86_64 arch: x86_64
- platform: windows - platform: windows
runner: depot-ubuntu-24.04-8
target: x86_64-pc-windows-gnu target: x86_64-pc-windows-gnu
binary_ext: ".exe" binary_ext: ".exe"
arch: x86_64 arch: x86_64
- platform: macos - platform: macos
runner: depot-ubuntu-24.04-8
target: x86_64-apple-darwin target: x86_64-apple-darwin
binary_ext: "" binary_ext: ""
arch: x86_64 arch: x86_64
- platform: macos - platform: macos
runner: depot-ubuntu-24.04-8
target: aarch64-apple-darwin target: aarch64-apple-darwin
binary_ext: "" binary_ext: ""
arch: aarch64 arch: aarch64
runs-on: ubuntu-24.04 runs-on: ${{ matrix.runner }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4
with:
node-version: 20
cache: pnpm
- name: Build inspector frontend
run: |
pnpm install
SANDBOX_AGENT_SKIP_INSPECTOR=1 pnpm --filter @sandbox-agent/inspector build
- name: Set up Docker Buildx - name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3 uses: docker/setup-buildx-action@v3
- name: Build binary - name: Build binary
run: | run: |
# Use Docker BuildKit
export DOCKER_BUILDKIT=1
# Build the binary using our Dockerfile
docker/release/build.sh ${{ matrix.target }} docker/release/build.sh ${{ matrix.target }}
# Make sure dist directory exists and binary is there
ls -la dist/ ls -la dist/
- name: Upload to R2 - name: Upload to R2
@ -115,10 +134,11 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
run: | run: |
# Install AWS CLI # Install dependencies for AWS CLI
sudo apt-get update sudo apt-get update
sudo apt-get install -y unzip curl sudo apt-get install -y unzip curl
# Install AWS CLI
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip unzip awscliv2.zip
sudo ./aws/install --update sudo ./aws/install --update
@ -126,7 +146,7 @@ jobs:
COMMIT_SHA_SHORT="${GITHUB_SHA::7}" COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
# Upload to commit directory for later promotion # Must specify --checksum-algorithm for compatibility with R2
aws s3 cp \ aws s3 cp \
"${BINARY_PATH}" \ "${BINARY_PATH}" \
"s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \ "s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
@ -134,10 +154,48 @@ jobs:
--endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \ --endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
--checksum-algorithm CRC32 --checksum-algorithm CRC32
docker:
name: "Build & Push Docker Images"
needs: [setup]
if: ${{ !inputs.reuse_engine_version }}
strategy:
matrix:
include:
- platform: linux/arm64
runner: depot-ubuntu-24.04-arm-8
arch_suffix: -arm64
- platform: linux/amd64
runner: depot-ubuntu-24.04-8
arch_suffix: -amd64
runs-on: ${{ matrix.runner }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set outputs
id: vars
run: echo "sha_short=${GITHUB_SHA::7}" >> $GITHUB_OUTPUT
- uses: ./.github/actions/docker-setup
with:
docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Build & Push
uses: docker/build-push-action@v4
with:
context: .
push: true
tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }}
file: docker/runtime/Dockerfile
platforms: ${{ matrix.platform }}
complete: complete:
name: "Complete" name: "Complete"
needs: [setup, binaries] needs: [setup, docker, binaries]
if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }} if: ${{ always() && !cancelled() && needs.setup.result == 'success' && (needs.docker.result == 'success' || needs.docker.result == 'skipped') && (needs.binaries.result == 'success' || needs.binaries.result == 'skipped') }}
runs-on: ubuntu-24.04 runs-on: ubuntu-24.04
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@ -146,17 +204,21 @@ jobs:
- uses: dtolnay/rust-toolchain@stable - uses: dtolnay/rust-toolchain@stable
- uses: pnpm/action-setup@v4
- uses: actions/setup-node@v4 - uses: actions/setup-node@v4
with: with:
node-version: 20 node-version: 20
registry-url: "https://registry.npmjs.org" registry-url: "https://registry.npmjs.org"
cache: pnpm
- run: corepack enable
- uses: ./.github/actions/docker-setup
with:
docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
github_token: ${{ secrets.GITHUB_TOKEN }}
- name: Complete - name: Complete
env: env:
# https://cli.github.com/manual/gh_help_environment
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
@ -169,7 +231,7 @@ jobs:
EOF EOF
# Install dependencies # Install dependencies
pnpm install pnpm install --no-frozen-lockfile
# Install tsx globally # Install tsx globally
npm install -g tsx npm install -g tsx
@ -181,4 +243,8 @@ jobs:
CMD="$CMD --no-latest" CMD="$CMD --no-latest"
fi fi
if [ -n "${{ inputs.reuse_engine_version }}" ]; then
CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
fi
eval "$CMD" eval "$CMD"

View file

@ -5,10 +5,10 @@ members = ["server/packages/*"]
[workspace.package] [workspace.package]
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
authors = ["Sandbox Agent Contributors"] authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
license = "Apache-2.0" license = "Apache-2.0"
repository = "https://github.com/rivet-dev/sandbox-agent" repository = "https://github.com/rivet-dev/sandbox-agent"
description = "Universal agent API for AI coding assistants" description = "Universal API for automatic coding agents in sandboxes. Supports Claude Code, Codex, OpenCode, and Amp."
[workspace.dependencies] [workspace.dependencies]
# Internal crates # Internal crates

51
docker/runtime/Dockerfile Normal file
View file

@ -0,0 +1,51 @@
# syntax=docker/dockerfile:1.10.0

# Build stage - compile the binary
FROM rust:1.88.0 AS builder

# Populated by BuildKit with the architecture of the image being built
# (e.g. "amd64", "arm64"). Must be re-declared inside the stage to be visible.
ARG TARGETARCH

ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y \
    musl-tools \
    musl-dev \
    pkg-config \
    ca-certificates \
    git && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Pick the musl Rust target that matches the image architecture so that the
# linux/arm64 image does not end up with an x86_64 binary. An empty TARGETARCH
# (builder that does not set it) falls back to the previous x86_64 behaviour.
RUN case "${TARGETARCH}" in \
        amd64|"") echo "x86_64-unknown-linux-musl" ;; \
        arm64) echo "aarch64-unknown-linux-musl" ;; \
        *) echo "unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
    esac > /rust-target && \
    rustup target add "$(cat /rust-target)"

WORKDIR /build
COPY . .

# Build static binary
# The cargo registry, git checkouts and target directory are cache mounts, so
# the finished binary is copied out to /sandbox-agent within the same RUN.
RUN --mount=type=cache,target=/usr/local/cargo/registry \
    --mount=type=cache,target=/usr/local/cargo/git \
    --mount=type=cache,target=/build/target \
    RUST_TARGET="$(cat /rust-target)" && \
    SANDBOX_AGENT_SKIP_INSPECTOR=1 \
    RUSTFLAGS="-C target-feature=+crt-static" \
    cargo build -p sandbox-agent --release --target "${RUST_TARGET}" && \
    cp "target/${RUST_TARGET}/release/sandbox-agent" /sandbox-agent

# Runtime stage - minimal image
FROM debian:bookworm-slim

RUN apt-get update && apt-get install -y \
    ca-certificates \
    curl \
    git && \
    rm -rf /var/lib/apt/lists/*

# Copy the binary from builder
COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
RUN chmod +x /usr/local/bin/sandbox-agent

# Create non-root user
RUN useradd -m -s /bin/bash sandbox
USER sandbox
WORKDIR /home/sandbox

EXPOSE 2468

ENTRYPOINT ["sandbox-agent"]
CMD ["--host", "0.0.0.0", "--port", "2468"]

View file

@ -21,6 +21,7 @@ Capabilities tell you which features are supported for the selected agent:
- `tool_calls` and `tool_results` indicate tool execution events. - `tool_calls` and `tool_results` indicate tool execution events.
- `questions` and `permissions` indicate HITL flows. - `questions` and `permissions` indicate HITL flows.
- `plan_mode` indicates that the agent supports plan-only execution. - `plan_mode` indicates that the agent supports plan-only execution.
- `reasoning` and `status` indicate that the agent can emit reasoning/status content parts.
Use these to enable or disable UI affordances (tool panels, approval buttons, etc.). Use these to enable or disable UI affordances (tool panels, approval buttons, etc.).

View file

@ -4,7 +4,8 @@
"title": "sandbox-agent", "title": "sandbox-agent",
"description": "", "description": "",
"contact": { "contact": {
"name": "Sandbox Agent Contributors" "name": "Rivet Gaming, LLC",
"email": "developer@rivet.gg"
}, },
"license": { "license": {
"name": "Apache-2.0" "name": "Apache-2.0"
@ -662,6 +663,7 @@
"sessionLifecycle", "sessionLifecycle",
"errorEvents", "errorEvents",
"reasoning", "reasoning",
"status",
"commandExecution", "commandExecution",
"fileChanges", "fileChanges",
"mcpTools", "mcpTools",
@ -706,6 +708,9 @@
"type": "boolean", "type": "boolean",
"description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)" "description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)"
}, },
"status": {
"type": "boolean"
},
"streamingDeltas": { "streamingDeltas": {
"type": "boolean" "type": "boolean"
}, },

View file

@ -2,6 +2,7 @@ import { Daytona } from "@daytonaio/sdk";
import { pathToFileURL } from "node:url"; import { pathToFileURL } from "node:url";
import { import {
ensureUrl, ensureUrl,
logInspectorUrl,
runPrompt, runPrompt,
waitForHealth, waitForHealth,
} from "../shared/sandbox-agent-client.ts"; } from "../shared/sandbox-agent-client.ts";
@ -39,6 +40,7 @@ export async function setupDaytonaSandboxAgent(): Promise<{
const baseUrl = ensureUrl(preview.url); const baseUrl = ensureUrl(preview.url);
await waitForHealth({ baseUrl, token, extraHeaders }); await waitForHealth({ baseUrl, token, extraHeaders });
logInspectorUrl({ baseUrl, token });
const cleanup = async () => { const cleanup = async () => {
try { try {

View file

@ -2,6 +2,7 @@ import Docker from "dockerode";
import { pathToFileURL } from "node:url"; import { pathToFileURL } from "node:url";
import { import {
ensureUrl, ensureUrl,
logInspectorUrl,
runPrompt, runPrompt,
waitForHealth, waitForHealth,
} from "../shared/sandbox-agent-client.ts"; } from "../shared/sandbox-agent-client.ts";
@ -83,6 +84,7 @@ export async function setupDockerSandboxAgent(): Promise<{
const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`); const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`);
await waitForHealth({ baseUrl, token }); await waitForHealth({ baseUrl, token });
logInspectorUrl({ baseUrl, token });
const cleanup = async () => { const cleanup = async () => {
try { try {

View file

@ -2,6 +2,7 @@ import { Sandbox } from "@e2b/code-interpreter";
import { pathToFileURL } from "node:url"; import { pathToFileURL } from "node:url";
import { import {
ensureUrl, ensureUrl,
logInspectorUrl,
runPrompt, runPrompt,
waitForHealth, waitForHealth,
} from "../shared/sandbox-agent-client.ts"; } from "../shared/sandbox-agent-client.ts";
@ -45,6 +46,7 @@ export async function setupE2BSandboxAgent(): Promise<{
const baseUrl = ensureUrl(sandbox.getHost(port)); const baseUrl = ensureUrl(sandbox.getHost(port));
await waitForHealth({ baseUrl, token }); await waitForHealth({ baseUrl, token });
logInspectorUrl({ baseUrl, token });
const cleanup = async () => { const cleanup = async () => {
try { try {

View file

@ -16,6 +16,27 @@ export function ensureUrl(rawUrl: string): string {
return `https://${rawUrl}`; return `https://${rawUrl}`;
} }
const INSPECTOR_URL = "https://inspect.sandboxagent.dev";
/**
 * Builds a link to the hosted inspector for a sandbox-agent server.
 *
 * The base URL is passed through `ensureUrl` and `normalizeBaseUrl` and sent
 * as the `url` query parameter; `token` is appended as a `token` query
 * parameter only when it is a non-empty string.
 */
export function buildInspectorUrl({
  baseUrl,
  token,
}: {
  baseUrl: string;
  token?: string;
}): string {
  const query = new URLSearchParams();
  query.set("url", normalizeBaseUrl(ensureUrl(baseUrl)));
  if (token) {
    query.set("token", token);
  }
  const queryString = query.toString();
  return `${INSPECTOR_URL}?${queryString}`;
}
/**
 * Prints the inspector link for the given server to stdout.
 *
 * Note: when a token is supplied it is part of the printed URL.
 */
export function logInspectorUrl({ baseUrl, token }: { baseUrl: string; token?: string }): void {
  const inspectorUrl = buildInspectorUrl({ baseUrl, token });
  console.log(`Inspector: ${inspectorUrl}`);
}
type HeaderOptions = { type HeaderOptions = {
token?: string; token?: string;
extraHeaders?: Record<string, string>; extraHeaders?: Record<string, string>;

View file

@ -2,6 +2,7 @@ import { Sandbox } from "@vercel/sandbox";
import { pathToFileURL } from "node:url"; import { pathToFileURL } from "node:url";
import { import {
ensureUrl, ensureUrl,
logInspectorUrl,
runPrompt, runPrompt,
waitForHealth, waitForHealth,
} from "../shared/sandbox-agent-client.ts"; } from "../shared/sandbox-agent-client.ts";
@ -61,6 +62,7 @@ export async function setupVercelSandboxAgent(): Promise<{
const baseUrl = ensureUrl(sandbox.domain(port)); const baseUrl = ensureUrl(sandbox.domain(port));
await waitForHealth({ baseUrl, token }); await waitForHealth({ baseUrl, token });
logInspectorUrl({ baseUrl, token });
const cleanup = async () => { const cleanup = async () => {
try { try {

View file

@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te
- Agent flow coverage in `agent-flows/` - Agent flow coverage in `agent-flows/`
- Agent management coverage in `agent-management/` - Agent management coverage in `agent-management/`
- Shared server manager coverage in `server-manager/` - Shared server manager coverage in `server-manager/`
- HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`) - HTTP endpoint snapshots in `http/` (snapshots in `http/snapshots/`)
- Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`)
- UI coverage in `ui/` - UI coverage in `ui/`
- Shared helpers in `common/` - Shared helpers in `common/`
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/` - Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
## Snapshot tests ## Snapshot tests
The HTTP/SSE snapshot suite entrypoint lives in: HTTP endpoint snapshot entrypoint:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`) - `server/packages/sandbox-agent/tests/http_endpoints.rs`
Session snapshot entrypoint:
- `server/packages/sandbox-agent/tests/sessions.rs`
Snapshots are written to: Snapshots are written to:
- `server/packages/sandbox-agent/tests/http/snapshots/` - `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots)
- `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots)
## Agent selection ## Agent selection
@ -71,6 +76,7 @@ To keep snapshots deterministic:
- IDs, timestamps, native IDs - IDs, timestamps, native IDs
- text content, tool inputs/outputs, provider-specific metadata - text content, tool inputs/outputs, provider-specific metadata
- `source` and `synthetic` flags (these are implementation details) - `source` and `synthetic` flags (these are implementation details)
- Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across agents; validate those content types separately in their capability-specific tests.
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly. - The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
- Event streams are truncated after the first assistant or error event. - Event streams are truncated after the first assistant or error event.
- Permission flow snapshots are truncated after the permission request (or first assistant) event. - Permission flow snapshots are truncated after the permission request (or first assistant) event.
@ -81,14 +87,19 @@ To keep snapshots deterministic:
## Typical commands ## Typical commands
Run only Claude snapshots: Run only Claude session snapshots:
``` ```
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions
``` ```
Run all detected agents: Run all detected session snapshots:
``` ```
cargo test -p sandbox-agent --test http_sse_snapshots cargo test -p sandbox-agent --test sessions
```
Run HTTP endpoint snapshots:
```
cargo test -p sandbox-agent --test http_endpoints
``` ```
## Universal Schema ## Universal Schema

View file

@ -2913,6 +2913,7 @@ pub struct AgentCapabilities {
pub session_lifecycle: bool, pub session_lifecycle: bool,
pub error_events: bool, pub error_events: bool,
pub reasoning: bool, pub reasoning: bool,
pub status: bool,
pub command_execution: bool, pub command_execution: bool,
pub file_changes: bool, pub file_changes: bool,
pub mcp_tools: bool, pub mcp_tools: bool,
@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: false, session_lifecycle: false,
error_events: false, error_events: false,
reasoning: false, reasoning: false,
status: false,
command_execution: false, command_execution: false,
file_changes: false, file_changes: false,
mcp_tools: false, mcp_tools: false,
@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true, session_lifecycle: true,
error_events: true, error_events: true,
reasoning: true, reasoning: true,
status: true,
command_execution: true, command_execution: true,
file_changes: true, file_changes: true,
mcp_tools: true, mcp_tools: true,
@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true, session_lifecycle: true,
error_events: true, error_events: true,
reasoning: false, reasoning: false,
status: true,
command_execution: false, command_execution: false,
file_changes: false, file_changes: false,
mcp_tools: false, mcp_tools: false,
@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: false, session_lifecycle: false,
error_events: true, error_events: true,
reasoning: false, reasoning: false,
status: false,
command_execution: false, command_execution: false,
file_changes: false, file_changes: false,
mcp_tools: false, mcp_tools: false,
@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true, session_lifecycle: true,
error_events: true, error_events: true,
reasoning: true, reasoning: true,
status: true,
command_execution: true, command_execution: true,
file_changes: true, file_changes: true,
mcp_tools: true, mcp_tools: true,

View file

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use axum::body::{Body, Bytes}; use axum::body::{Body, Bytes};
@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) {
assert_eq!(status, StatusCode::NO_CONTENT, "send message"); assert_eq!(status, StatusCode::NO_CONTENT, "send message");
} }
async fn poll_events_until( async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
app: &Router, let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
session_id: &str, let (status, payload) = send_json(app, Method::GET, &path, None).await;
timeout: Duration, assert_eq!(status, StatusCode::OK, "poll events");
) -> Vec<Value> { let new_events = payload
let start = Instant::now(); .get("events")
let mut offset = 0u64; .and_then(Value::as_array)
let mut events = Vec::new(); .cloned()
while start.elapsed() < timeout { .unwrap_or_default();
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); let new_offset = new_events
let (status, payload) = send_json(app, Method::GET, &path, None).await; .last()
assert_eq!(status, StatusCode::OK, "poll events"); .and_then(|event| event.get("sequence"))
let new_events = payload .and_then(Value::as_u64)
.get("events") .unwrap_or(offset);
.and_then(Value::as_array) (new_events, new_offset)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if should_stop(&events) {
break;
}
}
tokio::time::sleep(Duration::from_millis(800)).await;
}
events
} }
async fn read_sse_events( async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
let start = Instant::now();
let mut offset = 0u64;
loop {
if start.elapsed() >= timeout {
break;
}
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
if new_events.is_empty() {
if offset == 0 {
tokio::time::sleep(Duration::from_millis(200)).await;
continue;
}
break;
}
offset = new_offset;
}
offset
}
async fn poll_events_until_from(
app: &Router, app: &Router,
session_id: &str, session_id: &str,
offset: u64,
timeout: Duration,
) -> Vec<Value> {
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
}
/// Polls the session's events starting at offset 0.
///
/// Convenience wrapper around `poll_events_until_from` for callers that want
/// the stream from its beginning.
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    let from_start: u64 = 0;
    poll_events_until_from(app, session_id, from_start, timeout).await
}
async fn read_sse_events_from(
app: &Router,
session_id: &str,
offset: u64,
timeout: Duration, timeout: Duration,
) -> Vec<Value> { ) -> Vec<Value> {
let request = Request::builder() let request = Request::builder()
.method(Method::GET) .method(Method::GET)
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0")) .uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
.body(Body::empty()) .body(Body::empty())
.expect("sse request"); .expect("sse request");
let response = app let response = app
@ -291,6 +307,10 @@ async fn read_sse_events(
events events
} }
/// Reads the session's SSE event stream starting at offset 0.
///
/// Convenience wrapper around `read_sse_events_from`.
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    let from_start: u64 = 0;
    read_sse_events_from(app, session_id, from_start, timeout).await
}
async fn read_turn_stream_events( async fn read_turn_stream_events(
app: &Router, app: &Router,
session_id: &str, session_id: &str,
@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value {
!events.iter().any(is_unparsed_event), !events.iter().any(is_unparsed_event),
"agent.unparsed event encountered" "agent.unparsed event encountered"
); );
let normalized = events let scrubbed = scrub_events(events);
let normalized = scrubbed
.iter() .iter()
.enumerate() .enumerate()
.map(|(idx, event)| normalize_event(event, idx + 1)) .map(|(idx, event)| normalize_event(event, idx + 1))
@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value {
Value::Array(normalized) Value::Array(normalized)
} }
/// Returns a copy of `events` with scrubbed items removed.
///
/// * `item.started` / `item.completed` events are dropped when their
///   `data.item` satisfies `should_scrub_item`; the item's ids are remembered.
/// * `item.delta` events are dropped when `data.item_id` or
///   `data.native_item_id` matches an id remembered from an earlier event.
/// * Every other event is kept as-is, in the original order.
fn scrub_events(events: &[Value]) -> Vec<Value> {
    let mut scrubbed_ids: HashSet<String> = HashSet::new();
    let mut kept = Vec::new();

    for event in events {
        let data = event.get("data");
        let kind = event.get("type").and_then(Value::as_str).unwrap_or("");

        let drop_event = match kind {
            "item.started" | "item.completed" => match data.and_then(|d| d.get("item")) {
                Some(item) if should_scrub_item(item) => {
                    record_item_ids(item, &mut scrubbed_ids);
                    true
                }
                _ => false,
            },
            // Deltas only carry ids, so they are matched against ids recorded
            // from a previously seen started/completed event.
            "item.delta" => ["item_id", "native_item_id"].iter().any(|key| {
                data.and_then(|d| d.get(*key))
                    .and_then(Value::as_str)
                    .is_some_and(|id| scrubbed_ids.contains(id))
            }),
            _ => false,
        };

        if !drop_event {
            kept.push(event.clone());
        }
    }

    kept
}
/// Decides whether an item should be removed from normalized snapshots.
///
/// An item is scrubbed when its `kind` is `"status"`, or when it has at least
/// one `"reasoning"` content part and every content part is either
/// `"reasoning"` or `"status"`.
fn should_scrub_item(item: &Value) -> bool {
    let is_status_kind = item.get("kind").and_then(Value::as_str) == Some("status");
    if is_status_kind {
        return true;
    }

    let types = item_content_types(item);
    let has_reasoning = types.iter().any(|ty| ty == "reasoning");
    let only_scrubbable = types
        .iter()
        .all(|ty| matches!(ty.as_str(), "reasoning" | "status"));
    has_reasoning && only_scrubbable
}
/// Adds the item's `item_id` and `native_item_id` (whichever are present as
/// strings) to `ids`.
fn record_item_ids(item: &Value, ids: &mut HashSet<String>) {
    ids.extend(
        ["item_id", "native_item_id"]
            .into_iter()
            .filter_map(|key| item.get(key).and_then(Value::as_str))
            .map(str::to_owned),
    );
}
fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> { fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
if let Some(idx) = events if let Some(idx) = events
.iter() .iter()
@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value {
if let Some(event_type) = event.get("type").and_then(Value::as_str) { if let Some(event_type) = event.get("type").and_then(Value::as_str) {
map.insert("type".to_string(), Value::String(event_type.to_string())); map.insert("type".to_string(), Value::String(event_type.to_string()));
} }
if let Some(source) = event.get("source").and_then(Value::as_str) {
map.insert("source".to_string(), Value::String(source.to_string()));
}
if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) {
map.insert("synthetic".to_string(), Value::Bool(synthetic));
}
let data = event.get("data").unwrap_or(&Value::Null); let data = event.get("data").unwrap_or(&Value::Null);
match event.get("type").and_then(Value::as_str).unwrap_or("") { match event.get("type").and_then(Value::as_str).unwrap_or("") {
"session.started" => { "session.started" => {
@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value {
let types = content let types = content
.iter() .iter()
.filter_map(|part| part.get("type").and_then(Value::as_str)) .filter_map(|part| part.get("type").and_then(Value::as_str))
.filter(|value| *value != "reasoning" && *value != "status")
.map(|value| Value::String(value.to_string())) .map(|value| Value::String(value.to_string()))
.collect::<Vec<_>>(); .collect::<Vec<_>>();
map.insert("content_types".to_string(), Value::Array(types)); map.insert("content_types".to_string(), Value::Array(types));
@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value {
Value::Object(map) Value::Object(map)
} }
/// Collects the string `type` of every part in the item's `content` array.
///
/// Returns an empty vector when `content` is missing or not an array; parts
/// without a string `type` are skipped.
fn item_content_types(item: &Value) -> Vec<String> {
    let Some(parts) = item.get("content").and_then(Value::as_array) else {
        return Vec::new();
    };
    parts
        .iter()
        .filter_map(|part| part.get("type"))
        .filter_map(Value::as_str)
        .map(String::from)
        .collect()
}
fn event_content_types(event: &Value) -> Vec<String> {
event
.get("data")
.and_then(|data| data.get("item"))
.map(item_content_types)
.unwrap_or_default()
}
/// True when the event's `data.item.kind` is the string `"status"`.
fn event_is_status_item(event: &Value) -> bool {
    let kind = event
        .get("data")
        .and_then(|data| data.get("item"))
        .and_then(|item| item.get("kind"))
        .and_then(Value::as_str);
    kind == Some("status")
}
/// True when any event's item contains a content part whose type equals
/// `content_type`.
fn events_have_content_type(events: &[Value], content_type: &str) -> bool {
    for event in events {
        let types = event_content_types(event);
        if types.iter().any(|found| found == content_type) {
            return true;
        }
    }
    false
}
fn normalize_session_end(data: &Value) -> Value { fn normalize_session_end(data: &Value) -> Value {
let mut map = Map::new(); let mut map = Map::new();
if let Some(reason) = data.get("reason").and_then(Value::as_str) { if let Some(reason) = data.get("reason").and_then(Value::as_str) {
@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
} }
/// Polls the session's events, starting after `offset`, until `stop` accepts
/// the accumulated events or `timeout` elapses.
///
/// Each round fetches one page via `fetch_events_once`. Non-empty pages are
/// appended to the result and advance the cursor, after which `stop` is
/// evaluated on everything collected so far. Rounds are separated by an 800 ms
/// sleep. Returns whatever was collected, which may be empty on timeout.
async fn poll_events_until_match_from<F>(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
    stop: F,
) -> Vec<Value>
where
    F: Fn(&[Value]) -> bool,
{
    let started_at = Instant::now();
    let mut cursor = offset;
    let mut collected = Vec::new();

    loop {
        if started_at.elapsed() >= timeout {
            break;
        }

        let (batch, next_cursor) = fetch_events_once(app, session_id, cursor).await;
        if !batch.is_empty() {
            cursor = next_cursor;
            collected.extend(batch);
            if stop(&collected) {
                break;
            }
        }

        tokio::time::sleep(Duration::from_millis(800)).await;
    }

    collected
}
async fn poll_events_until_match<F>( async fn poll_events_until_match<F>(
app: &Router, app: &Router,
session_id: &str, session_id: &str,
@ -726,34 +870,7 @@ async fn poll_events_until_match<F>(
where where
F: Fn(&[Value]) -> bool, F: Fn(&[Value]) -> bool,
{ {
let start = Instant::now(); poll_events_until_match_from(app, session_id, 0, timeout, stop).await
let mut offset = 0u64;
let mut events = Vec::new();
while start.elapsed() < timeout {
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
let (status, payload) = send_json(app, Method::GET, &path, None).await;
assert_eq!(status, StatusCode::OK, "poll events");
let new_events = payload
.get("events")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if stop(&events) {
break;
}
}
tokio::time::sleep(Duration::from_millis(800)).await;
}
events
} }
fn find_permission_id(events: &[Value]) -> Option<String> { fn find_permission_id(events: &[Value]) -> Option<String> {
@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("session-{}", config.agent.as_str()); let session_id = format!("session-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
send_message(app, &session_id).await; send_message(app, &session_id).await;
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await; let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
let events = truncate_after_first_stop(&events); let events = truncate_after_first_stop(&events);
assert!( assert!(
!events.is_empty(), !events.is_empty(),
@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
); );
let normalized = normalize_events(&events); let normalized = normalize_events(&events);
insta::with_settings!({ insta::with_settings!({
snapshot_suffix => snapshot_name("http_events", Some(config.agent)), snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)),
snapshot_path => "../sessions/snapshots",
}, { }, {
insta::assert_yaml_snapshot!(normalized); insta::assert_yaml_snapshot!(normalized);
}); });
@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("sse-{}", config.agent.as_str()); let session_id = format!("sse-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
let sse_task = { let sse_task = {
let app = app.clone(); let app = app.clone();
let session_id = session_id.clone(); let session_id = session_id.clone();
let offset = offset;
tokio::spawn(async move { tokio::spawn(async move {
read_sse_events(&app, &session_id, Duration::from_secs(120)).await read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
}) })
}; };
@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
); );
let normalized = normalize_events(&events); let normalized = normalize_events(&events);
insta::with_settings!({ insta::with_settings!({
snapshot_suffix => snapshot_name("sse_events", Some(config.agent)), snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)),
snapshot_path => "../sessions/snapshots",
}, { }, {
insta::assert_yaml_snapshot!(normalized); insta::assert_yaml_snapshot!(normalized);
}); });
@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
config.agent config.agent
); );
} }
// Snapshot test for token authentication: builds an app configured with a
// bearer token and records the status/payload of four requests, each under
// its own snapshot suffix.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auth_snapshots() {
let token = "test-token";
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
// 1. Health endpoint: expected to answer 200 even though auth is configured.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health should be public");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_health_public", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_health(&payload),
}));
});
// 2. Agents list via `send_json` (no Authorization header is built here):
// expected to be rejected with 401.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_missing_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// 3. Agents list with a bearer token that differs from the configured one:
// expected 401.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, "Bearer wrong-token")
.body(Body::empty())
.expect("auth invalid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_invalid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// 4. Agents list with the configured token: expected 200. The payload is
// passed through `normalize_agent_list` before being snapshotted.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.expect("auth valid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::OK, "valid token should allow request");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_valid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_agent_list(&payload),
}));
});
}
// Snapshot test for CORS handling: installs a CORS layer allowing
// `http://example.com` (GET/POST, Content-Type/Authorization headers,
// credentials enabled) on an app with auth disabled, then snapshots a
// preflight request and an actual request.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn cors_snapshots() {
let cors = CorsLayer::new()
.allow_origin(vec![HeaderValue::from_static("http://example.com")])
.allow_methods([Method::GET, Method::POST])
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
.allow_credentials(true);
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
// Preflight: OPTIONS /v1/health announcing a GET with the two allowed headers.
let preflight = Request::builder()
.method(Method::OPTIONS)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
.header(
header::ACCESS_CONTROL_REQUEST_HEADERS,
"authorization,content-type",
)
.body(Body::empty())
.expect("cors preflight request");
// Only status and headers are snapshotted for the preflight; the body is ignored.
let (status, headers, _payload) = send_request(&app.app, preflight).await;
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_preflight", None),
}, {
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
});
// Actual request: GET /v1/health carrying the allowed Origin header.
let actual = Request::builder()
.method(Method::GET)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.body(Body::empty())
.expect("cors actual request");
let (status, headers, payload) = send_json_request(&app.app, actual).await;
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_actual", None),
}, {
insta::assert_yaml_snapshot!(json!({
"cors": snapshot_cors(status, &headers),
"payload": normalize_health(&payload),
}));
});
}
// End-to-end snapshot test over the main API endpoints: health, agent list,
// agent install, agent modes, session creation, session list, and sending a
// message. Agent-specific steps run once per config returned by
// `test_agents_from_env`; the test panics if that call fails.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn api_endpoints_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
// Health check: must return 200; the payload is normalized before snapshotting.
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health status");
insta::with_settings!({
snapshot_suffix => snapshot_name("health", None),
}, {
insta::assert_yaml_snapshot!(normalize_health(&health));
});
// List agents (just verify the API returns correct agent IDs, not install state)
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::OK, "agents list");
insta::with_settings!({
snapshot_suffix => snapshot_name("agents_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
});
// Install agents (ensure they're available for subsequent tests)
for config in &configs {
// `_guard` is kept alive for the whole iteration.
// NOTE(review): presumably it restores credentials/environment on drop — confirm.
let _guard = apply_credentials(&config.credentials);
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/agents/{}/install", config.agent.as_str()),
Some(json!({})),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
// Per agent: snapshot its modes, then create a session named
// `snapshot-<agent>` and remember it for the message step below.
let mut session_ids = Vec::new();
for config in &configs {
let _guard = apply_credentials(&config.credentials);
let (status, modes) = send_json(
&app.app,
Method::GET,
&format!("/v1/agents/{}/modes", config.agent.as_str()),
None,
)
.await;
assert_eq!(status, StatusCode::OK, "agent modes");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
});
let session_id = format!("snapshot-{}", config.agent.as_str());
let permission_mode = test_permission_mode(config.agent);
let (status, created) = send_json(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}"),
Some(json!({
"agent": config.agent.as_str(),
"permissionMode": permission_mode
})),
)
.await;
assert_eq!(status, StatusCode::OK, "create session");
insta::with_settings!({
snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_create_session(&created));
});
session_ids.push((config.agent, session_id));
}
// Session list is snapshotted once, after every session has been created.
let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
assert_eq!(status, StatusCode::OK, "list sessions");
insta::with_settings!({
snapshot_suffix => snapshot_name("sessions_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
});
// Send the shared PROMPT to every created session; each must answer 204.
for (agent, session_id) in &session_ids {
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}/messages"),
Some(json!({ "message": PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
insta::with_settings!({
snapshot_suffix => snapshot_name("send_message", Some(*agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
}
// Snapshot test for the permission and question approval flows.
//
// For every configured agent except OpenCode, and depending on the agent's
// reported capabilities, this runs up to three scenarios, each in its own
// "plan"-mode session:
//   * permission request -> reply "once"
//   * question request   -> reply with answers
//   * question request   -> reject
// When no permission/question id shows up in the collected events, the test
// instead posts to a fixed "missing-*" id and snapshots the error response.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn approval_flow_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
let capabilities = fetch_capabilities(&app.app).await;
for config in &configs {
// OpenCode doesn't support "plan" permission mode required for approval flows
if config.agent == AgentId::Opencode {
continue;
}
// Capabilities are looked up by agent name; a missing entry panics.
let caps = capabilities
.get(config.agent.as_str())
.expect("capabilities missing");
let _guard = apply_credentials(&config.credentials);
install_agent(&app.app, config.agent).await;
// --- Permission flow: only when the agent reports both plan mode and permissions.
if caps.plan_mode && caps.permissions {
let permission_session = format!("perm-{}", config.agent.as_str());
create_session(&app.app, config.agent, &permission_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{permission_session}/messages"),
Some(json!({ "message": PERMISSION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
// Poll (up to 120 s) until a permission id is present or `should_stop` fires.
let permission_events = poll_events_until_match(
&app.app,
&permission_session,
Duration::from_secs(120),
|events| find_permission_id(events).is_some() || should_stop(events),
)
.await;
let permission_events = truncate_permission_events(&permission_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&permission_events));
});
if let Some(permission_id) = find_permission_id(&permission_events) {
// A permission was requested: approve it once and expect 204.
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No permission was requested: replying to an unknown id must fail.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/missing-permission/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert!(!status.is_success(), "missing permission id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
}
// --- Question flows: only when the agent reports question support.
if caps.questions {
// Scenario 1: answer the question.
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reply_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
// Poll (up to 120 s) until a question id with answers is found or `should_stop` fires.
let question_events = poll_events_until_match(
&app.app,
&question_reply_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
let question_events = truncate_question_events(&question_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&question_events));
});
if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
// Reply using the answers returned alongside the question id.
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
),
Some(json!({ "answers": answers })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: replying to an unknown id must fail.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/missing-question/reply"
),
Some(json!({ "answers": [] })),
)
.await;
assert!(!status.is_success(), "missing question id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
// Scenario 2: reject the question, using a fresh session and the same prompt.
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reject_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
let reject_events = poll_events_until_match(
&app.app,
&question_reject_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
let reject_events = truncate_question_events(&reject_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&reject_events));
});
if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
// The reject endpoint is called without a request body.
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
),
None,
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: rejecting an unknown id must fail.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/missing-question/reject"
),
None,
)
.await;
assert!(!status.is_success(), "missing question id reject should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
}
}
}
// Runs the HTTP events snapshot scenario for every configured agent except
// OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn http_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
    // TODO: the upstream issue link was left as a placeholder:
    // https://github.com/opencode-ai/opencode/issues/XXX
    let runnable = agent_configs
        .iter()
        .filter(|cfg| cfg.agent != AgentId::Opencode);
    for cfg in runnable {
        run_http_events_snapshot(&test_app.app, cfg).await;
    }
}
// Exercises two sessions of the same agent concurrently and snapshots both
// normalized event streams together under the "concurrency_events" suffix.
//
// Steps: install the agent, create sessions `concurrent-a-<agent>` and
// `concurrent-b-<agent>`, send a message to both via `tokio::join!`, poll both
// via `tokio::join!` (120 s each), truncate each stream after its first stop
// event, and assert that both streams are non-empty and satisfy `should_stop`.
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
let _guard = apply_credentials(&config.credentials);
install_agent(app, config.agent).await;
let session_a = format!("concurrent-a-{}", config.agent.as_str());
let session_b = format!("concurrent-b-{}", config.agent.as_str());
let perm_mode = test_permission_mode(config.agent);
create_session(app, config.agent, &session_a, perm_mode).await;
create_session(app, config.agent, &session_b, perm_mode).await;
// Each future gets its own clone of the router; both sends are driven together.
let app_a = app.clone();
let app_b = app.clone();
let send_a = send_message(&app_a, &session_a);
let send_b = send_message(&app_b, &session_b);
tokio::join!(send_a, send_b);
// Fresh clones for the polling futures, which are likewise driven together.
let app_a = app.clone();
let app_b = app.clone();
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
let events_a = truncate_after_first_stop(&events_a);
let events_b = truncate_after_first_stop(&events_b);
assert!(
!events_a.is_empty(),
"no events collected for concurrent session a {}",
config.agent
);
assert!(
!events_b.is_empty(),
"no events collected for concurrent session b {}",
config.agent
);
// A stream that fails `should_stop` here is reported as a timeout.
assert!(
should_stop(&events_a),
"timed out waiting for assistant/error event for concurrent session a {}",
config.agent
);
assert!(
should_stop(&events_b),
"timed out waiting for assistant/error event for concurrent session b {}",
config.agent
);
// Both sessions go into a single snapshot document.
let snapshot = json!({
"session_a": normalize_events(&events_a),
"session_b": normalize_events(&events_b),
});
insta::with_settings!({
snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot);
});
}
// Runs the SSE events snapshot scenario for every configured agent except
// OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sse_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
    // TODO: the upstream issue link was left as a placeholder:
    // https://github.com/opencode-ai/opencode/issues/XXX
    let runnable = agent_configs
        .iter()
        .filter(|cfg| cfg.agent != AgentId::Opencode);
    for cfg in runnable {
        run_sse_events_snapshot(&test_app.app, cfg).await;
    }
}
// Runs the turn-stream check for every configured agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn turn_stream_route() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
    // TODO: the upstream issue link was left as a placeholder:
    // https://github.com/opencode-ai/opencode/issues/XXX
    let runnable = agent_configs
        .iter()
        .filter(|cfg| cfg.agent != AgentId::Opencode);
    for cfg in runnable {
        run_turn_stream_check(&test_app.app, cfg).await;
    }
}
// Runs the two-session concurrency snapshot scenario for every configured
// agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrency_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let test_app = TestApp::new();
    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
    // TODO: the upstream issue link was left as a placeholder:
    // https://github.com/opencode-ai/opencode/issues/XXX
    let runnable = agent_configs
        .iter()
        .filter(|cfg| cfg.agent != AgentId::Opencode);
    for cfg in runnable {
        run_concurrency_snapshot(&test_app.app, cfg).await;
    }
}

View file

@ -0,0 +1,165 @@
// Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/.
include!("../common/http.rs");
// Snapshot test for token authentication: builds an app configured with a
// bearer token and records the status/payload of four requests, each under
// its own snapshot suffix.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auth_snapshots() {
let token = "test-token";
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
// 1. Health endpoint: expected to answer 200 even though auth is configured.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health should be public");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_health_public", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_health(&payload),
}));
});
// 2. Agents list via `send_json` (no Authorization header is built here):
// expected to be rejected with 401.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_missing_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// 3. Agents list with a bearer token that differs from the configured one:
// expected 401.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, "Bearer wrong-token")
.body(Body::empty())
.expect("auth invalid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_invalid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// 4. Agents list with the configured token: expected 200. The payload is
// passed through `normalize_agent_list` before being snapshotted.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.expect("auth valid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::OK, "valid token should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_valid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_agent_list(&payload),
}));
});
}
// Snapshot test for CORS handling: installs a CORS layer allowing
// `http://example.com` (GET/POST, Content-Type/Authorization headers) on an
// app with auth disabled, then snapshots a preflight request and an actual
// request.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn cors_snapshots() {
// The origin literal is parsed into a HeaderValue; `unwrap` panics if parsing fails.
let cors = CorsLayer::new()
.allow_origin("http://example.com".parse::<HeaderValue>().unwrap())
.allow_methods([Method::GET, Method::POST])
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]);
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
// Preflight: OPTIONS /v1/agents announcing a GET with the two allowed headers.
let preflight = Request::builder()
.method(Method::OPTIONS)
.uri("/v1/agents")
.header(header::ORIGIN, "http://example.com")
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
.header(
header::ACCESS_CONTROL_REQUEST_HEADERS,
"authorization,content-type",
)
.body(Body::empty())
.expect("cors preflight request");
// Only status and headers are snapshotted for the preflight; the body is ignored.
let (status, headers, _payload) = send_request(&app.app, preflight).await;
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_preflight", None),
}, {
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
});
// Actual request: GET /v1/health carrying the allowed Origin header.
let actual = Request::builder()
.method(Method::GET)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.body(Body::empty())
.expect("cors actual request");
let (status, headers, payload) = send_json_request(&app.app, actual).await;
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_actual", None),
}, {
insta::assert_yaml_snapshot!(json!({
"cors": snapshot_cors(status, &headers),
"payload": normalize_health(&payload),
}));
});
}
// Snapshot test for the agent-related endpoints: health, agent list, agent
// install, and agent modes. Install and modes run once per config returned by
// `test_agents_from_env`; the test panics if that call fails.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn agent_endpoints_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
// Health check: must return 200; the payload is normalized before snapshotting.
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health status");
insta::with_settings!({
snapshot_suffix => snapshot_name("health", None),
}, {
insta::assert_yaml_snapshot!(normalize_health(&health));
});
// List agents (verify IDs only; install state is environment-dependent).
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::OK, "agents list");
insta::with_settings!({
snapshot_suffix => snapshot_name("agents_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
});
// Install every configured agent; each install must answer 204.
for config in &configs {
// `_guard` is kept alive for the whole iteration.
// NOTE(review): presumably it restores credentials/environment on drop — confirm.
let _guard = apply_credentials(&config.credentials);
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/agents/{}/install", config.agent.as_str()),
Some(json!({})),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
// Second pass, after all installs: snapshot each agent's normalized modes.
for config in &configs {
let _guard = apply_credentials(&config.credentials);
let (status, modes) = send_json(
&app.app,
Method::GET,
&format!("/v1/agents/{}/modes", config.agent.as_str()),
None,
)
.await;
assert_eq!(status, StatusCode::OK, "agent modes");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
});
}
}

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 918
expression: normalize_create_session(&created)
---
healthy: true

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_create_session(&created)
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -1,7 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1053
expression: normalize_create_session(&created)
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_create_session(&created)
---
agentSessionId: "<redacted>"
healthy: true

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 943
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 959
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1078
expression: snapshot_status(status)
---
status: 204

View file

@ -1,5 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_sessions(&sessions)
---
hasExpectedFields: true
sessionCount: 1

View file

@ -1,17 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1119
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started

View file

@ -1,131 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 15
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1112
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1017
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1152
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1151
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,331 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 20
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 21
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 22
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 23
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 24
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 25
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 26
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 27
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 28
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 29
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 30
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 31
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 32
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 33
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 34
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 35
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 36
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 37
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 38
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 39
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 40
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1236
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1151
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1139
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1276
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1109
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,315 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 20
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 21
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 22
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 23
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 24
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 25
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 26
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 27
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 28
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 29
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 30
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 31
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 32
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 33
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 34
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 35
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 36
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 37
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 38
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1174
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1214
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,201 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: snapshot
---
session_a:
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types: []
kind: message
role: assistant
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
session_b:
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 15
source: agent
synthetic: false
type: item.completed

View file

@ -1,67 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1344
expression: snapshot
---
session_a:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed
session_b:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,171 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 20
source: agent
synthetic: false
type: item.completed

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,73 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types: []
kind: message
role: assistant
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 841
expression: normalized
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -0,0 +1,2 @@
// Load the `agent_endpoints` module from `http/agent_endpoints.rs` instead of
// the default location the compiler would derive from the module name.
#[path = "http/agent_endpoints.rs"]
mod agent_endpoints;

View file

@ -1 +0,0 @@
include!("http/http_sse_snapshots.rs");

View file

@ -0,0 +1,2 @@
// Load the `sessions` module from `sessions/mod.rs` through an explicit path
// attribute.
#[path = "sessions/mod.rs"]
mod sessions;

View file

@ -0,0 +1,5 @@
// Submodules of this module; each declaration pulls in the sibling source file
// with the same name.
mod session_lifecycle;
mod permissions;
mod questions;
mod reasoning;
mod status;

View file

@ -0,0 +1,88 @@
// Permission flow snapshots compare every agent to the mock baseline.
include!("../common/http.rs");
/// Returns the snapshot suffix for `prefix`, always computed for the mock agent
/// so that every agent's output is checked against the same stored snapshot.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Drives the permission flow for each configured agent and snapshots the outcome.
///
/// Agents whose capabilities lack either `plan_mode` or `permissions` are skipped.
/// For the others a "plan" session is created, `PERMISSION_PROMPT` is sent, and
/// events are polled until a permission id is found or `should_stop` reports true.
/// If a permission id was captured it is answered with "once"; otherwise a reply
/// is posted for the placeholder id `missing-permission` and the failing response
/// is snapshotted instead.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permission_flow_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(&test_app.app).await;
        let agent_caps = capabilities
            .get(agent.as_str())
            .expect("capabilities missing");
        // Both plan mode and permission support are required for this flow.
        if !agent_caps.plan_mode || !agent_caps.permissions {
            continue;
        }

        // Keep the guard bound for the rest of the iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(&test_app.app, agent).await;

        let permission_session = format!("perm-{}", agent.as_str());
        create_session(&test_app.app, agent, &permission_session, "plan").await;
        let start_offset =
            drain_events(&test_app.app, &permission_session, Duration::from_secs(6)).await;

        // Send the prompt that is expected to trigger a permission request.
        let messages_path = format!("/v1/sessions/{permission_session}/messages");
        let prompt_body = json!({ "message": PERMISSION_PROMPT });
        let prompt_status = send_status(
            &test_app.app,
            Method::POST,
            &messages_path,
            Some(prompt_body),
        )
        .await;
        assert_eq!(prompt_status, StatusCode::NO_CONTENT, "send permission prompt");

        let polled_events = poll_events_until_match_from(
            &test_app.app,
            &permission_session,
            start_offset,
            Duration::from_secs(120),
            |seen| find_permission_id(seen).is_some() || should_stop(seen),
        )
        .await;
        let permission_events = truncate_permission_events(&polled_events);
        assert_session_snapshot("permission_events", normalize_events(&permission_events));

        match find_permission_id(&permission_events) {
            Some(permission_id) => {
                // A permission request was captured: answer it.
                let reply_path = format!(
                    "/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
                );
                let reply_status = send_status(
                    &test_app.app,
                    Method::POST,
                    &reply_path,
                    Some(json!({ "reply": "once" })),
                )
                .await;
                assert_eq!(reply_status, StatusCode::NO_CONTENT, "reply permission");
                assert_session_snapshot("permission_reply", snapshot_status(reply_status));
            }
            None => {
                // No permission request was captured: replying to a placeholder
                // id must fail, and that error response is snapshotted.
                let missing_path = format!(
                    "/v1/sessions/{permission_session}/permissions/missing-permission/reply"
                );
                let (error_status, error_payload) = send_json(
                    &test_app.app,
                    Method::POST,
                    &missing_path,
                    Some(json!({ "reply": "once" })),
                )
                .await;
                assert!(
                    !error_status.is_success(),
                    "missing permission id should error"
                );
                assert_session_snapshot(
                    "permission_reply_missing",
                    json!({
                        "status": error_status.as_u16(),
                        "payload": error_payload,
                    }),
                );
            }
        }
    }
}

View file

@ -0,0 +1,145 @@
// Question flow snapshots compare every agent to the mock baseline.
include!("../common/http.rs");
/// Returns the snapshot suffix for `prefix`, always computed for the mock agent
/// so that every agent's output is checked against the same stored snapshot.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Exercises the question flow for each configured agent and snapshots every stage.
///
/// Agents whose capabilities do not report `questions` are skipped. Two separate
/// "plan" sessions are run per agent, each prompted with `QUESTION_PROMPT`:
/// the first answers the captured question through `.../reply`, the second
/// rejects it through `.../reject`. When no question is captured, the same
/// endpoint is called with the placeholder id `missing-question` and the failing
/// response is snapshotted instead.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn question_flow_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(&test_app.app).await;
        let agent_caps = capabilities
            .get(agent.as_str())
            .expect("capabilities missing");
        if !agent_caps.questions {
            continue;
        }

        // Keep the guard bound for the rest of the iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(&test_app.app, agent).await;

        // ---- Session 1: answer the question -------------------------------
        let question_reply_session = format!("question-reply-{}", agent.as_str());
        create_session(&test_app.app, agent, &question_reply_session, "plan").await;
        let reply_offset = drain_events(
            &test_app.app,
            &question_reply_session,
            Duration::from_secs(6),
        )
        .await;

        let reply_messages_path = format!("/v1/sessions/{question_reply_session}/messages");
        let reply_prompt_status = send_status(
            &test_app.app,
            Method::POST,
            &reply_messages_path,
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(
            reply_prompt_status,
            StatusCode::NO_CONTENT,
            "send question prompt"
        );

        let polled_reply_events = poll_events_until_match_from(
            &test_app.app,
            &question_reply_session,
            reply_offset,
            Duration::from_secs(120),
            |seen| find_question_id_and_answers(seen).is_some() || should_stop(seen),
        )
        .await;
        let question_events = truncate_question_events(&polled_reply_events);
        assert_session_snapshot("question_reply_events", normalize_events(&question_events));

        match find_question_id_and_answers(&question_events) {
            Some((question_id, answers)) => {
                let answer_path = format!(
                    "/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
                );
                let answer_status = send_status(
                    &test_app.app,
                    Method::POST,
                    &answer_path,
                    Some(json!({ "answers": answers })),
                )
                .await;
                assert_eq!(answer_status, StatusCode::NO_CONTENT, "reply question");
                assert_session_snapshot("question_reply", snapshot_status(answer_status));
            }
            None => {
                // No question was captured: replying to a placeholder id must fail.
                let missing_path = format!(
                    "/v1/sessions/{question_reply_session}/questions/missing-question/reply"
                );
                let (error_status, error_payload) = send_json(
                    &test_app.app,
                    Method::POST,
                    &missing_path,
                    Some(json!({ "answers": [] })),
                )
                .await;
                assert!(
                    !error_status.is_success(),
                    "missing question id should error"
                );
                assert_session_snapshot(
                    "question_reply_missing",
                    json!({
                        "status": error_status.as_u16(),
                        "payload": error_payload,
                    }),
                );
            }
        }

        // ---- Session 2: reject the question -------------------------------
        let question_reject_session = format!("question-reject-{}", agent.as_str());
        create_session(&test_app.app, agent, &question_reject_session, "plan").await;
        let reject_offset = drain_events(
            &test_app.app,
            &question_reject_session,
            Duration::from_secs(6),
        )
        .await;

        let reject_messages_path = format!("/v1/sessions/{question_reject_session}/messages");
        let reject_prompt_status = send_status(
            &test_app.app,
            Method::POST,
            &reject_messages_path,
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(
            reject_prompt_status,
            StatusCode::NO_CONTENT,
            "send question prompt reject"
        );

        let polled_reject_events = poll_events_until_match_from(
            &test_app.app,
            &question_reject_session,
            reject_offset,
            Duration::from_secs(120),
            |seen| find_question_id_and_answers(seen).is_some() || should_stop(seen),
        )
        .await;
        let reject_events = truncate_question_events(&polled_reject_events);
        assert_session_snapshot("question_reject_events", normalize_events(&reject_events));

        match find_question_id_and_answers(&reject_events) {
            Some((question_id, _)) => {
                let reject_path = format!(
                    "/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
                );
                let reject_status =
                    send_status(&test_app.app, Method::POST, &reject_path, None).await;
                assert_eq!(reject_status, StatusCode::NO_CONTENT, "reject question");
                assert_session_snapshot("question_reject", snapshot_status(reject_status));
            }
            None => {
                // No question was captured: rejecting a placeholder id must fail.
                let missing_path = format!(
                    "/v1/sessions/{question_reject_session}/questions/missing-question/reject"
                );
                let (error_status, error_payload) =
                    send_json(&test_app.app, Method::POST, &missing_path, None).await;
                assert!(
                    !error_status.is_success(),
                    "missing question id reject should error"
                );
                assert_session_snapshot(
                    "question_reject_missing",
                    json!({
                        "status": error_status.as_u16(),
                        "payload": error_payload,
                    }),
                );
            }
        }
    }
}

View file

@ -0,0 +1,56 @@
// Reasoning capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
/// Picks the prompt sent in the reasoning test: the fixed string "demo" for the
/// mock agent, and an explicit request for reasoning for any other agent.
fn reasoning_prompt(agent: AgentId) -> &'static str {
    if agent != AgentId::Mock {
        return "Answer briefly and include your reasoning.";
    }
    "demo"
}
/// Checks that every configured agent reporting the `reasoning` capability
/// actually emits reasoning content.
///
/// For each such agent a session is created, the agent-specific reasoning
/// prompt is sent, and events are polled until either reasoning content or an
/// error event shows up. The test then asserts that reasoning content is present.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_events_present() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(&test_app.app).await;
        let agent_caps = capabilities
            .get(agent.as_str())
            .expect("capabilities missing");
        if !agent_caps.reasoning {
            continue;
        }

        // Keep the guard bound for the rest of the iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(&test_app.app, agent).await;

        let session_id = format!("reasoning-{}", agent.as_str());
        let permission_mode = test_permission_mode(agent);
        create_session(&test_app.app, agent, &session_id, permission_mode).await;
        let start_offset =
            drain_events(&test_app.app, &session_id, Duration::from_secs(6)).await;

        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let prompt_body = json!({ "message": reasoning_prompt(agent) });
        let prompt_status = send_status(
            &test_app.app,
            Method::POST,
            &messages_path,
            Some(prompt_body),
        )
        .await;
        assert_eq!(prompt_status, StatusCode::NO_CONTENT, "send reasoning prompt");

        // Stop polling as soon as reasoning content or an error event is seen.
        let events = poll_events_until_match_from(
            &test_app.app,
            &session_id,
            start_offset,
            Duration::from_secs(120),
            |seen| events_have_content_type(seen, "reasoning") || seen.iter().any(is_error_event),
        )
        .await;

        let has_reasoning = events_have_content_type(&events, "reasoning");
        assert!(has_reasoning, "expected reasoning content for {}", agent);
    }
}

View file

@ -0,0 +1,192 @@
// Session lifecycle and streaming snapshots use the mock baseline as the single source of truth.
include!("../common/http.rs");
/// Builds the snapshot suffix for `prefix`.
///
/// The suffix is always derived with the mock agent, in line with this file's
/// convention of using the mock baseline as the single source of truth.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Snapshots the create-session, list-sessions and send-message endpoints.
///
/// Runs once per configured agent whose capabilities report `session_lifecycle`.
/// Each response is normalized (or reduced to its status) before being compared
/// with the stored snapshot.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn session_endpoints_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let router = &test_app.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let supports_lifecycle = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if !supports_lifecycle {
            continue;
        }

        // The guard is held for the remainder of this iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        // POST /v1/sessions/{id}: create the session.
        let session_id = format!("snapshot-{}", agent.as_str());
        let permission_mode = test_permission_mode(agent);
        let session_path = format!("/v1/sessions/{session_id}");
        let create_body = json!({
            "agent": agent.as_str(),
            "permissionMode": permission_mode
        });
        let (create_status, created) =
            send_json(router, Method::POST, &session_path, Some(create_body)).await;
        assert_eq!(create_status, StatusCode::OK, "create session");
        assert_session_snapshot("create_session", normalize_create_session(&created));

        // GET /v1/sessions: list sessions.
        let (list_status, sessions) = send_json(router, Method::GET, "/v1/sessions", None).await;
        assert_eq!(list_status, StatusCode::OK, "list sessions");
        assert_session_snapshot("sessions_list", normalize_sessions(&sessions));

        // POST /v1/sessions/{id}/messages: send the shared test prompt.
        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let send_result = send_status(
            router,
            Method::POST,
            &messages_path,
            Some(json!({ "message": PROMPT })),
        )
        .await;
        assert_eq!(send_result, StatusCode::NO_CONTENT, "send message");
        assert_session_snapshot("send_message", snapshot_status(send_result));
    }
}
/// Runs the HTTP events snapshot for each configured agent that reports the
/// `session_lifecycle` capability, excluding OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn http_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    // OpenCode's embedded bun hangs when installing plugins, blocking event streaming,
    // so it is filtered out before any test app is created for it.
    let runnable = agent_configs
        .iter()
        .filter(|candidate| candidate.agent != AgentId::Opencode);

    for agent_config in runnable {
        let test_app = TestApp::new();
        let capabilities = fetch_capabilities(&test_app.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_http_events_snapshot(&test_app.app, agent_config).await;
        }
    }
}
/// Runs the SSE events snapshot for each configured agent that reports the
/// `session_lifecycle` capability, excluding OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sse_events_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming,
    // so it is filtered out before any test app is created for it.
    let runnable = agent_configs
        .iter()
        .filter(|candidate| candidate.agent != AgentId::Opencode);

    for agent_config in runnable {
        let test_app = TestApp::new();
        let capabilities = fetch_capabilities(&test_app.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_sse_events_snapshot(&test_app.app, agent_config).await;
        }
    }
}
/// Runs the concurrency snapshot for each configured agent that reports the
/// `session_lifecycle` capability.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrency_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let capabilities = fetch_capabilities(&test_app.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_concurrency_snapshot(&test_app.app, agent_config).await;
        }
    }
}
/// Runs the turn-stream check for each configured agent that reports the
/// `session_lifecycle` capability.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn turn_stream_route() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let capabilities = fetch_capabilities(&test_app.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_turn_stream_check(&test_app.app, agent_config).await;
        }
    }
}
/// Drives two sessions of the same agent concurrently and snapshots both event streams.
///
/// Installs the agent, creates sessions `concurrent-a-<agent>` and `concurrent-b-<agent>`,
/// captures the offset returned by `drain_events` for each, sends a message to both
/// sessions concurrently, then polls both sessions concurrently with a 120-second budget
/// each. Each stream is passed through `truncate_after_first_stop` before it is checked
/// and snapshotted under the `concurrency_events` prefix.
///
/// # Panics
///
/// Panics if either session yields no events, if `should_stop` is false for either
/// stream (reported as a timeout waiting for an assistant/error event), or if the
/// snapshot assertion fails.
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
    // The guard is held until the end of the function.
    let _guard = apply_credentials(&config.credentials);
    install_agent(app, config.agent).await;

    let session_a = format!("concurrent-a-{}", config.agent.as_str());
    let session_b = format!("concurrent-b-{}", config.agent.as_str());
    let perm_mode = test_permission_mode(config.agent);
    create_session(app, config.agent, &session_a, perm_mode).await;
    create_session(app, config.agent, &session_b, perm_mode).await;

    let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
    let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;

    // Both futures only need a shared borrow of the router, so they share `app`
    // directly; no per-future clone of the router is required.
    tokio::join!(
        send_message(app, &session_a),
        send_message(app, &session_b)
    );

    let poll_timeout = Duration::from_secs(120);
    let (events_a, events_b) = tokio::join!(
        poll_events_until_from(app, &session_a, offset_a, poll_timeout),
        poll_events_until_from(app, &session_b, offset_b, poll_timeout)
    );

    // Only the events up to the first stop are checked and snapshotted.
    let events_a = truncate_after_first_stop(&events_a);
    let events_b = truncate_after_first_stop(&events_b);

    assert!(
        !events_a.is_empty(),
        "no events collected for concurrent session a {}",
        config.agent
    );
    assert!(
        !events_b.is_empty(),
        "no events collected for concurrent session b {}",
        config.agent
    );
    assert!(
        should_stop(&events_a),
        "timed out waiting for assistant/error event for concurrent session a {}",
        config.agent
    );
    assert!(
        should_stop(&events_b),
        "timed out waiting for assistant/error event for concurrent session b {}",
        config.agent
    );

    let snapshot = json!({
        "session_a": normalize_events(&events_a),
        "session_b": normalize_events(&events_b),
    });
    assert_session_snapshot("concurrency_events", snapshot);
}

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
--- ---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/permissions.rs
assertion_line: 1011 expression: value
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
--- ---
payload: payload:
detail: "invalid request: unknown permission id: missing-permission" detail: "invalid request: unknown permission id: missing-permission"

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
--- ---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/questions.rs
assertion_line: 1078 expression: value
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
--- ---
payload: payload:
detail: "invalid request: unknown question id: missing-question" detail: "invalid request: unknown question id: missing-question"

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
--- ---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/questions.rs
assertion_line: 1072 expression: value
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
--- ---
payload: payload:
detail: "invalid request: unknown question id: missing-question" detail: "invalid request: unknown question id: missing-question"

View file

@ -1,38 +1,43 @@
--- ---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
assertion_line: 1351 expression: value
expression: snapshot
--- ---
session_a: session_a:
- metadata: true - item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1 seq: 1
session: started type: item.started
source: daemon - delta:
synthetic: true delta: "<redacted>"
type: session.started item_id: "<redacted>"
- metadata: true native_item_id: "<redacted>"
seq: 2 seq: 2
session: started type: item.delta
source: agent - item:
synthetic: false content_types:
type: session.started - text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item: - item:
content_types: content_types:
- text - text
kind: message kind: message
role: assistant role: assistant
status: in_progress status: in_progress
seq: 3 seq: 4
source: agent
synthetic: false
type: item.started type: item.started
- delta: - delta:
delta: "<redacted>" delta: "<redacted>"
item_id: "<redacted>" item_id: "<redacted>"
native_item_id: "<redacted>" native_item_id: "<redacted>"
seq: 4 seq: 5
source: agent
synthetic: false
type: item.delta type: item.delta
- item: - item:
content_types: content_types:
@ -40,40 +45,44 @@ session_a:
kind: message kind: message
role: assistant role: assistant
status: completed status: completed
seq: 5 seq: 6
source: agent
synthetic: false
type: item.completed type: item.completed
session_b: session_b:
- metadata: true - item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1 seq: 1
session: started type: item.started
source: daemon - delta:
synthetic: true delta: "<redacted>"
type: session.started item_id: "<redacted>"
- metadata: true native_item_id: "<redacted>"
seq: 2 seq: 2
session: started type: item.delta
source: agent - item:
synthetic: false content_types:
type: session.started - text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item: - item:
content_types: content_types:
- text - text
kind: message kind: message
role: assistant role: assistant
status: in_progress status: in_progress
seq: 3 seq: 4
source: agent
synthetic: false
type: item.started type: item.started
- delta: - delta:
delta: "<redacted>" delta: "<redacted>"
item_id: "<redacted>" item_id: "<redacted>"
native_item_id: "<redacted>" native_item_id: "<redacted>"
seq: 4 seq: 5
source: agent
synthetic: false
type: item.delta type: item.delta
- item: - item:
content_types: content_types:
@ -81,7 +90,5 @@ session_b:
kind: message kind: message
role: assistant role: assistant
status: completed status: completed
seq: 5 seq: 6
source: agent
synthetic: false
type: item.completed type: item.completed

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
status: 204

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
hasExpectedFields: true
sessionCount: 1

View file

@ -1,37 +1,42 @@
--- ---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
assertion_line: 811
expression: normalized expression: normalized
--- ---
- metadata: true - item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1 seq: 1
session: started type: item.started
source: daemon - delta:
synthetic: true delta: "<redacted>"
type: session.started item_id: "<redacted>"
- metadata: true native_item_id: "<redacted>"
seq: 2 seq: 2
session: started type: item.delta
source: agent - item:
synthetic: false content_types:
type: session.started - text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item: - item:
content_types: content_types:
- text - text
kind: message kind: message
role: assistant role: assistant
status: in_progress status: in_progress
seq: 3 seq: 4
source: agent
synthetic: false
type: item.started type: item.started
- delta: - delta:
delta: "<redacted>" delta: "<redacted>"
item_id: "<redacted>" item_id: "<redacted>"
native_item_id: "<redacted>" native_item_id: "<redacted>"
seq: 4 seq: 5
source: agent
synthetic: false
type: item.delta type: item.delta
- item: - item:
content_types: content_types:
@ -39,7 +44,5 @@ expression: normalized
kind: message kind: message
role: assistant role: assistant
status: completed status: completed
seq: 5 seq: 6
source: agent
synthetic: false
type: item.completed type: item.completed

View file

@ -1,29 +1,42 @@
--- ---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
assertion_line: 804
expression: normalized expression: normalized
--- ---
- metadata: true - item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1 seq: 1
session: started type: item.started
type: session.started - delta:
- metadata: true delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2 seq: 2
session: started type: item.delta
type: session.started - item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item: - item:
content_types: content_types:
- text - text
kind: message kind: message
role: assistant role: assistant
status: in_progress status: in_progress
seq: 3 seq: 4
type: item.started type: item.started
- delta: - delta:
delta: "<redacted>" delta: "<redacted>"
item_id: "<redacted>" item_id: "<redacted>"
native_item_id: "<redacted>" native_item_id: "<redacted>"
seq: 4 seq: 5
type: item.delta type: item.delta
- item: - item:
content_types: content_types:
@ -31,5 +44,5 @@ expression: normalized
kind: message kind: message
role: assistant role: assistant
status: completed status: completed
seq: 5 seq: 6
type: item.completed type: item.completed

View file

@ -0,0 +1,61 @@
// Status capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
/// Returns the message text sent to `agent` by the status capability test.
///
/// The mock agent receives the literal `"status"`; every other agent receives a
/// natural-language request for a short status update.
fn status_prompt(agent: AgentId) -> &'static str {
    if agent == AgentId::Mock {
        return "status";
    }
    "Provide a short status update."
}
fn events_have_status(events: &[Value]) -> bool {
events.iter().any(|event| event_is_status_item(event))
|| events_have_content_type(events, "status")
}
/// Checks that agents advertising the `status` capability emit status events.
///
/// Agents whose capabilities report `status` as false are skipped. For the others a
/// session named `status-<agent>` is created, the status prompt is posted, and events
/// are polled with a 120-second budget using a predicate that matches either status
/// events or an error event. The test then requires status events.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn status_events_present() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");

    for agent_config in &agent_configs {
        let test_app = TestApp::new();
        let router = &test_app.app;
        let agent = agent_config.agent;

        // Only agents that advertise status support are exercised.
        let capabilities = fetch_capabilities(router).await;
        let supports_status = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .status;
        if !supports_status {
            continue;
        }

        // The guard is held for the remainder of this iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        let session_id = format!("status-{}", agent.as_str());
        create_session(router, agent, &session_id, test_permission_mode(agent)).await;

        // Capture the offset returned by the drain so polling starts from there.
        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;

        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let message_body = json!({ "message": status_prompt(agent) });
        let send_result =
            send_status(router, Method::POST, &messages_path, Some(message_body)).await;
        assert_eq!(send_result, StatusCode::NO_CONTENT, "send status prompt");

        let collected = poll_events_until_match_from(
            router,
            &session_id,
            start_offset,
            Duration::from_secs(120),
            |seen| events_have_status(seen) || seen.iter().any(is_error_event),
        )
        .await;

        assert!(
            events_have_status(&collected),
            "expected status events for {}",
            agent
        );
    }
}