mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 05:02:11 +00:00
fix: add docker-setup action, runtime Dockerfile, and align release workflow
- Add .github/actions/docker-setup composite action (from rivet)
- Add docker/runtime/Dockerfile for Docker image builds
- Update release.yaml to match rivet patterns:
  - Use corepack enable instead of pnpm/action-setup
  - Add reuse_engine_version input
  - Add Docker job with Depot runners
  - Use --no-frozen-lockfile for pnpm install
  - Add id-token permission for setup job
This commit is contained in:
parent
f05389307a
commit
b49776145b
82 changed files with 1415 additions and 2430 deletions
31
.github/actions/docker-setup/action.yaml
vendored
Normal file
31
.github/actions/docker-setup/action.yaml
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
name: 'Docker Setup'
|
||||||
|
description: 'Set up Docker Buildx and log in to Docker Hub'
|
||||||
|
inputs:
|
||||||
|
docker_username:
|
||||||
|
description: 'Docker Hub username'
|
||||||
|
required: true
|
||||||
|
docker_password:
|
||||||
|
description: 'Docker Hub password'
|
||||||
|
required: true
|
||||||
|
github_token:
|
||||||
|
description: 'GitHub token'
|
||||||
|
required: true
|
||||||
|
runs:
|
||||||
|
using: 'composite'
|
||||||
|
steps:
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Log in to Docker Hub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
username: ${{ inputs.docker_username }}
|
||||||
|
password: ${{ inputs.docker_password }}
|
||||||
|
|
||||||
|
# This will be used as a secret to authenticate with Git repo pulls
|
||||||
|
- name: Create .netrc file
|
||||||
|
run: |
|
||||||
|
echo "machine github.com" > ${{ runner.temp }}/netrc
|
||||||
|
echo "login x-access-token" >> ${{ runner.temp }}/netrc
|
||||||
|
echo "password ${{ inputs.github_token }}" >> ${{ runner.temp }}/netrc
|
||||||
|
shell: bash
|
||||||
122
.github/workflows/release.yaml
vendored
122
.github/workflows/release.yaml
vendored
|
|
@ -4,14 +4,18 @@ on:
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
version:
|
version:
|
||||||
description: "Version (e.g. 0.1.0 or v0.1.0)"
|
description: 'Version'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
latest:
|
latest:
|
||||||
description: "Latest"
|
description: 'Latest'
|
||||||
required: true
|
required: true
|
||||||
type: boolean
|
type: boolean
|
||||||
default: true
|
default: true
|
||||||
|
reuse_engine_version:
|
||||||
|
description: 'Reuse artifacts from this version (skips building)'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
|
||||||
defaults:
|
defaults:
|
||||||
run:
|
run:
|
||||||
|
|
@ -27,7 +31,10 @@ jobs:
|
||||||
name: "Setup"
|
name: "Setup"
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
permissions:
|
permissions:
|
||||||
|
# Allow pushing to GitHub
|
||||||
contents: write
|
contents: write
|
||||||
|
# Allows authentication
|
||||||
|
id-token: write
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
|
|
@ -35,20 +42,29 @@ jobs:
|
||||||
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
- uses: dtolnay/rust-toolchain@stable
|
||||||
|
|
||||||
- uses: pnpm/action-setup@v4
|
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
cache: pnpm
|
|
||||||
|
- run: corepack enable
|
||||||
|
|
||||||
- name: Setup
|
- name: Setup
|
||||||
env:
|
env:
|
||||||
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
|
R2_RELEASES_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
|
||||||
R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
|
R2_RELEASES_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
|
||||||
run: |
|
run: |
|
||||||
|
# Configure Git
|
||||||
|
git config --global user.name "github-actions[bot]"
|
||||||
|
git config --global user.email "github-actions[bot]@users.noreply.github.com"
|
||||||
|
|
||||||
|
# Authenticate with NPM
|
||||||
|
cat << EOF > ~/.npmrc
|
||||||
|
//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}
|
||||||
|
EOF
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pnpm install
|
pnpm install --no-frozen-lockfile
|
||||||
|
|
||||||
# Install tsx globally
|
# Install tsx globally
|
||||||
npm install -g tsx
|
npm install -g tsx
|
||||||
|
|
@ -60,54 +76,57 @@ jobs:
|
||||||
CMD="$CMD --no-latest"
|
CMD="$CMD --no-latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "${{ inputs.reuse_engine_version }}" ]; then
|
||||||
|
CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
|
||||||
|
fi
|
||||||
|
|
||||||
eval "$CMD"
|
eval "$CMD"
|
||||||
|
|
||||||
binaries:
|
binaries:
|
||||||
name: "Build & Upload Binaries"
|
name: "Build & Upload Binaries"
|
||||||
needs: [setup]
|
needs: [setup]
|
||||||
|
if: ${{ !inputs.reuse_engine_version }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- platform: linux
|
- platform: linux
|
||||||
|
runner: depot-ubuntu-24.04-8
|
||||||
target: x86_64-unknown-linux-musl
|
target: x86_64-unknown-linux-musl
|
||||||
binary_ext: ""
|
binary_ext: ""
|
||||||
arch: x86_64
|
arch: x86_64
|
||||||
- platform: windows
|
- platform: windows
|
||||||
|
runner: depot-ubuntu-24.04-8
|
||||||
target: x86_64-pc-windows-gnu
|
target: x86_64-pc-windows-gnu
|
||||||
binary_ext: ".exe"
|
binary_ext: ".exe"
|
||||||
arch: x86_64
|
arch: x86_64
|
||||||
- platform: macos
|
- platform: macos
|
||||||
|
runner: depot-ubuntu-24.04-8
|
||||||
target: x86_64-apple-darwin
|
target: x86_64-apple-darwin
|
||||||
binary_ext: ""
|
binary_ext: ""
|
||||||
arch: x86_64
|
arch: x86_64
|
||||||
- platform: macos
|
- platform: macos
|
||||||
|
runner: depot-ubuntu-24.04-8
|
||||||
target: aarch64-apple-darwin
|
target: aarch64-apple-darwin
|
||||||
binary_ext: ""
|
binary_ext: ""
|
||||||
arch: aarch64
|
arch: aarch64
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ${{ matrix.runner }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- uses: pnpm/action-setup@v4
|
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: 20
|
|
||||||
cache: pnpm
|
|
||||||
|
|
||||||
- name: Build inspector frontend
|
|
||||||
run: |
|
|
||||||
pnpm install
|
|
||||||
SANDBOX_AGENT_SKIP_INSPECTOR=1 pnpm --filter @sandbox-agent/inspector build
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
- name: Build binary
|
- name: Build binary
|
||||||
run: |
|
run: |
|
||||||
|
# Use Docker BuildKit
|
||||||
|
export DOCKER_BUILDKIT=1
|
||||||
|
|
||||||
|
# Build the binary using our Dockerfile
|
||||||
docker/release/build.sh ${{ matrix.target }}
|
docker/release/build.sh ${{ matrix.target }}
|
||||||
|
|
||||||
|
# Make sure dist directory exists and binary is there
|
||||||
ls -la dist/
|
ls -la dist/
|
||||||
|
|
||||||
- name: Upload to R2
|
- name: Upload to R2
|
||||||
|
|
@ -115,10 +134,11 @@ jobs:
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.R2_RELEASES_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.R2_RELEASES_SECRET_ACCESS_KEY }}
|
||||||
run: |
|
run: |
|
||||||
# Install AWS CLI
|
# Install dependencies for AWS CLI
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install -y unzip curl
|
sudo apt-get install -y unzip curl
|
||||||
|
|
||||||
|
# Install AWS CLI
|
||||||
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
|
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
|
||||||
unzip awscliv2.zip
|
unzip awscliv2.zip
|
||||||
sudo ./aws/install --update
|
sudo ./aws/install --update
|
||||||
|
|
@ -126,7 +146,7 @@ jobs:
|
||||||
COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
|
COMMIT_SHA_SHORT="${GITHUB_SHA::7}"
|
||||||
BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
|
BINARY_PATH="dist/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}"
|
||||||
|
|
||||||
# Upload to commit directory for later promotion
|
# Must specify --checksum-algorithm for compatibility with R2
|
||||||
aws s3 cp \
|
aws s3 cp \
|
||||||
"${BINARY_PATH}" \
|
"${BINARY_PATH}" \
|
||||||
"s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
|
"s3://rivet-releases/sandbox-agent/${COMMIT_SHA_SHORT}/binaries/sandbox-agent-${{ matrix.target }}${{ matrix.binary_ext }}" \
|
||||||
|
|
@ -134,10 +154,48 @@ jobs:
|
||||||
--endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
|
--endpoint-url https://2a94c6a0ced8d35ea63cddc86c2681e7.r2.cloudflarestorage.com \
|
||||||
--checksum-algorithm CRC32
|
--checksum-algorithm CRC32
|
||||||
|
|
||||||
|
docker:
|
||||||
|
name: "Build & Push Docker Images"
|
||||||
|
needs: [setup]
|
||||||
|
if: ${{ !inputs.reuse_engine_version }}
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
include:
|
||||||
|
- platform: linux/arm64
|
||||||
|
runner: depot-ubuntu-24.04-arm-8
|
||||||
|
arch_suffix: -arm64
|
||||||
|
- platform: linux/amd64
|
||||||
|
runner: depot-ubuntu-24.04-8
|
||||||
|
arch_suffix: -amd64
|
||||||
|
runs-on: ${{ matrix.runner }}
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 0
|
||||||
|
|
||||||
|
- name: Set outputs
|
||||||
|
id: vars
|
||||||
|
run: echo "sha_short=${GITHUB_SHA::7}" >> $GITHUB_OUTPUT
|
||||||
|
|
||||||
|
- uses: ./.github/actions/docker-setup
|
||||||
|
with:
|
||||||
|
docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
|
||||||
|
docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
|
||||||
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build & Push
|
||||||
|
uses: docker/build-push-action@v4
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
tags: rivetdev/sandbox-agent:${{ steps.vars.outputs.sha_short }}${{ matrix.arch_suffix }}
|
||||||
|
file: docker/runtime/Dockerfile
|
||||||
|
platforms: ${{ matrix.platform }}
|
||||||
|
|
||||||
complete:
|
complete:
|
||||||
name: "Complete"
|
name: "Complete"
|
||||||
needs: [setup, binaries]
|
needs: [setup, docker, binaries]
|
||||||
if: ${{ always() && !cancelled() && needs.setup.result == 'success' && needs.binaries.result == 'success' }}
|
if: ${{ always() && !cancelled() && needs.setup.result == 'success' && (needs.docker.result == 'success' || needs.docker.result == 'skipped') && (needs.binaries.result == 'success' || needs.binaries.result == 'skipped') }}
|
||||||
runs-on: ubuntu-24.04
|
runs-on: ubuntu-24.04
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
@ -146,17 +204,21 @@ jobs:
|
||||||
|
|
||||||
- uses: dtolnay/rust-toolchain@stable
|
- uses: dtolnay/rust-toolchain@stable
|
||||||
|
|
||||||
- uses: pnpm/action-setup@v4
|
|
||||||
|
|
||||||
- uses: actions/setup-node@v4
|
- uses: actions/setup-node@v4
|
||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
registry-url: "https://registry.npmjs.org"
|
registry-url: "https://registry.npmjs.org"
|
||||||
cache: pnpm
|
|
||||||
|
- run: corepack enable
|
||||||
|
|
||||||
|
- uses: ./.github/actions/docker-setup
|
||||||
|
with:
|
||||||
|
docker_username: ${{ secrets.DOCKER_CI_USERNAME }}
|
||||||
|
docker_password: ${{ secrets.DOCKER_CI_ACCESS_TOKEN }}
|
||||||
|
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Complete
|
- name: Complete
|
||||||
env:
|
env:
|
||||||
# https://cli.github.com/manual/gh_help_environment
|
|
||||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||||
CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
|
CARGO_REGISTRY_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
|
||||||
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
||||||
|
|
@ -169,7 +231,7 @@ jobs:
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
pnpm install
|
pnpm install --no-frozen-lockfile
|
||||||
|
|
||||||
# Install tsx globally
|
# Install tsx globally
|
||||||
npm install -g tsx
|
npm install -g tsx
|
||||||
|
|
@ -181,4 +243,8 @@ jobs:
|
||||||
CMD="$CMD --no-latest"
|
CMD="$CMD --no-latest"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -n "${{ inputs.reuse_engine_version }}" ]; then
|
||||||
|
CMD="$CMD --reuse-engine-version \"${{ inputs.reuse_engine_version }}\""
|
||||||
|
fi
|
||||||
|
|
||||||
eval "$CMD"
|
eval "$CMD"
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,10 @@ members = ["server/packages/*"]
|
||||||
[workspace.package]
|
[workspace.package]
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
authors = ["Sandbox Agent Contributors"]
|
authors = [ "Rivet Gaming, LLC <developer@rivet.gg>" ]
|
||||||
license = "Apache-2.0"
|
license = "Apache-2.0"
|
||||||
repository = "https://github.com/rivet-dev/sandbox-agent"
|
repository = "https://github.com/rivet-dev/sandbox-agent"
|
||||||
description = "Universal agent API for AI coding assistants"
|
description = "Universal API for automatic coding agents in sandboxes. Supports Claude Code, Codex, OpenCode, and Amp."
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
# Internal crates
|
# Internal crates
|
||||||
|
|
|
||||||
51
docker/runtime/Dockerfile
Normal file
51
docker/runtime/Dockerfile
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
# syntax=docker/dockerfile:1.10.0
|
||||||
|
|
||||||
|
# Build stage - compile the binary
|
||||||
|
FROM rust:1.88.0 AS builder
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
musl-tools \
|
||||||
|
musl-dev \
|
||||||
|
pkg-config \
|
||||||
|
ca-certificates \
|
||||||
|
git && \
|
||||||
|
apt-get clean && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
RUN rustup target add x86_64-unknown-linux-musl
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
# Build static binary
|
||||||
|
RUN --mount=type=cache,target=/usr/local/cargo/registry \
|
||||||
|
--mount=type=cache,target=/usr/local/cargo/git \
|
||||||
|
--mount=type=cache,target=/build/target \
|
||||||
|
SANDBOX_AGENT_SKIP_INSPECTOR=1 \
|
||||||
|
RUSTFLAGS="-C target-feature=+crt-static" \
|
||||||
|
cargo build -p sandbox-agent --release --target x86_64-unknown-linux-musl && \
|
||||||
|
cp target/x86_64-unknown-linux-musl/release/sandbox-agent /sandbox-agent
|
||||||
|
|
||||||
|
# Runtime stage - minimal image
|
||||||
|
FROM debian:bookworm-slim
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y \
|
||||||
|
ca-certificates \
|
||||||
|
curl \
|
||||||
|
git && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Copy the binary from builder
|
||||||
|
COPY --from=builder /sandbox-agent /usr/local/bin/sandbox-agent
|
||||||
|
RUN chmod +x /usr/local/bin/sandbox-agent
|
||||||
|
|
||||||
|
# Create non-root user
|
||||||
|
RUN useradd -m -s /bin/bash sandbox
|
||||||
|
USER sandbox
|
||||||
|
WORKDIR /home/sandbox
|
||||||
|
|
||||||
|
EXPOSE 2468
|
||||||
|
|
||||||
|
ENTRYPOINT ["sandbox-agent"]
|
||||||
|
CMD ["--host", "0.0.0.0", "--port", "2468"]
|
||||||
|
|
@ -21,6 +21,7 @@ Capabilities tell you which features are supported for the selected agent:
|
||||||
- `tool_calls` and `tool_results` indicate tool execution events.
|
- `tool_calls` and `tool_results` indicate tool execution events.
|
||||||
- `questions` and `permissions` indicate HITL flows.
|
- `questions` and `permissions` indicate HITL flows.
|
||||||
- `plan_mode` indicates that the agent supports plan-only execution.
|
- `plan_mode` indicates that the agent supports plan-only execution.
|
||||||
|
- `reasoning` and `status` indicate that the agent can emit reasoning/status content parts.
|
||||||
|
|
||||||
Use these to enable or disable UI affordances (tool panels, approval buttons, etc.).
|
Use these to enable or disable UI affordances (tool panels, approval buttons, etc.).
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@
|
||||||
"title": "sandbox-agent",
|
"title": "sandbox-agent",
|
||||||
"description": "",
|
"description": "",
|
||||||
"contact": {
|
"contact": {
|
||||||
"name": "Sandbox Agent Contributors"
|
"name": "Rivet Gaming, LLC",
|
||||||
|
"email": "developer@rivet.gg"
|
||||||
},
|
},
|
||||||
"license": {
|
"license": {
|
||||||
"name": "Apache-2.0"
|
"name": "Apache-2.0"
|
||||||
|
|
@ -662,6 +663,7 @@
|
||||||
"sessionLifecycle",
|
"sessionLifecycle",
|
||||||
"errorEvents",
|
"errorEvents",
|
||||||
"reasoning",
|
"reasoning",
|
||||||
|
"status",
|
||||||
"commandExecution",
|
"commandExecution",
|
||||||
"fileChanges",
|
"fileChanges",
|
||||||
"mcpTools",
|
"mcpTools",
|
||||||
|
|
@ -706,6 +708,9 @@
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)"
|
"description": "Whether this agent uses a shared long-running server process (vs per-turn subprocess)"
|
||||||
},
|
},
|
||||||
|
"status": {
|
||||||
|
"type": "boolean"
|
||||||
|
},
|
||||||
"streamingDeltas": {
|
"streamingDeltas": {
|
||||||
"type": "boolean"
|
"type": "boolean"
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { Daytona } from "@daytonaio/sdk";
|
||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import {
|
import {
|
||||||
ensureUrl,
|
ensureUrl,
|
||||||
|
logInspectorUrl,
|
||||||
runPrompt,
|
runPrompt,
|
||||||
waitForHealth,
|
waitForHealth,
|
||||||
} from "../shared/sandbox-agent-client.ts";
|
} from "../shared/sandbox-agent-client.ts";
|
||||||
|
|
@ -39,6 +40,7 @@ export async function setupDaytonaSandboxAgent(): Promise<{
|
||||||
|
|
||||||
const baseUrl = ensureUrl(preview.url);
|
const baseUrl = ensureUrl(preview.url);
|
||||||
await waitForHealth({ baseUrl, token, extraHeaders });
|
await waitForHealth({ baseUrl, token, extraHeaders });
|
||||||
|
logInspectorUrl({ baseUrl, token });
|
||||||
|
|
||||||
const cleanup = async () => {
|
const cleanup = async () => {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import Docker from "dockerode";
|
||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import {
|
import {
|
||||||
ensureUrl,
|
ensureUrl,
|
||||||
|
logInspectorUrl,
|
||||||
runPrompt,
|
runPrompt,
|
||||||
waitForHealth,
|
waitForHealth,
|
||||||
} from "../shared/sandbox-agent-client.ts";
|
} from "../shared/sandbox-agent-client.ts";
|
||||||
|
|
@ -83,6 +84,7 @@ export async function setupDockerSandboxAgent(): Promise<{
|
||||||
|
|
||||||
const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`);
|
const baseUrl = ensureUrl(`http://127.0.0.1:${hostPort}`);
|
||||||
await waitForHealth({ baseUrl, token });
|
await waitForHealth({ baseUrl, token });
|
||||||
|
logInspectorUrl({ baseUrl, token });
|
||||||
|
|
||||||
const cleanup = async () => {
|
const cleanup = async () => {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { Sandbox } from "@e2b/code-interpreter";
|
||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import {
|
import {
|
||||||
ensureUrl,
|
ensureUrl,
|
||||||
|
logInspectorUrl,
|
||||||
runPrompt,
|
runPrompt,
|
||||||
waitForHealth,
|
waitForHealth,
|
||||||
} from "../shared/sandbox-agent-client.ts";
|
} from "../shared/sandbox-agent-client.ts";
|
||||||
|
|
@ -45,6 +46,7 @@ export async function setupE2BSandboxAgent(): Promise<{
|
||||||
|
|
||||||
const baseUrl = ensureUrl(sandbox.getHost(port));
|
const baseUrl = ensureUrl(sandbox.getHost(port));
|
||||||
await waitForHealth({ baseUrl, token });
|
await waitForHealth({ baseUrl, token });
|
||||||
|
logInspectorUrl({ baseUrl, token });
|
||||||
|
|
||||||
const cleanup = async () => {
|
const cleanup = async () => {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,27 @@ export function ensureUrl(rawUrl: string): string {
|
||||||
return `https://${rawUrl}`;
|
return `https://${rawUrl}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const INSPECTOR_URL = "https://inspect.sandboxagent.dev";
|
||||||
|
|
||||||
|
export function buildInspectorUrl({
|
||||||
|
baseUrl,
|
||||||
|
token,
|
||||||
|
}: {
|
||||||
|
baseUrl: string;
|
||||||
|
token?: string;
|
||||||
|
}): string {
|
||||||
|
const normalized = normalizeBaseUrl(ensureUrl(baseUrl));
|
||||||
|
const params = new URLSearchParams({ url: normalized });
|
||||||
|
if (token) {
|
||||||
|
params.set("token", token);
|
||||||
|
}
|
||||||
|
return `${INSPECTOR_URL}?${params.toString()}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function logInspectorUrl({ baseUrl, token }: { baseUrl: string; token?: string }): void {
|
||||||
|
console.log(`Inspector: ${buildInspectorUrl({ baseUrl, token })}`);
|
||||||
|
}
|
||||||
|
|
||||||
type HeaderOptions = {
|
type HeaderOptions = {
|
||||||
token?: string;
|
token?: string;
|
||||||
extraHeaders?: Record<string, string>;
|
extraHeaders?: Record<string, string>;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import { Sandbox } from "@vercel/sandbox";
|
||||||
import { pathToFileURL } from "node:url";
|
import { pathToFileURL } from "node:url";
|
||||||
import {
|
import {
|
||||||
ensureUrl,
|
ensureUrl,
|
||||||
|
logInspectorUrl,
|
||||||
runPrompt,
|
runPrompt,
|
||||||
waitForHealth,
|
waitForHealth,
|
||||||
} from "../shared/sandbox-agent-client.ts";
|
} from "../shared/sandbox-agent-client.ts";
|
||||||
|
|
@ -61,6 +62,7 @@ export async function setupVercelSandboxAgent(): Promise<{
|
||||||
|
|
||||||
const baseUrl = ensureUrl(sandbox.domain(port));
|
const baseUrl = ensureUrl(sandbox.domain(port));
|
||||||
await waitForHealth({ baseUrl, token });
|
await waitForHealth({ baseUrl, token });
|
||||||
|
logInspectorUrl({ baseUrl, token });
|
||||||
|
|
||||||
const cleanup = async () => {
|
const cleanup = async () => {
|
||||||
try {
|
try {
|
||||||
|
|
|
||||||
|
|
@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te
|
||||||
- Agent flow coverage in `agent-flows/`
|
- Agent flow coverage in `agent-flows/`
|
||||||
- Agent management coverage in `agent-management/`
|
- Agent management coverage in `agent-management/`
|
||||||
- Shared server manager coverage in `server-manager/`
|
- Shared server manager coverage in `server-manager/`
|
||||||
- HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
|
- HTTP endpoint snapshots in `http/` (snapshots in `http/snapshots/`)
|
||||||
|
- Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`)
|
||||||
- UI coverage in `ui/`
|
- UI coverage in `ui/`
|
||||||
- Shared helpers in `common/`
|
- Shared helpers in `common/`
|
||||||
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
|
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
|
||||||
|
|
||||||
## Snapshot tests
|
## Snapshot tests
|
||||||
|
|
||||||
The HTTP/SSE snapshot suite entrypoint lives in:
|
HTTP endpoint snapshot entrypoint:
|
||||||
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
|
- `server/packages/sandbox-agent/tests/http_endpoints.rs`
|
||||||
|
|
||||||
|
Session snapshot entrypoint:
|
||||||
|
- `server/packages/sandbox-agent/tests/sessions.rs`
|
||||||
|
|
||||||
Snapshots are written to:
|
Snapshots are written to:
|
||||||
- `server/packages/sandbox-agent/tests/http/snapshots/`
|
- `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots)
|
||||||
|
- `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots)
|
||||||
|
|
||||||
## Agent selection
|
## Agent selection
|
||||||
|
|
||||||
|
|
@ -71,6 +76,7 @@ To keep snapshots deterministic:
|
||||||
- IDs, timestamps, native IDs
|
- IDs, timestamps, native IDs
|
||||||
- text content, tool inputs/outputs, provider-specific metadata
|
- text content, tool inputs/outputs, provider-specific metadata
|
||||||
- `source` and `synthetic` flags (these are implementation details)
|
- `source` and `synthetic` flags (these are implementation details)
|
||||||
|
- Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across agents; validate those content types separately in their capability-specific tests.
|
||||||
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
|
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
|
||||||
- Event streams are truncated after the first assistant or error event.
|
- Event streams are truncated after the first assistant or error event.
|
||||||
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
|
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
|
||||||
|
|
@ -81,14 +87,19 @@ To keep snapshots deterministic:
|
||||||
|
|
||||||
## Typical commands
|
## Typical commands
|
||||||
|
|
||||||
Run only Claude snapshots:
|
Run only Claude session snapshots:
|
||||||
```
|
```
|
||||||
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots
|
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions
|
||||||
```
|
```
|
||||||
|
|
||||||
Run all detected agents:
|
Run all detected session snapshots:
|
||||||
```
|
```
|
||||||
cargo test -p sandbox-agent --test http_sse_snapshots
|
cargo test -p sandbox-agent --test sessions
|
||||||
|
```
|
||||||
|
|
||||||
|
Run HTTP endpoint snapshots:
|
||||||
|
```
|
||||||
|
cargo test -p sandbox-agent --test http_endpoints
|
||||||
```
|
```
|
||||||
|
|
||||||
## Universal Schema
|
## Universal Schema
|
||||||
|
|
|
||||||
|
|
@ -2913,6 +2913,7 @@ pub struct AgentCapabilities {
|
||||||
pub session_lifecycle: bool,
|
pub session_lifecycle: bool,
|
||||||
pub error_events: bool,
|
pub error_events: bool,
|
||||||
pub reasoning: bool,
|
pub reasoning: bool,
|
||||||
|
pub status: bool,
|
||||||
pub command_execution: bool,
|
pub command_execution: bool,
|
||||||
pub file_changes: bool,
|
pub file_changes: bool,
|
||||||
pub mcp_tools: bool,
|
pub mcp_tools: bool,
|
||||||
|
|
@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
|
||||||
session_lifecycle: false,
|
session_lifecycle: false,
|
||||||
error_events: false,
|
error_events: false,
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
|
status: false,
|
||||||
command_execution: false,
|
command_execution: false,
|
||||||
file_changes: false,
|
file_changes: false,
|
||||||
mcp_tools: false,
|
mcp_tools: false,
|
||||||
|
|
@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
|
||||||
session_lifecycle: true,
|
session_lifecycle: true,
|
||||||
error_events: true,
|
error_events: true,
|
||||||
reasoning: true,
|
reasoning: true,
|
||||||
|
status: true,
|
||||||
command_execution: true,
|
command_execution: true,
|
||||||
file_changes: true,
|
file_changes: true,
|
||||||
mcp_tools: true,
|
mcp_tools: true,
|
||||||
|
|
@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
|
||||||
session_lifecycle: true,
|
session_lifecycle: true,
|
||||||
error_events: true,
|
error_events: true,
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
|
status: true,
|
||||||
command_execution: false,
|
command_execution: false,
|
||||||
file_changes: false,
|
file_changes: false,
|
||||||
mcp_tools: false,
|
mcp_tools: false,
|
||||||
|
|
@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
|
||||||
session_lifecycle: false,
|
session_lifecycle: false,
|
||||||
error_events: true,
|
error_events: true,
|
||||||
reasoning: false,
|
reasoning: false,
|
||||||
|
status: false,
|
||||||
command_execution: false,
|
command_execution: false,
|
||||||
file_changes: false,
|
file_changes: false,
|
||||||
mcp_tools: false,
|
mcp_tools: false,
|
||||||
|
|
@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
|
||||||
session_lifecycle: true,
|
session_lifecycle: true,
|
||||||
error_events: true,
|
error_events: true,
|
||||||
reasoning: true,
|
reasoning: true,
|
||||||
|
status: true,
|
||||||
command_execution: true,
|
command_execution: true,
|
||||||
file_changes: true,
|
file_changes: true,
|
||||||
mcp_tools: true,
|
mcp_tools: true,
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use axum::body::{Body, Bytes};
|
use axum::body::{Body, Bytes};
|
||||||
|
|
@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) {
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn poll_events_until(
|
async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
|
||||||
app: &Router,
|
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||||
session_id: &str,
|
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||||
timeout: Duration,
|
assert_eq!(status, StatusCode::OK, "poll events");
|
||||||
) -> Vec<Value> {
|
let new_events = payload
|
||||||
let start = Instant::now();
|
.get("events")
|
||||||
let mut offset = 0u64;
|
.and_then(Value::as_array)
|
||||||
let mut events = Vec::new();
|
.cloned()
|
||||||
while start.elapsed() < timeout {
|
.unwrap_or_default();
|
||||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
let new_offset = new_events
|
||||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
.last()
|
||||||
assert_eq!(status, StatusCode::OK, "poll events");
|
.and_then(|event| event.get("sequence"))
|
||||||
let new_events = payload
|
.and_then(Value::as_u64)
|
||||||
.get("events")
|
.unwrap_or(offset);
|
||||||
.and_then(Value::as_array)
|
(new_events, new_offset)
|
||||||
.cloned()
|
|
||||||
.unwrap_or_default();
|
|
||||||
if !new_events.is_empty() {
|
|
||||||
if let Some(last) = new_events
|
|
||||||
.last()
|
|
||||||
.and_then(|event| event.get("sequence"))
|
|
||||||
.and_then(Value::as_u64)
|
|
||||||
{
|
|
||||||
offset = last;
|
|
||||||
}
|
|
||||||
events.extend(new_events);
|
|
||||||
if should_stop(&events) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
|
||||||
}
|
|
||||||
events
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn read_sse_events(
|
async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
|
||||||
|
let start = Instant::now();
|
||||||
|
let mut offset = 0u64;
|
||||||
|
loop {
|
||||||
|
if start.elapsed() >= timeout {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
||||||
|
if new_events.is_empty() {
|
||||||
|
if offset == 0 {
|
||||||
|
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset = new_offset;
|
||||||
|
}
|
||||||
|
offset
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll_events_until_from(
|
||||||
app: &Router,
|
app: &Router,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
|
offset: u64,
|
||||||
|
timeout: Duration,
|
||||||
|
) -> Vec<Value> {
|
||||||
|
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||||
|
poll_events_until_from(app, session_id, 0, timeout).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read_sse_events_from(
|
||||||
|
app: &Router,
|
||||||
|
session_id: &str,
|
||||||
|
offset: u64,
|
||||||
timeout: Duration,
|
timeout: Duration,
|
||||||
) -> Vec<Value> {
|
) -> Vec<Value> {
|
||||||
let request = Request::builder()
|
let request = Request::builder()
|
||||||
.method(Method::GET)
|
.method(Method::GET)
|
||||||
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
|
.uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
|
||||||
.body(Body::empty())
|
.body(Body::empty())
|
||||||
.expect("sse request");
|
.expect("sse request");
|
||||||
let response = app
|
let response = app
|
||||||
|
|
@ -291,6 +307,10 @@ async fn read_sse_events(
|
||||||
events
|
events
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
|
||||||
|
read_sse_events_from(app, session_id, 0, timeout).await
|
||||||
|
}
|
||||||
|
|
||||||
async fn read_turn_stream_events(
|
async fn read_turn_stream_events(
|
||||||
app: &Router,
|
app: &Router,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
|
|
@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value {
|
||||||
!events.iter().any(is_unparsed_event),
|
!events.iter().any(is_unparsed_event),
|
||||||
"agent.unparsed event encountered"
|
"agent.unparsed event encountered"
|
||||||
);
|
);
|
||||||
let normalized = events
|
let scrubbed = scrub_events(events);
|
||||||
|
let normalized = scrubbed
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.map(|(idx, event)| normalize_event(event, idx + 1))
|
.map(|(idx, event)| normalize_event(event, idx + 1))
|
||||||
|
|
@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value {
|
||||||
Value::Array(normalized)
|
Value::Array(normalized)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn scrub_events(events: &[Value]) -> Vec<Value> {
|
||||||
|
let mut scrub_ids = HashSet::new();
|
||||||
|
let mut output = Vec::new();
|
||||||
|
|
||||||
|
for event in events {
|
||||||
|
let event_type = event.get("type").and_then(Value::as_str).unwrap_or("");
|
||||||
|
match event_type {
|
||||||
|
"item.started" | "item.completed" => {
|
||||||
|
if let Some(item) = event.get("data").and_then(|data| data.get("item")) {
|
||||||
|
if should_scrub_item(item) {
|
||||||
|
record_item_ids(item, &mut scrub_ids);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
output.push(event.clone());
|
||||||
|
}
|
||||||
|
"item.delta" => {
|
||||||
|
let item_id = event
|
||||||
|
.get("data")
|
||||||
|
.and_then(|data| data.get("item_id"))
|
||||||
|
.and_then(Value::as_str);
|
||||||
|
let native_item_id = event
|
||||||
|
.get("data")
|
||||||
|
.and_then(|data| data.get("native_item_id"))
|
||||||
|
.and_then(Value::as_str);
|
||||||
|
if item_id.is_some_and(|id| scrub_ids.contains(id))
|
||||||
|
|| native_item_id.is_some_and(|id| scrub_ids.contains(id))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
output.push(event.clone());
|
||||||
|
}
|
||||||
|
_ => output.push(event.clone()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
fn should_scrub_item(item: &Value) -> bool {
|
||||||
|
if item
|
||||||
|
.get("kind")
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.is_some_and(|kind| kind == "status")
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let types = item_content_types(item);
|
||||||
|
let filtered = types
|
||||||
|
.iter()
|
||||||
|
.filter(|value| value.as_str() != "reasoning" && value.as_str() != "status")
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
types.iter().any(|value| value == "reasoning") && filtered.is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn record_item_ids(item: &Value, ids: &mut HashSet<String>) {
|
||||||
|
if let Some(id) = item.get("item_id").and_then(Value::as_str) {
|
||||||
|
ids.insert(id.to_string());
|
||||||
|
}
|
||||||
|
if let Some(id) = item.get("native_item_id").and_then(Value::as_str) {
|
||||||
|
ids.insert(id.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
|
fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
|
||||||
if let Some(idx) = events
|
if let Some(idx) = events
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value {
|
||||||
if let Some(event_type) = event.get("type").and_then(Value::as_str) {
|
if let Some(event_type) = event.get("type").and_then(Value::as_str) {
|
||||||
map.insert("type".to_string(), Value::String(event_type.to_string()));
|
map.insert("type".to_string(), Value::String(event_type.to_string()));
|
||||||
}
|
}
|
||||||
if let Some(source) = event.get("source").and_then(Value::as_str) {
|
|
||||||
map.insert("source".to_string(), Value::String(source.to_string()));
|
|
||||||
}
|
|
||||||
if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) {
|
|
||||||
map.insert("synthetic".to_string(), Value::Bool(synthetic));
|
|
||||||
}
|
|
||||||
let data = event.get("data").unwrap_or(&Value::Null);
|
let data = event.get("data").unwrap_or(&Value::Null);
|
||||||
match event.get("type").and_then(Value::as_str).unwrap_or("") {
|
match event.get("type").and_then(Value::as_str).unwrap_or("") {
|
||||||
"session.started" => {
|
"session.started" => {
|
||||||
|
|
@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value {
|
||||||
let types = content
|
let types = content
|
||||||
.iter()
|
.iter()
|
||||||
.filter_map(|part| part.get("type").and_then(Value::as_str))
|
.filter_map(|part| part.get("type").and_then(Value::as_str))
|
||||||
|
.filter(|value| *value != "reasoning" && *value != "status")
|
||||||
.map(|value| Value::String(value.to_string()))
|
.map(|value| Value::String(value.to_string()))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
map.insert("content_types".to_string(), Value::Array(types));
|
map.insert("content_types".to_string(), Value::Array(types));
|
||||||
|
|
@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value {
|
||||||
Value::Object(map)
|
Value::Object(map)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn item_content_types(item: &Value) -> Vec<String> {
|
||||||
|
item.get("content")
|
||||||
|
.and_then(Value::as_array)
|
||||||
|
.map(|content| {
|
||||||
|
content
|
||||||
|
.iter()
|
||||||
|
.filter_map(|part| part.get("type").and_then(Value::as_str))
|
||||||
|
.map(|value| value.to_string())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn event_content_types(event: &Value) -> Vec<String> {
|
||||||
|
event
|
||||||
|
.get("data")
|
||||||
|
.and_then(|data| data.get("item"))
|
||||||
|
.map(item_content_types)
|
||||||
|
.unwrap_or_default()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn event_is_status_item(event: &Value) -> bool {
|
||||||
|
event
|
||||||
|
.get("data")
|
||||||
|
.and_then(|data| data.get("item"))
|
||||||
|
.and_then(|item| item.get("kind"))
|
||||||
|
.and_then(Value::as_str)
|
||||||
|
.is_some_and(|kind| kind == "status")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn events_have_content_type(events: &[Value], content_type: &str) -> bool {
|
||||||
|
events
|
||||||
|
.iter()
|
||||||
|
.any(|event| event_content_types(event).iter().any(|t| t == content_type))
|
||||||
|
}
|
||||||
|
|
||||||
fn normalize_session_end(data: &Value) -> Value {
|
fn normalize_session_end(data: &Value) -> Value {
|
||||||
let mut map = Map::new();
|
let mut map = Map::new();
|
||||||
if let Some(reason) = data.get("reason").and_then(Value::as_str) {
|
if let Some(reason) = data.get("reason").and_then(Value::as_str) {
|
||||||
|
|
@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async fn poll_events_until_match_from<F>(
|
||||||
|
app: &Router,
|
||||||
|
session_id: &str,
|
||||||
|
offset: u64,
|
||||||
|
timeout: Duration,
|
||||||
|
stop: F,
|
||||||
|
) -> Vec<Value>
|
||||||
|
where
|
||||||
|
F: Fn(&[Value]) -> bool,
|
||||||
|
{
|
||||||
|
let start = Instant::now();
|
||||||
|
let mut offset = offset;
|
||||||
|
let mut events = Vec::new();
|
||||||
|
while start.elapsed() < timeout {
|
||||||
|
let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
|
||||||
|
if !new_events.is_empty() {
|
||||||
|
offset = new_offset;
|
||||||
|
events.extend(new_events);
|
||||||
|
if stop(&events) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||||
|
}
|
||||||
|
events
|
||||||
|
}
|
||||||
|
|
||||||
async fn poll_events_until_match<F>(
|
async fn poll_events_until_match<F>(
|
||||||
app: &Router,
|
app: &Router,
|
||||||
session_id: &str,
|
session_id: &str,
|
||||||
|
|
@ -726,34 +870,7 @@ async fn poll_events_until_match<F>(
|
||||||
where
|
where
|
||||||
F: Fn(&[Value]) -> bool,
|
F: Fn(&[Value]) -> bool,
|
||||||
{
|
{
|
||||||
let start = Instant::now();
|
poll_events_until_match_from(app, session_id, 0, timeout, stop).await
|
||||||
let mut offset = 0u64;
|
|
||||||
let mut events = Vec::new();
|
|
||||||
while start.elapsed() < timeout {
|
|
||||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
|
||||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "poll events");
|
|
||||||
let new_events = payload
|
|
||||||
.get("events")
|
|
||||||
.and_then(Value::as_array)
|
|
||||||
.cloned()
|
|
||||||
.unwrap_or_default();
|
|
||||||
if !new_events.is_empty() {
|
|
||||||
if let Some(last) = new_events
|
|
||||||
.last()
|
|
||||||
.and_then(|event| event.get("sequence"))
|
|
||||||
.and_then(Value::as_u64)
|
|
||||||
{
|
|
||||||
offset = last;
|
|
||||||
}
|
|
||||||
events.extend(new_events);
|
|
||||||
if stop(&events) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
|
||||||
}
|
|
||||||
events
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_permission_id(events: &[Value]) -> Option<String> {
|
fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||||
|
|
@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
|
||||||
let session_id = format!("session-{}", config.agent.as_str());
|
let session_id = format!("session-{}", config.agent.as_str());
|
||||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||||
|
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
||||||
send_message(app, &session_id).await;
|
send_message(app, &session_id).await;
|
||||||
|
|
||||||
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
|
let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
|
||||||
let events = truncate_after_first_stop(&events);
|
let events = truncate_after_first_stop(&events);
|
||||||
assert!(
|
assert!(
|
||||||
!events.is_empty(),
|
!events.is_empty(),
|
||||||
|
|
@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
);
|
);
|
||||||
let normalized = normalize_events(&events);
|
let normalized = normalize_events(&events);
|
||||||
insta::with_settings!({
|
insta::with_settings!({
|
||||||
snapshot_suffix => snapshot_name("http_events", Some(config.agent)),
|
snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)),
|
||||||
|
snapshot_path => "../sessions/snapshots",
|
||||||
}, {
|
}, {
|
||||||
insta::assert_yaml_snapshot!(normalized);
|
insta::assert_yaml_snapshot!(normalized);
|
||||||
});
|
});
|
||||||
|
|
@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
|
||||||
let session_id = format!("sse-{}", config.agent.as_str());
|
let session_id = format!("sse-{}", config.agent.as_str());
|
||||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||||
|
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
|
||||||
|
|
||||||
let sse_task = {
|
let sse_task = {
|
||||||
let app = app.clone();
|
let app = app.clone();
|
||||||
let session_id = session_id.clone();
|
let session_id = session_id.clone();
|
||||||
|
let offset = offset;
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
read_sse_events(&app, &session_id, Duration::from_secs(120)).await
|
read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
);
|
);
|
||||||
let normalized = normalize_events(&events);
|
let normalized = normalize_events(&events);
|
||||||
insta::with_settings!({
|
insta::with_settings!({
|
||||||
snapshot_suffix => snapshot_name("sse_events", Some(config.agent)),
|
snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)),
|
||||||
|
snapshot_path => "../sessions/snapshots",
|
||||||
}, {
|
}, {
|
||||||
insta::assert_yaml_snapshot!(normalized);
|
insta::assert_yaml_snapshot!(normalized);
|
||||||
});
|
});
|
||||||
|
|
@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
|
||||||
config.agent
|
config.agent
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn auth_snapshots() {
|
|
||||||
let token = "test-token";
|
|
||||||
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
|
|
||||||
|
|
||||||
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "health should be public");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("auth_health_public", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": normalize_health(&payload),
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
|
|
||||||
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
|
|
||||||
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("auth_missing_token", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": payload,
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
|
|
||||||
let request = Request::builder()
|
|
||||||
.method(Method::GET)
|
|
||||||
.uri("/v1/agents")
|
|
||||||
.header(header::AUTHORIZATION, "Bearer wrong-token")
|
|
||||||
.body(Body::empty())
|
|
||||||
.expect("auth invalid request");
|
|
||||||
let (status, _headers, payload) = send_json_request(&app.app, request).await;
|
|
||||||
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("auth_invalid_token", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": payload,
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
|
|
||||||
let request = Request::builder()
|
|
||||||
.method(Method::GET)
|
|
||||||
.uri("/v1/agents")
|
|
||||||
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
|
||||||
.body(Body::empty())
|
|
||||||
.expect("auth valid request");
|
|
||||||
let (status, _headers, payload) = send_json_request(&app.app, request).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "valid token should allow request");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("auth_valid_token", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": normalize_agent_list(&payload),
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn cors_snapshots() {
|
|
||||||
let cors = CorsLayer::new()
|
|
||||||
.allow_origin(vec![HeaderValue::from_static("http://example.com")])
|
|
||||||
.allow_methods([Method::GET, Method::POST])
|
|
||||||
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
|
|
||||||
.allow_credentials(true);
|
|
||||||
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
|
|
||||||
|
|
||||||
let preflight = Request::builder()
|
|
||||||
.method(Method::OPTIONS)
|
|
||||||
.uri("/v1/health")
|
|
||||||
.header(header::ORIGIN, "http://example.com")
|
|
||||||
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
|
|
||||||
.header(
|
|
||||||
header::ACCESS_CONTROL_REQUEST_HEADERS,
|
|
||||||
"authorization,content-type",
|
|
||||||
)
|
|
||||||
.body(Body::empty())
|
|
||||||
.expect("cors preflight request");
|
|
||||||
let (status, headers, _payload) = send_request(&app.app, preflight).await;
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("cors_preflight", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
|
|
||||||
});
|
|
||||||
|
|
||||||
let actual = Request::builder()
|
|
||||||
.method(Method::GET)
|
|
||||||
.uri("/v1/health")
|
|
||||||
.header(header::ORIGIN, "http://example.com")
|
|
||||||
.body(Body::empty())
|
|
||||||
.expect("cors actual request");
|
|
||||||
let (status, headers, payload) = send_json_request(&app.app, actual).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("cors_actual", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"cors": snapshot_cors(status, &headers),
|
|
||||||
"payload": normalize_health(&payload),
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn api_endpoints_snapshots() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
|
|
||||||
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "health status");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("health", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_health(&health));
|
|
||||||
});
|
|
||||||
|
|
||||||
// List agents (just verify the API returns correct agent IDs, not install state)
|
|
||||||
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "agents list");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("agents_list", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
|
|
||||||
});
|
|
||||||
|
|
||||||
// Install agents (ensure they're available for subsequent tests)
|
|
||||||
for config in &configs {
|
|
||||||
let _guard = apply_credentials(&config.credentials);
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/agents/{}/install", config.agent.as_str()),
|
|
||||||
Some(json!({})),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_status(status));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut session_ids = Vec::new();
|
|
||||||
for config in &configs {
|
|
||||||
let _guard = apply_credentials(&config.credentials);
|
|
||||||
let (status, modes) = send_json(
|
|
||||||
&app.app,
|
|
||||||
Method::GET,
|
|
||||||
&format!("/v1/agents/{}/modes", config.agent.as_str()),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "agent modes");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
|
|
||||||
});
|
|
||||||
|
|
||||||
let session_id = format!("snapshot-{}", config.agent.as_str());
|
|
||||||
let permission_mode = test_permission_mode(config.agent);
|
|
||||||
let (status, created) = send_json(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/sessions/{session_id}"),
|
|
||||||
Some(json!({
|
|
||||||
"agent": config.agent.as_str(),
|
|
||||||
"permissionMode": permission_mode
|
|
||||||
})),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "create session");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_create_session(&created));
|
|
||||||
});
|
|
||||||
session_ids.push((config.agent, session_id));
|
|
||||||
}
|
|
||||||
|
|
||||||
let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
|
|
||||||
assert_eq!(status, StatusCode::OK, "list sessions");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("sessions_list", None),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
|
|
||||||
});
|
|
||||||
|
|
||||||
for (agent, session_id) in &session_ids {
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/sessions/{session_id}/messages"),
|
|
||||||
Some(json!({ "message": PROMPT })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("send_message", Some(*agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_status(status));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn approval_flow_snapshots() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
let capabilities = fetch_capabilities(&app.app).await;
|
|
||||||
|
|
||||||
for config in &configs {
|
|
||||||
// OpenCode doesn't support "plan" permission mode required for approval flows
|
|
||||||
if config.agent == AgentId::Opencode {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let caps = capabilities
|
|
||||||
.get(config.agent.as_str())
|
|
||||||
.expect("capabilities missing");
|
|
||||||
|
|
||||||
let _guard = apply_credentials(&config.credentials);
|
|
||||||
install_agent(&app.app, config.agent).await;
|
|
||||||
|
|
||||||
if caps.plan_mode && caps.permissions {
|
|
||||||
let permission_session = format!("perm-{}", config.agent.as_str());
|
|
||||||
create_session(&app.app, config.agent, &permission_session, "plan").await;
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/sessions/{permission_session}/messages"),
|
|
||||||
Some(json!({ "message": PERMISSION_PROMPT })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
|
|
||||||
|
|
||||||
let permission_events = poll_events_until_match(
|
|
||||||
&app.app,
|
|
||||||
&permission_session,
|
|
||||||
Duration::from_secs(120),
|
|
||||||
|events| find_permission_id(events).is_some() || should_stop(events),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
let permission_events = truncate_permission_events(&permission_events);
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_events(&permission_events));
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Some(permission_id) = find_permission_id(&permission_events) {
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
|
|
||||||
),
|
|
||||||
Some(json!({ "reply": "once" })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_status(status));
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let (status, payload) = send_json(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{permission_session}/permissions/missing-permission/reply"
|
|
||||||
),
|
|
||||||
Some(json!({ "reply": "once" })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert!(!status.is_success(), "missing permission id should error");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": payload,
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if caps.questions {
|
|
||||||
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
|
||||||
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/sessions/{question_reply_session}/messages"),
|
|
||||||
Some(json!({ "message": QUESTION_PROMPT })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
|
|
||||||
|
|
||||||
let question_events = poll_events_until_match(
|
|
||||||
&app.app,
|
|
||||||
&question_reply_session,
|
|
||||||
Duration::from_secs(120),
|
|
||||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
let question_events = truncate_question_events(&question_events);
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_events(&question_events));
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
|
|
||||||
),
|
|
||||||
Some(json!({ "answers": answers })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_status(status));
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let (status, payload) = send_json(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{question_reply_session}/questions/missing-question/reply"
|
|
||||||
),
|
|
||||||
Some(json!({ "answers": [] })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert!(!status.is_success(), "missing question id should error");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": payload,
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
|
||||||
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!("/v1/sessions/{question_reject_session}/messages"),
|
|
||||||
Some(json!({ "message": QUESTION_PROMPT })),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
|
|
||||||
|
|
||||||
let reject_events = poll_events_until_match(
|
|
||||||
&app.app,
|
|
||||||
&question_reject_session,
|
|
||||||
Duration::from_secs(120),
|
|
||||||
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
let reject_events = truncate_question_events(&reject_events);
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(normalize_events(&reject_events));
|
|
||||||
});
|
|
||||||
|
|
||||||
if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
|
|
||||||
let status = send_status(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot_status(status));
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
let (status, payload) = send_json(
|
|
||||||
&app.app,
|
|
||||||
Method::POST,
|
|
||||||
&format!(
|
|
||||||
"/v1/sessions/{question_reject_session}/questions/missing-question/reject"
|
|
||||||
),
|
|
||||||
None,
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
assert!(!status.is_success(), "missing question id reject should error");
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(json!({
|
|
||||||
"status": status.as_u16(),
|
|
||||||
"payload": payload,
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn http_events_snapshots() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
for config in &configs {
|
|
||||||
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
|
||||||
// See: https://github.com/opencode-ai/opencode/issues/XXX
|
|
||||||
if config.agent == AgentId::Opencode {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
run_http_events_snapshot(&app.app, config).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
|
||||||
let _guard = apply_credentials(&config.credentials);
|
|
||||||
install_agent(app, config.agent).await;
|
|
||||||
|
|
||||||
let session_a = format!("concurrent-a-{}", config.agent.as_str());
|
|
||||||
let session_b = format!("concurrent-b-{}", config.agent.as_str());
|
|
||||||
let perm_mode = test_permission_mode(config.agent);
|
|
||||||
create_session(app, config.agent, &session_a, perm_mode).await;
|
|
||||||
create_session(app, config.agent, &session_b, perm_mode).await;
|
|
||||||
|
|
||||||
let app_a = app.clone();
|
|
||||||
let app_b = app.clone();
|
|
||||||
let send_a = send_message(&app_a, &session_a);
|
|
||||||
let send_b = send_message(&app_b, &session_b);
|
|
||||||
tokio::join!(send_a, send_b);
|
|
||||||
|
|
||||||
let app_a = app.clone();
|
|
||||||
let app_b = app.clone();
|
|
||||||
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
|
|
||||||
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
|
|
||||||
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
|
|
||||||
let events_a = truncate_after_first_stop(&events_a);
|
|
||||||
let events_b = truncate_after_first_stop(&events_b);
|
|
||||||
|
|
||||||
assert!(
|
|
||||||
!events_a.is_empty(),
|
|
||||||
"no events collected for concurrent session a {}",
|
|
||||||
config.agent
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
!events_b.is_empty(),
|
|
||||||
"no events collected for concurrent session b {}",
|
|
||||||
config.agent
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
should_stop(&events_a),
|
|
||||||
"timed out waiting for assistant/error event for concurrent session a {}",
|
|
||||||
config.agent
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
should_stop(&events_b),
|
|
||||||
"timed out waiting for assistant/error event for concurrent session b {}",
|
|
||||||
config.agent
|
|
||||||
);
|
|
||||||
|
|
||||||
let snapshot = json!({
|
|
||||||
"session_a": normalize_events(&events_a),
|
|
||||||
"session_b": normalize_events(&events_b),
|
|
||||||
});
|
|
||||||
insta::with_settings!({
|
|
||||||
snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
|
|
||||||
}, {
|
|
||||||
insta::assert_yaml_snapshot!(snapshot);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn sse_events_snapshots() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
for config in &configs {
|
|
||||||
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
|
||||||
// See: https://github.com/opencode-ai/opencode/issues/XXX
|
|
||||||
if config.agent == AgentId::Opencode {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
run_sse_events_snapshot(&app.app, config).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn turn_stream_route() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
for config in &configs {
|
|
||||||
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
|
||||||
// See: https://github.com/opencode-ai/opencode/issues/XXX
|
|
||||||
if config.agent == AgentId::Opencode {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
run_turn_stream_check(&app.app, config).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn concurrency_snapshots() {
|
|
||||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
|
||||||
let app = TestApp::new();
|
|
||||||
for config in &configs {
|
|
||||||
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
|
||||||
// See: https://github.com/opencode-ai/opencode/issues/XXX
|
|
||||||
if config.agent == AgentId::Opencode {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
run_concurrency_snapshot(&app.app, config).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
165
server/packages/sandbox-agent/tests/http/agent_endpoints.rs
Normal file
165
server/packages/sandbox-agent/tests/http/agent_endpoints.rs
Normal file
|
|
@ -0,0 +1,165 @@
|
||||||
|
// Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn auth_snapshots() {
|
||||||
|
let token = "test-token";
|
||||||
|
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
|
||||||
|
|
||||||
|
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "health should be public");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("auth_health_public", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": normalize_health(&payload),
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
|
||||||
|
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
|
||||||
|
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("auth_missing_token", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": payload,
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
|
||||||
|
let request = Request::builder()
|
||||||
|
.method(Method::GET)
|
||||||
|
.uri("/v1/agents")
|
||||||
|
.header(header::AUTHORIZATION, "Bearer wrong-token")
|
||||||
|
.body(Body::empty())
|
||||||
|
.expect("auth invalid request");
|
||||||
|
let (status, _headers, payload) = send_json_request(&app.app, request).await;
|
||||||
|
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("auth_invalid_token", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": payload,
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
|
||||||
|
let request = Request::builder()
|
||||||
|
.method(Method::GET)
|
||||||
|
.uri("/v1/agents")
|
||||||
|
.header(header::AUTHORIZATION, format!("Bearer {token}"))
|
||||||
|
.body(Body::empty())
|
||||||
|
.expect("auth valid request");
|
||||||
|
let (status, _headers, payload) = send_json_request(&app.app, request).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "valid token should succeed");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("auth_valid_token", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": normalize_agent_list(&payload),
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn cors_snapshots() {
|
||||||
|
let cors = CorsLayer::new()
|
||||||
|
.allow_origin("http://example.com".parse::<HeaderValue>().unwrap())
|
||||||
|
.allow_methods([Method::GET, Method::POST])
|
||||||
|
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]);
|
||||||
|
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
|
||||||
|
|
||||||
|
let preflight = Request::builder()
|
||||||
|
.method(Method::OPTIONS)
|
||||||
|
.uri("/v1/agents")
|
||||||
|
.header(header::ORIGIN, "http://example.com")
|
||||||
|
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
|
||||||
|
.header(
|
||||||
|
header::ACCESS_CONTROL_REQUEST_HEADERS,
|
||||||
|
"authorization,content-type",
|
||||||
|
)
|
||||||
|
.body(Body::empty())
|
||||||
|
.expect("cors preflight request");
|
||||||
|
let (status, headers, _payload) = send_request(&app.app, preflight).await;
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("cors_preflight", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
|
||||||
|
});
|
||||||
|
|
||||||
|
let actual = Request::builder()
|
||||||
|
.method(Method::GET)
|
||||||
|
.uri("/v1/health")
|
||||||
|
.header(header::ORIGIN, "http://example.com")
|
||||||
|
.body(Body::empty())
|
||||||
|
.expect("cors actual request");
|
||||||
|
let (status, headers, payload) = send_json_request(&app.app, actual).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("cors_actual", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(json!({
|
||||||
|
"cors": snapshot_cors(status, &headers),
|
||||||
|
"payload": normalize_health(&payload),
|
||||||
|
}));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn agent_endpoints_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
let app = TestApp::new();
|
||||||
|
|
||||||
|
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "health status");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("health", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(normalize_health(&health));
|
||||||
|
});
|
||||||
|
|
||||||
|
// List agents (verify IDs only; install state is environment-dependent).
|
||||||
|
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "agents list");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("agents_list", None),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
|
||||||
|
});
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/agents/{}/install", config.agent.as_str()),
|
||||||
|
Some(json!({})),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(snapshot_status(status));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
let (status, modes) = send_json(
|
||||||
|
&app.app,
|
||||||
|
Method::GET,
|
||||||
|
&format!("/v1/agents/{}/modes", config.agent.as_str()),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "agent modes");
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 918
|
|
||||||
expression: normalize_create_session(&created)
|
|
||||||
---
|
|
||||||
healthy: true
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_create_session(&created)
|
|
||||||
---
|
|
||||||
healthy: true
|
|
||||||
nativeSessionId: "<redacted>"
|
|
||||||
|
|
@ -1,7 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1053
|
|
||||||
expression: normalize_create_session(&created)
|
|
||||||
---
|
|
||||||
healthy: true
|
|
||||||
nativeSessionId: "<redacted>"
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_create_session(&created)
|
|
||||||
---
|
|
||||||
agentSessionId: "<redacted>"
|
|
||||||
healthy: true
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 943
|
|
||||||
expression: snapshot_status(status)
|
|
||||||
---
|
|
||||||
status: 204
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 959
|
|
||||||
expression: snapshot_status(status)
|
|
||||||
---
|
|
||||||
status: 204
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1078
|
|
||||||
expression: snapshot_status(status)
|
|
||||||
---
|
|
||||||
status: 204
|
|
||||||
|
|
@ -1,5 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: snapshot_status(status)
|
|
||||||
---
|
|
||||||
status: 204
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_sessions(&sessions)
|
|
||||||
---
|
|
||||||
hasExpectedFields: true
|
|
||||||
sessionCount: 1
|
|
||||||
|
|
@ -1,17 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1119
|
|
||||||
expression: normalize_events(&permission_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
|
|
@ -1,131 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_events(&permission_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 9
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 10
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 11
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 12
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 13
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 14
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- reasoning
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 15
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1112
|
|
||||||
expression: normalize_events(&permission_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1017
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown permission id: missing-permission"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1152
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown permission id: missing-permission"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1151
|
|
||||||
expression: normalize_events(&reject_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,331 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_events(&reject_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 9
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 10
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 11
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 12
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 13
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 14
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 15
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 16
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 17
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 18
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 19
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 20
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 21
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 22
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 23
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 24
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 25
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 26
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 27
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 28
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 29
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 30
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 31
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 32
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 33
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 34
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 35
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 36
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 37
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 38
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 39
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- reasoning
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 40
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1236
|
|
||||||
expression: normalize_events(&reject_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1151
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1139
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1276
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1109
|
|
||||||
expression: normalize_events(&question_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,315 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalize_events(&question_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 9
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 10
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 11
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 12
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 13
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 14
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 15
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 16
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 17
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 18
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 19
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 20
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 21
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 22
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 23
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 24
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 25
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 26
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 27
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 28
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 29
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 30
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 31
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 32
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 33
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 34
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 35
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 36
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 37
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- reasoning
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 38
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1174
|
|
||||||
expression: normalize_events(&question_events)
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1214
|
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
|
||||||
payload:
|
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
|
||||||
status: 400
|
|
||||||
title: Invalid Request
|
|
||||||
type: "urn:sandbox-agent:error:invalid_request"
|
|
||||||
status: 400
|
|
||||||
|
|
@ -1,201 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: snapshot
|
|
||||||
---
|
|
||||||
session_a:
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
session_b:
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 9
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 10
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 11
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 12
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 13
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 14
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- reasoning
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 15
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 1344
|
|
||||||
expression: snapshot
|
|
||||||
---
|
|
||||||
session_a:
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
session_b:
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,171 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
|
||||||
expression: normalized
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 9
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 10
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 11
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 12
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 13
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 14
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 15
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 16
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 17
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 18
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 19
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- reasoning
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 20
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 848
|
|
||||||
expression: normalized
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,73 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 848
|
|
||||||
expression: normalized
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- status
|
|
||||||
kind: status
|
|
||||||
role: system
|
|
||||||
status: completed
|
|
||||||
seq: 3
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: in_progress
|
|
||||||
seq: 4
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 5
|
|
||||||
source: daemon
|
|
||||||
synthetic: true
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: user
|
|
||||||
status: completed
|
|
||||||
seq: 6
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 7
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
|
||||||
- item:
|
|
||||||
content_types: []
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 8
|
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
---
|
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
|
||||||
assertion_line: 841
|
|
||||||
expression: normalized
|
|
||||||
---
|
|
||||||
- metadata: true
|
|
||||||
seq: 1
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- metadata: true
|
|
||||||
seq: 2
|
|
||||||
session: started
|
|
||||||
type: session.started
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: in_progress
|
|
||||||
seq: 3
|
|
||||||
type: item.started
|
|
||||||
- delta:
|
|
||||||
delta: "<redacted>"
|
|
||||||
item_id: "<redacted>"
|
|
||||||
native_item_id: "<redacted>"
|
|
||||||
seq: 4
|
|
||||||
type: item.delta
|
|
||||||
- item:
|
|
||||||
content_types:
|
|
||||||
- text
|
|
||||||
kind: message
|
|
||||||
role: assistant
|
|
||||||
status: completed
|
|
||||||
seq: 5
|
|
||||||
type: item.completed
|
|
||||||
2
server/packages/sandbox-agent/tests/http_endpoints.rs
Normal file
2
server/packages/sandbox-agent/tests/http_endpoints.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
#[path = "http/agent_endpoints.rs"]
|
||||||
|
mod agent_endpoints;
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
include!("http/http_sse_snapshots.rs");
|
|
||||||
2
server/packages/sandbox-agent/tests/sessions.rs
Normal file
2
server/packages/sandbox-agent/tests/sessions.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
#[path = "sessions/mod.rs"]
|
||||||
|
mod sessions;
|
||||||
5
server/packages/sandbox-agent/tests/sessions/mod.rs
Normal file
5
server/packages/sandbox-agent/tests/sessions/mod.rs
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
mod session_lifecycle;
|
||||||
|
mod permissions;
|
||||||
|
mod questions;
|
||||||
|
mod reasoning;
|
||||||
|
mod status;
|
||||||
88
server/packages/sandbox-agent/tests/sessions/permissions.rs
Normal file
88
server/packages/sandbox-agent/tests/sessions/permissions.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
// Permission flow snapshots compare every agent to the mock baseline.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
fn session_snapshot_suffix(prefix: &str) -> String {
|
||||||
|
snapshot_name(prefix, Some(AgentId::Mock))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_session_snapshot(prefix: &str, value: Value) {
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => session_snapshot_suffix(prefix),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn permission_flow_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !(caps.plan_mode && caps.permissions) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(&app.app, config.agent).await;
|
||||||
|
|
||||||
|
let permission_session = format!("perm-{}", config.agent.as_str());
|
||||||
|
create_session(&app.app, config.agent, &permission_session, "plan").await;
|
||||||
|
let offset = drain_events(&app.app, &permission_session, Duration::from_secs(6)).await;
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{permission_session}/messages"),
|
||||||
|
Some(json!({ "message": PERMISSION_PROMPT })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
|
||||||
|
|
||||||
|
let permission_events = poll_events_until_match_from(
|
||||||
|
&app.app,
|
||||||
|
&permission_session,
|
||||||
|
offset,
|
||||||
|
Duration::from_secs(120),
|
||||||
|
|events| find_permission_id(events).is_some() || should_stop(events),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let permission_events = truncate_permission_events(&permission_events);
|
||||||
|
assert_session_snapshot("permission_events", normalize_events(&permission_events));
|
||||||
|
|
||||||
|
if let Some(permission_id) = find_permission_id(&permission_events) {
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
|
||||||
|
),
|
||||||
|
Some(json!({ "reply": "once" })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
|
||||||
|
assert_session_snapshot("permission_reply", snapshot_status(status));
|
||||||
|
} else {
|
||||||
|
let (status, payload) = send_json(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{permission_session}/permissions/missing-permission/reply"
|
||||||
|
),
|
||||||
|
Some(json!({ "reply": "once" })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(!status.is_success(), "missing permission id should error");
|
||||||
|
assert_session_snapshot(
|
||||||
|
"permission_reply_missing",
|
||||||
|
json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": payload,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
145
server/packages/sandbox-agent/tests/sessions/questions.rs
Normal file
145
server/packages/sandbox-agent/tests/sessions/questions.rs
Normal file
|
|
@ -0,0 +1,145 @@
|
||||||
|
// Question flow snapshots compare every agent to the mock baseline.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
fn session_snapshot_suffix(prefix: &str) -> String {
|
||||||
|
snapshot_name(prefix, Some(AgentId::Mock))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_session_snapshot(prefix: &str, value: Value) {
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => session_snapshot_suffix(prefix),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn question_flow_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.questions {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(&app.app, config.agent).await;
|
||||||
|
|
||||||
|
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
||||||
|
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
||||||
|
let reply_offset =
|
||||||
|
drain_events(&app.app, &question_reply_session, Duration::from_secs(6)).await;
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{question_reply_session}/messages"),
|
||||||
|
Some(json!({ "message": QUESTION_PROMPT })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
|
||||||
|
|
||||||
|
let question_events = poll_events_until_match_from(
|
||||||
|
&app.app,
|
||||||
|
&question_reply_session,
|
||||||
|
reply_offset,
|
||||||
|
Duration::from_secs(120),
|
||||||
|
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let question_events = truncate_question_events(&question_events);
|
||||||
|
assert_session_snapshot("question_reply_events", normalize_events(&question_events));
|
||||||
|
|
||||||
|
if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
|
||||||
|
),
|
||||||
|
Some(json!({ "answers": answers })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
|
||||||
|
assert_session_snapshot("question_reply", snapshot_status(status));
|
||||||
|
} else {
|
||||||
|
let (status, payload) = send_json(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{question_reply_session}/questions/missing-question/reply"
|
||||||
|
),
|
||||||
|
Some(json!({ "answers": [] })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(!status.is_success(), "missing question id should error");
|
||||||
|
assert_session_snapshot(
|
||||||
|
"question_reply_missing",
|
||||||
|
json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": payload,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
||||||
|
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
||||||
|
let reject_offset =
|
||||||
|
drain_events(&app.app, &question_reject_session, Duration::from_secs(6)).await;
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{question_reject_session}/messages"),
|
||||||
|
Some(json!({ "message": QUESTION_PROMPT })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
|
||||||
|
|
||||||
|
let reject_events = poll_events_until_match_from(
|
||||||
|
&app.app,
|
||||||
|
&question_reject_session,
|
||||||
|
reject_offset,
|
||||||
|
Duration::from_secs(120),
|
||||||
|
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let reject_events = truncate_question_events(&reject_events);
|
||||||
|
assert_session_snapshot("question_reject_events", normalize_events(&reject_events));
|
||||||
|
|
||||||
|
if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
|
||||||
|
assert_session_snapshot("question_reject", snapshot_status(status));
|
||||||
|
} else {
|
||||||
|
let (status, payload) = send_json(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!(
|
||||||
|
"/v1/sessions/{question_reject_session}/questions/missing-question/reject"
|
||||||
|
),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(!status.is_success(), "missing question id reject should error");
|
||||||
|
assert_session_snapshot(
|
||||||
|
"question_reject_missing",
|
||||||
|
json!({
|
||||||
|
"status": status.as_u16(),
|
||||||
|
"payload": payload,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
56
server/packages/sandbox-agent/tests/sessions/reasoning.rs
Normal file
56
server/packages/sandbox-agent/tests/sessions/reasoning.rs
Normal file
|
|
@ -0,0 +1,56 @@
|
||||||
|
// Reasoning capability checks are isolated from baseline snapshots.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
fn reasoning_prompt(agent: AgentId) -> &'static str {
|
||||||
|
if agent == AgentId::Mock {
|
||||||
|
"demo"
|
||||||
|
} else {
|
||||||
|
"Answer briefly and include your reasoning."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn reasoning_events_present() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.reasoning {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(&app.app, config.agent).await;
|
||||||
|
|
||||||
|
let session_id = format!("reasoning-{}", config.agent.as_str());
|
||||||
|
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||||
|
.await;
|
||||||
|
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{session_id}/messages"),
|
||||||
|
Some(json!({ "message": reasoning_prompt(config.agent) })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");
|
||||||
|
|
||||||
|
let events = poll_events_until_match_from(
|
||||||
|
&app.app,
|
||||||
|
&session_id,
|
||||||
|
offset,
|
||||||
|
Duration::from_secs(120),
|
||||||
|
|events| events_have_content_type(events, "reasoning") || events.iter().any(is_error_event),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(
|
||||||
|
events_have_content_type(&events, "reasoning"),
|
||||||
|
"expected reasoning content for {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,192 @@
|
||||||
|
// Session lifecycle and streaming snapshots use the mock baseline as the single source of truth.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
fn session_snapshot_suffix(prefix: &str) -> String {
|
||||||
|
snapshot_name(prefix, Some(AgentId::Mock))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_session_snapshot(prefix: &str, value: Value) {
|
||||||
|
insta::with_settings!({
|
||||||
|
snapshot_suffix => session_snapshot_suffix(prefix),
|
||||||
|
}, {
|
||||||
|
insta::assert_yaml_snapshot!(value);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn session_endpoints_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.session_lifecycle {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(&app.app, config.agent).await;
|
||||||
|
|
||||||
|
let session_id = format!("snapshot-{}", config.agent.as_str());
|
||||||
|
let permission_mode = test_permission_mode(config.agent);
|
||||||
|
let (status, created) = send_json(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{session_id}"),
|
||||||
|
Some(json!({
|
||||||
|
"agent": config.agent.as_str(),
|
||||||
|
"permissionMode": permission_mode
|
||||||
|
})),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "create session");
|
||||||
|
assert_session_snapshot("create_session", normalize_create_session(&created));
|
||||||
|
|
||||||
|
let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
|
||||||
|
assert_eq!(status, StatusCode::OK, "list sessions");
|
||||||
|
assert_session_snapshot("sessions_list", normalize_sessions(&sessions));
|
||||||
|
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{session_id}/messages"),
|
||||||
|
Some(json!({ "message": PROMPT })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||||
|
assert_session_snapshot("send_message", snapshot_status(status));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn http_events_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
// OpenCode's embedded bun hangs when installing plugins, blocking event streaming.
|
||||||
|
if config.agent == AgentId::Opencode {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.session_lifecycle {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
run_http_events_snapshot(&app.app, config).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn sse_events_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
||||||
|
if config.agent == AgentId::Opencode {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.session_lifecycle {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
run_sse_events_snapshot(&app.app, config).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn concurrency_snapshots() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.session_lifecycle {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
run_concurrency_snapshot(&app.app, config).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn turn_stream_route() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.session_lifecycle {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
run_turn_stream_check(&app.app, config).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(app, config.agent).await;
|
||||||
|
|
||||||
|
let session_a = format!("concurrent-a-{}", config.agent.as_str());
|
||||||
|
let session_b = format!("concurrent-b-{}", config.agent.as_str());
|
||||||
|
let perm_mode = test_permission_mode(config.agent);
|
||||||
|
create_session(app, config.agent, &session_a, perm_mode).await;
|
||||||
|
create_session(app, config.agent, &session_b, perm_mode).await;
|
||||||
|
let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
|
||||||
|
let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;
|
||||||
|
|
||||||
|
let app_a = app.clone();
|
||||||
|
let app_b = app.clone();
|
||||||
|
let send_a = send_message(&app_a, &session_a);
|
||||||
|
let send_b = send_message(&app_b, &session_b);
|
||||||
|
tokio::join!(send_a, send_b);
|
||||||
|
|
||||||
|
let app_a = app.clone();
|
||||||
|
let app_b = app.clone();
|
||||||
|
let poll_a = poll_events_until_from(&app_a, &session_a, offset_a, Duration::from_secs(120));
|
||||||
|
let poll_b = poll_events_until_from(&app_b, &session_b, offset_b, Duration::from_secs(120));
|
||||||
|
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
|
||||||
|
let events_a = truncate_after_first_stop(&events_a);
|
||||||
|
let events_b = truncate_after_first_stop(&events_b);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
!events_a.is_empty(),
|
||||||
|
"no events collected for concurrent session a {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
!events_b.is_empty(),
|
||||||
|
"no events collected for concurrent session b {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
should_stop(&events_a),
|
||||||
|
"timed out waiting for assistant/error event for concurrent session a {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
should_stop(&events_b),
|
||||||
|
"timed out waiting for assistant/error event for concurrent session b {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
|
||||||
|
let snapshot = json!({
|
||||||
|
"session_a": normalize_events(&events_a),
|
||||||
|
"session_b": normalize_events(&events_b),
|
||||||
|
});
|
||||||
|
assert_session_snapshot("concurrency_events", snapshot);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 1
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 2
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: in_progress
|
||||||
|
seq: 4
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 5
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: completed
|
||||||
|
seq: 6
|
||||||
|
type: item.completed
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
|
||||||
assertion_line: 1011
|
expression: value
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
---
|
||||||
payload:
|
payload:
|
||||||
detail: "invalid request: unknown permission id: missing-permission"
|
detail: "invalid request: unknown permission id: missing-permission"
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 1
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 2
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: in_progress
|
||||||
|
seq: 4
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 5
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: completed
|
||||||
|
seq: 6
|
||||||
|
type: item.completed
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
assertion_line: 1078
|
expression: value
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
---
|
||||||
payload:
|
payload:
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
detail: "invalid request: unknown question id: missing-question"
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
|
seq: 1
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 2
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: in_progress
|
||||||
|
seq: 4
|
||||||
|
type: item.started
|
||||||
|
- delta:
|
||||||
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
|
seq: 5
|
||||||
|
type: item.delta
|
||||||
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: assistant
|
||||||
|
status: completed
|
||||||
|
seq: 6
|
||||||
|
type: item.completed
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/questions.rs
|
||||||
assertion_line: 1072
|
expression: value
|
||||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
|
||||||
---
|
---
|
||||||
payload:
|
payload:
|
||||||
detail: "invalid request: unknown question id: missing-question"
|
detail: "invalid request: unknown question id: missing-question"
|
||||||
|
|
@ -1,38 +1,43 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
||||||
assertion_line: 1351
|
expression: value
|
||||||
expression: snapshot
|
|
||||||
---
|
---
|
||||||
session_a:
|
session_a:
|
||||||
- metadata: true
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
seq: 1
|
seq: 1
|
||||||
session: started
|
type: item.started
|
||||||
source: daemon
|
- delta:
|
||||||
synthetic: true
|
delta: "<redacted>"
|
||||||
type: session.started
|
item_id: "<redacted>"
|
||||||
- metadata: true
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 2
|
||||||
session: started
|
type: item.delta
|
||||||
source: agent
|
- item:
|
||||||
synthetic: false
|
content_types:
|
||||||
type: session.started
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
- text
|
- text
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 3
|
seq: 4
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 4
|
seq: 5
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -40,40 +45,44 @@ session_a:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 5
|
seq: 6
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
session_b:
|
session_b:
|
||||||
- metadata: true
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
seq: 1
|
seq: 1
|
||||||
session: started
|
type: item.started
|
||||||
source: daemon
|
- delta:
|
||||||
synthetic: true
|
delta: "<redacted>"
|
||||||
type: session.started
|
item_id: "<redacted>"
|
||||||
- metadata: true
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 2
|
||||||
session: started
|
type: item.delta
|
||||||
source: agent
|
- item:
|
||||||
synthetic: false
|
content_types:
|
||||||
type: session.started
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
- text
|
- text
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 3
|
seq: 4
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 4
|
seq: 5
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -81,7 +90,5 @@ session_b:
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 5
|
seq: 6
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
healthy: true
|
||||||
|
nativeSessionId: "<redacted>"
|
||||||
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
status: 204
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
---
|
||||||
|
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
|
||||||
|
expression: value
|
||||||
|
---
|
||||||
|
hasExpectedFields: true
|
||||||
|
sessionCount: 1
|
||||||
|
|
@ -1,37 +1,42 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||||
assertion_line: 811
|
|
||||||
expression: normalized
|
expression: normalized
|
||||||
---
|
---
|
||||||
- metadata: true
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
seq: 1
|
seq: 1
|
||||||
session: started
|
type: item.started
|
||||||
source: daemon
|
- delta:
|
||||||
synthetic: true
|
delta: "<redacted>"
|
||||||
type: session.started
|
item_id: "<redacted>"
|
||||||
- metadata: true
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 2
|
||||||
session: started
|
type: item.delta
|
||||||
source: agent
|
- item:
|
||||||
synthetic: false
|
content_types:
|
||||||
type: session.started
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
- text
|
- text
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 3
|
seq: 4
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 4
|
seq: 5
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -39,7 +44,5 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 5
|
seq: 6
|
||||||
source: agent
|
|
||||||
synthetic: false
|
|
||||||
type: item.completed
|
type: item.completed
|
||||||
|
|
@ -1,29 +1,42 @@
|
||||||
---
|
---
|
||||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
|
||||||
assertion_line: 804
|
|
||||||
expression: normalized
|
expression: normalized
|
||||||
---
|
---
|
||||||
- metadata: true
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: in_progress
|
||||||
seq: 1
|
seq: 1
|
||||||
session: started
|
type: item.started
|
||||||
type: session.started
|
- delta:
|
||||||
- metadata: true
|
delta: "<redacted>"
|
||||||
|
item_id: "<redacted>"
|
||||||
|
native_item_id: "<redacted>"
|
||||||
seq: 2
|
seq: 2
|
||||||
session: started
|
type: item.delta
|
||||||
type: session.started
|
- item:
|
||||||
|
content_types:
|
||||||
|
- text
|
||||||
|
kind: message
|
||||||
|
role: user
|
||||||
|
status: completed
|
||||||
|
seq: 3
|
||||||
|
type: item.completed
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
- text
|
- text
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: in_progress
|
status: in_progress
|
||||||
seq: 3
|
seq: 4
|
||||||
type: item.started
|
type: item.started
|
||||||
- delta:
|
- delta:
|
||||||
delta: "<redacted>"
|
delta: "<redacted>"
|
||||||
item_id: "<redacted>"
|
item_id: "<redacted>"
|
||||||
native_item_id: "<redacted>"
|
native_item_id: "<redacted>"
|
||||||
seq: 4
|
seq: 5
|
||||||
type: item.delta
|
type: item.delta
|
||||||
- item:
|
- item:
|
||||||
content_types:
|
content_types:
|
||||||
|
|
@ -31,5 +44,5 @@ expression: normalized
|
||||||
kind: message
|
kind: message
|
||||||
role: assistant
|
role: assistant
|
||||||
status: completed
|
status: completed
|
||||||
seq: 5
|
seq: 6
|
||||||
type: item.completed
|
type: item.completed
|
||||||
61
server/packages/sandbox-agent/tests/sessions/status.rs
Normal file
61
server/packages/sandbox-agent/tests/sessions/status.rs
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
// Status capability checks are isolated from baseline snapshots.
|
||||||
|
include!("../common/http.rs");
|
||||||
|
|
||||||
|
fn status_prompt(agent: AgentId) -> &'static str {
|
||||||
|
if agent == AgentId::Mock {
|
||||||
|
"status"
|
||||||
|
} else {
|
||||||
|
"Provide a short status update."
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn events_have_status(events: &[Value]) -> bool {
|
||||||
|
events.iter().any(|event| event_is_status_item(event))
|
||||||
|
|| events_have_content_type(events, "status")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn status_events_present() {
|
||||||
|
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||||
|
|
||||||
|
for config in &configs {
|
||||||
|
let app = TestApp::new();
|
||||||
|
let capabilities = fetch_capabilities(&app.app).await;
|
||||||
|
let caps = capabilities
|
||||||
|
.get(config.agent.as_str())
|
||||||
|
.expect("capabilities missing");
|
||||||
|
if !caps.status {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let _guard = apply_credentials(&config.credentials);
|
||||||
|
install_agent(&app.app, config.agent).await;
|
||||||
|
|
||||||
|
let session_id = format!("status-{}", config.agent.as_str());
|
||||||
|
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent))
|
||||||
|
.await;
|
||||||
|
let offset = drain_events(&app.app, &session_id, Duration::from_secs(6)).await;
|
||||||
|
let status = send_status(
|
||||||
|
&app.app,
|
||||||
|
Method::POST,
|
||||||
|
&format!("/v1/sessions/{session_id}/messages"),
|
||||||
|
Some(json!({ "message": status_prompt(config.agent) })),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");
|
||||||
|
|
||||||
|
let events = poll_events_until_match_from(
|
||||||
|
&app.app,
|
||||||
|
&session_id,
|
||||||
|
offset,
|
||||||
|
Duration::from_secs(120),
|
||||||
|
|events| events_have_status(events) || events.iter().any(is_error_event),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
assert!(
|
||||||
|
events_have_status(&events),
|
||||||
|
"expected status events for {}",
|
||||||
|
config.agent
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue