fix: add docker-setup action, runtime Dockerfile, and align release workflow

- Add .github/actions/docker-setup composite action (from rivet)
- Add docker/runtime/Dockerfile for Docker image builds
- Update release.yaml to match rivet patterns:
  - Use corepack enable instead of pnpm/action-setup
  - Add reuse_engine_version input
  - Add Docker job with Depot runners
  - Use --no-frozen-lockfile for pnpm install
  - Add id-token permission for setup job
This commit is contained in:
Nathan Flurry 2026-01-27 19:29:54 -08:00
parent f05389307a
commit b49776145b
82 changed files with 1415 additions and 2430 deletions

View file

@ -10,18 +10,23 @@ Place all new tests under `server/packages/**/tests/` (or a package-specific `te
- Agent flow coverage in `agent-flows/`
- Agent management coverage in `agent-management/`
- Shared server manager coverage in `server-manager/`
- HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
- HTTP endpoint coverage in `http/` (snapshots in `http/snapshots/`)
- Session capability snapshots in `sessions/` (one file per capability, e.g. `session_lifecycle.rs`, `permissions.rs`, `questions.rs`, `reasoning.rs`, `status.rs`; snapshots in `sessions/snapshots/`)
- UI coverage in `ui/`
- Shared helpers in `common/`
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
## Snapshot tests
The HTTP/SSE snapshot suite entrypoint lives in:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
HTTP endpoint snapshot entrypoint:
- `server/packages/sandbox-agent/tests/http_endpoints.rs`
Session snapshot entrypoint:
- `server/packages/sandbox-agent/tests/sessions.rs`
Snapshots are written to:
- `server/packages/sandbox-agent/tests/http/snapshots/`
- `server/packages/sandbox-agent/tests/http/snapshots/` (HTTP endpoint snapshots)
- `server/packages/sandbox-agent/tests/sessions/snapshots/` (session/capability snapshots)
## Agent selection
@ -71,6 +76,7 @@ To keep snapshots deterministic:
- IDs, timestamps, native IDs
- text content, tool inputs/outputs, provider-specific metadata
- `source` and `synthetic` flags (these are implementation details)
- Scrub `reasoning` and `status` content from session-baseline snapshots to keep the core event skeleton consistent across agents; validate those content types separately in their capability-specific tests.
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
- Event streams are truncated after the first assistant or error event.
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
@ -81,14 +87,19 @@ To keep snapshots deterministic:
## Typical commands
Run only Claude snapshots:
Run only Claude session snapshots:
```
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test http_sse_snapshots
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent --test sessions
```
Run all detected agents:
Run session snapshots for all detected agents:
```
cargo test -p sandbox-agent --test http_sse_snapshots
cargo test -p sandbox-agent --test sessions
```
Run HTTP endpoint snapshots:
```
cargo test -p sandbox-agent --test http_endpoints
```
## Universal Schema

View file

@ -2913,6 +2913,7 @@ pub struct AgentCapabilities {
pub session_lifecycle: bool,
pub error_events: bool,
pub reasoning: bool,
pub status: bool,
pub command_execution: bool,
pub file_changes: bool,
pub mcp_tools: bool,
@ -3512,6 +3513,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: false,
error_events: false,
reasoning: false,
status: false,
command_execution: false,
file_changes: false,
mcp_tools: false,
@ -3530,6 +3532,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true,
error_events: true,
reasoning: true,
status: true,
command_execution: true,
file_changes: true,
mcp_tools: true,
@ -3548,6 +3551,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true,
error_events: true,
reasoning: false,
status: true,
command_execution: false,
file_changes: false,
mcp_tools: false,
@ -3566,6 +3570,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: false,
error_events: true,
reasoning: false,
status: false,
command_execution: false,
file_changes: false,
mcp_tools: false,
@ -3584,6 +3589,7 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities {
session_lifecycle: true,
error_events: true,
reasoning: true,
status: true,
command_execution: true,
file_changes: true,
mcp_tools: true,

View file

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::time::{Duration, Instant};
use axum::body::{Body, Bytes};
@ -208,49 +208,65 @@ async fn send_message(app: &Router, session_id: &str) {
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
}
async fn poll_events_until(
app: &Router,
session_id: &str,
timeout: Duration,
) -> Vec<Value> {
let start = Instant::now();
let mut offset = 0u64;
let mut events = Vec::new();
while start.elapsed() < timeout {
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
let (status, payload) = send_json(app, Method::GET, &path, None).await;
assert_eq!(status, StatusCode::OK, "poll events");
let new_events = payload
.get("events")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if should_stop(&events) {
break;
}
}
tokio::time::sleep(Duration::from_millis(800)).await;
}
events
/// Fetches one page (at most 200 entries) of a session's events, starting at `offset`.
///
/// Returns the events found in the response's `events` array together with the
/// `sequence` value of the last event. When the page is empty, or the last event
/// carries no numeric `sequence`, the incoming `offset` is handed back unchanged.
///
/// Panics if the endpoint does not answer with `200 OK`.
async fn fetch_events_once(app: &Router, session_id: &str, offset: u64) -> (Vec<Value>, u64) {
    let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
    let (status, payload) = send_json(app, Method::GET, &path, None).await;
    assert_eq!(status, StatusCode::OK, "poll events");

    // A missing or non-array `events` field is treated as an empty page.
    let page: Vec<Value> = match payload.get("events").and_then(Value::as_array) {
        Some(items) => items.clone(),
        None => Vec::new(),
    };

    let next_offset = match page.last() {
        Some(last) => last
            .get("sequence")
            .and_then(Value::as_u64)
            .unwrap_or(offset),
        None => offset,
    };

    (page, next_offset)
}
async fn read_sse_events(
/// Consumes the events a session has already produced and returns the offset
/// of the last one seen, so later polling can start after them.
///
/// Behaviour:
/// - While no event has been seen yet (`offset == 0`), an empty page triggers a
///   200 ms sleep and a retry.
/// - Once at least one page has been consumed, the first empty page ends the drain.
/// - A non-empty page that does not move the offset also ends the drain. Without
///   this guard the same page would be re-fetched in a tight loop (no sleep)
///   until `timeout` elapsed, only to return the very same offset.
/// - The loop never starts a new fetch after `timeout` has elapsed.
async fn drain_events(app: &Router, session_id: &str, timeout: Duration) -> u64 {
    let start = Instant::now();
    let mut offset = 0u64;
    while start.elapsed() < timeout {
        let (new_events, new_offset) = fetch_events_once(app, session_id, offset).await;
        if new_events.is_empty() {
            if offset == 0 {
                // Nothing has been emitted yet; give the session a moment.
                tokio::time::sleep(Duration::from_millis(200)).await;
                continue;
            }
            break;
        }
        if new_offset == offset {
            // No progress: repeating the request would return the same page.
            break;
        }
        offset = new_offset;
    }
    offset
}
async fn poll_events_until_from(
app: &Router,
session_id: &str,
offset: u64,
timeout: Duration,
) -> Vec<Value> {
poll_events_until_match_from(app, session_id, offset, timeout, should_stop).await
}
/// Polls a session's events from the very beginning (offset 0); see
/// `poll_events_until_from` for the stop condition.
async fn poll_events_until(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    let from_start = 0;
    poll_events_until_from(app, session_id, from_start, timeout).await
}
async fn read_sse_events_from(
app: &Router,
session_id: &str,
offset: u64,
timeout: Duration,
) -> Vec<Value> {
let request = Request::builder()
.method(Method::GET)
.uri(format!("/v1/sessions/{session_id}/events/sse?offset=0"))
.uri(format!("/v1/sessions/{session_id}/events/sse?offset={offset}"))
.body(Body::empty())
.expect("sse request");
let response = app
@ -291,6 +307,10 @@ async fn read_sse_events(
events
}
/// Reads a session's SSE event stream from the very beginning (offset 0) by
/// delegating to `read_sse_events_from`.
async fn read_sse_events(app: &Router, session_id: &str, timeout: Duration) -> Vec<Value> {
    let from_start = 0;
    read_sse_events_from(app, session_id, from_start, timeout).await
}
async fn read_turn_stream_events(
app: &Router,
session_id: &str,
@ -431,7 +451,8 @@ fn normalize_events(events: &[Value]) -> Value {
!events.iter().any(is_unparsed_event),
"agent.unparsed event encountered"
);
let normalized = events
let scrubbed = scrub_events(events);
let normalized = scrubbed
.iter()
.enumerate()
.map(|(idx, event)| normalize_event(event, idx + 1))
@ -439,6 +460,71 @@ fn normalize_events(events: &[Value]) -> Value {
Value::Array(normalized)
}
/// Returns a copy of `events` with scrubbed items removed.
///
/// - `item.started` / `item.completed` events whose `data.item` satisfies
///   `should_scrub_item` are dropped, and the item's ids are remembered.
/// - `item.delta` events whose `data.item_id` or `data.native_item_id` matches a
///   remembered id are dropped as well.
/// - Every other event is kept unchanged, in its original order.
///
/// Ids are only remembered from events that precede the delta in `events`.
fn scrub_events(events: &[Value]) -> Vec<Value> {
    let mut hidden_ids = HashSet::new();
    let mut kept = Vec::new();

    for event in events {
        let data = event.get("data");
        let drop_event = match event.get("type").and_then(Value::as_str) {
            Some("item.started") | Some("item.completed") => {
                match data.and_then(|data| data.get("item")) {
                    Some(item) if should_scrub_item(item) => {
                        record_item_ids(item, &mut hidden_ids);
                        true
                    }
                    _ => false,
                }
            }
            Some("item.delta") => ["item_id", "native_item_id"].iter().any(|key| {
                data.and_then(|data| data.get(*key))
                    .and_then(Value::as_str)
                    .is_some_and(|id| hidden_ids.contains(id))
            }),
            // Events without a string `type`, or of any other type, pass through.
            _ => false,
        };

        if !drop_event {
            kept.push(event.clone());
        }
    }

    kept
}
/// Decides whether an item should be removed from normalized snapshots.
///
/// An item is scrubbed when either:
/// - its `kind` is the string `"status"`, or
/// - its content contains at least one `"reasoning"` part and every content
///   part is of type `"reasoning"` or `"status"`.
fn should_scrub_item(item: &Value) -> bool {
    if item.get("kind").and_then(Value::as_str) == Some("status") {
        return true;
    }

    let types = item_content_types(item);
    // `all` answers "nothing else is left" directly, without building a
    // throw-away vector of the remaining types first.
    let has_reasoning = types.iter().any(|value| value == "reasoning");
    let only_scrubbable = types
        .iter()
        .all(|value| value == "reasoning" || value == "status");
    has_reasoning && only_scrubbable
}
/// Inserts the item's `item_id` and `native_item_id` into `ids`, skipping any
/// of the two that is absent or not a string.
fn record_item_ids(item: &Value, ids: &mut HashSet<String>) {
    ids.extend(
        ["item_id", "native_item_id"]
            .iter()
            .filter_map(|key| item.get(*key).and_then(Value::as_str))
            .map(str::to_owned),
    );
}
fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
if let Some(idx) = events
.iter()
@ -455,12 +541,6 @@ fn normalize_event(event: &Value, seq: usize) -> Value {
if let Some(event_type) = event.get("type").and_then(Value::as_str) {
map.insert("type".to_string(), Value::String(event_type.to_string()));
}
if let Some(source) = event.get("source").and_then(Value::as_str) {
map.insert("source".to_string(), Value::String(source.to_string()));
}
if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) {
map.insert("synthetic".to_string(), Value::Bool(synthetic));
}
let data = event.get("data").unwrap_or(&Value::Null);
match event.get("type").and_then(Value::as_str).unwrap_or("") {
"session.started" => {
@ -523,6 +603,7 @@ fn normalize_item(item: &Value) -> Value {
let types = content
.iter()
.filter_map(|part| part.get("type").and_then(Value::as_str))
.filter(|value| *value != "reasoning" && *value != "status")
.map(|value| Value::String(value.to_string()))
.collect::<Vec<_>>();
map.insert("content_types".to_string(), Value::Array(types));
@ -530,6 +611,42 @@ fn normalize_item(item: &Value) -> Value {
Value::Object(map)
}
/// Collects the string `type` of every part in the item's `content` array, in
/// order. Parts without a string `type` are skipped; an item without a
/// `content` array yields an empty vector.
fn item_content_types(item: &Value) -> Vec<String> {
    let Some(parts) = item.get("content").and_then(Value::as_array) else {
        return Vec::new();
    };

    let mut types = Vec::new();
    for part in parts {
        if let Some(kind) = part.get("type").and_then(Value::as_str) {
            types.push(kind.to_string());
        }
    }
    types
}
fn event_content_types(event: &Value) -> Vec<String> {
event
.get("data")
.and_then(|data| data.get("item"))
.map(item_content_types)
.unwrap_or_default()
}
/// Reports whether the event's `data.item.kind` is the string `"status"`.
fn event_is_status_item(event: &Value) -> bool {
    let kind = event
        .get("data")
        .and_then(|data| data.get("item"))
        .and_then(|item| item.get("kind"))
        .and_then(Value::as_str);
    matches!(kind, Some("status"))
}
/// Reports whether any event's item contains a content part whose type equals
/// `content_type`.
fn events_have_content_type(events: &[Value], content_type: &str) -> bool {
    for event in events {
        let types = event_content_types(event);
        if types.iter().any(|found| found == content_type) {
            return true;
        }
    }
    false
}
fn normalize_session_end(data: &Value) -> Value {
let mut map = Map::new();
if let Some(reason) = data.get("reason").and_then(Value::as_str) {
@ -717,6 +834,33 @@ fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
}
/// Polls a session's events starting at `offset` and accumulates them until
/// `stop` accepts the accumulated slice or `timeout` has elapsed.
///
/// Between polls the task sleeps for 800 ms. The predicate is evaluated only
/// after a non-empty page has been appended. Whatever was collected is returned,
/// even when the timeout is hit before `stop` ever returned `true`.
async fn poll_events_until_match_from<F>(
    app: &Router,
    session_id: &str,
    offset: u64,
    timeout: Duration,
    stop: F,
) -> Vec<Value>
where
    F: Fn(&[Value]) -> bool,
{
    let started = Instant::now();
    let mut cursor = offset;
    let mut collected = Vec::new();

    loop {
        if started.elapsed() >= timeout {
            break;
        }

        let (page, next_cursor) = fetch_events_once(app, session_id, cursor).await;
        if !page.is_empty() {
            cursor = next_cursor;
            collected.extend(page);
            if stop(&collected) {
                break;
            }
        }

        tokio::time::sleep(Duration::from_millis(800)).await;
    }

    collected
}
async fn poll_events_until_match<F>(
app: &Router,
session_id: &str,
@ -726,34 +870,7 @@ async fn poll_events_until_match<F>(
where
F: Fn(&[Value]) -> bool,
{
let start = Instant::now();
let mut offset = 0u64;
let mut events = Vec::new();
while start.elapsed() < timeout {
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
let (status, payload) = send_json(app, Method::GET, &path, None).await;
assert_eq!(status, StatusCode::OK, "poll events");
let new_events = payload
.get("events")
.and_then(Value::as_array)
.cloned()
.unwrap_or_default();
if !new_events.is_empty() {
if let Some(last) = new_events
.last()
.and_then(|event| event.get("sequence"))
.and_then(Value::as_u64)
{
offset = last;
}
events.extend(new_events);
if stop(&events) {
break;
}
}
tokio::time::sleep(Duration::from_millis(800)).await;
}
events
poll_events_until_match_from(app, session_id, 0, timeout, stop).await
}
fn find_permission_id(events: &[Value]) -> Option<String> {
@ -800,9 +917,10 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("session-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
send_message(app, &session_id).await;
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
let events = poll_events_until_from(app, &session_id, offset, Duration::from_secs(120)).await;
let events = truncate_after_first_stop(&events);
assert!(
!events.is_empty(),
@ -816,7 +934,8 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
);
let normalized = normalize_events(&events);
insta::with_settings!({
snapshot_suffix => snapshot_name("http_events", Some(config.agent)),
snapshot_suffix => snapshot_name("http_events", Some(AgentId::Mock)),
snapshot_path => "../sessions/snapshots",
}, {
insta::assert_yaml_snapshot!(normalized);
});
@ -828,12 +947,14 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
let session_id = format!("sse-{}", config.agent.as_str());
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
let offset = drain_events(app, &session_id, Duration::from_secs(6)).await;
let sse_task = {
let app = app.clone();
let session_id = session_id.clone();
let offset = offset;
tokio::spawn(async move {
read_sse_events(&app, &session_id, Duration::from_secs(120)).await
read_sse_events_from(&app, &session_id, offset, Duration::from_secs(120)).await
})
};
@ -853,7 +974,8 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
);
let normalized = normalize_events(&events);
insta::with_settings!({
snapshot_suffix => snapshot_name("sse_events", Some(config.agent)),
snapshot_suffix => snapshot_name("sse_events", Some(AgentId::Mock)),
snapshot_path => "../sessions/snapshots",
}, {
insta::assert_yaml_snapshot!(normalized);
});
@ -879,535 +1001,3 @@ async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
config.agent
);
}
/// Snapshot test for token authentication.
///
/// Builds an app that requires the token `test-token` and records four
/// responses: public `/v1/health`, `/v1/agents` without a token, `/v1/agents`
/// with a wrong bearer token, and `/v1/agents` with the correct bearer token.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auth_snapshots() {
let token = "test-token";
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
// Health is expected to answer 200 even though no token is sent.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health should be public");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_health_public", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_health(&payload),
}));
});
// No Authorization header at all: expect 401.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_missing_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Bearer token that does not match the configured one: expect 401.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, "Bearer wrong-token")
.body(Body::empty())
.expect("auth invalid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_invalid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Bearer token matching the configured one: expect 200 and an agent list.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.expect("auth valid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::OK, "valid token should allow request");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_valid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_agent_list(&payload),
}));
});
}
/// Snapshot test for CORS handling.
///
/// Configures a `CorsLayer` that allows origin `http://example.com`, the GET
/// and POST methods, the `Content-Type` and `Authorization` headers, and
/// credentials; auth is disabled. It then snapshots a preflight `OPTIONS`
/// request and an actual `GET` request against `/v1/health`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn cors_snapshots() {
let cors = CorsLayer::new()
.allow_origin(vec![HeaderValue::from_static("http://example.com")])
.allow_methods([Method::GET, Method::POST])
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
.allow_credentials(true);
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
// Preflight request: only status and headers are snapshotted, the body is ignored.
let preflight = Request::builder()
.method(Method::OPTIONS)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
.header(
header::ACCESS_CONTROL_REQUEST_HEADERS,
"authorization,content-type",
)
.body(Body::empty())
.expect("cors preflight request");
let (status, headers, _payload) = send_request(&app.app, preflight).await;
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_preflight", None),
}, {
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
});
// Actual cross-origin request: CORS headers and the normalized payload are snapshotted.
let actual = Request::builder()
.method(Method::GET)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.body(Body::empty())
.expect("cors actual request");
let (status, headers, payload) = send_json_request(&app.app, actual).await;
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_actual", None),
}, {
insta::assert_yaml_snapshot!(json!({
"cors": snapshot_cors(status, &headers),
"payload": normalize_health(&payload),
}));
});
}
/// Snapshot test that walks the basic REST endpoints for every configured agent.
///
/// Sequence: health, agent list, agent install (per agent), agent modes and
/// session creation (per agent), session list, and finally sending `PROMPT`
/// to each created session. Each step asserts the HTTP status and records a
/// snapshot. Panics if no test agents can be resolved from the environment.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn api_endpoints_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health status");
insta::with_settings!({
snapshot_suffix => snapshot_name("health", None),
}, {
insta::assert_yaml_snapshot!(normalize_health(&health));
});
// List agents (just verify the API returns correct agent IDs, not install state)
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::OK, "agents list");
insta::with_settings!({
snapshot_suffix => snapshot_name("agents_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
});
// Install agents (ensure they're available for subsequent tests)
for config in &configs {
// The guard returned here is held until the end of this loop iteration.
let _guard = apply_credentials(&config.credentials);
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/agents/{}/install", config.agent.as_str()),
Some(json!({})),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
// Remember which session was created for which agent so messages can be sent later.
let mut session_ids = Vec::new();
for config in &configs {
let _guard = apply_credentials(&config.credentials);
let (status, modes) = send_json(
&app.app,
Method::GET,
&format!("/v1/agents/{}/modes", config.agent.as_str()),
None,
)
.await;
assert_eq!(status, StatusCode::OK, "agent modes");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
});
let session_id = format!("snapshot-{}", config.agent.as_str());
let permission_mode = test_permission_mode(config.agent);
let (status, created) = send_json(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}"),
Some(json!({
"agent": config.agent.as_str(),
"permissionMode": permission_mode
})),
)
.await;
assert_eq!(status, StatusCode::OK, "create session");
insta::with_settings!({
snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_create_session(&created));
});
session_ids.push((config.agent, session_id));
}
let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
assert_eq!(status, StatusCode::OK, "list sessions");
insta::with_settings!({
snapshot_suffix => snapshot_name("sessions_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
});
// Send the shared prompt to every session created above; only the status is snapshotted.
for (agent, session_id) in &session_ids {
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}/messages"),
Some(json!({ "message": PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
insta::with_settings!({
snapshot_suffix => snapshot_name("send_message", Some(*agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
}
/// Snapshot test for the permission and question approval flows.
///
/// For every configured agent except OpenCode, and depending on the agent's
/// reported capabilities, this test:
/// - (`plan_mode` && `permissions`) sends `PERMISSION_PROMPT`, snapshots the
///   events up to the permission request, then replies `once` to it. If no
///   permission id was found, it instead snapshots the error returned for a
///   made-up permission id.
/// - (`questions`) runs the same pattern twice with `QUESTION_PROMPT`, once
///   answering the question and once rejecting it, each in its own session.
///
/// All sessions are created with the `"plan"` permission mode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn approval_flow_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
let capabilities = fetch_capabilities(&app.app).await;
for config in &configs {
// OpenCode doesn't support "plan" permission mode required for approval flows
if config.agent == AgentId::Opencode {
continue;
}
let caps = capabilities
.get(config.agent.as_str())
.expect("capabilities missing");
let _guard = apply_credentials(&config.credentials);
install_agent(&app.app, config.agent).await;
// --- Permission flow ---
if caps.plan_mode && caps.permissions {
let permission_session = format!("perm-{}", config.agent.as_str());
create_session(&app.app, config.agent, &permission_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{permission_session}/messages"),
Some(json!({ "message": PERMISSION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
// Stop polling as soon as a permission id shows up or the generic stop condition holds.
let permission_events = poll_events_until_match(
&app.app,
&permission_session,
Duration::from_secs(120),
|events| find_permission_id(events).is_some() || should_stop(events),
)
.await;
let permission_events = truncate_permission_events(&permission_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&permission_events));
});
if let Some(permission_id) = find_permission_id(&permission_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No permission was requested: check that replying to an unknown id fails.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/missing-permission/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert!(!status.is_success(), "missing permission id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
}
// --- Question flows (reply, then reject) ---
if caps.questions {
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reply_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
let question_events = poll_events_until_match(
&app.app,
&question_reply_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
let question_events = truncate_question_events(&question_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&question_events));
});
if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
),
Some(json!({ "answers": answers })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: check that replying to an unknown id fails.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/missing-question/reply"
),
Some(json!({ "answers": [] })),
)
.await;
assert!(!status.is_success(), "missing question id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
// Second question session: same prompt, but the question is rejected instead of answered.
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reject_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
let reject_events = poll_events_until_match(
&app.app,
&question_reject_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
let reject_events = truncate_question_events(&reject_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&reject_events));
});
if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
),
None,
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: check that rejecting an unknown id fails.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/missing-question/reject"
),
None,
)
.await;
assert!(!status.is_success(), "missing question id reject should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
}
}
}
/// Runs the HTTP polling event snapshot for every configured agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn http_events_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        // OpenCode is excluded: its embedded bun hangs while installing plugins,
        // which blocks event streaming.
        // See: https://github.com/opencode-ai/opencode/issues/XXX
        if config.agent != AgentId::Opencode {
            run_http_events_snapshot(&app.app, config).await;
        }
    }
}
/// Drives two sessions of the same agent concurrently and snapshots both event streams.
///
/// Installs the agent, creates sessions `concurrent-a-<agent>` and
/// `concurrent-b-<agent>`, sends a message to both at once, polls both at once
/// (up to 120 s each), truncates each stream after the first stop event, and
/// records a single snapshot containing both normalized streams.
///
/// Panics if either stream is empty or never reaches a stop event.
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
let _guard = apply_credentials(&config.credentials);
install_agent(app, config.agent).await;
let session_a = format!("concurrent-a-{}", config.agent.as_str());
let session_b = format!("concurrent-b-{}", config.agent.as_str());
let perm_mode = test_permission_mode(config.agent);
create_session(app, config.agent, &session_a, perm_mode).await;
create_session(app, config.agent, &session_b, perm_mode).await;
// Both sends are awaited together so the two sessions are active at the same time.
let app_a = app.clone();
let app_b = app.clone();
let send_a = send_message(&app_a, &session_a);
let send_b = send_message(&app_b, &session_b);
tokio::join!(send_a, send_b);
// Likewise, both sessions are polled together.
let app_a = app.clone();
let app_b = app.clone();
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
let events_a = truncate_after_first_stop(&events_a);
let events_b = truncate_after_first_stop(&events_b);
assert!(
!events_a.is_empty(),
"no events collected for concurrent session a {}",
config.agent
);
assert!(
!events_b.is_empty(),
"no events collected for concurrent session b {}",
config.agent
);
assert!(
should_stop(&events_a),
"timed out waiting for assistant/error event for concurrent session a {}",
config.agent
);
assert!(
should_stop(&events_b),
"timed out waiting for assistant/error event for concurrent session b {}",
config.agent
);
let snapshot = json!({
"session_a": normalize_events(&events_a),
"session_b": normalize_events(&events_b),
});
insta::with_settings!({
snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot);
});
}
/// Runs the SSE event snapshot for every configured agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sse_events_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        // OpenCode is excluded: its embedded bun hangs while installing plugins,
        // which blocks SSE event streaming.
        // See: https://github.com/opencode-ai/opencode/issues/XXX
        if config.agent != AgentId::Opencode {
            run_sse_events_snapshot(&app.app, config).await;
        }
    }
}
/// Runs the turn-stream route check for every configured agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn turn_stream_route() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        // OpenCode is excluded: its embedded bun hangs while installing plugins,
        // which blocks SSE event streaming.
        // See: https://github.com/opencode-ai/opencode/issues/XXX
        if config.agent != AgentId::Opencode {
            run_turn_stream_check(&app.app, config).await;
        }
    }
}
/// Runs the two-session concurrency snapshot for every configured agent except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrency_snapshots() {
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        // OpenCode is excluded: its embedded bun hangs while installing plugins,
        // which blocks event streaming.
        // See: https://github.com/opencode-ai/opencode/issues/XXX
        if config.agent != AgentId::Opencode {
            run_concurrency_snapshot(&app.app, config).await;
        }
    }
}

View file

@ -0,0 +1,165 @@
// Agent-specific HTTP endpoints live here; session-related snapshots are in tests/sessions/.
include!("../common/http.rs");
/// Snapshot test for token authentication.
///
/// Builds an app that requires the token `test-token` and records four
/// responses: public `/v1/health`, `/v1/agents` without a token, `/v1/agents`
/// with a wrong bearer token, and `/v1/agents` with the correct bearer token.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auth_snapshots() {
let token = "test-token";
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
// Health is expected to answer 200 even though no token is sent.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health should be public");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_health_public", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_health(&payload),
}));
});
// No Authorization header at all: expect 401.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_missing_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Bearer token that does not match the configured one: expect 401.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, "Bearer wrong-token")
.body(Body::empty())
.expect("auth invalid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_invalid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Bearer token matching the configured one: expect 200 and an agent list.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.expect("auth valid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::OK, "valid token should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_valid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_agent_list(&payload),
}));
});
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn cors_snapshots() {
let cors = CorsLayer::new()
.allow_origin("http://example.com".parse::<HeaderValue>().unwrap())
.allow_methods([Method::GET, Method::POST])
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION]);
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
let preflight = Request::builder()
.method(Method::OPTIONS)
.uri("/v1/agents")
.header(header::ORIGIN, "http://example.com")
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
.header(
header::ACCESS_CONTROL_REQUEST_HEADERS,
"authorization,content-type",
)
.body(Body::empty())
.expect("cors preflight request");
let (status, headers, _payload) = send_request(&app.app, preflight).await;
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_preflight", None),
}, {
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
});
let actual = Request::builder()
.method(Method::GET)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.body(Body::empty())
.expect("cors actual request");
let (status, headers, payload) = send_json_request(&app.app, actual).await;
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_actual", None),
}, {
insta::assert_yaml_snapshot!(json!({
"cors": snapshot_cors(status, &headers),
"payload": normalize_health(&payload),
}));
});
}
/// Snapshots the agent-related HTTP endpoints.
///
/// Steps, in order:
/// 1. `GET /v1/health`, asserted to be `200 OK`;
/// 2. `GET /v1/agents`, asserted to be `200 OK`;
/// 3. `POST /v1/agents/{agent}/install` for every configured agent, asserted to be
///    `204 No Content`;
/// 4. `GET /v1/agents/{agent}/modes` for every configured agent, asserted to be `200 OK`.
///
/// All installs run before any modes request. Per-agent snapshots are suffixed with
/// the agent so each agent keeps its own snapshot.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn agent_endpoints_snapshots() {
    let agents_under_test =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    let router = &app.app;

    let (status, health) = send_json(router, Method::GET, "/v1/health", None).await;
    assert_eq!(status, StatusCode::OK, "health status");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("health", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_health(&health));
    });

    // List agents (verify IDs only; install state is environment-dependent).
    let (status, agents) = send_json(router, Method::GET, "/v1/agents", None).await;
    assert_eq!(status, StatusCode::OK, "agents list");
    insta::with_settings!({
        snapshot_suffix => snapshot_name("agents_list", None),
    }, {
        insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
    });

    // Install pass.
    for config in &agents_under_test {
        // Bound to a named variable (not `_`) so the value returned by `apply_credentials`
        // is only dropped at the end of this iteration.
        let _credentials_guard = apply_credentials(&config.credentials);
        let install_path = format!("/v1/agents/{}/install", config.agent.as_str());
        let status = send_status(router, Method::POST, &install_path, Some(json!({}))).await;
        assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(snapshot_status(status));
        });
    }

    // Modes pass, run only after every install request has completed.
    for config in &agents_under_test {
        let _credentials_guard = apply_credentials(&config.credentials);
        let modes_path = format!("/v1/agents/{}/modes", config.agent.as_str());
        let (status, modes) = send_json(router, Method::GET, &modes_path, None).await;
        assert_eq!(status, StatusCode::OK, "agent modes");
        insta::with_settings!({
            snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
        }, {
            insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
        });
    }
}

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 918
expression: normalize_create_session(&created)
---
healthy: true

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_create_session(&created)
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -1,7 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1053
expression: normalize_create_session(&created)
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_create_session(&created)
---
agentSessionId: "<redacted>"
healthy: true

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 943
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 959
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1078
expression: snapshot_status(status)
---
status: 204

View file

@ -1,5 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: snapshot_status(status)
---
status: 204

View file

@ -1,6 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_sessions(&sessions)
---
hasExpectedFields: true
sessionCount: 1

View file

@ -1,17 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1119
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started

View file

@ -1,131 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 15
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1112
expression: normalize_events(&permission_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1017
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1152
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1151
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,331 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 20
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 21
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 22
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 23
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 24
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 25
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 26
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 27
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 28
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 29
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 30
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 31
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 32
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 33
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 34
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 35
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 36
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 37
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 38
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 39
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 40
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1236
expression: normalize_events(&reject_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1151
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1139
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1276
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1109
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: daemon
synthetic: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,315 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 20
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 21
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 22
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 23
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 24
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 25
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 26
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 27
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 28
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 29
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 30
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 31
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 32
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 33
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 34
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 35
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 36
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 37
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 38
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1174
expression: normalize_events(&question_events)
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,11 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1214
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -1,201 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: snapshot
---
session_a:
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types: []
kind: message
role: assistant
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
session_b:
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 15
source: agent
synthetic: false
type: item.completed

View file

@ -1,67 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1344
expression: snapshot
---
session_a:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed
session_b:
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -1,171 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 9
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 10
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 11
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 12
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 13
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 14
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 15
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 16
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 17
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 18
source: agent
synthetic: false
type: item.delta
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 19
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- reasoning
kind: message
role: assistant
status: completed
seq: 20
source: agent
synthetic: false
type: item.completed

View file

@ -1,45 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: agent
synthetic: false
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
type: item.completed

View file

@ -1,73 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 848
expression: normalized
---
- metadata: true
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
seq: 2
session: started
source: agent
synthetic: false
type: session.started
- item:
content_types:
- status
kind: status
role: system
status: completed
seq: 3
source: agent
synthetic: false
type: item.completed
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 4
source: agent
synthetic: false
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
source: daemon
synthetic: true
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 6
source: agent
synthetic: false
type: item.completed
- item:
content_types: []
kind: message
role: assistant
status: in_progress
seq: 7
source: agent
synthetic: false
type: item.started
- item:
content_types: []
kind: message
role: assistant
status: completed
seq: 8
source: agent
synthetic: false
type: item.completed

View file

@ -1,35 +0,0 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 841
expression: normalized
---
- metadata: true
seq: 1
session: started
type: session.started
- metadata: true
seq: 2
session: started
type: session.started
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 5
type: item.completed

View file

@ -0,0 +1,2 @@
// Test entry point: loads `http/agent_endpoints.rs` as the `agent_endpoints` module.
#[path = "http/agent_endpoints.rs"]
mod agent_endpoints;

View file

@ -1 +0,0 @@
include!("http/http_sse_snapshots.rs");

View file

@ -0,0 +1,2 @@
// Test entry point: loads `sessions/mod.rs` as the `sessions` module.
#[path = "sessions/mod.rs"]
mod sessions;

View file

@ -0,0 +1,5 @@
// One submodule per session capability test file.
mod session_lifecycle;
mod permissions;
mod questions;
mod reasoning;
mod status;

View file

@ -0,0 +1,88 @@
// Permission flow snapshots compare every agent to the mock baseline.
include!("../common/http.rs");
/// Returns the snapshot suffix for `prefix`.
///
/// The suffix is always derived with the mock agent, so every agent's output
/// is asserted against the same (mock) snapshot.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Snapshots the permission flow for every configured agent that reports both
/// the `plan_mode` and `permissions` capabilities.
///
/// For each such agent: create a `"plan"` session, send `PERMISSION_PROMPT`,
/// snapshot the collected events, then snapshot either the reply to the
/// permission request that was found, or the error returned when replying to
/// the non-existent `missing-permission` id.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn permission_flow_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let router = &harness.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let caps = capabilities
            .get(agent.as_str())
            .expect("capabilities missing");
        if !caps.plan_mode || !caps.permissions {
            continue;
        }

        // Bound to a named variable (a bare `_` would drop it immediately) so
        // the value returned by `apply_credentials` lives for the whole iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        let session_id = format!("perm-{}", agent.as_str());
        create_session(router, agent, &session_id, "plan").await;
        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;

        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let prompt_body = json!({ "message": PERMISSION_PROMPT });
        let status = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");

        // Collect events until a permission id shows up or `should_stop` reports true.
        let raw_events = poll_events_until_match_from(
            router,
            &session_id,
            start_offset,
            Duration::from_secs(120),
            |events| find_permission_id(events).is_some() || should_stop(events),
        )
        .await;
        let events = truncate_permission_events(&raw_events);
        assert_session_snapshot("permission_events", normalize_events(&events));

        match find_permission_id(&events) {
            Some(permission_id) => {
                let reply_path =
                    format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply");
                let reply_body = json!({ "reply": "once" });
                let status =
                    send_status(router, Method::POST, &reply_path, Some(reply_body)).await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
                assert_session_snapshot("permission_reply", snapshot_status(status));
            }
            None => {
                // No permission request was found: replying to an unknown id must fail.
                let reply_path =
                    format!("/v1/sessions/{session_id}/permissions/missing-permission/reply");
                let reply_body = json!({ "reply": "once" });
                let (status, payload) =
                    send_json(router, Method::POST, &reply_path, Some(reply_body)).await;
                assert!(!status.is_success(), "missing permission id should error");
                let snapshot = json!({
                    "status": status.as_u16(),
                    "payload": payload,
                });
                assert_session_snapshot("permission_reply_missing", snapshot);
            }
        }
    }
}

View file

@ -0,0 +1,145 @@
// Question flow snapshots compare every agent to the mock baseline.
include!("../common/http.rs");
/// Returns the snapshot suffix for `prefix`.
///
/// The suffix is always derived with the mock agent, so every agent's output
/// is asserted against the same (mock) snapshot.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Snapshots the question flow for every configured agent that reports the
/// `questions` capability.
///
/// Two independent `"plan"` sessions are exercised per agent:
/// 1. a "reply" session, where the question found (if any) is answered, and
/// 2. a "reject" session, where the question found (if any) is rejected.
///
/// When no question is found in the collected events, the test instead
/// snapshots the error returned for the non-existent `missing-question` id.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn question_flow_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let router = &harness.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let supports_questions = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .questions;
        if !supports_questions {
            continue;
        }

        // Bound to a named variable (a bare `_` would drop it immediately) so
        // the value returned by `apply_credentials` lives for the whole iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        // --- Reply path -----------------------------------------------------
        let reply_session = format!("question-reply-{}", agent.as_str());
        create_session(router, agent, &reply_session, "plan").await;
        let reply_start = drain_events(router, &reply_session, Duration::from_secs(6)).await;

        let reply_messages_path = format!("/v1/sessions/{reply_session}/messages");
        let status = send_status(
            router,
            Method::POST,
            &reply_messages_path,
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");

        let raw_reply_events = poll_events_until_match_from(
            router,
            &reply_session,
            reply_start,
            Duration::from_secs(120),
            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
        )
        .await;
        let reply_events = truncate_question_events(&raw_reply_events);
        assert_session_snapshot("question_reply_events", normalize_events(&reply_events));

        match find_question_id_and_answers(&reply_events) {
            Some((question_id, answers)) => {
                let path = format!("/v1/sessions/{reply_session}/questions/{question_id}/reply");
                let body = json!({ "answers": answers });
                let status = send_status(router, Method::POST, &path, Some(body)).await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
                assert_session_snapshot("question_reply", snapshot_status(status));
            }
            None => {
                // No question was found: replying to an unknown id must fail.
                let path =
                    format!("/v1/sessions/{reply_session}/questions/missing-question/reply");
                let body = json!({ "answers": [] });
                let (status, payload) = send_json(router, Method::POST, &path, Some(body)).await;
                assert!(!status.is_success(), "missing question id should error");
                let snapshot = json!({
                    "status": status.as_u16(),
                    "payload": payload,
                });
                assert_session_snapshot("question_reply_missing", snapshot);
            }
        }

        // --- Reject path ----------------------------------------------------
        let reject_session = format!("question-reject-{}", agent.as_str());
        create_session(router, agent, &reject_session, "plan").await;
        let reject_start = drain_events(router, &reject_session, Duration::from_secs(6)).await;

        let reject_messages_path = format!("/v1/sessions/{reject_session}/messages");
        let status = send_status(
            router,
            Method::POST,
            &reject_messages_path,
            Some(json!({ "message": QUESTION_PROMPT })),
        )
        .await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");

        let raw_reject_events = poll_events_until_match_from(
            router,
            &reject_session,
            reject_start,
            Duration::from_secs(120),
            |events| find_question_id_and_answers(events).is_some() || should_stop(events),
        )
        .await;
        let reject_events = truncate_question_events(&raw_reject_events);
        assert_session_snapshot("question_reject_events", normalize_events(&reject_events));

        match find_question_id_and_answers(&reject_events) {
            Some((question_id, _)) => {
                let path =
                    format!("/v1/sessions/{reject_session}/questions/{question_id}/reject");
                let status = send_status(router, Method::POST, &path, None).await;
                assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
                assert_session_snapshot("question_reject", snapshot_status(status));
            }
            None => {
                // No question was found: rejecting an unknown id must fail.
                let path =
                    format!("/v1/sessions/{reject_session}/questions/missing-question/reject");
                let (status, payload) = send_json(router, Method::POST, &path, None).await;
                assert!(!status.is_success(), "missing question id reject should error");
                let snapshot = json!({
                    "status": status.as_u16(),
                    "payload": payload,
                });
                assert_session_snapshot("question_reject_missing", snapshot);
            }
        }
    }
}

View file

@ -0,0 +1,56 @@
// Reasoning capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
/// Picks the prompt sent in the reasoning test: `"demo"` for the mock agent,
/// and an explicit request for reasoning for every other agent.
fn reasoning_prompt(agent: AgentId) -> &'static str {
    if agent != AgentId::Mock {
        return "Answer briefly and include your reasoning.";
    }
    "demo"
}
/// Checks that every configured agent reporting the `reasoning` capability
/// produces at least one event with `reasoning` content after being prompted.
///
/// Polling also stops on an error event; in that case the final assertion
/// fails unless reasoning content was seen as well.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn reasoning_events_present() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let router = &harness.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let supports_reasoning = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .reasoning;
        if !supports_reasoning {
            continue;
        }

        // Bound to a named variable (a bare `_` would drop it immediately) so
        // the value returned by `apply_credentials` lives for the whole iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        let session_id = format!("reasoning-{}", agent.as_str());
        create_session(router, agent, &session_id, test_permission_mode(agent)).await;
        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;

        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let prompt_body = json!({ "message": reasoning_prompt(agent) });
        let status = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send reasoning prompt");

        let events = poll_events_until_match_from(
            router,
            &session_id,
            start_offset,
            Duration::from_secs(120),
            |events| {
                events_have_content_type(events, "reasoning") || events.iter().any(is_error_event)
            },
        )
        .await;

        let saw_reasoning = events_have_content_type(&events, "reasoning");
        assert!(saw_reasoning, "expected reasoning content for {}", agent);
    }
}

View file

@ -0,0 +1,192 @@
// Session lifecycle and streaming snapshots use the mock baseline as the single source of truth.
include!("../common/http.rs");
/// Returns the snapshot suffix for `prefix`.
///
/// The suffix is always derived with the mock agent, so every agent's output
/// is asserted against the same (mock) snapshot.
fn session_snapshot_suffix(prefix: &str) -> String {
    let baseline_agent = Some(AgentId::Mock);
    snapshot_name(prefix, baseline_agent)
}
fn assert_session_snapshot(prefix: &str, value: Value) {
insta::with_settings!({
snapshot_suffix => session_snapshot_suffix(prefix),
}, {
insta::assert_yaml_snapshot!(value);
});
}
/// Snapshots the basic session endpoints (create session, list sessions, send
/// message) for every configured agent that reports the `session_lifecycle`
/// capability.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn session_endpoints_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let router = &harness.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let supports_lifecycle = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if !supports_lifecycle {
            continue;
        }

        // Bound to a named variable (a bare `_` would drop it immediately) so
        // the value returned by `apply_credentials` lives for the whole iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        let session_id = format!("snapshot-{}", agent.as_str());
        let permission_mode = test_permission_mode(agent);

        // Create the session.
        let session_path = format!("/v1/sessions/{session_id}");
        let create_body = json!({
            "agent": agent.as_str(),
            "permissionMode": permission_mode
        });
        let (status, created) =
            send_json(router, Method::POST, &session_path, Some(create_body)).await;
        assert_eq!(status, StatusCode::OK, "create session");
        assert_session_snapshot("create_session", normalize_create_session(&created));

        // List sessions.
        let (status, sessions) = send_json(router, Method::GET, "/v1/sessions", None).await;
        assert_eq!(status, StatusCode::OK, "list sessions");
        assert_session_snapshot("sessions_list", normalize_sessions(&sessions));

        // Send a message to the new session.
        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let message_body = json!({ "message": PROMPT });
        let status = send_status(router, Method::POST, &messages_path, Some(message_body)).await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send message");
        assert_session_snapshot("send_message", snapshot_status(status));
    }
}
/// Runs the HTTP events snapshot for every configured agent that reports the
/// `session_lifecycle` capability, except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn http_events_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    // OpenCode's embedded bun hangs when installing plugins, blocking event streaming.
    let runnable = agents
        .iter()
        .filter(|agent_config| agent_config.agent != AgentId::Opencode);
    for agent_config in runnable {
        let harness = TestApp::new();
        let capabilities = fetch_capabilities(&harness.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_http_events_snapshot(&harness.app, agent_config).await;
        }
    }
}
/// Runs the SSE events snapshot for every configured agent that reports the
/// `session_lifecycle` capability, except OpenCode.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sse_events_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
    let runnable = agents
        .iter()
        .filter(|agent_config| agent_config.agent != AgentId::Opencode);
    for agent_config in runnable {
        let harness = TestApp::new();
        let capabilities = fetch_capabilities(&harness.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_sse_events_snapshot(&harness.app, agent_config).await;
        }
    }
}
/// Runs the two-session concurrency snapshot for every configured agent that
/// reports the `session_lifecycle` capability.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrency_snapshots() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let capabilities = fetch_capabilities(&harness.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_concurrency_snapshot(&harness.app, agent_config).await;
        }
    }
}
/// Runs the turn-stream check for every configured agent that reports the
/// `session_lifecycle` capability.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn turn_stream_route() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let capabilities = fetch_capabilities(&harness.app).await;
        let supports_lifecycle = capabilities
            .get(agent_config.agent.as_str())
            .expect("capabilities missing")
            .session_lifecycle;
        if supports_lifecycle {
            run_turn_stream_check(&harness.app, agent_config).await;
        }
    }
}
/// Drives two sessions of the same agent concurrently and snapshots the
/// events of both.
///
/// Steps:
/// 1. install the agent and create sessions `concurrent-a-<agent>` and
///    `concurrent-b-<agent>`;
/// 2. drain the events already present in each session to get a start offset;
/// 3. send a message to both sessions concurrently, then poll both concurrently;
/// 4. truncate each event list after its first stop event, assert that each is
///    non-empty and satisfies `should_stop`, and snapshot the normalized events
///    as `concurrency_events`.
///
/// # Panics
///
/// Panics if either session yields no events or never satisfies `should_stop`,
/// or if the snapshot assertion fails.
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
    let _guard = apply_credentials(&config.credentials);
    install_agent(app, config.agent).await;

    let session_a = format!("concurrent-a-{}", config.agent.as_str());
    let session_b = format!("concurrent-b-{}", config.agent.as_str());
    let perm_mode = test_permission_mode(config.agent);
    create_session(app, config.agent, &session_a, perm_mode).await;
    create_session(app, config.agent, &session_b, perm_mode).await;
    let offset_a = drain_events(app, &session_a, Duration::from_secs(6)).await;
    let offset_b = drain_events(app, &session_b, Duration::from_secs(6)).await;

    // The helpers only need a shared `&Router`, so both futures borrow `app`
    // directly; cloning the router per future is unnecessary.
    let send_a = send_message(app, &session_a);
    let send_b = send_message(app, &session_b);
    tokio::join!(send_a, send_b);

    let poll_a = poll_events_until_from(app, &session_a, offset_a, Duration::from_secs(120));
    let poll_b = poll_events_until_from(app, &session_b, offset_b, Duration::from_secs(120));
    let (events_a, events_b) = tokio::join!(poll_a, poll_b);
    let events_a = truncate_after_first_stop(&events_a);
    let events_b = truncate_after_first_stop(&events_b);

    assert!(
        !events_a.is_empty(),
        "no events collected for concurrent session a {}",
        config.agent
    );
    assert!(
        !events_b.is_empty(),
        "no events collected for concurrent session b {}",
        config.agent
    );
    assert!(
        should_stop(&events_a),
        "timed out waiting for assistant/error event for concurrent session a {}",
        config.agent
    );
    assert!(
        should_stop(&events_b),
        "timed out waiting for assistant/error event for concurrent session b {}",
        config.agent
    );

    let snapshot = json!({
        "session_a": normalize_events(&events_a),
        "session_b": normalize_events(&events_b),
    });
    assert_session_snapshot("concurrency_events", snapshot);
}

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1011
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
source: server/packages/sandbox-agent/tests/sessions/permissions.rs
expression: value
---
payload:
detail: "invalid request: unknown permission id: missing-permission"

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1078
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
payload:
detail: "invalid request: unknown question id: missing-question"

View file

@ -0,0 +1,48 @@
---
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 5
type: item.delta
- item:
content_types:
- text
kind: message
role: assistant
status: completed
seq: 6
type: item.completed

View file

@ -1,7 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1072
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
source: server/packages/sandbox-agent/tests/sessions/questions.rs
expression: value
---
payload:
detail: "invalid request: unknown question id: missing-question"

View file

@ -1,38 +1,43 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 1351
expression: snapshot
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
session_a:
- metadata: true
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
session: started
source: agent
synthetic: false
type: session.started
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: agent
synthetic: false
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: agent
synthetic: false
seq: 5
type: item.delta
- item:
content_types:
@ -40,40 +45,44 @@ session_a:
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
seq: 6
type: item.completed
session_b:
- metadata: true
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
session: started
source: agent
synthetic: false
type: session.started
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: agent
synthetic: false
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: agent
synthetic: false
seq: 5
type: item.delta
- item:
content_types:
@ -81,7 +90,5 @@ session_b:
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
seq: 6
type: item.completed

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
healthy: true
nativeSessionId: "<redacted>"

View file

@ -0,0 +1,5 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
status: 204

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/sessions/session_lifecycle.rs
expression: value
---
hasExpectedFields: true
sessionCount: 1

View file

@ -1,37 +1,42 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 811
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
expression: normalized
---
- metadata: true
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
session: started
source: daemon
synthetic: true
type: session.started
- metadata: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
session: started
source: agent
synthetic: false
type: session.started
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
source: agent
synthetic: false
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
source: agent
synthetic: false
seq: 5
type: item.delta
- item:
content_types:
@ -39,7 +44,5 @@ expression: normalized
kind: message
role: assistant
status: completed
seq: 5
source: agent
synthetic: false
seq: 6
type: item.completed

View file

@ -1,29 +1,42 @@
---
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
assertion_line: 804
source: server/packages/sandbox-agent/tests/sessions/../common/http.rs
expression: normalized
---
- metadata: true
- item:
content_types:
- text
kind: message
role: user
status: in_progress
seq: 1
session: started
type: session.started
- metadata: true
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 2
session: started
type: session.started
type: item.delta
- item:
content_types:
- text
kind: message
role: user
status: completed
seq: 3
type: item.completed
- item:
content_types:
- text
kind: message
role: assistant
status: in_progress
seq: 3
seq: 4
type: item.started
- delta:
delta: "<redacted>"
item_id: "<redacted>"
native_item_id: "<redacted>"
seq: 4
seq: 5
type: item.delta
- item:
content_types:
@ -31,5 +44,5 @@ expression: normalized
kind: message
role: assistant
status: completed
seq: 5
seq: 6
type: item.completed

View file

@ -0,0 +1,61 @@
// Status capability checks are isolated from baseline snapshots.
include!("../common/http.rs");
/// Picks the prompt sent in the status test: `"status"` for the mock agent,
/// and a plain request for a status update for every other agent.
fn status_prompt(agent: AgentId) -> &'static str {
    if agent != AgentId::Mock {
        return "Provide a short status update.";
    }
    "status"
}
fn events_have_status(events: &[Value]) -> bool {
events.iter().any(|event| event_is_status_item(event))
|| events_have_content_type(events, "status")
}
/// Checks that every configured agent reporting the `status` capability
/// produces at least one status event after being prompted.
///
/// Polling also stops on an error event; in that case the final assertion
/// fails unless a status event was seen as well.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn status_events_present() {
    let agents = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    for agent_config in &agents {
        let harness = TestApp::new();
        let router = &harness.app;
        let agent = agent_config.agent;

        let capabilities = fetch_capabilities(router).await;
        let supports_status = capabilities
            .get(agent.as_str())
            .expect("capabilities missing")
            .status;
        if !supports_status {
            continue;
        }

        // Bound to a named variable (a bare `_` would drop it immediately) so
        // the value returned by `apply_credentials` lives for the whole iteration.
        let _credentials_guard = apply_credentials(&agent_config.credentials);
        install_agent(router, agent).await;

        let session_id = format!("status-{}", agent.as_str());
        create_session(router, agent, &session_id, test_permission_mode(agent)).await;
        let start_offset = drain_events(router, &session_id, Duration::from_secs(6)).await;

        let messages_path = format!("/v1/sessions/{session_id}/messages");
        let prompt_body = json!({ "message": status_prompt(agent) });
        let status = send_status(router, Method::POST, &messages_path, Some(prompt_body)).await;
        assert_eq!(status, StatusCode::NO_CONTENT, "send status prompt");

        let events = poll_events_until_match_from(
            router,
            &session_id,
            start_offset,
            Duration::from_secs(120),
            |events| events_have_status(events) || events.iter().any(is_error_event),
        )
        .await;

        let saw_status = events_have_status(&events);
        assert!(saw_status, "expected status events for {}", agent);
    }
}