feat: expand api snapshots and schema tooling

This commit is contained in:
Nathan Flurry 2026-01-26 00:13:17 -08:00
parent ee014b0838
commit 011ca27287
72 changed files with 29480 additions and 1081 deletions

1
server/AGENTS.md Symbolic link
View file

@ -0,0 +1 @@
CLAUDE.md

68
server/CLAUDE.md Normal file
View file

@ -0,0 +1,68 @@
# Server Testing
## Snapshot tests
The HTTP/SSE snapshot suite lives in:
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs`
Snapshots are written to:
- `server/packages/sandbox-agent/tests/snapshots/`
## Agent selection
`SANDBOX_TEST_AGENTS` controls which agents run. It accepts a comma-separated list or `all`.
If it is **not set**, tests will auto-detect installed agents by checking:
- binaries on `PATH`, and
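- the default install dir: `sandbox-agent/bin` under the platform data directory (e.g. `$XDG_DATA_HOME/sandbox-agent/bin` on Linux), or `./.sandbox-agent/bin` when no data directory can be determined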
If no agents are found, tests fail with a clear error.
## Credential handling
Credentials are pulled from the host by default via `extract_all_credentials`:
- environment variables (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`)
- local CLI configs (Claude/Codex/Amp/OpenCode)
You can override host credentials for tests with:
- `SANDBOX_TEST_ANTHROPIC_API_KEY`
- `SANDBOX_TEST_OPENAI_API_KEY`
If `SANDBOX_TEST_AGENTS` includes an agent that requires a provider credential and it is missing,
tests fail before starting.
## Credential health checks
Before running agent tests, credentials are validated with minimal API calls:
- Anthropic: `GET https://api.anthropic.com/v1/models`
- `x-api-key` for API keys
- `Authorization: Bearer` for OAuth tokens
- `anthropic-version: 2023-06-01`
- OpenAI: `GET https://api.openai.com/v1/models` with `Authorization: Bearer`
401/403 yields a hard failure (`invalid credentials`). Other non-2xx responses or network
errors fail with a health-check error.
Health checks run in a blocking thread to avoid Tokio runtime drop errors inside async tests.
## Snapshot stability
To keep snapshots deterministic:
- Event streams are truncated after the first assistant or error event.
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
- Unknown events are preserved as `kind: unknown` (raw payload in universal schema).
## Typical commands
Run only Claude snapshots:
```
SANDBOX_TEST_AGENTS=claude cargo test -p sandbox-agent-core --test http_sse_snapshots
```
Run all detected agents:
```
cargo test -p sandbox-agent-core --test http_sse_snapshots
```
## Universal Schema
When modifying agent conversion code in `server/packages/universal-agent-schema/src/agents/` or adding/changing properties on the universal schema, update the feature matrix in `README.md` to reflect which agents support which features.

View file

@ -6,7 +6,7 @@ authors.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
dirs = "5.0"
time = { version = "0.3", features = ["parsing", "formatting"] }
serde.workspace = true
serde_json.workspace = true
dirs.workspace = true
time.workspace = true

View file

@ -6,15 +6,15 @@ authors.workspace = true
license.workspace = true
[dependencies]
thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sandbox-agent-agent-credentials = { path = "../agent-credentials" }
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls"] }
flate2 = "1.0"
tar = "0.4"
zip = { version = "0.6", default-features = false, features = ["deflate"] }
url = "2.5"
dirs = "5.0"
tempfile = "3.10"
time = { version = "0.3", features = ["parsing", "formatting"] }
sandbox-agent-agent-credentials.workspace = true
thiserror.workspace = true
serde.workspace = true
serde_json.workspace = true
reqwest.workspace = true
flate2.workspace = true
tar.workspace = true
zip.workspace = true
url.workspace = true
dirs.workspace = true
tempfile.workspace = true
time.workspace = true

View file

@ -1,9 +1,17 @@
use std::env;
use std::path::PathBuf;
use std::time::Duration;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION, CONTENT_TYPE};
use reqwest::StatusCode;
use thiserror::Error;
use crate::agents::AgentId;
use crate::credentials::{AuthType, ExtractedCredentials, ProviderCredentials};
use crate::credentials::{
extract_all_credentials, AuthType, CredentialExtractionOptions, ExtractedCredentials,
ProviderCredentials,
};
#[derive(Debug, Clone)]
pub struct TestAgentConfig {
@ -13,76 +21,121 @@ pub struct TestAgentConfig {
#[derive(Debug, Error)]
pub enum TestAgentConfigError {
#[error("no test agents configured (set SANDBOX_TEST_AGENTS)")]
#[error("no test agents detected (install agents or set SANDBOX_TEST_AGENTS)")]
NoAgentsConfigured,
#[error("unknown agent name: {0}")]
UnknownAgent(String),
#[error("missing credentials for {agent}: {missing}")]
MissingCredentials { agent: AgentId, missing: String },
#[error("invalid credentials for {provider} (status {status})")]
InvalidCredentials { provider: String, status: u16 },
#[error("credential health check failed for {provider}: {message}")]
HealthCheckFailed { provider: String, message: String },
}
/// Environment variable holding the comma-separated list of agents to test (or `all`).
const AGENTS_ENV: &str = "SANDBOX_TEST_AGENTS";
/// Environment variable that overrides the host's Anthropic credential for tests.
const ANTHROPIC_ENV: &str = "SANDBOX_TEST_ANTHROPIC_API_KEY";
/// Environment variable that overrides the host's OpenAI credential for tests.
const OPENAI_ENV: &str = "SANDBOX_TEST_OPENAI_API_KEY";
/// Endpoint queried by the Anthropic credential health check.
const ANTHROPIC_MODELS_URL: &str = "https://api.anthropic.com/v1/models";
/// Endpoint queried by the OpenAI credential health check.
const OPENAI_MODELS_URL: &str = "https://api.openai.com/v1/models";
/// Value sent in the `anthropic-version` header of the Anthropic health check.
const ANTHROPIC_VERSION: &str = "2023-06-01";
/// Records which providers have already passed a credential health check, so
/// `ensure_anthropic_ok` / `ensure_openai_ok` run each check at most once per cache.
/// Both flags start as `false` via `Default`.
#[derive(Default)]
struct HealthCheckCache {
// Set to `true` after `health_check_anthropic` succeeds.
anthropic_ok: bool,
// Set to `true` after `health_check_openai` succeeds.
openai_ok: bool,
}
pub fn test_agents_from_env() -> Result<Vec<TestAgentConfig>, TestAgentConfigError> {
let raw_agents = env::var(AGENTS_ENV).unwrap_or_default();
let mut agents = Vec::new();
for entry in raw_agents.split(',') {
let trimmed = entry.trim();
if trimmed.is_empty() {
continue;
let mut agents = if raw_agents.trim().is_empty() {
detect_system_agents()
} else {
let mut agents = Vec::new();
for entry in raw_agents.split(',') {
let trimmed = entry.trim();
if trimmed.is_empty() {
continue;
}
if trimmed == "all" {
agents.extend([
AgentId::Claude,
AgentId::Codex,
AgentId::Opencode,
AgentId::Amp,
]);
continue;
}
let agent = AgentId::parse(trimmed)
.ok_or_else(|| TestAgentConfigError::UnknownAgent(trimmed.to_string()))?;
agents.push(agent);
}
if trimmed == "all" {
agents.extend([
AgentId::Claude,
AgentId::Codex,
AgentId::Opencode,
AgentId::Amp,
]);
continue;
}
let agent = AgentId::parse(trimmed)
.ok_or_else(|| TestAgentConfigError::UnknownAgent(trimmed.to_string()))?;
agents.push(agent);
}
agents
};
agents.sort_by(|a, b| a.as_str().cmp(b.as_str()));
agents.dedup();
if agents.is_empty() {
return Err(TestAgentConfigError::NoAgentsConfigured);
}
let anthropic_key = read_env_key(ANTHROPIC_ENV);
let openai_key = read_env_key(OPENAI_ENV);
let extracted = extract_all_credentials(&CredentialExtractionOptions::new());
let anthropic_cred = read_env_key(ANTHROPIC_ENV)
.map(|key| ProviderCredentials {
api_key: key,
source: "sandbox-test-env".to_string(),
auth_type: AuthType::ApiKey,
provider: "anthropic".to_string(),
})
.or_else(|| extracted.anthropic.clone());
let openai_cred = read_env_key(OPENAI_ENV)
.map(|key| ProviderCredentials {
api_key: key,
source: "sandbox-test-env".to_string(),
auth_type: AuthType::ApiKey,
provider: "openai".to_string(),
})
.or_else(|| extracted.openai.clone());
let mut health_cache = HealthCheckCache::default();
let mut configs = Vec::new();
for agent in agents {
let credentials = match agent {
AgentId::Claude | AgentId::Amp => {
let anthropic_key = anthropic_key.clone().ok_or_else(|| {
let anthropic_cred = anthropic_cred.clone().ok_or_else(|| {
TestAgentConfigError::MissingCredentials {
agent,
missing: ANTHROPIC_ENV.to_string(),
}
})?;
credentials_with(anthropic_key, None)
ensure_anthropic_ok(&mut health_cache, &anthropic_cred)?;
credentials_with(Some(anthropic_cred), None)
}
AgentId::Codex => {
let openai_key = openai_key.clone().ok_or_else(|| {
let openai_cred = openai_cred.clone().ok_or_else(|| {
TestAgentConfigError::MissingCredentials {
agent,
missing: OPENAI_ENV.to_string(),
}
})?;
credentials_with(None, Some(openai_key))
ensure_openai_ok(&mut health_cache, &openai_cred)?;
credentials_with(None, Some(openai_cred))
}
AgentId::Opencode => {
if anthropic_key.is_none() && openai_key.is_none() {
if anthropic_cred.is_none() && openai_cred.is_none() {
return Err(TestAgentConfigError::MissingCredentials {
agent,
missing: format!("{ANTHROPIC_ENV} or {OPENAI_ENV}"),
});
}
credentials_with(anthropic_key.clone(), openai_key.clone())
if let Some(cred) = anthropic_cred.as_ref() {
ensure_anthropic_ok(&mut health_cache, cred)?;
}
if let Some(cred) = openai_cred.as_ref() {
ensure_openai_ok(&mut health_cache, cred)?;
}
credentials_with(anthropic_cred.clone(), openai_cred.clone())
}
};
configs.push(TestAgentConfig { agent, credentials });
@ -91,6 +144,178 @@ pub fn test_agents_from_env() -> Result<Vec<TestAgentConfig>, TestAgentConfigErr
Ok(configs)
}
/// Runs the Anthropic credential health check unless `cache` already records a success.
///
/// Only a successful check is remembered; a failure is propagated to the caller and
/// leaves the cache untouched, so a later call would check again.
///
/// # Errors
/// Whatever `health_check_anthropic` returns.
fn ensure_anthropic_ok(
    cache: &mut HealthCheckCache,
    credentials: &ProviderCredentials,
) -> Result<(), TestAgentConfigError> {
    if !cache.anthropic_ok {
        health_check_anthropic(credentials)?;
        cache.anthropic_ok = true;
    }
    Ok(())
}
/// Runs the OpenAI credential health check unless `cache` already records a success.
///
/// Only a successful check is remembered; a failure is propagated to the caller and
/// leaves the cache untouched, so a later call would check again.
///
/// # Errors
/// Whatever `health_check_openai` returns.
fn ensure_openai_ok(
    cache: &mut HealthCheckCache,
    credentials: &ProviderCredentials,
) -> Result<(), TestAgentConfigError> {
    if !cache.openai_ok {
        health_check_openai(credentials)?;
        cache.openai_ok = true;
    }
    Ok(())
}
/// Validates an Anthropic credential by issuing `GET` against `ANTHROPIC_MODELS_URL`.
///
/// API keys are sent in `x-api-key`; OAuth tokens are sent as `Authorization: Bearer …`.
/// The request also carries `anthropic-version` and a JSON `Content-Type`, and uses a
/// 10-second client timeout. The request itself is executed through `run_blocking_check`.
///
/// # Errors
/// * `HealthCheckFailed` when the client cannot be built, the credential is not a valid
///   header value, or the request cannot be sent.
/// * Otherwise whatever `handle_health_response` derives from the HTTP status.
fn health_check_anthropic(credentials: &ProviderCredentials) -> Result<(), TestAgentConfigError> {
    const PROVIDER: &str = "anthropic";

    // The closure is moved to another thread, so it needs its own copy.
    let owned = credentials.clone();
    run_blocking_check(PROVIDER, move || {
        let failed = |message: String| TestAgentConfigError::HealthCheckFailed {
            provider: PROVIDER.to_string(),
            message,
        };

        let client = Client::builder()
            .timeout(Duration::from_secs(10))
            .build()
            .map_err(|err| failed(err.to_string()))?;

        let mut headers = HeaderMap::new();
        match owned.auth_type {
            AuthType::ApiKey => {
                let value = HeaderValue::from_str(&owned.api_key)
                    .map_err(|_| failed("invalid anthropic api key header value".to_string()))?;
                headers.insert("x-api-key", value);
            }
            AuthType::Oauth => {
                let bearer = format!("Bearer {}", owned.api_key);
                let value = HeaderValue::from_str(&bearer)
                    .map_err(|_| failed("invalid anthropic oauth header value".to_string()))?;
                headers.insert(AUTHORIZATION, value);
            }
        }
        headers.insert(
            "anthropic-version",
            HeaderValue::from_static(ANTHROPIC_VERSION),
        );
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));

        let response = client
            .get(ANTHROPIC_MODELS_URL)
            .headers(headers)
            .send()
            .map_err(|err| failed(err.to_string()))?;
        handle_health_response(PROVIDER, response)
    })
}
fn health_check_openai(credentials: &ProviderCredentials) -> Result<(), TestAgentConfigError> {
let credentials = credentials.clone();
run_blocking_check("openai", move || {
let client = Client::builder()
.timeout(Duration::from_secs(10))
.build()
.map_err(|err| TestAgentConfigError::HealthCheckFailed {
provider: "openai".to_string(),
message: err.to_string(),
})?;
let response = client
.get(OPENAI_MODELS_URL)
.bearer_auth(&credentials.api_key)
.send()
.map_err(|err| TestAgentConfigError::HealthCheckFailed {
provider: "openai".to_string(),
message: err.to_string(),
})?;
handle_health_response("openai", response)
})
}
/// Translates the HTTP response of a credential health check into a result.
///
/// * Any 2xx status yields `Ok(())`.
/// * 401 and 403 yield `InvalidCredentials` carrying the provider and status code.
/// * Every other status yields `HealthCheckFailed` whose message contains the status
///   code and at most 200 bytes of the trimmed response body (an unreadable body is
///   treated as empty).
fn handle_health_response(
    provider: &str,
    response: reqwest::blocking::Response,
) -> Result<(), TestAgentConfigError> {
    const MAX_SUMMARY_BYTES: usize = 200;

    let status = response.status();
    if status.is_success() {
        return Ok(());
    }
    if status == StatusCode::UNAUTHORIZED || status == StatusCode::FORBIDDEN {
        return Err(TestAgentConfigError::InvalidCredentials {
            provider: provider.to_string(),
            status: status.as_u16(),
        });
    }

    let body = response.text().unwrap_or_default();
    let trimmed = body.trim();
    // Slicing a `str` in the middle of a multi-byte character panics, so back off to the
    // nearest char boundary at or below the limit. Offset 0 is always a boundary, which
    // guarantees termination.
    let mut cut = trimmed.len().min(MAX_SUMMARY_BYTES);
    while !trimmed.is_char_boundary(cut) {
        cut -= 1;
    }
    let summary = &trimmed[..cut];

    Err(TestAgentConfigError::HealthCheckFailed {
        provider: provider.to_string(),
        message: format!("status {}: {}", status.as_u16(), summary),
    })
}
/// Executes `check` on a freshly spawned OS thread and waits for it to finish.
///
/// The accompanying testing notes say health checks run on a separate blocking thread to
/// avoid Tokio runtime drop errors inside async tests.
///
/// # Errors
/// Returns the closure's own result, or `HealthCheckFailed` for `provider` with the
/// message `health check panicked` when the thread panics.
fn run_blocking_check<F>(provider: &str, check: F) -> Result<(), TestAgentConfigError>
where
    F: FnOnce() -> Result<(), TestAgentConfigError> + Send + 'static,
{
    match std::thread::spawn(check).join() {
        Ok(outcome) => outcome,
        Err(_) => Err(TestAgentConfigError::HealthCheckFailed {
            provider: provider.to_string(),
            message: "health check panicked".to_string(),
        }),
    }
}
/// Lists the known agents whose binary exists either in a `PATH` directory or in the
/// default install directory.
///
/// Agents are probed — and therefore returned — in the order Claude, Codex, Opencode, Amp.
fn detect_system_agents() -> Vec<AgentId> {
    let install_dir = default_install_dir();
    let mut detected = Vec::new();
    for agent in [AgentId::Claude, AgentId::Codex, AgentId::Opencode, AgentId::Amp] {
        let binary = agent.binary_name();
        if find_in_path(binary) || install_dir.join(binary).exists() {
            detected.push(agent);
        }
    }
    detected
}
/// Returns `<data dir>/sandbox-agent/bin`, where the data dir comes from
/// `dirs::data_dir()`; falls back to `./.sandbox-agent/bin` when that yields `None`.
fn default_install_dir() -> PathBuf {
    match dirs::data_dir() {
        Some(data_dir) => data_dir.join("sandbox-agent").join("bin"),
        None => PathBuf::from(".").join(".sandbox-agent").join("bin"),
    }
}
/// Reports whether an entry named `binary_name` exists in any directory listed in `PATH`.
///
/// Returns `false` when `PATH` is unset. Only existence is checked.
fn find_in_path(binary_name: &str) -> bool {
    env::var_os("PATH")
        .map(|paths| env::split_paths(&paths).any(|dir| dir.join(binary_name).exists()))
        .unwrap_or(false)
}
fn read_env_key(name: &str) -> Option<String> {
env::var(name).ok().and_then(|value| {
let trimmed = value.trim().to_string();
@ -103,25 +328,11 @@ fn read_env_key(name: &str) -> Option<String> {
}
fn credentials_with(
anthropic_key: Option<String>,
openai_key: Option<String>,
anthropic_cred: Option<ProviderCredentials>,
openai_cred: Option<ProviderCredentials>,
) -> ExtractedCredentials {
let mut credentials = ExtractedCredentials::default();
if let Some(key) = anthropic_key {
credentials.anthropic = Some(ProviderCredentials {
api_key: key,
source: "sandbox-test-env".to_string(),
auth_type: AuthType::ApiKey,
provider: "anthropic".to_string(),
});
}
if let Some(key) = openai_key {
credentials.openai = Some(ProviderCredentials {
api_key: key,
source: "sandbox-test-env".to_string(),
auth_type: AuthType::ApiKey,
provider: "openai".to_string(),
});
}
credentials.anthropic = anthropic_cred;
credentials.openai = openai_cred;
credentials
}

View file

@ -1,18 +0,0 @@
[package]
name = "sandbox-agent-agent-schema"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
regress = "0.10"
[build-dependencies]
typify = "0.4"
serde_json = "1.0"
schemars = "0.8"
prettyplease = "0.2"
syn = "2.0"

View file

@ -1,76 +0,0 @@
//! Generated types from AI coding agent JSON schemas.
//!
//! This crate provides Rust types for:
//! - OpenCode SDK
//! - Claude Code SDK
//! - Codex SDK
//! - AMP Code SDK
pub mod opencode {
//! OpenCode SDK types extracted from OpenAPI 3.1.1 spec.
include!(concat!(env!("OUT_DIR"), "/opencode.rs"));
}
pub mod claude {
//! Claude Code SDK types extracted from TypeScript definitions.
include!(concat!(env!("OUT_DIR"), "/claude.rs"));
}
pub mod codex {
//! Codex SDK types.
include!(concat!(env!("OUT_DIR"), "/codex.rs"));
}
pub mod amp {
//! AMP Code SDK types.
include!(concat!(env!("OUT_DIR"), "/amp.rs"));
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_claude_bash_input() {
let input = claude::BashInput {
command: "ls -la".to_string(),
timeout: Some(5000.0),
description: Some("List files".to_string()),
run_in_background: None,
simulated_sed_edit: None,
dangerously_disable_sandbox: None,
};
let json = serde_json::to_string(&input).unwrap();
assert!(json.contains("ls -la"));
let parsed: claude::BashInput = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.command, "ls -la");
}
#[test]
fn test_codex_thread_event() {
let event = codex::ThreadEvent {
type_: codex::ThreadEventType::ThreadCreated,
thread_id: Some("thread-123".to_string()),
item: None,
error: serde_json::Map::new(),
};
let json = serde_json::to_string(&event).unwrap();
assert!(json.contains("thread.created"));
}
#[test]
fn test_amp_message() {
let msg = amp::Message {
role: amp::MessageRole::User,
content: "Hello".to_string(),
tool_calls: vec![],
};
let json = serde_json::to_string(&msg).unwrap();
assert!(json.contains("user"));
assert!(json.contains("Hello"));
}
}

View file

@ -6,8 +6,8 @@ authors.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
schemars = "0.8"
utoipa = "4.2"
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
schemars.workspace = true
utoipa.workspace = true

View file

@ -0,0 +1,19 @@
[package]
name = "sandbox-agent-extracted-agent-schemas"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
[dependencies]
serde.workspace = true
serde_json.workspace = true
regress.workspace = true
chrono.workspace = true
[build-dependencies]
typify.workspace = true
serde_json.workspace = true
schemars.workspace = true
prettyplease.workspace = true
syn.workspace = true

View file

@ -4,7 +4,7 @@ use std::path::Path;
fn main() {
let out_dir = std::env::var("OUT_DIR").unwrap();
let schema_dir = Path::new("../../../resources/agent-schemas/dist");
let schema_dir = Path::new("../../../resources/agent-schemas/artifacts/json-schema");
let schemas = [
("opencode", "opencode.json"),

View file

@ -0,0 +1,111 @@
//! Generated types from AI coding agent JSON schemas.
//!
//! This crate provides Rust types for:
//! - OpenCode SDK
//! - Claude Code SDK
//! - Codex SDK
//! - AMP Code SDK
pub mod opencode {
//! OpenCode SDK types extracted from OpenAPI 3.1.1 spec.
//!
//! The module body is included at compile time from `opencode.rs` in `OUT_DIR`
//! (presumably written by this crate's build script — confirm in `build.rs`).
include!(concat!(env!("OUT_DIR"), "/opencode.rs"));
}
pub mod claude {
//! Claude Code SDK types extracted from TypeScript definitions.
//!
//! The module body is included at compile time from `claude.rs` in `OUT_DIR`.
include!(concat!(env!("OUT_DIR"), "/claude.rs"));
}
pub mod codex {
//! Codex SDK types.
//!
//! The module body is included at compile time from `codex.rs` in `OUT_DIR`.
include!(concat!(env!("OUT_DIR"), "/codex.rs"));
}
pub mod amp {
//! AMP Code SDK types.
//!
//! The module body is included at compile time from `amp.rs` in `OUT_DIR`.
include!(concat!(env!("OUT_DIR"), "/amp.rs"));
}
// Smoke tests that construct a few of the generated types and check their JSON form.
#[cfg(test)]
mod tests {
use super::*;
// Serializes a Claude `BashInput` and parses it back, checking the command survives.
#[test]
fn test_claude_bash_input() {
let input = claude::BashInput {
command: "ls -la".to_string(),
timeout: Some(5000.0),
working_directory: None,
};
let json = serde_json::to_string(&input).unwrap();
assert!(json.contains("ls -la"));
let parsed: claude::BashInput = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.command, "ls -la");
}
// Checks that an item-completed notification serializes with the expected
// `item/completed` and `agentMessage` markers and carries the message text.
#[test]
fn test_codex_server_notification() {
// Test ItemCompletedNotification with AgentMessage
let notification = codex::ServerNotification::ItemCompleted(
codex::ItemCompletedNotification {
item: codex::ThreadItem::AgentMessage {
id: "msg-123".to_string(),
text: "Hello from Codex".to_string(),
},
thread_id: "thread-123".to_string(),
turn_id: "turn-456".to_string(),
}
);
let json = serde_json::to_string(&notification).unwrap();
assert!(json.contains("item/completed"));
assert!(json.contains("Hello from Codex"));
assert!(json.contains("agentMessage"));
}
// Checks the serialized marker strings of two `ThreadItem` variants.
#[test]
fn test_codex_thread_item_variants() {
// Test UserMessage variant
let user_msg = codex::ThreadItem::UserMessage {
content: vec![codex::UserInput::Text {
text: "Hello".to_string(),
text_elements: vec![],
}],
id: "user-1".to_string(),
};
let json = serde_json::to_string(&user_msg).unwrap();
assert!(json.contains("userMessage"));
assert!(json.contains("Hello"));
// Test CommandExecution variant
let cmd = codex::ThreadItem::CommandExecution {
aggregated_output: Some("output".to_string()),
command: "ls -la".to_string(),
command_actions: vec![],
cwd: "/tmp".to_string(),
duration_ms: Some(100),
exit_code: Some(0),
id: "cmd-1".to_string(),
process_id: None,
status: codex::CommandExecutionStatus::Completed,
};
let json = serde_json::to_string(&cmd).unwrap();
assert!(json.contains("commandExecution"));
assert!(json.contains("ls -la"));
}
// Checks that an AMP user message serializes with its role and content.
#[test]
fn test_amp_message() {
let msg = amp::Message {
role: amp::MessageRole::User,
content: "Hello".to_string(),
tool_calls: vec![],
};
let json = serde_json::to_string(&msg).unwrap();
assert!(json.contains("user"));
assert!(json.contains("Hello"));
}
}

View file

@ -7,11 +7,11 @@ license.workspace = true
build = "build.rs"
[dependencies]
tracing = "0.1"
tracing-logfmt = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing.workspace = true
tracing-logfmt.workspace = true
tracing-subscriber.workspace = true
[build-dependencies]
sandbox-agent-core = { path = "../sandbox-agent" }
serde_json = "1.0"
utoipa = "4.2"
sandbox-agent-core.workspace = true
serde_json.workspace = true
utoipa.workspace = true

View file

@ -10,30 +10,30 @@ name = "sandbox-agent"
path = "src/main.rs"
[dependencies]
thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
axum = "0.7"
clap = { version = "4.5", features = ["derive"] }
futures = "0.3"
sandbox-agent-error = { path = "../error" }
sandbox-agent-agent-management = { path = "../agent-management" }
sandbox-agent-agent-credentials = { path = "../agent-credentials" }
sandbox-agent-universal-agent-schema = { path = "../universal-agent-schema" }
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls", "stream"] }
dirs = "5.0"
time = { version = "0.3", features = ["parsing", "formatting"] }
tokio = { version = "1.36", features = ["macros", "rt-multi-thread", "signal", "time"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tower-http = { version = "0.5", features = ["cors", "trace"] }
utoipa = { version = "4.2", features = ["axum_extras"] }
schemars = "0.8"
tracing = "0.1"
tracing-logfmt = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
sandbox-agent-error.workspace = true
sandbox-agent-agent-management.workspace = true
sandbox-agent-agent-credentials.workspace = true
sandbox-agent-universal-agent-schema.workspace = true
thiserror.workspace = true
serde.workspace = true
serde_json.workspace = true
axum.workspace = true
clap.workspace = true
futures.workspace = true
reqwest.workspace = true
dirs.workspace = true
time.workspace = true
tokio.workspace = true
tokio-stream.workspace = true
tower-http.workspace = true
utoipa.workspace = true
schemars.workspace = true
tracing.workspace = true
tracing-logfmt.workspace = true
tracing-subscriber.workspace = true
[dev-dependencies]
http-body-util = "0.1"
insta = "1.41"
tempfile = "3.10"
tower = "0.4"
http-body-util.workspace = true
insta.workspace = true
tempfile.workspace = true
tower.workspace = true

View file

@ -1202,6 +1202,11 @@ async fn require_token(
req: Request<axum::body::Body>,
next: Next,
) -> Result<Response, ApiError> {
let path = req.uri().path();
if path == "/v1/health" || path == "/health" {
return Ok(next.run(req).await);
}
let expected = match &state.auth.token {
Some(token) => token.as_str(),
None => return Ok(next.run(req).await),
@ -1946,7 +1951,7 @@ fn parse_agent_line(agent: AgentId, line: &str, session_id: &str) -> Option<Even
convert_claude::event_to_universal_with_session(&value, session_id.to_string())
}
AgentId::Codex => match serde_json::from_value(value.clone()) {
Ok(event) => convert_codex::event_to_universal(&event),
Ok(notification) => convert_codex::notification_to_universal(&notification),
Err(err) => EventConversion::new(unparsed_message(
&value.to_string(),
&err.to_string(),

View file

@ -1,8 +1,8 @@
use std::collections::BTreeMap;
use std::time::{Duration, Instant};
use axum::body::Body;
use axum::http::{Method, Request, StatusCode};
use axum::body::{Body, Bytes};
use axum::http::{header, HeaderMap, HeaderValue, Method, Request, StatusCode};
use axum::Router;
use futures::StreamExt;
use http_body_util::BodyExt;
@ -13,9 +13,13 @@ use sandbox_agent_agent_management::agents::{AgentId, AgentManager};
use sandbox_agent_agent_management::testing::{test_agents_from_env, TestAgentConfig};
use sandbox_agent_agent_credentials::ExtractedCredentials;
use sandbox_agent_core::router::{build_router, AppState, AuthConfig};
use tower::ServiceExt;
use tower::util::ServiceExt;
use tower_http::cors::CorsLayer;
const PROMPT: &str = "Reply with exactly the single word OK.";
const PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
const QUESTION_PROMPT: &str =
"Ask the user a multiple-choice question with options yes/no using any built-in AskUserQuestion tool, then wait.";
struct TestApp {
app: Router,
@ -24,11 +28,22 @@ struct TestApp {
impl TestApp {
fn new() -> Self {
Self::new_with_auth(AuthConfig::disabled())
}
fn new_with_auth(auth: AuthConfig) -> Self {
Self::new_with_auth_and_cors(auth, None)
}
fn new_with_auth_and_cors(auth: AuthConfig, cors: Option<CorsLayer>) -> Self {
let install_dir = tempfile::tempdir().expect("create temp install dir");
let manager = AgentManager::new(install_dir.path())
.expect("create agent manager");
let state = AppState::new(AuthConfig::disabled(), manager);
let app = build_router(state);
let state = AppState::new(auth, manager);
let mut app = build_router(state);
if let Some(cors) = cors {
app = app.layer(cors);
}
Self {
app,
_install_dir: install_dir,
@ -112,6 +127,37 @@ async fn send_json(app: &Router, method: Method, path: &str, body: Option<Value>
(status, value)
}
/// Sends `request` through a clone of the router and returns the response status,
/// headers, and fully collected body.
///
/// # Panics
/// Panics if the router fails to handle the request or the body cannot be read.
async fn send_request(app: &Router, request: Request<Body>) -> (StatusCode, HeaderMap, Bytes) {
    let (parts, body) = app
        .clone()
        .oneshot(request)
        .await
        .expect("request handled")
        .into_parts();
    let bytes = body.collect().await.expect("read body").to_bytes();
    (parts.status, parts.headers, bytes)
}
/// Sends `request` via `send_request` and decodes the body as JSON.
///
/// An empty body becomes `Value::Null`. A body that is not valid JSON is returned as a
/// `Value::String` holding its lossy UTF-8 text, so callers still see what was sent.
async fn send_json_request(
    app: &Router,
    request: Request<Body>,
) -> (StatusCode, HeaderMap, Value) {
    let (status, headers, bytes) = send_request(app, request).await;
    let value = if bytes.is_empty() {
        Value::Null
    } else {
        // Build the string fallback lazily: it is only needed when parsing fails.
        serde_json::from_slice(&bytes)
            .unwrap_or_else(|_| Value::String(String::from_utf8_lossy(&bytes).into_owned()))
    };
    (status, headers, value)
}
async fn send_status(app: &Router, method: Method, path: &str, body: Option<Value>) -> StatusCode {
let (status, _) = send_json(app, method, path, body).await;
status
@ -128,14 +174,14 @@ async fn install_agent(app: &Router, agent: AgentId) {
assert_eq!(status, StatusCode::NO_CONTENT, "install {agent}");
}
async fn create_session(app: &Router, agent: AgentId, session_id: &str) {
async fn create_session(app: &Router, agent: AgentId, session_id: &str, permission_mode: &str) {
let status = send_status(
app,
Method::POST,
&format!("/v1/sessions/{session_id}"),
Some(json!({
"agent": agent.as_str(),
"permissionMode": "bypass"
"permissionMode": permission_mode
})),
)
.await;
@ -211,7 +257,7 @@ async fn read_sse_events(
_ => break,
};
let next = tokio::time::timeout(remaining, stream.next()).await;
let chunk = match next {
let chunk: Bytes = match next {
Ok(Some(Ok(chunk))) => chunk,
Ok(Some(Err(_))) => break,
Ok(None) => break,
@ -267,6 +313,23 @@ fn is_error_event(event: &Value) -> bool {
.is_some()
}
/// True when the event has a `data` member that contains a `permissionAsked` key.
fn is_permission_event(event: &Value) -> bool {
    matches!(event.get("data"), Some(data) if data.get("permissionAsked").is_some())
}
/// Returns the events up to and including the first permission event; if there is none,
/// up to and including the first assistant message; if there is neither, all events.
fn truncate_permission_events(events: &[Value]) -> Vec<Value> {
    let cut = events
        .iter()
        .position(is_permission_event)
        .or_else(|| events.iter().position(is_assistant_message));
    match cut {
        Some(idx) => events[..=idx].to_vec(),
        None => events.to_vec(),
    }
}
fn normalize_events(events: &[Value]) -> Value {
let normalized = events
.iter()
@ -276,6 +339,16 @@ fn normalize_events(events: &[Value]) -> Value {
Value::Array(normalized)
}
/// Returns the events up to and including the first assistant message or error event,
/// or every event when neither occurs.
fn truncate_after_first_stop(events: &[Value]) -> Vec<Value> {
    let end = events
        .iter()
        .position(|event| is_assistant_message(event) || is_error_event(event))
        .map_or(events.len(), |idx| idx + 1);
    events[..end].to_vec()
}
fn normalize_event(event: &Value, seq: usize) -> Value {
let mut map = Map::new();
map.insert("seq".to_string(), Value::Number(seq.into()));
@ -379,8 +452,239 @@ fn normalize_permission(permission: &Value) -> Value {
Value::Object(map)
}
fn snapshot_name(prefix: &str, agent: AgentId) -> String {
format!("{prefix}_{}", agent.as_str())
/// Reduces an agent-list payload to `{ "agents": [{ "id": … }, …] }` sorted by id.
///
/// Only string `id` fields are kept; installed/version/path fields are dropped because
/// they depend on the local environment and would make snapshots non-deterministic.
/// A missing or non-array `agents` member produces an empty list.
fn normalize_agent_list(value: &Value) -> Value {
    fn id_of(entry: &Value) -> Option<&str> {
        entry.get("id").and_then(Value::as_str)
    }

    let mut agents: Vec<Value> = value
        .get("agents")
        .and_then(Value::as_array)
        .map(|list| {
            list.iter()
                .map(|agent| {
                    let mut fields = Map::new();
                    if let Some(id) = id_of(agent) {
                        fields.insert("id".to_string(), Value::String(id.to_string()));
                    }
                    Value::Object(fields)
                })
                .collect()
        })
        .unwrap_or_default();
    agents.sort_by(|a, b| id_of(a).cmp(&id_of(b)));
    json!({ "agents": agents })
}
/// Reduces an agent-modes payload to `{ "modes": […] }` sorted by id.
///
/// Each mode keeps its string `id` and `name`; a present `description` is replaced by
/// `true` so that only its existence is recorded. A missing or non-array `modes` member
/// produces an empty list.
fn normalize_agent_modes(value: &Value) -> Value {
    fn id_of(entry: &Value) -> Option<&str> {
        entry.get("id").and_then(Value::as_str)
    }

    let mut modes: Vec<Value> = Vec::new();
    if let Some(list) = value.get("modes").and_then(Value::as_array) {
        for mode in list {
            let mut fields = Map::new();
            for key in ["id", "name"] {
                if let Some(text) = mode.get(key).and_then(Value::as_str) {
                    fields.insert(key.to_string(), Value::String(text.to_string()));
                }
            }
            if mode.get("description").is_some() {
                fields.insert("description".to_string(), Value::Bool(true));
            }
            modes.push(Value::Object(fields));
        }
    }
    modes.sort_by(|a, b| id_of(a).cmp(&id_of(b)));
    json!({ "modes": modes })
}
/// Reduces a session-list payload to `{ "sessions": […] }` sorted by `sessionId`.
///
/// Per session: the string fields `sessionId`, `agent`, `agentMode` and `permissionMode`
/// are copied; `model`, `variant`, `agentSessionId` and `eventCount` are replaced by
/// `"<redacted>"` when present; a boolean `ended` is copied. A missing or non-array
/// `sessions` member produces an empty list.
fn normalize_sessions(value: &Value) -> Value {
    const COPIED: [&str; 4] = ["sessionId", "agent", "agentMode", "permissionMode"];
    const REDACTED: [&str; 3] = ["model", "variant", "agentSessionId"];

    fn redacted() -> Value {
        Value::String("<redacted>".to_string())
    }
    fn session_id_of(entry: &Value) -> Option<&str> {
        entry.get("sessionId").and_then(Value::as_str)
    }

    let mut sessions: Vec<Value> = Vec::new();
    if let Some(list) = value.get("sessions").and_then(Value::as_array) {
        for session in list {
            // Keys are inserted in a fixed order in case the map preserves insertion order.
            let mut fields = Map::new();
            for key in COPIED {
                if let Some(text) = session.get(key).and_then(Value::as_str) {
                    fields.insert(key.to_string(), Value::String(text.to_string()));
                }
            }
            for key in REDACTED {
                if session.get(key).is_some() {
                    fields.insert(key.to_string(), redacted());
                }
            }
            if let Some(ended) = session.get("ended").and_then(Value::as_bool) {
                fields.insert("ended".to_string(), Value::Bool(ended));
            }
            if session.get("eventCount").is_some() {
                fields.insert("eventCount".to_string(), redacted());
            }
            sessions.push(Value::Object(fields));
        }
    }
    sessions.sort_by(|a, b| session_id_of(a).cmp(&session_id_of(b)));
    json!({ "sessions": sessions })
}
/// Reduces a create-session response to its snapshot-stable parts: a boolean `healthy`,
/// a redacted `agentSessionId` (when present), and `error` copied verbatim (when present).
fn normalize_create_session(value: &Value) -> Value {
    let mut fields = Map::new();
    if let Some(healthy) = value.get("healthy").and_then(Value::as_bool) {
        fields.insert("healthy".to_string(), Value::Bool(healthy));
    }
    if value.get("agentSessionId").is_some() {
        let placeholder = Value::String("<redacted>".to_string());
        fields.insert("agentSessionId".to_string(), placeholder);
    }
    if let Some(error) = value.get("error").cloned() {
        fields.insert("error".to_string(), error);
    }
    Value::Object(fields)
}
fn normalize_health(value: &Value) -> Value {
let mut map = Map::new();
if let Some(status) = value.get("status").and_then(Value::as_str) {
map.insert("status".to_string(), Value::String(status.to_string()));
}
Value::Object(map)
}
/// Wraps an HTTP status as `{ "status": <numeric code> }` for snapshotting.
fn snapshot_status(status: StatusCode) -> Value {
    let code = status.as_u16();
    json!({ "status": code })
}
/// Builds a snapshot object holding the numeric status plus whichever of the CORS-related
/// response headers (allow-origin, allow-methods, allow-headers, allow-credentials, vary)
/// are present.
///
/// Header values that are not valid visible ASCII are recorded as `"<invalid>"`.
fn snapshot_cors(status: StatusCode, headers: &HeaderMap) -> Value {
    let tracked = [
        header::ACCESS_CONTROL_ALLOW_ORIGIN,
        header::ACCESS_CONTROL_ALLOW_METHODS,
        header::ACCESS_CONTROL_ALLOW_HEADERS,
        header::ACCESS_CONTROL_ALLOW_CREDENTIALS,
        header::VARY,
    ];

    let mut snapshot = Map::new();
    snapshot.insert("status".to_string(), Value::Number(status.as_u16().into()));
    snapshot.extend(tracked.iter().filter_map(|name| {
        let raw = headers.get(name)?;
        let text = raw.to_str().unwrap_or("<invalid>").to_string();
        Some((name.as_str().to_string(), Value::String(text)))
    }));
    Value::Object(snapshot)
}
/// Builds the snapshot suffix `<prefix>_<agent>`, using `global` as the
/// second component when no agent is given.
fn snapshot_name(prefix: &str, agent: Option<AgentId>) -> String {
    let scope = agent
        .map(|agent| agent.as_str().to_string())
        .unwrap_or_else(|| "global".to_string());
    format!("{prefix}_{scope}")
}
/// Polls a session's events endpoint until `stop` accepts the accumulated
/// events or `timeout` has elapsed, and returns everything collected so far.
///
/// Each request asks for up to 200 events starting from the current offset;
/// the offset is advanced to the numeric `id` of the last event in a batch.
/// `stop` is only evaluated after a non-empty batch has been appended.
///
/// # Panics
///
/// Panics if any poll request returns a status other than 200 OK.
async fn poll_events_until_match<F>(
    app: &Router,
    session_id: &str,
    timeout: Duration,
    stop: F,
) -> Vec<Value>
where
    F: Fn(&[Value]) -> bool,
{
    let started_at = Instant::now();
    let mut cursor: u64 = 0;
    let mut collected: Vec<Value> = Vec::new();

    loop {
        if started_at.elapsed() >= timeout {
            break;
        }

        let url = format!("/v1/sessions/{session_id}/events?offset={cursor}&limit=200");
        let (status, body) = send_json(app, Method::GET, &url, None).await;
        assert_eq!(status, StatusCode::OK, "poll events");

        let batch = match body.get("events").and_then(Value::as_array) {
            Some(items) => items.clone(),
            None => Vec::new(),
        };

        if !batch.is_empty() {
            // The cursor only moves when the newest event carries a numeric id.
            let newest_id = batch
                .last()
                .and_then(|event| event.get("id"))
                .and_then(Value::as_u64);
            if let Some(id) = newest_id {
                cursor = id;
            }
            collected.extend(batch);
            if stop(&collected) {
                return collected;
            }
        }

        tokio::time::sleep(Duration::from_millis(800)).await;
    }

    collected
}
/// Returns the string at `data.permissionAsked.id` of the first event that
/// has one, or `None` when no event does.
fn find_permission_id(events: &[Value]) -> Option<String> {
    for event in events {
        let id = event
            .get("data")
            .and_then(|data| data.get("permissionAsked"))
            .and_then(|permission| permission.get("id"))
            .and_then(Value::as_str);
        if let Some(id) = id {
            return Some(id.to_string());
        }
    }
    None
}
/// Locates the first event carrying `data.questionAsked` and derives a reply
/// for it.
///
/// Returns the question's string `id` together with one answer list per entry
/// in its `questions` array: the label of that entry's first option, or an
/// empty list when no such string label exists. Returns `None` when no event
/// has `questionAsked` or when that payload has no string `id`.
fn find_question_id_and_answers(events: &[Value]) -> Option<(String, Vec<Vec<String>>)> {
    let asked = events
        .iter()
        .filter_map(|event| event.get("data")?.get("questionAsked"))
        .next()?;
    let id = asked.get("id").and_then(Value::as_str)?.to_string();

    let answers: Vec<Vec<String>> = asked
        .get("questions")
        .and_then(Value::as_array)
        .map(|entries| {
            entries
                .iter()
                .map(|entry| {
                    let first_label = entry
                        .get("options")
                        .and_then(Value::as_array)
                        .and_then(|options| options.first())
                        .and_then(|option| option.get("label"))
                        .and_then(Value::as_str)
                        .map(|label| label.to_string());
                    // Zero or one element: the chosen label, if any.
                    first_label.into_iter().collect::<Vec<String>>()
                })
                .collect()
        })
        .unwrap_or_default();

    Some((id, answers))
}
async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
@ -388,10 +692,11 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
install_agent(app, config.agent).await;
let session_id = format!("session-{}", config.agent.as_str());
create_session(app, config.agent, &session_id).await;
create_session(app, config.agent, &session_id, "bypass").await;
send_message(app, &session_id).await;
let events = poll_events_until(app, &session_id, Duration::from_secs(120)).await;
let events = truncate_after_first_stop(&events);
assert!(
!events.is_empty(),
"no events collected for {}",
@ -404,7 +709,7 @@ async fn run_http_events_snapshot(app: &Router, config: &TestAgentConfig) {
);
let normalized = normalize_events(&events);
insta::with_settings!({
snapshot_suffix => snapshot_name("http_events", config.agent),
snapshot_suffix => snapshot_name("http_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalized);
});
@ -415,7 +720,7 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
install_agent(app, config.agent).await;
let session_id = format!("sse-{}", config.agent.as_str());
create_session(app, config.agent, &session_id).await;
create_session(app, config.agent, &session_id, "bypass").await;
let sse_task = {
let app = app.clone();
@ -428,6 +733,7 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
send_message(app, &session_id).await;
let events = sse_task.await.expect("sse task");
let events = truncate_after_first_stop(&events);
assert!(
!events.is_empty(),
"no sse events collected for {}",
@ -440,26 +746,494 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
);
let normalized = normalize_events(&events);
insta::with_settings!({
snapshot_suffix => snapshot_name("sse_events", config.agent),
snapshot_suffix => snapshot_name("sse_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalized);
});
}
/// Snapshots the behaviour of token authentication on the HTTP API.
///
/// Four cases are exercised against an app configured with a fixed token:
/// the health endpoint without credentials, `/v1/agents` without a token,
/// `/v1/agents` with a wrong bearer token, and `/v1/agents` with the correct
/// bearer token. Each case asserts the status code and then snapshots the
/// status together with the (normalized where applicable) payload.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auth_snapshots() {
let token = "test-token";
let app = TestApp::new_with_auth(AuthConfig::with_token(token.to_string()));
// Case 1: health must be reachable without any Authorization header.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health should be public");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_health_public", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_health(&payload),
}));
});
// Case 2: a protected endpoint without a token is rejected with 401.
let (status, payload) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "missing token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_missing_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Case 3: a bearer token that does not match the configured one is rejected with 401.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, "Bearer wrong-token")
.body(Body::empty())
.expect("auth invalid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::UNAUTHORIZED, "invalid token should 401");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_invalid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
// Case 4: the configured token grants access; the agent list is normalized before snapshotting.
let request = Request::builder()
.method(Method::GET)
.uri("/v1/agents")
.header(header::AUTHORIZATION, format!("Bearer {token}"))
.body(Body::empty())
.expect("auth valid request");
let (status, _headers, payload) = send_json_request(&app.app, request).await;
assert_eq!(status, StatusCode::OK, "valid token should allow request");
insta::with_settings!({
snapshot_suffix => snapshot_name("auth_valid_token", None),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": normalize_agent_list(&payload),
}));
});
}
/// Snapshots the CORS response headers for a preflight and an actual request.
///
/// The app is built with authentication disabled and a `CorsLayer` that allows
/// the origin `http://example.com`, the GET and POST methods, the
/// `content-type` and `authorization` headers, and credentials.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn cors_snapshots() {
let cors = CorsLayer::new()
.allow_origin(vec![HeaderValue::from_static("http://example.com")])
.allow_methods([Method::GET, Method::POST])
.allow_headers([header::CONTENT_TYPE, header::AUTHORIZATION])
.allow_credentials(true);
let app = TestApp::new_with_auth_and_cors(AuthConfig::disabled(), Some(cors));
// Preflight: OPTIONS from the allowed origin, announcing a GET with two request headers.
let preflight = Request::builder()
.method(Method::OPTIONS)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.header(header::ACCESS_CONTROL_REQUEST_METHOD, "GET")
.header(
header::ACCESS_CONTROL_REQUEST_HEADERS,
"authorization,content-type",
)
.body(Body::empty())
.expect("cors preflight request");
// The preflight status is not asserted directly; it is only captured in the snapshot.
let (status, headers, _payload) = send_request(&app.app, preflight).await;
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_preflight", None),
}, {
insta::assert_yaml_snapshot!(snapshot_cors(status, &headers));
});
// Actual request: a plain GET carrying the same Origin header.
let actual = Request::builder()
.method(Method::GET)
.uri("/v1/health")
.header(header::ORIGIN, "http://example.com")
.body(Body::empty())
.expect("cors actual request");
let (status, headers, payload) = send_json_request(&app.app, actual).await;
assert_eq!(status, StatusCode::OK, "cors actual request should succeed");
insta::with_settings!({
snapshot_suffix => snapshot_name("cors_actual", None),
}, {
insta::assert_yaml_snapshot!(json!({
"cors": snapshot_cors(status, &headers),
"payload": normalize_health(&payload),
}));
});
}
/// Snapshots the basic REST endpoints end to end.
///
/// Covers, in order: health, agent listing, agent installation, per-agent
/// modes, session creation (with `permissionMode` set to `bypass`), session
/// listing, and sending a message to every created session. Per-agent
/// snapshots are suffixed with the agent name; the rest use the `global`
/// suffix produced by `snapshot_name(.., None)`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn api_endpoints_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
let (status, health) = send_json(&app.app, Method::GET, "/v1/health", None).await;
assert_eq!(status, StatusCode::OK, "health status");
insta::with_settings!({
snapshot_suffix => snapshot_name("health", None),
}, {
insta::assert_yaml_snapshot!(normalize_health(&health));
});
// List agents (just verify the API returns correct agent IDs, not install state)
let (status, agents) = send_json(&app.app, Method::GET, "/v1/agents", None).await;
assert_eq!(status, StatusCode::OK, "agents list");
insta::with_settings!({
snapshot_suffix => snapshot_name("agents_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_agent_list(&agents));
});
// Install agents (ensure they're available for subsequent tests)
for config in &configs {
// NOTE(review): the guard is held for the loop iteration; presumably it restores
// the previous credentials on drop — confirm against apply_credentials.
let _guard = apply_credentials(&config.credentials);
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/agents/{}/install", config.agent.as_str()),
Some(json!({})),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "install agent");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_install", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
// Collects (agent, session id) pairs so messages can be sent after the session list is snapshotted.
let mut session_ids = Vec::new();
for config in &configs {
let _guard = apply_credentials(&config.credentials);
let (status, modes) = send_json(
&app.app,
Method::GET,
&format!("/v1/agents/{}/modes", config.agent.as_str()),
None,
)
.await;
assert_eq!(status, StatusCode::OK, "agent modes");
insta::with_settings!({
snapshot_suffix => snapshot_name("agent_modes", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_agent_modes(&modes));
});
// One session per agent, named `snapshot-<agent>`.
let session_id = format!("snapshot-{}", config.agent.as_str());
let (status, created) = send_json(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}"),
Some(json!({
"agent": config.agent.as_str(),
"permissionMode": "bypass"
})),
)
.await;
assert_eq!(status, StatusCode::OK, "create session");
insta::with_settings!({
snapshot_suffix => snapshot_name("create_session", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_create_session(&created));
});
session_ids.push((config.agent, session_id));
}
// The session list is snapshotted once, after all sessions exist.
let (status, sessions) = send_json(&app.app, Method::GET, "/v1/sessions", None).await;
assert_eq!(status, StatusCode::OK, "list sessions");
insta::with_settings!({
snapshot_suffix => snapshot_name("sessions_list", None),
}, {
insta::assert_yaml_snapshot!(normalize_sessions(&sessions));
});
// Only the status of the message POST is snapshotted; resulting events are not collected here.
for (agent, session_id) in &session_ids {
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{session_id}/messages"),
Some(json!({ "message": PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
insta::with_settings!({
snapshot_suffix => snapshot_name("send_message", Some(*agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
}
}
/// Snapshots the permission and question approval flows for every configured agent.
///
/// For each agent three independent sessions are driven:
///
/// 1. `perm-<agent>`: sends `PERMISSION_PROMPT` and replies `once` to the
///    permission request, if one shows up.
/// 2. `question-reply-<agent>`: sends `QUESTION_PROMPT` and answers the
///    question with the first option of each sub-question, if one shows up.
/// 3. `question-reject-<agent>`: sends `QUESTION_PROMPT` and rejects the
///    question, if one shows up.
///
/// In every flow, when the expected request never appears in the polled
/// events, the test instead posts to a made-up id (`missing-permission` /
/// `missing-question`), asserts a non-success status, and snapshots that
/// error response under a `*_missing` suffix. Which of the two snapshots is
/// written therefore depends on what the agent emitted.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn approval_flow_snapshots() {
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
let app = TestApp::new();
for config in &configs {
let _guard = apply_credentials(&config.credentials);
install_agent(&app.app, config.agent).await;
// --- Flow 1: permission request ---
// NOTE(review): the last argument ("plan" here, "bypass" below) is presumably the
// session's permission mode — confirm against create_session.
let permission_session = format!("perm-{}", config.agent.as_str());
create_session(&app.app, config.agent, &permission_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{permission_session}/messages"),
Some(json!({ "message": PERMISSION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt");
// Poll for up to 120 seconds, stopping early once a permission id is visible or should_stop fires.
let permission_events = poll_events_until_match(
&app.app,
&permission_session,
Duration::from_secs(120),
|events| find_permission_id(events).is_some() || should_stop(events),
)
.await;
// The events are trimmed by truncate_permission_events (defined elsewhere) before snapshotting;
// the permission id lookup below runs on the trimmed list.
let permission_events = truncate_permission_events(&permission_events);
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&permission_events));
});
if let Some(permission_id) = find_permission_id(&permission_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply permission");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No permission was requested: exercise the error path with an id that cannot exist.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{permission_session}/permissions/missing-permission/reply"
),
Some(json!({ "reply": "once" })),
)
.await;
assert!(!status.is_success(), "missing permission id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
// --- Flow 2: question answered ---
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reply_session, "bypass").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reply_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt");
let question_events = poll_events_until_match(
&app.app,
&question_reply_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
// Unlike the permission flow, these events are snapshotted without a truncation step.
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&question_events));
});
if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/{question_id}/reply"
),
Some(json!({ "answers": answers })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reply question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: exercise the error path with an id that cannot exist.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reply_session}/questions/missing-question/reply"
),
Some(json!({ "answers": [] })),
)
.await;
assert!(!status.is_success(), "missing question id should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
// --- Flow 3: question rejected ---
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reject_session, "bypass").await;
let status = send_status(
&app.app,
Method::POST,
&format!("/v1/sessions/{question_reject_session}/messages"),
Some(json!({ "message": QUESTION_PROMPT })),
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject");
let reject_events = poll_events_until_match(
&app.app,
&question_reject_session,
Duration::from_secs(120),
|events| find_question_id_and_answers(events).is_some() || should_stop(events),
)
.await;
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(normalize_events(&reject_events));
});
// Only the question id is needed for a rejection; the derived answers are ignored.
if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) {
let status = send_status(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/{question_id}/reject"
),
None,
)
.await;
assert_eq!(status, StatusCode::NO_CONTENT, "reject question");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot_status(status));
});
} else {
// No question was asked: exercise the reject error path with an id that cannot exist.
let (status, payload) = send_json(
&app.app,
Method::POST,
&format!(
"/v1/sessions/{question_reject_session}/questions/missing-question/reject"
),
None,
)
.await;
assert!(!status.is_success(), "missing question id reject should error");
insta::with_settings!({
snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(json!({
"status": status.as_u16(),
"payload": payload,
}));
});
}
}
}
/// Runs the HTTP event-polling snapshot once for every configured agent,
/// sharing a single `TestApp` across agents.
///
/// # Panics
///
/// Panics when no agent configuration can be resolved, i.e. neither
/// `SANDBOX_TEST_AGENTS` nor auto-detection yields a usable agent.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn http_events_snapshots() {
    // Resolve the agent list exactly once. A stale duplicate of this binding
    // (with the superseded "configure SANDBOX_TEST_AGENTS" message) used to run
    // the resolution twice and reported the outdated message on failure.
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        run_http_events_snapshot(&app.app, config).await;
    }
}
async fn run_concurrency_snapshot(app: &Router, config: &TestAgentConfig) {
let _guard = apply_credentials(&config.credentials);
install_agent(app, config.agent).await;
let session_a = format!("concurrent-a-{}", config.agent.as_str());
let session_b = format!("concurrent-b-{}", config.agent.as_str());
create_session(app, config.agent, &session_a, "bypass").await;
create_session(app, config.agent, &session_b, "bypass").await;
let app_a = app.clone();
let app_b = app.clone();
let send_a = send_message(&app_a, &session_a);
let send_b = send_message(&app_b, &session_b);
tokio::join!(send_a, send_b);
let app_a = app.clone();
let app_b = app.clone();
let poll_a = poll_events_until(&app_a, &session_a, Duration::from_secs(120));
let poll_b = poll_events_until(&app_b, &session_b, Duration::from_secs(120));
let (events_a, events_b) = tokio::join!(poll_a, poll_b);
let events_a = truncate_after_first_stop(&events_a);
let events_b = truncate_after_first_stop(&events_b);
assert!(
!events_a.is_empty(),
"no events collected for concurrent session a {}",
config.agent
);
assert!(
!events_b.is_empty(),
"no events collected for concurrent session b {}",
config.agent
);
assert!(
should_stop(&events_a),
"timed out waiting for assistant/error event for concurrent session a {}",
config.agent
);
assert!(
should_stop(&events_b),
"timed out waiting for assistant/error event for concurrent session b {}",
config.agent
);
let snapshot = json!({
"session_a": normalize_events(&events_a),
"session_b": normalize_events(&events_b),
});
insta::with_settings!({
snapshot_suffix => snapshot_name("concurrency_events", Some(config.agent)),
}, {
insta::assert_yaml_snapshot!(snapshot);
});
}
/// Runs the SSE event-stream snapshot once for every configured agent,
/// sharing a single `TestApp` across agents.
///
/// # Panics
///
/// Panics when no agent configuration can be resolved, i.e. neither
/// `SANDBOX_TEST_AGENTS` nor auto-detection yields a usable agent.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn sse_events_snapshots() {
    // Resolve the agent list exactly once. A stale duplicate of this binding
    // (with the superseded "configure SANDBOX_TEST_AGENTS" message) used to run
    // the resolution twice and reported the outdated message on failure.
    let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let app = TestApp::new();
    for config in &configs {
        run_sse_events_snapshot(&app.app, config).await;
    }
}
/// Runs the two-session concurrency snapshot once for every configured agent,
/// sharing a single `TestApp` across agents.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn concurrency_snapshots() {
    let agent_configs =
        test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
    let harness = TestApp::new();
    for agent_config in agent_configs.iter() {
        run_concurrency_snapshot(&harness.app, agent_config).await;
    }
}

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 874
expression: snapshot_status(status)
---
status: 204

View file

@ -0,0 +1,12 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 900
expression: normalize_agent_modes(&modes)
---
modes:
- description: true
id: build
name: Build
- description: true
id: plan
name: Plan

View file

@ -0,0 +1,10 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 881
expression: normalize_agent_list(&agents)
---
agents:
- id: amp
- id: claude
- id: codex
- id: opencode

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 918
expression: normalize_create_session(&created)
---
healthy: true

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 850
expression: normalize_health(&health)
---
status: ok

View file

@ -0,0 +1,6 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 943
expression: snapshot_status(status)
---
status: 204

View file

@ -0,0 +1,15 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 928
expression: normalize_sessions(&sessions)
---
sessions:
- agent: claude
agentMode: build
agentSessionId: "<redacted>"
ended: false
eventCount: "<redacted>"
model: "<redacted>"
permissionMode: bypass
sessionId: snapshot-claude
variant: "<redacted>"

View file

@ -0,0 +1,21 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 978
expression: normalize_events(&permission_events)
---
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1011
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown permission id: missing-permission"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -0,0 +1,21 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1100
expression: normalize_events(&reject_events)
---
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1151
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -0,0 +1,21 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1039
expression: normalize_events(&question_events)
---
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1072
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: "invalid request: unknown question id: missing-question"
status: 400
title: Invalid Request
type: "urn:sandbox-agent:error:invalid_request"
status: 400

View file

@ -0,0 +1,8 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 765
expression: "json!({ \"status\": status.as_u16(), \"payload\": normalize_health(&payload), })"
---
payload:
status: ok
status: 200

View file

@ -0,0 +1,13 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 793
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: token invalid
details:
message: missing or invalid token
status: 401
title: Token Invalid
type: "urn:sandbox-agent:error:token_invalid"
status: 401

View file

@ -0,0 +1,13 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 776
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
---
payload:
detail: token invalid
details:
message: missing or invalid token
status: 401
title: Token Invalid
type: "urn:sandbox-agent:error:token_invalid"
status: 401

View file

@ -0,0 +1,12 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 810
expression: "json!({\n \"status\": status.as_u16(), \"payload\": normalize_agent_list(&payload),\n})"
---
payload:
agents:
- id: amp
- id: claude
- id: codex
- id: opencode
status: 200

View file

@ -0,0 +1,12 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 842
expression: "json!({\n \"cors\": snapshot_cors(status, &headers), \"payload\":\n normalize_health(&payload),\n})"
---
cors:
access-control-allow-credentials: "true"
access-control-allow-origin: "http://example.com"
status: 200
vary: "origin, access-control-request-method, access-control-request-headers"
payload:
status: ok

View file

@ -0,0 +1,11 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 818
expression: "snapshot_cors(status, &headers)"
---
access-control-allow-credentials: "true"
access-control-allow-headers: "content-type,authorization"
access-control-allow-methods: "GET,POST"
access-control-allow-origin: "http://example.com"
status: 200
vary: "origin, access-control-request-method, access-control-request-headers"

View file

@ -0,0 +1,39 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1232
expression: snapshot
---
session_a:
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3
session_b:
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -0,0 +1,21 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 721
expression: normalized
---
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -0,0 +1,21 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 729
expression: normalized
---
- agent: claude
kind: started
seq: 1
started:
message: session.created
- agent: claude
kind: unknown
seq: 2
- agent: claude
kind: message
message:
parts:
- text: "<redacted>"
type: text
role: assistant
seq: 3

View file

@ -6,9 +6,9 @@ authors.workspace = true
license.workspace = true
[dependencies]
sandbox-agent-agent-schema = { path = "../agent-schema" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
schemars = "0.8"
thiserror = "1.0"
utoipa = { version = "4.2", features = ["axum_extras"] }
sandbox-agent-extracted-agent-schemas.workspace = true
serde.workspace = true
serde_json.workspace = true
schemars.workspace = true
thiserror.workspace = true
utoipa.workspace = true

File diff suppressed because it is too large Load diff

View file

@ -4,7 +4,7 @@ use schemars::JsonSchema;
use thiserror::Error;
use utoipa::ToSchema;
pub use sandbox_agent_agent_schema::{amp, claude, codex, opencode};
pub use sandbox_agent_extracted_agent_schemas::{amp, claude, codex, opencode};
pub mod agents;

View file

@ -0,0 +1,12 @@
[package]
name = "sandbox-agent-universal-schema-gen"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
build = "build.rs"
[build-dependencies]
sandbox-agent-universal-agent-schema.workspace = true
schemars.workspace = true
serde_json.workspace = true

View file

@ -0,0 +1,26 @@
use std::{fs, path::Path};
/// Build script: generates the JSON schema for `UniversalEvent` and writes it
/// to `<three directories above this crate>/spec/universal-schema.json`.
///
/// # Panics
///
/// Panics (failing the build) when `CARGO_MANIFEST_DIR` is unavailable, when
/// the manifest directory has fewer than three ancestors, or when the output
/// directory or file cannot be created or written.
fn main() {
    println!("cargo:rerun-if-changed=../universal-agent-schema/src/lib.rs");

    let schema = schemars::schema_for!(sandbox_agent_universal_agent_schema::UniversalEvent);

    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
        .expect("CARGO_MANIFEST_DIR must be set by Cargo when running a build script");
    // `ancestors()` yields the path itself first, so index 3 is three levels up.
    // NOTE(review): this is assumed to be the repository root — confirm if the
    // crate is ever moved within the tree.
    let workspace_root = Path::new(&manifest_dir)
        .ancestors()
        .nth(3)
        .expect("crate manifest directory must be nested at least three levels deep");

    let out_dir = workspace_root.join("spec");
    fs::create_dir_all(&out_dir).expect("Failed to create spec output directory");

    let json = serde_json::to_string_pretty(&schema).expect("Failed to serialize JSON schema");
    let target = out_dir.join("universal-schema.json");

    // Skip the write when the file already holds the same content, so an
    // unchanged schema does not bump the mtime of a file in the source tree.
    let unchanged = fs::read_to_string(&target)
        .map(|existing| existing == json)
        .unwrap_or(false);
    if !unchanged {
        fs::write(&target, json).expect("Failed to write universal-schema.json");
    }
}

View file

@ -0,0 +1,2 @@
// This crate exists only to trigger the build.rs script
// which generates the universal JSON schema at build time.