mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 10:05:18 +00:00
fix: add agent_server_logs module import to lib.rs
This commit is contained in:
parent
be7aecb362
commit
7a5bb2b8b0
87 changed files with 2438 additions and 1671 deletions
|
|
@ -1,12 +1,27 @@
|
|||
# Server Testing
|
||||
|
||||
## Test placement
|
||||
|
||||
Place all new tests under `server/packages/**/tests/` (or a package-specific `tests/` folder). Avoid inline tests inside source files unless there is no viable alternative.
|
||||
|
||||
## Test locations (overview)
|
||||
|
||||
- Sandbox-agent integration tests live under `server/packages/sandbox-agent/tests/`:
|
||||
- Agent flow coverage in `agent-flows/`
|
||||
- Agent management coverage in `agent-management/`
|
||||
- Shared server manager coverage in `server-manager/`
|
||||
- HTTP/SSE and snapshot coverage in `http/` (snapshots in `http/snapshots/`)
|
||||
- UI coverage in `ui/`
|
||||
- Shared helpers in `common/`
|
||||
- Extracted agent schema roundtrip tests live under `server/packages/extracted-agent-schemas/tests/`
|
||||
|
||||
## Snapshot tests
|
||||
|
||||
The HTTP/SSE snapshot suite lives in:
|
||||
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs`
|
||||
The HTTP/SSE snapshot suite entrypoint lives in:
|
||||
- `server/packages/sandbox-agent/tests/http_sse_snapshots.rs` (includes `tests/http/http_sse_snapshots.rs`)
|
||||
|
||||
Snapshots are written to:
|
||||
- `server/packages/sandbox-agent/tests/snapshots/`
|
||||
- `server/packages/sandbox-agent/tests/http/snapshots/`
|
||||
|
||||
## Agent selection
|
||||
|
||||
|
|
@ -47,9 +62,20 @@ Health checks run in a blocking thread to avoid Tokio runtime drop errors inside
|
|||
## Snapshot stability
|
||||
|
||||
To keep snapshots deterministic:
|
||||
- Use the mock agent as the **master** event sequence; all other agents must match its behavior 1:1.
|
||||
- Snapshots should compare a **canonical event skeleton** (event order matters) with strict ordering across:
|
||||
- `item.started` → `item.delta` → `item.completed`
|
||||
- presence/absence of `session.ended`
|
||||
- permission/question request and resolution flows
|
||||
- Scrub non-deterministic fields from snapshots:
|
||||
- IDs, timestamps, native IDs
|
||||
- text content, tool inputs/outputs, provider-specific metadata
|
||||
- `source` and `synthetic` flags (these are implementation details)
|
||||
- The sandbox-agent is responsible for emitting **synthetic events** so that real agents match the mock sequence exactly.
|
||||
- Event streams are truncated after the first assistant or error event.
|
||||
- Permission flow snapshots are truncated after the permission request (or first assistant) event.
|
||||
- Unknown events are preserved as `kind: unknown` (raw payload in universal schema).
|
||||
- Prefer snapshot-based event skeleton assertions over manual event-order assertions in tests.
|
||||
|
||||
## Typical commands
|
||||
|
||||
|
|
|
|||
|
|
@ -25,87 +25,3 @@ pub mod amp {
|
|||
//! AMP Code SDK types.
|
||||
include!(concat!(env!("OUT_DIR"), "/amp.rs"));
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    //! Serialization smoke tests for the schema types exposed by the
    //! `claude`, `codex` and `amp` modules of this crate.

    use super::*;

    /// A `claude::BashInput` serializes to JSON containing its command and
    /// deserializes back with the same command.
    #[test]
    fn test_claude_bash_input() {
        let input = claude::BashInput {
            command: "ls -la".to_string(),
            timeout: Some(5000.0),
            working_directory: None,
        };

        let json = serde_json::to_string(&input).unwrap();
        assert!(json.contains("ls -la"));

        let parsed: claude::BashInput = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.command, "ls -la");
    }

    /// An `ItemCompleted` server notification wrapping an agent message
    /// serializes with the expected method tag, item tag and text.
    #[test]
    fn test_codex_server_notification() {
        // Test ItemCompletedNotification with AgentMessage
        let notification = codex::ServerNotification::ItemCompleted(
            codex::ItemCompletedNotification {
                item: codex::ThreadItem::AgentMessage {
                    id: "msg-123".to_string(),
                    text: "Hello from Codex".to_string(),
                },
                thread_id: "thread-123".to_string(),
                turn_id: "turn-456".to_string(),
            },
        );

        // Serialize by reference; the notification value itself is not consumed.
        let json = serde_json::to_string(&notification).unwrap();
        assert!(json.contains("item/completed"));
        assert!(json.contains("Hello from Codex"));
        assert!(json.contains("agentMessage"));
    }

    /// The `UserMessage` and `CommandExecution` variants of `codex::ThreadItem`
    /// serialize with their variant tag and payload text present.
    #[test]
    fn test_codex_thread_item_variants() {
        // Test UserMessage variant
        let user_msg = codex::ThreadItem::UserMessage {
            content: vec![codex::UserInput::Text {
                text: "Hello".to_string(),
                text_elements: vec![],
            }],
            id: "user-1".to_string(),
        };
        let json = serde_json::to_string(&user_msg).unwrap();
        assert!(json.contains("userMessage"));
        assert!(json.contains("Hello"));

        // Test CommandExecution variant
        let cmd = codex::ThreadItem::CommandExecution {
            aggregated_output: Some("output".to_string()),
            command: "ls -la".to_string(),
            command_actions: vec![],
            cwd: "/tmp".to_string(),
            duration_ms: Some(100),
            exit_code: Some(0),
            id: "cmd-1".to_string(),
            process_id: None,
            status: codex::CommandExecutionStatus::Completed,
        };
        let json = serde_json::to_string(&cmd).unwrap();
        assert!(json.contains("commandExecution"));
        assert!(json.contains("ls -la"));
    }

    /// An `amp::Message` with the user role serializes with both the role and
    /// the content visible in the JSON text.
    #[test]
    fn test_amp_message() {
        let msg = amp::Message {
            role: amp::MessageRole::User,
            content: "Hello".to_string(),
            tool_calls: vec![],
        };

        let json = serde_json::to_string(&msg).unwrap();
        assert!(json.contains("user"));
        assert!(json.contains("Hello"));
    }
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
use sandbox_agent_extracted_agent_schemas::{amp, claude, codex};
|
||||
|
||||
/// Round-trips a `claude::BashInput` through JSON and checks that the command
/// string is present in the encoded text and preserved after decoding.
#[test]
fn test_claude_bash_input() {
    let original = claude::BashInput {
        command: String::from("ls -la"),
        timeout: Some(5000.0),
        working_directory: None,
    };

    // Encode, then make sure the command text is visible in the raw JSON.
    let encoded = serde_json::to_string(&original).unwrap();
    assert!(encoded.contains("ls -la"));

    // Decode the same text and compare the field we care about.
    let decoded = serde_json::from_str::<claude::BashInput>(&encoded).unwrap();
    assert_eq!(decoded.command, "ls -la");
}
|
||||
|
||||
/// Serializes an `ItemCompleted` server notification that wraps an agent
/// message and checks that the method tag, the item tag and the message text
/// all appear in the JSON output.
#[test]
fn test_codex_server_notification() {
    let notification = codex::ServerNotification::ItemCompleted(
        codex::ItemCompletedNotification {
            item: codex::ThreadItem::AgentMessage {
                id: "msg-123".to_string(),
                text: "Hello from Codex".to_string(),
            },
            thread_id: "thread-123".to_string(),
            turn_id: "turn-456".to_string(),
        },
    );

    // Serialize by reference; the notification value itself is not consumed.
    let json = serde_json::to_string(&notification).unwrap();
    assert!(json.contains("item/completed"));
    assert!(json.contains("Hello from Codex"));
    assert!(json.contains("agentMessage"));
}
|
||||
|
||||
/// Serializes two `codex::ThreadItem` variants and checks that each one's
/// variant tag and payload text show up in the resulting JSON.
#[test]
fn test_codex_thread_item_variants() {
    // `UserMessage` carrying a single text input.
    let text_input = codex::UserInput::Text {
        text: String::from("Hello"),
        text_elements: Vec::new(),
    };
    let user_item = codex::ThreadItem::UserMessage {
        content: vec![text_input],
        id: String::from("user-1"),
    };
    let user_json = serde_json::to_string(&user_item).unwrap();
    assert!(user_json.contains("userMessage"));
    assert!(user_json.contains("Hello"));

    // `CommandExecution` describing a completed shell command.
    let command_item = codex::ThreadItem::CommandExecution {
        aggregated_output: Some(String::from("output")),
        command: String::from("ls -la"),
        command_actions: Vec::new(),
        cwd: String::from("/tmp"),
        duration_ms: Some(100),
        exit_code: Some(0),
        id: String::from("cmd-1"),
        process_id: None,
        status: codex::CommandExecutionStatus::Completed,
    };
    let command_json = serde_json::to_string(&command_item).unwrap();
    assert!(command_json.contains("commandExecution"));
    assert!(command_json.contains("ls -la"));
}
|
||||
|
||||
/// Serializes an `amp::Message` with the user role and checks that the role
/// and the content are both visible in the JSON text.
#[test]
fn test_amp_message() {
    let message = amp::Message {
        role: amp::MessageRole::User,
        content: String::from("Hello"),
        tool_calls: Vec::new(),
    };

    let encoded = serde_json::to_string(&message).unwrap();
    assert!(encoded.contains("user"));
    assert!(encoded.contains("Hello"));
}
|
||||
|
|
@ -32,9 +32,13 @@ tracing.workspace = true
|
|||
tracing-logfmt.workspace = true
|
||||
tracing-subscriber.workspace = true
|
||||
include_dir.workspace = true
|
||||
tempfile = { workspace = true, optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
http-body-util.workspace = true
|
||||
insta.workspace = true
|
||||
tempfile.workspace = true
|
||||
tower.workspace = true
|
||||
tempfile.workspace = true
|
||||
|
||||
[features]
|
||||
test-utils = ["tempfile"]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
//! Sandbox agent core utilities.
|
||||
|
||||
pub mod credentials;
|
||||
mod agent_server_logs;
|
||||
pub mod router;
|
||||
pub mod telemetry;
|
||||
pub mod ui;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::collections::HashMap;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use clap::{Args, Parser, Subcommand};
|
||||
use reqwest::blocking::Client as HttpClient;
|
||||
|
|
@ -16,7 +17,7 @@ use sandbox_agent::router::{
|
|||
};
|
||||
use sandbox_agent::telemetry;
|
||||
use sandbox_agent::router::{AgentListResponse, AgentModesResponse, CreateSessionResponse, EventsResponse};
|
||||
use sandbox_agent::router::build_router;
|
||||
use sandbox_agent::router::{build_router_with_state, shutdown_servers};
|
||||
use sandbox_agent::ui;
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
|
|
@ -352,8 +353,8 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> {
|
|||
|
||||
let agent_manager =
|
||||
AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?;
|
||||
let state = AppState::new(auth, agent_manager);
|
||||
let mut router = build_router(state);
|
||||
let state = Arc::new(AppState::new(auth, agent_manager));
|
||||
let (mut router, state) = build_router_with_state(state);
|
||||
|
||||
if let Some(cors) = build_cors_layer(server)? {
|
||||
router = router.layer(cors);
|
||||
|
|
@ -384,7 +385,12 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> {
|
|||
} else {
|
||||
tracing::info!("inspector ui not embedded; set SANDBOX_AGENT_SKIP_INSPECTOR=1 to skip embedding during builds");
|
||||
}
|
||||
let shutdown_state = state.clone();
|
||||
axum::serve(listener, router)
|
||||
.with_graceful_shutdown(async move {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
shutdown_servers(&shutdown_state).await;
|
||||
})
|
||||
.await
|
||||
.map_err(|err| CliError::Server(err.to_string()))
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
|
|
@ -29,8 +30,6 @@ async fn agent_basic_reply() {
|
|||
"no events collected for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
expect_basic_sequence(&events);
|
||||
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
#[path = "../common/mod.rs"]
|
||||
mod common;
|
||||
|
||||
use common::*;
|
||||
6
server/packages/sandbox-agent/tests/agent-flows/mod.rs
Normal file
6
server/packages/sandbox-agent/tests/agent-flows/mod.rs
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
mod agent_basic_reply;
|
||||
mod agent_multi_turn;
|
||||
mod agent_permission_flow;
|
||||
mod agent_question_flow;
|
||||
mod agent_termination;
|
||||
mod agent_tool_flow;
|
||||
|
|
@ -0,0 +1 @@
|
|||
mod agents;
|
||||
2
server/packages/sandbox-agent/tests/agent_flows.rs
Normal file
2
server/packages/sandbox-agent/tests/agent_flows.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
#[path = "agent-flows/mod.rs"]
|
||||
mod agent_flows;
|
||||
2
server/packages/sandbox-agent/tests/agent_management.rs
Normal file
2
server/packages/sandbox-agent/tests/agent_management.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
#[path = "agent-management/mod.rs"]
|
||||
mod agent_management;
|
||||
|
|
@ -272,38 +272,6 @@ pub fn find_assistant_message_item(events: &[Value]) -> Option<String> {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn event_sequence(event: &Value) -> Option<u64> {
|
||||
event.get("sequence").and_then(Value::as_u64)
|
||||
}
|
||||
|
||||
pub fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option<u64> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some(event_type) {
|
||||
return None;
|
||||
}
|
||||
match event_type {
|
||||
"item.delta" => {
|
||||
let data = event.get("data")?;
|
||||
let id = data.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let id = item.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("permission.requested") {
|
||||
|
|
@ -372,17 +340,3 @@ pub fn has_tool_result(events: &[Value]) -> bool {
|
|||
item.get("kind").and_then(Value::as_str) == Some("tool_result")
|
||||
})
|
||||
}
|
||||
|
||||
pub fn expect_basic_sequence(events: &[Value]) {
|
||||
assert!(has_event_type(events, "session.started"), "session.started missing");
|
||||
let item_id = find_assistant_message_item(events).expect("assistant message missing");
|
||||
let started_seq = find_item_event_seq(events, "item.started", &item_id)
|
||||
.expect("item.started missing");
|
||||
// Intentionally require deltas here to validate our synthetic delta behavior.
|
||||
let delta_seq = find_item_event_seq(events, "item.delta", &item_id)
|
||||
.expect("item.delta missing");
|
||||
let completed_seq = find_item_event_seq(events, "item.completed", &item_id)
|
||||
.expect("item.completed missing");
|
||||
assert!(started_seq < delta_seq, "item.started must precede delta");
|
||||
assert!(delta_seq < completed_seq, "delta must precede completion");
|
||||
}
|
||||
|
|
|
|||
1388
server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
Normal file
1388
server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1016
|
||||
expression: snapshot_status(status)
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1034
|
||||
expression: normalize_agent_modes(&modes)
|
||||
---
|
||||
modes:
|
||||
- description: true
|
||||
id: build
|
||||
name: Build
|
||||
- description: true
|
||||
id: plan
|
||||
name: Plan
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1053
|
||||
expression: normalize_create_session(&created)
|
||||
---
|
||||
healthy: true
|
||||
nativeSessionId: "<redacted>"
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1078
|
||||
expression: snapshot_status(status)
|
||||
---
|
||||
status: 204
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1119
|
||||
expression: normalize_events(&permission_events)
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
|
|
@ -1,19 +1,15 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1025
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1112
|
||||
expression: normalize_events(&permission_events)
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -22,16 +18,12 @@ expression: normalize_events(&permission_events)
|
|||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -40,6 +32,4 @@ expression: normalize_events(&permission_events)
|
|||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1152
|
||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
||||
---
|
||||
payload:
|
||||
detail: "invalid request: unknown permission id: missing-permission"
|
||||
status: 400
|
||||
title: Invalid Request
|
||||
type: "urn:sandbox-agent:error:invalid_request"
|
||||
status: 400
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1236
|
||||
expression: normalize_events(&reject_events)
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1276
|
||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
||||
---
|
||||
payload:
|
||||
detail: "invalid request: unknown question id: missing-question"
|
||||
status: 400
|
||||
title: Invalid Request
|
||||
type: "urn:sandbox-agent:error:invalid_request"
|
||||
status: 400
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1174
|
||||
expression: normalize_events(&question_events)
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1214
|
||||
expression: "json!({ \"status\": status.as_u16(), \"payload\": payload, })"
|
||||
---
|
||||
payload:
|
||||
detail: "invalid request: unknown question id: missing-question"
|
||||
status: 400
|
||||
title: Invalid Request
|
||||
type: "urn:sandbox-agent:error:invalid_request"
|
||||
status: 400
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1259
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1351
|
||||
expression: snapshot
|
||||
---
|
||||
session_a:
|
||||
|
|
@ -23,16 +23,16 @@ session_a:
|
|||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -64,16 +64,16 @@ session_b:
|
|||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 1344
|
||||
expression: snapshot
|
||||
---
|
||||
session_a:
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
session_b:
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 742
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 811
|
||||
expression: normalized
|
||||
---
|
||||
- metadata: true
|
||||
|
|
@ -22,16 +22,16 @@ expression: normalized
|
|||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 804
|
||||
expression: normalized
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 775
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 848
|
||||
expression: normalized
|
||||
---
|
||||
- metadata: true
|
||||
|
|
@ -22,16 +22,16 @@ expression: normalized
|
|||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
source: daemon
|
||||
synthetic: true
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 848
|
||||
expression: normalized
|
||||
---
|
||||
- metadata: true
|
||||
|
|
@ -62,30 +63,11 @@ expression: normalized
|
|||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs
|
||||
assertion_line: 841
|
||||
expression: normalized
|
||||
---
|
||||
- metadata: true
|
||||
seq: 1
|
||||
session: started
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
session: started
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 3
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 5
|
||||
type: item.completed
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,136 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use sandbox_agent::router::test_utils::{exit_status, spawn_sleep_process, TestHarness};
|
||||
use sandbox_agent_agent_management::agents::AgentId;
|
||||
use sandbox_agent_universal_agent_schema::SessionEndReason;
|
||||
use tokio::time::{timeout, Duration};
|
||||
|
||||
async fn wait_for_exit(child: &Arc<std::sync::Mutex<Option<std::process::Child>>>) {
|
||||
for _ in 0..20 {
|
||||
let done = {
|
||||
let mut guard = child.lock().expect("child lock");
|
||||
match guard.as_mut() {
|
||||
Some(child) => child.try_wait().ok().flatten().is_some(),
|
||||
None => true,
|
||||
}
|
||||
};
|
||||
if done {
|
||||
return;
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn register_and_unregister_sessions() {
|
||||
let harness = TestHarness::new().await;
|
||||
harness
|
||||
.register_session(AgentId::Codex, "sess-1", Some("thread-1"))
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
harness
|
||||
.has_session_mapping(AgentId::Codex, "sess-1")
|
||||
.await
|
||||
);
|
||||
assert_eq!(
|
||||
harness
|
||||
.native_mapping(AgentId::Codex, "thread-1")
|
||||
.await
|
||||
.as_deref(),
|
||||
Some("sess-1")
|
||||
);
|
||||
|
||||
harness
|
||||
.unregister_session(AgentId::Codex, "sess-1", Some("thread-1"))
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
!harness
|
||||
.has_session_mapping(AgentId::Codex, "sess-1")
|
||||
.await
|
||||
);
|
||||
assert!(
|
||||
harness
|
||||
.native_mapping(AgentId::Codex, "thread-1")
|
||||
.await
|
||||
.is_none()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn shutdown_marks_servers_stopped_and_kills_child() {
|
||||
let harness = TestHarness::new().await;
|
||||
let child = harness
|
||||
.insert_stdio_server(AgentId::Codex, Some(spawn_sleep_process()), 0)
|
||||
.await;
|
||||
|
||||
harness.shutdown().await;
|
||||
|
||||
assert!(matches!(
|
||||
harness.server_status(AgentId::Codex).await,
|
||||
Some(sandbox_agent::router::ServerStatus::Stopped)
|
||||
));
|
||||
|
||||
wait_for_exit(&child).await;
|
||||
let exited = {
|
||||
let mut guard = child.lock().expect("child lock");
|
||||
guard
|
||||
.as_mut()
|
||||
.and_then(|child| child.try_wait().ok().flatten())
|
||||
.is_some()
|
||||
};
|
||||
assert!(exited);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn handle_process_exit_marks_error_and_ends_sessions() {
|
||||
let harness = TestHarness::new().await;
|
||||
harness
|
||||
.insert_session("sess-1", AgentId::Codex, Some("thread-1"))
|
||||
.await;
|
||||
harness
|
||||
.register_session(AgentId::Codex, "sess-1", Some("thread-1"))
|
||||
.await;
|
||||
harness
|
||||
.insert_stdio_server(AgentId::Codex, None, 1)
|
||||
.await;
|
||||
|
||||
harness
|
||||
.handle_process_exit(AgentId::Codex, 1, exit_status(7))
|
||||
.await;
|
||||
|
||||
assert!(matches!(
|
||||
harness.server_status(AgentId::Codex).await,
|
||||
Some(sandbox_agent::router::ServerStatus::Error)
|
||||
));
|
||||
assert!(
|
||||
harness
|
||||
.server_last_error(AgentId::Codex)
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.contains("exited")
|
||||
);
|
||||
assert!(harness.session_ended("sess-1").await);
|
||||
assert!(matches!(
|
||||
harness.session_end_reason("sess-1").await,
|
||||
Some(SessionEndReason::Error)
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn auto_restart_notifier_emits_signal() {
|
||||
let harness = TestHarness::new().await;
|
||||
let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel();
|
||||
harness.set_restart_notifier(tx).await;
|
||||
harness.insert_http_server(AgentId::Mock, 2).await;
|
||||
|
||||
harness
|
||||
.handle_process_exit(AgentId::Mock, 2, exit_status(2))
|
||||
.await;
|
||||
|
||||
let received = timeout(Duration::from_millis(200), rx.recv())
|
||||
.await
|
||||
.expect("timeout");
|
||||
assert_eq!(received, Some(AgentId::Mock));
|
||||
}
|
||||
|
|
@ -0,0 +1 @@
|
|||
mod agent_server_manager;
|
||||
2
server/packages/sandbox-agent/tests/server_manager.rs
Normal file
2
server/packages/sandbox-agent/tests/server_manager.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
#[path = "server-manager/mod.rs"]
|
||||
mod server_manager;
|
||||
2
server/packages/sandbox-agent/tests/ui.rs
Normal file
2
server/packages/sandbox-agent/tests/ui.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
#[path = "ui/mod.rs"]
|
||||
mod ui;
|
||||
1
server/packages/sandbox-agent/tests/ui/mod.rs
Normal file
1
server/packages/sandbox-agent/tests/ui/mod.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
mod inspector_ui;
|
||||
Loading…
Add table
Add a link
Reference in a new issue