mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 06:04:43 +00:00
feat: add turn streaming and inspector updates
This commit is contained in:
parent
bf58891edf
commit
34d4f3693e
49 changed files with 4629 additions and 1146 deletions
|
|
@ -122,6 +122,9 @@ enum SessionsCommand {
|
|||
#[command(name = "send-message")]
|
||||
/// Send a message to an existing session.
|
||||
SendMessage(SessionMessageArgs),
|
||||
#[command(name = "send-message-stream")]
|
||||
/// Send a message and stream the response for one turn.
|
||||
SendMessageStream(SessionMessageStreamArgs),
|
||||
#[command(name = "terminate")]
|
||||
/// Terminate a session.
|
||||
Terminate(SessionTerminateArgs),
|
||||
|
|
@ -195,6 +198,17 @@ struct SessionMessageArgs {
|
|||
client: ClientArgs,
|
||||
}
|
||||
|
||||
#[derive(Args, Debug)]
|
||||
struct SessionMessageStreamArgs {
|
||||
session_id: String,
|
||||
#[arg(long, short = 'm')]
|
||||
message: String,
|
||||
#[arg(long)]
|
||||
include_raw: bool,
|
||||
#[command(flatten)]
|
||||
client: ClientArgs,
|
||||
}
|
||||
|
||||
#[derive(Args, Debug)]
|
||||
struct SessionEventsArgs {
|
||||
session_id: String,
|
||||
|
|
@ -443,6 +457,22 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
let response = ctx.post(&path, &body)?;
|
||||
print_empty_response(response)
|
||||
}
|
||||
SessionsCommand::SendMessageStream(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let body = MessageRequest {
|
||||
message: args.message.clone(),
|
||||
};
|
||||
let path = format!("{API_PREFIX}/sessions/{}/messages/stream", args.session_id);
|
||||
let response = ctx.post_with_query(
|
||||
&path,
|
||||
&body,
|
||||
&[(
|
||||
"include_raw",
|
||||
if args.include_raw { Some("true".to_string()) } else { None },
|
||||
)],
|
||||
)?;
|
||||
print_text_response(response)
|
||||
}
|
||||
SessionsCommand::Terminate(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let path = format!("{API_PREFIX}/sessions/{}/terminate", args.session_id);
|
||||
|
|
@ -850,6 +880,21 @@ impl ClientContext {
|
|||
Ok(self.request(Method::POST, path).json(body).send()?)
|
||||
}
|
||||
|
||||
fn post_with_query<T: Serialize>(
|
||||
&self,
|
||||
path: &str,
|
||||
body: &T,
|
||||
query: &[(&str, Option<String>)],
|
||||
) -> Result<reqwest::blocking::Response, CliError> {
|
||||
let mut request = self.request(Method::POST, path).json(body);
|
||||
for (key, value) in query {
|
||||
if let Some(value) = value {
|
||||
request = request.query(&[(key, value)]);
|
||||
}
|
||||
}
|
||||
Ok(request.send()?)
|
||||
}
|
||||
|
||||
fn post_empty(&self, path: &str) -> Result<reqwest::blocking::Response, CliError> {
|
||||
Ok(self.request(Method::POST, path).send()?)
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -8,11 +8,15 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|||
use reqwest::Client;
|
||||
use serde::Serialize;
|
||||
use time::OffsetDateTime;
|
||||
use tokio::time::Instant;
|
||||
|
||||
/// Endpoint that telemetry events are POSTed to.
const TELEMETRY_URL: &str = "https://tc.rivet.dev";
/// Name of an environment variable read elsewhere in this module (its use is not shown here).
const TELEMETRY_ENV_DEBUG: &str = "SANDBOX_AGENT_TELEMETRY_DEBUG";
/// File name, inside the telemetry directory, that stores the telemetry id.
const TELEMETRY_ID_FILE: &str = "telemetry_id";
/// File name, inside the telemetry directory, that stores the last-sent timestamp.
const TELEMETRY_LAST_SENT_FILE: &str = "telemetry_last_sent";
/// Timeout applied to the telemetry HTTP client, in milliseconds.
// The earlier 800 ms definition was a stale duplicate of this constant.
const TELEMETRY_TIMEOUT_MS: u64 = 2_000;
/// Seconds between periodic send attempts.
const TELEMETRY_INTERVAL_SECS: u64 = 300;
/// Minimum number of seconds that must separate two successful sends.
const TELEMETRY_MIN_GAP_SECS: i64 = 300;
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct TelemetryEvent {
|
||||
|
|
@ -49,7 +53,6 @@ struct OsInfo {
|
|||
#[derive(Debug, Serialize)]
|
||||
struct ProviderInfo {
|
||||
name: String,
|
||||
confidence: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
method: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
|
|
@ -69,11 +72,10 @@ pub fn telemetry_enabled(no_telemetry: bool) -> bool {
|
|||
}
|
||||
|
||||
pub fn log_enabled_message() {
|
||||
tracing::info!("anonymous telemetry is enabled; disable with --no-telemetry");
|
||||
tracing::info!("anonymous telemetry is enabled, disable with --no-telemetry");
|
||||
}
|
||||
|
||||
pub fn spawn_telemetry_task() {
|
||||
let event = build_event();
|
||||
tokio::spawn(async move {
|
||||
let client = match Client::builder()
|
||||
.timeout(Duration::from_millis(TELEMETRY_TIMEOUT_MS))
|
||||
|
|
@ -86,21 +88,38 @@ pub fn spawn_telemetry_task() {
|
|||
}
|
||||
};
|
||||
|
||||
if let Err(err) = client.post(TELEMETRY_URL).json(&event).send().await {
|
||||
tracing::debug!(error = %err, "telemetry request failed");
|
||||
attempt_send(&client).await;
|
||||
let start = Instant::now() + Duration::from_secs(TELEMETRY_INTERVAL_SECS);
|
||||
let mut interval = tokio::time::interval_at(start, Duration::from_secs(TELEMETRY_INTERVAL_SECS));
|
||||
loop {
|
||||
interval.tick().await;
|
||||
attempt_send(&client).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn build_event() -> TelemetryEvent {
|
||||
async fn attempt_send(client: &Client) {
|
||||
let dt = OffsetDateTime::now_utc().unix_timestamp();
|
||||
if !should_send(dt) {
|
||||
return;
|
||||
}
|
||||
|
||||
let event = build_event(dt);
|
||||
if let Err(err) = client.post(TELEMETRY_URL).json(&event).send().await {
|
||||
tracing::debug!(error = %err, "telemetry request failed");
|
||||
return;
|
||||
}
|
||||
write_last_sent(dt);
|
||||
}
|
||||
|
||||
fn build_event(dt: i64) -> TelemetryEvent {
|
||||
let eid = load_or_create_id();
|
||||
TelemetryEvent {
|
||||
p: "sandbox-agent".to_string(),
|
||||
dt,
|
||||
et: "sandbox".to_string(),
|
||||
eid,
|
||||
ev: "entity_snapshot".to_string(),
|
||||
ev: "entity_beacon".to_string(),
|
||||
d: TelemetryData {
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
os: OsInfo {
|
||||
|
|
@ -138,9 +157,46 @@ fn load_or_create_id() -> String {
|
|||
}
|
||||
|
||||
fn telemetry_id_path() -> PathBuf {
|
||||
telemetry_dir().join(TELEMETRY_ID_FILE)
|
||||
}
|
||||
|
||||
fn telemetry_last_sent_path() -> PathBuf {
|
||||
telemetry_dir().join(TELEMETRY_LAST_SENT_FILE)
|
||||
}
|
||||
|
||||
fn telemetry_dir() -> PathBuf {
|
||||
dirs::data_dir()
|
||||
.map(|dir| dir.join("sandbox-agent").join(TELEMETRY_ID_FILE))
|
||||
.unwrap_or_else(|| PathBuf::from(".sandbox-agent").join(TELEMETRY_ID_FILE))
|
||||
.map(|dir| dir.join("sandbox-agent"))
|
||||
.unwrap_or_else(|| PathBuf::from(".sandbox-agent"))
|
||||
}
|
||||
|
||||
fn should_send(now: i64) -> bool {
|
||||
if let Some(last) = read_last_sent() {
|
||||
if now >= last && now - last < TELEMETRY_MIN_GAP_SECS {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
fn read_last_sent() -> Option<i64> {
|
||||
let path = telemetry_last_sent_path();
|
||||
fs::read_to_string(&path)
|
||||
.ok()
|
||||
.and_then(|value| value.trim().parse::<i64>().ok())
|
||||
}
|
||||
|
||||
fn write_last_sent(timestamp: i64) {
|
||||
let path = telemetry_last_sent_path();
|
||||
if let Some(parent) = path.parent() {
|
||||
if let Err(err) = fs::create_dir_all(parent) {
|
||||
tracing::debug!(error = %err, "failed to create telemetry directory");
|
||||
return;
|
||||
}
|
||||
}
|
||||
if let Ok(mut file) = fs::OpenOptions::new().create(true).write(true).truncate(true).open(&path) {
|
||||
let _ = file.write_all(timestamp.to_string().as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_id() -> String {
|
||||
|
|
@ -185,7 +241,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
]);
|
||||
return ProviderInfo {
|
||||
name: "e2b".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -206,7 +261,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
]);
|
||||
return ProviderInfo {
|
||||
name: "vercel".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -219,7 +273,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
]);
|
||||
return ProviderInfo {
|
||||
name: "modal".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -232,7 +285,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
]);
|
||||
return ProviderInfo {
|
||||
name: "fly.io".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -245,7 +297,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
]);
|
||||
return ProviderInfo {
|
||||
name: "replit".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -254,7 +305,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
if env::var("CODESANDBOX_HOST").is_ok() || env::var("CSB_BASE_PREVIEW_HOST").is_ok() {
|
||||
return ProviderInfo {
|
||||
name: "codesandbox".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata: None,
|
||||
};
|
||||
|
|
@ -264,7 +314,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
let metadata = metadata_or_none([("name", env::var("CODESPACE_NAME").ok())]);
|
||||
return ProviderInfo {
|
||||
name: "github-codespaces".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -274,7 +323,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
let metadata = metadata_or_none([("environment", env::var("RAILWAY_ENVIRONMENT").ok())]);
|
||||
return ProviderInfo {
|
||||
name: "railway".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -284,7 +332,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
let metadata = metadata_or_none([("serviceId", env::var("RENDER_SERVICE_ID").ok())]);
|
||||
return ProviderInfo {
|
||||
name: "render".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("env".to_string()),
|
||||
metadata,
|
||||
};
|
||||
|
|
@ -293,7 +340,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
if detect_daytona() {
|
||||
return ProviderInfo {
|
||||
name: "daytona".to_string(),
|
||||
confidence: "medium".to_string(),
|
||||
method: Some("filesystem".to_string()),
|
||||
metadata: None,
|
||||
};
|
||||
|
|
@ -302,7 +348,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
if detect_docker() {
|
||||
return ProviderInfo {
|
||||
name: "docker".to_string(),
|
||||
confidence: "high".to_string(),
|
||||
method: Some("filesystem".to_string()),
|
||||
metadata: None,
|
||||
};
|
||||
|
|
@ -310,7 +355,6 @@ fn detect_provider() -> ProviderInfo {
|
|||
|
||||
ProviderInfo {
|
||||
name: "unknown".to_string(),
|
||||
confidence: "low".to_string(),
|
||||
method: None,
|
||||
metadata: None,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,657 +0,0 @@
|
|||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::http::{Method, Request, StatusCode};
|
||||
use axum::Router;
|
||||
use http_body_util::BodyExt;
|
||||
use serde_json::{json, Value};
|
||||
use tempfile::TempDir;
|
||||
use tower::util::ServiceExt;
|
||||
|
||||
use sandbox_agent_agent_management::agents::{AgentId, AgentManager};
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use sandbox_agent_agent_credentials::ExtractedCredentials;
|
||||
use sandbox_agent::router::{
|
||||
build_router,
|
||||
AgentCapabilities,
|
||||
AgentListResponse,
|
||||
AuthConfig,
|
||||
};
|
||||
|
||||
/// Prompt used when a test only needs a short assistant reply.
const PROMPT: &str = "Reply with exactly the single word OK.";

/// Prompt sent by the tool-call and permission flows.
const TOOL_PROMPT: &str =
    "Use the bash tool to run `ls` in the current directory. Do not answer without using the tool.";

/// Prompt sent by the question flow.
const QUESTION_PROMPT: &str =
    "Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself.";
|
||||
|
||||
/// Agent-agnostic event sequence tests.
|
||||
///
|
||||
/// These tests assert that the universal schema output is valid and consistent
|
||||
/// across agents, and they use capability flags from /v1/agents to skip
|
||||
/// unsupported flows.
|
||||
|
||||
struct TestApp {
|
||||
app: Router,
|
||||
_install_dir: TempDir,
|
||||
}
|
||||
|
||||
impl TestApp {
|
||||
fn new() -> Self {
|
||||
let install_dir = tempfile::tempdir().expect("create temp install dir");
|
||||
let manager = AgentManager::new(install_dir.path())
|
||||
.expect("create agent manager");
|
||||
let state = sandbox_agent::router::AppState::new(AuthConfig::disabled(), manager);
|
||||
let app = build_router(state);
|
||||
Self {
|
||||
app,
|
||||
_install_dir: install_dir,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Snapshot of environment variables that is written back when the guard is dropped.
struct EnvGuard {
    /// Variable name mapped to its earlier value; `None` means the variable was unset.
    saved: HashMap<String, Option<String>>,
}

impl Drop for EnvGuard {
    /// Restores every recorded variable: earlier values are set again, and
    /// variables that were unset are removed.
    fn drop(&mut self) {
        self.saved.iter().for_each(|(name, previous)| {
            if let Some(previous) = previous {
                std::env::set_var(name, previous);
            } else {
                std::env::remove_var(name);
            }
        });
    }
}
|
||||
|
||||
fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard {
|
||||
let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"];
|
||||
let mut saved = HashMap::new();
|
||||
for key in keys {
|
||||
saved.insert(key.to_string(), std::env::var(key).ok());
|
||||
}
|
||||
|
||||
match creds.anthropic.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CLAUDE_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("ANTHROPIC_API_KEY");
|
||||
std::env::remove_var("CLAUDE_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
match creds.openai.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("OPENAI_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CODEX_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("OPENAI_API_KEY");
|
||||
std::env::remove_var("CODEX_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
EnvGuard { saved }
|
||||
}
|
||||
|
||||
async fn send_json(
|
||||
app: &Router,
|
||||
method: Method,
|
||||
path: &str,
|
||||
body: Option<Value>,
|
||||
) -> (StatusCode, Value) {
|
||||
let request = Request::builder()
|
||||
.method(method)
|
||||
.uri(path)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body.map(|value| value.to_string()).unwrap_or_default()))
|
||||
.expect("request");
|
||||
let response = app
|
||||
.clone()
|
||||
.oneshot(request)
|
||||
.await
|
||||
.expect("response");
|
||||
let status = response.status();
|
||||
let bytes = response
|
||||
.into_body()
|
||||
.collect()
|
||||
.await
|
||||
.expect("body")
|
||||
.to_bytes();
|
||||
let payload = if bytes.is_empty() {
|
||||
Value::Null
|
||||
} else {
|
||||
serde_json::from_slice(&bytes).unwrap_or(Value::Null)
|
||||
};
|
||||
(status, payload)
|
||||
}
|
||||
|
||||
async fn send_status(app: &Router, method: Method, path: &str, body: Option<Value>) -> StatusCode {
|
||||
let (status, _) = send_json(app, method, path, body).await;
|
||||
status
|
||||
}
|
||||
|
||||
async fn install_agent(app: &Router, agent: AgentId) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/agents/{}/install", agent.as_str()),
|
||||
Some(json!({})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "install agent {}", agent.as_str());
|
||||
}
|
||||
|
||||
async fn create_session(app: &Router, agent: AgentId, session_id: &str, permission_mode: &str) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}"),
|
||||
Some(json!({
|
||||
"agent": agent.as_str(),
|
||||
"permissionMode": permission_mode,
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "create session");
|
||||
}
|
||||
|
||||
async fn create_session_with_mode(
|
||||
app: &Router,
|
||||
agent: AgentId,
|
||||
session_id: &str,
|
||||
agent_mode: &str,
|
||||
permission_mode: &str,
|
||||
) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}"),
|
||||
Some(json!({
|
||||
"agent": agent.as_str(),
|
||||
"agentMode": agent_mode,
|
||||
"permissionMode": permission_mode,
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "create session");
|
||||
}
|
||||
|
||||
fn test_permission_mode(agent: AgentId) -> &'static str {
|
||||
match agent {
|
||||
AgentId::Opencode => "default",
|
||||
_ => "bypass",
|
||||
}
|
||||
}
|
||||
|
||||
async fn send_message(app: &Router, session_id: &str, message: &str) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/messages"),
|
||||
Some(json!({ "message": message })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||
}
|
||||
|
||||
async fn poll_events_until<F>(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
timeout: Duration,
|
||||
mut stop: F,
|
||||
) -> Vec<Value>
|
||||
where
|
||||
F: FnMut(&[Value]) -> bool,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
while start.elapsed() < timeout {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if stop(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
async fn fetch_capabilities(app: &Router) -> HashMap<String, AgentCapabilities> {
|
||||
let (status, payload) = send_json(app, Method::GET, "/v1/agents", None).await;
|
||||
assert_eq!(status, StatusCode::OK, "list agents");
|
||||
let response: AgentListResponse = serde_json::from_value(payload).expect("agents payload");
|
||||
response
|
||||
.agents
|
||||
.into_iter()
|
||||
.map(|agent| (agent.id, agent.capabilities))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn has_event_type(events: &[Value], event_type: &str) -> bool {
|
||||
events
|
||||
.iter()
|
||||
.any(|event| event.get("type").and_then(Value::as_str) == Some(event_type))
|
||||
}
|
||||
|
||||
fn find_assistant_message_item(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.completed") {
|
||||
return None;
|
||||
}
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let role = item.get("role")?.as_str()?;
|
||||
let kind = item.get("kind")?.as_str()?;
|
||||
if role != "assistant" || kind != "message" {
|
||||
return None;
|
||||
}
|
||||
item.get("item_id")?.as_str().map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn event_sequence(event: &Value) -> Option<u64> {
|
||||
event.get("sequence").and_then(Value::as_u64)
|
||||
}
|
||||
|
||||
fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option<u64> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some(event_type) {
|
||||
return None;
|
||||
}
|
||||
match event_type {
|
||||
"item.delta" => {
|
||||
let data = event.get("data")?;
|
||||
let id = data.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let id = item.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("permission.requested") {
|
||||
return None;
|
||||
}
|
||||
event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("permission_id"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn find_question_id(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("question.requested") {
|
||||
return None;
|
||||
}
|
||||
event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("question_id"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn find_first_answer(events: &[Value]) -> Option<Vec<Vec<String>>> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("question.requested") {
|
||||
return None;
|
||||
}
|
||||
let options = event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("options"))
|
||||
.and_then(Value::as_array)?;
|
||||
let option = options.first()?.as_str()?.to_string();
|
||||
Some(vec![vec![option]])
|
||||
})
|
||||
}
|
||||
|
||||
fn find_tool_call(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.started")
|
||||
&& event.get("type").and_then(Value::as_str) != Some("item.completed")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let kind = item.get("kind")?.as_str()?;
|
||||
if kind != "tool_call" {
|
||||
return None;
|
||||
}
|
||||
item.get("item_id")?.as_str().map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
fn has_tool_result(events: &[Value]) -> bool {
|
||||
events.iter().any(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.completed") {
|
||||
return false;
|
||||
}
|
||||
let item = match event.get("data").and_then(|data| data.get("item")) {
|
||||
Some(item) => item,
|
||||
None => return false,
|
||||
};
|
||||
item.get("kind").and_then(Value::as_str) == Some("tool_result")
|
||||
})
|
||||
}
|
||||
|
||||
fn expect_basic_sequence(events: &[Value]) {
|
||||
assert!(has_event_type(events, "session.started"), "session.started missing");
|
||||
let item_id = find_assistant_message_item(events).expect("assistant message missing");
|
||||
let started_seq = find_item_event_seq(events, "item.started", &item_id)
|
||||
.expect("item.started missing");
|
||||
// Intentionally require deltas here to validate our synthetic delta behavior.
|
||||
let delta_seq = find_item_event_seq(events, "item.delta", &item_id)
|
||||
.expect("item.delta missing");
|
||||
let completed_seq = find_item_event_seq(events, "item.completed", &item_id)
|
||||
.expect("item.completed missing");
|
||||
assert!(started_seq < delta_seq, "item.started must precede delta");
|
||||
assert!(delta_seq < completed_seq, "delta must precede completion");
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_agnostic_basic_reply() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("basic-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "default").await;
|
||||
send_message(&app.app, &session_id, PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
has_event_type(events, "error") || find_assistant_message_item(events).is_some()
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
!events.is_empty(),
|
||||
"no events collected for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
expect_basic_sequence(&events);
|
||||
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if caps.tool_calls {
|
||||
assert!(
|
||||
!events.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("agent.unparsed")
|
||||
}),
|
||||
"agent.unparsed event detected"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_agnostic_tool_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !caps.tool_calls {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("tool-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||
send_message(&app.app, &session_id, TOOL_PROMPT).await;
|
||||
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
let mut replied = false;
|
||||
while start.elapsed() < Duration::from_secs(180) {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(&app.app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if !replied {
|
||||
if let Some(permission_id) = find_permission_id(&events) {
|
||||
let _ = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!(
|
||||
"/v1/sessions/{session_id}/permissions/{permission_id}/reply"
|
||||
),
|
||||
Some(json!({ "reply": "once" })),
|
||||
)
|
||||
.await;
|
||||
replied = true;
|
||||
}
|
||||
}
|
||||
if has_tool_result(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
|
||||
let tool_call = find_tool_call(&events);
|
||||
let tool_result = has_tool_result(&events);
|
||||
assert!(
|
||||
tool_call.is_some(),
|
||||
"tool_call missing for tool-capable agent {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
if tool_call.is_some() {
|
||||
assert!(
|
||||
tool_result,
|
||||
"tool_result missing after tool_call for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_agnostic_permission_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !(caps.plan_mode && caps.permissions) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("perm-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "plan").await;
|
||||
send_message(&app.app, &session_id, TOOL_PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
find_permission_id(events).is_some() || has_event_type(events, "error")
|
||||
})
|
||||
.await;
|
||||
|
||||
let permission_id = find_permission_id(&events).expect("permission.requested missing");
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply"),
|
||||
Some(json!({ "reply": "once" })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "permission reply");
|
||||
|
||||
let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
events.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("permission.resolved")
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
resolved.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("permission.resolved")
|
||||
&& event
|
||||
.get("synthetic")
|
||||
.and_then(Value::as_bool)
|
||||
.unwrap_or(false)
|
||||
}),
|
||||
"permission.resolved should be synthetic"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_agnostic_question_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !caps.questions {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("question-{}", config.agent.as_str());
|
||||
create_session_with_mode(&app.app, config.agent, &session_id, "plan", "plan").await;
|
||||
send_message(&app.app, &session_id, QUESTION_PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
find_question_id(events).is_some() || has_event_type(events, "error")
|
||||
})
|
||||
.await;
|
||||
|
||||
let question_id = find_question_id(&events).expect("question.requested missing");
|
||||
let answers = find_first_answer(&events).unwrap_or_else(|| vec![vec![]]);
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/questions/{question_id}/reply"),
|
||||
Some(json!({ "answers": answers })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "question reply");
|
||||
|
||||
let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
events.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("question.resolved")
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
resolved.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("question.resolved")
|
||||
&& event
|
||||
.get("synthetic")
|
||||
.and_then(Value::as_bool)
|
||||
.unwrap_or(false)
|
||||
}),
|
||||
"question.resolved should be synthetic"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_agnostic_termination() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
|
||||
for config in &configs {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("terminate-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "default").await;
|
||||
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/terminate"),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "terminate session");
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(30), |events| {
|
||||
has_event_type(events, "session.ended")
|
||||
})
|
||||
.await;
|
||||
assert!(has_event_type(&events, "session.ended"), "missing session.ended");
|
||||
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/messages"),
|
||||
Some(json!({ "message": PROMPT })),
|
||||
)
|
||||
.await;
|
||||
assert!(!status.is_success(), "terminated session should reject messages");
|
||||
}
|
||||
}
|
||||
46
server/packages/sandbox-agent/tests/agent_basic_reply.rs
Normal file
46
server/packages/sandbox-agent/tests/agent_basic_reply.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use serde_json::Value;
|
||||
use std::time::Duration;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_basic_reply() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("basic-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "default").await;
|
||||
send_message(&app.app, &session_id, PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
has_event_type(events, "error") || find_assistant_message_item(events).is_some()
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
!events.is_empty(),
|
||||
"no events collected for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
expect_basic_sequence(&events);
|
||||
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if caps.tool_calls {
|
||||
assert!(
|
||||
!events.iter().any(|event| {
|
||||
event.get("type").and_then(Value::as_str) == Some("agent.unparsed")
|
||||
}),
|
||||
"agent.unparsed event detected"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
457
server/packages/sandbox-agent/tests/agent_multi_turn.rs
Normal file
457
server/packages/sandbox-agent/tests/agent_multi_turn.rs
Normal file
|
|
@ -0,0 +1,457 @@
|
|||
//! Tests for multi-turn conversations to validate session resumption behavior.
|
||||
//!
|
||||
//! This test validates that:
|
||||
//! 1. Sessions can handle multiple messages (multi-turn conversations)
|
||||
//! 2. Agents that support resumption (Claude, Amp, OpenCode) can continue after process exit
|
||||
//! 3. Codex supports multi-turn via the shared app-server model (single process, multiple threads)
|
||||
//! 4. The mock agent correctly supports multi-turn as the reference implementation
|
||||
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::http::{Method, Request, StatusCode};
|
||||
use axum::Router;
|
||||
use http_body_util::BodyExt;
|
||||
use serde_json::{json, Value};
|
||||
use tempfile::TempDir;
|
||||
|
||||
use sandbox_agent::router::{build_router, AppState, AuthConfig};
|
||||
use sandbox_agent_agent_management::agents::{AgentId, AgentManager};
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use sandbox_agent_agent_credentials::ExtractedCredentials;
|
||||
use std::collections::BTreeMap;
|
||||
use tower::util::ServiceExt;
|
||||
|
||||
/// Prompt sent as the first turn of a multi-turn conversation.
const FIRST_PROMPT: &str = "Reply with exactly the word FIRST.";
|
||||
/// Prompt sent as the second turn, after the first response has been observed.
const SECOND_PROMPT: &str = "Reply with exactly the word SECOND.";
|
||||
|
||||
struct TestApp {
|
||||
app: Router,
|
||||
_install_dir: TempDir,
|
||||
}
|
||||
|
||||
impl TestApp {
|
||||
fn new() -> Self {
|
||||
let install_dir = tempfile::tempdir().expect("create temp install dir");
|
||||
let manager = AgentManager::new(install_dir.path()).expect("create agent manager");
|
||||
let state = AppState::new(AuthConfig::disabled(), manager);
|
||||
let app = build_router(state);
|
||||
Self {
|
||||
app,
|
||||
_install_dir: install_dir,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Snapshot of environment variables that is written back when the guard is dropped.
struct EnvGuard {
    /// Variable name mapped to its recorded value; `None` means the variable was unset.
    saved: BTreeMap<String, Option<String>>,
}

impl Drop for EnvGuard {
    /// Re-applies every recorded entry: variables with a recorded value are
    /// set to it again, variables recorded as unset are removed.
    fn drop(&mut self) {
        for (name, previous) in self.saved.iter() {
            if let Some(original) = previous {
                std::env::set_var(name, original);
            } else {
                std::env::remove_var(name);
            }
        }
    }
}
|
||||
|
||||
fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard {
|
||||
let keys = [
|
||||
"ANTHROPIC_API_KEY",
|
||||
"CLAUDE_API_KEY",
|
||||
"OPENAI_API_KEY",
|
||||
"CODEX_API_KEY",
|
||||
];
|
||||
let mut saved = BTreeMap::new();
|
||||
for key in keys {
|
||||
saved.insert(key.to_string(), std::env::var(key).ok());
|
||||
}
|
||||
|
||||
match creds.anthropic.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CLAUDE_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("ANTHROPIC_API_KEY");
|
||||
std::env::remove_var("CLAUDE_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
match creds.openai.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("OPENAI_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CODEX_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("OPENAI_API_KEY");
|
||||
std::env::remove_var("CODEX_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
EnvGuard { saved }
|
||||
}
|
||||
|
||||
async fn send_json(
|
||||
app: &Router,
|
||||
method: Method,
|
||||
path: &str,
|
||||
body: Option<Value>,
|
||||
) -> (StatusCode, Value) {
|
||||
let mut builder = Request::builder().method(method).uri(path);
|
||||
let body = if let Some(body) = body {
|
||||
builder = builder.header("content-type", "application/json");
|
||||
Body::from(body.to_string())
|
||||
} else {
|
||||
Body::empty()
|
||||
};
|
||||
let request = builder.body(body).expect("request");
|
||||
let response = app.clone().oneshot(request).await.expect("request handled");
|
||||
let status = response.status();
|
||||
let bytes = response
|
||||
.into_body()
|
||||
.collect()
|
||||
.await
|
||||
.expect("read body")
|
||||
.to_bytes();
|
||||
let value = if bytes.is_empty() {
|
||||
Value::Null
|
||||
} else {
|
||||
serde_json::from_slice(&bytes)
|
||||
.unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string()))
|
||||
};
|
||||
(status, value)
|
||||
}
|
||||
|
||||
async fn send_status(app: &Router, method: Method, path: &str, body: Option<Value>) -> StatusCode {
|
||||
let (status, _) = send_json(app, method, path, body).await;
|
||||
status
|
||||
}
|
||||
|
||||
async fn install_agent(app: &Router, agent: AgentId) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/agents/{}/install", agent.as_str()),
|
||||
Some(json!({})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "install {agent}");
|
||||
}
|
||||
|
||||
fn test_permission_mode(agent: AgentId) -> &'static str {
|
||||
match agent {
|
||||
AgentId::Opencode => "default",
|
||||
_ => "bypass",
|
||||
}
|
||||
}
|
||||
|
||||
async fn create_session(app: &Router, agent: AgentId, session_id: &str) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}"),
|
||||
Some(json!({
|
||||
"agent": agent.as_str(),
|
||||
"permissionMode": test_permission_mode(agent)
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "create session {agent}");
|
||||
}
|
||||
|
||||
/// Send a message and return the status code (allows checking for errors)
|
||||
async fn send_message_with_status(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
message: &str,
|
||||
) -> (StatusCode, Value) {
|
||||
send_json(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/messages"),
|
||||
Some(json!({ "message": message })),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Wait for a specific number of assistant responses (item.completed with role=assistant)
|
||||
async fn wait_for_n_responses(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
n: usize,
|
||||
timeout: Duration,
|
||||
) -> bool {
|
||||
let start = Instant::now();
|
||||
while start.elapsed() < timeout {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=1000");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
if status != StatusCode::OK {
|
||||
return false;
|
||||
}
|
||||
let events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
let completed_count = events.iter().filter(|e| is_assistant_completed(e)).count();
|
||||
if completed_count >= n {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for errors
|
||||
for event in &events {
|
||||
if is_error_event(event) {
|
||||
eprintln!("Error event: {:?}", event);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(300)).await;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Wait for an assistant response (item.completed with role=assistant)
|
||||
async fn wait_for_response(app: &Router, session_id: &str, timeout: Duration) -> bool {
|
||||
wait_for_n_responses(app, session_id, 1, timeout).await
|
||||
}
|
||||
|
||||
fn is_assistant_completed(event: &Value) -> bool {
|
||||
event
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.map(|t| t == "item.completed")
|
||||
.unwrap_or(false)
|
||||
&& event
|
||||
.get("data")
|
||||
.and_then(|d| d.get("item"))
|
||||
.and_then(|i| i.get("role"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|r| r == "assistant")
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_session_ended(event: &Value) -> bool {
|
||||
event
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.map(|t| t == "session.ended")
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn is_error_event(event: &Value) -> bool {
|
||||
matches!(
|
||||
event.get("type").and_then(Value::as_str),
|
||||
Some("error") | Some("agent.unparsed")
|
||||
)
|
||||
}
|
||||
|
||||
/// Count assistant responses in the event stream
|
||||
async fn count_assistant_responses(app: &Router, session_id: &str) -> usize {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=1000");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
if status != StatusCode::OK {
|
||||
eprintln!("Failed to get events: status={}", status);
|
||||
return 0;
|
||||
}
|
||||
let events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
// Debug: print all event types
|
||||
eprintln!("All events ({}):", events.len());
|
||||
for (i, e) in events.iter().enumerate() {
|
||||
let event_type = e.get("type").and_then(Value::as_str).unwrap_or("?");
|
||||
let role = e
|
||||
.get("data")
|
||||
.and_then(|d| d.get("item"))
|
||||
.and_then(|i| i.get("role"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("-");
|
||||
eprintln!(" [{}] type={}, role={}", i, event_type, role);
|
||||
}
|
||||
|
||||
let count = events.iter().filter(|e| is_assistant_completed(e)).count();
|
||||
eprintln!("Assistant completed count: {}", count);
|
||||
count
|
||||
}
|
||||
|
||||
/// Test multi-turn conversation for a specific agent
|
||||
async fn test_multi_turn_for_agent(app: &Router, agent: AgentId) -> Result<(), String> {
|
||||
let session_id = format!("multi-turn-{}", agent.as_str());
|
||||
eprintln!("\n=== Testing multi-turn for {} ===", agent);
|
||||
|
||||
// Create session
|
||||
create_session(app, agent, &session_id).await;
|
||||
eprintln!("Session created: {}", session_id);
|
||||
|
||||
// Send first message
|
||||
eprintln!("Sending first message...");
|
||||
let (status, body) = send_message_with_status(app, &session_id, FIRST_PROMPT).await;
|
||||
eprintln!("First message status: {}", status);
|
||||
if status != StatusCode::NO_CONTENT {
|
||||
return Err(format!(
|
||||
"First message failed with status {}: {:?}",
|
||||
status, body
|
||||
));
|
||||
}
|
||||
|
||||
// Wait for first response
|
||||
eprintln!("Waiting for first response...");
|
||||
let got_first = wait_for_response(app, &session_id, Duration::from_secs(120)).await;
|
||||
if !got_first {
|
||||
return Err("Timed out waiting for first response".to_string());
|
||||
}
|
||||
eprintln!("Got first response");
|
||||
|
||||
// Small delay to ensure session state is updated
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Send second message - this is the critical test
|
||||
eprintln!("Sending second message...");
|
||||
let (status, body) = send_message_with_status(app, &session_id, SECOND_PROMPT).await;
|
||||
eprintln!("Second message status: {}, body: {:?}", status, body);
|
||||
if status != StatusCode::NO_CONTENT {
|
||||
return Err(format!(
|
||||
"Second message failed with status {}: {:?}",
|
||||
status, body
|
||||
));
|
||||
}
|
||||
|
||||
// Wait for second response - specifically wait for 2 completed responses
|
||||
eprintln!("Waiting for second response (total 2)...");
|
||||
let got_both = wait_for_n_responses(app, &session_id, 2, Duration::from_secs(120)).await;
|
||||
if !got_both {
|
||||
// Debug: show what we got
|
||||
let response_count = count_assistant_responses(app, &session_id).await;
|
||||
return Err(format!(
|
||||
"Timed out waiting for second response (got {} completed)",
|
||||
response_count
|
||||
));
|
||||
}
|
||||
eprintln!("Got both responses");
|
||||
|
||||
// Verify we got two assistant responses
|
||||
let response_count = count_assistant_responses(app, &session_id).await;
|
||||
eprintln!("Final response count: {}", response_count);
|
||||
if response_count < 2 {
|
||||
return Err(format!(
|
||||
"Expected at least 2 assistant responses, got {}",
|
||||
response_count
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn multi_turn_mock_agent() {
|
||||
let test_app = TestApp::new();
|
||||
|
||||
// Mock agent should always support multi-turn as the reference implementation
|
||||
let result = test_multi_turn_for_agent(&test_app.app, AgentId::Mock).await;
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Mock agent multi-turn failed: {:?}",
|
||||
result.err()
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn multi_turn_real_agents() {
|
||||
let configs = match test_agents_from_env() {
|
||||
Ok(configs) => configs,
|
||||
Err(err) => {
|
||||
eprintln!("Failed to get agent configs: {:?}. Skipping multi-turn test.", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
if configs.is_empty() {
|
||||
eprintln!("No agents configured for testing. Skipping multi-turn test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let test_app = TestApp::new();
|
||||
|
||||
for config in configs {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&test_app.app, config.agent).await;
|
||||
|
||||
let result = test_multi_turn_for_agent(&test_app.app, config.agent).await;
|
||||
|
||||
match config.agent {
|
||||
AgentId::Claude | AgentId::Amp | AgentId::Opencode => {
|
||||
// These agents should support multi-turn via resumption
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"{} multi-turn failed (should support resumption): {:?}",
|
||||
config.agent,
|
||||
result.err()
|
||||
);
|
||||
}
|
||||
AgentId::Codex => {
|
||||
// Codex now supports multi-turn via the shared app-server model
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"{} multi-turn failed (should support shared app-server): {:?}",
|
||||
config.agent,
|
||||
result.err()
|
||||
);
|
||||
}
|
||||
AgentId::Mock => {
|
||||
// Mock is tested separately
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Test that verifies the session can be reopened after ending
|
||||
#[tokio::test]
|
||||
async fn session_reopen_after_end() {
|
||||
let test_app = TestApp::new();
|
||||
let session_id = "reopen-test";
|
||||
|
||||
// Create session with mock agent
|
||||
create_session(&test_app.app, AgentId::Mock, session_id).await;
|
||||
|
||||
// Send "end" command to mock agent to end the session
|
||||
let (status, _) = send_message_with_status(&test_app.app, session_id, "end").await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT);
|
||||
|
||||
// Wait for session to end
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
|
||||
// Verify session is ended
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=100");
|
||||
let (_, payload) = send_json(&test_app.app, Method::GET, &path, None).await;
|
||||
let events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
let has_ended = events.iter().any(|e| is_session_ended(e));
|
||||
assert!(has_ended, "Session should be ended after 'end' command");
|
||||
|
||||
// Try to send another message - mock agent supports resume so this should work
|
||||
// (or fail if we haven't implemented reopen for mock)
|
||||
let (status, body) = send_message_with_status(&test_app.app, session_id, "hello again").await;
|
||||
|
||||
// For mock agent, the session should be reopenable since mock is in agent_supports_resume
|
||||
// But mock's session.ended is triggered differently than real agents
|
||||
// This test documents the current behavior
|
||||
if status == StatusCode::NO_CONTENT {
|
||||
eprintln!("Mock agent session was successfully reopened after end");
|
||||
} else {
|
||||
eprintln!(
|
||||
"Mock agent session could not be reopened (status {}): {:?}",
|
||||
status, body
|
||||
);
|
||||
}
|
||||
}
|
||||
63
server/packages/sandbox-agent/tests/agent_permission_flow.rs
Normal file
63
server/packages/sandbox-agent/tests/agent_permission_flow.rs
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use std::time::Duration;
|
||||
use axum::http::Method;
|
||||
use serde_json::json;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_permission_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !(caps.plan_mode && caps.permissions) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("perm-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "plan").await;
|
||||
send_message(&app.app, &session_id, TOOL_PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
find_permission_id(events).is_some() || has_event_type(events, "error")
|
||||
})
|
||||
.await;
|
||||
|
||||
let permission_id = find_permission_id(&events).expect("permission.requested missing");
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply"),
|
||||
Some(json!({ "reply": "once" })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "permission reply");
|
||||
|
||||
let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
events.iter().any(|event| {
|
||||
event.get("type").and_then(serde_json::Value::as_str) == Some("permission.resolved")
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
resolved.iter().any(|event| {
|
||||
event.get("type").and_then(serde_json::Value::as_str) == Some("permission.resolved")
|
||||
&& event
|
||||
.get("synthetic")
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(false)
|
||||
}),
|
||||
"permission.resolved should be synthetic"
|
||||
);
|
||||
}
|
||||
}
|
||||
64
server/packages/sandbox-agent/tests/agent_question_flow.rs
Normal file
64
server/packages/sandbox-agent/tests/agent_question_flow.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use std::time::Duration;
|
||||
use axum::http::Method;
|
||||
use serde_json::json;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_question_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !caps.questions {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("question-{}", config.agent.as_str());
|
||||
create_session_with_mode(&app.app, config.agent, &session_id, "plan", "plan").await;
|
||||
send_message(&app.app, &session_id, QUESTION_PROMPT).await;
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
find_question_id(events).is_some() || has_event_type(events, "error")
|
||||
})
|
||||
.await;
|
||||
|
||||
let question_id = find_question_id(&events).expect("question.requested missing");
|
||||
let answers = find_first_answer(&events).unwrap_or_else(|| vec![vec![]]);
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/questions/{question_id}/reply"),
|
||||
Some(json!({ "answers": answers })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "question reply");
|
||||
|
||||
let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| {
|
||||
events.iter().any(|event| {
|
||||
event.get("type").and_then(serde_json::Value::as_str) == Some("question.resolved")
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
assert!(
|
||||
resolved.iter().any(|event| {
|
||||
event.get("type").and_then(serde_json::Value::as_str) == Some("question.resolved")
|
||||
&& event
|
||||
.get("synthetic")
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(false)
|
||||
}),
|
||||
"question.resolved should be synthetic"
|
||||
);
|
||||
}
|
||||
}
|
||||
45
server/packages/sandbox-agent/tests/agent_termination.rs
Normal file
45
server/packages/sandbox-agent/tests/agent_termination.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use std::time::Duration;
|
||||
use axum::http::Method;
|
||||
use serde_json::json;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_termination() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
|
||||
for config in &configs {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("terminate-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &session_id, "default").await;
|
||||
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/terminate"),
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "terminate session");
|
||||
|
||||
let events = poll_events_until(&app.app, &session_id, Duration::from_secs(30), |events| {
|
||||
has_event_type(events, "session.ended")
|
||||
})
|
||||
.await;
|
||||
assert!(has_event_type(&events, "session.ended"), "missing session.ended");
|
||||
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/messages"),
|
||||
Some(json!({ "message": PROMPT })),
|
||||
)
|
||||
.await;
|
||||
assert!(!status.is_success(), "terminated session should reject messages");
|
||||
}
|
||||
}
|
||||
94
server/packages/sandbox-agent/tests/agent_tool_flow.rs
Normal file
94
server/packages/sandbox-agent/tests/agent_tool_flow.rs
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
mod common;
|
||||
|
||||
use common::*;
|
||||
use sandbox_agent_agent_management::testing::test_agents_from_env;
|
||||
use serde_json::Value;
|
||||
use std::time::{Duration, Instant};
|
||||
use axum::http::Method;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn agent_tool_flow() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
let capabilities = fetch_capabilities(&app.app).await;
|
||||
|
||||
for config in &configs {
|
||||
let caps = capabilities
|
||||
.get(config.agent.as_str())
|
||||
.expect("capabilities missing");
|
||||
if !caps.tool_calls {
|
||||
continue;
|
||||
}
|
||||
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(&app.app, config.agent).await;
|
||||
|
||||
let session_id = format!("tool-{}", config.agent.as_str());
|
||||
create_session(
|
||||
&app.app,
|
||||
config.agent,
|
||||
&session_id,
|
||||
test_permission_mode(config.agent),
|
||||
)
|
||||
.await;
|
||||
send_message(&app.app, &session_id, TOOL_PROMPT).await;
|
||||
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
let mut replied = false;
|
||||
while start.elapsed() < Duration::from_secs(180) {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(&app.app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, axum::http::StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if !replied {
|
||||
if let Some(permission_id) = find_permission_id(&events) {
|
||||
let _ = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
&format!(
|
||||
"/v1/sessions/{session_id}/permissions/{permission_id}/reply"
|
||||
),
|
||||
Some(serde_json::json!({ "reply": "once" })),
|
||||
)
|
||||
.await;
|
||||
replied = true;
|
||||
}
|
||||
}
|
||||
if has_tool_result(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
|
||||
let tool_call = find_tool_call(&events);
|
||||
let tool_result = has_tool_result(&events);
|
||||
assert!(
|
||||
tool_call.is_some(),
|
||||
"tool_call missing for tool-capable agent {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
if tool_call.is_some() {
|
||||
assert!(
|
||||
tool_result,
|
||||
"tool_result missing after tool_call for {}",
|
||||
config.agent.as_str()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
388
server/packages/sandbox-agent/tests/common/mod.rs
Normal file
388
server/packages/sandbox-agent/tests/common/mod.rs
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::http::{Method, Request, StatusCode};
|
||||
use axum::Router;
|
||||
use http_body_util::BodyExt;
|
||||
use serde_json::{json, Value};
|
||||
use tempfile::TempDir;
|
||||
use tower::util::ServiceExt;
|
||||
|
||||
use sandbox_agent::router::{
|
||||
build_router,
|
||||
AgentCapabilities,
|
||||
AgentListResponse,
|
||||
AuthConfig,
|
||||
};
|
||||
use sandbox_agent_agent_credentials::ExtractedCredentials;
|
||||
use sandbox_agent_agent_management::agents::{AgentId, AgentManager};
|
||||
|
||||
/// Minimal prompt that asks the agent to answer with the single word OK.
pub const PROMPT: &str = "Reply with exactly the single word OK.";
|
||||
/// Prompt that instructs the agent to run `ls` through its bash tool rather
/// than answering directly.
pub const TOOL_PROMPT: &str =
    "Use the bash tool to run `ls` in the current directory. Do not answer without using the tool.";
|
||||
/// Prompt that instructs the agent to ask exactly one yes/no question through
/// the AskUserQuestion tool and wait for the reply.
pub const QUESTION_PROMPT: &str =
    "Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself.";
|
||||
|
||||
pub struct TestApp {
|
||||
pub app: Router,
|
||||
_install_dir: TempDir,
|
||||
}
|
||||
|
||||
impl TestApp {
|
||||
pub fn new() -> Self {
|
||||
let install_dir = tempfile::tempdir().expect("create temp install dir");
|
||||
let manager = AgentManager::new(install_dir.path())
|
||||
.expect("create agent manager");
|
||||
let state = sandbox_agent::router::AppState::new(AuthConfig::disabled(), manager);
|
||||
let app = build_router(state);
|
||||
Self {
|
||||
app,
|
||||
_install_dir: install_dir,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Snapshot of environment variables that is written back when the guard is dropped.
pub struct EnvGuard {
    /// Variable name mapped to its recorded value; `None` means the variable was unset.
    saved: HashMap<String, Option<String>>,
}

impl Drop for EnvGuard {
    /// Re-applies every recorded entry: variables with a recorded value are
    /// set to it again, variables recorded as unset are removed.
    fn drop(&mut self) {
        for (name, previous) in self.saved.iter() {
            if let Some(original) = previous {
                std::env::set_var(name, original);
            } else {
                std::env::remove_var(name);
            }
        }
    }
}
|
||||
|
||||
pub fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard {
|
||||
let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"];
|
||||
let mut saved = HashMap::new();
|
||||
for key in keys {
|
||||
saved.insert(key.to_string(), std::env::var(key).ok());
|
||||
}
|
||||
|
||||
match creds.anthropic.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CLAUDE_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("ANTHROPIC_API_KEY");
|
||||
std::env::remove_var("CLAUDE_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
match creds.openai.as_ref() {
|
||||
Some(cred) => {
|
||||
std::env::set_var("OPENAI_API_KEY", &cred.api_key);
|
||||
std::env::set_var("CODEX_API_KEY", &cred.api_key);
|
||||
}
|
||||
None => {
|
||||
std::env::remove_var("OPENAI_API_KEY");
|
||||
std::env::remove_var("CODEX_API_KEY");
|
||||
}
|
||||
}
|
||||
|
||||
EnvGuard { saved }
|
||||
}
|
||||
|
||||
pub async fn send_json(
|
||||
app: &Router,
|
||||
method: Method,
|
||||
path: &str,
|
||||
body: Option<Value>,
|
||||
) -> (StatusCode, Value) {
|
||||
let request = Request::builder()
|
||||
.method(method)
|
||||
.uri(path)
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(body.map(|value| value.to_string()).unwrap_or_default()))
|
||||
.expect("request");
|
||||
let response = app
|
||||
.clone()
|
||||
.oneshot(request)
|
||||
.await
|
||||
.expect("response");
|
||||
let status = response.status();
|
||||
let bytes = response
|
||||
.into_body()
|
||||
.collect()
|
||||
.await
|
||||
.expect("body")
|
||||
.to_bytes();
|
||||
let payload = if bytes.is_empty() {
|
||||
Value::Null
|
||||
} else {
|
||||
serde_json::from_slice(&bytes).unwrap_or(Value::Null)
|
||||
};
|
||||
(status, payload)
|
||||
}
|
||||
|
||||
pub async fn send_status(
|
||||
app: &Router,
|
||||
method: Method,
|
||||
path: &str,
|
||||
body: Option<Value>,
|
||||
) -> StatusCode {
|
||||
let (status, _) = send_json(app, method, path, body).await;
|
||||
status
|
||||
}
|
||||
|
||||
pub async fn install_agent(app: &Router, agent: AgentId) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/agents/{}/install", agent.as_str()),
|
||||
Some(json!({})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "install agent {}", agent.as_str());
|
||||
}
|
||||
|
||||
pub async fn create_session(
|
||||
app: &Router,
|
||||
agent: AgentId,
|
||||
session_id: &str,
|
||||
permission_mode: &str,
|
||||
) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}"),
|
||||
Some(json!({
|
||||
"agent": agent.as_str(),
|
||||
"permissionMode": permission_mode,
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "create session");
|
||||
}
|
||||
|
||||
pub async fn create_session_with_mode(
|
||||
app: &Router,
|
||||
agent: AgentId,
|
||||
session_id: &str,
|
||||
agent_mode: &str,
|
||||
permission_mode: &str,
|
||||
) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}"),
|
||||
Some(json!({
|
||||
"agent": agent.as_str(),
|
||||
"agentMode": agent_mode,
|
||||
"permissionMode": permission_mode,
|
||||
})),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::OK, "create session");
|
||||
}
|
||||
|
||||
pub fn test_permission_mode(agent: AgentId) -> &'static str {
|
||||
match agent {
|
||||
AgentId::Opencode => "default",
|
||||
_ => "bypass",
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn send_message(app: &Router, session_id: &str, message: &str) {
|
||||
let status = send_status(
|
||||
app,
|
||||
Method::POST,
|
||||
&format!("/v1/sessions/{session_id}/messages"),
|
||||
Some(json!({ "message": message })),
|
||||
)
|
||||
.await;
|
||||
assert_eq!(status, StatusCode::NO_CONTENT, "send message");
|
||||
}
|
||||
|
||||
pub async fn poll_events_until<F>(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
timeout: Duration,
|
||||
mut stop: F,
|
||||
) -> Vec<Value>
|
||||
where
|
||||
F: FnMut(&[Value]) -> bool,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let mut offset = 0u64;
|
||||
let mut events = Vec::new();
|
||||
while start.elapsed() < timeout {
|
||||
let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200");
|
||||
let (status, payload) = send_json(app, Method::GET, &path, None).await;
|
||||
assert_eq!(status, StatusCode::OK, "poll events");
|
||||
let new_events = payload
|
||||
.get("events")
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default();
|
||||
if !new_events.is_empty() {
|
||||
if let Some(last) = new_events
|
||||
.last()
|
||||
.and_then(|event| event.get("sequence"))
|
||||
.and_then(Value::as_u64)
|
||||
{
|
||||
offset = last;
|
||||
}
|
||||
events.extend(new_events);
|
||||
if stop(&events) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
tokio::time::sleep(Duration::from_millis(800)).await;
|
||||
}
|
||||
events
|
||||
}
|
||||
|
||||
pub async fn fetch_capabilities(app: &Router) -> HashMap<String, AgentCapabilities> {
|
||||
let (status, payload) = send_json(app, Method::GET, "/v1/agents", None).await;
|
||||
assert_eq!(status, StatusCode::OK, "list agents");
|
||||
let response: AgentListResponse = serde_json::from_value(payload).expect("agents payload");
|
||||
response
|
||||
.agents
|
||||
.into_iter()
|
||||
.map(|agent| (agent.id, agent.capabilities))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn has_event_type(events: &[Value], event_type: &str) -> bool {
|
||||
events
|
||||
.iter()
|
||||
.any(|event| event.get("type").and_then(Value::as_str) == Some(event_type))
|
||||
}
|
||||
|
||||
pub fn find_assistant_message_item(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.completed") {
|
||||
return None;
|
||||
}
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let role = item.get("role")?.as_str()?;
|
||||
let kind = item.get("kind")?.as_str()?;
|
||||
if role != "assistant" || kind != "message" {
|
||||
return None;
|
||||
}
|
||||
item.get("item_id")?.as_str().map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn event_sequence(event: &Value) -> Option<u64> {
|
||||
event.get("sequence").and_then(Value::as_u64)
|
||||
}
|
||||
|
||||
pub fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option<u64> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some(event_type) {
|
||||
return None;
|
||||
}
|
||||
match event_type {
|
||||
"item.delta" => {
|
||||
let data = event.get("data")?;
|
||||
let id = data.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let id = item.get("item_id")?.as_str()?;
|
||||
if id == item_id {
|
||||
event_sequence(event)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_permission_id(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("permission.requested") {
|
||||
return None;
|
||||
}
|
||||
event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("permission_id"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_question_id(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("question.requested") {
|
||||
return None;
|
||||
}
|
||||
event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("question_id"))
|
||||
.and_then(Value::as_str)
|
||||
.map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_first_answer(events: &[Value]) -> Option<Vec<Vec<String>>> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("question.requested") {
|
||||
return None;
|
||||
}
|
||||
let options = event
|
||||
.get("data")
|
||||
.and_then(|data| data.get("options"))
|
||||
.and_then(Value::as_array)?;
|
||||
let option = options.first()?.as_str()?.to_string();
|
||||
Some(vec![vec![option]])
|
||||
})
|
||||
}
|
||||
|
||||
pub fn find_tool_call(events: &[Value]) -> Option<String> {
|
||||
events.iter().find_map(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.started")
|
||||
&& event.get("type").and_then(Value::as_str) != Some("item.completed")
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let item = event.get("data")?.get("item")?;
|
||||
let kind = item.get("kind")?.as_str()?;
|
||||
if kind != "tool_call" {
|
||||
return None;
|
||||
}
|
||||
item.get("item_id")?.as_str().map(|id| id.to_string())
|
||||
})
|
||||
}
|
||||
|
||||
pub fn has_tool_result(events: &[Value]) -> bool {
|
||||
events.iter().any(|event| {
|
||||
if event.get("type").and_then(Value::as_str) != Some("item.completed") {
|
||||
return false;
|
||||
}
|
||||
let item = match event.get("data").and_then(|data| data.get("item")) {
|
||||
Some(item) => item,
|
||||
None => return false,
|
||||
};
|
||||
item.get("kind").and_then(Value::as_str) == Some("tool_result")
|
||||
})
|
||||
}
|
||||
|
||||
pub fn expect_basic_sequence(events: &[Value]) {
|
||||
assert!(has_event_type(events, "session.started"), "session.started missing");
|
||||
let item_id = find_assistant_message_item(events).expect("assistant message missing");
|
||||
let started_seq = find_item_event_seq(events, "item.started", &item_id)
|
||||
.expect("item.started missing");
|
||||
// Intentionally require deltas here to validate our synthetic delta behavior.
|
||||
let delta_seq = find_item_event_seq(events, "item.delta", &item_id)
|
||||
.expect("item.delta missing");
|
||||
let completed_seq = find_item_event_seq(events, "item.completed", &item_id)
|
||||
.expect("item.completed missing");
|
||||
assert!(started_seq < delta_seq, "item.started must precede delta");
|
||||
assert!(delta_seq < completed_seq, "delta must precede completion");
|
||||
}
|
||||
|
|
@ -291,6 +291,57 @@ async fn read_sse_events(
|
|||
events
|
||||
}
|
||||
|
||||
async fn read_turn_stream_events(
|
||||
app: &Router,
|
||||
session_id: &str,
|
||||
timeout: Duration,
|
||||
) -> Vec<Value> {
|
||||
let request = Request::builder()
|
||||
.method(Method::POST)
|
||||
.uri(format!("/v1/sessions/{session_id}/messages/stream"))
|
||||
.header("content-type", "application/json")
|
||||
.body(Body::from(json!({ "message": PROMPT }).to_string()))
|
||||
.expect("turn stream request");
|
||||
let response = app
|
||||
.clone()
|
||||
.oneshot(request)
|
||||
.await
|
||||
.expect("turn stream response");
|
||||
assert_eq!(response.status(), StatusCode::OK, "turn stream status");
|
||||
|
||||
let mut stream = response.into_body().into_data_stream();
|
||||
let mut buffer = String::new();
|
||||
let mut events = Vec::new();
|
||||
let start = Instant::now();
|
||||
let mut ended = false;
|
||||
loop {
|
||||
let remaining = match timeout.checked_sub(start.elapsed()) {
|
||||
Some(remaining) if !remaining.is_zero() => remaining,
|
||||
_ => break,
|
||||
};
|
||||
let next = tokio::time::timeout(remaining, stream.next()).await;
|
||||
let chunk: Bytes = match next {
|
||||
Ok(Some(Ok(chunk))) => chunk,
|
||||
Ok(Some(Err(_))) => break,
|
||||
Ok(None) => {
|
||||
ended = true;
|
||||
break;
|
||||
}
|
||||
Err(_) => break,
|
||||
};
|
||||
buffer.push_str(&String::from_utf8_lossy(&chunk));
|
||||
while let Some(idx) = buffer.find("\n\n") {
|
||||
let block = buffer[..idx].to_string();
|
||||
buffer = buffer[idx + 2..].to_string();
|
||||
if let Some(event) = parse_sse_block(&block) {
|
||||
events.push(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
assert!(ended, "turn stream did not close before timeout");
|
||||
events
|
||||
}
|
||||
|
||||
fn parse_sse_block(block: &str) -> Option<Value> {
|
||||
let mut data_lines = Vec::new();
|
||||
for line in block.lines() {
|
||||
|
|
@ -798,6 +849,27 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) {
|
|||
});
|
||||
}
|
||||
|
||||
async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) {
|
||||
let _guard = apply_credentials(&config.credentials);
|
||||
install_agent(app, config.agent).await;
|
||||
|
||||
let session_id = format!("turn-{}", config.agent.as_str());
|
||||
create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await;
|
||||
|
||||
let events = read_turn_stream_events(app, &session_id, Duration::from_secs(120)).await;
|
||||
let events = truncate_after_first_stop(&events);
|
||||
assert!(
|
||||
!events.is_empty(),
|
||||
"no turn stream events collected for {}",
|
||||
config.agent
|
||||
);
|
||||
assert!(
|
||||
should_stop(&events),
|
||||
"timed out waiting for assistant/error event for {}",
|
||||
config.agent
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auth_snapshots() {
|
||||
let token = "test-token";
|
||||
|
|
@ -1294,6 +1366,20 @@ async fn sse_events_snapshots() {
|
|||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn turn_stream_route() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
let app = TestApp::new();
|
||||
for config in &configs {
|
||||
// OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming.
|
||||
// See: https://github.com/opencode-ai/opencode/issues/XXX
|
||||
if config.agent == AgentId::Opencode {
|
||||
continue;
|
||||
}
|
||||
run_turn_stream_check(&app.app, config).await;
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn concurrency_snapshots() {
|
||||
let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents");
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 881
|
||||
expression: normalize_agent_list(&agents)
|
||||
---
|
||||
agents:
|
||||
- id: amp
|
||||
- id: claude
|
||||
- id: codex
|
||||
- id: mock
|
||||
- id: opencode
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 934
|
||||
expression: normalize_create_session(&created)
|
||||
---
|
||||
healthy: true
|
||||
nativeSessionId: "<redacted>"
|
||||
|
|
|
|||
|
|
@ -1,36 +1,379 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 984
|
||||
expression: normalize_events(&permission_events)
|
||||
---
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 11
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 12
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 13
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 14
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 15
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 16
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 17
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 18
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 19
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 20
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 21
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 22
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 23
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 24
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 25
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 26
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 27
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 28
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 29
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 30
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 31
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 32
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 33
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 34
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 35
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 36
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 37
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 38
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 39
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 40
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 41
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 42
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 43
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 44
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 45
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 46
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,36 +1,275 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1106
|
||||
expression: normalize_events(&reject_events)
|
||||
---
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 11
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 12
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 13
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 14
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 15
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 16
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 17
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 18
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 19
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 20
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 21
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 22
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 23
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 24
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 25
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 26
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 27
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 28
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 29
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 30
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 31
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 32
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 33
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,36 +1,72 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1045
|
||||
expression: normalize_events(&question_events)
|
||||
---
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 810
|
||||
expression: "json!({\n \"status\": status.as_u16(), \"payload\": normalize_agent_list(&payload),\n})"
|
||||
---
|
||||
payload:
|
||||
|
|
@ -8,5 +7,6 @@ payload:
|
|||
- id: amp
|
||||
- id: claude
|
||||
- id: codex
|
||||
- id: mock
|
||||
- id: opencode
|
||||
status: 200
|
||||
|
|
|
|||
|
|
@ -1,69 +1,224 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1214
|
||||
expression: snapshot
|
||||
---
|
||||
session_a:
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 5
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 11
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 12
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 13
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
session_b:
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 5
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 11
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 12
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,36 +1,91 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 697
|
||||
expression: normalized
|
||||
---
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- metadata: true
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 5
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -1,36 +1,109 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 734
|
||||
expression: normalized
|
||||
---
|
||||
- agent: codex
|
||||
kind: started
|
||||
- metadata: true
|
||||
seq: 1
|
||||
started:
|
||||
message: session.created
|
||||
- agent: codex
|
||||
kind: started
|
||||
session: started
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: session.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 2
|
||||
started:
|
||||
message: thread/started
|
||||
- agent: codex
|
||||
kind: started
|
||||
seq: 3
|
||||
started:
|
||||
message: turn/started
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: in_progress
|
||||
seq: 3
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 4
|
||||
- agent: codex
|
||||
kind: message
|
||||
message:
|
||||
parts:
|
||||
- text: "<redacted>"
|
||||
type: text
|
||||
role: assistant
|
||||
source: daemon
|
||||
synthetic: true
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- text
|
||||
kind: message
|
||||
role: user
|
||||
status: completed
|
||||
seq: 5
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- item:
|
||||
content_types: []
|
||||
kind: message
|
||||
role: assistant
|
||||
status: in_progress
|
||||
seq: 6
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.started
|
||||
- item:
|
||||
content_types:
|
||||
- status
|
||||
kind: status
|
||||
role: system
|
||||
status: completed
|
||||
seq: 7
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 8
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 9
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 10
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- delta:
|
||||
delta: "<redacted>"
|
||||
item_id: "<redacted>"
|
||||
native_item_id: "<redacted>"
|
||||
seq: 11
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.delta
|
||||
- item:
|
||||
content_types:
|
||||
- reasoning
|
||||
kind: message
|
||||
role: assistant
|
||||
status: completed
|
||||
seq: 12
|
||||
source: agent
|
||||
synthetic: false
|
||||
type: item.completed
|
||||
|
|
|
|||
|
|
@ -31,10 +31,10 @@ pub fn event_to_universal_with_session(
|
|||
let event_type = event.get("type").and_then(Value::as_str).unwrap_or("");
|
||||
let mut conversions = match event_type {
|
||||
"system" => vec![system_event_to_universal(event)],
|
||||
"assistant" => assistant_event_to_universal(event),
|
||||
"tool_use" => tool_use_event_to_universal(event, session_id),
|
||||
"assistant" => assistant_event_to_universal(event, &session_id),
|
||||
"tool_use" => tool_use_event_to_universal(event, &session_id),
|
||||
"tool_result" => tool_result_event_to_universal(event),
|
||||
"result" => result_event_to_universal(event),
|
||||
"result" => result_event_to_universal(event, &session_id),
|
||||
_ => return Err(format!("unsupported Claude event type: {event_type}")),
|
||||
};
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ fn system_event_to_universal(event: &Value) -> EventConversion {
|
|||
.with_raw(Some(event.clone()))
|
||||
}
|
||||
|
||||
fn assistant_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
||||
fn assistant_event_to_universal(event: &Value, session_id: &str) -> Vec<EventConversion> {
|
||||
let mut conversions = Vec::new();
|
||||
let content = event
|
||||
.get("message")
|
||||
|
|
@ -62,7 +62,8 @@ fn assistant_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
|||
.cloned()
|
||||
.unwrap_or_default();
|
||||
|
||||
let message_id = next_temp_id("tmp_claude_message");
|
||||
// Use session-based native_item_id so `result` event can reference the same item
|
||||
let native_message_id = format!("{session_id}_message");
|
||||
let mut message_parts = Vec::new();
|
||||
|
||||
for block in content {
|
||||
|
|
@ -85,9 +86,9 @@ fn assistant_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
|||
.unwrap_or_else(|| next_temp_id("tmp_claude_tool"));
|
||||
let arguments = serde_json::to_string(&input).unwrap_or_else(|_| "{}".to_string());
|
||||
let tool_item = UniversalItem {
|
||||
item_id: next_temp_id("tmp_claude_tool_item"),
|
||||
item_id: String::new(),
|
||||
native_item_id: Some(call_id.clone()),
|
||||
parent_id: Some(message_id.clone()),
|
||||
parent_id: Some(native_message_id.clone()),
|
||||
kind: ItemKind::ToolCall,
|
||||
role: Some(ItemRole::Assistant),
|
||||
content: vec![ContentPart::ToolCall {
|
||||
|
|
@ -106,21 +107,23 @@ fn assistant_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
|||
}
|
||||
}
|
||||
|
||||
// `assistant` event emits item.started + item.delta only (in-progress state)
|
||||
// The `result` event will emit item.completed to finalize
|
||||
let message_item = UniversalItem {
|
||||
item_id: message_id,
|
||||
native_item_id: None,
|
||||
item_id: String::new(),
|
||||
native_item_id: Some(native_message_id.clone()),
|
||||
parent_id: None,
|
||||
kind: ItemKind::Message,
|
||||
role: Some(ItemRole::Assistant),
|
||||
content: message_parts.clone(),
|
||||
status: ItemStatus::Completed,
|
||||
status: ItemStatus::InProgress,
|
||||
};
|
||||
|
||||
conversions.extend(message_events(message_item, message_parts, true));
|
||||
conversions.extend(message_started_events(message_item, message_parts));
|
||||
conversions
|
||||
}
|
||||
|
||||
fn tool_use_event_to_universal(event: &Value, session_id: String) -> Vec<EventConversion> {
|
||||
fn tool_use_event_to_universal(event: &Value, session_id: &str) -> Vec<EventConversion> {
|
||||
let mut conversions = Vec::new();
|
||||
let tool_use = event.get("tool_use");
|
||||
let name = tool_use
|
||||
|
|
@ -156,7 +159,7 @@ fn tool_use_event_to_universal(event: &Value, session_id: String) -> Vec<EventCo
|
|||
|
||||
let arguments = serde_json::to_string(&input).unwrap_or_else(|_| "{}".to_string());
|
||||
let tool_item = UniversalItem {
|
||||
item_id: next_temp_id("tmp_claude_tool_item"),
|
||||
item_id: String::new(),
|
||||
native_item_id: Some(id.clone()),
|
||||
parent_id: None,
|
||||
kind: ItemKind::ToolCall,
|
||||
|
|
@ -222,22 +225,30 @@ fn tool_result_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
|||
conversions
|
||||
}
|
||||
|
||||
fn result_event_to_universal(event: &Value) -> Vec<EventConversion> {
|
||||
fn result_event_to_universal(event: &Value, session_id: &str) -> Vec<EventConversion> {
|
||||
// The `result` event completes the message started by `assistant`.
|
||||
// Use the same native_item_id so they link to the same universal item.
|
||||
let native_message_id = format!("{session_id}_message");
|
||||
let result_text = event
|
||||
.get("result")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
|
||||
let message_item = UniversalItem {
|
||||
item_id: next_temp_id("tmp_claude_result"),
|
||||
native_item_id: None,
|
||||
item_id: String::new(),
|
||||
native_item_id: Some(native_message_id),
|
||||
parent_id: None,
|
||||
kind: ItemKind::Message,
|
||||
role: Some(ItemRole::Assistant),
|
||||
content: vec![ContentPart::Text { text: result_text.clone() }],
|
||||
content: vec![ContentPart::Text { text: result_text }],
|
||||
status: ItemStatus::Completed,
|
||||
};
|
||||
message_events(message_item, vec![ContentPart::Text { text: result_text }], true)
|
||||
|
||||
vec![EventConversion::new(
|
||||
UniversalEventType::ItemCompleted,
|
||||
UniversalEventData::Item(ItemEventData { item: message_item }),
|
||||
)]
|
||||
}
|
||||
|
||||
fn item_events(item: UniversalItem, synthetic_start: bool) -> Vec<EventConversion> {
|
||||
|
|
@ -260,20 +271,18 @@ fn item_events(item: UniversalItem, synthetic_start: bool) -> Vec<EventConversio
|
|||
events
|
||||
}
|
||||
|
||||
fn message_events(item: UniversalItem, parts: Vec<ContentPart>, synthetic_start: bool) -> Vec<EventConversion> {
|
||||
/// Emits item.started + item.delta only (for `assistant` event).
|
||||
/// The item.completed will come from the `result` event.
|
||||
fn message_started_events(item: UniversalItem, parts: Vec<ContentPart>) -> Vec<EventConversion> {
|
||||
let mut events = Vec::new();
|
||||
if synthetic_start {
|
||||
let mut started_item = item.clone();
|
||||
started_item.status = ItemStatus::InProgress;
|
||||
events.push(
|
||||
EventConversion::new(
|
||||
UniversalEventType::ItemStarted,
|
||||
UniversalEventData::Item(ItemEventData { item: started_item }),
|
||||
)
|
||||
.synthetic(),
|
||||
);
|
||||
}
|
||||
|
||||
// Emit item.started (in-progress)
|
||||
events.push(EventConversion::new(
|
||||
UniversalEventType::ItemStarted,
|
||||
UniversalEventData::Item(ItemEventData { item: item.clone() }),
|
||||
));
|
||||
|
||||
// Emit item.delta with the text content
|
||||
let mut delta_text = String::new();
|
||||
for part in &parts {
|
||||
if let ContentPart::Text { text } = part {
|
||||
|
|
@ -281,23 +290,16 @@ fn message_events(item: UniversalItem, parts: Vec<ContentPart>, synthetic_start:
|
|||
}
|
||||
}
|
||||
if !delta_text.is_empty() {
|
||||
events.push(
|
||||
EventConversion::new(
|
||||
UniversalEventType::ItemDelta,
|
||||
UniversalEventData::ItemDelta(crate::ItemDeltaData {
|
||||
item_id: item.item_id.clone(),
|
||||
native_item_id: item.native_item_id.clone(),
|
||||
delta: delta_text,
|
||||
}),
|
||||
)
|
||||
.synthetic(),
|
||||
);
|
||||
events.push(EventConversion::new(
|
||||
UniversalEventType::ItemDelta,
|
||||
UniversalEventData::ItemDelta(crate::ItemDeltaData {
|
||||
item_id: item.item_id.clone(),
|
||||
native_item_id: item.native_item_id.clone(),
|
||||
delta: delta_text,
|
||||
}),
|
||||
));
|
||||
}
|
||||
|
||||
events.push(EventConversion::new(
|
||||
UniversalEventType::ItemCompleted,
|
||||
UniversalEventData::Item(ItemEventData { item }),
|
||||
));
|
||||
events
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue