diff --git a/docs/conversion.md b/docs/conversion.mdx similarity index 100% rename from docs/conversion.md rename to docs/conversion.mdx diff --git a/frontend/packages/inspector/src/App.tsx b/frontend/packages/inspector/src/App.tsx index 8b2e478..374885f 100644 --- a/frontend/packages/inspector/src/App.tsx +++ b/frontend/packages/inspector/src/App.tsx @@ -51,10 +51,24 @@ const getDefaultEndpoint = () => { return origin; }; +const getInitialConnection = () => { + if (typeof window === "undefined") { + return { endpoint: "http://127.0.0.1:2468", token: "" }; + } + const params = new URLSearchParams(window.location.search); + const urlParam = params.get("url")?.trim(); + const tokenParam = params.get("token") ?? ""; + return { + endpoint: urlParam && urlParam.length > 0 ? urlParam : getDefaultEndpoint(), + token: tokenParam + }; +}; + export default function App() { const issueTrackerUrl = "https://github.com/rivet-dev/sandbox-agent/issues/new"; - const [endpoint, setEndpoint] = useState(getDefaultEndpoint); - const [token, setToken] = useState(""); + const initialConnectionRef = useRef(getInitialConnection()); + const [endpoint, setEndpoint] = useState(initialConnectionRef.current.endpoint); + const [token, setToken] = useState(initialConnectionRef.current.token); const [connected, setConnected] = useState(false); const [connecting, setConnecting] = useState(false); const [connectError, setConnectError] = useState(null); diff --git a/frontend/packages/inspector/src/components/agents/CapabilityBadges.tsx b/frontend/packages/inspector/src/components/agents/CapabilityBadges.tsx index 8e91c89..1994fd7 100644 --- a/frontend/packages/inspector/src/components/agents/CapabilityBadges.tsx +++ b/frontend/packages/inspector/src/components/agents/CapabilityBadges.tsx @@ -5,6 +5,7 @@ import { Brain, Download, FileDiff, + Gauge, GitBranch, HelpCircle, Image, @@ -30,6 +31,7 @@ const badges = [ { key: "sessionLifecycle", label: "Lifecycle", icon: PlayCircle }, { key: "errorEvents", 
label: "Errors", icon: AlertTriangle }, { key: "reasoning", label: "Reasoning", icon: Brain }, + { key: "status", label: "Status", icon: Gauge }, { key: "commandExecution", label: "Commands", icon: Terminal }, { key: "fileChanges", label: "File Changes", icon: FileDiff }, { key: "mcpTools", label: "MCP", icon: Plug }, diff --git a/frontend/packages/inspector/src/types/agents.ts b/frontend/packages/inspector/src/types/agents.ts index 47bf4f9..a8e3acc 100644 --- a/frontend/packages/inspector/src/types/agents.ts +++ b/frontend/packages/inspector/src/types/agents.ts @@ -8,6 +8,7 @@ export type AgentCapabilitiesView = AgentCapabilities & { sessionLifecycle?: boolean; errorEvents?: boolean; reasoning?: boolean; + status?: boolean; commandExecution?: boolean; fileChanges?: boolean; mcpTools?: boolean; @@ -26,6 +27,7 @@ export const emptyCapabilities: AgentCapabilitiesView = { sessionLifecycle: false, errorEvents: false, reasoning: false, + status: false, commandExecution: false, fileChanges: false, mcpTools: false, diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index d0a6c58..af3e62f 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -1,7 +1,7 @@ { "name": "sandbox-agent", "version": "0.1.0", - "description": "TypeScript SDK for sandbox-agent", + "description": "Universal API for automatic coding agents in sandboxes. 
Supports Claude Code, Codex, OpenCode, and Amp.", "license": "Apache-2.0", "repository": { "type": "git", diff --git a/sdks/typescript/src/generated/openapi.ts b/sdks/typescript/src/generated/openapi.ts index 910415c..652db55 100644 --- a/sdks/typescript/src/generated/openapi.ts +++ b/sdks/typescript/src/generated/openapi.ts @@ -67,6 +67,7 @@ export interface components { sessionLifecycle: boolean; /** @description Whether this agent uses a shared long-running server process (vs per-turn subprocess) */ sharedProcess: boolean; + status: boolean; streamingDeltas: boolean; textMessages: boolean; toolCalls: boolean; diff --git a/server/CLAUDE.md b/server/CLAUDE.md index 1167240..6de8a1e 100644 --- a/server/CLAUDE.md +++ b/server/CLAUDE.md @@ -76,6 +76,8 @@ To keep snapshots deterministic: - Permission flow snapshots are truncated after the permission request (or first assistant) event. - Unknown events are preserved as `kind: unknown` (raw payload in universal schema). - Prefer snapshot-based event skeleton assertions over manual event-order assertions in tests. +- **Never update snapshots based on any agent that is not the mock agent.** The mock agent is the source of truth for snapshots; other agents must be compared against the mock snapshots without regenerating them. +- Agent-specific endpoints keep per-agent snapshots; any session-related snapshots must use the mock baseline as the single source of truth. 
## Typical commands diff --git a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs index 5cbfbee..75fdd9e 100644 --- a/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/http/http_sse_snapshots.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::time::{Duration, Instant}; use axum::body::{Body, Bytes}; @@ -12,7 +12,7 @@ use tempfile::TempDir; use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; use sandbox_agent_agent_management::testing::{test_agents_from_env, TestAgentConfig}; use sandbox_agent_agent_credentials::ExtractedCredentials; -use sandbox_agent::router::{build_router, AppState, AuthConfig}; +use sandbox_agent::router::{build_router, AgentCapabilities, AgentListResponse, AppState, AuthConfig}; use tower::util::ServiceExt; use tower_http::cors::CorsLayer; @@ -455,6 +455,12 @@ fn normalize_event(event: &Value, seq: usize) -> Value { if let Some(event_type) = event.get("type").and_then(Value::as_str) { map.insert("type".to_string(), Value::String(event_type.to_string())); } + if let Some(source) = event.get("source").and_then(Value::as_str) { + map.insert("source".to_string(), Value::String(source.to_string())); + } + if let Some(synthetic) = event.get("synthetic").and_then(Value::as_bool) { + map.insert("synthetic".to_string(), Value::Bool(synthetic)); + } let data = event.get("data").unwrap_or(&Value::Null); match event.get("type").and_then(Value::as_str).unwrap_or("") { "session.started" => { @@ -668,6 +674,17 @@ fn normalize_health(value: &Value) -> Value { Value::Object(map) } +async fn fetch_capabilities(app: &Router) -> HashMap { + let (status, payload) = send_json(app, Method::GET, "/v1/agents", None).await; + assert_eq!(status, StatusCode::OK, "list agents"); + let response: AgentListResponse = serde_json::from_value(payload).expect("agents payload"); 
+ response + .agents + .into_iter() + .map(|agent| (agent.id, agent.capabilities)) + .collect() +} + fn snapshot_status(status: StatusCode) -> Value { json!({ "status": status.as_u16() }) } @@ -1077,200 +1094,208 @@ async fn api_endpoints_snapshots() { async fn approval_flow_snapshots() { let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; for config in &configs { // OpenCode doesn't support "plan" permission mode required for approval flows if config.agent == AgentId::Opencode { continue; } + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); let _guard = apply_credentials(&config.credentials); install_agent(&app.app, config.agent).await; - let permission_session = format!("perm-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &permission_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{permission_session}/messages"), - Some(json!({ "message": PERMISSION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); - - let permission_events = poll_events_until_match( - &app.app, - &permission_session, - Duration::from_secs(120), - |events| find_permission_id(events).is_some() || should_stop(events), - ) - .await; - let permission_events = truncate_permission_events(&permission_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&permission_events)); - }); - - if let Some(permission_id) = find_permission_id(&permission_events) { + if caps.plan_mode && caps.permissions { + let permission_session = format!("perm-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &permission_session, "plan").await; let status = send_status( &app.app, Method::POST, - &format!( - 
"/v1/sessions/{permission_session}/permissions/{permission_id}/reply" - ), - Some(json!({ "reply": "once" })), + &format!("/v1/sessions/{permission_session}/messages"), + Some(json!({ "message": PERMISSION_PROMPT })), ) .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( + assert_eq!(status, StatusCode::NO_CONTENT, "send permission prompt"); + + let permission_events = poll_events_until_match( &app.app, - Method::POST, - &format!( - "/v1/sessions/{permission_session}/permissions/missing-permission/reply" - ), - Some(json!({ "reply": "once" })), + &permission_session, + Duration::from_secs(120), + |events| find_permission_id(events).is_some() || should_stop(events), ) .await; - assert!(!status.is_success(), "missing permission id should error"); + let permission_events = truncate_permission_events(&permission_events); insta::with_settings!({ - snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)), + snapshot_suffix => snapshot_name("permission_events", Some(config.agent)), }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); + insta::assert_yaml_snapshot!(normalize_events(&permission_events)); }); + + if let Some(permission_id) = find_permission_id(&permission_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/{permission_id}/reply" + ), + Some(json!({ "reply": "once" })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply permission"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("permission_reply", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, 
+ Method::POST, + &format!( + "/v1/sessions/{permission_session}/permissions/missing-permission/reply" + ), + Some(json!({ "reply": "once" })), + ) + .await; + assert!(!status.is_success(), "missing permission id should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("permission_reply_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } } - let question_reply_session = format!("question-reply-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reply_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{question_reply_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); - - let question_events = poll_events_until_match( - &app.app, - &question_reply_session, - Duration::from_secs(120), - |events| find_question_id_and_answers(events).is_some() || should_stop(events), - ) - .await; - let question_events = truncate_question_events(&question_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&question_events)); - }); - - if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) { + if caps.questions { + let question_reply_session = format!("question-reply-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reply_session, "plan").await; let status = send_status( &app.app, Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" - ), - Some(json!({ "answers": answers })), + &format!("/v1/sessions/{question_reply_session}/messages"), + Some(json!({ "message": QUESTION_PROMPT })), ) .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); - 
insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt"); + + let question_events = poll_events_until_match( &app.app, - Method::POST, - &format!( - "/v1/sessions/{question_reply_session}/questions/missing-question/reply" - ), - Some(json!({ "answers": [] })), + &question_reply_session, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), ) .await; - assert!(!status.is_success(), "missing question id should error"); + let question_events = truncate_question_events(&question_events); insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)), + snapshot_suffix => snapshot_name("question_reply_events", Some(config.agent)), }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); + insta::assert_yaml_snapshot!(normalize_events(&question_events)); }); - } - let question_reject_session = format!("question-reject-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reject_session, "plan").await; - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{question_reject_session}/messages"), - Some(json!({ "message": QUESTION_PROMPT })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); + if let Some((question_id, answers)) = find_question_id_and_answers(&question_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/{question_id}/reply" + ), + Some(json!({ "answers": answers })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reply question"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reply", 
Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reply_session}/questions/missing-question/reply" + ), + Some(json!({ "answers": [] })), + ) + .await; + assert!(!status.is_success(), "missing question id should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reply_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } - let reject_events = poll_events_until_match( - &app.app, - &question_reject_session, - Duration::from_secs(120), - |events| find_question_id_and_answers(events).is_some() || should_stop(events), - ) - .await; - let reject_events = truncate_question_events(&reject_events); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(normalize_events(&reject_events)); - }); - - if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { + let question_reject_session = format!("question-reject-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &question_reject_session, "plan").await; let status = send_status( &app.app, Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" - ), - None, + &format!("/v1/sessions/{question_reject_session}/messages"), + Some(json!({ "message": QUESTION_PROMPT })), ) .await; - assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); - insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject", Some(config.agent)), - }, { - insta::assert_yaml_snapshot!(snapshot_status(status)); - }); - } else { - let (status, payload) = send_json( + assert_eq!(status, StatusCode::NO_CONTENT, "send question prompt reject"); + + let reject_events = poll_events_until_match( &app.app, 
- Method::POST, - &format!( - "/v1/sessions/{question_reject_session}/questions/missing-question/reject" - ), - None, + &question_reject_session, + Duration::from_secs(120), + |events| find_question_id_and_answers(events).is_some() || should_stop(events), ) .await; - assert!(!status.is_success(), "missing question id reject should error"); + let reject_events = truncate_question_events(&reject_events); insta::with_settings!({ - snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)), + snapshot_suffix => snapshot_name("question_reject_events", Some(config.agent)), }, { - insta::assert_yaml_snapshot!(json!({ - "status": status.as_u16(), - "payload": payload, - })); + insta::assert_yaml_snapshot!(normalize_events(&reject_events)); }); + + if let Some((question_id, _)) = find_question_id_and_answers(&reject_events) { + let status = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/{question_id}/reject" + ), + None, + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "reject question"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reject", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(snapshot_status(status)); + }); + } else { + let (status, payload) = send_json( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{question_reject_session}/questions/missing-question/reject" + ), + None, + ) + .await; + assert!(!status.is_success(), "missing question id reject should error"); + insta::with_settings!({ + snapshot_suffix => snapshot_name("question_reject_missing", Some(config.agent)), + }, { + insta::assert_yaml_snapshot!(json!({ + "status": status.as_u16(), + "payload": payload, + })); + }); + } } } }