From c91595d338112357b8d17b4d9e51e9c7af9b0c5b Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Mon, 26 Jan 2026 20:44:58 -0800 Subject: [PATCH] fix: normalize claude system events and refresh tests --- CLAUDE.md | 2 ++ README.md | 21 ++++++++++++--- ROADMAP.md | 2 ++ pnpm-lock.yaml | 18 ++++++++++++- .../packages/agent-management/src/agents.rs | 26 ++++++++++++++----- .../sandbox-agent/tests/http_sse_snapshots.rs | 6 ++--- ...ow_snapshots@permission_events_claude.snap | 5 ++-- ...apshots@question_reject_events_claude.snap | 5 ++-- ...napshots@question_reply_events_claude.snap | 4 ++- ...cy_snapshot@concurrency_events_claude.snap | 9 ++++--- ...tp_events_snapshot@http_events_claude.snap | 5 ++-- ...sse_events_snapshot@sse_events_claude.snap | 5 ++-- .../src/agents/claude.rs | 14 ++++++++++ todo.md | 2 ++ 14 files changed, 99 insertions(+), 25 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 172221a..47e5162 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,6 +34,8 @@ Universal schema guidance: - Do not make breaking changes to API endpoints. - When changing API routes, ensure the HTTP/SSE test suite has full coverage of every route. - When agent schema changes, ensure API tests cover the new schema and event shapes end-to-end. +- Never use synthetic data or mocked responses in tests. +- Never manually write agent types; always use generated types in `resources/agent-schemas/`. If types are broken, fix the generated types. ### CLI ⇄ HTTP endpoint map (keep in sync) diff --git a/README.md b/README.md index 3d2810d..be7db10 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,18 @@ Universal API for running Claude Code, Codex, OpenCode, and Amp inside sandboxes. 
- **Any coding agent**: Universal API to interact with all agents with full feature coverage -- **Server Mode**: Run as HTTP server from any sandbox provider or as TypeScript & Python SDK +- **Server, stdin/stdout, or SDK mode**: Run as an HTTP server, CLI using stdin/stdout, or with the SDK - **Universal session schema**: Universal schema to store agent transcripts - **Supports your sandbox provider**: Daytona, E2B, Vercel Sandboxes, and more - **Lightweight, portable Rust binary**: Install anywhere with 1 curl command - **OpenAPI spec**: Versioned API schema tracked in `sdks/openapi/openapi.json` +Coming soon: + +- **Vercel AI SDK Compatibility**: Works with existing AI SDK tooling, like `useChat` +- **Auto-configure MCP & Skills**: Auto-load MCP servers & skills for your agents +- **Process & logs manager**: Manage processes, logs, and ports for your agents to run background processes + ## Agent Support | Feature | [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) | [Codex](https://github.com/openai/codex) | [OpenCode](https://github.com/opencode-ai/opencode) | [Amp](https://ampcode.com) | @@ -62,13 +68,22 @@ Features out of scope: ## FAQ **Why not use PTY?** -PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts. The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable. + +PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts. + +The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable. **Why not use features that already exist on sandbox provider APIs?** -Sandbox providers focus on infrastructure (containers, VMs, networking). 
This project focuses specifically on coding agent orchestration—session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary. + +Sandbox providers focus on infrastructure (containers, VMs, networking). + +This project focuses specifically on coding agent orchestration: session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary. **Does it support [platform]?** The server is a single Rust binary that runs anywhere with a curl install. If your platform can run Linux binaries (Docker, VMs, etc.), it works. See the deployment guides for E2B, Daytona, Vercel Sandboxes, and Docker. **Can I use this with my personal API keys?** Yes. Use `sandbox-agent credentials extract-env` to extract API keys from your local agent configs (Claude Code, Codex, OpenCode, Amp) and pass them to the sandbox environment. + +**Why Rust?** +TODO diff --git a/ROADMAP.md b/ROADMAP.md index 71521bd..759898e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,5 +1,7 @@ ## soon +- implement stdin/stdout +- switch sdk to use stdin/stdout for embedded mode - discuss actor arch in readme + give example - skillfile - specifically include the release checklist diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 8592209..717e0f3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -12,7 +12,7 @@ importers: specifier: ^2.4.0 version: 2.7.6 - frontend/packages/web: + frontend/packages/inspector: dependencies: lucide-react: specifier: ^0.469.0 @@ -43,6 +43,12 @@ importers: specifier: ^5.4.7 version: 5.4.21(@types/node@22.19.7) + frontend/packages/website: + devDependencies: + vite: + specifier: ^5.4.7 + version: 5.4.21(@types/node@22.19.7) + resources/agent-schemas: dependencies: '@anthropic-ai/claude-code': @@ -68,6 +74,16 @@ importers: specifier: ^4.19.0 version: 4.21.0 + sdks/cli: {} + + sdks/cli/platforms/darwin-arm64: {} + + sdks/cli/platforms/darwin-x64: {} + + sdks/cli/platforms/linux-x64: {} + + 
sdks/cli/platforms/win32-x64: {} + sdks/typescript: devDependencies: '@types/node': diff --git a/server/packages/agent-management/src/agents.rs b/server/packages/agent-management/src/agents.rs index 9c68aba..5a74632 100644 --- a/server/packages/agent-management/src/agents.rs +++ b/server/packages/agent-management/src/agents.rs @@ -112,11 +112,13 @@ impl AgentManager { pub fn install(&self, agent: AgentId, options: InstallOptions) -> Result { let install_path = self.binary_path(agent); - if install_path.exists() && !options.reinstall { - return Ok(InstallResult { - path: install_path, - version: self.version(agent).unwrap_or(None), - }); + if !options.reinstall { + if let Ok(existing_path) = self.resolve_binary(agent) { + return Ok(InstallResult { + path: existing_path, + version: self.version(agent).unwrap_or(None), + }); + } } fs::create_dir_all(&self.install_dir)?; @@ -135,7 +137,9 @@ impl AgentManager { } pub fn is_installed(&self, agent: AgentId) -> bool { - self.binary_path(agent).exists() || find_in_path(agent.binary_name()).is_some() + self.binary_path(agent).exists() + || find_in_path(agent.binary_name()).is_some() + || default_install_dir().join(agent.binary_name()).exists() } pub fn binary_path(&self, agent: AgentId) -> PathBuf { @@ -368,6 +372,10 @@ impl AgentManager { if let Some(path) = find_in_path(agent.binary_name()) { return Ok(path); } + let fallback = default_install_dir().join(agent.binary_name()); + if fallback.exists() { + return Ok(fallback); + } Err(AgentError::BinaryNotFound { agent }) } } @@ -780,6 +788,12 @@ fn find_in_path(binary_name: &str) -> Option { None } +fn default_install_dir() -> PathBuf { + dirs::data_dir() + .map(|dir| dir.join("sandbox-agent").join("bin")) + .unwrap_or_else(|| PathBuf::from(".").join(".sandbox-agent").join("bin")) +} + fn download_bytes(url: &Url) -> Result, AgentError> { let client = Client::builder().build()?; let mut response = client.get(url.clone()).send()?; diff --git 
a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs index e8b8e1d..3294305 100644 --- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs @@ -19,7 +19,7 @@ use tower_http::cors::CorsLayer; const PROMPT: &str = "Reply with exactly the single word OK."; const PERMISSION_PROMPT: &str = "List files in the current directory using available tools."; const QUESTION_PROMPT: &str = - "Ask the user a multiple-choice question with options yes/no using any built-in AskUserQuestion tool, then wait."; + "Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself."; struct TestApp { app: Router, @@ -1022,7 +1022,7 @@ async fn approval_flow_snapshots() { } let question_reply_session = format!("question-reply-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reply_session, test_permission_mode(config.agent)).await; + create_session(&app.app, config.agent, &question_reply_session, "plan").await; let status = send_status( &app.app, Method::POST, @@ -1083,7 +1083,7 @@ async fn approval_flow_snapshots() { } let question_reject_session = format!("question-reject-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &question_reject_session, test_permission_mode(config.agent)).await; + create_session(&app.app, config.agent, &question_reject_session, "plan").await; let status = send_status( &app.app, Method::POST, diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap index e6e25c0..6f1749c 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap +++ 
b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap @@ -1,6 +1,5 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 978 expression: normalize_events(&permission_events) --- - agent: claude @@ -9,8 +8,10 @@ expression: normalize_events(&permission_events) started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap index 159ca8d..57d467f 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap @@ -1,6 +1,5 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1100 expression: normalize_events(&reject_events) --- - agent: claude @@ -9,8 +8,10 @@ expression: normalize_events(&reject_events) started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap index 99f687c..35076ce 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap +++ 
b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap @@ -8,8 +8,10 @@ expression: normalize_events(&question_events) started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap index 012d604..1951476 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap @@ -1,6 +1,5 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1232 expression: snapshot --- session_a: @@ -10,8 +9,10 @@ session_a: started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: @@ -27,8 +28,10 @@ session_b: started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap index ff7d53d..310840e 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap @@ -1,6 +1,5 @@ --- source: 
server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 721 expression: normalized --- - agent: claude @@ -9,8 +8,10 @@ expression: normalized started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap index e8d3a35..310840e 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap @@ -1,6 +1,5 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 729 expression: normalized --- - agent: claude @@ -9,8 +8,10 @@ expression: normalized started: message: session.created - agent: claude - kind: unknown + kind: started seq: 2 + started: + message: system.init - agent: claude kind: message message: diff --git a/server/packages/universal-agent-schema/src/agents/claude.rs b/server/packages/universal-agent-schema/src/agents/claude.rs index 13fa2da..f147c47 100644 --- a/server/packages/universal-agent-schema/src/agents/claude.rs +++ b/server/packages/universal-agent-schema/src/agents/claude.rs @@ -7,6 +7,7 @@ use crate::{ QuestionInfo, QuestionOption, QuestionRequest, + Started, UniversalEventData, UniversalMessage, UniversalMessageParsed, @@ -20,6 +21,7 @@ pub fn event_to_universal_with_session( ) -> EventConversion { let event_type = event.get("type").and_then(Value::as_str).unwrap_or(""); match event_type { + "system" => system_event_to_universal(event), "assistant" => assistant_event_to_universal(event), "tool_use" => tool_use_event_to_universal(event, session_id), "tool_result" => 
tool_result_event_to_universal(event), @@ -114,6 +116,18 @@ fn assistant_event_to_universal(event: &Value) -> EventConversion { EventConversion::new(UniversalEventData::Message { message }) } +fn system_event_to_universal(event: &Value) -> EventConversion { + let subtype = event + .get("subtype") + .and_then(Value::as_str) + .unwrap_or("system"); + let started = Started { + message: Some(format!("system.{subtype}")), + details: Some(event.clone()), + }; + EventConversion::new(UniversalEventData::Started { started }) +} + fn tool_use_event_to_universal(event: &Value, session_id: String) -> EventConversion { let tool_use = event.get("tool_use"); let name = tool_use diff --git a/todo.md b/todo.md index 2202d51..60936f5 100644 --- a/todo.md +++ b/todo.md @@ -8,6 +8,8 @@ - [x] Implement 2-way converters: - [x] Universal input message <-> agent-specific input - [x] Universal event <-> agent-specific event +- [x] Normalize Claude system/init events into universal started events +- [x] Support Codex CLI type-based event format in universal converter - [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary - [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently