fix: normalize claude system events and refresh tests

2026-04-15 07:04:48 +00:00 · 2026-01-26 20:44:58 -08:00 · 2026-01-26 20:44:58 -08:00 · c91595d338
commit c91595d338
parent fdeef51f9c
14 changed files with 99 additions and 25 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -34,6 +34,8 @@ Universal schema guidance:
 - Do not make breaking changes to API endpoints.
 - When changing API routes, ensure the HTTP/SSE test suite has full coverage of every route.
 - When agent schema changes, ensure API tests cover the new schema and event shapes end-to-end.
+- Never use synthetic data or mocked responses in tests.
+- Never manually write agent types; always use generated types in `resources/agent-schemas/`. If types are broken, fix the generated types.

 ### CLI ⇄ HTTP endpoint map (keep in sync)

--- a/README.md
+++ b/README.md
@ -3,12 +3,18 @@
 Universal API for running Claude Code, Codex, OpenCode, and Amp inside sandboxes.

 - **Any coding agent**: Universal API to interact with all agents with full feature coverage
- **Server Mode**: Run as HTTP server from any sandbox provider or as TypeScript & Python SDK
+- **Server, stdin/stdout, or SDK mode**: Run as an HTTP server, as a CLI over stdin/stdout, or embedded via the SDK
 - **Universal session schema**: Universal schema to store agent transcripts
 - **Supports your sandbox provider**: Daytona, E2B, Vercel Sandboxes, and more
 - **Lightweight, portable Rust binary**: Install anywhere with 1 curl command
 - **OpenAPI spec**: Versioned API schema tracked in `sdks/openapi/openapi.json`

+Coming soon:
+
+- **Vercel AI SDK Compatibility**: Works with existing AI SDK tooling, like `useChat`
+- **Auto-configure MCP & Skills**: Auto-load MCP servers & skills for your agents
+- **Process & logs manager**: Manage processes, logs, and ports so your agents can run background processes
+
 ## Agent Support

 | Feature | [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) | [Codex](https://github.com/openai/codex) | [OpenCode](https://github.com/opencode-ai/opencode) | [Amp](https://ampcode.com) |
@ -62,13 +68,22 @@ Features out of scope:
 ## FAQ

 **Why not use PTY?**
-PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts. The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.
+
+PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts.
+
+The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.

 **Why not use features that already exist on sandbox provider APIs?**
-Sandbox providers focus on infrastructure (containers, VMs, networking). This project focuses specifically on coding agent orchestration—session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.
+
+Sandbox providers focus on infrastructure (containers, VMs, networking).
+
+This project focuses specifically on coding agent orchestration: session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.

 **Does it support [platform]?**
 The server is a single Rust binary that runs anywhere with a curl install. If your platform can run Linux binaries (Docker, VMs, etc.), it works. See the deployment guides for E2B, Daytona, Vercel Sandboxes, and Docker.

 **Can I use this with my personal API keys?**
 Yes. Use `sandbox-agent credentials extract-env` to extract API keys from your local agent configs (Claude Code, Codex, OpenCode, Amp) and pass them to the sandbox environment.
+
+**Why Rust?**
+TODO
--- a/ROADMAP.md
+++ b/ROADMAP.md
@ -1,5 +1,7 @@
 ## soon

+- implement stdin/stdout
+- switch sdk to use stdin/stdout for embedded mode
 - discuss actor arch in readme + give example
 - skillfile
    - specifically include the release checklist
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@ -12,7 +12,7 @@ importers:
        specifier: ^2.4.0
        version: 2.7.6

-  frontend/packages/web:
+  frontend/packages/inspector:
    dependencies:
      lucide-react:
        specifier: ^0.469.0
@ -43,6 +43,12 @@ importers:
        specifier: ^5.4.7
        version: 5.4.21(@types/node@22.19.7)

+  frontend/packages/website:
+    devDependencies:
+      vite:
+        specifier: ^5.4.7
+        version: 5.4.21(@types/node@22.19.7)
+
  resources/agent-schemas:
    dependencies:
      '@anthropic-ai/claude-code':
@ -68,6 +74,16 @@ importers:
        specifier: ^4.19.0
        version: 4.21.0

+  sdks/cli: {}
+
+  sdks/cli/platforms/darwin-arm64: {}
+
+  sdks/cli/platforms/darwin-x64: {}
+
+  sdks/cli/platforms/linux-x64: {}
+
+  sdks/cli/platforms/win32-x64: {}
+
  sdks/typescript:
    devDependencies:
      '@types/node':
--- a/server/packages/agent-management/src/agents.rs
+++ b/server/packages/agent-management/src/agents.rs
@ -112,11 +112,13 @@ impl AgentManager {

    pub fn install(&self, agent: AgentId, options: InstallOptions) -> Result<InstallResult, AgentError> {
        let install_path = self.binary_path(agent);
-        if install_path.exists() && !options.reinstall {
-            return Ok(InstallResult {
-                path: install_path,
-                version: self.version(agent).unwrap_or(None),
-            });
+        if !options.reinstall {
+            if let Ok(existing_path) = self.resolve_binary(agent) {
+                return Ok(InstallResult {
+                    path: existing_path,
+                    version: self.version(agent).unwrap_or(None),
+                });
+            }
        }

        fs::create_dir_all(&self.install_dir)?;
@ -135,7 +137,9 @@ impl AgentManager {
    }

    pub fn is_installed(&self, agent: AgentId) -> bool {
-        self.binary_path(agent).exists() || find_in_path(agent.binary_name()).is_some()
+        self.binary_path(agent).exists()
+            || find_in_path(agent.binary_name()).is_some()
+            || default_install_dir().join(agent.binary_name()).exists()
    }

    pub fn binary_path(&self, agent: AgentId) -> PathBuf {
@ -368,6 +372,10 @@ impl AgentManager {
        if let Some(path) = find_in_path(agent.binary_name()) {
            return Ok(path);
        }
+        let fallback = default_install_dir().join(agent.binary_name());
+        if fallback.exists() {
+            return Ok(fallback);
+        }
        Err(AgentError::BinaryNotFound { agent })
    }
 }
@ -780,6 +788,12 @@ fn find_in_path(binary_name: &str) -> Option<PathBuf> {
    None
 }

+fn default_install_dir() -> PathBuf {
+    dirs::data_dir()
+        .map(|dir| dir.join("sandbox-agent").join("bin"))
+        .unwrap_or_else(|| PathBuf::from(".").join(".sandbox-agent").join("bin"))
+}
+
 fn download_bytes(url: &Url) -> Result<Vec<u8>, AgentError> {
    let client = Client::builder().build()?;
    let mut response = client.get(url.clone()).send()?;
--- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
+++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs
@ -19,7 +19,7 @@ use tower_http::cors::CorsLayer;
 const PROMPT: &str = "Reply with exactly the single word OK.";
 const PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
 const QUESTION_PROMPT: &str =
-    "Ask the user a multiple-choice question with options yes/no using any built-in AskUserQuestion tool, then wait.";
+    "Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";

 struct TestApp {
    app: Router,
@ -1022,7 +1022,7 @@ async fn approval_flow_snapshots() {
        }

        let question_reply_session = format!("question-reply-{}", config.agent.as_str());
-        create_session(&app.app, config.agent, &question_reply_session, test_permission_mode(config.agent)).await;
+        create_session(&app.app, config.agent, &question_reply_session, "plan").await;
        let status = send_status(
            &app.app,
            Method::POST,
@ -1083,7 +1083,7 @@ async fn approval_flow_snapshots() {
        }

        let question_reject_session = format!("question-reject-{}", config.agent.as_str());
-        create_session(&app.app, config.agent, &question_reject_session, test_permission_mode(config.agent)).await;
+        create_session(&app.app, config.agent, &question_reject_session, "plan").await;
        let status = send_status(
            &app.app,
            Method::POST,
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_claude.snap
@ -1,6 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 978
 expression: normalize_events(&permission_events)
 ---
 - agent: claude
@ -9,8 +8,10 @@ expression: normalize_events(&permission_events)
  started:
    message: session.created
 - agent: claude
-  kind: unknown
+  kind: started
  seq: 2
+  started:
+    message: system.init
 - agent: claude
  kind: message
  message:
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_claude.snap
@ -1,6 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1100
 expression: normalize_events(&reject_events)
 ---
 - agent: claude
@ -9,8 +8,10 @@ expression: normalize_events(&reject_events)
  started:
    message: session.created
 - agent: claude
-  kind: unknown
+  kind: started
  seq: 2
+  started:
+    message: system.init
 - agent: claude
  kind: message
  message:
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_claude.snap
@ -8,8 +8,10 @@ expression: normalize_events(&question_events)
  started:
    message: session.created
 - agent: claude
-  kind: unknown
+  kind: started
  seq: 2
+  started:
+    message: system.init
 - agent: claude
  kind: message
  message:
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_claude.snap
@ -1,6 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 1232
 expression: snapshot
 ---
 session_a:
@ -10,8 +9,10 @@ session_a:
    started:
      message: session.created
  - agent: claude
-    kind: unknown
+    kind: started
    seq: 2
+    started:
+      message: system.init
  - agent: claude
    kind: message
    message:
@ -27,8 +28,10 @@ session_b:
    started:
      message: session.created
  - agent: claude
-    kind: unknown
+    kind: started
    seq: 2
+    started:
+      message: system.init
  - agent: claude
    kind: message
    message:
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_claude.snap
@ -1,6 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 721
 expression: normalized
 ---
 - agent: claude
@ -9,8 +8,10 @@ expression: normalized
  started:
    message: session.created
 - agent: claude
-  kind: unknown
+  kind: started
  seq: 2
+  started:
+    message: system.init
 - agent: claude
  kind: message
  message:
--- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
+++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_claude.snap
@ -1,6 +1,5 @@
 ---
 source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
-assertion_line: 729
 expression: normalized
 ---
 - agent: claude
@ -9,8 +8,10 @@ expression: normalized
  started:
    message: session.created
 - agent: claude
-  kind: unknown
+  kind: started
  seq: 2
+  started:
+    message: system.init
 - agent: claude
  kind: message
  message:
--- a/server/packages/universal-agent-schema/src/agents/claude.rs
+++ b/server/packages/universal-agent-schema/src/agents/claude.rs
@ -7,6 +7,7 @@ use crate::{
    QuestionInfo,
    QuestionOption,
    QuestionRequest,
+    Started,
    UniversalEventData,
    UniversalMessage,
    UniversalMessageParsed,
@ -20,6 +21,7 @@ pub fn event_to_universal_with_session(
 ) -> EventConversion {
    let event_type = event.get("type").and_then(Value::as_str).unwrap_or("");
    match event_type {
+        "system" => system_event_to_universal(event),
        "assistant" => assistant_event_to_universal(event),
        "tool_use" => tool_use_event_to_universal(event, session_id),
        "tool_result" => tool_result_event_to_universal(event),
@ -114,6 +116,18 @@ fn assistant_event_to_universal(event: &Value) -> EventConversion {
    EventConversion::new(UniversalEventData::Message { message })
 }

+fn system_event_to_universal(event: &Value) -> EventConversion {
+    let subtype = event
+        .get("subtype")
+        .and_then(Value::as_str)
+        .unwrap_or("system");
+    let started = Started {
+        message: Some(format!("system.{subtype}")),
+        details: Some(event.clone()),
+    };
+    EventConversion::new(UniversalEventData::Started { started })
+}
+
 fn tool_use_event_to_universal(event: &Value, session_id: String) -> EventConversion {
    let tool_use = event.get("tool_use");
    let name = tool_use
--- a/todo.md
+++ b/todo.md
@ -8,6 +8,8 @@
 - [x] Implement 2-way converters:
  - [x] Universal input message <-> agent-specific input
  - [x] Universal event <-> agent-specific event
+- [x] Normalize Claude system/init events into universal started events
+- [x] Support Codex CLI type-based event format in universal converter
 - [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary
 - [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently