fix: normalize claude system events and refresh tests

This commit is contained in:
Nathan Flurry 2026-01-26 20:44:58 -08:00
parent fdeef51f9c
commit c91595d338
14 changed files with 99 additions and 25 deletions

View file

@ -34,6 +34,8 @@ Universal schema guidance:
- Do not make breaking changes to API endpoints.
- When changing API routes, ensure the HTTP/SSE test suite has full coverage of every route.
- When agent schema changes, ensure API tests cover the new schema and event shapes end-to-end.
- Never use synthetic data or mocked responses in tests.
- Never manually write agent types; always use generated types in `resources/agent-schemas/`. If types are broken, fix the generated types.
### CLI ⇄ HTTP endpoint map (keep in sync)

View file

@ -3,12 +3,18 @@
Universal API for running Claude Code, Codex, OpenCode, and Amp inside sandboxes.
- **Any coding agent**: Universal API to interact with all agents with full feature coverage
- **Server Mode**: Run as HTTP server from any sandbox provider or as TypeScript & Python SDK
- **Server, stdin/stdout, or SDK mode**: Run as an HTTP server, CLI using stdin/stdout, or with the SDK
- **Universal session schema**: Universal schema to store agent transcripts
- **Supports your sandbox provider**: Daytona, E2B, Vercel Sandboxes, and more
- **Lightweight, portable Rust binary**: Install anywhere with 1 curl command
- **OpenAPI spec**: Versioned API schema tracked in `sdks/openapi/openapi.json`
Coming soon:
- **Vercel AI SDK Compatibility**: Works with existing AI SDK tooling, like `useChat`
- **Auto-configure MCP & Skills**: Auto-load MCP servers & skills for your agents
- **Process & logs manager**: Manage processes, logs, and ports for your agents to run background processes
## Agent Support
| Feature | [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) | [Codex](https://github.com/openai/codex) | [OpenCode](https://github.com/opencode-ai/opencode) | [Amp](https://ampcode.com) |
@ -62,13 +68,22 @@ Features out of scope:
## FAQ
**Why not use PTY?**
PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts. The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.
PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts.
The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.
**Why not use features that already exist on sandbox provider APIs?**
Sandbox providers focus on infrastructure (containers, VMs, networking). This project focuses specifically on coding agent orchestration—session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.
Sandbox providers focus on infrastructure (containers, VMs, networking).
This project focuses specifically on coding agent orchestration: session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.
**Does it support [platform]?**
The server is a single Rust binary that runs anywhere with a curl install. If your platform can run Linux binaries (Docker, VMs, etc.), it works. See the deployment guides for E2B, Daytona, Vercel Sandboxes, and Docker.
**Can I use this with my personal API keys?**
Yes. Use `sandbox-agent credentials extract-env` to extract API keys from your local agent configs (Claude Code, Codex, OpenCode, Amp) and pass them to the sandbox environment.
**Why Rust?**
TODO

View file

@ -1,5 +1,7 @@
## soon
- implement stdin/stdout
- switch sdk to use stdin/stdout for embedded mode
- discuss actor arch in readme + give example
- skillfile
- specifically include the release checklist

18
pnpm-lock.yaml generated
View file

@ -12,7 +12,7 @@ importers:
specifier: ^2.4.0
version: 2.7.6
frontend/packages/web:
frontend/packages/inspector:
dependencies:
lucide-react:
specifier: ^0.469.0
@ -43,6 +43,12 @@ importers:
specifier: ^5.4.7
version: 5.4.21(@types/node@22.19.7)
frontend/packages/website:
devDependencies:
vite:
specifier: ^5.4.7
version: 5.4.21(@types/node@22.19.7)
resources/agent-schemas:
dependencies:
'@anthropic-ai/claude-code':
@ -68,6 +74,16 @@ importers:
specifier: ^4.19.0
version: 4.21.0
sdks/cli: {}
sdks/cli/platforms/darwin-arm64: {}
sdks/cli/platforms/darwin-x64: {}
sdks/cli/platforms/linux-x64: {}
sdks/cli/platforms/win32-x64: {}
sdks/typescript:
devDependencies:
'@types/node':

View file

@ -112,11 +112,13 @@ impl AgentManager {
pub fn install(&self, agent: AgentId, options: InstallOptions) -> Result<InstallResult, AgentError> {
let install_path = self.binary_path(agent);
if install_path.exists() && !options.reinstall {
return Ok(InstallResult {
path: install_path,
version: self.version(agent).unwrap_or(None),
});
if !options.reinstall {
if let Ok(existing_path) = self.resolve_binary(agent) {
return Ok(InstallResult {
path: existing_path,
version: self.version(agent).unwrap_or(None),
});
}
}
fs::create_dir_all(&self.install_dir)?;
@ -135,7 +137,9 @@ impl AgentManager {
}
pub fn is_installed(&self, agent: AgentId) -> bool {
self.binary_path(agent).exists() || find_in_path(agent.binary_name()).is_some()
self.binary_path(agent).exists()
|| find_in_path(agent.binary_name()).is_some()
|| default_install_dir().join(agent.binary_name()).exists()
}
pub fn binary_path(&self, agent: AgentId) -> PathBuf {
@ -368,6 +372,10 @@ impl AgentManager {
if let Some(path) = find_in_path(agent.binary_name()) {
return Ok(path);
}
let fallback = default_install_dir().join(agent.binary_name());
if fallback.exists() {
return Ok(fallback);
}
Err(AgentError::BinaryNotFound { agent })
}
}
@ -780,6 +788,12 @@ fn find_in_path(binary_name: &str) -> Option<PathBuf> {
None
}
/// Returns the fallback directory that is probed for agent binaries.
///
/// When `dirs::data_dir()` yields a path, the result is
/// `<data dir>/sandbox-agent/bin`; otherwise it is the relative path
/// `./.sandbox-agent/bin`.
fn default_install_dir() -> PathBuf {
    match dirs::data_dir() {
        Some(data_root) => data_root.join("sandbox-agent").join("bin"),
        // No data directory reported: fall back to a path relative to the
        // current working directory.
        None => PathBuf::from(".").join(".sandbox-agent").join("bin"),
    }
}
fn download_bytes(url: &Url) -> Result<Vec<u8>, AgentError> {
let client = Client::builder().build()?;
let mut response = client.get(url.clone()).send()?;

View file

@ -19,7 +19,7 @@ use tower_http::cors::CorsLayer;
const PROMPT: &str = "Reply with exactly the single word OK.";
const PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
const QUESTION_PROMPT: &str =
"Ask the user a multiple-choice question with options yes/no using any built-in AskUserQuestion tool, then wait.";
"Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";
struct TestApp {
app: Router,
@ -1022,7 +1022,7 @@ async fn approval_flow_snapshots() {
}
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reply_session, test_permission_mode(config.agent)).await;
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,
@ -1083,7 +1083,7 @@ async fn approval_flow_snapshots() {
}
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
create_session(&app.app, config.agent, &question_reject_session, test_permission_mode(config.agent)).await;
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
let status = send_status(
&app.app,
Method::POST,

View file

@ -1,6 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 978
expression: normalize_events(&permission_events)
---
- agent: claude
@ -9,8 +8,10 @@ expression: normalize_events(&permission_events)
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -1,6 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1100
expression: normalize_events(&reject_events)
---
- agent: claude
@ -9,8 +8,10 @@ expression: normalize_events(&reject_events)
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -8,8 +8,10 @@ expression: normalize_events(&question_events)
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -1,6 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 1232
expression: snapshot
---
session_a:
@ -10,8 +9,10 @@ session_a:
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:
@ -27,8 +28,10 @@ session_b:
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -1,6 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 721
expression: normalized
---
- agent: claude
@ -9,8 +8,10 @@ expression: normalized
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -1,6 +1,5 @@
---
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
assertion_line: 729
expression: normalized
---
- agent: claude
@ -9,8 +8,10 @@ expression: normalized
started:
message: session.created
- agent: claude
kind: unknown
kind: started
seq: 2
started:
message: system.init
- agent: claude
kind: message
message:

View file

@ -7,6 +7,7 @@ use crate::{
QuestionInfo,
QuestionOption,
QuestionRequest,
Started,
UniversalEventData,
UniversalMessage,
UniversalMessageParsed,
@ -20,6 +21,7 @@ pub fn event_to_universal_with_session(
) -> EventConversion {
let event_type = event.get("type").and_then(Value::as_str).unwrap_or("");
match event_type {
"system" => system_event_to_universal(event),
"assistant" => assistant_event_to_universal(event),
"tool_use" => tool_use_event_to_universal(event, session_id),
"tool_result" => tool_result_event_to_universal(event),
@ -114,6 +116,18 @@ fn assistant_event_to_universal(event: &Value) -> EventConversion {
EventConversion::new(UniversalEventData::Message { message })
}
/// Converts a `system` event into a universal `Started` event.
///
/// The resulting message is `system.<subtype>`, where `<subtype>` is the
/// event's string-valued `subtype` field, or `system` when that field is
/// missing or is not a string. A clone of the whole event is attached as
/// `details`.
fn system_event_to_universal(event: &Value) -> EventConversion {
    let subtype = match event.get("subtype").and_then(Value::as_str) {
        Some(name) => name,
        None => "system",
    };
    let message = format!("system.{subtype}");
    EventConversion::new(UniversalEventData::Started {
        started: Started {
            message: Some(message),
            details: Some(event.clone()),
        },
    })
}
fn tool_use_event_to_universal(event: &Value, session_id: String) -> EventConversion {
let tool_use = event.get("tool_use");
let name = tool_use

View file

@ -8,6 +8,8 @@
- [x] Implement 2-way converters:
- [x] Universal input message <-> agent-specific input
- [x] Universal event <-> agent-specific event
- [x] Normalize Claude system/init events into universal started events
- [x] Support Codex CLI type-based event format in universal converter
- [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary
- [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently