mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 07:04:48 +00:00
fix: normalize claude system events and refresh tests
This commit is contained in:
parent
fdeef51f9c
commit
c91595d338
14 changed files with 99 additions and 25 deletions
|
|
@ -34,6 +34,8 @@ Universal schema guidance:
|
|||
- Do not make breaking changes to API endpoints.
|
||||
- When changing API routes, ensure the HTTP/SSE test suite has full coverage of every route.
|
||||
- When agent schema changes, ensure API tests cover the new schema and event shapes end-to-end.
|
||||
- Never use synthetic data or mocked responses in tests.
|
||||
- Never manually write agent types; always use generated types in `resources/agent-schemas/`. If types are broken, fix the generated types.
|
||||
|
||||
### CLI ⇄ HTTP endpoint map (keep in sync)
|
||||
|
||||
|
|
|
|||
21
README.md
21
README.md
|
|
@ -3,12 +3,18 @@
|
|||
Universal API for running Claude Code, Codex, OpenCode, and Amp inside sandboxes.
|
||||
|
||||
- **Any coding agent**: Universal API to interact with all agents with full feature coverage
|
||||
- **Server Mode**: Run as HTTP server from any sandbox provider or as TypeScript & Python SDK
|
||||
- **Server, stdin/stdout, or SDK mode**: Run as an HTTP server, CLI using stdin/stdout, or with the SDK
|
||||
- **Universal session schema**: Universal schema to store agent transcripts
|
||||
- **Supports your sandbox provider**: Daytona, E2B, Vercel Sandboxes, and more
|
||||
- **Lightweight, portable Rust binary**: Install anywhere with 1 curl command
|
||||
- **OpenAPI spec**: Versioned API schema tracked in `sdks/openapi/openapi.json`
|
||||
|
||||
Coming soon:
|
||||
|
||||
- **Vercel AI SDK Compatibility**: Works with existing AI SDK tooling, like `useChat`
|
||||
- **Auto-configure MCP & Skills**: Auto-load MCP servers & skills for your agents
|
||||
- **Process & logs manager**: Manage processes, logs, and ports for your agents to run background processes
|
||||
|
||||
## Agent Support
|
||||
|
||||
| Feature | [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) | [Codex](https://github.com/openai/codex) | [OpenCode](https://github.com/opencode-ai/opencode) | [Amp](https://ampcode.com) |
|
||||
|
|
@ -62,13 +68,22 @@ Features out of scope:
|
|||
## FAQ
|
||||
|
||||
**Why not use PTY?**
|
||||
PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts. The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.
|
||||
|
||||
PTY-based approaches require parsing terminal escape sequences and dealing with interactive prompts.
|
||||
|
||||
The agents we support all have machine-readable output modes (JSONL, HTTP APIs) that provide structured events, making integration more reliable.
|
||||
|
||||
**Why not use features that already exist on sandbox provider APIs?**
|
||||
Sandbox providers focus on infrastructure (containers, VMs, networking). This project focuses specifically on coding agent orchestration—session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.
|
||||
|
||||
Sandbox providers focus on infrastructure (containers, VMs, networking).
|
||||
|
||||
This project focuses specifically on coding agent orchestration: session management, HITL (human-in-the-loop) flows, and universal event schemas. These concerns are complementary.
|
||||
|
||||
**Does it support [platform]?**
|
||||
The server is a single Rust binary that runs anywhere with a curl install. If your platform can run Linux binaries (Docker, VMs, etc.), it works. See the deployment guides for E2B, Daytona, Vercel Sandboxes, and Docker.
|
||||
|
||||
**Can I use this with my personal API keys?**
|
||||
Yes. Use `sandbox-agent credentials extract-env` to extract API keys from your local agent configs (Claude Code, Codex, OpenCode, Amp) and pass them to the sandbox environment.
|
||||
|
||||
**Why Rust?**
|
||||
TODO
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
## soon
|
||||
|
||||
- implement stdin/stdout
|
||||
- switch sdk to use stdin/stdout for embedded mode
|
||||
- discuss actor arch in readme + give example
|
||||
- skillfile
|
||||
- specifically include the release checklist
|
||||
|
|
|
|||
18
pnpm-lock.yaml
generated
18
pnpm-lock.yaml
generated
|
|
@ -12,7 +12,7 @@ importers:
|
|||
specifier: ^2.4.0
|
||||
version: 2.7.6
|
||||
|
||||
frontend/packages/web:
|
||||
frontend/packages/inspector:
|
||||
dependencies:
|
||||
lucide-react:
|
||||
specifier: ^0.469.0
|
||||
|
|
@ -43,6 +43,12 @@ importers:
|
|||
specifier: ^5.4.7
|
||||
version: 5.4.21(@types/node@22.19.7)
|
||||
|
||||
frontend/packages/website:
|
||||
devDependencies:
|
||||
vite:
|
||||
specifier: ^5.4.7
|
||||
version: 5.4.21(@types/node@22.19.7)
|
||||
|
||||
resources/agent-schemas:
|
||||
dependencies:
|
||||
'@anthropic-ai/claude-code':
|
||||
|
|
@ -68,6 +74,16 @@ importers:
|
|||
specifier: ^4.19.0
|
||||
version: 4.21.0
|
||||
|
||||
sdks/cli: {}
|
||||
|
||||
sdks/cli/platforms/darwin-arm64: {}
|
||||
|
||||
sdks/cli/platforms/darwin-x64: {}
|
||||
|
||||
sdks/cli/platforms/linux-x64: {}
|
||||
|
||||
sdks/cli/platforms/win32-x64: {}
|
||||
|
||||
sdks/typescript:
|
||||
devDependencies:
|
||||
'@types/node':
|
||||
|
|
|
|||
|
|
@ -112,11 +112,13 @@ impl AgentManager {
|
|||
|
||||
pub fn install(&self, agent: AgentId, options: InstallOptions) -> Result<InstallResult, AgentError> {
|
||||
let install_path = self.binary_path(agent);
|
||||
if install_path.exists() && !options.reinstall {
|
||||
return Ok(InstallResult {
|
||||
path: install_path,
|
||||
version: self.version(agent).unwrap_or(None),
|
||||
});
|
||||
if !options.reinstall {
|
||||
if let Ok(existing_path) = self.resolve_binary(agent) {
|
||||
return Ok(InstallResult {
|
||||
path: existing_path,
|
||||
version: self.version(agent).unwrap_or(None),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
fs::create_dir_all(&self.install_dir)?;
|
||||
|
|
@ -135,7 +137,9 @@ impl AgentManager {
|
|||
}
|
||||
|
||||
pub fn is_installed(&self, agent: AgentId) -> bool {
|
||||
self.binary_path(agent).exists() || find_in_path(agent.binary_name()).is_some()
|
||||
self.binary_path(agent).exists()
|
||||
|| find_in_path(agent.binary_name()).is_some()
|
||||
|| default_install_dir().join(agent.binary_name()).exists()
|
||||
}
|
||||
|
||||
pub fn binary_path(&self, agent: AgentId) -> PathBuf {
|
||||
|
|
@ -368,6 +372,10 @@ impl AgentManager {
|
|||
if let Some(path) = find_in_path(agent.binary_name()) {
|
||||
return Ok(path);
|
||||
}
|
||||
let fallback = default_install_dir().join(agent.binary_name());
|
||||
if fallback.exists() {
|
||||
return Ok(fallback);
|
||||
}
|
||||
Err(AgentError::BinaryNotFound { agent })
|
||||
}
|
||||
}
|
||||
|
|
@ -780,6 +788,12 @@ fn find_in_path(binary_name: &str) -> Option<PathBuf> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Returns the fallback directory that is probed for agent binaries.
///
/// Resolves to `<data dir>/sandbox-agent/bin`, where `<data dir>` is whatever
/// `dirs::data_dir()` reports. When no data directory is available, the
/// relative path `./.sandbox-agent/bin` is used instead.
fn default_install_dir() -> PathBuf {
|
||||
dirs::data_dir()
|
||||
.map(|dir| dir.join("sandbox-agent").join("bin"))
|
||||
.unwrap_or_else(|| PathBuf::from(".").join(".sandbox-agent").join("bin"))
|
||||
}
|
||||
|
||||
fn download_bytes(url: &Url) -> Result<Vec<u8>, AgentError> {
|
||||
let client = Client::builder().build()?;
|
||||
let mut response = client.get(url.clone()).send()?;
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ use tower_http::cors::CorsLayer;
|
|||
const PROMPT: &str = "Reply with exactly the single word OK.";
|
||||
const PERMISSION_PROMPT: &str = "List files in the current directory using available tools.";
|
||||
const QUESTION_PROMPT: &str =
|
||||
"Ask the user a multiple-choice question with options yes/no using any built-in AskUserQuestion tool, then wait.";
|
||||
"Use the AskUserQuestion tool to ask exactly one yes/no question, then wait for a reply. Do not answer yourself.";
|
||||
|
||||
struct TestApp {
|
||||
app: Router,
|
||||
|
|
@ -1022,7 +1022,7 @@ async fn approval_flow_snapshots() {
|
|||
}
|
||||
|
||||
let question_reply_session = format!("question-reply-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &question_reply_session, test_permission_mode(config.agent)).await;
|
||||
create_session(&app.app, config.agent, &question_reply_session, "plan").await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
@ -1083,7 +1083,7 @@ async fn approval_flow_snapshots() {
|
|||
}
|
||||
|
||||
let question_reject_session = format!("question-reject-{}", config.agent.as_str());
|
||||
create_session(&app.app, config.agent, &question_reject_session, test_permission_mode(config.agent)).await;
|
||||
create_session(&app.app, config.agent, &question_reject_session, "plan").await;
|
||||
let status = send_status(
|
||||
&app.app,
|
||||
Method::POST,
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 978
|
||||
expression: normalize_events(&permission_events)
|
||||
---
|
||||
- agent: claude
|
||||
|
|
@ -9,8 +8,10 @@ expression: normalize_events(&permission_events)
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1100
|
||||
expression: normalize_events(&reject_events)
|
||||
---
|
||||
- agent: claude
|
||||
|
|
@ -9,8 +8,10 @@ expression: normalize_events(&reject_events)
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -8,8 +8,10 @@ expression: normalize_events(&question_events)
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 1232
|
||||
expression: snapshot
|
||||
---
|
||||
session_a:
|
||||
|
|
@ -10,8 +9,10 @@ session_a:
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
@ -27,8 +28,10 @@ session_b:
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 721
|
||||
expression: normalized
|
||||
---
|
||||
- agent: claude
|
||||
|
|
@ -9,8 +8,10 @@ expression: normalized
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
---
|
||||
source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs
|
||||
assertion_line: 729
|
||||
expression: normalized
|
||||
---
|
||||
- agent: claude
|
||||
|
|
@ -9,8 +8,10 @@ expression: normalized
|
|||
started:
|
||||
message: session.created
|
||||
- agent: claude
|
||||
kind: unknown
|
||||
kind: started
|
||||
seq: 2
|
||||
started:
|
||||
message: system.init
|
||||
- agent: claude
|
||||
kind: message
|
||||
message:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use crate::{
|
|||
QuestionInfo,
|
||||
QuestionOption,
|
||||
QuestionRequest,
|
||||
Started,
|
||||
UniversalEventData,
|
||||
UniversalMessage,
|
||||
UniversalMessageParsed,
|
||||
|
|
@ -20,6 +21,7 @@ pub fn event_to_universal_with_session(
|
|||
) -> EventConversion {
|
||||
let event_type = event.get("type").and_then(Value::as_str).unwrap_or("");
|
||||
match event_type {
|
||||
"system" => system_event_to_universal(event),
|
||||
"assistant" => assistant_event_to_universal(event),
|
||||
"tool_use" => tool_use_event_to_universal(event, session_id),
|
||||
"tool_result" => tool_result_event_to_universal(event),
|
||||
|
|
@ -114,6 +116,18 @@ fn assistant_event_to_universal(event: &Value) -> EventConversion {
|
|||
EventConversion::new(UniversalEventData::Message { message })
|
||||
}
|
||||
|
||||
/// Converts a `system` event into a universal `Started` event.
///
/// The resulting message is `system.<subtype>`, where `<subtype>` is the
/// event's string `subtype` field; if that field is missing or not a string,
/// `"system"` is used. A clone of the whole raw event is attached as `details`.
fn system_event_to_universal(event: &Value) -> EventConversion {
|
||||
let subtype = event
|
||||
.get("subtype")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("system");
|
||||
let started = Started {
|
||||
message: Some(format!("system.{subtype}")),
|
||||
details: Some(event.clone()),
|
||||
};
|
||||
EventConversion::new(UniversalEventData::Started { started })
|
||||
}
|
||||
|
||||
fn tool_use_event_to_universal(event: &Value, session_id: String) -> EventConversion {
|
||||
let tool_use = event.get("tool_use");
|
||||
let name = tool_use
|
||||
|
|
|
|||
2
todo.md
2
todo.md
|
|
@ -8,6 +8,8 @@
|
|||
- [x] Implement 2-way converters:
|
||||
- [x] Universal input message <-> agent-specific input
|
||||
- [x] Universal event <-> agent-specific event
|
||||
- [x] Normalize Claude system/init events into universal started events
|
||||
- [x] Support Codex CLI type-based event format in universal converter
|
||||
- [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary
|
||||
- [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue