feat: stream sessions and discover agent modes

This commit is contained in:
Nathan Flurry 2026-01-25 01:55:44 -08:00
parent e6b19ed2b6
commit 7b6d7ee917
8 changed files with 2763 additions and 218 deletions

File diff suppressed because it is too large Load diff

View file

@ -15,17 +15,17 @@ axum = "0.7"
clap = { version = "4.5", features = ["derive"] }
futures = "0.3"
sandbox-daemon-error = { path = "../error" }
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls"] }
flate2 = "1.0"
tar = "0.4"
zip = { version = "0.6", default-features = false, features = ["deflate"] }
url = "2.5"
sandbox-daemon-agent-management = { path = "../agent-management" }
sandbox-daemon-agent-credentials = { path = "../agent-credentials" }
sandbox-daemon-universal-agent-schema = { path = "../universal-agent-schema" }
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls", "stream"] }
dirs = "5.0"
tempfile = "3.10"
time = { version = "0.3", features = ["parsing"] }
tokio = { version = "1.36", features = ["macros", "rt-multi-thread", "signal"] }
time = { version = "0.3", features = ["parsing", "formatting"] }
tokio = { version = "1.36", features = ["macros", "rt-multi-thread", "signal", "time"] }
tokio-stream = { version = "0.1", features = ["sync"] }
tower-http = { version = "0.5", features = ["cors"] }
utoipa = { version = "4.2", features = ["axum_extras"] }
schemars = "0.8"
[dev-dependencies]
tempfile = "3.10"

View file

@ -1,8 +1,10 @@
use std::io::Write;
use std::path::PathBuf;
use clap::{Args, Parser, Subcommand};
use reqwest::blocking::Client as HttpClient;
use reqwest::Method;
use sandbox_daemon_agent_management::agents::AgentManager;
use sandbox_daemon_core::router::{
AgentInstallRequest, AppState, AuthConfig, CreateSessionRequest, MessageRequest,
PermissionReply, PermissionReplyRequest, QuestionReplyRequest,
@ -14,6 +16,8 @@ use serde_json::Value;
use thiserror::Error;
use tower_http::cors::{Any, CorsLayer};
/// Path prefix the CLI prepends to every daemon HTTP API route it requests
/// (e.g. `{API_PREFIX}/agents`, `{API_PREFIX}/sessions/{id}`).
const API_PREFIX: &str = "/v1";
#[derive(Parser, Debug)]
#[command(name = "sandbox-daemon")]
#[command(about = "Sandbox daemon for managing coding agents", version)]
@ -125,10 +129,6 @@ struct CreateSessionArgs {
model: Option<String>,
#[arg(long)]
variant: Option<String>,
#[arg(long = "agent-token")]
agent_token: Option<String>,
#[arg(long)]
validate_token: bool,
#[arg(long)]
agent_version: Option<String>,
#[command(flatten)]
@ -237,7 +237,9 @@ fn run_server(cli: &Cli) -> Result<(), CliError> {
return Err(CliError::MissingToken);
};
let state = AppState { auth };
let agent_manager =
AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?;
let state = AppState::new(auth, agent_manager);
let mut router = build_router(state);
if let Some(cors) = build_cors_layer(cli)? {
@ -258,6 +260,12 @@ fn run_server(cli: &Cli) -> Result<(), CliError> {
})
}
/// Returns the default directory used for installed agent binaries.
///
/// When `dirs::data_dir()` yields a path, the result is
/// `<data_dir>/sandbox-daemon/bin`; otherwise it falls back to the relative
/// path `./.sandbox-daemon/bin`.
fn default_install_dir() -> PathBuf {
    match dirs::data_dir() {
        Some(data_root) => data_root.join("sandbox-daemon").join("bin"),
        None => PathBuf::from(".").join(".sandbox-daemon").join("bin"),
    }
}
fn run_client(command: &Command, cli: &Cli) -> Result<(), CliError> {
match command {
Command::Agents(subcommand) => run_agents(&subcommand.command, cli),
@ -269,7 +277,7 @@ fn run_agents(command: &AgentsCommand, cli: &Cli) -> Result<(), CliError> {
match command {
AgentsCommand::List(args) => {
let ctx = ClientContext::new(cli, args)?;
let response = ctx.get("/agents")?;
let response = ctx.get(&format!("{API_PREFIX}/agents"))?;
print_json_response::<AgentListResponse>(response)
}
AgentsCommand::Install(args) => {
@ -277,13 +285,13 @@ fn run_agents(command: &AgentsCommand, cli: &Cli) -> Result<(), CliError> {
let body = AgentInstallRequest {
reinstall: if args.reinstall { Some(true) } else { None },
};
let path = format!("/agents/{}/install", args.agent);
let path = format!("{API_PREFIX}/agents/{}/install", args.agent);
let response = ctx.post(&path, &body)?;
print_empty_response(response)
}
AgentsCommand::Modes(args) => {
let ctx = ClientContext::new(cli, &args.client)?;
let path = format!("/agents/{}/modes", args.agent);
let path = format!("{API_PREFIX}/agents/{}/modes", args.agent);
let response = ctx.get(&path)?;
print_json_response::<AgentModesResponse>(response)
}
@ -300,11 +308,9 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
permission_mode: args.permission_mode.clone(),
model: args.model.clone(),
variant: args.variant.clone(),
token: args.agent_token.clone(),
validate_token: if args.validate_token { Some(true) } else { None },
agent_version: args.agent_version.clone(),
};
let path = format!("/sessions/{}", args.session_id);
let path = format!("{API_PREFIX}/sessions/{}", args.session_id);
let response = ctx.post(&path, &body)?;
print_json_response::<CreateSessionResponse>(response)
}
@ -313,19 +319,19 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
let body = MessageRequest {
message: args.message.clone(),
};
let path = format!("/sessions/{}/messages", args.session_id);
let path = format!("{API_PREFIX}/sessions/{}/messages", args.session_id);
let response = ctx.post(&path, &body)?;
print_empty_response(response)
}
SessionsCommand::GetMessages(args) | SessionsCommand::Events(args) => {
let ctx = ClientContext::new(cli, &args.client)?;
let path = format!("/sessions/{}/events", args.session_id);
let path = format!("{API_PREFIX}/sessions/{}/events", args.session_id);
let response = ctx.get_with_query(&path, &[ ("offset", args.offset), ("limit", args.limit) ])?;
print_json_response::<EventsResponse>(response)
}
SessionsCommand::EventsSse(args) => {
let ctx = ClientContext::new(cli, &args.client)?;
let path = format!("/sessions/{}/events/sse", args.session_id);
let path = format!("{API_PREFIX}/sessions/{}/events/sse", args.session_id);
let response = ctx.get_with_query(&path, &[("offset", args.offset)])?;
print_text_response(response)
}
@ -334,7 +340,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
let answers: Vec<Vec<String>> = serde_json::from_str(&args.answers)?;
let body = QuestionReplyRequest { answers };
let path = format!(
"/sessions/{}/questions/{}/reply",
"{API_PREFIX}/sessions/{}/questions/{}/reply",
args.session_id, args.question_id
);
let response = ctx.post(&path, &body)?;
@ -343,7 +349,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
SessionsCommand::RejectQuestion(args) => {
let ctx = ClientContext::new(cli, &args.client)?;
let path = format!(
"/sessions/{}/questions/{}/reject",
"{API_PREFIX}/sessions/{}/questions/{}/reject",
args.session_id, args.question_id
);
let response = ctx.post_empty(&path)?;
@ -355,7 +361,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
reply: args.reply.clone(),
};
let path = format!(
"/sessions/{}/permissions/{}/reply",
"{API_PREFIX}/sessions/{}/permissions/{}/reply",
args.session_id, args.permission_id
);
let response = ctx.post(&path, &body)?;

File diff suppressed because it is too large Load diff

134
spec.md
View file

@ -4,6 +4,8 @@ i need to build a library that is a universal api to work with agents
- agent = claude code, codex, and opencode -> the actual binary/sdk that runs the coding agent
- agent mode = what the agent does, for example build/plan agent mode
- agent (id) vs agent mode: `agent` selects the implementation (claude/codex/opencode/amp), `agentMode` selects behavior (build/plan/custom). These are different from `permissionMode` (capability restrictions).
- session id vs agent session id: session id is the primary id provided by the client; agent session id is the underlying id from the agent and must be exposed but is not the primary id.
- model = claude, codex, gemini, etc -> the model that's used in the agent
- variant = variant on the model if exists, eg low, mid, high, xhigh for codex
@ -27,7 +29,6 @@ this also needs to support questions (ie human in the loop)
these agents all have different ways of working with them.
- claude code uses headless mode
- codex uses a typescript sdk
- opencode uses a server
## component: daemon
@ -60,13 +61,18 @@ sandbox-daemon sessions get-messages --endpoint xxxx --token xxxx
### http api
POST /agents/{}/install (this will install the agent)
{}
POST /v1/agents/{}/install (this will install the agent)
{ reinstall?: boolean }
- `reinstall: true` forces download even if installed version matches latest.
GET /agents/{}/modes
GET /v1/agents/{}/modes
< { modes: [{ id: "build", name: "Build", description: "..." }, ...] }
POST /sessions/{} (will install agent if not already installed)
GET /v1/agents
< { agents: [{ id: "claude" | "codex" | "opencode" | "amp", installed: boolean, version?: string, path?: string }] }
- Version should be checked at request time. `path` reflects the configured install location.
POST /v1/sessions/{} (will install agent if not already installed)
>
{
agent: "claude" | "codex" | "opencode",
@ -74,15 +80,16 @@ POST /sessions/{} (will install agent if not already installed)
permissionMode?: "default" | "plan" | "bypass", // Permission restrictions
model?: string,
variant?: string,
token?: string,
validateToken?: boolean,
agentVersion?: string
}
<
{
healthy: boolean,
error?: AgentError
error?: AgentError,
agentSessionId?: string
}
- The client-provided session id is primary; `agentSessionId` is the underlying agent id (may be unknown until first prompt).
- Auth uses the daemon-level token (`Authorization` / `x-sandbox-token`); per-session tokens are not supported.
// agentMode vs permissionMode:
// - agentMode = what the agent DOES (behavior, system prompt)
@ -96,28 +103,28 @@ POST /sessions/{} (will install agent if not already installed)
// - permissionMode "bypass" = skip all permission checks (dangerous)
// - agentMode "plan" != permissionMode "plan" (one is behavior, one is restriction)
POST /sessions/{}/messages
POST /v1/sessions/{}/messages
{
message: string
}
GET /sessions/{}/events?offset=x&limit=x
GET /v1/sessions/{}/events?offset=x&limit=x
<
{
events: UniversalEvent[],
hasMore: bool
}
GET /sessions/{}/events/sse?offset=x
GET /v1/sessions/{}/events/sse?offset=x
- same as above but using sse
POST /sessions/{}/questions/{questionId}/reply
{ answers: string[][] } // Array per question of selected option labels
POST /v1/sessions/{}/questions/{questionId}/reply
{ answers: string[][] } // Array per question of selected option labels (multi-select supported)
POST /sessions/{}/questions/{questionId}/reject
POST /v1/sessions/{}/questions/{questionId}/reject
{}
POST /sessions/{}/permissions/{permissionId}/reply
POST /v1/sessions/{}/permissions/{permissionId}/reply
{ reply: "once" | "always" | "reject" }
note: Claude's plan approval (ExitPlanMode) is converted to a question event with approve/reject options. No separate endpoint needed.
@ -125,6 +132,16 @@ note: Claude's plan approval (ExitPlanMode) is converted to a question event wit
types:
type UniversalEvent =
{
id: number, // Monotonic per-session id (used for offset)
timestamp: string, // RFC3339
sessionId: string, // Primary id provided by client
agent: string, // Agent id (claude/codex/opencode/amp)
agentSessionId?: string, // Underlying agent session/thread id (not primary)
data: UniversalEventData
}
type UniversalEventData =
| { message: UniversalMessage }
| { started: Started }
| { error: CrashInfo }
@ -135,6 +152,34 @@ type UniversalEvent =
type AgentError = { tokenError: ... } | { processExisted: ... } | { installFailed: ... } | etc
### error taxonomy
All error responses use RFC 7807 Problem Details and map to a Rust `thiserror` enum. Canonical `type` values should be stable strings (e.g. `urn:sandbox-daemon:error:agent_not_installed`).
Required error types:
- `invalid_request` (400): malformed JSON, missing fields, invalid enum values
- `unsupported_agent` (400): unknown agent id
- `agent_not_installed` (404): agent binary missing
- `install_failed` (500): install attempted and failed
- `agent_process_exited` (500): agent subprocess exited unexpectedly
- `token_invalid` (401): token missing/invalid when required
- `permission_denied` (403): operation not allowed by permissionMode or config
- `session_not_found` (404): unknown session id
- `session_already_exists` (409): attempting to create session with existing id
- `mode_not_supported` (400): agentMode not available for agent
- `stream_error` (502): streaming/I/O failure
- `timeout` (504): agent or request timed out
The Rust error enum should capture context (agent id, session id, exit code, stderr, etc.) and translate to Problem Details in the HTTP layer and CLI. The `AgentError` payloads used in JSON responses should be derived from the same enum so HTTP and CLI stay consistent.
### offset semantics
- `offset` is the last-seen `UniversalEvent.id` (exclusive).
- `GET /v1/sessions/{id}/events` returns events with `id > offset`, ordered ascending.
- `offset` defaults to `0` (or the earliest id) if not provided.
- SSE endpoint uses the same semantics and continues streaming events after the initial batch.
### schema converters
we need to have a 2 way conversion for both:
@ -222,6 +267,13 @@ A single long-running server handles multiple sessions. The daemon connects to t
| OpenCode | Shared server | Native server support, lower latency |
| Amp | Subprocess per session | No server mode available |
#### agent mode discovery
- **OpenCode**: discover via server API (see `client.app.agents()` in `research/agents/opencode.md`).
- **Codex**: no discovery; hardcode supported modes (behavior via prompt prefixes).
- **Claude Code**: no discovery; hardcode supported modes (behavior mostly via prompt/policy).
- **Amp**: no discovery; hardcode supported modes (typically just `build`).
#### installation
Before spawning, agents must be installed. **We curl raw binaries directly** - no npm, brew, install scripts, or other package managers.
@ -384,11 +436,12 @@ this machine is already authenticated with codex & claude & opencode (for codex)
## testing frontend
in frontend/packages/web/ build a vite server that:
in frontend/packages/web/ build a vite + react app that:
- connect screen: prompts the user to provide an endpoint & optional token
- shows instructions on how to run the sandbox-daemon (including cors)
- agent screen: provides a full agent ui
- if gets error or cors error, instruct the user to ensure they have cors flags enabled
- agent screen: provides a full agent ui covering all of the features. also includes a log of all http requests in the ui with a copy button for the curl command
## component: sdks
@ -397,6 +450,11 @@ we need to auto-generate types from our json schema for these languages
- typescript sdk
- expose our http api as a typescript sdk
- update claude.md to specify that when changing api, we need to update the typescript sdk + the cli to interact with it
- implement two main entrypoints: connect to endpoint + token or run locally (which spawns this binary as a subprocess, add todo to set up release pipeline and auto-pull the binary)
### typescript sdk approach
Use OpenAPI (from utoipa) + `openapi-typescript` to generate types, and implement a thin custom client wrapper (fetch-based) around the generated types. Avoid full client generators to keep the output small and stable.
## examples
@ -432,45 +490,3 @@ write a readme that doubles as docs for:
- typescript sdk
use the collapsible github sections for things like each api endpoint or each typescript sdk endpoint to collapse more info. this keeps the page readable.
## spec todo
- generate common denominator with conversion functions
- how should we handle the tokens for auth?
## future problems to visit
- api features
- list agent modes available
- list models available
- handle planning mode
- api key gateway
- configuring mcp/skills/etc
- process management inside container
- otel
- better authentication systems
- s3-based file system
- ai sdk compatibility for their ecosystem (useChat, etc)
- resumable messages
- todo lists
- all other features
- misc
- bootstrap tool that extracts tokens from the current system
- skill
- pre-package these as bun binaries instead of npm installations
- build & release pipeline with musl
- agent feature matrix for api features
- tunnels
## future work
- mcp integration (can connect to given endpoints)
- provide a pty to access the agent data
- other agent features like file system
- python sdk
## misc
comparison to agentapi:
- it does not use the pty since we need to get more information from the agent

View file

@ -1,3 +1,5 @@
# Open Questions
# Open Questions / Ambiguities
- None yet.
- OpenCode server HTTP paths and payloads may differ; current implementation assumes `POST /session`, `POST /session/{id}/prompt`, and `GET /event/subscribe` with JSON `data:` SSE frames.
- OpenCode question/permission reply endpoints are assumed as `POST /question/reply`, `/question/reject`, `/permission/reply` with `requestID` fields; confirm actual API shape.
- SSE events may not always include `sessionID`/`sessionId` fields; confirm if filtering should use a different field.

View file

@ -1,5 +1,7 @@
# Required Tests
- `test_agents_install_version_spawn` (installs, checks version, spawns prompt for Claude/Codex/OpenCode; Amp spawn runs only if `~/.amp/config.json` exists)
- daemon http api: smoke tests for each endpoint response shape/status
- cli: subcommands hit expected endpoints and handle error responses
- Session manager streams JSONL line-by-line for Claude/Codex/Amp and yields incremental events.
- `/sessions/{id}/messages` returns immediately while background ingestion populates `/events` and `/events/sse`.
- SSE subscription delivers live events after the initial offset batch.
- OpenCode server mode: create session, send prompt, and receive SSE events filtered to the session.
- OpenCode question/permission reply endpoints forward to server APIs.

105
todo.md
View file

@ -1,9 +1,98 @@
# TODO
# TODO (from spec.md)
- [x] Scaffold `engine/packages/sandbox-daemon` crate
- [x] Implement agent management modules (install/version/spawn basics)
- [x] Add tests for agent install/version/spawn
- [x] Track required tests in `spec/required-tests.md`
- [x] Track open questions in `spec/im-not-sure.md`
- [ ] Hook sandbox/session management into the daemon router handlers
- [ ] Replace noop schemas with universal agent schema and remove the old schema
## Universal API + Types
- [x] Define universal base types for agent input/output (common denominator across schemas)
- [x] Add universal question + permission types (HITL) and ensure they are supported end-to-end
- [x] Define `UniversalEvent` + `UniversalEventData` union and `AgentError` shape
- [x] Define a universal message type for "failed to parse" with raw JSON payload
- [x] Implement 2-way converters:
- [x] Universal input message <-> agent-specific input
- [x] Universal event <-> agent-specific event
- [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary
- [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently
## Daemon (Rust HTTP server)
- [x] Build axum router + utoipa + schemars integration
- [x] Implement RFC 7807 Problem Details error responses backed by a `thiserror` enum
- [x] Implement canonical error `type` values + required error variants from spec
- [x] Implement offset semantics for events (exclusive last-seen id, default offset 0)
- [x] Implement SSE endpoint for events with same semantics as JSON endpoint
- [x] Replace in-memory session store with sandbox session manager (questions/permissions routing, long-lived processes)
## CLI
- [x] Implement clap CLI flags: `--token`, `--no-token`, `--host`, `--port`, CORS flags
- [x] Implement a CLI endpoint for every HTTP endpoint
- [ ] Update `CLAUDE.md` to keep CLI endpoints in sync with HTTP API changes
- [x] Prefix CLI API requests with `/v1`
## HTTP API Endpoints
- [x] POST `/agents/{}/install` with `reinstall` handling
- [x] GET `/agents/{}/modes` (mode discovery or hardcoded)
- [x] GET `/agents` (installed/version/path; version checked at request time)
- [x] POST `/sessions/{}` (create session, install if needed, return health + agentSessionId)
- [x] POST `/sessions/{}/messages` (send prompt)
- [x] GET `/sessions/{}/events` (pagination with offset/limit)
- [x] GET `/sessions/{}/events/sse` (streaming)
- [x] POST `/sessions/{}/questions/{questionId}/reply`
- [x] POST `/sessions/{}/questions/{questionId}/reject`
- [x] POST `/sessions/{}/permissions/{permissionId}/reply`
- [x] Prefix all HTTP API endpoints with `/v1`
## Agent Management
- [x] Implement install/version/spawn basics for Claude/Codex/OpenCode/Amp
- [x] Implement agent install URL patterns + platform mappings for supported OS/arch
- [x] Parse JSONL output for subprocess agents and extract session/result metadata
- [x] Map permissionMode to agent CLI flags (Claude/Codex/Amp)
- [x] Implement session resume flags for Claude/OpenCode/Amp (Codex unsupported)
- [x] Replace sandbox-daemon core agent modules with new agent-management crate (delete originals)
- [x] Stabilize agent-management crate API and fix build issues (sandbox-daemon currently wired to WIP crate)
- [x] Implement OpenCode shared server lifecycle (`opencode serve`, health, restart)
- [x] Implement OpenCode HTTP session APIs + SSE event stream integration
- [x] Implement JSONL parsing for subprocess agents and map to `UniversalEvent`
- [x] Capture agent session id from events and expose as `agentSessionId`
- [x] Handle agent process exit and map to `agent_process_exited` error
- [x] Implement agentMode discovery rules (OpenCode API, hardcoded others)
- [x] Enforce permissionMode behavior (default/plan/bypass) for subprocesses
## Credentials
- [x] Implement credential extraction module (Claude/Codex/OpenCode)
- [x] Add Amp credential extraction (config-based)
- [x] Move credential extraction into `agent-credentials` crate
- [ ] Pass extracted credentials into subprocess env vars per agent
- [ ] Ensure OpenCode server reads credentials from config on startup
## Testing
- [ ] Build a universal agent test suite that exercises all features (messages, questions, permissions, etc.) using HTTP API
- [ ] Run the full suite against every agent (Claude/Codex/OpenCode/Amp) without mocks
- [x] Add real install/version/spawn tests for Claude/Codex/OpenCode (Amp conditional)
- [x] Expand agent lifecycle tests (reinstall, session id extraction, resume, plan mode)
- [ ] Add OpenCode server-mode tests (session create, prompt, SSE)
- [ ] Add tests for question/permission flows using deterministic prompts
## Frontend (frontend/packages/web)
- [x] Build Vite + React app with connect screen (endpoint + optional token)
- [x] Add instructions to run sandbox-daemon (including CORS)
- [x] Implement full agent UI covering all features
- [x] Add HTTP request log with copyable curl command
## TypeScript SDK
- [x] Generate OpenAPI from utoipa and run `openapi-typescript`
- [x] Implement a thin fetch-based client wrapper
- [x] Update `CLAUDE.md` to require SDK + CLI updates when API changes
- [x] Prefix SDK requests with `/v1`
## Examples + Tests
- [ ] Add examples for Docker, E2B, Daytona, Vercel Sandboxes, Cloudflare Sandboxes
- [ ] Add Vitest unit test for each example (Cloudflare requires special setup)
## Documentation
- [ ] Write README covering architecture, agent compatibility, and deployment guide
- [ ] Add universal API feature checklist (questions, approve plan, etc.)
- [ ] Document CLI, HTTP API, frontend app, and TypeScript SDK usage
- [ ] Use collapsible sections for endpoints and SDK methods
---
- implement release pipeline
- implement e2b example
- implement typescript "start locally" by pulling from server using version