mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 06:04:43 +00:00
feat: stream sessions and discover agent modes
This commit is contained in:
parent
e6b19ed2b6
commit
7b6d7ee917
8 changed files with 2763 additions and 218 deletions
1001
engine/packages/agent-management/src/agents.rs
Normal file
1001
engine/packages/agent-management/src/agents.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -15,17 +15,17 @@ axum = "0.7"
|
|||
clap = { version = "4.5", features = ["derive"] }
|
||||
futures = "0.3"
|
||||
sandbox-daemon-error = { path = "../error" }
|
||||
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls"] }
|
||||
flate2 = "1.0"
|
||||
tar = "0.4"
|
||||
zip = { version = "0.6", default-features = false, features = ["deflate"] }
|
||||
url = "2.5"
|
||||
sandbox-daemon-agent-management = { path = "../agent-management" }
|
||||
sandbox-daemon-agent-credentials = { path = "../agent-credentials" }
|
||||
sandbox-daemon-universal-agent-schema = { path = "../universal-agent-schema" }
|
||||
reqwest = { version = "0.11", features = ["blocking", "json", "rustls-tls", "stream"] }
|
||||
dirs = "5.0"
|
||||
tempfile = "3.10"
|
||||
time = { version = "0.3", features = ["parsing"] }
|
||||
tokio = { version = "1.36", features = ["macros", "rt-multi-thread", "signal"] }
|
||||
time = { version = "0.3", features = ["parsing", "formatting"] }
|
||||
tokio = { version = "1.36", features = ["macros", "rt-multi-thread", "signal", "time"] }
|
||||
tokio-stream = { version = "0.1", features = ["sync"] }
|
||||
tower-http = { version = "0.5", features = ["cors"] }
|
||||
utoipa = { version = "4.2", features = ["axum_extras"] }
|
||||
schemars = "0.8"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.10"
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::{Args, Parser, Subcommand};
|
||||
use reqwest::blocking::Client as HttpClient;
|
||||
use reqwest::Method;
|
||||
use sandbox_daemon_agent_management::agents::AgentManager;
|
||||
use sandbox_daemon_core::router::{
|
||||
AgentInstallRequest, AppState, AuthConfig, CreateSessionRequest, MessageRequest,
|
||||
PermissionReply, PermissionReplyRequest, QuestionReplyRequest,
|
||||
|
|
@ -14,6 +16,8 @@ use serde_json::Value;
|
|||
use thiserror::Error;
|
||||
use tower_http::cors::{Any, CorsLayer};
|
||||
|
||||
const API_PREFIX: &str = "/v1";
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "sandbox-daemon")]
|
||||
#[command(about = "Sandbox daemon for managing coding agents", version)]
|
||||
|
|
@ -125,10 +129,6 @@ struct CreateSessionArgs {
|
|||
model: Option<String>,
|
||||
#[arg(long)]
|
||||
variant: Option<String>,
|
||||
#[arg(long = "agent-token")]
|
||||
agent_token: Option<String>,
|
||||
#[arg(long)]
|
||||
validate_token: bool,
|
||||
#[arg(long)]
|
||||
agent_version: Option<String>,
|
||||
#[command(flatten)]
|
||||
|
|
@ -237,7 +237,9 @@ fn run_server(cli: &Cli) -> Result<(), CliError> {
|
|||
return Err(CliError::MissingToken);
|
||||
};
|
||||
|
||||
let state = AppState { auth };
|
||||
let agent_manager =
|
||||
AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?;
|
||||
let state = AppState::new(auth, agent_manager);
|
||||
let mut router = build_router(state);
|
||||
|
||||
if let Some(cors) = build_cors_layer(cli)? {
|
||||
|
|
@ -258,6 +260,12 @@ fn run_server(cli: &Cli) -> Result<(), CliError> {
|
|||
})
|
||||
}
|
||||
|
||||
fn default_install_dir() -> PathBuf {
|
||||
dirs::data_dir()
|
||||
.map(|dir| dir.join("sandbox-daemon").join("bin"))
|
||||
.unwrap_or_else(|| PathBuf::from(".").join(".sandbox-daemon").join("bin"))
|
||||
}
|
||||
|
||||
fn run_client(command: &Command, cli: &Cli) -> Result<(), CliError> {
|
||||
match command {
|
||||
Command::Agents(subcommand) => run_agents(&subcommand.command, cli),
|
||||
|
|
@ -269,7 +277,7 @@ fn run_agents(command: &AgentsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
match command {
|
||||
AgentsCommand::List(args) => {
|
||||
let ctx = ClientContext::new(cli, args)?;
|
||||
let response = ctx.get("/agents")?;
|
||||
let response = ctx.get(&format!("{API_PREFIX}/agents"))?;
|
||||
print_json_response::<AgentListResponse>(response)
|
||||
}
|
||||
AgentsCommand::Install(args) => {
|
||||
|
|
@ -277,13 +285,13 @@ fn run_agents(command: &AgentsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
let body = AgentInstallRequest {
|
||||
reinstall: if args.reinstall { Some(true) } else { None },
|
||||
};
|
||||
let path = format!("/agents/{}/install", args.agent);
|
||||
let path = format!("{API_PREFIX}/agents/{}/install", args.agent);
|
||||
let response = ctx.post(&path, &body)?;
|
||||
print_empty_response(response)
|
||||
}
|
||||
AgentsCommand::Modes(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let path = format!("/agents/{}/modes", args.agent);
|
||||
let path = format!("{API_PREFIX}/agents/{}/modes", args.agent);
|
||||
let response = ctx.get(&path)?;
|
||||
print_json_response::<AgentModesResponse>(response)
|
||||
}
|
||||
|
|
@ -300,11 +308,9 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
permission_mode: args.permission_mode.clone(),
|
||||
model: args.model.clone(),
|
||||
variant: args.variant.clone(),
|
||||
token: args.agent_token.clone(),
|
||||
validate_token: if args.validate_token { Some(true) } else { None },
|
||||
agent_version: args.agent_version.clone(),
|
||||
};
|
||||
let path = format!("/sessions/{}", args.session_id);
|
||||
let path = format!("{API_PREFIX}/sessions/{}", args.session_id);
|
||||
let response = ctx.post(&path, &body)?;
|
||||
print_json_response::<CreateSessionResponse>(response)
|
||||
}
|
||||
|
|
@ -313,19 +319,19 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
let body = MessageRequest {
|
||||
message: args.message.clone(),
|
||||
};
|
||||
let path = format!("/sessions/{}/messages", args.session_id);
|
||||
let path = format!("{API_PREFIX}/sessions/{}/messages", args.session_id);
|
||||
let response = ctx.post(&path, &body)?;
|
||||
print_empty_response(response)
|
||||
}
|
||||
SessionsCommand::GetMessages(args) | SessionsCommand::Events(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let path = format!("/sessions/{}/events", args.session_id);
|
||||
let path = format!("{API_PREFIX}/sessions/{}/events", args.session_id);
|
||||
let response = ctx.get_with_query(&path, &[ ("offset", args.offset), ("limit", args.limit) ])?;
|
||||
print_json_response::<EventsResponse>(response)
|
||||
}
|
||||
SessionsCommand::EventsSse(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let path = format!("/sessions/{}/events/sse", args.session_id);
|
||||
let path = format!("{API_PREFIX}/sessions/{}/events/sse", args.session_id);
|
||||
let response = ctx.get_with_query(&path, &[("offset", args.offset)])?;
|
||||
print_text_response(response)
|
||||
}
|
||||
|
|
@ -334,7 +340,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
let answers: Vec<Vec<String>> = serde_json::from_str(&args.answers)?;
|
||||
let body = QuestionReplyRequest { answers };
|
||||
let path = format!(
|
||||
"/sessions/{}/questions/{}/reply",
|
||||
"{API_PREFIX}/sessions/{}/questions/{}/reply",
|
||||
args.session_id, args.question_id
|
||||
);
|
||||
let response = ctx.post(&path, &body)?;
|
||||
|
|
@ -343,7 +349,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
SessionsCommand::RejectQuestion(args) => {
|
||||
let ctx = ClientContext::new(cli, &args.client)?;
|
||||
let path = format!(
|
||||
"/sessions/{}/questions/{}/reject",
|
||||
"{API_PREFIX}/sessions/{}/questions/{}/reject",
|
||||
args.session_id, args.question_id
|
||||
);
|
||||
let response = ctx.post_empty(&path)?;
|
||||
|
|
@ -355,7 +361,7 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> {
|
|||
reply: args.reply.clone(),
|
||||
};
|
||||
let path = format!(
|
||||
"/sessions/{}/permissions/{}/reply",
|
||||
"{API_PREFIX}/sessions/{}/permissions/{}/reply",
|
||||
args.session_id, args.permission_id
|
||||
);
|
||||
let response = ctx.post(&path, &body)?;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
134
spec.md
134
spec.md
|
|
@ -4,6 +4,8 @@ i need to build a library that is a universal api to work with agents
|
|||
|
||||
- agent = claude code, codex, and opencode -> the actual binary/sdk that runs the coding agent
|
||||
- agent mode = what the agent does, for example build/plan agent mode
|
||||
- agent (id) vs agent mode: `agent` selects the implementation (claude/codex/opencode/amp), `agentMode` selects behavior (build/plan/custom). These are different from `permissionMode` (capability restrictions).
|
||||
- session id vs agent session id: session id is the primary id provided by the client; agent session id is the underlying id from the agent and must be exposed but is not the primary id.
|
||||
- model = claude, codex, gemini, etc -> the model that's used in the agent
|
||||
- variant = variant on the model if exists, eg low, mid, high, xhigh for codex
|
||||
|
||||
|
|
@ -27,7 +29,6 @@ this also needs to support quesitons (ie human in the loop)
|
|||
these agents all have different ways of working with them.
|
||||
|
||||
- claude code uses headless mode
|
||||
- codex uses a typescript sdk
|
||||
- opencode uses a server
|
||||
|
||||
## component: daemon
|
||||
|
|
@ -60,13 +61,18 @@ sandbox-daemon sessions get-messages --endpoint xxxx --token xxxx
|
|||
|
||||
### http api
|
||||
|
||||
POST /agents/{}/install (this will install the agent)
|
||||
{}
|
||||
POST /v1/agents/{}/install (this will install the agent)
|
||||
{ reinstall?: boolean }
|
||||
- `reinstall: true` forces download even if installed version matches latest.
|
||||
|
||||
GET /agents/{}/modes
|
||||
GET /v1/agents/{}/modes
|
||||
< { modes: [{ id: "build", name: "Build", description: "..." }, ...] }
|
||||
|
||||
POST /sessions/{} (will install agent if not already installed)
|
||||
GET /v1/agents
|
||||
< { agents: [{ id: "claude" | "codex" | "opencode" | "amp", installed: boolean, version?: string, path?: string }] }
|
||||
- Version should be checked at request time. `path` reflects the configured install location.
|
||||
|
||||
POST /v1/sessions/{} (will install agent if not already installed)
|
||||
>
|
||||
{
|
||||
agent: "claude" | "codex" | "opencode",
|
||||
|
|
@ -74,15 +80,16 @@ POST /sessions/{} (will install agent if not already installed)
|
|||
permissionMode?: "default" | "plan" | "bypass", // Permission restrictions
|
||||
model?: string,
|
||||
variant?: string,
|
||||
token?: string,
|
||||
validateToken?: boolean,
|
||||
agentVersion?: string
|
||||
}
|
||||
<
|
||||
{
|
||||
healthy: boolean,
|
||||
error?: AgentError
|
||||
error?: AgentError,
|
||||
agentSessionId?: string
|
||||
}
|
||||
- The client-provided session id is primary; `agentSessionId` is the underlying agent id (may be unknown until first prompt).
|
||||
- Auth uses the daemon-level token (`Authorization` / `x-sandbox-token`); per-session tokens are not supported.
|
||||
|
||||
// agentMode vs permissionMode:
|
||||
// - agentMode = what the agent DOES (behavior, system prompt)
|
||||
|
|
@ -96,28 +103,28 @@ POST /sessions/{} (will install agent if not already installed)
|
|||
// - permissionMode "bypass" = skip all permission checks (dangerous)
|
||||
// - agentMode "plan" != permissionMode "plan" (one is behavior, one is restriction)
|
||||
|
||||
POST /sessions/{}/messages
|
||||
POST /v1/sessions/{}/messages
|
||||
{
|
||||
message: string
|
||||
}
|
||||
|
||||
GET /sessions/{}/events?offset=x&limit=x
|
||||
GET /v1/sessions/{}/events?offset=x&limit=x
|
||||
<
|
||||
{
|
||||
events: UniversalEvent[],
|
||||
hasMore: bool
|
||||
}
|
||||
|
||||
GET /sessions/{}/events/sse?offset=x
|
||||
GET /v1/sessions/{}/events/sse?offset=x
|
||||
- same as above but using sse
|
||||
|
||||
POST /sessions/{}/questions/{questionId}/reply
|
||||
{ answers: string[][] } // Array per question of selected option labels
|
||||
POST /v1/sessions/{}/questions/{questionId}/reply
|
||||
{ answers: string[][] } // Array per question of selected option labels (multi-select supported)
|
||||
|
||||
POST /sessions/{}/questions/{questionId}/reject
|
||||
POST /v1/sessions/{}/questions/{questionId}/reject
|
||||
{}
|
||||
|
||||
POST /sessions/{}/permissions/{permissionId}/reply
|
||||
POST /v1/sessions/{}/permissions/{permissionId}/reply
|
||||
{ reply: "once" | "always" | "reject" }
|
||||
|
||||
note: Claude's plan approval (ExitPlanMode) is converted to a question event with approve/reject options. No separate endpoint needed.
|
||||
|
|
@ -125,6 +132,16 @@ note: Claude's plan approval (ExitPlanMode) is converted to a question event wit
|
|||
types:
|
||||
|
||||
type UniversalEvent =
|
||||
{
|
||||
id: number, // Monotonic per-session id (used for offset)
|
||||
timestamp: string, // RFC3339
|
||||
sessionId: string, // Primary id provided by client
|
||||
agent: string, // Agent id (claude/codex/opencode/amp)
|
||||
agentSessionId?: string, // Underlying agent session/thread id (not primary)
|
||||
data: UniversalEventData
|
||||
}
|
||||
|
||||
type UniversalEventData =
|
||||
| { message: UniversalMessage }
|
||||
| { started: Started }
|
||||
| { error: CrashInfo }
|
||||
|
|
@ -135,6 +152,34 @@ type UniversalEvent =
|
|||
|
||||
type AgentError = { tokenError: ... } | { processExited: ... } | { installFailed: ... } | etc
|
||||
|
||||
### error taxonomy
|
||||
|
||||
All error responses use RFC 7807 Problem Details and map to a Rust `thiserror` enum. Canonical `type` values should be stable strings (e.g. `urn:sandbox-daemon:error:agent_not_installed`).
|
||||
|
||||
Required error types:
|
||||
|
||||
- `invalid_request` (400): malformed JSON, missing fields, invalid enum values
|
||||
- `unsupported_agent` (400): unknown agent id
|
||||
- `agent_not_installed` (404): agent binary missing
|
||||
- `install_failed` (500): install attempted and failed
|
||||
- `agent_process_exited` (500): agent subprocess exited unexpectedly
|
||||
- `token_invalid` (401): token missing/invalid when required
|
||||
- `permission_denied` (403): operation not allowed by permissionMode or config
|
||||
- `session_not_found` (404): unknown session id
|
||||
- `session_already_exists` (409): attempting to create session with existing id
|
||||
- `mode_not_supported` (400): agentMode not available for agent
|
||||
- `stream_error` (502): streaming/I/O failure
|
||||
- `timeout` (504): agent or request timed out
|
||||
|
||||
The Rust error enum should capture context (agent id, session id, exit code, stderr, etc.) and translate to Problem Details in the HTTP layer and CLI. The `AgentError` payloads used in JSON responses should be derived from the same enum so HTTP and CLI stay consistent.
|
||||
|
||||
### offset semantics
|
||||
|
||||
- `offset` is the last-seen `UniversalEvent.id` (exclusive).
|
||||
- `GET /v1/sessions/{id}/events` returns events with `id > offset`, ordered ascending.
|
||||
- `offset` defaults to `0` (or the earliest id) if not provided.
|
||||
- SSE endpoint uses the same semantics and continues streaming events after the initial batch.
|
||||
|
||||
### schema converters
|
||||
|
||||
we need to have a 2 way conversion for both:
|
||||
|
|
@ -222,6 +267,13 @@ A single long-running server handles multiple sessions. The daemon connects to t
|
|||
| OpenCode | Shared server | Native server support, lower latency |
|
||||
| Amp | Subprocess per session | No server mode available |
|
||||
|
||||
#### agent mode discovery
|
||||
|
||||
- **OpenCode**: discover via server API (see `client.app.agents()` in `research/agents/opencode.md`).
|
||||
- **Codex**: no discovery; hardcode supported modes (behavior via prompt prefixes).
|
||||
- **Claude Code**: no discovery; hardcode supported modes (behavior mostly via prompt/policy).
|
||||
- **Amp**: no discovery; hardcode supported modes (typically just `build`).
|
||||
|
||||
#### installation
|
||||
|
||||
Before spawning, agents must be installed. **We curl raw binaries directly** - no npm, brew, install scripts, or other package managers.
|
||||
|
|
@ -384,11 +436,12 @@ this machine is already authenticated with codex & claude & opencode (for codex)
|
|||
|
||||
## testing frontend
|
||||
|
||||
in frontend/packages/web/ build a vite server that:
|
||||
in frontend/packages/web/ build a vite + react app that:
|
||||
|
||||
- connect screen: prompts the user to provide an endpoint & optional token
|
||||
- shows instructions on how to run the sandbox-daemon (including cors)
|
||||
- agent screen: provides a full agent ui
|
||||
- if gets error or cors error, instruct the user to ensure they have cors flags enabled
|
||||
- agent screen: provides a full agent ui covering all of the features. also includes a log of all http requests in the ui with a copy button for the curl command
|
||||
|
||||
## component: sdks
|
||||
|
||||
|
|
@ -397,6 +450,11 @@ we need to auto-generate types from our json schema for these languages
|
|||
- typescript sdk
|
||||
- expose our http api as a typescript sdk
|
||||
- update claude.md to specify that when changing api, we need to update the typescript sdk + the cli to interact with it
|
||||
- implement two main entrypoints: connect to endpoint + token or run locally (which spawns this binary as a subprocess, add todo to set up release pipeline and auto-pull the binary)
|
||||
|
||||
### typescript sdk approach
|
||||
|
||||
Use OpenAPI (from utoipa) + `openapi-typescript` to generate types, and implement a thin custom client wrapper (fetch-based) around the generated types. Avoid full client generators to keep the output small and stable.
|
||||
|
||||
## examples
|
||||
|
||||
|
|
@ -432,45 +490,3 @@ write a readme that doubles as docs for:
|
|||
- typescript sdk
|
||||
|
||||
use the collapsible github sections for things like each api endpoint or each typescript sdk endpoint to collapse more info. this keeps the page readable.
|
||||
|
||||
## spec todo
|
||||
|
||||
- generate common denominator with conversion functions
|
||||
- how should we handle the tokens for auth?
|
||||
|
||||
## future problems to visit
|
||||
|
||||
- api features
|
||||
- list agent modes available
|
||||
- list models available
|
||||
- handle planning mode
|
||||
- api key gateway
|
||||
- configuring mcp/skills/etc
|
||||
- process management inside container
|
||||
- otel
|
||||
- better authentication systems
|
||||
- s3-based file system
|
||||
- ai sdk compatibility for their ecosystem (useChat, etc)
|
||||
- resumable messages
|
||||
- todo lists
|
||||
- all other features
|
||||
- misc
|
||||
- bootstrap tool that extracts tokens from the current system
|
||||
- skill
|
||||
- pre-package these as bun binaries instead of npm installations
|
||||
- build & release pipeline with musl
|
||||
- agent feature matrix for api features
|
||||
- tunnels
|
||||
|
||||
## future work
|
||||
|
||||
- mcp integration (can connect to given endpoints)
|
||||
- provide a pty to access the agent data
|
||||
- other agent features like file system
|
||||
- python sdk
|
||||
|
||||
## misc
|
||||
|
||||
comparison to agentapi:
|
||||
- it does not use the pty since we need to get more information from the agent
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
# Open Questions
|
||||
# Open Questions / Ambiguities
|
||||
|
||||
- None yet.
|
||||
- OpenCode server HTTP paths and payloads may differ; current implementation assumes `POST /session`, `POST /session/{id}/prompt`, and `GET /event/subscribe` with JSON `data:` SSE frames.
|
||||
- OpenCode question/permission reply endpoints are assumed as `POST /question/reply`, `/question/reject`, `/permission/reply` with `requestID` fields; confirm actual API shape.
|
||||
- SSE events may not always include `sessionID`/`sessionId` fields; confirm if filtering should use a different field.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
# Required Tests
|
||||
|
||||
- `test_agents_install_version_spawn` (installs, checks version, spawns prompt for Claude/Codex/OpenCode; Amp spawn runs only if `~/.amp/config.json` exists)
|
||||
- daemon http api: smoke tests for each endpoint response shape/status
|
||||
- cli: subcommands hit expected endpoints and handle error responses
|
||||
- Session manager streams JSONL line-by-line for Claude/Codex/Amp and yields incremental events.
|
||||
- `/sessions/{id}/messages` returns immediately while background ingestion populates `/events` and `/events/sse`.
|
||||
- SSE subscription delivers live events after the initial offset batch.
|
||||
- OpenCode server mode: create session, send prompt, and receive SSE events filtered to the session.
|
||||
- OpenCode question/permission reply endpoints forward to server APIs.
|
||||
|
|
|
|||
105
todo.md
105
todo.md
|
|
@ -1,9 +1,98 @@
|
|||
# TODO
|
||||
# TODO (from spec.md)
|
||||
|
||||
- [x] Scaffold `engine/packages/sandbox-daemon` crate
|
||||
- [x] Implement agent management modules (install/version/spawn basics)
|
||||
- [x] Add tests for agent install/version/spawn
|
||||
- [x] Track required tests in `spec/required-tests.md`
|
||||
- [x] Track open questions in `spec/im-not-sure.md`
|
||||
- [ ] Hook sandbox/session management into the daemon router handlers
|
||||
- [ ] Replace noop schemas with universal agent schema and remove the old schema
|
||||
## Universal API + Types
|
||||
- [x] Define universal base types for agent input/output (common denominator across schemas)
|
||||
- [x] Add universal question + permission types (HITL) and ensure they are supported end-to-end
|
||||
- [x] Define `UniversalEvent` + `UniversalEventData` union and `AgentError` shape
|
||||
- [x] Define a universal message type for "failed to parse" with raw JSON payload
|
||||
- [x] Implement 2-way converters:
|
||||
- [x] Universal input message <-> agent-specific input
|
||||
- [x] Universal event <-> agent-specific event
|
||||
- [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary
|
||||
- [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently
|
||||
|
||||
## Daemon (Rust HTTP server)
|
||||
- [x] Build axum router + utoipa + schemars integration
|
||||
- [x] Implement RFC 7807 Problem Details error responses backed by a `thiserror` enum
|
||||
- [x] Implement canonical error `type` values + required error variants from spec
|
||||
- [x] Implement offset semantics for events (exclusive last-seen id, default offset 0)
|
||||
- [x] Implement SSE endpoint for events with same semantics as JSON endpoint
|
||||
- [x] Replace in-memory session store with sandbox session manager (questions/permissions routing, long-lived processes)
|
||||
|
||||
## CLI
|
||||
- [x] Implement clap CLI flags: `--token`, `--no-token`, `--host`, `--port`, CORS flags
|
||||
- [x] Implement a CLI endpoint for every HTTP endpoint
|
||||
- [ ] Update `CLAUDE.md` to keep CLI endpoints in sync with HTTP API changes
|
||||
- [x] Prefix CLI API requests with `/v1`
|
||||
|
||||
## HTTP API Endpoints
|
||||
- [x] POST `/agents/{}/install` with `reinstall` handling
|
||||
- [x] GET `/agents/{}/modes` (mode discovery or hardcoded)
|
||||
- [x] GET `/agents` (installed/version/path; version checked at request time)
|
||||
- [x] POST `/sessions/{}` (create session, install if needed, return health + agentSessionId)
|
||||
- [x] POST `/sessions/{}/messages` (send prompt)
|
||||
- [x] GET `/sessions/{}/events` (pagination with offset/limit)
|
||||
- [x] GET `/sessions/{}/events/sse` (streaming)
|
||||
- [x] POST `/sessions/{}/questions/{questionId}/reply`
|
||||
- [x] POST `/sessions/{}/questions/{questionId}/reject`
|
||||
- [x] POST `/sessions/{}/permissions/{permissionId}/reply`
|
||||
- [x] Prefix all HTTP API endpoints with `/v1`
|
||||
|
||||
## Agent Management
|
||||
- [x] Implement install/version/spawn basics for Claude/Codex/OpenCode/Amp
|
||||
- [x] Implement agent install URL patterns + platform mappings for supported OS/arch
|
||||
- [x] Parse JSONL output for subprocess agents and extract session/result metadata
|
||||
- [x] Map permissionMode to agent CLI flags (Claude/Codex/Amp)
|
||||
- [x] Implement session resume flags for Claude/OpenCode/Amp (Codex unsupported)
|
||||
- [x] Replace sandbox-daemon core agent modules with new agent-management crate (delete originals)
|
||||
- [x] Stabilize agent-management crate API and fix build issues (sandbox-daemon currently wired to WIP crate)
|
||||
- [x] Implement OpenCode shared server lifecycle (`opencode serve`, health, restart)
|
||||
- [x] Implement OpenCode HTTP session APIs + SSE event stream integration
|
||||
- [x] Implement JSONL parsing for subprocess agents and map to `UniversalEvent`
|
||||
- [x] Capture agent session id from events and expose as `agentSessionId`
|
||||
- [x] Handle agent process exit and map to `agent_process_exited` error
|
||||
- [x] Implement agentMode discovery rules (OpenCode API, hardcoded others)
|
||||
- [x] Enforce permissionMode behavior (default/plan/bypass) for subprocesses
|
||||
|
||||
## Credentials
|
||||
- [x] Implement credential extraction module (Claude/Codex/OpenCode)
|
||||
- [x] Add Amp credential extraction (config-based)
|
||||
- [x] Move credential extraction into `agent-credentials` crate
|
||||
- [ ] Pass extracted credentials into subprocess env vars per agent
|
||||
- [ ] Ensure OpenCode server reads credentials from config on startup
|
||||
|
||||
## Testing
|
||||
- [ ] Build a universal agent test suite that exercises all features (messages, questions, permissions, etc.) using HTTP API
|
||||
- [ ] Run the full suite against every agent (Claude/Codex/OpenCode/Amp) without mocks
|
||||
- [x] Add real install/version/spawn tests for Claude/Codex/OpenCode (Amp conditional)
|
||||
- [x] Expand agent lifecycle tests (reinstall, session id extraction, resume, plan mode)
|
||||
- [ ] Add OpenCode server-mode tests (session create, prompt, SSE)
|
||||
- [ ] Add tests for question/permission flows using deterministic prompts
|
||||
|
||||
## Frontend (frontend/packages/web)
|
||||
- [x] Build Vite + React app with connect screen (endpoint + optional token)
|
||||
- [x] Add instructions to run sandbox-daemon (including CORS)
|
||||
- [x] Implement full agent UI covering all features
|
||||
- [x] Add HTTP request log with copyable curl command
|
||||
|
||||
## TypeScript SDK
|
||||
- [x] Generate OpenAPI from utoipa and run `openapi-typescript`
|
||||
- [x] Implement a thin fetch-based client wrapper
|
||||
- [x] Update `CLAUDE.md` to require SDK + CLI updates when API changes
|
||||
- [x] Prefix SDK requests with `/v1`
|
||||
|
||||
## Examples + Tests
|
||||
- [ ] Add examples for Docker, E2B, Daytona, Vercel Sandboxes, Cloudflare Sandboxes
|
||||
- [ ] Add Vitest unit test for each example (Cloudflare requires special setup)
|
||||
|
||||
## Documentation
|
||||
- [ ] Write README covering architecture, agent compatibility, and deployment guide
|
||||
- [ ] Add universal API feature checklist (questions, approve plan, etc.)
|
||||
- [ ] Document CLI, HTTP API, frontend app, and TypeScript SDK usage
|
||||
- [ ] Use collapsible sections for endpoints and SDK methods
|
||||
|
||||
---
|
||||
|
||||
- implement release pipeline
|
||||
- implement e2b example
|
||||
- implement typescript "start locally" by pulling from server using version
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue