From d24f983e2c6216b53865bafdbda9a36fbe620e1a Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 27 Jan 2026 03:42:41 -0800 Subject: [PATCH] feat: add mock server mode for UI testing --- CLAUDE.md | 2 + README.md | 8 + ROADMAP.md | 1 + bugs.md | 2 - docs/building-chat-ui.mdx | 167 ++++ docs/docs.json | 9 +- docs/sdks/typescript.mdx | 130 ++++ frontend/packages/inspector/index.html | 31 + frontend/packages/inspector/src/App.tsx | 30 +- frontend/packages/inspector/vite.config.ts | 10 +- package.json | 3 +- server/packages/sandbox-agent/src/main.rs | 12 +- server/packages/sandbox-agent/src/router.rs | 710 +++++++++++++++++- .../sandbox-agent/tests/agent_agnostic.rs | 7 +- .../sandbox-agent/tests/http_sse_snapshots.rs | 4 +- .../sandbox-agent/tests/inspector_ui.rs | 4 +- spec/im-not-sure.md | 5 - spec/required-tests.md | 7 - spec/universal-schema.json | 553 -------------- spec/universal-schema.md | 143 ---- todo.md | 118 +-- 21 files changed, 1108 insertions(+), 848 deletions(-) delete mode 100644 bugs.md create mode 100644 docs/building-chat-ui.mdx create mode 100644 docs/sdks/typescript.mdx delete mode 100644 spec/im-not-sure.md delete mode 100644 spec/required-tests.md delete mode 100644 spec/universal-schema.json delete mode 100644 spec/universal-schema.md diff --git a/CLAUDE.md b/CLAUDE.md index 7a0af06..b3e3299 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,10 +34,12 @@ Universal schema guidance: - Do not make breaking changes to API endpoints. - When changing API routes, ensure the HTTP/SSE test suite has full coverage of every route. - When agent schema changes, ensure API tests cover the new schema and event shapes end-to-end. +- When the universal schema changes, update mock-mode events to cover the new fields or event types. - Update `docs/conversion.md` whenever agent-native schema terms, synthetic events, identifier mappings, or conversion logic change. - Never use synthetic data or mocked responses in tests. 
- Never manually write agent types; always use generated types in `resources/agent-schemas/`. If types are broken, fix the generated types. - The universal schema must provide consistent behavior across providers; avoid requiring frontend/client logic to special-case agents. +- The UI must reflect every field in AgentCapabilities; keep it in sync with the README feature matrix and `agent_capabilities_for`. - When parsing agent data, if something is unexpected or does not match the schema, bail out and surface the error rather than trying to continue with partial parsing. - When defining the universal schema, choose the option most compatible with native agent APIs, and add synthetics to fill gaps for other agents. - Use `docs/glossary.md` as the source of truth for universal schema terminology and keep it updated alongside schema changes. diff --git a/README.md b/README.md index e79c5ab..b19a7d6 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,10 @@ Docs ### Server +- Install server + - curl (fastest & does not require npm) + - npm i -g (slower) + - npx (for quick runs) - Run server - Auth @@ -71,6 +75,10 @@ Docs Docs +### Tip: Extracting API Keys + +TODO: npx command to get API keys + ## Project Goals This project aims to solve 3 problems with agents: diff --git a/ROADMAP.md b/ROADMAP.md index 299cf8e..35f4ca1 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,5 +1,6 @@ ## launch +- examples for daytona, e2b - provide mock data for validating your rendering - provides history with all items, then iterates thorugh all items on a stream - this is a special type of serve function diff --git a/bugs.md b/bugs.md deleted file mode 100644 index e5e065b..0000000 --- a/bugs.md +++ /dev/null @@ -1,2 +0,0 @@ -- openai exteacted credentials do not work - diff --git a/docs/building-chat-ui.mdx b/docs/building-chat-ui.mdx new file mode 100644 index 0000000..40a0310 --- /dev/null +++ b/docs/building-chat-ui.mdx @@ -0,0 +1,167 @@ +--- +title: "Building a Chat UI" +description: 
"Design a client that renders universal session events consistently across providers." +--- + +This guide explains how to build a chat UI that works across all agents using the universal event +stream. + +## High-level flow + +1. List agents and read their capabilities. +2. Create a session for the selected agent. +3. Send user messages. +4. Subscribe to events (polling or SSE). +5. Render items and deltas into a stable message timeline. + +## Use agent capabilities + +Capabilities tell you which features are supported for the selected agent: + +- `toolCalls` and `toolResults` indicate tool execution events. +- `questions` and `permissions` indicate HITL flows. +- `planMode` indicates that the agent supports plan-only execution. + +Use these to enable or disable UI affordances (tool panels, approval buttons, etc.). + +## Event model + +Every event includes: + +- `event_id`, `sequence`, and `time` for ordering. +- `session_id` for the universal session. +- `native_session_id` for provider-specific debugging. +- `event_type` with one of: + - `session.started`, `session.ended` + - `item.started`, `item.delta`, `item.completed` + - `permission.requested`, `permission.resolved` + - `question.requested`, `question.resolved` + - `error`, `agent.unparsed` +- `data` which holds the payload for the event type. +- `synthetic` and `source` to show daemon-generated events. +- `raw` (optional) when `include_raw=true`. + +## Rendering items + +Items are emitted in three phases: + +- `item.started`: first snapshot of a message or tool item. +- `item.delta`: incremental updates (token streaming or synthetic deltas). +- `item.completed`: final snapshot. 
+ +Recommended render flow: + +```ts +type ItemState = { + item: UniversalItem; + deltas: string[]; +}; + +const items = new Map<string, ItemState>(); +const order: string[] = []; + +function applyEvent(event: UniversalEvent) { + if (event.event_type === "item.started") { + const item = event.data.item; + items.set(item.item_id, { item, deltas: [] }); + order.push(item.item_id); + } + + if (event.event_type === "item.delta") { + const { item_id, delta } = event.data; + const state = items.get(item_id); + if (state) { + state.deltas.push(delta); + } + } + + if (event.event_type === "item.completed") { + const item = event.data.item; + const state = items.get(item.item_id); + if (state) { + state.item = item; + } + } +} +``` + +When rendering, combine the item content with accumulated deltas. If you receive a delta before a +started event (should not happen), treat it as an error. + +## Content parts + +Each `UniversalItem` has `content` parts. Your UI can branch on `part.type`: + +- `text` for normal chat text. +- `tool_call` and `tool_result` for tool execution. +- `file_ref` for file read/write/patch previews. +- `reasoning` if you display public reasoning text. +- `status` for progress updates. +- `image` for image outputs. + +Treat `item.kind` as the primary layout decision (message vs tool call vs system), and use content +parts for the detailed rendering. + +## Questions and permissions + +Question and permission events are out-of-band from item flow. Render them as modal or inline UI +blocks that must be resolved via: + +- `POST /v1/sessions/{session_id}/questions/{question_id}/reply` +- `POST /v1/sessions/{session_id}/questions/{question_id}/reject` +- `POST /v1/sessions/{session_id}/permissions/{permission_id}/reply` + +If an agent does not advertise these capabilities, keep those UI controls hidden. + +## Error and unparsed events + +- `error` events are structured failures from the daemon or agent. 
+- `agent.unparsed` indicates the provider emitted something the converter could not parse. + +Treat `agent.unparsed` as a hard failure in development so you can fix converters quickly. + +## Event ordering + +Prefer `sequence` for ordering. It is monotonic for a given session. The `time` field is for +timestamps, not ordering. + +## Handling session end + +`session.ended` includes the reason and who terminated it. Disable input after a terminal event. + +## Optional raw payloads + +If you need provider-level debugging, pass `include_raw=true` when streaming or polling events to +receive the `raw` payload for each event. + +## SSE vs polling + +- SSE gives low-latency updates and simplifies streaming UIs. +- Polling is simpler to debug and works in any environment. + +Both yield the same event payloads. + +## Mock mode for UI testing + +Run the server with `--mock` to emit a looping, feature-complete event history for UI development: + +```bash +sandbox-agent server --mock --no-token +``` + +Behavior in mock mode: + +- Sessions emit a fixed history that covers every event type and content part. +- The history repeats in a loop, with ~200ms between events and a ~2s pause between loops. +- `session.started` and `session.ended` are included in every loop so UIs can exercise lifecycle handling. +- `send-message` is accepted but does not change the mock stream. + +If your UI stops rendering after `session.ended`, disable that behavior while testing mock mode so the +loop remains visible. + +## Reference implementation + +The [Inspector chat UI](https://github.com/rivet-dev/sandbox-agent/blob/main/frontend/packages/inspector/src/App.tsx) +is a complete reference implementation showing how to build a chat interface using the universal event +stream. It demonstrates session management, event rendering, item lifecycle handling, and HITL approval +flows. 
diff --git a/docs/docs.json b/docs/docs.json index a13e72a..ce63fcf 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -30,7 +30,14 @@ { "group": "Operations", "pages": [ - "frontend" + "frontend", + "building-chat-ui" + ] + }, + { + "group": "SDKs", + "pages": [ + "sdks/typescript" ] } ] diff --git a/docs/sdks/typescript.mdx b/docs/sdks/typescript.mdx new file mode 100644 index 0000000..3e6f080 --- /dev/null +++ b/docs/sdks/typescript.mdx @@ -0,0 +1,130 @@ +--- +title: "TypeScript SDK" +description: "Use the generated client to manage sessions and stream events." +--- + +The TypeScript SDK is generated from the OpenAPI spec that ships with the daemon. It provides a typed +client for sessions, events, and agent operations. + +## Install + +```bash +npm install sandbox-agent +``` + +## Create a client + +```ts +import { SandboxDaemonClient } from "sandbox-agent"; + +const client = new SandboxDaemonClient({ + baseUrl: "http://127.0.0.1:2468", + token: process.env.SANDBOX_TOKEN, +}); +``` + +Or with the factory helper: + +```ts +import { createSandboxDaemonClient } from "sandbox-agent"; + +const client = createSandboxDaemonClient({ + baseUrl: "http://127.0.0.1:2468", +}); +``` + +## Autospawn (Node only) + +If you run locally, the SDK can launch the daemon for you. + +```ts +import { connectSandboxDaemonClient } from "sandbox-agent"; + +const client = await connectSandboxDaemonClient({ + spawn: { enabled: true }, +}); + +await client.dispose(); +``` + +Autospawn uses the local `sandbox-agent` binary. Install `@sandbox-agent/cli` (recommended) or set +`SANDBOX_AGENT_BIN` to a custom path. 
+ +## Sessions and messages + +```ts +await client.createSession("demo-session", { + agent: "codex", + agent_mode: "default", + permission_mode: "plan", +}); + +await client.postMessage("demo-session", { message: "Hello" }); +``` + +List agents and pick a compatible one: + +```ts +const agents = await client.listAgents(); +const codex = agents.agents.find((agent) => agent.id === "codex"); +console.log(codex?.capabilities); +``` + +## Poll events + +```ts +const events = await client.getEvents("demo-session", { + offset: 0, + limit: 200, + include_raw: false, +}); + +for (const event of events.events) { + console.log(event.event_type, event.data); +} +``` + +## Stream events (SSE) + +```ts +for await (const event of client.streamEvents("demo-session", { + offset: 0, + include_raw: false, +})) { + console.log(event.event_type, event.data); +} +``` + +The SDK parses `text/event-stream` into `UniversalEvent` objects. If you want full control, use +`getEventsSse()` and parse the stream yourself. + +## Optional raw payloads + +Set `include_raw: true` on `getEvents` or `streamEvents` to include the raw provider payload in +`event.raw`. This is useful for debugging and conversion analysis. + +## Error handling + +All HTTP errors throw `SandboxDaemonError`: + +```ts +import { SandboxDaemonError } from "sandbox-agent"; + +try { + await client.postMessage("missing-session", { message: "Hi" }); +} catch (error) { + if (error instanceof SandboxDaemonError) { + console.error(error.status, error.problem); + } +} +``` + +## Types + +The SDK exports OpenAPI-derived types for events, items, and capabilities: + +```ts +import type { UniversalEvent, UniversalItem, AgentCapabilities } from "sandbox-agent"; +``` + +See `docs/universal-api.mdx` for the universal schema fields and semantics. 
diff --git a/frontend/packages/inspector/index.html b/frontend/packages/inspector/index.html index 2371c28..6cbfbf8 100644 --- a/frontend/packages/inspector/index.html +++ b/frontend/packages/inspector/index.html @@ -1314,6 +1314,37 @@ white-space: nowrap; } + /* Capability Badges */ + .capability-badges { + display: flex; + flex-wrap: wrap; + gap: 6px; + } + + .capability-badge { + display: inline-flex; + align-items: center; + gap: 4px; + padding: 3px 8px; + border-radius: 4px; + font-size: 10px; + font-weight: 500; + } + + .capability-badge.enabled { + background: rgba(48, 209, 88, 0.12); + color: var(--success); + } + + .capability-badge.disabled { + background: rgba(255, 255, 255, 0.04); + color: var(--muted-2); + } + + .capability-badge svg { + flex-shrink: 0; + } + /* Scrollbar */ .messages-container::-webkit-scrollbar, .debug-content::-webkit-scrollbar { diff --git a/frontend/packages/inspector/src/App.tsx b/frontend/packages/inspector/src/App.tsx index 0dfcc53..8c256a8 100644 --- a/frontend/packages/inspector/src/App.tsx +++ b/frontend/packages/inspector/src/App.tsx @@ -2,6 +2,7 @@ import { Clipboard, Cloud, Download, + GitBranch, HelpCircle, MessageSquare, PauseCircle, @@ -11,6 +12,7 @@ import { Send, Shield, Terminal, + Wrench, Zap } from "lucide-react"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; @@ -85,14 +87,24 @@ const formatJson = (value: unknown) => { const escapeSingleQuotes = (value: string) => value.replace(/'/g, `'\\''`); -const formatCapabilities = (capabilities: AgentCapabilities) => { - const parts = [ - `planMode ${capabilities.planMode ? "✓" : "—"}`, - `permissions ${capabilities.permissions ? "✓" : "—"}`, - `questions ${capabilities.questions ? "✓" : "—"}`, - `toolCalls ${capabilities.toolCalls ? 
"✓" : "—"}` +const CapabilityBadges = ({ capabilities }: { capabilities: AgentCapabilities }) => { + const items = [ + { key: "planMode", label: "Plan", icon: GitBranch, enabled: capabilities.planMode }, + { key: "permissions", label: "Perms", icon: Shield, enabled: capabilities.permissions }, + { key: "questions", label: "Q&A", icon: HelpCircle, enabled: capabilities.questions }, + { key: "toolCalls", label: "Tools", icon: Wrench, enabled: capabilities.toolCalls } ]; - return parts.join(" · "); + + return ( +
+ {items.map(({ key, label, icon: Icon, enabled }) => ( + + + {label} + + ))} +
+ ); }; const buildCurl = (method: string, url: string, body?: string, token?: string) => { @@ -1459,8 +1471,8 @@ export default function App() { {agent.version ? `v${agent.version}` : "Version unknown"} {agent.path && {agent.path}} -
- Capabilities: {formatCapabilities(agent.capabilities ?? emptyCapabilities)} +
+
{modesByAgent[agent.id] && modesByAgent[agent.id].length > 0 && (
diff --git a/frontend/packages/inspector/vite.config.ts b/frontend/packages/inspector/vite.config.ts index 068e57f..5280cd7 100644 --- a/frontend/packages/inspector/vite.config.ts +++ b/frontend/packages/inspector/vite.config.ts @@ -5,6 +5,12 @@ export default defineConfig(({ command }) => ({ base: command === "build" ? "/ui/" : "/", plugins: [react()], server: { - port: 5173 - } + port: 5173, + proxy: { + "/v1": { + target: "http://localhost:2468", + changeOrigin: true, + }, + }, + }, })); diff --git a/package.json b/package.json index e0d62c1..6835c0a 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "build": "turbo run build", "dev": "turbo run dev --parallel", "generate": "turbo run generate", - "typecheck": "turbo run typecheck" + "typecheck": "turbo run typecheck", + "docs": "pnpm dlx mintlify dev docs" }, "devDependencies": { "turbo": "^2.4.0" diff --git a/server/packages/sandbox-agent/src/main.rs b/server/packages/sandbox-agent/src/main.rs index 7cf4205..6871938 100644 --- a/server/packages/sandbox-agent/src/main.rs +++ b/server/packages/sandbox-agent/src/main.rs @@ -11,7 +11,7 @@ use sandbox_agent_agent_management::credentials::{ ProviderCredentials, }; use sandbox_agent::router::{ - AgentInstallRequest, AppState, AuthConfig, CreateSessionRequest, MessageRequest, + AgentInstallRequest, AppState, AuthConfig, CreateSessionRequest, MessageRequest, MockConfig, PermissionReply, PermissionReplyRequest, QuestionReplyRequest, }; use sandbox_agent::router::{AgentListResponse, AgentModesResponse, CreateSessionResponse, EventsResponse}; @@ -72,6 +72,9 @@ struct ServerArgs { #[arg(long = "cors-allow-credentials", short = 'C')] cors_allow_credentials: bool, + + #[arg(long)] + mock: bool, } #[derive(Args, Debug)] @@ -334,7 +337,12 @@ fn run_server(cli: &Cli, server: &ServerArgs) -> Result<(), CliError> { let agent_manager = AgentManager::new(default_install_dir()).map_err(|err| CliError::Server(err.to_string()))?; - let state = AppState::new(auth, 
agent_manager); + let mock = if server.mock { + MockConfig::enabled() + } else { + MockConfig::disabled() + }; + let state = AppState::new(auth, agent_manager, mock); let mut router = build_router(state); if let Some(cors) = build_cors_layer(server)? { diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index e82f619..0855f16 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -65,6 +65,34 @@ use sandbox_agent_agent_management::credentials::{ }; use crate::ui; +const MOCK_EVENT_DELAY_MS: u64 = 200; +const MOCK_LOOP_DELAY_MS: u64 = 2000; + +#[derive(Debug, Clone)] +pub struct MockConfig { + enabled: bool, + event_delay: Duration, + loop_delay: Duration, +} + +impl MockConfig { + pub fn disabled() -> Self { + Self { + enabled: false, + event_delay: Duration::ZERO, + loop_delay: Duration::ZERO, + } + } + + pub fn enabled() -> Self { + Self { + enabled: true, + event_delay: Duration::from_millis(MOCK_EVENT_DELAY_MS), + loop_delay: Duration::from_millis(MOCK_LOOP_DELAY_MS), + } + } +} + #[derive(Debug)] pub struct AppState { auth: AuthConfig, @@ -73,9 +101,9 @@ pub struct AppState { } impl AppState { - pub fn new(auth: AuthConfig, agent_manager: AgentManager) -> Self { + pub fn new(auth: AuthConfig, agent_manager: AgentManager, mock: MockConfig) -> Self { let agent_manager = Arc::new(agent_manager); - let session_manager = Arc::new(SessionManager::new(agent_manager.clone())); + let session_manager = Arc::new(SessionManager::new(agent_manager.clone(), mock)); Self { auth, agent_manager, @@ -565,6 +593,7 @@ struct SessionManager { sessions: Mutex>, opencode_server: Mutex>, http_client: Client, + mock: MockConfig, } #[derive(Debug)] @@ -580,12 +609,13 @@ struct SessionSubscription { } impl SessionManager { - fn new(agent_manager: Arc) -> Self { + fn new(agent_manager: Arc, mock: MockConfig) -> Self { Self { agent_manager, sessions: Mutex::new(HashMap::new()), 
opencode_server: Mutex::new(None), http_client: Client::new(), + mock, } } @@ -602,6 +632,10 @@ impl SessionManager { } } + if self.mock.enabled { + return self.create_mock_session(session_id, agent_id, request).await; + } + let manager = self.agent_manager.clone(); let agent_version = request.agent_version.clone(); let agent_name = request.agent.clone(); @@ -660,6 +694,32 @@ impl SessionManager { }) } + async fn create_mock_session( + self: &Arc, + session_id: String, + agent_id: AgentId, + request: CreateSessionRequest, + ) -> Result { + let mut session = SessionState::new(session_id.clone(), agent_id, &request)?; + session.native_session_id = Some(format!("mock-{session_id}")); + let native_session_id = session.native_session_id.clone(); + + let mut sessions = self.sessions.lock().await; + sessions.insert(session_id.clone(), session); + drop(sessions); + + let manager = Arc::clone(self); + tokio::spawn(async move { + manager.run_mock_loop(session_id).await; + }); + + Ok(CreateSessionResponse { + healthy: true, + error: None, + native_session_id, + }) + } + async fn agent_modes(&self, agent: AgentId) -> Result, SandboxError> { if agent != AgentId::Opencode { return Ok(agent_modes_for(agent)); @@ -683,6 +743,11 @@ impl SessionManager { session_id: String, message: String, ) -> Result<(), SandboxError> { + if self.mock.enabled { + self.session_snapshot(&session_id, false).await?; + return Ok(()); + } + let session_snapshot = self.session_snapshot(&session_id, false).await?; if session_snapshot.agent == AgentId::Opencode { self.ensure_opencode_stream(session_id.clone()).await?; @@ -833,6 +898,43 @@ impl SessionManager { question_id: &str, answers: Vec>, ) -> Result<(), SandboxError> { + if self.mock.enabled { + let pending = { + let mut sessions = self.sessions.lock().await; + let session = sessions.get_mut(session_id).ok_or_else(|| SandboxError::SessionNotFound { + session_id: session_id.to_string(), + })?; + if let Some(err) = session.ended_error() { + return 
Err(err); + } + session.take_question(question_id) + }; + let (prompt, options) = match pending { + Some(pending) => (pending.prompt, pending.options), + None => ( + "Mock question prompt".to_string(), + vec!["Option A".to_string(), "Option B".to_string()], + ), + }; + let response = answers + .first() + .and_then(|inner| inner.first()) + .cloned(); + let resolved = EventConversion::new( + UniversalEventType::QuestionResolved, + UniversalEventData::Question(QuestionEventData { + question_id: question_id.to_string(), + prompt, + options, + response, + status: QuestionStatus::Answered, + }), + ) + .synthetic(); + let _ = self.record_conversions(session_id, vec![resolved]).await; + return Ok(()); + } + let (agent, native_session_id, pending_question) = { let mut sessions = self.sessions.lock().await; let session = sessions.get_mut(session_id).ok_or_else(|| SandboxError::SessionNotFound { @@ -891,6 +993,39 @@ impl SessionManager { session_id: &str, question_id: &str, ) -> Result<(), SandboxError> { + if self.mock.enabled { + let pending = { + let mut sessions = self.sessions.lock().await; + let session = sessions.get_mut(session_id).ok_or_else(|| SandboxError::SessionNotFound { + session_id: session_id.to_string(), + })?; + if let Some(err) = session.ended_error() { + return Err(err); + } + session.take_question(question_id) + }; + let (prompt, options) = match pending { + Some(pending) => (pending.prompt, pending.options), + None => ( + "Mock question prompt".to_string(), + vec!["Option A".to_string(), "Option B".to_string()], + ), + }; + let resolved = EventConversion::new( + UniversalEventType::QuestionResolved, + UniversalEventData::Question(QuestionEventData { + question_id: question_id.to_string(), + prompt, + options, + response: None, + status: QuestionStatus::Rejected, + }), + ) + .synthetic(); + let _ = self.record_conversions(session_id, vec![resolved]).await; + return Ok(()); + } + let (agent, native_session_id, pending_question) = { let mut sessions = 
self.sessions.lock().await; let session = sessions.get_mut(session_id).ok_or_else(|| SandboxError::SessionNotFound { @@ -945,6 +1080,40 @@ impl SessionManager { permission_id: &str, reply: PermissionReply, ) -> Result<(), SandboxError> { + if self.mock.enabled { + let pending = { + let mut sessions = self.sessions.lock().await; + let session = sessions.get_mut(session_id).ok_or_else(|| SandboxError::SessionNotFound { + session_id: session_id.to_string(), + })?; + if let Some(err) = session.ended_error() { + return Err(err); + } + session.take_permission(permission_id) + }; + + let (action, metadata) = match pending { + Some(pending) => (pending.action, pending.metadata), + None => ("mock.permission".to_string(), None), + }; + let status = match reply { + PermissionReply::Reject => PermissionStatus::Denied, + PermissionReply::Once | PermissionReply::Always => PermissionStatus::Approved, + }; + let resolved = EventConversion::new( + UniversalEventType::PermissionResolved, + UniversalEventData::Permission(PermissionEventData { + permission_id: permission_id.to_string(), + action, + status, + metadata, + }), + ) + .synthetic(); + let _ = self.record_conversions(session_id, vec![resolved]).await; + return Ok(()); + } + let reply_for_status = reply.clone(); let (agent, native_session_id, codex_sender, pending_permission) = { let mut sessions = self.sessions.lock().await; @@ -1055,6 +1224,50 @@ impl SessionManager { Ok(()) } + async fn run_mock_loop(self: Arc, session_id: String) { + let mut cycle = 0_u64; + let event_delay = self.mock.event_delay; + let loop_delay = self.mock.loop_delay; + + loop { + if self.is_session_ended(&session_id).await { + return; + } + let snapshot = match self.session_snapshot(&session_id, true).await { + Ok(snapshot) => snapshot, + Err(_) => return, + }; + cycle = cycle.saturating_add(1); + let conversions = mock_event_conversions(cycle, &snapshot); + for conversion in conversions { + if self + .record_conversions(&session_id, 
vec![conversion]) + .await + .is_err() + { + return; + } + if event_delay != Duration::ZERO { + sleep(event_delay).await; + } + if self.is_session_ended(&session_id).await { + return; + } + } + if loop_delay != Duration::ZERO { + sleep(loop_delay).await; + } + } + } + + async fn is_session_ended(&self, session_id: &str) -> bool { + let sessions = self.sessions.lock().await; + match sessions.get(session_id) { + Some(session) => session.ended, + None => true, + } + } + async fn session_snapshot( &self, session_id: &str, @@ -1756,10 +1969,22 @@ pub struct AgentModesResponse { #[derive(Debug, Clone, Serialize, Deserialize, ToSchema, JsonSchema)] #[serde(rename_all = "camelCase")] pub struct AgentCapabilities { + // TODO: add agent-agnostic tests that cover every capability flag here. pub plan_mode: bool, pub permissions: bool, pub questions: bool, pub tool_calls: bool, + pub tool_results: bool, + pub text_messages: bool, + pub images: bool, + pub file_attachments: bool, + pub session_lifecycle: bool, + pub error_events: bool, + pub reasoning: bool, + pub command_execution: bool, + pub file_changes: bool, + pub mcp_tools: bool, + pub streaming_deltas: bool, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema, JsonSchema)] @@ -2246,24 +2471,68 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { permissions: false, questions: false, tool_calls: false, + tool_results: false, + text_messages: true, + images: false, + file_attachments: false, + session_lifecycle: false, + error_events: false, + reasoning: false, + command_execution: false, + file_changes: false, + mcp_tools: false, + streaming_deltas: false, }, AgentId::Codex => AgentCapabilities { plan_mode: true, permissions: true, questions: false, tool_calls: true, + tool_results: true, + text_messages: true, + images: true, + file_attachments: true, + session_lifecycle: true, + error_events: true, + reasoning: true, + command_execution: true, + file_changes: true, + mcp_tools: true, + 
streaming_deltas: true, }, AgentId::Opencode => AgentCapabilities { plan_mode: false, permissions: false, questions: false, tool_calls: true, + tool_results: true, + text_messages: true, + images: true, + file_attachments: true, + session_lifecycle: true, + error_events: true, + reasoning: false, + command_execution: false, + file_changes: false, + mcp_tools: false, + streaming_deltas: true, }, AgentId::Amp => AgentCapabilities { plan_mode: false, permissions: false, questions: false, tool_calls: true, + tool_results: true, + text_messages: true, + images: false, + file_attachments: false, + session_lifecycle: false, + error_events: true, + reasoning: false, + command_execution: false, + file_changes: false, + mcp_tools: false, + streaming_deltas: false, }, } } @@ -3172,6 +3441,441 @@ fn text_delta_from_parts(parts: &[ContentPart]) -> Option { } } +fn mock_event_conversions(loop_index: u64, session: &SessionSnapshot) -> Vec { + let prefix = format!("mock_{loop_index}"); + let system_native = format!("{prefix}_system"); + let user_native = format!("{prefix}_user"); + let assistant_native = format!("{prefix}_assistant"); + let status_native = format!("{prefix}_status"); + let tool_call_native = format!("{prefix}_tool_call"); + let tool_result_native = format!("{prefix}_tool_result"); + let image_native = format!("{prefix}_image"); + let unknown_native = format!("{prefix}_unknown"); + let permission_id = format!("{prefix}_permission"); + let permission_deny_id = format!("{prefix}_permission_denied"); + let question_id = format!("{prefix}_question"); + let question_reject_id = format!("{prefix}_question_reject"); + let call_id = format!("{prefix}_call"); + + let metadata = json!({ + "agent": session.agent.as_str(), + "agentMode": session.agent_mode.clone(), + "permissionMode": session.permission_mode.clone(), + "model": session.model.clone(), + "variant": session.variant.clone(), + "mockCycle": loop_index, + }); + + let mut events = Vec::new(); + + events.push( + 
EventConversion::new( + UniversalEventType::SessionStarted, + UniversalEventData::SessionStarted(SessionStartedData { + metadata: Some(metadata), + }), + ) + .synthetic(), + ); + + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + system_native.clone(), + ItemKind::System, + ItemRole::System, + ItemStatus::InProgress, + vec![ContentPart::Text { + text: "System ready for mock events.".to_string(), + }], + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + system_native, + ItemKind::System, + ItemRole::System, + ItemStatus::Completed, + vec![ContentPart::Text { + text: "System ready for mock events.".to_string(), + }], + ), + )); + + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + user_native.clone(), + ItemKind::Message, + ItemRole::User, + ItemStatus::InProgress, + vec![ContentPart::Text { + text: "User: run the mock pipeline.".to_string(), + }], + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + user_native, + ItemKind::Message, + ItemRole::User, + ItemStatus::Completed, + vec![ContentPart::Text { + text: "User: run the mock pipeline.".to_string(), + }], + ), + )); + + let assistant_parts = vec![ + ContentPart::Text { + text: "Mock assistant response with rich content.".to_string(), + }, + ContentPart::Reasoning { + text: "Public reasoning for display.".to_string(), + visibility: ReasoningVisibility::Public, + }, + ContentPart::Reasoning { + text: "Private reasoning hidden by default.".to_string(), + visibility: ReasoningVisibility::Private, + }, + ContentPart::Json { + json: json!({ + "stage": "analysis", + "ok": true, + "cycle": loop_index + }), + }, + ]; + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + assistant_native.clone(), + ItemKind::Message, + ItemRole::Assistant, + ItemStatus::InProgress, + assistant_parts.clone(), + ), + )); + events.push(mock_delta(assistant_native.clone(), 
"Mock assistant ")); + events.push(mock_delta(assistant_native.clone(), "streaming delta.")); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + assistant_native, + ItemKind::Message, + ItemRole::Assistant, + ItemStatus::Completed, + assistant_parts, + ), + )); + + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + status_native.clone(), + ItemKind::Status, + ItemRole::Assistant, + ItemStatus::InProgress, + vec![ContentPart::Status { + label: "Indexing".to_string(), + detail: Some("2 files".to_string()), + }], + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + status_native, + ItemKind::Status, + ItemRole::Assistant, + ItemStatus::Completed, + vec![ContentPart::Status { + label: "Indexing".to_string(), + detail: Some("Done".to_string()), + }], + ), + )); + + let tool_call_part = ContentPart::ToolCall { + name: "mock.search".to_string(), + arguments: "{\"query\":\"example\"}".to_string(), + call_id: call_id.clone(), + }; + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + tool_call_native.clone(), + ItemKind::ToolCall, + ItemRole::Assistant, + ItemStatus::InProgress, + vec![tool_call_part.clone()], + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + tool_call_native, + ItemKind::ToolCall, + ItemRole::Assistant, + ItemStatus::Completed, + vec![tool_call_part], + ), + )); + + let tool_result_parts = vec![ + ContentPart::ToolResult { + call_id: call_id.clone(), + output: "mock search results".to_string(), + }, + ContentPart::FileRef { + path: format!("{prefix}/readme.md"), + action: FileAction::Read, + diff: None, + }, + ContentPart::FileRef { + path: format!("{prefix}/output.txt"), + action: FileAction::Write, + diff: Some("+mock output\n".to_string()), + }, + ContentPart::FileRef { + path: format!("{prefix}/patch.txt"), + action: FileAction::Patch, + diff: Some("@@ -1,1 +1,1 
@@\n-old\n+new\n".to_string()), + }, + ]; + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + tool_result_native.clone(), + ItemKind::ToolResult, + ItemRole::Tool, + ItemStatus::InProgress, + tool_result_parts.clone(), + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + tool_result_native, + ItemKind::ToolResult, + ItemRole::Tool, + ItemStatus::Failed, + tool_result_parts, + ), + )); + + let image_parts = vec![ + ContentPart::Text { + text: "Here is a mock image output.".to_string(), + }, + ContentPart::Image { + path: format!("{prefix}/image.png"), + mime: Some("image/png".to_string()), + }, + ]; + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + image_native.clone(), + ItemKind::Message, + ItemRole::Assistant, + ItemStatus::InProgress, + image_parts.clone(), + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + image_native, + ItemKind::Message, + ItemRole::Assistant, + ItemStatus::Completed, + image_parts, + ), + )); + + events.push(mock_item_event( + UniversalEventType::ItemStarted, + mock_item( + unknown_native.clone(), + ItemKind::Unknown, + ItemRole::Assistant, + ItemStatus::InProgress, + vec![ContentPart::Text { + text: "Unknown item kind example.".to_string(), + }], + ), + )); + events.push(mock_item_event( + UniversalEventType::ItemCompleted, + mock_item( + unknown_native, + ItemKind::Unknown, + ItemRole::Assistant, + ItemStatus::Completed, + vec![ContentPart::Text { + text: "Unknown item kind example.".to_string(), + }], + ), + )); + + let permission_metadata = json!({ + "codexRequestKind": "commandExecution", + "command": "echo mock" + }); + events.push(EventConversion::new( + UniversalEventType::PermissionRequested, + UniversalEventData::Permission(PermissionEventData { + permission_id: permission_id.clone(), + action: "command_execution".to_string(), + status: PermissionStatus::Requested, + metadata: 
Some(permission_metadata), + }), + )); + events.push(EventConversion::new( + UniversalEventType::PermissionResolved, + UniversalEventData::Permission(PermissionEventData { + permission_id: permission_id, + action: "command_execution".to_string(), + status: PermissionStatus::Approved, + metadata: None, + }), + )); + + let permission_metadata_deny = json!({ + "codexRequestKind": "fileChange", + "path": format!("{prefix}/deny.txt") + }); + events.push(EventConversion::new( + UniversalEventType::PermissionRequested, + UniversalEventData::Permission(PermissionEventData { + permission_id: permission_deny_id.clone(), + action: "file_change".to_string(), + status: PermissionStatus::Requested, + metadata: Some(permission_metadata_deny), + }), + )); + events.push(EventConversion::new( + UniversalEventType::PermissionResolved, + UniversalEventData::Permission(PermissionEventData { + permission_id: permission_deny_id, + action: "file_change".to_string(), + status: PermissionStatus::Denied, + metadata: None, + }), + )); + + events.push(EventConversion::new( + UniversalEventType::QuestionRequested, + UniversalEventData::Question(QuestionEventData { + question_id: question_id.clone(), + prompt: "Choose a color".to_string(), + options: vec!["Red".to_string(), "Blue".to_string()], + response: None, + status: QuestionStatus::Requested, + }), + )); + events.push(EventConversion::new( + UniversalEventType::QuestionResolved, + UniversalEventData::Question(QuestionEventData { + question_id: question_id, + prompt: "Choose a color".to_string(), + options: vec!["Red".to_string(), "Blue".to_string()], + response: Some("Blue".to_string()), + status: QuestionStatus::Answered, + }), + )); + + events.push(EventConversion::new( + UniversalEventType::QuestionRequested, + UniversalEventData::Question(QuestionEventData { + question_id: question_reject_id.clone(), + prompt: "Allow mock experiment?".to_string(), + options: vec!["Yes".to_string(), "No".to_string()], + response: None, + status: 
QuestionStatus::Requested, + }), + )); + events.push(EventConversion::new( + UniversalEventType::QuestionResolved, + UniversalEventData::Question(QuestionEventData { + question_id: question_reject_id, + prompt: "Allow mock experiment?".to_string(), + options: vec!["Yes".to_string(), "No".to_string()], + response: None, + status: QuestionStatus::Rejected, + }), + )); + + events.push( + EventConversion::new( + UniversalEventType::Error, + UniversalEventData::Error(ErrorData { + message: "Mock error event.".to_string(), + code: Some("mock_error".to_string()), + details: Some(json!({ "cycle": loop_index })), + }), + ) + .synthetic(), + ); + events.push(agent_unparsed( + "mock.stream", + "unsupported payload", + json!({ "raw": "mock" }), + )); + + events.push( + EventConversion::new( + UniversalEventType::SessionEnded, + UniversalEventData::SessionEnded(SessionEndedData { + reason: SessionEndReason::Completed, + terminated_by: TerminatedBy::Agent, + }), + ) + .synthetic(), + ); + + events +} + +fn mock_item( + native_item_id: String, + kind: ItemKind, + role: ItemRole, + status: ItemStatus, + content: Vec<ContentPart>, +) -> UniversalItem { + UniversalItem { + item_id: String::new(), + native_item_id: Some(native_item_id), + parent_id: None, + kind, + role: Some(role), + content, + status, + } +} + +fn mock_item_event(event_type: UniversalEventType, item: UniversalItem) -> EventConversion { + EventConversion::new( + event_type, + UniversalEventData::Item(ItemEventData { item }), + ) +} + +fn mock_delta(native_item_id: String, delta: &str) -> EventConversion { + EventConversion::new( + UniversalEventType::ItemDelta, + UniversalEventData::ItemDelta(ItemDeltaData { + item_id: String::new(), + native_item_id: Some(native_item_id), + delta: delta.to_string(), + }), + ) +} + fn agent_unparsed(location: &str, error: &str, raw: Value) -> EventConversion { EventConversion::new( UniversalEventType::AgentUnparsed, diff --git a/server/packages/sandbox-agent/tests/agent_agnostic.rs 
b/server/packages/sandbox-agent/tests/agent_agnostic.rs index 1de37dc..1f28143 100644 --- a/server/packages/sandbox-agent/tests/agent_agnostic.rs +++ b/server/packages/sandbox-agent/tests/agent_agnostic.rs @@ -17,6 +17,7 @@ use sandbox_agent::router::{ AgentCapabilities, AgentListResponse, AuthConfig, + MockConfig, }; const PROMPT: &str = "Reply with exactly the single word OK."; @@ -41,7 +42,11 @@ impl TestApp { let install_dir = tempfile::tempdir().expect("create temp install dir"); let manager = AgentManager::new(install_dir.path()) .expect("create agent manager"); - let state = sandbox_agent::router::AppState::new(AuthConfig::disabled(), manager); + let state = sandbox_agent::router::AppState::new( + AuthConfig::disabled(), + manager, + MockConfig::disabled(), + ); let app = build_router(state); Self { app, diff --git a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs index 8352f2b..73bf69b 100644 --- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs @@ -12,7 +12,7 @@ use tempfile::TempDir; use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; use sandbox_agent_agent_management::testing::{test_agents_from_env, TestAgentConfig}; use sandbox_agent_agent_credentials::ExtractedCredentials; -use sandbox_agent::router::{build_router, AppState, AuthConfig}; +use sandbox_agent::router::{build_router, AppState, AuthConfig, MockConfig}; use tower::util::ServiceExt; use tower_http::cors::CorsLayer; @@ -39,7 +39,7 @@ impl TestApp { let install_dir = tempfile::tempdir().expect("create temp install dir"); let manager = AgentManager::new(install_dir.path()) .expect("create agent manager"); - let state = AppState::new(auth, manager); + let state = AppState::new(auth, manager, MockConfig::disabled()); let mut app = build_router(state); if let Some(cors) = cors { app = app.layer(cors); diff --git 
a/server/packages/sandbox-agent/tests/inspector_ui.rs b/server/packages/sandbox-agent/tests/inspector_ui.rs index f98a758..91776f6 100644 --- a/server/packages/sandbox-agent/tests/inspector_ui.rs +++ b/server/packages/sandbox-agent/tests/inspector_ui.rs @@ -2,7 +2,7 @@ use axum::body::Body; use axum::http::{Request, StatusCode}; use http_body_util::BodyExt; use sandbox_agent_agent_management::agents::AgentManager; -use sandbox_agent::router::{build_router, AppState, AuthConfig}; +use sandbox_agent::router::{build_router, AppState, AuthConfig, MockConfig}; use sandbox_agent::ui; use tempfile::TempDir; use tower::util::ServiceExt; @@ -15,7 +15,7 @@ async fn serves_inspector_ui() { let install_dir = TempDir::new().expect("create temp install dir"); let manager = AgentManager::new(install_dir.path()).expect("create agent manager"); - let state = AppState::new(AuthConfig::disabled(), manager); + let state = AppState::new(AuthConfig::disabled(), manager, MockConfig::disabled()); let app = build_router(state); let request = Request::builder() diff --git a/spec/im-not-sure.md b/spec/im-not-sure.md deleted file mode 100644 index 290178a..0000000 --- a/spec/im-not-sure.md +++ /dev/null @@ -1,5 +0,0 @@ -# Open Questions / Ambiguities - -- OpenCode server HTTP paths and payloads may differ; current implementation assumes `POST /session`, `POST /session/{id}/prompt`, and `GET /event/subscribe` with JSON `data:` SSE frames. -- OpenCode question/permission reply endpoints are assumed as `POST /question/reply`, `/question/reject`, `/permission/reply` with `requestID` fields; confirm actual API shape. -- SSE events may not always include `sessionID`/`sessionId` fields; confirm if filtering should use a different field. 
diff --git a/spec/required-tests.md b/spec/required-tests.md deleted file mode 100644 index 96a144f..0000000 --- a/spec/required-tests.md +++ /dev/null @@ -1,7 +0,0 @@ -# Required Tests - -- Session manager streams JSONL line-by-line for Claude/Codex/Amp and yields incremental events. -- `/sessions/{id}/messages` returns immediately while background ingestion populates `/events` and `/events/sse`. -- SSE subscription delivers live events after the initial offset batch. -- OpenCode server mode: create session, send prompt, and receive SSE events filtered to the session. -- OpenCode question/permission reply endpoints forward to server APIs. diff --git a/spec/universal-schema.json b/spec/universal-schema.json deleted file mode 100644 index 0170a82..0000000 --- a/spec/universal-schema.json +++ /dev/null @@ -1,553 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "UniversalEvent", - "type": "object", - "required": [ - "data", - "event_id", - "sequence", - "session_id", - "source", - "synthetic", - "time", - "type" - ], - "properties": { - "data": { - "$ref": "#/definitions/UniversalEventData" - }, - "event_id": { - "type": "string" - }, - "native_session_id": { - "type": [ - "string", - "null" - ] - }, - "raw": true, - "sequence": { - "type": "integer", - "format": "uint64", - "minimum": 0.0 - }, - "session_id": { - "type": "string" - }, - "source": { - "$ref": "#/definitions/EventSource" - }, - "synthetic": { - "type": "boolean" - }, - "time": { - "type": "string" - }, - "type": { - "$ref": "#/definitions/UniversalEventType" - } - }, - "definitions": { - "AgentUnparsedData": { - "type": "object", - "required": [ - "error", - "location" - ], - "properties": { - "error": { - "type": "string" - }, - "location": { - "type": "string" - }, - "raw_hash": { - "type": [ - "string", - "null" - ] - } - } - }, - "ContentPart": { - "oneOf": [ - { - "type": "object", - "required": [ - "text", - "type" - ], - "properties": { - "text": { - "type": "string" 
- }, - "type": { - "type": "string", - "enum": [ - "text" - ] - } - } - }, - { - "type": "object", - "required": [ - "json", - "type" - ], - "properties": { - "json": true, - "type": { - "type": "string", - "enum": [ - "json" - ] - } - } - }, - { - "type": "object", - "required": [ - "arguments", - "call_id", - "name", - "type" - ], - "properties": { - "arguments": { - "type": "string" - }, - "call_id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "tool_call" - ] - } - } - }, - { - "type": "object", - "required": [ - "call_id", - "output", - "type" - ], - "properties": { - "call_id": { - "type": "string" - }, - "output": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "tool_result" - ] - } - } - }, - { - "type": "object", - "required": [ - "action", - "path", - "type" - ], - "properties": { - "action": { - "$ref": "#/definitions/FileAction" - }, - "diff": { - "type": [ - "string", - "null" - ] - }, - "path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "file_ref" - ] - } - } - }, - { - "type": "object", - "required": [ - "text", - "type", - "visibility" - ], - "properties": { - "text": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "reasoning" - ] - }, - "visibility": { - "$ref": "#/definitions/ReasoningVisibility" - } - } - }, - { - "type": "object", - "required": [ - "path", - "type" - ], - "properties": { - "mime": { - "type": [ - "string", - "null" - ] - }, - "path": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "image" - ] - } - } - }, - { - "type": "object", - "required": [ - "label", - "type" - ], - "properties": { - "detail": { - "type": [ - "string", - "null" - ] - }, - "label": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "status" - ] - } - } - } - ] - }, - "ErrorData": { - "type": "object", - "required": [ - "message" - ], - "properties": { - "code": { - "type": [ - 
"string", - "null" - ] - }, - "details": true, - "message": { - "type": "string" - } - } - }, - "EventSource": { - "type": "string", - "enum": [ - "agent", - "daemon" - ] - }, - "FileAction": { - "type": "string", - "enum": [ - "read", - "write", - "patch" - ] - }, - "ItemDeltaData": { - "type": "object", - "required": [ - "delta", - "item_id" - ], - "properties": { - "delta": { - "type": "string" - }, - "item_id": { - "type": "string" - }, - "native_item_id": { - "type": [ - "string", - "null" - ] - } - } - }, - "ItemEventData": { - "type": "object", - "required": [ - "item" - ], - "properties": { - "item": { - "$ref": "#/definitions/UniversalItem" - } - } - }, - "ItemKind": { - "type": "string", - "enum": [ - "message", - "tool_call", - "tool_result", - "system", - "status", - "unknown" - ] - }, - "ItemRole": { - "type": "string", - "enum": [ - "user", - "assistant", - "system", - "tool" - ] - }, - "ItemStatus": { - "type": "string", - "enum": [ - "in_progress", - "completed", - "failed" - ] - }, - "PermissionEventData": { - "type": "object", - "required": [ - "action", - "permission_id", - "status" - ], - "properties": { - "action": { - "type": "string" - }, - "metadata": true, - "permission_id": { - "type": "string" - }, - "status": { - "$ref": "#/definitions/PermissionStatus" - } - } - }, - "PermissionStatus": { - "type": "string", - "enum": [ - "requested", - "approved", - "denied" - ] - }, - "QuestionEventData": { - "type": "object", - "required": [ - "options", - "prompt", - "question_id", - "status" - ], - "properties": { - "options": { - "type": "array", - "items": { - "type": "string" - } - }, - "prompt": { - "type": "string" - }, - "question_id": { - "type": "string" - }, - "response": { - "type": [ - "string", - "null" - ] - }, - "status": { - "$ref": "#/definitions/QuestionStatus" - } - } - }, - "QuestionStatus": { - "type": "string", - "enum": [ - "requested", - "answered", - "rejected" - ] - }, - "ReasoningVisibility": { - "type": "string", - 
"enum": [ - "public", - "private" - ] - }, - "SessionEndReason": { - "type": "string", - "enum": [ - "completed", - "error", - "terminated" - ] - }, - "SessionEndedData": { - "type": "object", - "required": [ - "reason", - "terminated_by" - ], - "properties": { - "reason": { - "$ref": "#/definitions/SessionEndReason" - }, - "terminated_by": { - "$ref": "#/definitions/TerminatedBy" - } - } - }, - "SessionStartedData": { - "type": "object", - "properties": { - "metadata": true - } - }, - "TerminatedBy": { - "type": "string", - "enum": [ - "agent", - "daemon" - ] - }, - "UniversalEventData": { - "anyOf": [ - { - "$ref": "#/definitions/SessionStartedData" - }, - { - "$ref": "#/definitions/SessionEndedData" - }, - { - "$ref": "#/definitions/ItemEventData" - }, - { - "$ref": "#/definitions/ItemDeltaData" - }, - { - "$ref": "#/definitions/ErrorData" - }, - { - "$ref": "#/definitions/PermissionEventData" - }, - { - "$ref": "#/definitions/QuestionEventData" - }, - { - "$ref": "#/definitions/AgentUnparsedData" - } - ] - }, - "UniversalEventType": { - "type": "string", - "enum": [ - "session.started", - "session.ended", - "item.started", - "item.delta", - "item.completed", - "error", - "permission.requested", - "permission.resolved", - "question.requested", - "question.resolved", - "agent.unparsed" - ] - }, - "UniversalItem": { - "type": "object", - "required": [ - "content", - "item_id", - "kind", - "status" - ], - "properties": { - "content": { - "type": "array", - "items": { - "$ref": "#/definitions/ContentPart" - } - }, - "item_id": { - "type": "string" - }, - "kind": { - "$ref": "#/definitions/ItemKind" - }, - "native_item_id": { - "type": [ - "string", - "null" - ] - }, - "parent_id": { - "type": [ - "string", - "null" - ] - }, - "role": { - "anyOf": [ - { - "$ref": "#/definitions/ItemRole" - }, - { - "type": "null" - } - ] - }, - "status": { - "$ref": "#/definitions/ItemStatus" - } - } - } - } -} \ No newline at end of file diff --git a/spec/universal-schema.md 
b/spec/universal-schema.md deleted file mode 100644 index ba73122..0000000 --- a/spec/universal-schema.md +++ /dev/null @@ -1,143 +0,0 @@ -# Universal Schema (Single Version, Breaking) - -This document defines the canonical universal session + event model. It replaces prior versions; there is no v2. The design prioritizes compatibility with native agent APIs and fills gaps with explicit synthetics. - -Principles -- Most-compatible-first: choose semantics that map cleanly to native APIs (Codex/OpenCode/Amp/Claude). -- Uniform behavior: clients should not special-case agents; the daemon normalizes differences. -- Synthetics fill gaps: when a provider lacks a feature (session start/end, deltas, user messages), we synthesize events with `source=daemon`. -- Raw preservation: always keep native payloads in `raw` for agent-sourced events. -- UI coverage: update the inspector/UI to the new schema and ensure UI tests cover all session features (messages, deltas, tools, permissions, questions, errors, termination). - -Identifiers -- session_id: daemon-generated session identifier. -- native_session_id: provider thread/session/run identifier (thread_id is merged here). -- item_id: daemon-generated identifier for any universal item. -- native_item_id: provider-native item/message identifier if available; otherwise null. - -Event envelope -```json -{ - "event_id": "evt_...", - "sequence": 42, - "time": "2026-01-27T19:10:11Z", - "session_id": "sess_...", - "native_session_id": "provider_...", - "synthetic": false, - "source": "agent|daemon", - "type": "session.started|session.ended|item.started|item.delta|item.completed|error|permission.requested|permission.resolved|question.requested|question.resolved|agent.unparsed", - "data": { "..." : "..." }, - "raw": { "..." : "..." } -} -``` - -Notes: -- `source=agent` for native events; `source=daemon` for synthetics. -- `synthetic` is always present and mirrors whether the event is daemon-produced. -- `raw` is always present. 
It may be null unless the client opts in to raw payloads; when opt-in is enabled, raw is populated for all events. -- For synthetic events derived from native payloads, include the underlying payload in `raw` when possible. -- Parsing failures emit agent.unparsed (source=daemon, synthetic=true) and should be treated as test failures. - -Raw payload opt-in -- Events endpoints accept `include_raw=true` to populate the `raw` field. -- When `include_raw` is not set or false, `raw` is still present but null. -- Applies to both HTTP and SSE event streams. - -Item model -```json -{ - "item_id": "itm_...", - "native_item_id": "provider_item_...", - "parent_id": "itm_parent_or_null", - "kind": "message|tool_call|tool_result|system|status|unknown", - "role": "user|assistant|system|tool|null", - "content": [ { "type": "...", "...": "..." } ], - "status": "in_progress|completed|failed" -} -``` - -Content parts (non-exhaustive; extend as needed) -- text: `{ "type": "text", "text": "..." }` -- json: `{ "type": "json", "json": { ... } }` -- tool_call: `{ "type": "tool_call", "name": "...", "arguments": "...", "call_id": "..." }` -- tool_result: `{ "type": "tool_result", "call_id": "...", "output": "..." }` -- file_ref: `{ "type": "file_ref", "path": "...", "action": "read|write|patch", "diff": "..." }` -- reasoning: `{ "type": "reasoning", "text": "...", "visibility": "public|private" }` -- image: `{ "type": "image", "path": "...", "mime": "..." }` -- status: `{ "type": "status", "label": "...", "detail": "..." }` - -Event types - -session.started -```json -{ "metadata": { "...": "..." 
} } -``` - -session.ended -```json -{ "reason": "completed|error|terminated", "terminated_by": "agent|daemon" } -``` - -item.started -```json -{ "item": { ...Item } } -``` - -item.delta -```json -{ "item_id": "itm_...", "native_item_id": "provider_item_or_null", "delta": "text fragment" } -``` - -item.completed -```json -{ "item": { ...Item } } -``` - -error -```json -{ "message": "...", "code": "optional", "details": { "...": "..." } } -``` - -agent.unparsed -```json -{ "error": "parse failure message", "location": "agent parser name", "raw_hash": "optional" } -``` - -permission.requested / permission.resolved -```json -{ "permission_id": "...", "action": "...", "status": "requested|approved|denied", "metadata": { "...": "..." } } -``` - -question.requested / question.resolved -```json -{ "question_id": "...", "prompt": "...", "options": ["..."], "response": "...", "status": "requested|answered|rejected" } -``` - -Delta policy (uniform across agents) -- Always emit item.delta for messages. -- For agents without native deltas (Claude/Amp), emit a single synthetic delta containing the full final content immediately before item.completed. -- For Codex/OpenCode, forward native deltas as-is and still emit item.completed with the final content. - -User messages -- If the provider emits user messages (Codex/OpenCode/Amp), map directly to message items with role=user. -- If the provider does not emit user messages (Claude), synthesize user message items from the input we send; mark source=daemon and set native_item_id=null. - -Tool normalization -- Tool calls/results are always emitted as their own items (kind=tool_call/tool_result) with parent_id pointing to the originating message item. -- Codex: mcp tool call progress and tool items map directly. -- OpenCode: tool parts in message.part.updated are mapped into tool items with lifecycle states. -- Amp: tool_call/tool_result map directly. 
-- Claude: synthesize tool items from CLI tool usage where possible; if insufficient, omit tool items and preserve raw payloads. - -OpenCode ordering rule -- OpenCode may emit message.part.updated before message.updated. -- When a part delta arrives first, create a stub item.started (source=daemon) for the parent message item, then emit item.delta. - -Session lifecycle -- If an agent does not emit a session start/end, emit session.started/session.ended synthetically (source=daemon). -- session.ended uses terminated_by=daemon when our termination API is used; terminated_by=agent when the provider ends the session. - -Native ID mapping -- native_session_id is the only provider session identifier. -- native_item_id preserves the provider item/message id when available; otherwise null. -- item_id is always daemon-generated. diff --git a/todo.md b/todo.md index 8e659fa..dffeefd 100644 --- a/todo.md +++ b/todo.md @@ -1,116 +1,4 @@ -# TODO (from spec.md) +# Todo -## Universal API + Types -- [x] Define universal base types for agent input/output (common denominator across schemas) -- [x] Add universal question + permission types (HITL) and ensure they are supported end-to-end -- [x] Define `UniversalEvent` + `UniversalEventData` union and `AgentError` shape -- [x] Define a universal message type for "failed to parse" with raw JSON payload -- [x] Implement 2-way converters: - - [x] Universal input message <-> agent-specific input - - [x] Universal event <-> agent-specific event -- [x] Normalize Claude system/init events into universal started events -- [x] Support Codex CLI type-based event format in universal converter -- [x] Enforce agentMode vs permissionMode semantics + defaults at the API boundary -- [x] Ensure session id vs agentSessionId semantics are respected and surfaced consistently - -## Daemon (Rust HTTP server) -- [x] Build axum router + utoipa + schemars integration -- [x] Implement RFC 7807 Problem Details error responses backed by a `thiserror` enum -- [x] 
Implement canonical error `type` values + required error variants from spec -- [x] Implement offset semantics for events (exclusive last-seen id, default offset 0) -- [x] Implement SSE endpoint for events with same semantics as JSON endpoint -- [x] Replace in-memory session store with sandbox session manager (questions/permissions routing, long-lived processes) -- [x] Remove legacy token header support -- [x] Embed inspector frontend and serve it at `/ui` -- [x] Log inspector URL when starting the HTTP server - -## CLI -- [x] Implement clap CLI flags: `--token`, `--no-token`, `--host`, `--port`, CORS flags -- [x] Implement a CLI endpoint for every HTTP endpoint -- [x] Update `CLAUDE.md` to keep CLI endpoints in sync with HTTP API changes -- [x] Prefix CLI API requests with `/v1` -- [x] Add CLI credentials extractor subcommand -- [x] Move daemon startup to `server` subcommand -- [x] Add `sandbox-daemon` CLI alias - -## HTTP API Endpoints -- [x] POST `/agents/{}/install` with `reinstall` handling -- [x] GET `/agents/{}/modes` (mode discovery or hardcoded) -- [x] GET `/agents` (installed/version/path; version checked at request time) -- [x] POST `/sessions/{}` (create session, install if needed, return health + agentSessionId) -- [x] POST `/sessions/{}/messages` (send prompt) -- [x] GET `/sessions/{}/events` (pagination with offset/limit) -- [x] GET `/sessions/{}/events/sse` (streaming) -- [x] POST `/sessions/{}/questions/{questionId}/reply` -- [x] POST `/sessions/{}/questions/{questionId}/reject` -- [x] POST `/sessions/{}/permissions/{permissionId}/reply` -- [x] Prefix all HTTP API endpoints with `/v1` - -## Agent Management -- [x] Implement install/version/spawn basics for Claude/Codex/OpenCode/Amp -- [x] Implement agent install URL patterns + platform mappings for supported OS/arch -- [x] Parse JSONL output for subprocess agents and extract session/result metadata -- [x] Migrate Codex subprocess to App Server JSON-RPC protocol -- [x] Map permissionMode to agent CLI 
flags (Claude/Codex/Amp) -- [x] Implement session resume flags for Claude/OpenCode/Amp (Codex unsupported) -- [x] Replace sandbox-agent core agent modules with new agent-management crate (delete originals) -- [x] Stabilize agent-management crate API and fix build issues (sandbox-agent currently wired to WIP crate) -- [x] Implement OpenCode shared server lifecycle (`opencode serve`, health, restart) -- [x] Implement OpenCode HTTP session APIs + SSE event stream integration -- [x] Implement JSONL parsing for subprocess agents and map to `UniversalEvent` -- [x] Capture agent session id from events and expose as `agentSessionId` -- [x] Handle agent process exit and map to `agent_process_exited` error -- [x] Implement agentMode discovery rules (OpenCode API, hardcoded others) -- [x] Enforce permissionMode behavior (default/plan/bypass) for subprocesses - -## Credentials -- [x] Implement credential extraction module (Claude/Codex/OpenCode) -- [x] Add Amp credential extraction (config-based) -- [x] Move credential extraction into `agent-credentials` crate -- [ ] Pass extracted credentials into subprocess env vars per agent -- [ ] Ensure OpenCode server reads credentials from config on startup - -## Testing -- [ ] Build a universal agent test suite that exercises all features (messages, questions, permissions, etc.) 
using HTTP API -- [ ] Run the full suite against every agent (Claude/Codex/OpenCode/Amp) without mocks -- [x] Add real install/version/spawn tests for Claude/Codex/OpenCode (Amp conditional) -- [x] Expand agent lifecycle tests (reinstall, session id extraction, resume, plan mode) -- [x] Add OpenCode server-mode tests (session create, prompt, SSE) -- [ ] Add tests for question/permission flows using deterministic prompts -- [x] Add HTTP/SSE snapshot tests for real agents (env-configured) -- [x] Add snapshot coverage for auth, CORS, and concurrent sessions -- [x] Add inspector UI route test - -## Frontend (frontend/packages/inspector) -- [x] Build Vite + React app with connect screen (endpoint + optional token) -- [x] Add instructions to run sandbox-agent (including CORS) -- [x] Implement full agent UI covering all features -- [x] Add HTTP request log with copyable curl command -- [x] Add Content-Type header to CORS callout command -- [x] Default inspector endpoint to current origin and auto-connect via health check -- [x] Update inspector to universal schema events (items, deltas, approvals, errors) - -## TypeScript SDK -- [x] Generate OpenAPI from utoipa and run `openapi-typescript` -- [x] Implement a thin fetch-based client wrapper -- [x] Update `CLAUDE.md` to require SDK + CLI updates when API changes -- [x] Prefix SDK requests with `/v1` - -## Examples + Tests -- [ ] Add examples for Docker, E2B, Daytona, Vercel Sandboxes, Cloudflare Sandboxes -- [ ] Add Vitest unit test for each example (Cloudflare requires special setup) - -## Documentation -- [ ] Write README covering architecture, agent compatibility, and deployment guide -- [ ] Add universal API feature checklist (questions, approve plan, etc.) 
-- [ ] Document CLI, HTTP API, frontend app, and TypeScript SDK usage -- [ ] Use collapsible sections for endpoints and SDK methods -- [x] Integrate OpenAPI spec with Mintlify (docs/openapi.json + validation) - ---- - -- [x] implement release pipeline -- implement e2b example -- implement typescript "start locally" by pulling form server using version -- [x] Move agent schema sources to src/agents -- [x] Add Vercel AI SDK UIMessage schema extractor +- [x] Add server --mock mode with looping mock session events. +- [x] Document mock mode in building chat UI docs and update CLAUDE.md guidance.