From 34d4f3693e3f1e5edbffa5c866de2da60234abe1 Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Tue, 27 Jan 2026 06:18:43 -0800 Subject: [PATCH] feat: add turn streaming and inspector updates --- CLAUDE.md | 2 + README.md | 1 + docs/architecture.mdx | 58 +- docs/building-chat-ui.mdx | 8 +- docs/cli.mdx | 10 + docs/openapi.json | 63 ++ docs/quickstart.mdx | 11 + docs/sdks/typescript.mdx | 15 +- docs/telemetry.mdx | 3 +- frontend/packages/inspector/index.html | 88 +- frontend/packages/inspector/src/App.tsx | 147 ++- .../src/components/SessionSidebar.tsx | 78 +- .../src/components/chat/ChatMessages.tsx | 3 + .../src/components/chat/ChatPanel.tsx | 83 +- .../src/components/chat/ChatSetup.tsx | 81 +- .../src/components/debug/AgentsTab.tsx | 14 +- .../src/components/debug/DebugPanel.tsx | 21 +- .../src/components/debug/EventsTab.tsx | 16 +- research/agents/codex.md | 64 ++ scripts/release/main.ts | 39 +- sdks/typescript/src/client.ts | 93 +- sdks/typescript/src/generated/openapi.ts | 34 + sdks/typescript/src/index.ts | 1 + sdks/typescript/src/types.ts | 1 + sdks/typescript/tests/client.test.ts | 25 + server/packages/sandbox-agent/src/main.rs | 45 + server/packages/sandbox-agent/src/router.rs | 859 ++++++++++++++++-- .../packages/sandbox-agent/src/telemetry.rs | 88 +- .../sandbox-agent/tests/agent_agnostic.rs | 657 -------------- .../sandbox-agent/tests/agent_basic_reply.rs | 46 + .../sandbox-agent/tests/agent_multi_turn.rs | 457 ++++++++++ .../tests/agent_permission_flow.rs | 63 ++ .../tests/agent_question_flow.rs | 64 ++ .../sandbox-agent/tests/agent_termination.rs | 45 + .../sandbox-agent/tests/agent_tool_flow.rs | 94 ++ .../sandbox-agent/tests/common/mod.rs | 388 ++++++++ .../sandbox-agent/tests/http_sse_snapshots.rs | 86 ++ ...ndpoints_snapshots@agents_list_global.snap | 2 +- ...points_snapshots@create_session_codex.snap | 2 +- ...low_snapshots@permission_events_codex.snap | 395 +++++++- ...napshots@question_reject_events_codex.snap | 291 +++++- ...snapshots@question_reply_events_codex.snap | 88 +- ...uth_snapshots@auth_valid_token_global.snap | 2 +- ...ncy_snapshot@concurrency_events_codex.snap | 261 ++++-- ...ttp_events_snapshot@http_events_codex.snap | 107 ++- ..._sse_events_snapshot@sse_events_codex.snap | 127 ++- .../src/agents/claude.rs | 92 +- spec/universal-schema.json | 553 +++++++++++ todo.md | 4 + 49 files changed, 4629 insertions(+), 1146 deletions(-) delete mode 100644 server/packages/sandbox-agent/tests/agent_agnostic.rs create mode 100644 server/packages/sandbox-agent/tests/agent_basic_reply.rs create mode 100644 server/packages/sandbox-agent/tests/agent_multi_turn.rs create mode 100644 server/packages/sandbox-agent/tests/agent_permission_flow.rs create mode 100644 server/packages/sandbox-agent/tests/agent_question_flow.rs create mode 100644 server/packages/sandbox-agent/tests/agent_termination.rs create mode 100644 server/packages/sandbox-agent/tests/agent_tool_flow.rs create mode 100644 server/packages/sandbox-agent/tests/common/mod.rs create mode 100644 spec/universal-schema.json diff --git a/CLAUDE.md b/CLAUDE.md index b07c536..21d2ceb 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -24,6 +24,7 @@ Research on how different agents operate (CLI flags, streaming formats, HITL pat Universal schema guidance: - The universal schema should cover the full feature set of all agents. - Conversions must be best-effort overlap without being lossy; preserve raw payloads when needed. +- **The mock agent acts as the reference implementation** for correct event behavior. Real agents should use synthetic events to match the mock agent's event patterns (e.g., emitting both daemon synthetic and agent native `session.started` events, proper `item.started` → `item.delta` → `item.completed` sequences). ## Spec Tracking @@ -54,6 +55,7 @@ Universal schema guidance: - `sandbox-agent sessions list` ↔ `GET /v1/sessions` - `sandbox-agent sessions create` ↔ `POST /v1/sessions/{sessionId}` - `sandbox-agent sessions send-message` ↔ `POST /v1/sessions/{sessionId}/messages` +- `sandbox-agent sessions send-message-stream` ↔ `POST /v1/sessions/{sessionId}/messages/stream` - `sandbox-agent sessions events` / `get-messages` ↔ `GET /v1/sessions/{sessionId}/events` - `sandbox-agent sessions events-sse` ↔ `GET /v1/sessions/{sessionId}/events/sse` - `sandbox-agent sessions reply-question` ↔ `POST /v1/sessions/{sessionId}/questions/{questionId}/reply` diff --git a/README.md b/README.md index b71b04b..14f7f5c 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,7 @@ Create a session and send a message: ```bash sandbox-agent sessions create my-session --agent codex --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" sandbox-agent sessions send-message my-session --message "Hello" --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" +sandbox-agent sessions send-message-stream my-session --message "Hello" --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" ``` Docs: https://rivet.dev/docs/cli diff --git a/docs/architecture.mdx b/docs/architecture.mdx index c8a2b83..165f04f 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -105,6 +105,7 @@ Each session tracks: POST /v1/sessions/{sessionId} Create session, auto-install agent ↓ POST /v1/sessions/{id}/messages Spawn agent subprocess, stream output +POST /v1/sessions/{id}/messages/stream Post and stream a single turn ↓ GET /v1/sessions/{id}/events Poll for new events (offset-based) GET /v1/sessions/{id}/events/sse Subscribe to SSE stream @@ -133,16 +134,30 @@ When a message is sent: ## Agent Execution -Each agent has a different execution model and communication pattern. +Each agent has a different execution model and communication pattern. There are two main architectural patterns: + +### Architecture Patterns + +**Subprocess Model (Claude, Amp):** +- New process spawned per message/turn +- Process terminates after turn completes +- Multi-turn via CLI resume flags (`--resume`, `--continue`) +- Simple but has process spawn overhead + +**Client/Server Model (OpenCode, Codex):** +- Single long-running server process +- Multiple sessions/threads multiplexed via RPC +- Multi-turn via server-side thread persistence +- More efficient for repeated interactions ### Overview -| Agent | Execution Model | Binary Source | Session Resume | -|-------|-----------------|---------------|----------------| -| Claude Code | CLI subprocess | GCS (Anthropic) | Yes (`--resume`) | -| Codex | App Server subprocess (JSON-RPC) | GitHub releases | No | -| OpenCode | HTTP server + SSE | GitHub releases | Yes (server-side) | -| Amp | CLI subprocess | GCS (Amp) | Yes (`--continue`) | +| Agent | Architecture | Binary Source | Multi-Turn Method | +|-------|--------------|---------------|-------------------| +| Claude Code | Subprocess (per-turn) | GCS (Anthropic) | `--resume` flag | +| Codex | **Shared Server (JSON-RPC)** | GitHub releases | **Thread persistence** | +| OpenCode | HTTP Server (SSE) | GitHub releases | Server-side sessions | +| Amp | Subprocess (per-turn) | GCS (Amp) | `--continue` flag | ### Claude Code @@ -161,15 +176,25 @@ claude --print --output-format stream-json --verbose \ ### Codex -Spawned as a subprocess using the App Server JSON-RPC protocol: +Uses a **shared app-server process** that handles multiple sessions via JSON-RPC over stdio: ```bash codex app-server ``` -- JSON-RPC over stdio (JSONL) -- Uses `initialize`, `thread/start`, and `turn/start` requests -- Approval requests arrive as server JSON-RPC requests +**Daemon flow:** +1. First Codex session triggers `codex app-server` spawn +2. Performs `initialize` / `initialized` handshake +3. Each session creation sends `thread/start` → receives `thread_id` +4. Messages sent via `turn/start` with `thread_id` +5. Notifications routed back to session by `thread_id` + +**Key characteristics:** +- Single process handles all Codex sessions +- JSON-RPC over stdio (JSONL format) +- Thread IDs map to daemon session IDs +- Approval requests arrive as server-to-client JSON-RPC requests +- Process lifetime matches daemon lifetime (not per-turn) ### OpenCode @@ -208,12 +233,21 @@ amp [--execute|--print] [--output-format stream-json] \ ### Communication Patterns -**Subprocess agents (Claude, Codex, Amp):** +**Per-turn subprocess agents (Claude, Amp):** 1. Agent CLI spawned with appropriate flags 2. Stdout/stderr read line-by-line 3. Each line parsed as JSON 4. Events converted via `parse_agent_line()` → agent-specific converter 5. Universal events recorded and broadcast to SSE subscribers +6. Process terminated on turn completion + +**Shared stdio server agent (Codex):** +1. Single `codex app-server` process started on first session +2. `initialize`/`initialized` handshake performed once +3. New sessions send `thread/start`, receive `thread_id` +4. Messages sent via `turn/start` with `thread_id` +5. Notifications read from stdout, routed by `thread_id` +6. Process persists across sessions and turns **HTTP server agent (OpenCode):** 1. Server started on available port (if not running) diff --git a/docs/building-chat-ui.mdx b/docs/building-chat-ui.mdx index 191a45d..a2bd9b5 100644 --- a/docs/building-chat-ui.mdx +++ b/docs/building-chat-ui.mdx @@ -131,13 +131,15 @@ timestamps, not ordering. ## Optional raw payloads -If you need provider-level debugging, pass `include_raw=true` when streaming or polling events to -receive the `raw` payload for each event. +If you need provider-level debugging, pass `include_raw=true` when streaming or polling events +(including one-turn streams) to receive the `raw` payload for each event. -## SSE vs polling +## SSE vs polling vs turn streaming - SSE gives low-latency updates and simplifies streaming UIs. - Polling is simpler to debug and works in any environment. +- Turn streaming (`POST /v1/sessions/{session_id}/messages/stream`) is a one-shot stream tied to a + single prompt. The stream closes automatically once the turn completes. Both yield the same event payloads. diff --git a/docs/cli.mdx b/docs/cli.mdx index a55e22d..938a8e0 100644 --- a/docs/cli.mdx +++ b/docs/cli.mdx @@ -67,6 +67,16 @@ sandbox-agent sessions send-message my-session \ ``` +
+sessions send-message-stream + +```bash +sandbox-agent sessions send-message-stream my-session \ + --message "Summarize the repository" \ + --endpoint http://127.0.0.1:2468 +``` +
+
sessions events diff --git a/docs/openapi.json b/docs/openapi.json index e81a8ec..59f66fe 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -408,6 +408,60 @@ } } }, + "/v1/sessions/{session_id}/messages/stream": { + "post": { + "tags": [ + "sessions" + ], + "operationId": "post_message_stream", + "parameters": [ + { + "name": "session_id", + "in": "path", + "description": "Session id", + "required": true, + "schema": { + "type": "string" + } + }, + { + "name": "include_raw", + "in": "query", + "description": "Include raw provider payloads", + "required": false, + "schema": { + "type": "boolean", + "nullable": true + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/MessageRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "SSE event stream" + }, + "404": { + "description": "", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProblemDetails" + } + } + } + } + } + } + }, "/v1/sessions/{session_id}/permissions/{permission_id}/reply": { "post": { "tags": [ @@ -1431,6 +1485,15 @@ "daemon" ] }, + "TurnStreamQuery": { + "type": "object", + "properties": { + "includeRaw": { + "type": "boolean", + "nullable": true + } + } + }, "UniversalEvent": { "type": "object", "required": [ diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index de289cf..435ab0a 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -70,6 +70,15 @@ curl "http://127.0.0.1:2468/v1/sessions/my-session/events/sse?offset=0" \ -H "Authorization: Bearer $SANDBOX_TOKEN" ``` +For a single-turn stream (post a message and get one streamed response): + +```bash +curl -N -X POST "http://127.0.0.1:2468/v1/sessions/my-session/messages/stream" \ + -H "Authorization: Bearer $SANDBOX_TOKEN" \ + -H "content-type: application/json" \ + -d '{"message":"Hello"}' +``` + ## 5. CLI shortcuts The CLI mirrors the HTTP API: @@ -78,4 +87,6 @@ The CLI mirrors the HTTP API: sandbox-agent sessions create my-session --agent claude --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" sandbox-agent sessions send-message my-session --message "Hello" --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" + +sandbox-agent sessions send-message-stream my-session --message "Hello" --endpoint http://127.0.0.1:2468 --token "$SANDBOX_TOKEN" ``` diff --git a/docs/sdks/typescript.mdx b/docs/sdks/typescript.mdx index 604c48b..82472ff 100644 --- a/docs/sdks/typescript.mdx +++ b/docs/sdks/typescript.mdx @@ -86,10 +86,21 @@ for await (const event of client.streamEvents("demo-session", { The SDK parses `text/event-stream` into `UniversalEvent` objects. If you want full control, use `getEventsSse()` and parse the stream yourself. +## Stream a single turn + +```ts +for await (const event of client.streamTurn("demo-session", { message: "Hello" })) { + console.log(event.type, event.data); +} +``` + +This method posts the message and streams only the next turn. For manual control, call +`postMessageStream()` and parse the SSE response yourself. + ## Optional raw payloads -Set `includeRaw: true` on `getEvents` or `streamEvents` to include the raw provider payload in -`event.raw`. This is useful for debugging and conversion analysis. +Set `includeRaw: true` on `getEvents`, `streamEvents`, or `streamTurn` to include the raw provider +payload in `event.raw`. This is useful for debugging and conversion analysis. ## Error handling diff --git a/docs/telemetry.mdx b/docs/telemetry.mdx index 82d24c1..029dd4c 100644 --- a/docs/telemetry.mdx +++ b/docs/telemetry.mdx @@ -3,7 +3,7 @@ title: "Telemetry" description: "Anonymous telemetry collected by sandbox-agent." --- -sandbox-agent sends a small, anonymous telemetry payload on startup to help us understand usage and improve reliability. +sandbox-agent sends a small, anonymous telemetry payload on startup and then every 5 minutes to help us understand usage and improve reliability. ## What gets sent @@ -12,6 +12,7 @@ sandbox-agent sends a small, anonymous telemetry payload on startup to help us u - Detected sandbox provider (for example: Docker, E2B, Vercel Sandboxes). Each sandbox gets a random anonymous ID stored on disk so usage can be counted without identifying users. +The last successful send time is also stored on disk, and heartbeats are rate-limited to at most one every 5 minutes. ## Opting out diff --git a/frontend/packages/inspector/index.html b/frontend/packages/inspector/index.html index 20891db..0f09aa1 100644 --- a/frontend/packages/inspector/index.html +++ b/frontend/packages/inspector/index.html @@ -383,7 +383,7 @@ flex-direction: column; border-right: 1px solid var(--border); background: var(--surface-2); - overflow: hidden; + overflow: visible; } .sidebar-header { @@ -394,12 +394,15 @@ align-items: center; justify-content: space-between; flex-shrink: 0; + overflow: visible; } .sidebar-header-actions { display: flex; align-items: center; gap: 8px; + position: relative; + overflow: visible; } .sidebar-icon-btn { @@ -449,6 +452,53 @@ background: var(--accent-hover); } + .sidebar-add-menu-wrapper { + position: relative; + } + + .sidebar-add-menu { + position: absolute; + top: 30px; + right: 0; + min-width: 140px; + background: var(--surface); + border: 1px solid var(--border-2); + border-radius: 8px; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.35); + padding: 6px; + display: flex; + flex-direction: column; + gap: 4px; + z-index: 60; + } + + .sidebar-add-option { + background: transparent; + border: 1px solid transparent; + color: var(--text); + text-align: left; + padding: 6px 8px; + border-radius: 6px; + font-size: 12px; + cursor: pointer; + transition: all var(--transition); + } + + .sidebar-add-option:hover { + background: var(--accent); + color: #fff; + } + + .sidebar-add-status { + padding: 6px 8px; + font-size: 11px; + color: var(--muted); + } + + .sidebar-add-status.error { + color: var(--danger); + } + .session-list { flex: 1; overflow-y: auto; @@ -520,6 +570,10 @@ font-size: 11px; } + .sidebar-empty.error { + color: var(--danger); + } + /* Chat Panel */ .chat-panel { display: flex; @@ -560,6 +614,21 @@ color: var(--text-secondary); } + .session-agent-display { + font-size: 11px; + font-weight: 600; + color: var(--accent); + background: color-mix(in srgb, var(--accent) 18%, transparent); + padding: 2px 6px; + border-radius: 999px; + } + + .panel-header-right { + display: flex; + align-items: center; + gap: 8px; + } + .messages-container { flex: 1; overflow-y: auto; @@ -947,6 +1016,13 @@ width: 50px; } + .setup-select-small:disabled, + .setup-select:disabled, + .setup-input:disabled { + opacity: 0.55; + cursor: not-allowed; + } + .setup-stream-btn { display: flex; align-items: center; @@ -973,6 +1049,16 @@ color: var(--accent); } + .setup-stream-btn:disabled { + cursor: default; + opacity: 0.6; + } + + .setup-stream-btn:disabled:hover { + border-color: var(--border-2); + color: var(--muted); + } + .setup-stream-btn.active { background: var(--accent); border-color: var(--accent); diff --git a/frontend/packages/inspector/src/App.tsx b/frontend/packages/inspector/src/App.tsx index 87e010e..8b2e478 100644 --- a/frontend/packages/inspector/src/App.tsx +++ b/frontend/packages/inspector/src/App.tsx @@ -62,6 +62,12 @@ export default function App() { const [agents, setAgents] = useState([]); const [modesByAgent, setModesByAgent] = useState>({}); const [sessions, setSessions] = useState([]); + const [agentsLoading, setAgentsLoading] = useState(false); + const [agentsError, setAgentsError] = useState(null); + const [sessionsLoading, setSessionsLoading] = useState(false); + const [sessionsError, setSessionsError] = useState(null); + const [modesLoadingByAgent, setModesLoadingByAgent] = useState>({}); + const [modesErrorByAgent, setModesErrorByAgent] = useState>({}); const [agentId, setAgentId] = useState("claude"); const [agentMode, setAgentMode] = useState(""); @@ -75,10 +81,12 @@ export default function App() { const [events, setEvents] = useState([]); const [offset, setOffset] = useState(0); const offsetRef = useRef(0); + const [eventsLoading, setEventsLoading] = useState(false); const [polling, setPolling] = useState(false); const pollTimerRef = useRef(null); - const [streamMode, setStreamMode] = useState<"poll" | "sse">("sse"); + const [turnStreaming, setTurnStreaming] = useState(false); + const [streamMode, setStreamMode] = useState<"poll" | "sse" | "turn">("sse"); const [eventError, setEventError] = useState(null); const [questionSelections, setQuestionSelections] = useState>({}); @@ -95,6 +103,7 @@ export default function App() { const clientRef = useRef(null); const sseAbortRef = useRef(null); + const turnAbortRef = useRef(null); const logRequest = useCallback((entry: RequestLog) => { setRequestLog((prev) => { @@ -200,9 +209,18 @@ export default function App() { setEventError(null); stopPolling(); stopSse(); + stopTurnStream(); + setAgents([]); + setSessions([]); + setAgentsLoading(false); + setSessionsLoading(false); + setAgentsError(null); + setSessionsError(null); }; const refreshAgents = async () => { + setAgentsLoading(true); + setAgentsError(null); try { const data = await getClient().listAgents(); const agentList = data.agents ?? []; @@ -213,17 +231,23 @@ export default function App() { } } } catch (error) { - setConnectError(getErrorMessage(error, "Unable to refresh agents")); + setAgentsError(getErrorMessage(error, "Unable to refresh agents")); + } finally { + setAgentsLoading(false); } }; const fetchSessions = async () => { + setSessionsLoading(true); + setSessionsError(null); try { const data = await getClient().listSessions(); const sessionList = data.sessions ?? []; setSessions(sessionList); } catch { - // Silently fail - sessions list is supplementary + setSessionsError("Unable to load sessions."); + } finally { + setSessionsLoading(false); } }; @@ -237,22 +261,32 @@ export default function App() { }; const loadModes = async (targetId: string) => { + setModesLoadingByAgent((prev) => ({ ...prev, [targetId]: true })); + setModesErrorByAgent((prev) => ({ ...prev, [targetId]: null })); try { const data = await getClient().getAgentModes(targetId); const modes = data.modes ?? []; setModesByAgent((prev) => ({ ...prev, [targetId]: modes })); } catch { - // Silently fail - modes are optional + setModesErrorByAgent((prev) => ({ ...prev, [targetId]: "Unable to load modes." })); + } finally { + setModesLoadingByAgent((prev) => ({ ...prev, [targetId]: false })); } }; const sendMessage = async () => { - if (!message.trim()) return; + const prompt = message.trim(); + if (!prompt || !sessionId || turnStreaming) return; setSessionError(null); - try { - await getClient().postMessage(sessionId, { message }); - setMessage(""); + setMessage(""); + if (streamMode === "turn") { + await startTurnStream(prompt); + return; + } + + try { + await getClient().postMessage(sessionId, { message: prompt }); if (!polling) { if (streamMode === "poll") { startPolling(); @@ -266,6 +300,7 @@ export default function App() { }; const selectSession = (session: SessionInfo) => { + stopTurnStream(); setSessionId(session.sessionId); setAgentId(session.agent); setAgentMode(session.agentMode); @@ -278,7 +313,12 @@ export default function App() { setSessionError(null); }; - const createNewSession = async () => { + const createNewSession = async (nextAgentId?: string) => { + stopTurnStream(); + const selectedAgent = nextAgentId ?? agentId; + if (nextAgentId) { + setAgentId(nextAgentId); + } const chars = "abcdefghijklmnopqrstuvwxyz0123456789"; let id = "session-"; for (let i = 0; i < 8; i++) { @@ -297,7 +337,7 @@ export default function App() { permissionMode?: string; model?: string; variant?: string; - } = { agent: agentId }; + } = { agent: selectedAgent }; if (agentMode) body.agentMode = agentMode; if (permissionMode) body.permissionMode = permissionMode; if (model) body.model = model; @@ -320,6 +360,7 @@ export default function App() { const fetchEvents = useCallback(async () => { if (!sessionId) return; + setEventsLoading(true); try { const response = await getClient().getEvents(sessionId, { offset: offsetRef.current, @@ -330,6 +371,8 @@ export default function App() { setEventError(null); } catch (error) { setEventError(getErrorMessage(error, "Unable to fetch events")); + } finally { + setEventsLoading(false); } }, [appendEvents, getClient, sessionId]); @@ -394,6 +437,48 @@ export default function App() { setPolling(false); }; + const startTurnStream = async (prompt: string) => { + stopPolling(); + stopSse(); + if (turnAbortRef.current) return; + if (!sessionId) { + setEventError("Select or create a session first."); + return; + } + setEventError(null); + setTurnStreaming(true); + const controller = new AbortController(); + turnAbortRef.current = controller; + try { + for await (const event of getClient().streamTurn( + sessionId, + { message: prompt }, + undefined, + controller.signal + )) { + appendEvents([event]); + } + } catch (error) { + if (controller.signal.aborted) { + return; + } + setEventError(getErrorMessage(error, "Turn stream error.")); + } finally { + if (turnAbortRef.current === controller) { + turnAbortRef.current = null; + setTurnStreaming(false); + } + } + }; + + const stopTurnStream = () => { + if (turnAbortRef.current) { + turnAbortRef.current.abort(); + turnAbortRef.current = null; + } + setTurnStreaming(false); + }; + const resetEvents = () => { setEvents([]); setOffset(0); @@ -580,6 +665,7 @@ export default function App() { return () => { stopPolling(); stopSse(); + stopTurnStream(); }; }, []); @@ -604,6 +690,7 @@ export default function App() { useEffect(() => { if (!connected || !sessionId || polling) return; + if (streamMode === "turn") return; const hasSession = sessions.some((session) => session.sessionId === sessionId); if (!hasSession) return; if (streamMode === "poll") { @@ -613,6 +700,15 @@ export default function App() { } }, [connected, sessionId, polling, streamMode, sessions]); + useEffect(() => { + if (streamMode === "turn") { + stopPolling(); + stopSse(); + } else if (turnStreaming) { + stopTurnStream(); + } + }, [streamMode, turnStreaming]); + useEffect(() => { messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); }, [transcriptEntries]); @@ -633,6 +729,16 @@ export default function App() { const availableAgents = agents.length ? agents.map((agent) => agent.id) : defaultAgents; const currentAgent = agents.find((agent) => agent.id === agentId); const activeModes = modesByAgent[agentId] ?? []; + const modesLoading = modesLoadingByAgent[agentId] ?? false; + const modesError = modesErrorByAgent[agentId] ?? null; + const agentDisplayNames: Record = { + claude: "Claude Code", + codex: "Codex", + opencode: "OpenCode", + amp: "Amp", + mock: "Mock" + }; + const agentLabel = agentDisplayNames[agentId] ?? agentId; const handleKeyDown = (event: React.KeyboardEvent) => { if (event.key === "Enter" && !event.shiftKey) { @@ -642,6 +748,9 @@ export default function App() { }; const toggleStream = () => { + if (streamMode === "turn") { + return; + } if (polling) { if (streamMode === "poll") { stopPolling(); @@ -695,11 +804,17 @@ export default function App() { onSelectSession={selectSession} onRefresh={fetchSessions} onCreateSession={createNewSession} + availableAgents={availableAgents} + agentsLoading={agentsLoading} + agentsError={agentsError} + sessionsLoading={sessionsLoading} + sessionsError={sessionsError} /> setRequestLog([])} @@ -749,6 +868,8 @@ export default function App() { modesByAgent={modesByAgent} onRefreshAgents={refreshAgents} onInstallAgent={installAgent} + agentsLoading={agentsLoading} + agentsError={agentsError} /> diff --git a/frontend/packages/inspector/src/components/SessionSidebar.tsx b/frontend/packages/inspector/src/components/SessionSidebar.tsx index a246ca9..a063c75 100644 --- a/frontend/packages/inspector/src/components/SessionSidebar.tsx +++ b/frontend/packages/inspector/src/components/SessionSidebar.tsx @@ -1,4 +1,5 @@ import { Plus, RefreshCw } from "lucide-react"; +import { useEffect, useRef, useState } from "react"; import type { SessionInfo } from "sandbox-agent"; const SessionSidebar = ({ @@ -6,14 +7,47 @@ const SessionSidebar = ({ selectedSessionId, onSelectSession, onRefresh, - onCreateSession + onCreateSession, + availableAgents, + agentsLoading, + agentsError, + sessionsLoading, + sessionsError }: { sessions: SessionInfo[]; selectedSessionId: string; onSelectSession: (session: SessionInfo) => void; onRefresh: () => void; - onCreateSession: () => void; + onCreateSession: (agentId: string) => void; + availableAgents: string[]; + agentsLoading: boolean; + agentsError: string | null; + sessionsLoading: boolean; + sessionsError: string | null; }) => { + const [showMenu, setShowMenu] = useState(false); + const menuRef = useRef(null); + + useEffect(() => { + if (!showMenu) return; + const handler = (event: MouseEvent) => { + if (!menuRef.current) return; + if (!menuRef.current.contains(event.target as Node)) { + setShowMenu(false); + } + }; + document.addEventListener("mousedown", handler); + return () => document.removeEventListener("mousedown", handler); + }, [showMenu]); + + const agentLabels: Record = { + claude: "Claude Code", + codex: "Codex", + opencode: "OpenCode", + amp: "Amp", + mock: "Mock" + }; + return (
@@ -22,14 +56,46 @@ const SessionSidebar = ({ - +
+ + {showMenu && ( +
+ {agentsLoading &&
Loading agents...
} + {agentsError &&
{agentsError}
} + {!agentsLoading && !agentsError && availableAgents.length === 0 && ( +
No agents available.
+ )} + {!agentsLoading && !agentsError && + availableAgents.map((id) => ( + + ))} +
+ )} +
- {sessions.length === 0 ? ( + {sessionsLoading ? ( +
Loading sessions...
+ ) : sessionsError ? ( +
{sessionsError}
+ ) : sessions.length === 0 ? (
No sessions yet.
) : ( sessions.map((session) => ( diff --git a/frontend/packages/inspector/src/components/chat/ChatMessages.tsx b/frontend/packages/inspector/src/components/chat/ChatMessages.tsx index 1940ac7..49f7049 100644 --- a/frontend/packages/inspector/src/components/chat/ChatMessages.tsx +++ b/frontend/packages/inspector/src/components/chat/ChatMessages.tsx @@ -5,10 +5,12 @@ import type { TimelineEntry } from "./types"; const ChatMessages = ({ entries, sessionError, + eventError, messagesEndRef }: { entries: TimelineEntry[]; sessionError: string | null; + eventError: string | null; messagesEndRef: React.RefObject; }) => { return ( @@ -67,6 +69,7 @@ const ChatMessages = ({ ); })} {sessionError &&
{sessionError}
} + {eventError &&
{eventError}
}
); diff --git a/frontend/packages/inspector/src/components/chat/ChatPanel.tsx b/frontend/packages/inspector/src/components/chat/ChatPanel.tsx index fb4da6b..8f58b08 100644 --- a/frontend/packages/inspector/src/components/chat/ChatPanel.tsx +++ b/frontend/packages/inspector/src/components/chat/ChatPanel.tsx @@ -1,4 +1,4 @@ -import { MessageSquare, Plus, Terminal } from "lucide-react"; +import { MessageSquare, PauseCircle, PlayCircle, Plus, Terminal } from "lucide-react"; import type { AgentModeInfo, PermissionEventData, QuestionEventData } from "sandbox-agent"; import ApprovalsTab from "../debug/ApprovalsTab"; import ChatInput from "./ChatInput"; @@ -9,6 +9,7 @@ import type { TimelineEntry } from "./types"; const ChatPanel = ({ sessionId, polling, + turnStreaming, transcriptEntries, sessionError, message, @@ -17,22 +18,24 @@ const ChatPanel = ({ onKeyDown, onCreateSession, messagesEndRef, - agentId, + agentLabel, agentMode, permissionMode, model, variant, streamMode, - availableAgents, activeModes, currentAgentVersion, - onAgentChange, + hasSession, + modesLoading, + modesError, onAgentModeChange, onPermissionModeChange, onModelChange, onVariantChange, onStreamModeChange, onToggleStream, + eventError, questionRequests, permissionRequests, questionSelections, @@ -43,6 +46,7 @@ const ChatPanel = ({ }: { sessionId: string; polling: boolean; + turnStreaming: boolean; transcriptEntries: TimelineEntry[]; sessionError: string | null; message: string; @@ -51,22 +55,24 @@ const ChatPanel = ({ onKeyDown: (event: React.KeyboardEvent) => void; onCreateSession: () => void; messagesEndRef: React.RefObject; - agentId: string; + agentLabel: string; agentMode: string; permissionMode: string; model: string; variant: string; - streamMode: "poll" | "sse"; - availableAgents: string[]; + streamMode: "poll" | "sse" | "turn"; activeModes: AgentModeInfo[]; currentAgentVersion?: string | null; - onAgentChange: (value: string) => void; + hasSession: boolean; + modesLoading: boolean; + modesError: string | null; onAgentModeChange: (value: string) => void; onPermissionModeChange: (value: string) => void; onModelChange: (value: string) => void; onVariantChange: (value: string) => void; - onStreamModeChange: (value: "poll" | "sse") => void; + onStreamModeChange: (value: "poll" | "sse" | "turn") => void; onToggleStream: () => void; + eventError: string | null; questionRequests: QuestionEventData[]; permissionRequests: PermissionEventData[]; questionSelections: Record; @@ -76,16 +82,57 @@ const ChatPanel = ({ onReplyPermission: (requestId: string, reply: "once" | "always" | "reject") => void; }) => { const hasApprovals = questionRequests.length > 0 || permissionRequests.length > 0; + const isTurnMode = streamMode === "turn"; + const isStreaming = isTurnMode ? turnStreaming : polling; + const turnLabel = turnStreaming ? "Streaming" : "On Send"; return (
- Session + {sessionId ? "Session" : "No Session"} {sessionId && {sessionId}} + {sessionId && {agentLabel}} +
+
+
+ + +
- {polling && Live}
@@ -109,6 +156,7 @@ const ChatPanel = ({ )} @@ -135,27 +183,24 @@ const ChatPanel = ({ onSendMessage={onSendMessage} onKeyDown={onKeyDown} placeholder={sessionId ? "Send a message..." : "Select or create a session first"} - disabled={!sessionId} + disabled={!sessionId || turnStreaming} />
); diff --git a/frontend/packages/inspector/src/components/chat/ChatSetup.tsx b/frontend/packages/inspector/src/components/chat/ChatSetup.tsx index 00f7341..0a0eb97 100644 --- a/frontend/packages/inspector/src/components/chat/ChatSetup.tsx +++ b/frontend/packages/inspector/src/components/chat/ChatSetup.tsx @@ -1,60 +1,53 @@ -import { PauseCircle, PlayCircle } from "lucide-react"; import type { AgentModeInfo } from "sandbox-agent"; const ChatSetup = ({ - agentId, + agentLabel, agentMode, permissionMode, model, variant, - streamMode, - polling, - availableAgents, activeModes, currentAgentVersion, - onAgentChange, + hasSession, + modesLoading, + modesError, onAgentModeChange, onPermissionModeChange, onModelChange, - onVariantChange, - onStreamModeChange, - onToggleStream + onVariantChange }: { - agentId: string; + agentLabel: string; agentMode: string; permissionMode: string; model: string; variant: string; - streamMode: "poll" | "sse"; - polling: boolean; - availableAgents: string[]; activeModes: AgentModeInfo[]; currentAgentVersion?: string | null; - onAgentChange: (value: string) => void; + hasSession: boolean; + modesLoading: boolean; + modesError: string | null; onAgentModeChange: (value: string) => void; onPermissionModeChange: (value: string) => void; onModelChange: (value: string) => void; onVariantChange: (value: string) => void; - onStreamModeChange: (value: "poll" | "sse") => void; - onToggleStream: () => void; }) => { + const agentVersionLabel = currentAgentVersion + ? `${agentLabel} v${currentAgentVersion}` + : agentLabel; return (
- - onVariantChange(e.target.value)} placeholder="Variant" title="Variant" + disabled={!hasSession} /> -
- - -
- - {currentAgentVersion && ( - - v{currentAgentVersion} + {hasSession && ( + + {agentVersionLabel} )}
diff --git a/frontend/packages/inspector/src/components/debug/AgentsTab.tsx b/frontend/packages/inspector/src/components/debug/AgentsTab.tsx index 6485c62..545b734 100644 --- a/frontend/packages/inspector/src/components/debug/AgentsTab.tsx +++ b/frontend/packages/inspector/src/components/debug/AgentsTab.tsx @@ -8,23 +8,31 @@ const AgentsTab = ({ defaultAgents, modesByAgent, onRefresh, - onInstall + onInstall, + loading, + error }: { agents: AgentInfo[]; defaultAgents: string[]; modesByAgent: Record; onRefresh: () => void; onInstall: (agentId: string, reinstall: boolean) => void; + loading: boolean; + error: string | null; }) => { return ( <>
-
- {agents.length === 0 &&
No agents reported. Click refresh to check.
} + {error &&
{error}
} + {loading &&
Loading agents...
} + {!loading && agents.length === 0 && ( +
No agents reported. Click refresh to check.
+ )} {(agents.length ? agents diff --git a/frontend/packages/inspector/src/components/debug/DebugPanel.tsx b/frontend/packages/inspector/src/components/debug/DebugPanel.tsx index ff44e73..7a1beba 100644 --- a/frontend/packages/inspector/src/components/debug/DebugPanel.tsx +++ b/frontend/packages/inspector/src/components/debug/DebugPanel.tsx @@ -14,6 +14,8 @@ const DebugPanel = ({ offset, onFetchEvents, onResetEvents, + eventsLoading, + eventsError, requestLog, copiedLogId, onClearRequestLog, @@ -22,7 +24,9 @@ const DebugPanel = ({ defaultAgents, modesByAgent, onRefreshAgents, - onInstallAgent + onInstallAgent, + agentsLoading, + agentsError }: { debugTab: DebugTab; onDebugTabChange: (tab: DebugTab) => void; @@ -30,6 +34,8 @@ const DebugPanel = ({ offset: number; onFetchEvents: () => void; onResetEvents: () => void; + eventsLoading: boolean; + eventsError: string | null; requestLog: RequestLog[]; copiedLogId: number | null; onClearRequestLog: () => void; @@ -39,6 +45,8 @@ const DebugPanel = ({ modesByAgent: Record; onRefreshAgents: () => void; onInstallAgent: (agentId: string, reinstall: boolean) => void; + agentsLoading: boolean; + agentsError: string | null; }) => { return (
@@ -69,7 +77,14 @@ const DebugPanel = ({ )} {debugTab === "events" && ( - + )} {debugTab === "agents" && ( @@ -79,6 +94,8 @@ const DebugPanel = ({ modesByAgent={modesByAgent} onRefresh={onRefreshAgents} onInstall={onInstallAgent} + loading={agentsLoading} + error={agentsError} /> )}
diff --git a/frontend/packages/inspector/src/components/debug/EventsTab.tsx b/frontend/packages/inspector/src/components/debug/EventsTab.tsx index 9f992d6..0f68fdc 100644 --- a/frontend/packages/inspector/src/components/debug/EventsTab.tsx +++ b/frontend/packages/inspector/src/components/debug/EventsTab.tsx @@ -8,12 +8,16 @@ const EventsTab = ({ events, offset, onFetch, - onClear + onClear, + loading, + error }: { events: UniversalEvent[]; offset: number; onFetch: () => void; onClear: () => void; + loading: boolean; + error: string | null; }) => { const [collapsedEvents, setCollapsedEvents] = useState>({}); @@ -28,8 +32,8 @@ const EventsTab = ({
Offset: {offset}
-
+ {error &&
{error}
} + {events.length === 0 ? ( -
No events yet. Start streaming to receive events.
+
+ {loading ? "Loading events..." : "No events yet. Start streaming to receive events."} +
) : (
{[...events].reverse().map((event) => { diff --git a/research/agents/codex.md b/research/agents/codex.md index 2686a4a..b0e4098 100644 --- a/research/agents/codex.md +++ b/research/agents/codex.md @@ -254,6 +254,70 @@ Codex output is converted via `convertCodexOutput()`: - Use `resumeThread(threadId)` to continue conversation - Thread ID is captured from `thread.started` event or thread object +## Shared App-Server Architecture (Daemon Implementation) + +The sandbox daemon uses a **single shared Codex app-server process** to handle multiple sessions, similar to OpenCode's server model. This differs from Claude/Amp which spawn a new process per turn. + +### Architecture Comparison + +| Agent | Model | Process Lifetime | Session ID | +|-------|-------|------------------|------------| +| Claude | Subprocess | Per-turn (killed on TurnCompleted) | `--resume` flag | +| Amp | Subprocess | Per-turn | `--continue` flag | +| OpenCode | HTTP Server | Daemon lifetime | Session ID via API | +| **Codex** | **Stdio Server** | **Daemon lifetime** | **Thread ID via JSON-RPC** | + +### Daemon Flow + +1. **First Codex session created**: Spawns `codex app-server` process, performs `initialize`/`initialized` handshake +2. **Session creation**: Sends `thread/start` request, captures `thread_id` as `native_session_id` +3. **Message sent**: Sends `turn/start` request with `thread_id`, streams notifications back to session +4. **Multi-turn**: Reuses same `thread_id`, process stays alive, no respawn needed +5. **Daemon shutdown**: Process terminated with daemon + +### Why This Approach? + +1. **Performance**: No process spawn overhead per message +2. **Multi-turn support**: Thread persists in server memory, no resume needed +3. **Consistent with OpenCode**: Similar server-based pattern reduces code complexity +4. **API alignment**: Matches Codex's intended app-server usage pattern + +### Protocol Details + +The shared server uses JSON-RPC 2.0 for request/response correlation: + +``` +Daemon Codex App-Server + | | + |-- initialize {id: 1} ------------>| + |<-- response {id: 1} --------------| + |-- initialized (notification) ---->| + | | + |-- thread/start {id: 2} ---------->| + |<-- response {id: 2, thread.id} ---| + |<-- thread/started (notification) -| + | | + |-- turn/start {id: 3, threadId} -->| + |<-- turn/started (notification) ---| + |<-- item/* (notifications) --------| + |<-- turn/completed (notification) -| +``` + +### Thread-to-Session Routing + +Notifications are routed to the correct session by extracting `threadId` from each notification: + +```rust +fn codex_thread_id_from_server_notification(notification) -> Option { + // All thread-scoped notifications include threadId field + match notification { + TurnStarted(params) => Some(params.thread_id), + ItemCompleted(params) => Some(params.thread_id), + // ... etc + } +} +``` + ## Notes - SDK is dynamically imported to reduce bundle size diff --git a/scripts/release/main.ts b/scripts/release/main.ts index 179e756..b0558c1 100755 --- a/scripts/release/main.ts +++ b/scripts/release/main.ts @@ -148,6 +148,19 @@ function isStable(version: string) { return parseSemver(version).prerelease.length === 0; } +function getNpmTag(version: string, latest: boolean) { + if (latest) return null; + const prerelease = parseSemver(version).prerelease; + if (prerelease.length === 0) { + return "next"; + } + const hasRc = prerelease.some((part) => part.toLowerCase().startsWith("rc")); + if (hasRc) { + return "rc"; + } + throw new Error(`Prerelease versions must use rc tag when not latest: ${version}`); +} + function getAllGitVersions() { try { execFileSync("git", ["fetch", "--tags", "--force", "--quiet"], { @@ -411,18 +424,22 @@ function publishCrates(rootDir: string, version: string) { } } -function publishNpmSdk(rootDir: string, version: string) { +function publishNpmSdk(rootDir: string, version: string, latest: boolean) { const sdkDir = path.join(rootDir, "sdks", "typescript"); console.log("==> Publishing TypeScript SDK to npm"); + const npmTag = getNpmTag(version, latest); run("npm", ["version", version, "--no-git-tag-version", "--allow-same-version"], { cwd: sdkDir }); run("pnpm", ["install"], { cwd: sdkDir }); run("pnpm", ["run", "build"], { cwd: sdkDir }); - run("npm", ["publish", "--access", "public"], { cwd: sdkDir }); + const publishArgs = ["publish", "--access", "public"]; + if (npmTag) publishArgs.push("--tag", npmTag); + run("npm", publishArgs, { cwd: sdkDir }); } -function publishNpmCli(rootDir: string, version: string) { +function publishNpmCli(rootDir: string, version: string, latest: boolean) { const cliDir = path.join(rootDir, "sdks", "cli"); const distDir = path.join(rootDir, "dist"); + const npmTag = getNpmTag(version, latest); for (const [target, info] of Object.entries(PLATFORM_MAP)) { const platformDir = path.join(cliDir, "platforms", info.pkg); @@ -436,7 +453,9 @@ function publishNpmCli(rootDir: string, version: string) { console.log(`==> Publishing @sandbox-agent/cli-${info.pkg}`); run("npm", ["version", version, "--no-git-tag-version", "--allow-same-version"], { cwd: platformDir }); - run("npm", ["publish", "--access", "public"], { cwd: platformDir }); + const publishArgs = ["publish", "--access", "public"]; + if (npmTag) publishArgs.push("--tag", npmTag); + run("npm", publishArgs, { cwd: platformDir }); } console.log("==> Publishing @sandbox-agent/cli"); @@ -447,7 +466,9 @@ function publishNpmCli(rootDir: string, version: string) { pkg.optionalDependencies[dep] = version; } fs.writeFileSync(pkgPath, JSON.stringify(pkg, null, 2) + "\n"); - run("npm", ["publish", "--access", "public"], { cwd: cliDir }); + const publishArgs = ["publish", "--access", "public"]; + if (npmTag) publishArgs.push("--tag", npmTag); + run("npm", publishArgs, { cwd: cliDir }); } function validateGit(rootDir: string) { @@ -542,10 +563,10 @@ async function main() { publishCrates(rootDir, version); } if (flags.has("--publish-npm-sdk")) { - publishNpmSdk(rootDir, version); + publishNpmSdk(rootDir, version, latest); } if (flags.has("--publish-npm-cli")) { - publishNpmCli(rootDir, version); + publishNpmCli(rootDir, version, latest); } if (flags.has("--upload-typescript")) { uploadTypescriptArtifacts(rootDir, version, latest); @@ -626,11 +647,11 @@ async function main() { } if (shouldRun("publish-npm-sdk")) { - publishNpmSdk(rootDir, version); + publishNpmSdk(rootDir, version, latest); } if (shouldRun("publish-npm-cli")) { - publishNpmCli(rootDir, version); + publishNpmCli(rootDir, version, latest); } if (shouldRun("upload-typescript")) { diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index 5ad325d..2e96cf8 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -13,6 +13,7 @@ import type { ProblemDetails, QuestionReplyRequest, SessionListResponse, + TurnStreamQuery, UniversalEvent, } from "./types.ts"; @@ -142,45 +143,37 @@ export class SandboxAgent { }); } + async postMessageStream( + sessionId: string, + request: MessageRequest, + query?: TurnStreamQuery, + signal?: AbortSignal, + ): Promise { + return this.requestRaw("POST", `${API_PREFIX}/sessions/${encodeURIComponent(sessionId)}/messages/stream`, { + query, + body: request, + accept: "text/event-stream", + signal, + }); + } + async *streamEvents( sessionId: string, query?: EventsQuery, signal?: AbortSignal, ): AsyncGenerator { const response = await this.getEventsSse(sessionId, query, signal); - if (!response.body) { - throw new Error("SSE stream is not readable in this environment."); - } + yield* this.parseSseStream(response); + } - const reader = response.body.getReader(); - const decoder = new TextDecoder(); - let buffer = ""; - - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - // Normalize CRLF to LF for consistent parsing - buffer += decoder.decode(value, { stream: true }).replace(/\r\n/g, "\n"); - let index = buffer.indexOf("\n\n"); - while (index !== -1) { - const chunk = buffer.slice(0, index); - buffer = buffer.slice(index + 2); - const dataLines = chunk - .split("\n") - .filter((line) => line.startsWith("data:")); - if (dataLines.length > 0) { - const payload = dataLines - .map((line) => line.slice(5).trim()) - .join("\n"); - if (payload) { - yield JSON.parse(payload) as UniversalEvent; - } - } - index = buffer.indexOf("\n\n"); - } - } + async *streamTurn( + sessionId: string, + request: MessageRequest, + query?: TurnStreamQuery, + signal?: AbortSignal, + ): AsyncGenerator { + const response = await this.postMessageStream(sessionId, request, query, signal); + yield* this.parseSseStream(response); } async replyQuestion( @@ -297,6 +290,42 @@ export class SandboxAgent { return undefined; } } + + private async *parseSseStream(response: Response): AsyncGenerator { + if (!response.body) { + throw new Error("SSE stream is not readable in this environment."); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + // Normalize CRLF to LF for consistent parsing + buffer += decoder.decode(value, { stream: true }).replace(/\r\n/g, "\n"); + let index = buffer.indexOf("\n\n"); + while (index !== -1) { + const chunk = buffer.slice(0, index); + buffer = buffer.slice(index + 2); + const dataLines = chunk + .split("\n") + .filter((line) => line.startsWith("data:")); + if (dataLines.length > 0) { + const payload = dataLines + .map((line) => line.slice(5).trim()) + .join("\n"); + if (payload) { + yield JSON.parse(payload) as UniversalEvent; + } + } + index = buffer.indexOf("\n\n"); + } + } + } } const normalizeSpawnOptions = ( diff --git a/sdks/typescript/src/generated/openapi.ts b/sdks/typescript/src/generated/openapi.ts index 3d7eb62..bbdc5ea 100644 --- a/sdks/typescript/src/generated/openapi.ts +++ b/sdks/typescript/src/generated/openapi.ts @@ -32,6 +32,9 @@ export interface paths { "/v1/sessions/{session_id}/messages": { post: operations["post_message"]; }; + "/v1/sessions/{session_id}/messages/stream": { + post: operations["post_message_stream"]; + }; "/v1/sessions/{session_id}/permissions/{permission_id}/reply": { post: operations["reply_permission"]; }; @@ -258,6 +261,9 @@ export interface components { }; /** @enum {string} */ TerminatedBy: "agent" | "daemon"; + TurnStreamQuery: { + includeRaw?: boolean | null; + }; UniversalEvent: { data: components["schemas"]["UniversalEventData"]; event_id: string; @@ -480,6 +486,34 @@ export interface operations { }; }; }; + post_message_stream: { + parameters: { + query?: { + /** @description Include raw provider payloads */ + include_raw?: boolean | null; + }; + path: { + /** @description Session id */ + session_id: string; + }; + }; + requestBody: { + content: { + "application/json": components["schemas"]["MessageRequest"]; + }; + }; + responses: { + /** @description SSE event stream */ + 200: { + content: never; + }; + 404: { + content: { + "application/json": components["schemas"]["ProblemDetails"]; + }; + }; + }; + }; reply_permission: { parameters: { path: { diff --git a/sdks/typescript/src/index.ts b/sdks/typescript/src/index.ts index 8bc9f91..e734f42 100644 --- a/sdks/typescript/src/index.ts +++ b/sdks/typescript/src/index.ts @@ -41,6 +41,7 @@ export type { SessionListResponse, SessionStartedData, TerminatedBy, + TurnStreamQuery, UniversalEvent, UniversalEventData, UniversalEventType, diff --git a/sdks/typescript/src/types.ts b/sdks/typescript/src/types.ts index ebdf04f..e0c43df 100644 --- a/sdks/typescript/src/types.ts +++ b/sdks/typescript/src/types.ts @@ -39,6 +39,7 @@ export type SessionInfo = S["SessionInfo"]; export type SessionListResponse = S["SessionListResponse"]; export type SessionStartedData = S["SessionStartedData"]; export type TerminatedBy = S["TerminatedBy"]; +export type TurnStreamQuery = S["TurnStreamQuery"]; export type UniversalEvent = S["UniversalEvent"]; export type UniversalEventData = S["UniversalEventData"]; export type UniversalEventType = S["UniversalEventType"]; diff --git a/sdks/typescript/tests/client.test.ts b/sdks/typescript/tests/client.test.ts index 8aab690..f7d6314 100644 --- a/sdks/typescript/tests/client.test.ts +++ b/sdks/typescript/tests/client.test.ts @@ -164,6 +164,31 @@ describe("SandboxAgent", () => { }); }); + describe("postMessageStream", () => { + it("posts message and requests SSE", async () => { + const mockFetch = vi.fn().mockResolvedValue( + new Response("", { + status: 200, + headers: { "Content-Type": "text/event-stream" }, + }) + ); + const client = await SandboxAgent.connect({ + baseUrl: "http://localhost:8080", + fetch: mockFetch, + }); + + await client.postMessageStream("test-session", { message: "Hello" }, { includeRaw: true }); + + expect(mockFetch).toHaveBeenCalledWith( + "http://localhost:8080/v1/sessions/test-session/messages/stream?includeRaw=true", + expect.objectContaining({ + method: "POST", + body: JSON.stringify({ message: "Hello" }), + }) + ); + }); + }); + describe("getEvents", () => { it("returns events", async () => { const events = { events: [], hasMore: false }; diff --git a/server/packages/sandbox-agent/src/main.rs b/server/packages/sandbox-agent/src/main.rs index ab5ba71..972c697 100644 --- a/server/packages/sandbox-agent/src/main.rs +++ b/server/packages/sandbox-agent/src/main.rs @@ -122,6 +122,9 @@ enum SessionsCommand { #[command(name = "send-message")] /// Send a message to an existing session. SendMessage(SessionMessageArgs), + #[command(name = "send-message-stream")] + /// Send a message and stream the response for one turn. + SendMessageStream(SessionMessageStreamArgs), #[command(name = "terminate")] /// Terminate a session. Terminate(SessionTerminateArgs), @@ -195,6 +198,17 @@ struct SessionMessageArgs { client: ClientArgs, } +#[derive(Args, Debug)] +struct SessionMessageStreamArgs { + session_id: String, + #[arg(long, short = 'm')] + message: String, + #[arg(long)] + include_raw: bool, + #[command(flatten)] + client: ClientArgs, +} + #[derive(Args, Debug)] struct SessionEventsArgs { session_id: String, @@ -443,6 +457,22 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> { let response = ctx.post(&path, &body)?; print_empty_response(response) } + SessionsCommand::SendMessageStream(args) => { + let ctx = ClientContext::new(cli, &args.client)?; + let body = MessageRequest { + message: args.message.clone(), + }; + let path = format!("{API_PREFIX}/sessions/{}/messages/stream", args.session_id); + let response = ctx.post_with_query( + &path, + &body, + &[( + "include_raw", + if args.include_raw { Some("true".to_string()) } else { None }, + )], + )?; + print_text_response(response) + } SessionsCommand::Terminate(args) => { let ctx = ClientContext::new(cli, &args.client)?; let path = format!("{API_PREFIX}/sessions/{}/terminate", args.session_id); @@ -850,6 +880,21 @@ impl ClientContext { Ok(self.request(Method::POST, path).json(body).send()?) } + fn post_with_query( + &self, + path: &str, + body: &T, + query: &[(&str, Option)], + ) -> Result { + let mut request = self.request(Method::POST, path).json(body); + for (key, value) in query { + if let Some(value) = value { + request = request.query(&[(key, value)]); + } + } + Ok(request.send()?) + } + fn post_empty(&self, path: &str) -> Result { Ok(self.request(Method::POST, path).send()?) } diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 5e18db2..b3fc715 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -1,8 +1,9 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, VecDeque}; use std::convert::Infallible; use std::io::{BufRead, BufReader, Write}; use std::net::TcpListener; use std::process::Stdio; +use std::sync::atomic::{AtomicI64, Ordering}; use std::sync::Arc; use std::time::Duration; @@ -28,7 +29,7 @@ use sandbox_agent_universal_agent_schema::{ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use tokio::sync::{broadcast, mpsc, Mutex}; +use tokio::sync::{broadcast, mpsc, oneshot, Mutex}; use tokio::time::sleep; use tokio_stream::wrappers::BroadcastStream; use tower_http::trace::TraceLayer; @@ -89,6 +90,7 @@ pub fn build_router(state: AppState) -> Router { .route("/sessions", get(list_sessions)) .route("/sessions/:session_id", post(create_session)) .route("/sessions/:session_id/messages", post(post_message)) + .route("/sessions/:session_id/messages/stream", post(post_message_stream)) .route("/sessions/:session_id/terminate", post(terminate_session)) .route("/sessions/:session_id/events", get(get_events)) .route("/sessions/:session_id/events/sse", get(get_events_sse)) @@ -129,6 +131,7 @@ pub fn build_router(state: AppState) -> Router { list_sessions, create_session, post_message, + post_message_stream, terminate_session, get_events, get_events_sse, @@ -151,6 +154,7 @@ pub fn build_router(state: AppState) -> Router { CreateSessionResponse, MessageRequest, EventsQuery, + TurnStreamQuery, EventsResponse, UniversalEvent, UniversalEventData, @@ -488,6 +492,14 @@ impl SessionState { } fn ended_error(&self) -> Option { + self.ended_error_for_messages(false) + } + + /// Returns an error if the session cannot accept new messages. + /// `for_new_message` should be true when checking before sending a new message - + /// this allows agents that support resumption (Claude, Amp, OpenCode) to continue + /// after their process exits successfully. + fn ended_error_for_messages(&self, for_new_message: bool) -> Option { if !self.ended { return None; } @@ -496,6 +508,15 @@ impl SessionState { message: "session terminated".to_string(), }); } + // For agents that support resumption (Claude, Amp, OpenCode), allow new messages + // after the process exits with success (Completed reason). The new message will + // spawn a fresh process with --resume/--continue to continue the conversation. + if for_new_message + && matches!(self.ended_reason, Some(SessionEndReason::Completed)) + && agent_supports_resume(self.agent) + { + return None; + } Some(SandboxError::AgentProcessExited { agent: self.agent.as_str().to_string(), exit_code: self.ended_exit_code, @@ -542,8 +563,9 @@ impl SessionState { #[derive(Debug)] struct SessionManager { agent_manager: Arc, - sessions: Mutex>, + sessions: Mutex>, opencode_server: Mutex>, + codex_server: Mutex>>, http_client: Client, } @@ -554,6 +576,92 @@ struct OpencodeServer { child: Option, } +/// Shared Codex app-server process that handles multiple sessions via JSON-RPC. +/// Similar to OpenCode's server model - a single long-running process that multiplexes +/// multiple thread (session) conversations. +struct CodexServer { + /// Sender for writing to the process stdin + stdin_sender: mpsc::UnboundedSender, + /// Pending JSON-RPC requests awaiting responses, keyed by request ID + pending_requests: std::sync::Mutex>>, + /// Next request ID for JSON-RPC + next_id: AtomicI64, + /// Whether initialize/initialized handshake has completed + initialized: std::sync::Mutex, + /// Mapping from thread_id to session_id for routing notifications + thread_sessions: std::sync::Mutex>, +} + +impl std::fmt::Debug for CodexServer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CodexServer") + .field("next_id", &self.next_id.load(Ordering::SeqCst)) + .finish() + } +} + +impl CodexServer { + fn new(stdin_sender: mpsc::UnboundedSender) -> Self { + Self { + stdin_sender, + pending_requests: std::sync::Mutex::new(HashMap::new()), + next_id: AtomicI64::new(1), + initialized: std::sync::Mutex::new(false), + thread_sessions: std::sync::Mutex::new(HashMap::new()), + } + } + + fn next_request_id(&self) -> i64 { + self.next_id.fetch_add(1, Ordering::SeqCst) + } + + fn send_request(&self, id: i64, request: &impl Serialize) -> Option> { + let (tx, rx) = oneshot::channel(); + { + let mut pending = self.pending_requests.lock().unwrap(); + pending.insert(id, tx); + } + let line = serde_json::to_string(request).ok()?; + self.stdin_sender.send(line).ok()?; + Some(rx) + } + + fn send_notification(&self, notification: &impl Serialize) -> bool { + let Ok(line) = serde_json::to_string(notification) else { + return false; + }; + self.stdin_sender.send(line).is_ok() + } + + fn complete_request(&self, id: i64, result: Value) { + let tx = { + let mut pending = self.pending_requests.lock().unwrap(); + pending.remove(&id) + }; + if let Some(tx) = tx { + let _ = tx.send(result); + } + } + + fn register_thread(&self, thread_id: String, session_id: String) { + let mut sessions = self.thread_sessions.lock().unwrap(); + sessions.insert(thread_id, session_id); + } + + fn session_for_thread(&self, thread_id: &str) -> Option { + let sessions = self.thread_sessions.lock().unwrap(); + sessions.get(thread_id).cloned() + } + + fn is_initialized(&self) -> bool { + *self.initialized.lock().unwrap() + } + + fn set_initialized(&self) { + *self.initialized.lock().unwrap() = true; + } +} + struct SessionSubscription { initial_events: Vec, receiver: broadcast::Receiver, @@ -563,12 +671,27 @@ impl SessionManager { fn new(agent_manager: Arc) -> Self { Self { agent_manager, - sessions: Mutex::new(HashMap::new()), + sessions: Mutex::new(Vec::new()), opencode_server: Mutex::new(None), + codex_server: Mutex::new(None), http_client: Client::new(), } } + fn session_ref<'a>( + sessions: &'a [SessionState], + session_id: &str, + ) -> Option<&'a SessionState> { + sessions.iter().find(|session| session.session_id == session_id) + } + + fn session_mut<'a>( + sessions: &'a mut [SessionState], + session_id: &str, + ) -> Option<&'a mut SessionState> { + sessions.iter_mut().find(|session| session.session_id == session_id) + } + async fn create_session( self: &Arc, session_id: String, @@ -577,7 +700,7 @@ impl SessionManager { let agent_id = parse_agent_id(&request.agent)?; { let sessions = self.sessions.lock().await; - if sessions.contains_key(&session_id) { + if sessions.iter().any(|session| session.session_id == session_id) { return Err(SandboxError::SessionAlreadyExists { session_id }); } } @@ -608,6 +731,20 @@ impl SessionManager { let opencode_session_id = self.create_opencode_session().await?; session.native_session_id = Some(opencode_session_id); } + if agent_id == AgentId::Codex { + // Create a thread in the shared Codex app-server + let snapshot = SessionSnapshot { + session_id: session_id.clone(), + agent: agent_id, + agent_mode: session.agent_mode.clone(), + permission_mode: session.permission_mode.clone(), + model: session.model.clone(), + variant: session.variant.clone(), + native_session_id: None, + }; + let thread_id = self.create_codex_thread(&session_id, &snapshot).await?; + session.native_session_id = Some(thread_id); + } if agent_id == AgentId::Mock { session.native_session_id = Some(format!("mock-{session_id}")); } @@ -629,12 +766,21 @@ impl SessionManager { .with_native_session(session.native_session_id.clone()); session.record_conversions(vec![started]); if agent_id == AgentId::Mock { + // Emit native session.started like real agents do + let native_started = EventConversion::new( + UniversalEventType::SessionStarted, + UniversalEventData::SessionStarted(SessionStartedData { + metadata: Some(json!({ "mock": true })), + }), + ) + .with_native_session(session.native_session_id.clone()); + session.record_conversions(vec![native_started]); session.record_conversions(mock_prompt_conversions("mock_0")); } let native_session_id = session.native_session_id.clone(); let mut sessions = self.sessions.lock().await; - sessions.insert(session_id.clone(), session); + sessions.push(session); drop(sessions); if agent_id == AgentId::Opencode { @@ -671,7 +817,8 @@ impl SessionManager { session_id: String, message: String, ) -> Result<(), SandboxError> { - let session_snapshot = self.session_snapshot(&session_id, false).await?; + // Use allow_ended=true and do explicit check to allow resumable agents + let session_snapshot = self.session_snapshot_for_message(&session_id).await?; if session_snapshot.agent == AgentId::Mock { self.send_mock_message(session_id, message).await?; return Ok(()); @@ -682,6 +829,14 @@ impl SessionManager { .await?; return Ok(()); } + if session_snapshot.agent == AgentId::Codex { + // Use the shared Codex app-server + self.send_codex_turn(&session_snapshot, &message).await?; + return Ok(()); + } + + // Reopen the session if it was ended (for resumable agents) + self.reopen_session_if_ended(&session_id).await; let manager = self.agent_manager.clone(); let prompt = message; @@ -714,14 +869,28 @@ impl SessionManager { Ok(()) } + /// Reopens a session that was ended by an agent process completing. + /// This allows resumable agents (Claude, Amp, OpenCode) to continue conversations. + async fn reopen_session_if_ended(&self, session_id: &str) { + let mut sessions = self.sessions.lock().await; + if let Some(session) = Self::session_mut(&mut sessions, session_id) { + if session.ended && agent_supports_resume(session.agent) { + session.ended = false; + session.ended_exit_code = None; + session.ended_message = None; + session.ended_reason = None; + session.terminated_by = None; + } + } + } + async fn terminate_session(&self, session_id: String) -> Result<(), SandboxError> { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(&session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.clone(), - })?; + let session = Self::session_mut(&mut sessions, &session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.clone(), + } + })?; if session.ended { return Ok(()); } @@ -752,11 +921,11 @@ impl SessionManager { include_raw: bool, ) -> Result { let sessions = self.sessions.lock().await; - let session = sessions - .get(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { + let session = Self::session_ref(&sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { session_id: session_id.to_string(), - })?; + } + })?; let mut events: Vec = session .events @@ -789,7 +958,8 @@ impl SessionManager { async fn list_sessions(&self) -> Vec { let sessions = self.sessions.lock().await; sessions - .values() + .iter() + .rev() .map(|state| SessionInfo { session_id: state.session_id.clone(), agent: state.agent.as_str().to_string(), @@ -810,11 +980,11 @@ impl SessionManager { offset: u64, ) -> Result { let sessions = self.sessions.lock().await; - let session = sessions - .get(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { + let session = Self::session_ref(&sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { session_id: session_id.to_string(), - })?; + } + })?; let initial_events = session .events .iter() @@ -828,6 +998,34 @@ impl SessionManager { }) } + async fn subscribe_for_turn( + &self, + session_id: &str, + ) -> Result<(SessionSnapshot, SessionSubscription), SandboxError> { + let sessions = self.sessions.lock().await; + let session = Self::session_ref(&sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; + if let Some(err) = session.ended_error() { + return Err(err); + } + let offset = session.next_event_sequence; + let initial_events = session + .events + .iter() + .filter(|event| event.sequence > offset) + .cloned() + .collect::>(); + let receiver = session.broadcaster.subscribe(); + let subscription = SessionSubscription { + initial_events, + receiver, + }; + Ok((SessionSnapshot::from(session), subscription)) + } + async fn reply_question( &self, session_id: &str, @@ -836,12 +1034,11 @@ impl SessionManager { ) -> Result<(), SandboxError> { let (agent, native_session_id, pending_question) = { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.to_string(), - })?; + let session = Self::session_mut(&mut sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; let pending = session.take_question(question_id); if pending.is_none() { return Err(SandboxError::InvalidRequest { @@ -895,12 +1092,11 @@ impl SessionManager { ) -> Result<(), SandboxError> { let (agent, native_session_id, pending_question) = { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.to_string(), - })?; + let session = Self::session_mut(&mut sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; let pending = session.take_question(question_id); if pending.is_none() { return Err(SandboxError::InvalidRequest { @@ -954,12 +1150,11 @@ impl SessionManager { let reply_for_status = reply.clone(); let (agent, native_session_id, codex_sender, pending_permission) = { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.to_string(), - })?; + let session = Self::session_mut(&mut sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; let pending = session.take_permission(permission_id); if pending.is_none() { return Err(SandboxError::InvalidRequest { @@ -1072,21 +1267,21 @@ impl SessionManager { Ok(()) } - async fn session_snapshot( + /// Gets a session snapshot for sending a new message. + /// Uses the `for_new_message` check which allows agents that support resumption + /// (Claude, Amp, OpenCode) to continue after their process exits successfully. + async fn session_snapshot_for_message( &self, session_id: &str, - allow_ended: bool, ) -> Result { let sessions = self.sessions.lock().await; - let session = sessions - .get(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { + let session = Self::session_ref(&sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { session_id: session_id.to_string(), - })?; - if !allow_ended { - if let Some(err) = session.ended_error() { - return Err(err); } + })?; + if let Some(err) = session.ended_error_for_messages(true) { + return Err(err); } Ok(SessionSnapshot::from(session)) } @@ -1098,12 +1293,11 @@ impl SessionManager { ) -> Result<(), SandboxError> { let prefix = { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(&session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.to_string(), - })?; + let session = Self::session_mut(&mut sessions, &session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; if let Some(err) = session.ended_error() { return Err(err); } @@ -1187,7 +1381,7 @@ impl SessionManager { codex_sender = Some(writer_tx.clone()); { let mut sessions = self.sessions.lock().await; - if let Some(session) = sessions.get_mut(&session_id) { + if let Some(session) = Self::session_mut(&mut sessions, &session_id) { session.set_codex_sender(Some(writer_tx)); } } @@ -1224,7 +1418,7 @@ impl SessionManager { if agent == AgentId::Codex { let mut sessions = self.sessions.lock().await; - if let Some(session) = sessions.get_mut(&session_id) { + if let Some(session) = Self::session_mut(&mut sessions, &session_id) { session.set_codex_sender(None); } } @@ -1314,12 +1508,11 @@ impl SessionManager { conversions: Vec, ) -> Result, SandboxError> { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.to_string(), - })?; + let session = Self::session_mut(&mut sessions, session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.to_string(), + } + })?; Ok(session.record_conversions(conversions)) } @@ -1350,7 +1543,7 @@ impl SessionManager { terminated_by: TerminatedBy, ) { let mut sessions = self.sessions.lock().await; - if let Some(session) = sessions.get_mut(session_id) { + if let Some(session) = Self::session_mut(&mut sessions, session_id) { if session.ended { return; } @@ -1380,12 +1573,11 @@ impl SessionManager { let native_session_id = { let mut sessions = self.sessions.lock().await; - let session = - sessions - .get_mut(&session_id) - .ok_or_else(|| SandboxError::SessionNotFound { - session_id: session_id.clone(), - })?; + let session = Self::session_mut(&mut sessions, &session_id).ok_or_else(|| { + SandboxError::SessionNotFound { + session_id: session_id.clone(), + } + })?; if session.opencode_stream_started { return Ok(()); } @@ -1583,6 +1775,333 @@ impl SessionManager { }) } + /// Ensures a shared Codex app-server process is running. + /// Spawns the process if not already running, sets up stdin/stdout tasks, + /// and performs the initialize handshake if needed. + async fn ensure_codex_server(self: &Arc) -> Result, SandboxError> { + // Fast path: return existing server + { + let guard = self.codex_server.lock().await; + if let Some(server) = guard.as_ref() { + return Ok(server.clone()); + } + } + + // Spawn the codex app-server process + let manager = self.agent_manager.clone(); + let (stdin_tx, stdin_rx) = mpsc::unbounded_channel::(); + let (stdout_tx, stdout_rx) = mpsc::unbounded_channel::(); + + let _child = tokio::task::spawn_blocking(move || -> Result { + let path = manager + .resolve_binary(AgentId::Codex) + .map_err(|err| map_spawn_error(AgentId::Codex, err))?; + let mut command = std::process::Command::new(path); + command + .arg("app-server") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let mut child = command.spawn().map_err(|err| SandboxError::StreamError { + message: err.to_string(), + })?; + + let stdin = child.stdin.take().ok_or_else(|| SandboxError::StreamError { + message: "codex stdin unavailable".to_string(), + })?; + let stdout = child.stdout.take().ok_or_else(|| SandboxError::StreamError { + message: "codex stdout unavailable".to_string(), + })?; + + // Stdin writer task + let stdin_rx_mut = std::sync::Mutex::new(stdin_rx); + std::thread::spawn(move || { + let mut stdin = stdin; + let mut rx = stdin_rx_mut.lock().unwrap(); + while let Some(line) = rx.blocking_recv() { + if writeln!(stdin, "{line}").is_err() { + break; + } + if stdin.flush().is_err() { + break; + } + } + }); + + // Stdout reader task + std::thread::spawn(move || { + let reader = BufReader::new(stdout); + for line in reader.lines() { + let Ok(line) = line else { break }; + if stdout_tx.send(line).is_err() { + break; + } + } + }); + + Ok(child) + }) + .await + .map_err(|err| SandboxError::StreamError { + message: err.to_string(), + })??; + + let server = Arc::new(CodexServer::new(stdin_tx)); + + // Store server before spawning notification handler + { + let mut guard = self.codex_server.lock().await; + if let Some(existing) = guard.as_ref() { + // Another task beat us to it + return Ok(existing.clone()); + } + *guard = Some(server.clone()); + } + + // Spawn notification routing task + let server_for_task = server.clone(); + let self_for_task = Arc::clone(self); + tokio::spawn(async move { + self_for_task + .handle_codex_server_output(server_for_task, stdout_rx) + .await; + }); + + // Perform initialize handshake + self.codex_server_initialize(&server).await?; + + Ok(server) + } + + /// Handles output from the Codex app-server, routing responses and notifications. + async fn handle_codex_server_output( + self: Arc, + server: Arc, + mut stdout_rx: mpsc::UnboundedReceiver, + ) { + while let Some(line) = stdout_rx.recv().await { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + + let value: Value = match serde_json::from_str(trimmed) { + Ok(v) => v, + Err(_) => continue, + }; + + let message: codex_schema::JsonrpcMessage = match serde_json::from_value(value.clone()) { + Ok(m) => m, + Err(_) => continue, + }; + + match message { + codex_schema::JsonrpcMessage::Response(response) => { + // Route response to waiting request + if let Some(id) = codex_request_id_to_i64(&response.id) { + server.complete_request(id, response.result.clone()); + } + } + codex_schema::JsonrpcMessage::Notification(_) => { + // Route notification to correct session by thread_id + if let Ok(notification) = + serde_json::from_value::(value.clone()) + { + if let Some(thread_id) = codex_thread_id_from_server_notification(¬ification) { + if let Some(session_id) = server.session_for_thread(&thread_id) { + let conversions = match convert_codex::notification_to_universal(¬ification) { + Ok(c) => c, + Err(err) => vec![agent_unparsed("codex", &err, value.clone())], + }; + let _ = self.record_conversions(&session_id, conversions).await; + } + } + } + } + codex_schema::JsonrpcMessage::Request(_) => { + // Handle server requests (permission requests) + if let Ok(request) = + serde_json::from_value::(value.clone()) + { + if let Some(thread_id) = codex_thread_id_from_server_request(&request) { + if let Some(session_id) = server.session_for_thread(&thread_id) { + match codex_request_to_universal(&request) { + Ok(mut conversions) => { + for conversion in &mut conversions { + conversion.raw = Some(value.clone()); + } + let _ = self.record_conversions(&session_id, conversions).await; + } + Err(err) => { + let _ = self + .record_conversions( + &session_id, + vec![agent_unparsed("codex", &err, value.clone())], + ) + .await; + } + } + } + } + } + } + codex_schema::JsonrpcMessage::Error(error) => { + // Log error but don't have a session to route to + eprintln!("Codex server error: {:?}", error); + } + } + } + } + + /// Performs the initialize/initialized handshake with the Codex server. + async fn codex_server_initialize(&self, server: &CodexServer) -> Result<(), SandboxError> { + if server.is_initialized() { + return Ok(()); + } + + let id = server.next_request_id(); + let request = codex_schema::ClientRequest::Initialize { + id: codex_schema::RequestId::from(id), + params: codex_schema::InitializeParams { + client_info: codex_schema::ClientInfo { + name: "sandbox-agent".to_string(), + title: Some("sandbox-agent".to_string()), + version: env!("CARGO_PKG_VERSION").to_string(), + }, + }, + }; + + let rx = server + .send_request(id, &request) + .ok_or_else(|| SandboxError::StreamError { + message: "failed to send initialize request".to_string(), + })?; + + // Wait for initialize response with timeout + let result = tokio::time::timeout(Duration::from_secs(30), rx).await; + match result { + Ok(Ok(_)) => { + // Send initialized notification + let notification = codex_schema::JsonrpcNotification { + method: "initialized".to_string(), + params: None, + }; + server.send_notification(¬ification); + server.set_initialized(); + Ok(()) + } + Ok(Err(_)) => Err(SandboxError::StreamError { + message: "initialize request cancelled".to_string(), + }), + Err(_) => Err(SandboxError::StreamError { + message: "initialize request timed out".to_string(), + }), + } + } + + /// Creates a new Codex thread/session via the shared app-server. + async fn create_codex_thread( + self: &Arc, + session_id: &str, + session: &SessionSnapshot, + ) -> Result { + let server = self.ensure_codex_server().await?; + + let id = server.next_request_id(); + let mut params = codex_schema::ThreadStartParams::default(); + params.approval_policy = codex_approval_policy(Some(&session.permission_mode)); + params.sandbox = codex_sandbox_mode(Some(&session.permission_mode)); + params.model = session.model.clone(); + + let request = codex_schema::ClientRequest::ThreadStart { + id: codex_schema::RequestId::from(id), + params, + }; + + let rx = server + .send_request(id, &request) + .ok_or_else(|| SandboxError::StreamError { + message: "failed to send thread/start request".to_string(), + })?; + + // Wait for thread/start response + let result = tokio::time::timeout(Duration::from_secs(30), rx).await; + match result { + Ok(Ok(response)) => { + // Extract thread_id from response + let thread_id = response + .get("thread") + .and_then(|t| t.get("id")) + .and_then(Value::as_str) + .or_else(|| response.get("threadId").and_then(Value::as_str)) + .ok_or_else(|| SandboxError::StreamError { + message: "thread/start response missing thread id".to_string(), + })? + .to_string(); + + // Register thread -> session mapping + server.register_thread(thread_id.clone(), session_id.to_string()); + + Ok(thread_id) + } + Ok(Err(_)) => Err(SandboxError::StreamError { + message: "thread/start request cancelled".to_string(), + }), + Err(_) => Err(SandboxError::StreamError { + message: "thread/start request timed out".to_string(), + }), + } + } + + /// Sends a turn/start request to an existing Codex thread. + async fn send_codex_turn( + self: &Arc, + session: &SessionSnapshot, + prompt: &str, + ) -> Result<(), SandboxError> { + let server = self.ensure_codex_server().await?; + + let thread_id = session + .native_session_id + .as_ref() + .ok_or_else(|| SandboxError::InvalidRequest { + message: "missing Codex thread id".to_string(), + })?; + + let id = server.next_request_id(); + let prompt_text = codex_prompt_for_mode(prompt, Some(&session.agent_mode)); + let params = codex_schema::TurnStartParams { + approval_policy: codex_approval_policy(Some(&session.permission_mode)), + collaboration_mode: None, + cwd: None, + effort: None, + input: vec![codex_schema::UserInput::Text { + text: prompt_text, + text_elements: Vec::new(), + }], + model: session.model.clone(), + output_schema: None, + sandbox_policy: codex_sandbox_policy(Some(&session.permission_mode)), + summary: None, + thread_id: thread_id.clone(), + }; + + let request = codex_schema::ClientRequest::TurnStart { + id: codex_schema::RequestId::from(id), + params, + }; + + // Send but don't wait for response - notifications will stream back + server + .send_request(id, &request) + .ok_or_else(|| SandboxError::StreamError { + message: "failed to send turn/start request".to_string(), + })?; + + Ok(()) + } + async fn fetch_opencode_modes(&self) -> Result, SandboxError> { let base_url = self.ensure_opencode_server().await?; let endpoints = [ @@ -1959,7 +2478,14 @@ pub struct EventsQuery { pub offset: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub limit: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none", alias = "include_raw")] + pub include_raw: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, ToSchema, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct TurnStreamQuery { + #[serde(default, skip_serializing_if = "Option::is_none", alias = "include_raw")] pub include_raw: Option, } @@ -2170,6 +2696,39 @@ async fn post_message( Ok(StatusCode::NO_CONTENT) } +#[utoipa::path( + post, + path = "/v1/sessions/{session_id}/messages/stream", + request_body = MessageRequest, + params( + ("session_id" = String, Path, description = "Session id"), + ("include_raw" = Option, Query, description = "Include raw provider payloads") + ), + responses( + (status = 200, description = "SSE event stream"), + (status = 404, body = ProblemDetails) + ), + tag = "sessions" +)] +async fn post_message_stream( + State(state): State>, + Path(session_id): Path, + Query(query): Query, + Json(request): Json, +) -> Result>>, ApiError> { + let include_raw = query.include_raw.unwrap_or(false); + let (snapshot, subscription) = state + .session_manager + .subscribe_for_turn(&session_id) + .await?; + state + .session_manager + .send_message(session_id, request.message) + .await?; + let stream = stream_turn_events(subscription, snapshot.agent, include_raw); + Ok(Sse::new(stream)) +} + #[utoipa::path( post, path = "/v1/sessions/{session_id}/terminate", @@ -2355,6 +2914,12 @@ fn all_agents() -> [AgentId; 5] { ] } +/// Returns true if the agent supports resuming a session after its process exits. +/// These agents can use --resume/--continue to continue a conversation. +fn agent_supports_resume(agent: AgentId) -> bool { + matches!(agent, AgentId::Claude | AgentId::Amp | AgentId::Opencode | AgentId::Codex) +} + fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { match agent { // Headless Claude CLI does not expose AskUserQuestion and does not emit tool_result, @@ -3067,6 +3632,63 @@ fn codex_should_emit_notification(notification: &codex_schema::ServerNotificatio true } +/// Extracts thread_id from a Codex server notification. +fn codex_thread_id_from_server_notification( + notification: &codex_schema::ServerNotification, +) -> Option { + match notification { + codex_schema::ServerNotification::ThreadStarted(params) => Some(params.thread.id.clone()), + codex_schema::ServerNotification::TurnStarted(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::TurnCompleted(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::ItemStarted(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::ItemCompleted(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::ItemAgentMessageDelta(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemReasoningTextDelta(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemReasoningSummaryTextDelta(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemCommandExecutionOutputDelta(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemFileChangeOutputDelta(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemMcpToolCallProgress(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ThreadTokenUsageUpdated(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::TurnDiffUpdated(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::TurnPlanUpdated(params) => Some(params.thread_id.clone()), + codex_schema::ServerNotification::ItemCommandExecutionTerminalInteraction(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ItemReasoningSummaryPartAdded(params) => { + Some(params.thread_id.clone()) + } + codex_schema::ServerNotification::ThreadCompacted(params) => Some(params.thread_id.clone()), + _ => None, + } +} + +/// Extracts thread_id from a Codex server request. +fn codex_thread_id_from_server_request(request: &codex_schema::ServerRequest) -> Option { + match request { + codex_schema::ServerRequest::ItemCommandExecutionRequestApproval { params, .. } => { + Some(params.thread_id.clone()) + } + codex_schema::ServerRequest::ItemFileChangeRequestApproval { params, .. } => { + Some(params.thread_id.clone()) + } + _ => None, + } +} + fn codex_request_to_universal( request: &codex_schema::ServerRequest, ) -> Result, String> { @@ -3173,6 +3795,14 @@ fn codex_request_id_from_value(value: &Value) -> Option } } +/// Extracts i64 from a RequestId (for matching request/response pairs). +fn codex_request_id_to_i64(id: &codex_schema::RequestId) -> Option { + match id { + codex_schema::RequestId::Variant1(n) => Some(*n), + codex_schema::RequestId::Variant0(s) => s.parse().ok(), + } +} + fn codex_command_decision_for_reply( reply: PermissionReply, ) -> codex_schema::CommandExecutionApprovalDecision { @@ -4095,6 +4725,93 @@ fn now_rfc3339() -> String { .unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string()) } +struct TurnStreamState { + initial_events: VecDeque, + receiver: broadcast::Receiver, + include_raw: bool, + done: bool, + agent: AgentId, +} + +fn stream_turn_events( + subscription: SessionSubscription, + agent: AgentId, + include_raw: bool, +) -> impl futures::Stream> { + let state = TurnStreamState { + initial_events: VecDeque::from(subscription.initial_events), + receiver: subscription.receiver, + include_raw, + done: false, + agent, + }; + stream::unfold(state, |mut state| async move { + if state.done { + return None; + } + + let mut event = if let Some(event) = state.initial_events.pop_front() { + event + } else { + loop { + match state.receiver.recv().await { + Ok(event) => break event, + Err(broadcast::error::RecvError::Lagged(_)) => continue, + Err(broadcast::error::RecvError::Closed) => return None, + } + } + }; + + if !state.include_raw { + event.raw = None; + } + + if is_turn_terminal(&event, state.agent) { + state.done = true; + } + + Some((Ok::(to_sse_event(event)), state)) + }) +} + +fn is_turn_terminal(event: &UniversalEvent, agent: AgentId) -> bool { + match event.event_type { + UniversalEventType::SessionEnded + | UniversalEventType::Error + | UniversalEventType::AgentUnparsed + | UniversalEventType::PermissionRequested + | UniversalEventType::QuestionRequested => true, + UniversalEventType::ItemCompleted => { + let UniversalEventData::Item(ItemEventData { item }) = &event.data else { + return false; + }; + if let Some(label) = status_label(item) { + if label == "turn.completed" || label == "session.idle" { + return true; + } + } + if matches!(item.role, Some(ItemRole::Assistant)) && item.kind == ItemKind::Message { + return agent != AgentId::Codex; + } + false + } + _ => false, + } +} + +fn status_label(item: &UniversalItem) -> Option<&str> { + if item.kind != ItemKind::Status { + return None; + } + item.content.iter().find_map(|part| { + if let ContentPart::Status { label, .. } = part { + Some(label.as_str()) + } else { + None + } + }) +} + fn to_sse_event(event: UniversalEvent) -> Event { Event::default() .json_data(&event) diff --git a/server/packages/sandbox-agent/src/telemetry.rs b/server/packages/sandbox-agent/src/telemetry.rs index 990e661..74ba268 100644 --- a/server/packages/sandbox-agent/src/telemetry.rs +++ b/server/packages/sandbox-agent/src/telemetry.rs @@ -8,11 +8,15 @@ use std::time::{Duration, SystemTime, UNIX_EPOCH}; use reqwest::Client; use serde::Serialize; use time::OffsetDateTime; +use tokio::time::Instant; const TELEMETRY_URL: &str = "https://tc.rivet.dev"; const TELEMETRY_ENV_DEBUG: &str = "SANDBOX_AGENT_TELEMETRY_DEBUG"; const TELEMETRY_ID_FILE: &str = "telemetry_id"; -const TELEMETRY_TIMEOUT_MS: u64 = 800; +const TELEMETRY_LAST_SENT_FILE: &str = "telemetry_last_sent"; +const TELEMETRY_TIMEOUT_MS: u64 = 2_000; +const TELEMETRY_INTERVAL_SECS: u64 = 300; +const TELEMETRY_MIN_GAP_SECS: i64 = 300; #[derive(Debug, Serialize)] struct TelemetryEvent { @@ -49,7 +53,6 @@ struct OsInfo { #[derive(Debug, Serialize)] struct ProviderInfo { name: String, - confidence: String, #[serde(skip_serializing_if = "Option::is_none")] method: Option, #[serde(skip_serializing_if = "Option::is_none")] @@ -69,11 +72,10 @@ pub fn telemetry_enabled(no_telemetry: bool) -> bool { } pub fn log_enabled_message() { - tracing::info!("anonymous telemetry is enabled; disable with --no-telemetry"); + tracing::info!("anonymous telemetry is enabled, disable with --no-telemetry"); } pub fn spawn_telemetry_task() { - let event = build_event(); tokio::spawn(async move { let client = match Client::builder() .timeout(Duration::from_millis(TELEMETRY_TIMEOUT_MS)) @@ -86,21 +88,38 @@ pub fn spawn_telemetry_task() { } }; - if let Err(err) = client.post(TELEMETRY_URL).json(&event).send().await { - tracing::debug!(error = %err, "telemetry request failed"); + attempt_send(&client).await; + let start = Instant::now() + Duration::from_secs(TELEMETRY_INTERVAL_SECS); + let mut interval = tokio::time::interval_at(start, Duration::from_secs(TELEMETRY_INTERVAL_SECS)); + loop { + interval.tick().await; + attempt_send(&client).await; } }); } -fn build_event() -> TelemetryEvent { +async fn attempt_send(client: &Client) { let dt = OffsetDateTime::now_utc().unix_timestamp(); + if !should_send(dt) { + return; + } + + let event = build_event(dt); + if let Err(err) = client.post(TELEMETRY_URL).json(&event).send().await { + tracing::debug!(error = %err, "telemetry request failed"); + return; + } + write_last_sent(dt); +} + +fn build_event(dt: i64) -> TelemetryEvent { let eid = load_or_create_id(); TelemetryEvent { p: "sandbox-agent".to_string(), dt, et: "sandbox".to_string(), eid, - ev: "entity_snapshot".to_string(), + ev: "entity_beacon".to_string(), d: TelemetryData { version: env!("CARGO_PKG_VERSION").to_string(), os: OsInfo { @@ -138,9 +157,46 @@ fn load_or_create_id() -> String { } fn telemetry_id_path() -> PathBuf { + telemetry_dir().join(TELEMETRY_ID_FILE) +} + +fn telemetry_last_sent_path() -> PathBuf { + telemetry_dir().join(TELEMETRY_LAST_SENT_FILE) +} + +fn telemetry_dir() -> PathBuf { dirs::data_dir() - .map(|dir| dir.join("sandbox-agent").join(TELEMETRY_ID_FILE)) - .unwrap_or_else(|| PathBuf::from(".sandbox-agent").join(TELEMETRY_ID_FILE)) + .map(|dir| dir.join("sandbox-agent")) + .unwrap_or_else(|| PathBuf::from(".sandbox-agent")) +} + +fn should_send(now: i64) -> bool { + if let Some(last) = read_last_sent() { + if now >= last && now - last < TELEMETRY_MIN_GAP_SECS { + return false; + } + } + true +} + +fn read_last_sent() -> Option { + let path = telemetry_last_sent_path(); + fs::read_to_string(&path) + .ok() + .and_then(|value| value.trim().parse::().ok()) +} + +fn write_last_sent(timestamp: i64) { + let path = telemetry_last_sent_path(); + if let Some(parent) = path.parent() { + if let Err(err) = fs::create_dir_all(parent) { + tracing::debug!(error = %err, "failed to create telemetry directory"); + return; + } + } + if let Ok(mut file) = fs::OpenOptions::new().create(true).write(true).truncate(true).open(&path) { + let _ = file.write_all(timestamp.to_string().as_bytes()); + } } fn generate_id() -> String { @@ -185,7 +241,6 @@ fn detect_provider() -> ProviderInfo { ]); return ProviderInfo { name: "e2b".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -206,7 +261,6 @@ fn detect_provider() -> ProviderInfo { ]); return ProviderInfo { name: "vercel".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -219,7 +273,6 @@ fn detect_provider() -> ProviderInfo { ]); return ProviderInfo { name: "modal".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -232,7 +285,6 @@ fn detect_provider() -> ProviderInfo { ]); return ProviderInfo { name: "fly.io".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -245,7 +297,6 @@ fn detect_provider() -> ProviderInfo { ]); return ProviderInfo { name: "replit".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -254,7 +305,6 @@ fn detect_provider() -> ProviderInfo { if env::var("CODESANDBOX_HOST").is_ok() || env::var("CSB_BASE_PREVIEW_HOST").is_ok() { return ProviderInfo { name: "codesandbox".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata: None, }; @@ -264,7 +314,6 @@ fn detect_provider() -> ProviderInfo { let metadata = metadata_or_none([("name", env::var("CODESPACE_NAME").ok())]); return ProviderInfo { name: "github-codespaces".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -274,7 +323,6 @@ fn detect_provider() -> ProviderInfo { let metadata = metadata_or_none([("environment", env::var("RAILWAY_ENVIRONMENT").ok())]); return ProviderInfo { name: "railway".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -284,7 +332,6 @@ fn detect_provider() -> ProviderInfo { let metadata = metadata_or_none([("serviceId", env::var("RENDER_SERVICE_ID").ok())]); return ProviderInfo { name: "render".to_string(), - confidence: "high".to_string(), method: Some("env".to_string()), metadata, }; @@ -293,7 +340,6 @@ fn detect_provider() -> ProviderInfo { if detect_daytona() { return ProviderInfo { name: "daytona".to_string(), - confidence: "medium".to_string(), method: Some("filesystem".to_string()), metadata: None, }; @@ -302,7 +348,6 @@ fn detect_provider() -> ProviderInfo { if detect_docker() { return ProviderInfo { name: "docker".to_string(), - confidence: "high".to_string(), method: Some("filesystem".to_string()), metadata: None, }; @@ -310,7 +355,6 @@ fn detect_provider() -> ProviderInfo { ProviderInfo { name: "unknown".to_string(), - confidence: "low".to_string(), method: None, metadata: None, } diff --git a/server/packages/sandbox-agent/tests/agent_agnostic.rs b/server/packages/sandbox-agent/tests/agent_agnostic.rs deleted file mode 100644 index 1de37dc..0000000 --- a/server/packages/sandbox-agent/tests/agent_agnostic.rs +++ /dev/null @@ -1,657 +0,0 @@ -use std::collections::HashMap; -use std::time::{Duration, Instant}; - -use axum::body::Body; -use axum::http::{Method, Request, StatusCode}; -use axum::Router; -use http_body_util::BodyExt; -use serde_json::{json, Value}; -use tempfile::TempDir; -use tower::util::ServiceExt; - -use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; -use sandbox_agent_agent_management::testing::test_agents_from_env; -use sandbox_agent_agent_credentials::ExtractedCredentials; -use sandbox_agent::router::{ - build_router, - AgentCapabilities, - AgentListResponse, - AuthConfig, -}; - -const PROMPT: &str = "Reply with exactly the single word OK."; -const TOOL_PROMPT: &str = - "Use the bash tool to run `ls` in the current directory. Do not answer without using the tool."; -const QUESTION_PROMPT: &str = - "Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself."; - -/// Agent-agnostic event sequence tests. -/// -/// These tests assert that the universal schema output is valid and consistent -/// across agents, and they use capability flags from /v1/agents to skip -/// unsupported flows. - -struct TestApp { - app: Router, - _install_dir: TempDir, -} - -impl TestApp { - fn new() -> Self { - let install_dir = tempfile::tempdir().expect("create temp install dir"); - let manager = AgentManager::new(install_dir.path()) - .expect("create agent manager"); - let state = sandbox_agent::router::AppState::new(AuthConfig::disabled(), manager); - let app = build_router(state); - Self { - app, - _install_dir: install_dir, - } - } -} - -struct EnvGuard { - saved: HashMap>, -} - -impl Drop for EnvGuard { - fn drop(&mut self) { - for (key, value) in &self.saved { - match value { - Some(value) => std::env::set_var(key, value), - None => std::env::remove_var(key), - } - } - } -} - -fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard { - let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"]; - let mut saved = HashMap::new(); - for key in keys { - saved.insert(key.to_string(), std::env::var(key).ok()); - } - - match creds.anthropic.as_ref() { - Some(cred) => { - std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key); - std::env::set_var("CLAUDE_API_KEY", &cred.api_key); - } - None => { - std::env::remove_var("ANTHROPIC_API_KEY"); - std::env::remove_var("CLAUDE_API_KEY"); - } - } - - match creds.openai.as_ref() { - Some(cred) => { - std::env::set_var("OPENAI_API_KEY", &cred.api_key); - std::env::set_var("CODEX_API_KEY", &cred.api_key); - } - None => { - std::env::remove_var("OPENAI_API_KEY"); - std::env::remove_var("CODEX_API_KEY"); - } - } - - EnvGuard { saved } -} - -async fn send_json( - app: &Router, - method: Method, - path: &str, - body: Option, -) -> (StatusCode, Value) { - let request = Request::builder() - .method(method) - .uri(path) - .header("content-type", "application/json") - .body(Body::from(body.map(|value| value.to_string()).unwrap_or_default())) - .expect("request"); - let response = app - .clone() - .oneshot(request) - .await - .expect("response"); - let status = response.status(); - let bytes = response - .into_body() - .collect() - .await - .expect("body") - .to_bytes(); - let payload = if bytes.is_empty() { - Value::Null - } else { - serde_json::from_slice(&bytes).unwrap_or(Value::Null) - }; - (status, payload) -} - -async fn send_status(app: &Router, method: Method, path: &str, body: Option) -> StatusCode { - let (status, _) = send_json(app, method, path, body).await; - status -} - -async fn install_agent(app: &Router, agent: AgentId) { - let status = send_status( - app, - Method::POST, - &format!("/v1/agents/{}/install", agent.as_str()), - Some(json!({})), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "install agent {}", agent.as_str()); -} - -async fn create_session(app: &Router, agent: AgentId, session_id: &str, permission_mode: &str) { - let status = send_status( - app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(json!({ - "agent": agent.as_str(), - "permissionMode": permission_mode, - })), - ) - .await; - assert_eq!(status, StatusCode::OK, "create session"); -} - -async fn create_session_with_mode( - app: &Router, - agent: AgentId, - session_id: &str, - agent_mode: &str, - permission_mode: &str, -) { - let status = send_status( - app, - Method::POST, - &format!("/v1/sessions/{session_id}"), - Some(json!({ - "agent": agent.as_str(), - "agentMode": agent_mode, - "permissionMode": permission_mode, - })), - ) - .await; - assert_eq!(status, StatusCode::OK, "create session"); -} - -fn test_permission_mode(agent: AgentId) -> &'static str { - match agent { - AgentId::Opencode => "default", - _ => "bypass", - } -} - -async fn send_message(app: &Router, session_id: &str, message: &str) { - let status = send_status( - app, - Method::POST, - &format!("/v1/sessions/{session_id}/messages"), - Some(json!({ "message": message })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "send message"); -} - -async fn poll_events_until( - app: &Router, - session_id: &str, - timeout: Duration, - mut stop: F, -) -> Vec -where - F: FnMut(&[Value]) -> bool, -{ - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - while start.elapsed() < timeout { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if stop(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - events -} - -async fn fetch_capabilities(app: &Router) -> HashMap { - let (status, payload) = send_json(app, Method::GET, "/v1/agents", None).await; - assert_eq!(status, StatusCode::OK, "list agents"); - let response: AgentListResponse = serde_json::from_value(payload).expect("agents payload"); - response - .agents - .into_iter() - .map(|agent| (agent.id, agent.capabilities)) - .collect() -} - -fn has_event_type(events: &[Value], event_type: &str) -> bool { - events - .iter() - .any(|event| event.get("type").and_then(Value::as_str) == Some(event_type)) -} - -fn find_assistant_message_item(events: &[Value]) -> Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some("item.completed") { - return None; - } - let item = event.get("data")?.get("item")?; - let role = item.get("role")?.as_str()?; - let kind = item.get("kind")?.as_str()?; - if role != "assistant" || kind != "message" { - return None; - } - item.get("item_id")?.as_str().map(|id| id.to_string()) - }) -} - -fn event_sequence(event: &Value) -> Option { - event.get("sequence").and_then(Value::as_u64) -} - -fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some(event_type) { - return None; - } - match event_type { - "item.delta" => { - let data = event.get("data")?; - let id = data.get("item_id")?.as_str()?; - if id == item_id { - event_sequence(event) - } else { - None - } - } - _ => { - let item = event.get("data")?.get("item")?; - let id = item.get("item_id")?.as_str()?; - if id == item_id { - event_sequence(event) - } else { - None - } - } - } - }) -} - -fn find_permission_id(events: &[Value]) -> Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some("permission.requested") { - return None; - } - event - .get("data") - .and_then(|data| data.get("permission_id")) - .and_then(Value::as_str) - .map(|id| id.to_string()) - }) -} - -fn find_question_id(events: &[Value]) -> Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some("question.requested") { - return None; - } - event - .get("data") - .and_then(|data| data.get("question_id")) - .and_then(Value::as_str) - .map(|id| id.to_string()) - }) -} - -fn find_first_answer(events: &[Value]) -> Option>> { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some("question.requested") { - return None; - } - let options = event - .get("data") - .and_then(|data| data.get("options")) - .and_then(Value::as_array)?; - let option = options.first()?.as_str()?.to_string(); - Some(vec![vec![option]]) - }) -} - -fn find_tool_call(events: &[Value]) -> Option { - events.iter().find_map(|event| { - if event.get("type").and_then(Value::as_str) != Some("item.started") - && event.get("type").and_then(Value::as_str) != Some("item.completed") - { - return None; - } - let item = event.get("data")?.get("item")?; - let kind = item.get("kind")?.as_str()?; - if kind != "tool_call" { - return None; - } - item.get("item_id")?.as_str().map(|id| id.to_string()) - }) -} - -fn has_tool_result(events: &[Value]) -> bool { - events.iter().any(|event| { - if event.get("type").and_then(Value::as_str) != Some("item.completed") { - return false; - } - let item = match event.get("data").and_then(|data| data.get("item")) { - Some(item) => item, - None => return false, - }; - item.get("kind").and_then(Value::as_str) == Some("tool_result") - }) -} - -fn expect_basic_sequence(events: &[Value]) { - assert!(has_event_type(events, "session.started"), "session.started missing"); - let item_id = find_assistant_message_item(events).expect("assistant message missing"); - let started_seq = find_item_event_seq(events, "item.started", &item_id) - .expect("item.started missing"); - // Intentionally require deltas here to validate our synthetic delta behavior. - let delta_seq = find_item_event_seq(events, "item.delta", &item_id) - .expect("item.delta missing"); - let completed_seq = find_item_event_seq(events, "item.completed", &item_id) - .expect("item.completed missing"); - assert!(started_seq < delta_seq, "item.started must precede delta"); - assert!(delta_seq < completed_seq, "delta must precede completion"); -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn agent_agnostic_basic_reply() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - let capabilities = fetch_capabilities(&app.app).await; - - for config in &configs { - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = format!("basic-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &session_id, "default").await; - send_message(&app.app, &session_id, PROMPT).await; - - let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { - has_event_type(events, "error") || find_assistant_message_item(events).is_some() - }) - .await; - - assert!( - !events.is_empty(), - "no events collected for {}", - config.agent.as_str() - ); - expect_basic_sequence(&events); - - let caps = capabilities - .get(config.agent.as_str()) - .expect("capabilities missing"); - if caps.tool_calls { - assert!( - !events.iter().any(|event| { - event.get("type").and_then(Value::as_str) == Some("agent.unparsed") - }), - "agent.unparsed event detected" - ); - } - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn agent_agnostic_tool_flow() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - let capabilities = fetch_capabilities(&app.app).await; - - for config in &configs { - let caps = capabilities - .get(config.agent.as_str()) - .expect("capabilities missing"); - if !caps.tool_calls { - continue; - } - - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = format!("tool-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &session_id, test_permission_mode(config.agent)).await; - send_message(&app.app, &session_id, TOOL_PROMPT).await; - - let start = Instant::now(); - let mut offset = 0u64; - let mut events = Vec::new(); - let mut replied = false; - while start.elapsed() < Duration::from_secs(180) { - let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); - let (status, payload) = send_json(&app.app, Method::GET, &path, None).await; - assert_eq!(status, StatusCode::OK, "poll events"); - let new_events = payload - .get("events") - .and_then(Value::as_array) - .cloned() - .unwrap_or_default(); - if !new_events.is_empty() { - if let Some(last) = new_events - .last() - .and_then(|event| event.get("sequence")) - .and_then(Value::as_u64) - { - offset = last; - } - events.extend(new_events); - if !replied { - if let Some(permission_id) = find_permission_id(&events) { - let _ = send_status( - &app.app, - Method::POST, - &format!( - "/v1/sessions/{session_id}/permissions/{permission_id}/reply" - ), - Some(json!({ "reply": "once" })), - ) - .await; - replied = true; - } - } - if has_tool_result(&events) { - break; - } - } - tokio::time::sleep(Duration::from_millis(800)).await; - } - - let tool_call = find_tool_call(&events); - let tool_result = has_tool_result(&events); - assert!( - tool_call.is_some(), - "tool_call missing for tool-capable agent {}", - config.agent.as_str() - ); - if tool_call.is_some() { - assert!( - tool_result, - "tool_result missing after tool_call for {}", - config.agent.as_str() - ); - } - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn agent_agnostic_permission_flow() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - let capabilities = fetch_capabilities(&app.app).await; - - for config in &configs { - let caps = capabilities - .get(config.agent.as_str()) - .expect("capabilities missing"); - if !(caps.plan_mode && caps.permissions) { - continue; - } - - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = format!("perm-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &session_id, "plan").await; - send_message(&app.app, &session_id, TOOL_PROMPT).await; - - let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { - find_permission_id(events).is_some() || has_event_type(events, "error") - }) - .await; - - let permission_id = find_permission_id(&events).expect("permission.requested missing"); - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply"), - Some(json!({ "reply": "once" })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "permission reply"); - - let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { - events.iter().any(|event| { - event.get("type").and_then(Value::as_str) == Some("permission.resolved") - }) - }) - .await; - - assert!( - resolved.iter().any(|event| { - event.get("type").and_then(Value::as_str) == Some("permission.resolved") - && event - .get("synthetic") - .and_then(Value::as_bool) - .unwrap_or(false) - }), - "permission.resolved should be synthetic" - ); - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn agent_agnostic_question_flow() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - let capabilities = fetch_capabilities(&app.app).await; - - for config in &configs { - let caps = capabilities - .get(config.agent.as_str()) - .expect("capabilities missing"); - if !caps.questions { - continue; - } - - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = format!("question-{}", config.agent.as_str()); - create_session_with_mode(&app.app, config.agent, &session_id, "plan", "plan").await; - send_message(&app.app, &session_id, QUESTION_PROMPT).await; - - let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { - find_question_id(events).is_some() || has_event_type(events, "error") - }) - .await; - - let question_id = find_question_id(&events).expect("question.requested missing"); - let answers = find_first_answer(&events).unwrap_or_else(|| vec![vec![]]); - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/questions/{question_id}/reply"), - Some(json!({ "answers": answers })), - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "question reply"); - - let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { - events.iter().any(|event| { - event.get("type").and_then(Value::as_str) == Some("question.resolved") - }) - }) - .await; - - assert!( - resolved.iter().any(|event| { - event.get("type").and_then(Value::as_str) == Some("question.resolved") - && event - .get("synthetic") - .and_then(Value::as_bool) - .unwrap_or(false) - }), - "question.resolved should be synthetic" - ); - } -} - -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn agent_agnostic_termination() { - let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); - let app = TestApp::new(); - - for config in &configs { - let _guard = apply_credentials(&config.credentials); - install_agent(&app.app, config.agent).await; - - let session_id = format!("terminate-{}", config.agent.as_str()); - create_session(&app.app, config.agent, &session_id, "default").await; - - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/terminate"), - None, - ) - .await; - assert_eq!(status, StatusCode::NO_CONTENT, "terminate session"); - - let events = poll_events_until(&app.app, &session_id, Duration::from_secs(30), |events| { - has_event_type(events, "session.ended") - }) - .await; - assert!(has_event_type(&events, "session.ended"), "missing session.ended"); - - let status = send_status( - &app.app, - Method::POST, - &format!("/v1/sessions/{session_id}/messages"), - Some(json!({ "message": PROMPT })), - ) - .await; - assert!(!status.is_success(), "terminated session should reject messages"); - } -} diff --git a/server/packages/sandbox-agent/tests/agent_basic_reply.rs b/server/packages/sandbox-agent/tests/agent_basic_reply.rs new file mode 100644 index 0000000..7136341 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_basic_reply.rs @@ -0,0 +1,46 @@ +mod common; + +use common::*; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use serde_json::Value; +use std::time::Duration; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_basic_reply() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + + for config in &configs { + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("basic-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &session_id, "default").await; + send_message(&app.app, &session_id, PROMPT).await; + + let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { + has_event_type(events, "error") || find_assistant_message_item(events).is_some() + }) + .await; + + assert!( + !events.is_empty(), + "no events collected for {}", + config.agent.as_str() + ); + expect_basic_sequence(&events); + + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if caps.tool_calls { + assert!( + !events.iter().any(|event| { + event.get("type").and_then(Value::as_str) == Some("agent.unparsed") + }), + "agent.unparsed event detected" + ); + } + } +} diff --git a/server/packages/sandbox-agent/tests/agent_multi_turn.rs b/server/packages/sandbox-agent/tests/agent_multi_turn.rs new file mode 100644 index 0000000..47babc1 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_multi_turn.rs @@ -0,0 +1,457 @@ +//! Tests for multi-turn conversations to validate session resumption behavior. +//! +//! This test validates that: +//! 1. Sessions can handle multiple messages (multi-turn conversations) +//! 2. Agents that support resumption (Claude, Amp, OpenCode) can continue after process exit +//! 3. Codex supports multi-turn via the shared app-server model (single process, multiple threads) +//! 4. The mock agent correctly supports multi-turn as the reference implementation + +use std::time::{Duration, Instant}; + +use axum::body::Body; +use axum::http::{Method, Request, StatusCode}; +use axum::Router; +use http_body_util::BodyExt; +use serde_json::{json, Value}; +use tempfile::TempDir; + +use sandbox_agent::router::{build_router, AppState, AuthConfig}; +use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use sandbox_agent_agent_credentials::ExtractedCredentials; +use std::collections::BTreeMap; +use tower::util::ServiceExt; + +const FIRST_PROMPT: &str = "Reply with exactly the word FIRST."; +const SECOND_PROMPT: &str = "Reply with exactly the word SECOND."; + +struct TestApp { + app: Router, + _install_dir: TempDir, +} + +impl TestApp { + fn new() -> Self { + let install_dir = tempfile::tempdir().expect("create temp install dir"); + let manager = AgentManager::new(install_dir.path()).expect("create agent manager"); + let state = AppState::new(AuthConfig::disabled(), manager); + let app = build_router(state); + Self { + app, + _install_dir: install_dir, + } + } +} + +struct EnvGuard { + saved: BTreeMap>, +} + +impl Drop for EnvGuard { + fn drop(&mut self) { + for (key, value) in &self.saved { + match value { + Some(value) => std::env::set_var(key, value), + None => std::env::remove_var(key), + } + } + } +} + +fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard { + let keys = [ + "ANTHROPIC_API_KEY", + "CLAUDE_API_KEY", + "OPENAI_API_KEY", + "CODEX_API_KEY", + ]; + let mut saved = BTreeMap::new(); + for key in keys { + saved.insert(key.to_string(), std::env::var(key).ok()); + } + + match creds.anthropic.as_ref() { + Some(cred) => { + std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key); + std::env::set_var("CLAUDE_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("ANTHROPIC_API_KEY"); + std::env::remove_var("CLAUDE_API_KEY"); + } + } + + match creds.openai.as_ref() { + Some(cred) => { + std::env::set_var("OPENAI_API_KEY", &cred.api_key); + std::env::set_var("CODEX_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("OPENAI_API_KEY"); + std::env::remove_var("CODEX_API_KEY"); + } + } + + EnvGuard { saved } +} + +async fn send_json( + app: &Router, + method: Method, + path: &str, + body: Option, +) -> (StatusCode, Value) { + let mut builder = Request::builder().method(method).uri(path); + let body = if let Some(body) = body { + builder = builder.header("content-type", "application/json"); + Body::from(body.to_string()) + } else { + Body::empty() + }; + let request = builder.body(body).expect("request"); + let response = app.clone().oneshot(request).await.expect("request handled"); + let status = response.status(); + let bytes = response + .into_body() + .collect() + .await + .expect("read body") + .to_bytes(); + let value = if bytes.is_empty() { + Value::Null + } else { + serde_json::from_slice(&bytes) + .unwrap_or(Value::String(String::from_utf8_lossy(&bytes).to_string())) + }; + (status, value) +} + +async fn send_status(app: &Router, method: Method, path: &str, body: Option) -> StatusCode { + let (status, _) = send_json(app, method, path, body).await; + status +} + +async fn install_agent(app: &Router, agent: AgentId) { + let status = send_status( + app, + Method::POST, + &format!("/v1/agents/{}/install", agent.as_str()), + Some(json!({})), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "install {agent}"); +} + +fn test_permission_mode(agent: AgentId) -> &'static str { + match agent { + AgentId::Opencode => "default", + _ => "bypass", + } +} + +async fn create_session(app: &Router, agent: AgentId, session_id: &str) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": agent.as_str(), + "permissionMode": test_permission_mode(agent) + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session {agent}"); +} + +/// Send a message and return the status code (allows checking for errors) +async fn send_message_with_status( + app: &Router, + session_id: &str, + message: &str, +) -> (StatusCode, Value) { + send_json( + app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": message })), + ) + .await +} + +/// Wait for a specific number of assistant responses (item.completed with role=assistant) +async fn wait_for_n_responses( + app: &Router, + session_id: &str, + n: usize, + timeout: Duration, +) -> bool { + let start = Instant::now(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=1000"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + if status != StatusCode::OK { + return false; + } + let events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + + let completed_count = events.iter().filter(|e| is_assistant_completed(e)).count(); + if completed_count >= n { + return true; + } + + // Check for errors + for event in &events { + if is_error_event(event) { + eprintln!("Error event: {:?}", event); + return false; + } + } + + tokio::time::sleep(Duration::from_millis(300)).await; + } + false +} + +/// Wait for an assistant response (item.completed with role=assistant) +async fn wait_for_response(app: &Router, session_id: &str, timeout: Duration) -> bool { + wait_for_n_responses(app, session_id, 1, timeout).await +} + +fn is_assistant_completed(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|t| t == "item.completed") + .unwrap_or(false) + && event + .get("data") + .and_then(|d| d.get("item")) + .and_then(|i| i.get("role")) + .and_then(Value::as_str) + .map(|r| r == "assistant") + .unwrap_or(false) +} + +fn is_session_ended(event: &Value) -> bool { + event + .get("type") + .and_then(Value::as_str) + .map(|t| t == "session.ended") + .unwrap_or(false) +} + +fn is_error_event(event: &Value) -> bool { + matches!( + event.get("type").and_then(Value::as_str), + Some("error") | Some("agent.unparsed") + ) +} + +/// Count assistant responses in the event stream +async fn count_assistant_responses(app: &Router, session_id: &str) -> usize { + let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=1000"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + if status != StatusCode::OK { + eprintln!("Failed to get events: status={}", status); + return 0; + } + let events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + + // Debug: print all event types + eprintln!("All events ({}):", events.len()); + for (i, e) in events.iter().enumerate() { + let event_type = e.get("type").and_then(Value::as_str).unwrap_or("?"); + let role = e + .get("data") + .and_then(|d| d.get("item")) + .and_then(|i| i.get("role")) + .and_then(Value::as_str) + .unwrap_or("-"); + eprintln!(" [{}] type={}, role={}", i, event_type, role); + } + + let count = events.iter().filter(|e| is_assistant_completed(e)).count(); + eprintln!("Assistant completed count: {}", count); + count +} + +/// Test multi-turn conversation for a specific agent +async fn test_multi_turn_for_agent(app: &Router, agent: AgentId) -> Result<(), String> { + let session_id = format!("multi-turn-{}", agent.as_str()); + eprintln!("\n=== Testing multi-turn for {} ===", agent); + + // Create session + create_session(app, agent, &session_id).await; + eprintln!("Session created: {}", session_id); + + // Send first message + eprintln!("Sending first message..."); + let (status, body) = send_message_with_status(app, &session_id, FIRST_PROMPT).await; + eprintln!("First message status: {}", status); + if status != StatusCode::NO_CONTENT { + return Err(format!( + "First message failed with status {}: {:?}", + status, body + )); + } + + // Wait for first response + eprintln!("Waiting for first response..."); + let got_first = wait_for_response(app, &session_id, Duration::from_secs(120)).await; + if !got_first { + return Err("Timed out waiting for first response".to_string()); + } + eprintln!("Got first response"); + + // Small delay to ensure session state is updated + tokio::time::sleep(Duration::from_millis(500)).await; + + // Send second message - this is the critical test + eprintln!("Sending second message..."); + let (status, body) = send_message_with_status(app, &session_id, SECOND_PROMPT).await; + eprintln!("Second message status: {}, body: {:?}", status, body); + if status != StatusCode::NO_CONTENT { + return Err(format!( + "Second message failed with status {}: {:?}", + status, body + )); + } + + // Wait for second response - specifically wait for 2 completed responses + eprintln!("Waiting for second response (total 2)..."); + let got_both = wait_for_n_responses(app, &session_id, 2, Duration::from_secs(120)).await; + if !got_both { + // Debug: show what we got + let response_count = count_assistant_responses(app, &session_id).await; + return Err(format!( + "Timed out waiting for second response (got {} completed)", + response_count + )); + } + eprintln!("Got both responses"); + + // Verify we got two assistant responses + let response_count = count_assistant_responses(app, &session_id).await; + eprintln!("Final response count: {}", response_count); + if response_count < 2 { + return Err(format!( + "Expected at least 2 assistant responses, got {}", + response_count + )); + } + + Ok(()) +} + +#[tokio::test] +async fn multi_turn_mock_agent() { + let test_app = TestApp::new(); + + // Mock agent should always support multi-turn as the reference implementation + let result = test_multi_turn_for_agent(&test_app.app, AgentId::Mock).await; + assert!( + result.is_ok(), + "Mock agent multi-turn failed: {:?}", + result.err() + ); +} + +#[tokio::test] +async fn multi_turn_real_agents() { + let configs = match test_agents_from_env() { + Ok(configs) => configs, + Err(err) => { + eprintln!("Failed to get agent configs: {:?}. Skipping multi-turn test.", err); + return; + } + }; + if configs.is_empty() { + eprintln!("No agents configured for testing. Skipping multi-turn test."); + return; + } + + let test_app = TestApp::new(); + + for config in configs { + let _guard = apply_credentials(&config.credentials); + install_agent(&test_app.app, config.agent).await; + + let result = test_multi_turn_for_agent(&test_app.app, config.agent).await; + + match config.agent { + AgentId::Claude | AgentId::Amp | AgentId::Opencode => { + // These agents should support multi-turn via resumption + assert!( + result.is_ok(), + "{} multi-turn failed (should support resumption): {:?}", + config.agent, + result.err() + ); + } + AgentId::Codex => { + // Codex now supports multi-turn via the shared app-server model + assert!( + result.is_ok(), + "{} multi-turn failed (should support shared app-server): {:?}", + config.agent, + result.err() + ); + } + AgentId::Mock => { + // Mock is tested separately + } + } + } +} + +/// Test that verifies the session can be reopened after ending +#[tokio::test] +async fn session_reopen_after_end() { + let test_app = TestApp::new(); + let session_id = "reopen-test"; + + // Create session with mock agent + create_session(&test_app.app, AgentId::Mock, session_id).await; + + // Send "end" command to mock agent to end the session + let (status, _) = send_message_with_status(&test_app.app, session_id, "end").await; + assert_eq!(status, StatusCode::NO_CONTENT); + + // Wait for session to end + tokio::time::sleep(Duration::from_millis(500)).await; + + // Verify session is ended + let path = format!("/v1/sessions/{session_id}/events?offset=0&limit=100"); + let (_, payload) = send_json(&test_app.app, Method::GET, &path, None).await; + let events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + let has_ended = events.iter().any(|e| is_session_ended(e)); + assert!(has_ended, "Session should be ended after 'end' command"); + + // Try to send another message - mock agent supports resume so this should work + // (or fail if we haven't implemented reopen for mock) + let (status, body) = send_message_with_status(&test_app.app, session_id, "hello again").await; + + // For mock agent, the session should be reopenable since mock is in agent_supports_resume + // But mock's session.ended is triggered differently than real agents + // This test documents the current behavior + if status == StatusCode::NO_CONTENT { + eprintln!("Mock agent session was successfully reopened after end"); + } else { + eprintln!( + "Mock agent session could not be reopened (status {}): {:?}", + status, body + ); + } +} diff --git a/server/packages/sandbox-agent/tests/agent_permission_flow.rs b/server/packages/sandbox-agent/tests/agent_permission_flow.rs new file mode 100644 index 0000000..5047305 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_permission_flow.rs @@ -0,0 +1,63 @@ +mod common; + +use common::*; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use std::time::Duration; +use axum::http::Method; +use serde_json::json; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_permission_flow() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + + for config in &configs { + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !(caps.plan_mode && caps.permissions) { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("perm-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &session_id, "plan").await; + send_message(&app.app, &session_id, TOOL_PROMPT).await; + + let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { + find_permission_id(events).is_some() || has_event_type(events, "error") + }) + .await; + + let permission_id = find_permission_id(&events).expect("permission.requested missing"); + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/permissions/{permission_id}/reply"), + Some(json!({ "reply": "once" })), + ) + .await; + assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "permission reply"); + + let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { + events.iter().any(|event| { + event.get("type").and_then(serde_json::Value::as_str) == Some("permission.resolved") + }) + }) + .await; + + assert!( + resolved.iter().any(|event| { + event.get("type").and_then(serde_json::Value::as_str) == Some("permission.resolved") + && event + .get("synthetic") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false) + }), + "permission.resolved should be synthetic" + ); + } +} diff --git a/server/packages/sandbox-agent/tests/agent_question_flow.rs b/server/packages/sandbox-agent/tests/agent_question_flow.rs new file mode 100644 index 0000000..0c85aae --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_question_flow.rs @@ -0,0 +1,64 @@ +mod common; + +use common::*; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use std::time::Duration; +use axum::http::Method; +use serde_json::json; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_question_flow() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + + for config in &configs { + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.questions { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("question-{}", config.agent.as_str()); + create_session_with_mode(&app.app, config.agent, &session_id, "plan", "plan").await; + send_message(&app.app, &session_id, QUESTION_PROMPT).await; + + let events = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { + find_question_id(events).is_some() || has_event_type(events, "error") + }) + .await; + + let question_id = find_question_id(&events).expect("question.requested missing"); + let answers = find_first_answer(&events).unwrap_or_else(|| vec![vec![]]); + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/questions/{question_id}/reply"), + Some(json!({ "answers": answers })), + ) + .await; + assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "question reply"); + + let resolved = poll_events_until(&app.app, &session_id, Duration::from_secs(120), |events| { + events.iter().any(|event| { + event.get("type").and_then(serde_json::Value::as_str) == Some("question.resolved") + }) + }) + .await; + + assert!( + resolved.iter().any(|event| { + event.get("type").and_then(serde_json::Value::as_str) == Some("question.resolved") + && event + .get("synthetic") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false) + }), + "question.resolved should be synthetic" + ); + } +} diff --git a/server/packages/sandbox-agent/tests/agent_termination.rs b/server/packages/sandbox-agent/tests/agent_termination.rs new file mode 100644 index 0000000..809baa1 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_termination.rs @@ -0,0 +1,45 @@ +mod common; + +use common::*; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use std::time::Duration; +use axum::http::Method; +use serde_json::json; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_termination() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + + for config in &configs { + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("terminate-{}", config.agent.as_str()); + create_session(&app.app, config.agent, &session_id, "default").await; + + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/terminate"), + None, + ) + .await; + assert_eq!(status, axum::http::StatusCode::NO_CONTENT, "terminate session"); + + let events = poll_events_until(&app.app, &session_id, Duration::from_secs(30), |events| { + has_event_type(events, "session.ended") + }) + .await; + assert!(has_event_type(&events, "session.ended"), "missing session.ended"); + + let status = send_status( + &app.app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": PROMPT })), + ) + .await; + assert!(!status.is_success(), "terminated session should reject messages"); + } +} diff --git a/server/packages/sandbox-agent/tests/agent_tool_flow.rs b/server/packages/sandbox-agent/tests/agent_tool_flow.rs new file mode 100644 index 0000000..297306b --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent_tool_flow.rs @@ -0,0 +1,94 @@ +mod common; + +use common::*; +use sandbox_agent_agent_management::testing::test_agents_from_env; +use serde_json::Value; +use std::time::{Duration, Instant}; +use axum::http::Method; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn agent_tool_flow() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + let capabilities = fetch_capabilities(&app.app).await; + + for config in &configs { + let caps = capabilities + .get(config.agent.as_str()) + .expect("capabilities missing"); + if !caps.tool_calls { + continue; + } + + let _guard = apply_credentials(&config.credentials); + install_agent(&app.app, config.agent).await; + + let session_id = format!("tool-{}", config.agent.as_str()); + create_session( + &app.app, + config.agent, + &session_id, + test_permission_mode(config.agent), + ) + .await; + send_message(&app.app, &session_id, TOOL_PROMPT).await; + + let start = Instant::now(); + let mut offset = 0u64; + let mut events = Vec::new(); + let mut replied = false; + while start.elapsed() < Duration::from_secs(180) { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(&app.app, Method::GET, &path, None).await; + assert_eq!(status, axum::http::StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if !replied { + if let Some(permission_id) = find_permission_id(&events) { + let _ = send_status( + &app.app, + Method::POST, + &format!( + "/v1/sessions/{session_id}/permissions/{permission_id}/reply" + ), + Some(serde_json::json!({ "reply": "once" })), + ) + .await; + replied = true; + } + } + if has_tool_result(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + + let tool_call = find_tool_call(&events); + let tool_result = has_tool_result(&events); + assert!( + tool_call.is_some(), + "tool_call missing for tool-capable agent {}", + config.agent.as_str() + ); + if tool_call.is_some() { + assert!( + tool_result, + "tool_result missing after tool_call for {}", + config.agent.as_str() + ); + } + } +} diff --git a/server/packages/sandbox-agent/tests/common/mod.rs b/server/packages/sandbox-agent/tests/common/mod.rs new file mode 100644 index 0000000..9c74437 --- /dev/null +++ b/server/packages/sandbox-agent/tests/common/mod.rs @@ -0,0 +1,388 @@ +use std::collections::HashMap; +use std::time::{Duration, Instant}; + +use axum::body::Body; +use axum::http::{Method, Request, StatusCode}; +use axum::Router; +use http_body_util::BodyExt; +use serde_json::{json, Value}; +use tempfile::TempDir; +use tower::util::ServiceExt; + +use sandbox_agent::router::{ + build_router, + AgentCapabilities, + AgentListResponse, + AuthConfig, +}; +use sandbox_agent_agent_credentials::ExtractedCredentials; +use sandbox_agent_agent_management::agents::{AgentId, AgentManager}; + +pub const PROMPT: &str = "Reply with exactly the single word OK."; +pub const TOOL_PROMPT: &str = + "Use the bash tool to run `ls` in the current directory. Do not answer without using the tool."; +pub const QUESTION_PROMPT: &str = + "Call the AskUserQuestion tool with exactly one yes/no question and wait for a reply. Do not answer yourself."; + +pub struct TestApp { + pub app: Router, + _install_dir: TempDir, +} + +impl TestApp { + pub fn new() -> Self { + let install_dir = tempfile::tempdir().expect("create temp install dir"); + let manager = AgentManager::new(install_dir.path()) + .expect("create agent manager"); + let state = sandbox_agent::router::AppState::new(AuthConfig::disabled(), manager); + let app = build_router(state); + Self { + app, + _install_dir: install_dir, + } + } +} + +pub struct EnvGuard { + saved: HashMap>, +} + +impl Drop for EnvGuard { + fn drop(&mut self) { + for (key, value) in &self.saved { + match value { + Some(value) => std::env::set_var(key, value), + None => std::env::remove_var(key), + } + } + } +} + +pub fn apply_credentials(creds: &ExtractedCredentials) -> EnvGuard { + let keys = ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY", "OPENAI_API_KEY", "CODEX_API_KEY"]; + let mut saved = HashMap::new(); + for key in keys { + saved.insert(key.to_string(), std::env::var(key).ok()); + } + + match creds.anthropic.as_ref() { + Some(cred) => { + std::env::set_var("ANTHROPIC_API_KEY", &cred.api_key); + std::env::set_var("CLAUDE_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("ANTHROPIC_API_KEY"); + std::env::remove_var("CLAUDE_API_KEY"); + } + } + + match creds.openai.as_ref() { + Some(cred) => { + std::env::set_var("OPENAI_API_KEY", &cred.api_key); + std::env::set_var("CODEX_API_KEY", &cred.api_key); + } + None => { + std::env::remove_var("OPENAI_API_KEY"); + std::env::remove_var("CODEX_API_KEY"); + } + } + + EnvGuard { saved } +} + +pub async fn send_json( + app: &Router, + method: Method, + path: &str, + body: Option, +) -> (StatusCode, Value) { + let request = Request::builder() + .method(method) + .uri(path) + .header("content-type", "application/json") + .body(Body::from(body.map(|value| value.to_string()).unwrap_or_default())) + .expect("request"); + let response = app + .clone() + .oneshot(request) + .await + .expect("response"); + let status = response.status(); + let bytes = response + .into_body() + .collect() + .await + .expect("body") + .to_bytes(); + let payload = if bytes.is_empty() { + Value::Null + } else { + serde_json::from_slice(&bytes).unwrap_or(Value::Null) + }; + (status, payload) +} + +pub async fn send_status( + app: &Router, + method: Method, + path: &str, + body: Option, +) -> StatusCode { + let (status, _) = send_json(app, method, path, body).await; + status +} + +pub async fn install_agent(app: &Router, agent: AgentId) { + let status = send_status( + app, + Method::POST, + &format!("/v1/agents/{}/install", agent.as_str()), + Some(json!({})), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "install agent {}", agent.as_str()); +} + +pub async fn create_session( + app: &Router, + agent: AgentId, + session_id: &str, + permission_mode: &str, +) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": agent.as_str(), + "permissionMode": permission_mode, + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session"); +} + +pub async fn create_session_with_mode( + app: &Router, + agent: AgentId, + session_id: &str, + agent_mode: &str, + permission_mode: &str, +) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}"), + Some(json!({ + "agent": agent.as_str(), + "agentMode": agent_mode, + "permissionMode": permission_mode, + })), + ) + .await; + assert_eq!(status, StatusCode::OK, "create session"); +} + +pub fn test_permission_mode(agent: AgentId) -> &'static str { + match agent { + AgentId::Opencode => "default", + _ => "bypass", + } +} + +pub async fn send_message(app: &Router, session_id: &str, message: &str) { + let status = send_status( + app, + Method::POST, + &format!("/v1/sessions/{session_id}/messages"), + Some(json!({ "message": message })), + ) + .await; + assert_eq!(status, StatusCode::NO_CONTENT, "send message"); +} + +pub async fn poll_events_until( + app: &Router, + session_id: &str, + timeout: Duration, + mut stop: F, +) -> Vec +where + F: FnMut(&[Value]) -> bool, +{ + let start = Instant::now(); + let mut offset = 0u64; + let mut events = Vec::new(); + while start.elapsed() < timeout { + let path = format!("/v1/sessions/{session_id}/events?offset={offset}&limit=200"); + let (status, payload) = send_json(app, Method::GET, &path, None).await; + assert_eq!(status, StatusCode::OK, "poll events"); + let new_events = payload + .get("events") + .and_then(Value::as_array) + .cloned() + .unwrap_or_default(); + if !new_events.is_empty() { + if let Some(last) = new_events + .last() + .and_then(|event| event.get("sequence")) + .and_then(Value::as_u64) + { + offset = last; + } + events.extend(new_events); + if stop(&events) { + break; + } + } + tokio::time::sleep(Duration::from_millis(800)).await; + } + events +} + +pub async fn fetch_capabilities(app: &Router) -> HashMap { + let (status, payload) = send_json(app, Method::GET, "/v1/agents", None).await; + assert_eq!(status, StatusCode::OK, "list agents"); + let response: AgentListResponse = serde_json::from_value(payload).expect("agents payload"); + response + .agents + .into_iter() + .map(|agent| (agent.id, agent.capabilities)) + .collect() +} + +pub fn has_event_type(events: &[Value], event_type: &str) -> bool { + events + .iter() + .any(|event| event.get("type").and_then(Value::as_str) == Some(event_type)) +} + +pub fn find_assistant_message_item(events: &[Value]) -> Option { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some("item.completed") { + return None; + } + let item = event.get("data")?.get("item")?; + let role = item.get("role")?.as_str()?; + let kind = item.get("kind")?.as_str()?; + if role != "assistant" || kind != "message" { + return None; + } + item.get("item_id")?.as_str().map(|id| id.to_string()) + }) +} + +pub fn event_sequence(event: &Value) -> Option { + event.get("sequence").and_then(Value::as_u64) +} + +pub fn find_item_event_seq(events: &[Value], event_type: &str, item_id: &str) -> Option { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some(event_type) { + return None; + } + match event_type { + "item.delta" => { + let data = event.get("data")?; + let id = data.get("item_id")?.as_str()?; + if id == item_id { + event_sequence(event) + } else { + None + } + } + _ => { + let item = event.get("data")?.get("item")?; + let id = item.get("item_id")?.as_str()?; + if id == item_id { + event_sequence(event) + } else { + None + } + } + } + }) +} + +pub fn find_permission_id(events: &[Value]) -> Option { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some("permission.requested") { + return None; + } + event + .get("data") + .and_then(|data| data.get("permission_id")) + .and_then(Value::as_str) + .map(|id| id.to_string()) + }) +} + +pub fn find_question_id(events: &[Value]) -> Option { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some("question.requested") { + return None; + } + event + .get("data") + .and_then(|data| data.get("question_id")) + .and_then(Value::as_str) + .map(|id| id.to_string()) + }) +} + +pub fn find_first_answer(events: &[Value]) -> Option>> { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some("question.requested") { + return None; + } + let options = event + .get("data") + .and_then(|data| data.get("options")) + .and_then(Value::as_array)?; + let option = options.first()?.as_str()?.to_string(); + Some(vec![vec![option]]) + }) +} + +pub fn find_tool_call(events: &[Value]) -> Option { + events.iter().find_map(|event| { + if event.get("type").and_then(Value::as_str) != Some("item.started") + && event.get("type").and_then(Value::as_str) != Some("item.completed") + { + return None; + } + let item = event.get("data")?.get("item")?; + let kind = item.get("kind")?.as_str()?; + if kind != "tool_call" { + return None; + } + item.get("item_id")?.as_str().map(|id| id.to_string()) + }) +} + +pub fn has_tool_result(events: &[Value]) -> bool { + events.iter().any(|event| { + if event.get("type").and_then(Value::as_str) != Some("item.completed") { + return false; + } + let item = match event.get("data").and_then(|data| data.get("item")) { + Some(item) => item, + None => return false, + }; + item.get("kind").and_then(Value::as_str) == Some("tool_result") + }) +} + +pub fn expect_basic_sequence(events: &[Value]) { + assert!(has_event_type(events, "session.started"), "session.started missing"); + let item_id = find_assistant_message_item(events).expect("assistant message missing"); + let started_seq = find_item_event_seq(events, "item.started", &item_id) + .expect("item.started missing"); + // Intentionally require deltas here to validate our synthetic delta behavior. + let delta_seq = find_item_event_seq(events, "item.delta", &item_id) + .expect("item.delta missing"); + let completed_seq = find_item_event_seq(events, "item.completed", &item_id) + .expect("item.completed missing"); + assert!(started_seq < delta_seq, "item.started must precede delta"); + assert!(delta_seq < completed_seq, "delta must precede completion"); +} diff --git a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs index 8352f2b..a4a376a 100644 --- a/server/packages/sandbox-agent/tests/http_sse_snapshots.rs +++ b/server/packages/sandbox-agent/tests/http_sse_snapshots.rs @@ -291,6 +291,57 @@ async fn read_sse_events( events } +async fn read_turn_stream_events( + app: &Router, + session_id: &str, + timeout: Duration, +) -> Vec { + let request = Request::builder() + .method(Method::POST) + .uri(format!("/v1/sessions/{session_id}/messages/stream")) + .header("content-type", "application/json") + .body(Body::from(json!({ "message": PROMPT }).to_string())) + .expect("turn stream request"); + let response = app + .clone() + .oneshot(request) + .await + .expect("turn stream response"); + assert_eq!(response.status(), StatusCode::OK, "turn stream status"); + + let mut stream = response.into_body().into_data_stream(); + let mut buffer = String::new(); + let mut events = Vec::new(); + let start = Instant::now(); + let mut ended = false; + loop { + let remaining = match timeout.checked_sub(start.elapsed()) { + Some(remaining) if !remaining.is_zero() => remaining, + _ => break, + }; + let next = tokio::time::timeout(remaining, stream.next()).await; + let chunk: Bytes = match next { + Ok(Some(Ok(chunk))) => chunk, + Ok(Some(Err(_))) => break, + Ok(None) => { + ended = true; + break; + } + Err(_) => break, + }; + buffer.push_str(&String::from_utf8_lossy(&chunk)); + while let Some(idx) = buffer.find("\n\n") { + let block = buffer[..idx].to_string(); + buffer = buffer[idx + 2..].to_string(); + if let Some(event) = parse_sse_block(&block) { + events.push(event); + } + } + } + assert!(ended, "turn stream did not close before timeout"); + events +} + fn parse_sse_block(block: &str) -> Option { let mut data_lines = Vec::new(); for line in block.lines() { @@ -798,6 +849,27 @@ async fn run_sse_events_snapshot(app: &Router, config: &TestAgentConfig) { }); } +async fn run_turn_stream_check(app: &Router, config: &TestAgentConfig) { + let _guard = apply_credentials(&config.credentials); + install_agent(app, config.agent).await; + + let session_id = format!("turn-{}", config.agent.as_str()); + create_session(app, config.agent, &session_id, test_permission_mode(config.agent)).await; + + let events = read_turn_stream_events(app, &session_id, Duration::from_secs(120)).await; + let events = truncate_after_first_stop(&events); + assert!( + !events.is_empty(), + "no turn stream events collected for {}", + config.agent + ); + assert!( + should_stop(&events), + "timed out waiting for assistant/error event for {}", + config.agent + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn auth_snapshots() { let token = "test-token"; @@ -1294,6 +1366,20 @@ async fn sse_events_snapshots() { } } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn turn_stream_route() { + let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); + let app = TestApp::new(); + for config in &configs { + // OpenCode's embedded bun hangs when installing plugins, blocking SSE event streaming. + // See: https://github.com/opencode-ai/opencode/issues/XXX + if config.agent == AgentId::Opencode { + continue; + } + run_turn_stream_check(&app.app, config).await; + } +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn concurrency_snapshots() { let configs = test_agents_from_env().expect("configure SANDBOX_TEST_AGENTS or install agents"); diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap index d805aca..096eda6 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@agents_list_global.snap @@ -1,10 +1,10 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 881 expression: normalize_agent_list(&agents) --- agents: - id: amp - id: claude - id: codex + - id: mock - id: opencode diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap index 5a36cc2..25be48c 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__api_endpoints_snapshots@create_session_codex.snap @@ -1,6 +1,6 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 934 expression: normalize_create_session(&created) --- healthy: true +nativeSessionId: "" diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap index 65c8e58..61f19af 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@permission_events_codex.snap @@ -1,36 +1,379 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 984 expression: normalize_events(&permission_events) --- -- agent: codex - kind: started +- metadata: true seq: 1 - started: - message: session.created -- agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started +- metadata: true seq: 2 - started: - message: thread/started -- agent: codex - kind: started + session: started + source: agent + synthetic: false + type: session.started +- item: + content_types: + - status + kind: status + role: system + status: completed seq: 3 - started: - message: turn/started -- agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed +- item: + content_types: + - text + kind: message role: user + status: in_progress seq: 4 -- agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: agent + synthetic: false + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 5 + source: daemon + synthetic: true + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 6 + source: agent + synthetic: false + type: item.completed +- item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 7 + source: agent + synthetic: false + type: item.started +- item: + content_types: + - status + kind: status + role: system + status: completed + seq: 8 + source: agent + synthetic: false + type: item.completed +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 10 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 11 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 12 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 13 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 14 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 15 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 16 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 17 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 18 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 19 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 20 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 21 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 22 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 23 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 24 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 25 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 26 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 27 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 28 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 29 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 30 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 31 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 32 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 33 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 34 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 35 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 36 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 37 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 38 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 39 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 40 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 41 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 42 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 43 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 44 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 45 + source: agent + synthetic: false + type: item.delta +- item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 46 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap index 0e3a6a0..77a4652 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reject_events_codex.snap @@ -1,36 +1,275 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1106 expression: normalize_events(&reject_events) --- -- agent: codex - kind: started +- metadata: true seq: 1 - started: - message: session.created -- agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started +- metadata: true seq: 2 - started: - message: thread/started -- agent: codex - kind: started + session: started + source: agent + synthetic: false + type: session.started +- item: + content_types: + - status + kind: status + role: system + status: completed seq: 3 - started: - message: turn/started -- agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed +- item: + content_types: + - text + kind: message role: user + status: in_progress seq: 4 -- agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: agent + synthetic: false + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 5 + source: daemon + synthetic: true + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 6 + source: agent + synthetic: false + type: item.completed +- item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 7 + source: agent + synthetic: false + type: item.started +- item: + content_types: + - status + kind: status + role: system + status: completed + seq: 8 + source: agent + synthetic: false + type: item.completed +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 10 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 11 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 12 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 13 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 14 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 15 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 16 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 17 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 18 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 19 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 20 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 21 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 22 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 23 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 24 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 25 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 26 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 27 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 28 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 29 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 30 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 31 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 32 + source: agent + synthetic: false + type: item.delta +- item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 33 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap index 7fbb0ec..e6b96f0 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__approval_flow_snapshots@question_reply_events_codex.snap @@ -1,36 +1,72 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1045 expression: normalize_events(&question_events) --- -- agent: codex - kind: started +- metadata: true seq: 1 - started: - message: session.created -- agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started +- metadata: true seq: 2 - started: - message: thread/started -- agent: codex - kind: started + session: started + source: agent + synthetic: false + type: session.started +- item: + content_types: + - status + kind: status + role: system + status: completed seq: 3 - started: - message: turn/started -- agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed +- item: + content_types: + - text + kind: message role: user + status: in_progress seq: 4 -- agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: agent + synthetic: false + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 5 + source: daemon + synthetic: true + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 6 + source: agent + synthetic: false + type: item.completed +- item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 7 + source: agent + synthetic: false + type: item.started +- item: + content_types: [] + kind: message + role: assistant + status: completed + seq: 8 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap index 62b3f52..6503ec0 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__auth_snapshots@auth_valid_token_global.snap @@ -1,6 +1,5 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 810 expression: "json!({\n \"status\": status.as_u16(), \"payload\": normalize_agent_list(&payload),\n})" --- payload: @@ -8,5 +7,6 @@ payload: - id: amp - id: claude - id: codex + - id: mock - id: opencode status: 200 diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap index a279f1f..9092b3d 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_concurrency_snapshot@concurrency_events_codex.snap @@ -1,69 +1,224 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 1214 expression: snapshot --- session_a: - - agent: codex - kind: started + - metadata: true seq: 1 - started: - message: session.created - - agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started + - item: + content_types: + - status + kind: status + role: system + status: completed seq: 2 - started: - message: thread/started - - agent: codex - kind: started - seq: 3 - started: - message: turn/started - - agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed + - item: + content_types: + - text + kind: message role: user + status: in_progress + seq: 3 + source: agent + synthetic: false + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" seq: 4 - - agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: daemon + synthetic: true + type: item.delta + - item: + content_types: + - text + kind: message + role: user + status: completed seq: 5 + source: agent + synthetic: false + type: item.completed + - item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 6 + source: agent + synthetic: false + type: item.started + - item: + content_types: + - status + kind: status + role: system + status: completed + seq: 7 + source: agent + synthetic: false + type: item.completed + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 8 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 10 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 11 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 12 + source: agent + synthetic: false + type: item.delta + - item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 13 + source: agent + synthetic: false + type: item.completed session_b: - - agent: codex - kind: started + - metadata: true seq: 1 - started: - message: session.created - - agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started + - item: + content_types: + - status + kind: status + role: system + status: completed seq: 2 - started: - message: thread/started - - agent: codex - kind: started - seq: 3 - started: - message: turn/started - - agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed + - item: + content_types: + - text + kind: message role: user + status: in_progress + seq: 3 + source: agent + synthetic: false + type: item.started + - delta: + delta: "" + item_id: "" + native_item_id: "" seq: 4 - - agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: daemon + synthetic: true + type: item.delta + - item: + content_types: + - text + kind: message + role: user + status: completed seq: 5 + source: agent + synthetic: false + type: item.completed + - item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 6 + source: agent + synthetic: false + type: item.started + - item: + content_types: + - status + kind: status + role: system + status: completed + seq: 7 + source: agent + synthetic: false + type: item.completed + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 8 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 10 + source: agent + synthetic: false + type: item.delta + - delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 11 + source: agent + synthetic: false + type: item.delta + - item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 12 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap index 4e7c929..99d8675 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_http_events_snapshot@http_events_codex.snap @@ -1,36 +1,91 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 697 expression: normalized --- -- agent: codex - kind: started +- metadata: true seq: 1 - started: - message: session.created -- agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started +- metadata: true seq: 2 - started: - message: thread/started -- agent: codex - kind: started + session: started + source: agent + synthetic: false + type: session.started +- item: + content_types: + - status + kind: status + role: system + status: completed seq: 3 - started: - message: turn/started -- agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed +- item: + content_types: + - text + kind: message role: user + status: in_progress seq: 4 -- agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: agent + synthetic: false + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 5 + source: daemon + synthetic: true + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed + seq: 6 + source: agent + synthetic: false + type: item.completed +- item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 7 + source: agent + synthetic: false + type: item.started +- item: + content_types: + - status + kind: status + role: system + status: completed + seq: 8 + source: agent + synthetic: false + type: item.completed +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta +- item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 10 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap index d00b732..e503c9d 100644 --- a/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap +++ b/server/packages/sandbox-agent/tests/snapshots/http_sse_snapshots__run_sse_events_snapshot@sse_events_codex.snap @@ -1,36 +1,109 @@ --- source: server/packages/sandbox-agent/tests/http_sse_snapshots.rs -assertion_line: 734 expression: normalized --- -- agent: codex - kind: started +- metadata: true seq: 1 - started: - message: session.created -- agent: codex - kind: started + session: started + source: daemon + synthetic: true + type: session.started +- item: + content_types: + - status + kind: status + role: system + status: completed seq: 2 - started: - message: thread/started -- agent: codex - kind: started - seq: 3 - started: - message: turn/started -- agent: codex - kind: message - message: - parts: - - text: "" - type: text + source: agent + synthetic: false + type: item.completed +- item: + content_types: + - text + kind: message role: user + status: in_progress + seq: 3 + source: agent + synthetic: false + type: item.started +- delta: + delta: "" + item_id: "" + native_item_id: "" seq: 4 -- agent: codex - kind: message - message: - parts: - - text: "" - type: text - role: assistant + source: daemon + synthetic: true + type: item.delta +- item: + content_types: + - text + kind: message + role: user + status: completed seq: 5 + source: agent + synthetic: false + type: item.completed +- item: + content_types: [] + kind: message + role: assistant + status: in_progress + seq: 6 + source: agent + synthetic: false + type: item.started +- item: + content_types: + - status + kind: status + role: system + status: completed + seq: 7 + source: agent + synthetic: false + type: item.completed +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 8 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 9 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 10 + source: agent + synthetic: false + type: item.delta +- delta: + delta: "" + item_id: "" + native_item_id: "" + seq: 11 + source: agent + synthetic: false + type: item.delta +- item: + content_types: + - reasoning + kind: message + role: assistant + status: completed + seq: 12 + source: agent + synthetic: false + type: item.completed diff --git a/server/packages/universal-agent-schema/src/agents/claude.rs b/server/packages/universal-agent-schema/src/agents/claude.rs index de15d9c..8d16c75 100644 --- a/server/packages/universal-agent-schema/src/agents/claude.rs +++ b/server/packages/universal-agent-schema/src/agents/claude.rs @@ -31,10 +31,10 @@ pub fn event_to_universal_with_session( let event_type = event.get("type").and_then(Value::as_str).unwrap_or(""); let mut conversions = match event_type { "system" => vec![system_event_to_universal(event)], - "assistant" => assistant_event_to_universal(event), - "tool_use" => tool_use_event_to_universal(event, session_id), + "assistant" => assistant_event_to_universal(event, &session_id), + "tool_use" => tool_use_event_to_universal(event, &session_id), "tool_result" => tool_result_event_to_universal(event), - "result" => result_event_to_universal(event), + "result" => result_event_to_universal(event, &session_id), _ => return Err(format!("unsupported Claude event type: {event_type}")), }; @@ -53,7 +53,7 @@ fn system_event_to_universal(event: &Value) -> EventConversion { .with_raw(Some(event.clone())) } -fn assistant_event_to_universal(event: &Value) -> Vec { +fn assistant_event_to_universal(event: &Value, session_id: &str) -> Vec { let mut conversions = Vec::new(); let content = event .get("message") @@ -62,7 +62,8 @@ fn assistant_event_to_universal(event: &Value) -> Vec { .cloned() .unwrap_or_default(); - let message_id = next_temp_id("tmp_claude_message"); + // Use session-based native_item_id so `result` event can reference the same item + let native_message_id = format!("{session_id}_message"); let mut message_parts = Vec::new(); for block in content { @@ -85,9 +86,9 @@ fn assistant_event_to_universal(event: &Value) -> Vec { .unwrap_or_else(|| next_temp_id("tmp_claude_tool")); let arguments = serde_json::to_string(&input).unwrap_or_else(|_| "{}".to_string()); let tool_item = UniversalItem { - item_id: next_temp_id("tmp_claude_tool_item"), + item_id: String::new(), native_item_id: Some(call_id.clone()), - parent_id: Some(message_id.clone()), + parent_id: Some(native_message_id.clone()), kind: ItemKind::ToolCall, role: Some(ItemRole::Assistant), content: vec![ContentPart::ToolCall { @@ -106,21 +107,23 @@ fn assistant_event_to_universal(event: &Value) -> Vec { } } + // `assistant` event emits item.started + item.delta only (in-progress state) + // The `result` event will emit item.completed to finalize let message_item = UniversalItem { - item_id: message_id, - native_item_id: None, + item_id: String::new(), + native_item_id: Some(native_message_id.clone()), parent_id: None, kind: ItemKind::Message, role: Some(ItemRole::Assistant), content: message_parts.clone(), - status: ItemStatus::Completed, + status: ItemStatus::InProgress, }; - conversions.extend(message_events(message_item, message_parts, true)); + conversions.extend(message_started_events(message_item, message_parts)); conversions } -fn tool_use_event_to_universal(event: &Value, session_id: String) -> Vec { +fn tool_use_event_to_universal(event: &Value, session_id: &str) -> Vec { let mut conversions = Vec::new(); let tool_use = event.get("tool_use"); let name = tool_use @@ -156,7 +159,7 @@ fn tool_use_event_to_universal(event: &Value, session_id: String) -> Vec Vec { conversions } -fn result_event_to_universal(event: &Value) -> Vec { +fn result_event_to_universal(event: &Value, session_id: &str) -> Vec { + // The `result` event completes the message started by `assistant`. + // Use the same native_item_id so they link to the same universal item. + let native_message_id = format!("{session_id}_message"); let result_text = event .get("result") .and_then(Value::as_str) .unwrap_or("") .to_string(); + let message_item = UniversalItem { - item_id: next_temp_id("tmp_claude_result"), - native_item_id: None, + item_id: String::new(), + native_item_id: Some(native_message_id), parent_id: None, kind: ItemKind::Message, role: Some(ItemRole::Assistant), - content: vec![ContentPart::Text { text: result_text.clone() }], + content: vec![ContentPart::Text { text: result_text }], status: ItemStatus::Completed, }; - message_events(message_item, vec![ContentPart::Text { text: result_text }], true) + + vec![EventConversion::new( + UniversalEventType::ItemCompleted, + UniversalEventData::Item(ItemEventData { item: message_item }), + )] } fn item_events(item: UniversalItem, synthetic_start: bool) -> Vec { @@ -260,20 +271,18 @@ fn item_events(item: UniversalItem, synthetic_start: bool) -> Vec, synthetic_start: bool) -> Vec { +/// Emits item.started + item.delta only (for `assistant` event). +/// The item.completed will come from the `result` event. +fn message_started_events(item: UniversalItem, parts: Vec) -> Vec { let mut events = Vec::new(); - if synthetic_start { - let mut started_item = item.clone(); - started_item.status = ItemStatus::InProgress; - events.push( - EventConversion::new( - UniversalEventType::ItemStarted, - UniversalEventData::Item(ItemEventData { item: started_item }), - ) - .synthetic(), - ); - } + // Emit item.started (in-progress) + events.push(EventConversion::new( + UniversalEventType::ItemStarted, + UniversalEventData::Item(ItemEventData { item: item.clone() }), + )); + + // Emit item.delta with the text content let mut delta_text = String::new(); for part in &parts { if let ContentPart::Text { text } = part { @@ -281,23 +290,16 @@ fn message_events(item: UniversalItem, parts: Vec, synthetic_start: } } if !delta_text.is_empty() { - events.push( - EventConversion::new( - UniversalEventType::ItemDelta, - UniversalEventData::ItemDelta(crate::ItemDeltaData { - item_id: item.item_id.clone(), - native_item_id: item.native_item_id.clone(), - delta: delta_text, - }), - ) - .synthetic(), - ); + events.push(EventConversion::new( + UniversalEventType::ItemDelta, + UniversalEventData::ItemDelta(crate::ItemDeltaData { + item_id: item.item_id.clone(), + native_item_id: item.native_item_id.clone(), + delta: delta_text, + }), + )); } - events.push(EventConversion::new( - UniversalEventType::ItemCompleted, - UniversalEventData::Item(ItemEventData { item }), - )); events } diff --git a/spec/universal-schema.json b/spec/universal-schema.json new file mode 100644 index 0000000..0170a82 --- /dev/null +++ b/spec/universal-schema.json @@ -0,0 +1,553 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "UniversalEvent", + "type": "object", + "required": [ + "data", + "event_id", + "sequence", + "session_id", + "source", + "synthetic", + "time", + "type" + ], + "properties": { + "data": { + "$ref": "#/definitions/UniversalEventData" + }, + "event_id": { + "type": "string" + }, + "native_session_id": { + "type": [ + "string", + "null" + ] + }, + "raw": true, + "sequence": { + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "session_id": { + "type": "string" + }, + "source": { + "$ref": "#/definitions/EventSource" + }, + "synthetic": { + "type": "boolean" + }, + "time": { + "type": "string" + }, + "type": { + "$ref": "#/definitions/UniversalEventType" + } + }, + "definitions": { + "AgentUnparsedData": { + "type": "object", + "required": [ + "error", + "location" + ], + "properties": { + "error": { + "type": "string" + }, + "location": { + "type": "string" + }, + "raw_hash": { + "type": [ + "string", + "null" + ] + } + } + }, + "ContentPart": { + "oneOf": [ + { + "type": "object", + "required": [ + "text", + "type" + ], + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "text" + ] + } + } + }, + { + "type": "object", + "required": [ + "json", + "type" + ], + "properties": { + "json": true, + "type": { + "type": "string", + "enum": [ + "json" + ] + } + } + }, + { + "type": "object", + "required": [ + "arguments", + "call_id", + "name", + "type" + ], + "properties": { + "arguments": { + "type": "string" + }, + "call_id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "tool_call" + ] + } + } + }, + { + "type": "object", + "required": [ + "call_id", + "output", + "type" + ], + "properties": { + "call_id": { + "type": "string" + }, + "output": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "tool_result" + ] + } + } + }, + { + "type": "object", + "required": [ + "action", + "path", + "type" + ], + "properties": { + "action": { + "$ref": "#/definitions/FileAction" + }, + "diff": { + "type": [ + "string", + "null" + ] + }, + "path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "file_ref" + ] + } + } + }, + { + "type": "object", + "required": [ + "text", + "type", + "visibility" + ], + "properties": { + "text": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "reasoning" + ] + }, + "visibility": { + "$ref": "#/definitions/ReasoningVisibility" + } + } + }, + { + "type": "object", + "required": [ + "path", + "type" + ], + "properties": { + "mime": { + "type": [ + "string", + "null" + ] + }, + "path": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "image" + ] + } + } + }, + { + "type": "object", + "required": [ + "label", + "type" + ], + "properties": { + "detail": { + "type": [ + "string", + "null" + ] + }, + "label": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "status" + ] + } + } + } + ] + }, + "ErrorData": { + "type": "object", + "required": [ + "message" + ], + "properties": { + "code": { + "type": [ + "string", + "null" + ] + }, + "details": true, + "message": { + "type": "string" + } + } + }, + "EventSource": { + "type": "string", + "enum": [ + "agent", + "daemon" + ] + }, + "FileAction": { + "type": "string", + "enum": [ + "read", + "write", + "patch" + ] + }, + "ItemDeltaData": { + "type": "object", + "required": [ + "delta", + "item_id" + ], + "properties": { + "delta": { + "type": "string" + }, + "item_id": { + "type": "string" + }, + "native_item_id": { + "type": [ + "string", + "null" + ] + } + } + }, + "ItemEventData": { + "type": "object", + "required": [ + "item" + ], + "properties": { + "item": { + "$ref": "#/definitions/UniversalItem" + } + } + }, + "ItemKind": { + "type": "string", + "enum": [ + "message", + "tool_call", + "tool_result", + "system", + "status", + "unknown" + ] + }, + "ItemRole": { + "type": "string", + "enum": [ + "user", + "assistant", + "system", + "tool" + ] + }, + "ItemStatus": { + "type": "string", + "enum": [ + "in_progress", + "completed", + "failed" + ] + }, + "PermissionEventData": { + "type": "object", + "required": [ + "action", + "permission_id", + "status" + ], + "properties": { + "action": { + "type": "string" + }, + "metadata": true, + "permission_id": { + "type": "string" + }, + "status": { + "$ref": "#/definitions/PermissionStatus" + } + } + }, + "PermissionStatus": { + "type": "string", + "enum": [ + "requested", + "approved", + "denied" + ] + }, + "QuestionEventData": { + "type": "object", + "required": [ + "options", + "prompt", + "question_id", + "status" + ], + "properties": { + "options": { + "type": "array", + "items": { + "type": "string" + } + }, + "prompt": { + "type": "string" + }, + "question_id": { + "type": "string" + }, + "response": { + "type": [ + "string", + "null" + ] + }, + "status": { + "$ref": "#/definitions/QuestionStatus" + } + } + }, + "QuestionStatus": { + "type": "string", + "enum": [ + "requested", + "answered", + "rejected" + ] + }, + "ReasoningVisibility": { + "type": "string", + "enum": [ + "public", + "private" + ] + }, + "SessionEndReason": { + "type": "string", + "enum": [ + "completed", + "error", + "terminated" + ] + }, + "SessionEndedData": { + "type": "object", + "required": [ + "reason", + "terminated_by" + ], + "properties": { + "reason": { + "$ref": "#/definitions/SessionEndReason" + }, + "terminated_by": { + "$ref": "#/definitions/TerminatedBy" + } + } + }, + "SessionStartedData": { + "type": "object", + "properties": { + "metadata": true + } + }, + "TerminatedBy": { + "type": "string", + "enum": [ + "agent", + "daemon" + ] + }, + "UniversalEventData": { + "anyOf": [ + { + "$ref": "#/definitions/SessionStartedData" + }, + { + "$ref": "#/definitions/SessionEndedData" + }, + { + "$ref": "#/definitions/ItemEventData" + }, + { + "$ref": "#/definitions/ItemDeltaData" + }, + { + "$ref": "#/definitions/ErrorData" + }, + { + "$ref": "#/definitions/PermissionEventData" + }, + { + "$ref": "#/definitions/QuestionEventData" + }, + { + "$ref": "#/definitions/AgentUnparsedData" + } + ] + }, + "UniversalEventType": { + "type": "string", + "enum": [ + "session.started", + "session.ended", + "item.started", + "item.delta", + "item.completed", + "error", + "permission.requested", + "permission.resolved", + "question.requested", + "question.resolved", + "agent.unparsed" + ] + }, + "UniversalItem": { + "type": "object", + "required": [ + "content", + "item_id", + "kind", + "status" + ], + "properties": { + "content": { + "type": "array", + "items": { + "$ref": "#/definitions/ContentPart" + } + }, + "item_id": { + "type": "string" + }, + "kind": { + "$ref": "#/definitions/ItemKind" + }, + "native_item_id": { + "type": [ + "string", + "null" + ] + }, + "parent_id": { + "type": [ + "string", + "null" + ] + }, + "role": { + "anyOf": [ + { + "$ref": "#/definitions/ItemRole" + }, + { + "type": "null" + } + ] + }, + "status": { + "$ref": "#/definitions/ItemStatus" + } + } + } + } +} \ No newline at end of file diff --git a/todo.md b/todo.md index 485bd07..e27747f 100644 --- a/todo.md +++ b/todo.md @@ -2,3 +2,7 @@ - [x] Replace server --mock flag with built-in mock agent and update UI approvals layout. - [x] Add telemetry module with opt-out flag and sandbox provider detection. +- [x] Add turn-stream message endpoint with SSE response and tests. +- [x] Update CLI + TypeScript SDK/OpenAPI for turn streaming. +- [x] Add inspector UI mode for turn stream and wire send flow. +- [x] Refresh docs for new endpoint and UI mode.