fix: fix checking if provider is authenticated

This commit is contained in:
Nathan Flurry 2026-02-06 19:40:45 -08:00
parent b76d83577a
commit 80ce95f886
13 changed files with 801 additions and 6 deletions

133
docs/credentials.mdx Normal file
View file

@ -0,0 +1,133 @@
---
title: "Credentials"
description: "How sandbox-agent discovers and uses provider credentials."
icon: "key"
---
Sandbox-agent automatically discovers API credentials from environment variables and agent config files. Credentials are used to authenticate with AI providers (Anthropic, OpenAI) when spawning agents.
## Credential sources
Credentials are extracted in priority order. The first valid credential found for each provider is used.
### Environment variables (highest priority)
| Variable | Provider |
|----------|----------|
| `ANTHROPIC_API_KEY` | Anthropic |
| `CLAUDE_API_KEY` | Anthropic (fallback) |
| `OPENAI_API_KEY` | OpenAI |
| `CODEX_API_KEY` | OpenAI (fallback) |
### Agent config files
If no environment variable is set, sandbox-agent checks agent-specific config files:
| Agent | Config path | Provider |
|-------|-------------|----------|
| Amp | `~/.amp/config.json` | Anthropic |
| Claude Code | `~/.claude.json`, `~/.claude/.credentials.json` | Anthropic |
| Codex | `~/.codex/auth.json` | OpenAI |
| OpenCode | `~/.local/share/opencode/auth.json` | Both |
OAuth tokens are supported for Claude Code, Codex, and OpenCode. Expired tokens are automatically skipped.
## Provider requirements by agent
| Agent | Required provider |
|-------|-------------------|
| Claude Code | Anthropic |
| Amp | Anthropic |
| Codex | OpenAI |
| OpenCode | Anthropic or OpenAI |
| Mock | None |
## Error handling behavior
Sandbox-agent uses a **best-effort, fail-forward** approach to credentials:
### Extraction failures are silent
If a config file is missing, unreadable, or malformed, extraction continues to the next source. No errors are thrown. Missing credentials simply mean the provider is marked as unavailable.
```
~/.claude.json missing → try ~/.claude/.credentials.json
~/.claude/.credentials.json missing → try OpenCode config
All sources exhausted → anthropic = None (not an error)
```
### Agents spawn without credential validation
When you send a message to a session, sandbox-agent does **not** pre-validate credentials. The agent process is spawned with whatever credentials were found (or none), and the agent's native error surfaces if authentication fails.
This design:
- Lets you test agent error handling behavior
- Avoids duplicating provider-specific auth validation
- Ensures sandbox-agent faithfully proxies agent behavior
For example, sending a message to Claude Code without Anthropic credentials will spawn the agent, which will then emit its own "ANTHROPIC_API_KEY not set" error through the event stream.
## Checking credential status
### API endpoint
The `GET /v1/agents` endpoint includes a `credentialsAvailable` field for each agent:
```json
{
"agents": [
{
"id": "claude",
"installed": true,
"credentialsAvailable": true,
...
},
{
"id": "codex",
"installed": true,
"credentialsAvailable": false,
...
}
]
}
```
### TypeScript SDK
```typescript
const { agents } = await client.listAgents();
for (const agent of agents) {
console.log(`${agent.id}: ${agent.credentialsAvailable ? 'authenticated' : 'no credentials'}`);
}
```
### OpenCode compatibility
The `/opencode/provider` endpoint returns a `connected` array listing the provider group IDs (agents) whose credentials are valid:
```json
{
"all": [...],
"connected": ["claude", "mock"]
}
```
## Passing credentials explicitly
You can override auto-discovered credentials by setting environment variables before starting sandbox-agent:
```bash
export ANTHROPIC_API_KEY=sk-ant-...
export OPENAI_API_KEY=sk-...
sandbox-agent daemon start
```
Or when using the SDK in embedded mode:
```typescript
const client = await SandboxAgentClient.spawn({
env: {
ANTHROPIC_API_KEY: process.env.MY_ANTHROPIC_KEY,
},
});
```

View file

@ -70,6 +70,7 @@
"cli",
"inspector",
"session-transcript-schema",
"credentials",
"gigacode",
{
"group": "AI",

View file

@ -805,12 +805,17 @@
"required": [
"id",
"installed",
"credentialsAvailable",
"capabilities"
],
"properties": {
"capabilities": {
"$ref": "#/components/schemas/AgentCapabilities"
},
"credentialsAvailable": {
"type": "boolean",
"description": "Whether the agent's required provider credentials are available"
},
"id": {
"type": "string"
},

View file

@ -39,6 +39,7 @@ const AgentsTab = ({
: defaultAgents.map((id) => ({
id,
installed: false,
credentialsAvailable: false,
version: undefined,
path: undefined,
capabilities: emptyFeatureCoverage
@ -49,6 +50,9 @@ const AgentsTab = ({
<span className={`pill ${agent.installed ? "success" : "danger"}`}>
{agent.installed ? "Installed" : "Missing"}
</span>
<span className={`pill ${agent.credentialsAvailable ? "success" : "warning"}`}>
{agent.credentialsAvailable ? "Authenticated" : "No Credentials"}
</span>
</div>
<div className="card-meta">
{agent.version ? `v${agent.version}` : "Version unknown"}

View file

@ -415,6 +415,31 @@ if let Some(model) = options.model.as_deref() {
3. **Wait for Amp API** — Amp may add model/mode discovery in a future release
4. **Scrape ampcode.com** — Check if the web UI exposes available modes/models
## Command Execution & Process Management
### Agent Tool Execution
Amp executes commands via the `Bash` tool, similar to Claude Code. Synchronous execution, blocks the agent turn. Permission rules can pre-authorize specific commands:
```typescript
{ tool: "Bash", matches: { command: "git *" }, action: "allow" }
```
### No User-Initiated Command Injection
Amp does not expose any mechanism for external clients to inject command results into the agent's context. No `!` prefix equivalent, no command injection API.
### Comparison
| Capability | Supported? | Notes |
|-----------|-----------|-------|
| Agent runs commands | Yes (`Bash` tool) | Synchronous, blocks agent turn |
| User runs commands → agent sees output | No | |
| External API for command injection | No | |
| Command source tracking | No | |
| Background process management | No | Shell `&` only |
| PTY / interactive terminal | No | |
## Notes
- Amp is similar to Claude Code (same streaming format)

View file

@ -279,6 +279,44 @@ x-api-key: <ANTHROPIC_API_KEY>
anthropic-version: 2023-06-01
```
## Command Execution & Process Management
### Agent Tool Execution
The agent executes commands via the `Bash` tool. This is synchronous - the agent blocks until the command exits. Tool schema:
```json
{
"command": "string",
"timeout": "number",
"workingDirectory": "string"
}
```
There is no background process support. If the agent needs a long-running process (e.g., dev server), it uses shell backgrounding (`&`) within a single `Bash` tool call.
### User-Initiated Command Execution (`!` prefix)
Claude Code's TUI supports `!command` syntax where the user types `!npm test` to run a command directly. The output is injected into the conversation as a user message so the agent can see it on the next turn.
**This is a client-side TUI feature only.** It is not exposed in the API schema or streaming protocol. The CLI runs the command locally and stuffs the output into the next user message. There is no protocol-level concept of "user ran a command" vs "agent ran a command."
### No External Command Injection API
External clients (SDKs, frontends) cannot programmatically inject command results into Claude's conversation context. The only way to provide command output to the agent is:
- Include it in the user prompt text
- Use the `!` prefix in the interactive TUI
### Comparison
| Capability | Supported? | Notes |
|-----------|-----------|-------|
| Agent runs commands | Yes (`Bash` tool) | Synchronous, blocks agent turn |
| User runs commands → agent sees output | Yes (`!cmd` in TUI) | Client-side only, not in protocol |
| External API for command injection | No | |
| Background process management | No | Shell `&` only |
| PTY / interactive terminal | No | |
## Notes
- Claude CLI manages its own OAuth refresh internally

View file

@ -347,6 +347,68 @@ Requires a running Codex app-server process. Send the JSON-RPC request to the ap
- Requires an active app-server process (cannot query models without starting one)
- No standalone CLI command like `codex models`
## Command Execution & Process Management
### Agent Tool Execution
Codex executes commands via `LocalShellAction`. The agent proposes a command, and external clients approve/deny via JSON-RPC (`item/commandExecution/requestApproval`).
### Command Source Tracking (`ExecCommandSource`)
Codex is the only agent that explicitly tracks **who initiated a command** at the protocol level:
```json
{
"ExecCommandSource": {
"enum": ["agent", "user_shell", "unified_exec_startup", "unified_exec_interaction"]
}
}
```
| Source | Meaning |
|--------|---------|
| `agent` | Agent decided to run this command via tool call |
| `user_shell` | User ran a command in a shell (equivalent to Claude Code's `!` prefix) |
| `unified_exec_startup` | Startup script ran this command |
| `unified_exec_interaction` | Interactive execution |
This means user-initiated shell commands are **first-class protocol events** in Codex, not a client-side hack like Claude Code's `!` prefix.
### Command Execution Events
Codex emits structured events for command execution:
- `exec_command_begin` - Command started (includes `source`, `command`, `cwd`, `turn_id`)
- `exec_command_output_delta` - Streaming output chunk (includes `stream: stdout|stderr`)
- `exec_command_end` - Command completed (includes `exit_code`, `source`)
### Parsed Command Analysis (`CommandAction`)
Codex provides semantic analysis of what a command does:
```json
{
"commandActions": [
{ "type": "read", "path": "/src/main.ts" },
{ "type": "write", "path": "/src/utils.ts" },
{ "type": "install", "package": "lodash" }
]
}
```
Action types: `read`, `write`, `listFiles`, `search`, `install`, `remove`, `other`.
### Comparison
| Capability | Supported? | Notes |
|-----------|-----------|-------|
| Agent runs commands | Yes (`LocalShellAction`) | With approval workflow |
| User runs commands → agent sees output | Yes (`user_shell` source) | First-class protocol event |
| External API for command injection | Yes (JSON-RPC approval) | Can approve/deny before execution |
| Command source tracking | Yes (`ExecCommandSource` enum) | Distinguishes agent vs user vs startup |
| Background process management | No | |
| PTY / interactive terminal | No | |
## Notes
- SDK is dynamically imported to reduce bundle size

View file

@ -585,6 +585,60 @@ const response = await client.provider.list();
When an OpenCode server is running, call `GET /provider` on its HTTP port. Returns full model metadata including capabilities, costs, context limits, and modalities.
## Command Execution & Process Management
### Agent Tool Execution
The agent executes commands via internal tools (not exposed in the HTTP API). The agent's tool calls are synchronous within its turn. Tool parts have states: `pending`, `running`, `completed`, `error`.
### PTY System (`/pty/*`) - User-Facing Terminals
Separate from the agent's command execution. PTYs are server-scoped interactive terminals for the user:
- `POST /pty` - Create PTY (command, args, cwd, title, env)
- `GET /pty` - List all PTYs
- `GET /pty/{ptyID}` - Get PTY info
- `PUT /pty/{ptyID}` - Update PTY (title, resize via `size: {rows, cols}`)
- `DELETE /pty/{ptyID}` - Kill and remove PTY
- `GET /pty/{ptyID}/connect` - WebSocket for bidirectional I/O
PTY events (globally broadcast via SSE): `pty.created`, `pty.updated`, `pty.exited`, `pty.deleted`.
The agent does NOT use the PTY system. PTYs are for the user's interactive terminal panel, independent of any AI session.
### Session Commands (`/session/{id}/command`, `/session/{id}/shell`) - Context Injection
External clients can inject command results into an AI session's conversation context:
- `POST /session/{sessionID}/command` - Executes a command and records the result as an `AssistantMessage` in the session. Required fields: `command`, `arguments`. The output becomes part of the AI's context for subsequent turns.
- `POST /session/{sessionID}/shell` - Similar but wraps in `sh -c`. Required fields: `command`, `agent`.
- `GET /command` - Lists available command definitions (metadata, not execution).
Session commands emit `command.executed` events with `sessionID` + `messageID`.
**Key distinction**: These endpoints execute commands directly (not via the AI), then inject the output into the session as if the AI produced it. The AI doesn't actively run the command - it just finds the output in its conversation history on the next turn.
### Three Separate Execution Mechanisms
| Mechanism | Who uses it | Scoped to | AI sees output? |
|-----------|-------------|-----------|----------------|
| Agent tools (internal) | AI agent | Session turn | Yes (immediate) |
| PTY (`/pty/*`) | User/frontend | Server (global) | No |
| Session commands (`/session/{id}/*`) | Frontend/SDK client | Session | Yes (next turn) |
The agent has no tool to interact with PTYs and cannot access the session command endpoints. When the agent needs to run a background process, it uses its internal bash-equivalent tool with shell backgrounding (`&`).
### Comparison
| Capability | Supported? | Notes |
|-----------|-----------|-------|
| Agent runs commands | Yes (internal tools) | Synchronous, blocks agent turn |
| User runs commands → agent sees output | Yes (`/session/{id}/command`) | HTTP API, first-class |
| External API for command injection | Yes | Session-scoped endpoints |
| Command source tracking | Implicit | Endpoint implies source (no enum) |
| Background process management | No | Shell `&` only for agent |
| PTY / interactive terminal | Yes (`/pty/*`) | Server-scoped, WebSocket I/O |
## Notes
- OpenCode is the most feature-rich runtime (streaming, questions, permissions)

View file

@ -0,0 +1,374 @@
# Research: Process & Terminal System Design
Research on PTY/terminal and process management APIs across sandbox platforms, with design recommendations for sandbox-agent.
## Competitive Landscape
### Transport Comparison
| Platform | PTY Transport | Command Transport | Unified? |
|----------|--------------|-------------------|----------|
| **OpenCode** | WebSocket (`/pty/{id}/connect`) | REST (session-scoped, AI-mediated) | No |
| **E2B** | gRPC server-stream (output) + unary RPC (input) | Same gRPC service | Yes |
| **Daytona** | WebSocket | REST | No |
| **Kubernetes** | WebSocket (channel byte mux) | Same WebSocket | Yes |
| **Docker** | HTTP connection hijack | Same connection | Yes |
| **Fly.io** | SSH over WireGuard | REST (sync, 60s max) | No |
| **Vercel Sandboxes** | No PTY API | REST SDK (async generator for logs) | N/A |
| **Gitpod** | gRPC (Listen=output, Write=input) | Same gRPC service | Yes |
### Resize Mechanism
| Platform | How | Notes |
|----------|-----|-------|
| **OpenCode** | `PUT /pty/{id}` with `size: {rows, cols}` | Separate REST call |
| **E2B** | Separate `Update` RPC | Separate gRPC call |
| **Daytona** | Separate HTTP POST | Sends SIGWINCH |
| **Kubernetes** | In-band WebSocket message (channel byte 4) | `{"Width": N, "Height": N}` |
| **Docker** | `POST /exec/{id}/resize?h=N&w=N` | Separate REST call |
| **Gitpod** | Separate `SetSize` RPC | Separate gRPC call |
**Consensus**: Almost all platforms use a separate call for resize. Only Kubernetes does it in-band. Since resize is a control signal (not data), a separate mechanism is cleaner.
### I/O Multiplexing
I/O multiplexing is how platforms distinguish between stdout, stderr, and PTY data on a shared connection.
| Platform | Method | Detail |
|----------|--------|--------|
| **Docker** | 8-byte binary header per frame | Byte 0 = stream type (0=stdin, 1=stdout, 2=stderr). When TTY=true, no mux (raw stream). |
| **Kubernetes** | 1-byte channel prefix per WebSocket message | 0=stdin, 1=stdout, 2=stderr, 3=error, 4=resize, 255=close |
| **E2B** | gRPC `oneof` in protobuf | `DataEvent.output` is `oneof { bytes stdout, bytes stderr, bytes pty }` |
| **OpenCode** | None | PTY is a unified stream. Commands capture stdout/stderr separately in response. |
| **Daytona** | None | PTY is unified. Commands return structured `{stdout, stderr}`. |
**Key insight**: When a process runs with a PTY allocated, stdout and stderr are merged by the kernel into a single stream. Multiplexing only matters for non-PTY command execution. OpenCode and Daytona handle this by keeping PTY (unified stream) and commands (structured response) as separate APIs.
### Reconnection
| Platform | Method | Replays missed output? |
|----------|--------|----------------------|
| **E2B** | `Connect` RPC by PID or tag | No - only new events from reconnect point |
| **Daytona** | New WebSocket to same PTY session | No |
| **Kubernetes** | Not supported (connection = session) | N/A |
| **Docker** | Not supported (connection = session) | N/A |
| **OpenCode** | `GET /pty/{id}/connect` (WebSocket) | Unknown (not documented) |
### Process Identification
| Platform | ID Type | Notes |
|----------|---------|-------|
| **OpenCode** | String (`pty_N`) | Pattern `^pty.*` |
| **E2B** | PID (uint32) or tag (string) | Dual selector |
| **Daytona** | Session ID / PID | |
| **Docker** | Exec ID (string, server-generated) | |
| **Kubernetes** | Connection-scoped | No ID - the WebSocket IS the process |
| **Gitpod** | Alias (string) | Human-readable |
### Scoping
| Platform | PTY Scope | Command Scope |
|----------|-----------|---------------|
| **OpenCode** | Server-wide (global) | Session-specific (AI-mediated) |
| **E2B** | Sandbox-wide | Sandbox-wide |
| **Daytona** | Sandbox-wide | Sandbox-wide |
| **Docker** | Container-scoped | Container-scoped |
| **Kubernetes** | Pod-scoped | Pod-scoped |
## Key Questions & Analysis
### Q: Should PTY transport be WebSocket?
**Yes.** WebSocket is the right choice for PTY I/O:
- Bidirectional: client sends keystrokes, server sends terminal output
- Low latency: no HTTP request overhead per keystroke
- Persistent connection: terminal sessions are long-lived
- Industry consensus: OpenCode, Daytona, and Kubernetes all use WebSocket for PTY
### Q: Should command transport be WebSocket or REST?
**REST is sufficient for commands. WebSocket is not needed.**
The distinction comes down to the nature of each operation:
- **PTY**: Long-lived, bidirectional, interactive. User types, terminal responds. Needs WebSocket.
- **Commands**: Request-response. Client says "run `ls -la`", server runs it, returns stdout/stderr/exit_code. This is a natural REST operation.
The "full duplex" question: commands don't need full duplex because:
1. Input is sent once at invocation (the command string)
2. Output is collected and returned when the process exits
3. There's no ongoing interactive input during execution
For **streaming output** of long-running commands (e.g., `npm install`), there are two clean options:
1. **SSE**: Server-Sent Events for output streaming (output-only, which is all you need)
2. **PTY**: If the user needs to interact with the process (send ctrl+c, provide stdin), they should use a PTY instead
This matches how OpenCode separates the two: commands are REST, PTYs are WebSocket.
**Recommendation**: Keep commands as REST. If a command needs streaming output or interactive input, the user should create a PTY instead. This avoids building a second WebSocket protocol for a use case that PTYs already cover.
### Q: Should resize be WebSocket in-band or separate POST?
**Separate endpoint (PUT or POST).**
Reasons:
- Resize is a control signal, not data. Mixing it into the data stream requires a framing protocol to distinguish resize messages from terminal input.
- OpenCode already defines `PUT /pty/{id}` with `size: {rows, cols}` - this is the existing spec.
- E2B, Daytona, Docker, and Gitpod all use separate calls.
- Only Kubernetes does in-band (because their channel-byte protocol already has a mux layer).
- A separate endpoint is simpler to implement, test, and debug.
**Recommendation**: Use `PUT /pty/{id}` with `size` field (matching OpenCode spec). Alternatively, a dedicated `POST /pty/{id}/resize` if we want to keep update and resize semantically separate.
### Q: What is I/O multiplexing?
I/O multiplexing is the mechanism for distinguishing between different data streams (stdout, stderr, stdin, control signals) on a single connection.
**When it matters**: Non-PTY command execution where stdout and stderr need to be kept separate.
**When it doesn't matter**: PTY sessions. When a PTY is allocated, the kernel merges stdout and stderr into a single stream (the PTY master fd). There is only one output stream. This is why terminals show stdout and stderr interleaved - the PTY doesn't distinguish them.
**For sandbox-agent**: Since PTYs are unified streams and commands use REST (separate stdout/stderr in the JSON response), we don't need a multiplexing protocol. The API design naturally separates the two cases.
### Q: How should reconnect work?
**Reconnect is an application-level concept, not just HTTP/WebSocket reconnection.**
The distinction:
- **HTTP/WebSocket reconnect**: The transport-level connection drops and is re-established. This is handled by the client library automatically (retry logic, exponential backoff). The server doesn't need to know.
- **Process reconnect**: The client disconnects from a running process but the process keeps running. Later, the client (or a different client) connects to the same process and starts receiving output again.
**E2B's model**: Disconnecting a stream (via AbortController) leaves the process running. `Connect` RPC by PID or tag re-establishes the output stream. Missed output during disconnection is lost. This works because:
1. Processes are long-lived (servers, shells)
2. For terminals, the screen state can be recovered by the shell/application redrawing
3. For commands, if you care about all output, don't disconnect
**Recommendation for sandbox-agent**: Reconnect should be supported at the application level:
1. `GET /pty/{id}/connect` (WebSocket) can be called multiple times for the same PTY
2. If the WebSocket drops, the PTY process keeps running
3. Client reconnects by opening a new WebSocket to the same endpoint
4. No output replay (too complex, rarely needed - terminal apps redraw on reconnect via SIGWINCH)
5. This is essentially what OpenCode's `/pty/{id}/connect` endpoint already implies
This naturally leads to the **persistent process system** concept (see below).
### Q: How are PTY events different from PTY transport?
Two completely separate channels serving different purposes:
**PTY Events** (via SSE on `/event` or `/sessions/{id}/events/sse`):
- Lifecycle notifications: `pty.created`, `pty.updated`, `pty.exited`, `pty.deleted`
- Lightweight JSON metadata (PTY id, status, exit code)
- Broadcast to all subscribers
- Used by UIs to update PTY lists, show status indicators, handle cleanup
**PTY Transport** (via WebSocket on `/pty/{id}/connect`):
- Raw terminal I/O: binary input/output bytes
- High-frequency, high-bandwidth
- Point-to-point (one client connected to one PTY)
- Used by terminal emulators (xterm.js) to render the terminal
**Analogy**: Events are like email notifications ("a new terminal was opened"). Transport is like the phone call (the actual terminal session).
### Q: How are PTY and commands different in OpenCode?
They serve fundamentally different purposes:
**PTY (`/pty/*`)** - Direct execution environment:
- Server-scoped (not tied to any AI session)
- Creates a real terminal process
- User interacts directly via WebSocket
- Not part of the AI conversation
- Think: "the terminal panel in VS Code"
**Commands (`/session/{sessionID}/command`, `/session/{sessionID}/shell`)** - AI-mediated execution:
- Session-scoped (tied to an AI session)
- The server executes the command directly; the AI itself does **not** run it
- Creates an `AssistantMessage` in the session's conversation history
- Output becomes part of the AI's context
- Think: "running a command and placing the output into Claude's context"
**Why commands are session-specific**: Because their purpose is injecting output into an AI session's context, not standalone execution. When you call `POST /session/{id}/command`, the server:
1. Creates an assistant message in the session
2. Runs the command
3. Captures output as message parts
4. Emits `message.part.updated` events
5. The AI can see this output in subsequent turns
This is how the AI "uses terminal tools" - the command infrastructure provides the bridge between the AI session and system execution.
### Q: Should scoping be system-wide?
**Yes, for both PTY and commands.**
Current OpenCode behavior:
- PTYs: Already server-wide (global)
- Commands: Session-scoped (for AI context injection)
**For sandbox-agent**, since we're the orchestration layer (not the AI):
- **PTYs**: System-wide. Any client should be able to list, connect to, or manage any PTY.
- **Commands/processes**: System-wide. Process execution is a system primitive, not an AI primitive. If a caller wants to associate a process with a session, they can do so at their layer.
The session-scoping of commands in OpenCode is an OpenCode-specific concern (AI context injection). Sandbox-agent should provide the lower-level primitive (system-wide process execution) and let the OpenCode compat layer handle the session association.
## Persistent Process System
### The Concept
A persistent process system means:
1. **Spawn** a process (PTY or command) via API
2. Process runs independently of any client connection
3. **Connect/disconnect** to the process I/O at will
4. Process continues running through disconnections
5. **Query** process status, list running processes
6. **Kill/signal** processes explicitly
This is distinct from the typical "connection = process lifetime" model (Kubernetes, Docker exec) where closing the connection kills the process.
### How E2B Does It
E2B's `Process` service is the best reference implementation:
```
Start(cmd, pty?) → stream of events (output)
Connect(pid/tag) → stream of events (reconnect)
SendInput(pid, data) → ok
Update(pid, size) → ok (resize)
SendSignal(pid, signal) → ok
List() → running processes
```
Key design choices:
- **Unified service**: PTY and command are the same service, differentiated by the `pty` field in `StartRequest`
- **Process outlives connection**: Disconnecting the output stream (aborting the `Start`/`Connect` RPC) does NOT kill the process
- **Explicit termination**: Must call `SendSignal(SIGKILL)` to stop a process
- **Tag-based selection**: Processes can be tagged at creation for later lookup without knowing the PID
### Recommendation for Sandbox-Agent
Sandbox-agent should implement a **persistent process manager** that:
1. **Is system-wide** (not session-scoped)
2. **Supports both PTY and non-PTY modes**
3. **Decouples process lifetime from connection lifetime**
4. **Exposes via both REST (lifecycle) and WebSocket (I/O)**
#### Proposed API Surface
**Process Lifecycle (REST)**:
| Method | Endpoint | Description |
|--------|----------|-------------|
| `POST` | `/v1/processes` | Create/spawn a process (PTY or command) |
| `GET` | `/v1/processes` | List all processes |
| `GET` | `/v1/processes/{id}` | Get process info (status, pid, exit code) |
| `DELETE` | `/v1/processes/{id}` | Kill process (SIGTERM, then SIGKILL) |
| `POST` | `/v1/processes/{id}/signal` | Send signal (SIGTERM, SIGKILL, SIGINT, etc.) |
| `POST` | `/v1/processes/{id}/resize` | Resize PTY (rows, cols) |
| `POST` | `/v1/processes/{id}/input` | Send stdin/pty input (REST fallback) |
**Process I/O (WebSocket)**:
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/v1/processes/{id}/connect` | WebSocket for bidirectional I/O |
**Process Events (SSE)**:
| Event | Description |
|-------|-------------|
| `process.created` | Process spawned |
| `process.updated` | Process metadata changed |
| `process.exited` | Process terminated (includes exit code) |
| `process.deleted` | Process record removed |
#### Create Request
```json
{
"command": "bash",
"args": ["-i", "-l"],
"cwd": "/workspace",
"env": {"TERM": "xterm-256color"},
"pty": { // Optional - if present, allocate PTY
"rows": 24,
"cols": 80
},
"tag": "main-terminal", // Optional - for lookup by name
"label": "Terminal 1" // Optional - display name
}
```
#### Process Object
```json
{
"id": "proc_abc123",
"tag": "main-terminal",
"label": "Terminal 1",
"command": "bash",
"args": ["-i", "-l"],
"cwd": "/workspace",
"pid": 12345,
"pty": true,
"status": "running", // "running" | "exited"
"exit_code": null, // Set when exited
"created_at": "2025-01-15T...",
"exited_at": null
}
```
#### OpenCode Compatibility Layer
The OpenCode compat layer maps to this system:
| OpenCode Endpoint | Maps To |
|-------------------|---------|
| `POST /pty` | `POST /v1/processes` (with `pty` field) |
| `GET /pty` | `GET /v1/processes?pty=true` |
| `GET /pty/{id}` | `GET /v1/processes/{id}` |
| `PUT /pty/{id}` | `POST /v1/processes/{id}/resize` + metadata update |
| `DELETE /pty/{id}` | `DELETE /v1/processes/{id}` |
| `GET /pty/{id}/connect` | `GET /v1/processes/{id}/connect` |
| `POST /session/{id}/command` | Create process + capture output into session |
| `POST /session/{id}/shell` | Create process (shell mode) + capture output into session |
### Open Questions
1. **Output buffering for reconnect**: Should we buffer recent output (e.g., last 64KB) so reconnecting clients get some history? E2B doesn't do this, but it would improve UX for flaky connections.
2. **Process limits**: Should there be a max number of concurrent processes? E2B doesn't expose one, but sandbox environments have limited resources.
3. **Auto-cleanup**: Should processes be auto-cleaned after exiting? Options:
- Keep forever until explicitly deleted
- Auto-delete after N seconds/minutes
- Keep metadata but release resources
4. **Input via REST vs WebSocket-only**: The REST `POST /processes/{id}/input` endpoint is useful for one-shot input (e.g., "send ctrl+c") without establishing a WebSocket. E2B has both `SendInput` (unary) and `StreamInput` (streaming) for this reason.
5. **Multiple WebSocket connections to same process**: Should we allow multiple clients to connect to the same process simultaneously? (Pair programming, monitoring). E2B supports this via multiple `Connect` calls.
## User-Initiated Command Injection ("Run command, give AI context")
A common pattern across agents: the user (or frontend) runs a command and the output is injected into the AI's conversation context. This is distinct from the agent running a command via its own tools.
| Agent | Feature | Mechanism | Protocol-level? |
|-------|---------|-----------|----------------|
| **Claude Code** | `!command` prefix in TUI | CLI runs command locally, injects output as user message | No - client-side hack, not in API schema |
| **Codex** | `user_shell` source | `ExecCommandSource` enum distinguishes `agent` vs `user_shell` vs `unified_exec_*` | Yes - first-class protocol event |
| **OpenCode** | `/session/{id}/command` | HTTP endpoint runs command, records result as `AssistantMessage` | Yes - HTTP API |
| **Amp** | N/A | Not supported | N/A |
**Design implication for sandbox-agent**: The process system should support an optional `session_id` field when creating a process. If provided, the process output is associated with that session so the agent can see it. If not provided, the process runs independently (like a PTY). This unifies:
- User interactive terminals (no session association)
- User-initiated commands for AI context (session association)
- Agent-initiated background processes (session association)
## Sources
- [E2B Process Proto](https://github.com/e2b-dev/E2B) - `process.proto` gRPC service definition
- [E2B JS SDK](https://github.com/e2b-dev/E2B/tree/main/packages/js-sdk) - `commands/pty.ts`, `commands/index.ts`
- [Daytona SDK](https://www.daytona.io/docs/en/typescript-sdk/process/) - REST + WebSocket PTY API
- [Kubernetes RemoteCommand](https://github.com/kubernetes/apimachinery/blob/master/pkg/util/remotecommand/constants.go) - WebSocket subprotocol
- [Docker Engine API](https://docker-docs.uclv.cu/engine/api/v1.21/) - Exec API with stream multiplexing
- [Fly.io Machines API](https://fly.io/docs/machines/api/) - REST exec with 60s limit
- [Gitpod terminal.proto](https://codeberg.org/kanishka-reading-list/gitpod/src/branch/main/components/supervisor-api/terminal.proto) - gRPC terminal service
- [OpenCode OpenAPI Spec](https://github.com/opencode-ai/opencode) - PTY and session command endpoints

View file

@ -87,6 +87,8 @@ export interface components {
};
AgentInfo: {
capabilities: components["schemas"]["AgentCapabilities"];
/** @description Whether the agent's required provider credentials are available */
credentialsAvailable: boolean;
id: string;
installed: boolean;
path?: string | null;

View file

@ -63,7 +63,9 @@ pub fn extract_claude_credentials(
];
for path in config_paths {
let data = read_json_file(&path)?;
let Some(data) = read_json_file(&path) else {
continue;
};
for key_path in &key_paths {
if let Some(key) = read_string_field(&data, key_path) {
if key.starts_with("sk-ant-") {

View file

@ -21,10 +21,14 @@ use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use tokio::sync::{broadcast, Mutex};
use tokio::time::interval;
use tracing::warn;
use utoipa::{IntoParams, OpenApi, ToSchema};
use crate::router::{AgentModelInfo, AppState, CreateSessionRequest, PermissionReply};
use sandbox_agent_agent_management::agents::AgentId;
use sandbox_agent_agent_management::credentials::{
extract_all_credentials, CredentialExtractionOptions, ExtractedCredentials,
};
use sandbox_agent_error::SandboxError;
use sandbox_agent_universal_agent_schema::{
ContentPart, FileAction, ItemDeltaData, ItemEventData, ItemKind, ItemRole, ItemStatus,
@ -233,6 +237,8 @@ struct OpenCodeModelCache {
group_names: HashMap<String, String>,
default_group: String,
default_model: String,
/// Group IDs that have valid credentials available
connected: Vec<String>,
}
pub struct OpenCodeState {
@ -637,6 +643,21 @@ async fn opencode_model_cache(state: &OpenCodeAppState) -> OpenCodeModelCache {
}
async fn build_opencode_model_cache(state: &OpenCodeAppState) -> OpenCodeModelCache {
// Check credentials upfront
let credentials = match tokio::task::spawn_blocking(|| {
extract_all_credentials(&CredentialExtractionOptions::new())
})
.await
{
Ok(creds) => creds,
Err(err) => {
warn!("Failed to extract credentials for model cache: {err}");
ExtractedCredentials::default()
}
};
let has_anthropic = credentials.anthropic.is_some();
let has_openai = credentials.openai.is_some();
let mut entries = Vec::new();
let mut model_lookup = HashMap::new();
let mut ambiguous_models = HashSet::new();
@ -735,6 +756,28 @@ async fn build_opencode_model_cache(state: &OpenCodeAppState) -> OpenCodeModelCa
}
}
// Build connected list based on credential availability
let mut connected = Vec::new();
for group_id in group_names.keys() {
let is_connected = match group_agents.get(group_id) {
Some(AgentId::Claude) | Some(AgentId::Amp) => has_anthropic,
Some(AgentId::Codex) => has_openai,
Some(AgentId::Opencode) => {
// Check the specific provider for opencode groups (e.g., "opencode:anthropic")
match opencode_group_provider(group_id) {
Some("anthropic") => has_anthropic,
Some("openai") => has_openai,
_ => has_anthropic || has_openai,
}
}
Some(AgentId::Mock) => true,
None => false,
};
if is_connected {
connected.push(group_id.clone());
}
}
OpenCodeModelCache {
entries,
model_lookup,
@ -743,6 +786,7 @@ async fn build_opencode_model_cache(state: &OpenCodeAppState) -> OpenCodeModelCa
group_names,
default_group,
default_model,
connected,
}
}
@ -3962,7 +4006,6 @@ async fn oc_provider_list(State(state): State<Arc<OpenCodeAppState>>) -> impl In
}
let mut providers = Vec::new();
let mut defaults = serde_json::Map::new();
let mut connected = Vec::new();
for (group_id, entries) in grouped {
let mut models = serde_json::Map::new();
for entry in entries {
@ -3982,12 +4025,12 @@ async fn oc_provider_list(State(state): State<Arc<OpenCodeAppState>>) -> impl In
if let Some(default_model) = cache.group_defaults.get(&group_id) {
defaults.insert(group_id.clone(), Value::String(default_model.clone()));
}
connected.push(group_id);
}
// Use the connected list from cache (based on credential availability)
let providers = json!({
"all": providers,
"default": Value::Object(defaults),
"connected": connected
"connected": cache.connected
});
(StatusCode::OK, Json(providers))
}

View file

@ -1798,8 +1798,14 @@ impl SessionManager {
agent: AgentId,
) -> Result<AgentModelsResponse, SandboxError> {
match agent {
AgentId::Claude => self.fetch_claude_models().await,
AgentId::Codex => self.fetch_codex_models().await,
AgentId::Claude => match self.fetch_claude_models().await {
Ok(response) if !response.models.is_empty() => Ok(response),
_ => Ok(claude_fallback_models()),
},
AgentId::Codex => match self.fetch_codex_models().await {
Ok(response) if !response.models.is_empty() => Ok(response),
_ => Ok(codex_fallback_models()),
},
AgentId::Opencode => match self.fetch_opencode_models().await {
Ok(models) => Ok(models),
Err(_) => Ok(AgentModelsResponse {
@ -3927,6 +3933,8 @@ pub struct ServerStatusInfo {
pub struct AgentInfo {
pub id: String,
pub installed: bool,
/// Whether the agent's required provider credentials are available
pub credentials_available: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub version: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
@ -4194,6 +4202,10 @@ async fn list_agents(
let agents =
tokio::task::spawn_blocking(move || {
let credentials = extract_all_credentials(&CredentialExtractionOptions::new());
let has_anthropic = credentials.anthropic.is_some();
let has_openai = credentials.openai.is_some();
all_agents()
.into_iter()
.map(|agent_id| {
@ -4202,6 +4214,13 @@ async fn list_agents(
let path = manager.resolve_binary(agent_id).ok();
let capabilities = agent_capabilities_for(agent_id);
let credentials_available = match agent_id {
AgentId::Claude | AgentId::Amp => has_anthropic,
AgentId::Codex => has_openai,
AgentId::Opencode => has_anthropic || has_openai,
AgentId::Mock => true,
};
// Add server_status for agents with shared processes
let server_status =
if capabilities.shared_process {
@ -4221,6 +4240,7 @@ async fn list_agents(
AgentInfo {
id: agent_id.as_str().to_string(),
installed,
credentials_available,
version,
path: path.map(|path| path.to_string_lossy().to_string()),
capabilities,
@ -4742,6 +4762,38 @@ fn mock_models_response() -> AgentModelsResponse {
}
}
/// Static fallback model list for Claude Code, returned when the live
/// model fetch fails or yields an empty list.
fn claude_fallback_models() -> AgentModelsResponse {
    const CLAUDE_MODEL_IDS: [&str; 2] = ["claude-sonnet-4-20250514", "claude-opus-4-20250514"];

    let mut models = Vec::with_capacity(CLAUDE_MODEL_IDS.len());
    for id in CLAUDE_MODEL_IDS {
        // Claude models expose no named variants in the fallback set.
        models.push(AgentModelInfo {
            id: id.to_string(),
            name: None,
            variants: None,
            default_variant: None,
        });
    }

    AgentModelsResponse {
        models,
        // Sonnet is the preferred default among the fallback entries.
        default_model: Some("claude-sonnet-4-20250514".to_string()),
    }
}
/// Static fallback model list for Codex, returned when the live
/// model fetch fails or yields an empty list.
fn codex_fallback_models() -> AgentModelsResponse {
    let mut models = Vec::new();
    for id in ["gpt-4o", "o3", "o4-mini"] {
        // Every Codex fallback model shares the same reasoning-effort
        // variants, with "medium" as the default effort.
        models.push(AgentModelInfo {
            id: id.to_string(),
            name: None,
            variants: Some(codex_variants()),
            default_variant: Some("medium".to_string()),
        });
    }

    AgentModelsResponse {
        models,
        default_model: Some("gpt-4o".to_string()),
    }
}
fn amp_variants() -> Vec<String> {
vec!["medium", "high", "xhigh"]
.into_iter()