From 2f26f76d9bb83e09feb0e9a9e3190660d1d43b0a Mon Sep 17 00:00:00 2001 From: Nathan Flurry Date: Thu, 5 Feb 2026 11:32:39 -0800 Subject: [PATCH] feat: add raw session args/opts for agent passthrough --- docs/openapi.json | 26 ++++- docs/sdks/typescript.mdx | 20 ++++ docs/session-transcript-schema.mdx | 8 ++ research/agents/amp.md | 34 +++++-- research/agents/claude.md | 31 +++++- research/agents/codex.md | 94 ++++++++++++++++--- research/agents/opencode.md | 46 ++++++--- sdks/typescript/src/generated/openapi.ts | 10 ++ .../packages/agent-management/src/agents.rs | 38 +++++++- server/packages/sandbox-agent/src/main.rs | 2 + .../sandbox-agent/src/opencode_compat.rs | 2 + server/packages/sandbox-agent/src/router.rs | 40 ++++++++ .../tests/agent-management/mod.rs | 1 + .../agent-management/raw_session_args.rs | 50 ++++++++++ 14 files changed, 365 insertions(+), 37 deletions(-) create mode 100644 server/packages/sandbox-agent/tests/agent-management/raw_session_args.rs diff --git a/docs/openapi.json b/docs/openapi.json index 76c76f0..713e1c4 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -669,7 +669,9 @@ "mcpTools", "streamingDeltas", "itemStarted", - "sharedProcess" + "sharedProcess", + "rawSessionArgs", + "rawSessionOptions" ], "properties": { "commandExecution": { @@ -702,6 +704,14 @@ "questions": { "type": "boolean" }, + "rawSessionArgs": { + "type": "boolean", + "description": "Whether this agent supports raw CLI arguments passed at session creation" + }, + "rawSessionOptions": { + "type": "boolean", + "description": "Whether this agent supports raw options passed at session creation" + }, "reasoning": { "type": "boolean" }, @@ -1074,6 +1084,20 @@ "variant": { "type": "string", "nullable": true + }, + "rawSessionArgs": { + "type": "array", + "items": { + "type": "string" + }, + "nullable": true, + "description": "Raw CLI arguments to pass to the agent (for CLI-based agents like Claude, OpenCode, Amp)" + }, + "rawSessionOptions": { + "type": "object", + 
"additionalProperties": true, + "nullable": true, + "description": "Raw options to pass to the agent (for long-running server agents like Codex)" } } }, diff --git a/docs/sdks/typescript.mdx b/docs/sdks/typescript.mdx index bf338a7..b8d316b 100644 --- a/docs/sdks/typescript.mdx +++ b/docs/sdks/typescript.mdx @@ -62,6 +62,26 @@ await client.createSession("demo-session", { await client.postMessage("demo-session", { message: "Hello" }); ``` +### Raw session arguments + +Pass low-level arguments directly to agents at session creation: + +```ts +// CLI args for Claude, OpenCode, Amp (not Codex) +await client.createSession("my-session", { + agent: "claude", + rawSessionArgs: ["--max-turns", "5"], +}); + +// Options passed through agent's native protocol (long-running servers only) +await client.createSession("my-session", { + agent: "codex", + rawSessionOptions: { sandbox: "workspace-write" }, +}); +``` + +Check `capabilities.rawSessionArgs` and `capabilities.rawSessionOptions` to see what each agent supports. + List agents and inspect feature coverage (available on `capabilities`): ```ts diff --git a/docs/session-transcript-schema.mdx b/docs/session-transcript-schema.mdx index a5a0158..1c92e65 100644 --- a/docs/session-transcript-schema.mdx +++ b/docs/session-transcript-schema.mdx @@ -29,6 +29,8 @@ This table shows which agent feature coverage appears in the universal event str | File Changes | - | ✓ | - | - | | MCP Tools | - | ✓ | - | - | | Streaming Deltas | ✓ | ✓ | ✓ | - | +| Raw Session Args | ✓ | - | ✓ | ✓ | +| Raw Session Options | - | ✓ | - | - | Agents: [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/overview) · [Codex](https://github.com/openai/codex) · [OpenCode](https://github.com/opencode-ai/opencode) · [Amp](https://ampcode.com) @@ -76,6 +78,12 @@ Agents: [Claude Code](https://docs.anthropic.com/en/docs/agents-and-tools/claude Native streaming of content deltas. 
When not supported, the daemon emits a single synthetic delta before `item.completed`. + + Pass raw CLI arguments directly to the agent at session creation via `rawSessionArgs`. Only supported for CLI-based agents (Claude, OpenCode, Amp). Codex uses JSON-RPC, so CLI args are not applicable. + + + Pass raw options to the agent at session creation via `rawSessionOptions`. For long-running server agents, options are passed through the agent's native protocol. For Codex, options are merged into the `thread/start` config. + Want support for another agent? [Open an issue](https://github.com/rivet-dev/sandbox-agent/issues/new) to request it. diff --git a/research/agents/amp.md b/research/agents/amp.md index f6b800f..97c63d0 100644 --- a/research/agents/amp.md +++ b/research/agents/amp.md @@ -25,16 +25,32 @@ amp --print --output-format stream-json --dangerously-skip-permissions "prompt" amp --continue SESSION_ID "follow up" ``` -### Key CLI Flags +### Custom Args (CLI Flags) -| Flag | Description | -|------|-------------| -| `--print` | Output mode (non-interactive) | -| `--output-format stream-json` | JSONL streaming output | -| `--dangerously-skip-permissions` | Skip permission prompts | -| `--continue SESSION_ID` | Resume existing session | -| `--model MODEL` | Specify model | -| `--toolbox TOOLBOX` | Toolbox configuration | +#### Core Flags + +| Flag | Type | Description | +|------|------|-------------| +| `--print` | bool | Output mode (non-interactive) | +| `--execute` | bool | Alternative output mode (some versions) | +| `--output-format stream-json` | string | JSONL streaming output | +| `--model MODEL` | string | Specify model to use | +| `--continue SESSION_ID` | string | Resume existing session | + +#### Permission Flags + +| Flag | Type | Description | +|------|------|-------------| +| `--dangerously-skip-permissions` | bool | Skip all permission prompts | + +#### Configuration Flags + +| Flag | Type | Description | +|------|------|-------------| +| `--toolbox 
NAME` | string | Toolbox configuration to use | +| `--mcp-config FILE` | path | Path to MCP configuration file | + +**Note:** Amp CLI flags vary by version. The daemon uses flag detection (`amp --help`) to determine which flags are supported before invocation. ## Credential Discovery diff --git a/research/agents/claude.md b/research/agents/claude.md index 42a552b..521e936 100644 --- a/research/agents/claude.md +++ b/research/agents/claude.md @@ -71,17 +71,42 @@ claude \ "PROMPT" ``` -### Arguments +### Core Arguments | Flag | Description | |------|-------------| -| `--print` | Output mode | +| `--print` | Output mode (non-interactive) | | `--output-format stream-json` | Newline-delimited JSON streaming | | `--verbose` | Verbose output | | `--dangerously-skip-permissions` | Skip permission prompts | | `--resume SESSION_ID` | Resume existing session | | `--model MODEL_ID` | Specify model (e.g., `claude-sonnet-4-20250514`) | -| `--permission-mode plan` | Plan mode (read-only exploration) | +| `--permission-mode MODE` | Permission mode (`plan`, `acceptEdits`) | + +### Custom Args (Session Configuration) + +These flags can be passed to customize agent behavior at session start: + +| Flag | Type | Description | +|------|------|-------------| +| `--max-turns N` | int | Maximum number of agent turns before stopping | +| `--system-prompt TEXT` | string | Custom system prompt (replaces default) | +| `--append-system-prompt TEXT` | string | Text to append to the system prompt | +| `--allowed-tools TOOLS` | string (comma-sep) | Comma-separated list of allowed tools | +| `--disallowed-tools TOOLS` | string (comma-sep) | Comma-separated list of disallowed tools | +| `--mcp-server NAME` | string | Add an MCP server by name | +| `--timeout-secs N` | int | Timeout in seconds (overrides default 300s) | +| `--profile NAME` | string | Use a specific configuration profile | + +### Streaming Input Mode Flags + +When using `--input-format stream-json` for streaming input: + +| Flag | 
Description | +|------|-------------| +| `--input-format stream-json` | Accept streaming JSON input via stdin | +| `--permission-prompt-tool stdio` | Handle permission prompts via stdio | +| `--include-partial-messages` | Include partial message events in output | ### Environment Variables diff --git a/research/agents/codex.md b/research/agents/codex.md index b0e4098..2483ec2 100644 --- a/research/agents/codex.md +++ b/research/agents/codex.md @@ -41,17 +41,54 @@ codex exec --dangerously-bypass-approvals-and-sandbox "prompt" codex exec resume --last # Resume previous session ``` -### Key CLI Flags -| Flag | Description | -|------|-------------| -| `--json` | Print events to stdout as JSONL | -| `-m, --model MODEL` | Model to use | -| `-s, --sandbox MODE` | `read-only`, `workspace-write`, `danger-full-access` | -| `--full-auto` | Auto-approve with workspace-write sandbox | -| `--dangerously-bypass-approvals-and-sandbox` | Skip all prompts (dangerous) | -| `-C, --cd DIR` | Working directory | -| `-o, --output-last-message FILE` | Write final response to file | -| `--output-schema FILE` | JSON Schema for structured output | +### Custom Args (CLI Flags) + +#### Core Flags + +| Flag | Type | Description | +|------|------|-------------| +| `-m, --model MODEL` | string | Model to use (e.g., `o3`, `gpt-4o`) | +| `--json` | bool | Print events to stdout as JSONL | +| `-C, --cd DIR` | path | Working directory for the agent | +| `-o, --output-last-message FILE` | path | Write final response to file | + +#### Permission & Sandbox Flags + +| Flag | Type | Values | Description | +|------|------|--------|-------------| +| `-s, --sandbox MODE` | enum | `read-only`, `workspace-write`, `danger-full-access` | Sandbox policy for shell commands | +| `-a, --ask-for-approval POLICY` | enum | `untrusted`, `on-failure`, `on-request`, `never` | When to require human approval | +| `--full-auto` | bool | - | Convenience alias: `-a on-request --sandbox workspace-write` | +| 
`--dangerously-bypass-approvals-and-sandbox` | bool | - | Skip all prompts and sandboxing (DANGEROUS) | + +#### Configuration Overrides + +| Flag | Type | Description | +|------|------|-------------| +| `-c, --config key=value` | string | Override config values (parsed as TOML) | +| `-p, --profile NAME` | string | Use a configuration profile from config.toml | +| `--enable FEATURE` | string | Enable a feature flag (repeatable) | +| `--disable FEATURE` | string | Disable a feature flag (repeatable) | + +Config override examples: +```bash +codex -c model="o3" +codex -c 'sandbox_permissions=["disk-full-read-access"]' +codex -c shell_environment_policy.inherit=all +``` + +#### Additional Capabilities + +| Flag | Type | Description | +|------|------|-------------| +| `-i, --image FILE` | path[] | Attach image(s) to the initial prompt (repeatable) | +| `--add-dir DIR` | path[] | Additional directories that should be writable (repeatable) | +| `--search` | bool | Enable live web search via `web_search` tool | +| `--output-schema FILE` | path | JSON Schema file for structured output | +| `--skip-git-repo-check` | bool | Allow running outside a Git repository | +| `--oss` | bool | Use local open source model provider (LM Studio/Ollama) | +| `--local-provider PROVIDER` | enum | `lmstudio`, `ollama`, `ollama-chat` | +| `--color COLOR` | enum | `always`, `never`, `auto` | ### Session Management ```bash @@ -140,6 +177,41 @@ Codex App Server uses JSON-RPC 2.0 over JSONL/stdin/stdout (no port required). 
- `thread/start` → starts a new thread - `turn/start` → sends user input for a thread +### Custom Args (JSON-RPC Parameters) + +#### `thread/start` Parameters + +| Field | Type | Description | +|-------|------|-------------| +| `approval_policy` | enum | `Never`, `Untrusted` - when to ask for approval | +| `sandbox` | enum | `ReadOnly`, `DangerFullAccess` - sandbox mode | +| `model` | string | Model to use for this thread | +| `cwd` | string | Working directory | + +#### `turn/start` Parameters + +| Field | Type | Description | +|-------|------|-------------| +| `thread_id` | string | Thread ID from `thread/start` response | +| `input` | array | User input (e.g., `[{ "type": "text", "text": "..." }]`) | +| `approval_policy` | enum | Override approval policy for this turn | +| `sandbox_policy` | enum | Override sandbox policy for this turn | +| `model` | string | Override model for this turn | +| `cwd` | string | Override working directory | +| `effort` | string | Reasoning effort level | +| `output_schema` | object | JSON Schema for structured output | +| `summary` | string | Summary context for the turn | +| `collaboration_mode` | string | Collaboration mode (if supported) | + +#### App Server CLI Flags + +| Flag | Description | +|------|-------------| +| `-c, --config key=value` | Override config (same as interactive mode) | +| `--enable FEATURE` | Enable feature flag | +| `--disable FEATURE` | Disable feature flag | +| `--analytics-default-enabled` | Enable analytics by default (for first-party use) | + ### Event Notifications (examples) ```json diff --git a/research/agents/opencode.md b/research/agents/opencode.md index 8a19fd2..0b013bc 100644 --- a/research/agents/opencode.md +++ b/research/agents/opencode.md @@ -34,18 +34,40 @@ opencode run -s SESSION_ID "prompt" # Continue specific session opencode run -f file1.ts -f file2.ts "review these files" ``` -### Key CLI Flags -| Flag | Description | -|------|-------------| -| `--format json` | Output raw JSON 
events (for parsing) | -| `-m, --model PROVIDER/MODEL` | Model in format `provider/model` | -| `--agent AGENT` | Agent to use (`build`, `plan`) | -| `-c, --continue` | Continue last session | -| `-s, --session ID` | Continue specific session | -| `-f, --file FILE` | Attach file(s) to message | -| `--attach URL` | Attach to running server | -| `--port PORT` | Local server port | -| `--variant VARIANT` | Reasoning effort (e.g., `high`, `max`) | +### Custom Args (CLI Flags) + +#### Core Flags + +| Flag | Type | Description | +|------|------|-------------| +| `-m, --model PROVIDER/MODEL` | string | Model in format `provider/model` (e.g., `anthropic/claude-sonnet-4-20250514`) | +| `--agent AGENT` | string | Agent to use (`build`, `plan`, or custom agent ID) | +| `--format FORMAT` | enum | `default` (formatted) or `json` (raw JSON events) | +| `--variant VARIANT` | string | Reasoning effort level (e.g., `high`, `max`, `minimal`) | + +#### Session Flags + +| Flag | Type | Description | +|------|------|-------------| +| `-c, --continue` | bool | Continue the last session | +| `-s, --session ID` | string | Continue a specific session by ID | +| `--title TEXT` | string | Title for the session (uses truncated prompt if omitted) | +| `--share` | bool | Share the session publicly | + +#### Input/Output Flags + +| Flag | Type | Description | +|------|------|-------------| +| `-f, --file FILE` | path[] | Attach file(s) to message (repeatable) | +| `--attach URL` | string | Attach to a running OpenCode server (e.g., `http://localhost:4096`) | +| `--port PORT` | int | Port for the local server (random if not specified) | + +#### Debugging Flags + +| Flag | Type | Values | Description | +|------|------|--------|-------------| +| `--log-level LEVEL` | enum | `DEBUG`, `INFO`, `WARN`, `ERROR` | Log verbosity level | +| `--print-logs` | bool | - | Print logs to stderr | ### Headless Server Mode ```bash diff --git a/sdks/typescript/src/generated/openapi.ts 
b/sdks/typescript/src/generated/openapi.ts index 52816ad..e452eea 100644 --- a/sdks/typescript/src/generated/openapi.ts +++ b/sdks/typescript/src/generated/openapi.ts @@ -64,6 +64,10 @@ export interface components { permissions: boolean; planMode: boolean; questions: boolean; + /** @description Whether this agent supports raw CLI arguments passed at session creation */ + rawSessionArgs: boolean; + /** @description Whether this agent supports raw options passed at session creation */ + rawSessionOptions: boolean; reasoning: boolean; sessionLifecycle: boolean; /** @description Whether this agent uses a shared long-running server process (vs per-turn subprocess) */ @@ -156,6 +160,12 @@ export interface components { model?: string | null; permissionMode?: string | null; variant?: string | null; + /** @description Raw CLI arguments to pass to the agent (for CLI-based agents like Claude, OpenCode, Amp) */ + rawSessionArgs?: string[] | null; + /** @description Raw options to pass to the agent (for long-running server agents like Codex) */ + rawSessionOptions?: { + [key: string]: unknown; + } | null; }; CreateSessionResponse: { error?: components["schemas"]["AgentError"] | null; diff --git a/server/packages/agent-management/src/agents.rs b/server/packages/agent-management/src/agents.rs index e110c96..272fd64 100644 --- a/server/packages/agent-management/src/agents.rs +++ b/server/packages/agent-management/src/agents.rs @@ -237,6 +237,10 @@ impl AgentManager { } _ => {} } + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } if options.streaming_input { command .arg("--input-format") @@ -268,6 +272,10 @@ impl AgentManager { if let Some(session_id) = options.session_id.as_deref() { command.arg("-s").arg(session_id); } + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } command.arg(&options.prompt); } AgentId::Amp => { @@ -583,6 +591,10 @@ impl AgentManager { } _ => {} } + // Apply raw CLI args + for arg in 
&options.raw_args { + command.arg(arg); + } if options.streaming_input { command .arg("--input-format") @@ -614,6 +626,10 @@ impl AgentManager { if let Some(session_id) = options.session_id.as_deref() { command.arg("-s").arg(session_id); } + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } command.arg(&options.prompt); } AgentId::Amp => { @@ -682,6 +698,8 @@ pub struct SpawnOptions { pub env: HashMap, /// Use stream-json input via stdin (Claude only). pub streaming_input: bool, + /// Raw CLI arguments to pass to the agent (for CLI-based agents). + pub raw_args: Vec, } impl SpawnOptions { @@ -696,6 +714,7 @@ impl SpawnOptions { working_dir: None, env: HashMap::new(), streaming_input: false, + raw_args: Vec::new(), } } } @@ -1054,7 +1073,12 @@ fn spawn_amp( if let Some(session_id) = options.session_id.as_deref() { command.arg("--continue").arg(session_id); } - command.args(&args).arg(&options.prompt); + command.args(&args); + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } + command.arg(&options.prompt); for (key, value) in &options.env { command.env(key, value); } @@ -1095,6 +1119,10 @@ fn build_amp_command(path: &Path, working_dir: &Path, options: &SpawnOptions) -> if flags.dangerously_skip_permissions && options.permission_mode.as_deref() == Some("bypass") { command.arg("--dangerously-skip-permissions"); } + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } command.arg(&options.prompt); for (key, value) in &options.env { command.env(key, value); @@ -1157,6 +1185,10 @@ fn spawn_amp_fallback( if !args.is_empty() { command.args(&args); } + // Apply raw CLI args + for arg in &options.raw_args { + command.arg(arg); + } command.arg(&options.prompt); for (key, value) in &options.env { command.env(key, value); @@ -1175,6 +1207,10 @@ fn spawn_amp_fallback( if let Some(session_id) = options.session_id.as_deref() { command.arg("--continue").arg(session_id); } + // Apply raw CLI args + 
for arg in &options.raw_args { + command.arg(arg); + } command.arg(&options.prompt); for (key, value) in &options.env { command.env(key, value); diff --git a/server/packages/sandbox-agent/src/main.rs b/server/packages/sandbox-agent/src/main.rs index 46a9efe..3fa4f81 100644 --- a/server/packages/sandbox-agent/src/main.rs +++ b/server/packages/sandbox-agent/src/main.rs @@ -591,6 +591,8 @@ fn run_sessions(command: &SessionsCommand, cli: &Cli) -> Result<(), CliError> { model: args.model.clone(), variant: args.variant.clone(), agent_version: args.agent_version.clone(), + raw_session_args: None, + raw_session_options: None, }; let path = format!("{API_PREFIX}/sessions/{}", args.session_id); let response = ctx.post(&path, &body)?; diff --git a/server/packages/sandbox-agent/src/opencode_compat.rs b/server/packages/sandbox-agent/src/opencode_compat.rs index 427d440..6d6f8ab 100644 --- a/server/packages/sandbox-agent/src/opencode_compat.rs +++ b/server/packages/sandbox-agent/src/opencode_compat.rs @@ -379,6 +379,8 @@ async fn ensure_backing_session( model: None, variant: None, agent_version: None, + raw_session_args: None, + raw_session_options: None, }; match state .inner diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs index 92460d5..442b899 100644 --- a/server/packages/sandbox-agent/src/router.rs +++ b/server/packages/sandbox-agent/src/router.rs @@ -323,6 +323,8 @@ struct SessionState { model: Option, variant: Option, native_session_id: Option, + raw_session_args: Option>, + raw_session_options: Option, ended: bool, ended_exit_code: Option, ended_message: Option, @@ -381,6 +383,8 @@ impl SessionState { model: request.model.clone(), variant: request.variant.clone(), native_session_id: None, + raw_session_args: request.raw_session_args.clone(), + raw_session_options: request.raw_session_options.clone(), ended: false, ended_exit_code: None, ended_message: None, @@ -1614,6 +1618,8 @@ impl SessionManager { model: 
session.model.clone(), variant: session.variant.clone(), native_session_id: None, + raw_session_args: session.raw_session_args.clone(), + raw_session_options: session.raw_session_options.clone(), }; let thread_id = self.create_codex_thread(&session_id, &snapshot).await?; session.native_session_id = Some(thread_id); @@ -3079,6 +3085,15 @@ impl SessionManager { params.sandbox = codex_sandbox_mode(Some(&session.permission_mode)); params.model = session.model.clone(); + // Merge raw_session_options into the config field if provided + if let Some(serde_json::Value::Object(raw_options)) = &session.raw_session_options { + let mut config = params.config.take().unwrap_or_default(); + for (key, value) in raw_options { + config.insert(key.clone(), value.clone()); + } + params.config = Some(config); + } + let request = codex_schema::ClientRequest::ThreadStart { id: codex_schema::RequestId::from(id), params, @@ -3488,6 +3503,10 @@ pub struct AgentCapabilities { pub item_started: bool, /// Whether this agent uses a shared long-running server process (vs per-turn subprocess) pub shared_process: bool, + /// Whether this agent supports raw CLI arguments passed at session creation + pub raw_session_args: bool, + /// Whether this agent supports raw options passed at session creation (long-running server agents) + pub raw_session_options: bool, } /// Status of a shared server process for an agent @@ -3575,6 +3594,12 @@ pub struct CreateSessionRequest { pub variant: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub agent_version: Option, + /// Raw CLI arguments to pass to the agent (for CLI-based agents like Claude, OpenCode, Amp) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub raw_session_args: Option>, + /// Raw options to pass to the agent (for long-running server agents like Codex) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub raw_session_options: Option, } #[derive(Debug, Clone, Serialize, Deserialize, ToSchema, 
JsonSchema)] @@ -4120,6 +4145,8 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { streaming_deltas: true, item_started: false, shared_process: false, // per-turn subprocess with --resume + raw_session_args: true, + raw_session_options: false, }, AgentId::Codex => AgentCapabilities { plan_mode: true, @@ -4140,6 +4167,8 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { streaming_deltas: true, item_started: true, shared_process: true, // shared app-server via JSON-RPC + raw_session_args: false, + raw_session_options: true, }, AgentId::Opencode => AgentCapabilities { plan_mode: false, @@ -4160,6 +4189,8 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { streaming_deltas: true, item_started: true, shared_process: true, // shared HTTP server + raw_session_args: true, + raw_session_options: false, }, AgentId::Amp => AgentCapabilities { plan_mode: false, @@ -4180,6 +4211,8 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { streaming_deltas: false, item_started: false, shared_process: false, // per-turn subprocess with --continue + raw_session_args: true, + raw_session_options: false, }, AgentId::Mock => AgentCapabilities { plan_mode: true, @@ -4200,6 +4233,8 @@ fn agent_capabilities_for(agent: AgentId) -> AgentCapabilities { streaming_deltas: true, item_started: true, shared_process: false, // in-memory mock (no subprocess) + raw_session_args: false, + raw_session_options: false, }, } } @@ -4434,6 +4469,7 @@ fn build_spawn_options( None } }); + options.raw_args = session.raw_session_args.clone().unwrap_or_default(); if let Some(anthropic) = credentials.anthropic { options .env @@ -6461,6 +6497,8 @@ struct SessionSnapshot { model: Option, variant: Option, native_session_id: Option, + raw_session_args: Option>, + raw_session_options: Option, } impl From<&SessionState> for SessionSnapshot { @@ -6473,6 +6511,8 @@ impl From<&SessionState> for SessionSnapshot { model: session.model.clone(), variant: 
session.variant.clone(), native_session_id: session.native_session_id.clone(), + raw_session_args: session.raw_session_args.clone(), + raw_session_options: session.raw_session_options.clone(), } } } diff --git a/server/packages/sandbox-agent/tests/agent-management/mod.rs b/server/packages/sandbox-agent/tests/agent-management/mod.rs index fcf8db0..8b831bc 100644 --- a/server/packages/sandbox-agent/tests/agent-management/mod.rs +++ b/server/packages/sandbox-agent/tests/agent-management/mod.rs @@ -1 +1,2 @@ mod agents; +mod raw_session_args; diff --git a/server/packages/sandbox-agent/tests/agent-management/raw_session_args.rs b/server/packages/sandbox-agent/tests/agent-management/raw_session_args.rs new file mode 100644 index 0000000..186f492 --- /dev/null +++ b/server/packages/sandbox-agent/tests/agent-management/raw_session_args.rs @@ -0,0 +1,50 @@ +use sandbox_agent_agent_management::agents::{AgentId, AgentManager, InstallOptions, SpawnOptions}; + +/// Tests that raw_args are passed to CLI-based agents. +/// We use `--version` as a raw arg which causes agents to print version info and exit. 
+#[test] +fn test_raw_args_version_flag() -> Result<(), Box> { + let temp_dir = tempfile::tempdir()?; + let manager = AgentManager::new(temp_dir.path().join("bin"))?; + + // Test Claude with --version + manager.install(AgentId::Claude, InstallOptions::default())?; + let mut spawn = SpawnOptions::new("test"); + spawn.raw_args = vec!["--version".to_string()]; + let result = manager.spawn(AgentId::Claude, spawn)?; + let output = format!("{}{}", result.stdout, result.stderr); + assert!( + output.to_lowercase().contains("version") + || output.contains("claude") + || result.status.code() == Some(0), + "Claude --version failed: {output}" + ); + + // Test OpenCode with --version + manager.install(AgentId::Opencode, InstallOptions::default())?; + let mut spawn = SpawnOptions::new("test"); + spawn.raw_args = vec!["--version".to_string()]; + let result = manager.spawn(AgentId::Opencode, spawn)?; + let output = format!("{}{}", result.stdout, result.stderr); + assert!( + output.to_lowercase().contains("version") + || output.contains("opencode") + || result.status.code() == Some(0), + "OpenCode --version failed: {output}" + ); + + // Test Amp with --version + manager.install(AgentId::Amp, InstallOptions::default())?; + let mut spawn = SpawnOptions::new("test"); + spawn.raw_args = vec!["--version".to_string()]; + let result = manager.spawn(AgentId::Amp, spawn)?; + let output = format!("{}{}", result.stdout, result.stderr); + assert!( + output.to_lowercase().contains("version") + || output.contains("amp") + || result.status.code() == Some(0), + "Amp --version failed: {output}" + ); + + Ok(()) +}