diff --git a/CLAUDE.md b/CLAUDE.md index 624602a..6266297 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -33,6 +33,11 @@ - `docs/agent-capabilities.mdx` lists models/modes/thought levels per agent. Update it when adding a new agent or changing `fallback_config_options`. If its "Last updated" date is >2 weeks old, re-run `cd scripts/agent-configs && npx tsx dump.ts` and update the doc to match. Source data: `scripts/agent-configs/resources/*.json` and hardcoded entries in `server/packages/sandbox-agent/src/router/support.rs` (`fallback_config_options`). - Some agent models are gated by subscription (e.g. Claude `opus`). The live report only shows models available to the current credentials. The static doc and JSON resource files should list all known models regardless of subscription tier. +## Docker Test Image + +- Docker-backed Rust and TypeScript tests build `docker/test-agent/Dockerfile` directly in-process and cache the image tag only in memory (`OnceLock` in Rust, module-level variable in TypeScript). +- Do not add cross-process image-build scripts unless there is a concrete need for them. + ## Install Version References - Channel policy: diff --git a/scripts/test-rig/ensure-image.sh b/scripts/test-rig/ensure-image.sh deleted file mode 100755 index 56e7f69..0000000 --- a/scripts/test-rig/ensure-image.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -IMAGE_TAG="${SANDBOX_AGENT_TEST_IMAGE:-sandbox-agent-test:dev}" -LOCK_DIR="$ROOT_DIR/.context/docker-test-image.lock" -STAMP_FILE="$ROOT_DIR/.context/docker-test-image.stamp" - -INPUTS=( - "$ROOT_DIR/Cargo.toml" - "$ROOT_DIR/Cargo.lock" - "$ROOT_DIR/server" - "$ROOT_DIR/gigacode" - "$ROOT_DIR/resources/agent-schemas/artifacts" - "$ROOT_DIR/scripts/agent-configs" - "$ROOT_DIR/docker/test-agent/Dockerfile" -) - -release_lock() { - if [[ -d "$LOCK_DIR" ]]; then - rm -rf "$LOCK_DIR" - fi -} - -latest_input_mtime() { - find "${INPUTS[@]}" -type f -exec stat -f '%m' {} + | sort -nr | head -n1 -} - -image_is_ready() { - if ! docker image inspect "$IMAGE_TAG" >/dev/null 2>&1; then - return 1 - fi - - if [[ ! -f "$STAMP_FILE" ]]; then - return 1 - fi - - local stamp_mtime - stamp_mtime="$(stat -f '%m' "$STAMP_FILE")" - local latest_mtime - latest_mtime="$(latest_input_mtime)" - - [[ -n "$latest_mtime" && "$stamp_mtime" -ge "$latest_mtime" ]] -} - -mkdir -p "$ROOT_DIR/.context" - -if image_is_ready; then - printf '%s\n' "$IMAGE_TAG" - exit 0 -fi - -while ! mkdir "$LOCK_DIR" 2>/dev/null; do - sleep 1 -done - -trap release_lock EXIT - -if image_is_ready; then - printf '%s\n' "$IMAGE_TAG" - exit 0 -fi - -docker build \ - --tag "$IMAGE_TAG" \ - --file "$ROOT_DIR/docker/test-agent/Dockerfile" \ - "$ROOT_DIR" \ - >/dev/null - -touch "$STAMP_FILE" - -printf '%s\n' "$IMAGE_TAG" diff --git a/sdks/typescript/tests/helpers/docker.ts b/sdks/typescript/tests/helpers/docker.ts index 71837cc..28c03ad 100644 --- a/sdks/typescript/tests/helpers/docker.ts +++ b/sdks/typescript/tests/helpers/docker.ts @@ -5,9 +5,9 @@ import { fileURLToPath } from "node:url"; const __dirname = dirname(fileURLToPath(import.meta.url)); const REPO_ROOT = resolve(__dirname, "../../../.."); -const ENSURE_IMAGE = resolve(REPO_ROOT, "scripts/test-rig/ensure-image.sh"); const CONTAINER_PORT = 3000; const DEFAULT_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; +const DEFAULT_IMAGE_TAG = "sandbox-agent-test:dev"; const STANDARD_PATHS = new Set([ "/usr/local/sbin", "/usr/local/bin", @@ -184,11 +184,22 @@ function ensureImage(): string { return cachedImage; } - cachedImage = execFileSync("bash", [ENSURE_IMAGE], { - cwd: REPO_ROOT, - encoding: "utf8", - stdio: ["ignore", "pipe", "pipe"], - }).trim(); + cachedImage = process.env.SANDBOX_AGENT_TEST_IMAGE ?? DEFAULT_IMAGE_TAG; + execFileSync( + "docker", + [ + "build", + "--tag", + cachedImage, + "--file", + resolve(REPO_ROOT, "docker/test-agent/Dockerfile"), + REPO_ROOT, + ], + { + cwd: REPO_ROOT, + stdio: ["ignore", "ignore", "pipe"], + }, + ); return cachedImage; } diff --git a/server/packages/sandbox-agent/src/desktop_errors.rs b/server/packages/sandbox-agent/src/desktop_errors.rs index 5a3a016..67f99b9 100644 --- a/server/packages/sandbox-agent/src/desktop_errors.rs +++ b/server/packages/sandbox-agent/src/desktop_errors.rs @@ -29,7 +29,7 @@ impl DesktopProblem { install_command: Option, processes: Vec, ) -> Self { - let message = if missing_dependencies.is_empty() { + let mut message = if missing_dependencies.is_empty() { "Desktop dependencies are not installed".to_string() } else { format!( @@ -37,6 +37,11 @@ impl DesktopProblem { missing_dependencies.join(", ") ) }; + if let Some(command) = install_command.as_ref() { + message.push_str(&format!( + ". Run `{command}` to install them, or install the required tools manually." + )); + } Self::new( 503, "Desktop Dependencies Missing", @@ -186,3 +191,27 @@ impl DesktopProblem { self } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn dependencies_missing_detail_includes_install_command() { + let problem = DesktopProblem::dependencies_missing( + vec!["Xvfb".to_string(), "openbox".to_string()], + Some("sandbox-agent install desktop --yes".to_string()), + Vec::new(), + ); + let details = problem.to_problem_details(); + let detail = details.detail.expect("detail"); + assert!(detail.contains("Desktop dependencies are not installed: Xvfb, openbox")); + assert!(detail.contains("sandbox-agent install desktop --yes")); + assert_eq!( + details.extensions.get("installCommand"), + Some(&Value::String( + "sandbox-agent install desktop --yes".to_string() + )) + ); + } +} diff --git a/server/packages/sandbox-agent/src/desktop_install.rs b/server/packages/sandbox-agent/src/desktop_install.rs index 5b8a306..f6bea21 100644 --- a/server/packages/sandbox-agent/src/desktop_install.rs +++ b/server/packages/sandbox-agent/src/desktop_install.rs @@ -5,6 +5,11 @@ use std::process::Command as ProcessCommand; use clap::ValueEnum; +const AUTOMATIC_INSTALL_SUPPORTED_DISTROS: &str = + "Automatic desktop dependency installation is supported on Debian/Ubuntu (apt), Fedora/RHEL (dnf), and Alpine (apk)."; +const AUTOMATIC_INSTALL_UNSUPPORTED_ENVS: &str = + "Automatic installation is not supported on macOS, Windows, or Linux distributions without apt, dnf, or apk."; + #[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] pub enum DesktopPackageManager { Apt, @@ -20,17 +25,29 @@ pub struct DesktopInstallRequest { pub no_fonts: bool, } +pub(crate) fn desktop_platform_support_message() -> String { + format!("Desktop APIs are only supported on Linux. {AUTOMATIC_INSTALL_SUPPORTED_DISTROS}") +} + +fn linux_install_support_message() -> String { + format!("{AUTOMATIC_INSTALL_SUPPORTED_DISTROS} {AUTOMATIC_INSTALL_UNSUPPORTED_ENVS}") +} + pub fn install_desktop(request: DesktopInstallRequest) -> Result<(), String> { if std::env::consts::OS != "linux" { - return Err( - "desktop installation is only supported on Linux hosts and sandboxes".to_string(), - ); + return Err(format!( + "desktop installation is only supported on Linux. {}", + linux_install_support_message() + )); } let package_manager = match request.package_manager { Some(value) => value, None => detect_package_manager().ok_or_else(|| { - "could not detect a supported package manager (expected apt, dnf, or apk)".to_string() + format!( + "could not detect a supported package manager. {} Install the desktop dependencies manually on this distribution.", + linux_install_support_message() + ) })?, }; @@ -268,6 +285,26 @@ impl fmt::Display for DesktopPackageManager { mod tests { use super::*; + #[test] + fn desktop_platform_support_message_mentions_linux_and_supported_distros() { + let message = desktop_platform_support_message(); + assert!(message.contains("only supported on Linux")); + assert!(message.contains("Debian/Ubuntu (apt)")); + assert!(message.contains("Fedora/RHEL (dnf)")); + assert!(message.contains("Alpine (apk)")); + } + + #[test] + fn linux_install_support_message_mentions_unsupported_environments() { + let message = linux_install_support_message(); + assert!(message.contains("Debian/Ubuntu (apt)")); + assert!(message.contains("Fedora/RHEL (dnf)")); + assert!(message.contains("Alpine (apk)")); + assert!(message.contains("macOS")); + assert!(message.contains("Windows")); + assert!(message.contains("without apt, dnf, or apk")); + } + #[test] fn desktop_packages_support_no_fonts() { let packages = desktop_packages(DesktopPackageManager::Apt, true); diff --git a/server/packages/sandbox-agent/src/desktop_runtime.rs b/server/packages/sandbox-agent/src/desktop_runtime.rs index f434f83..d0623ba 100644 --- a/server/packages/sandbox-agent/src/desktop_runtime.rs +++ b/server/packages/sandbox-agent/src/desktop_runtime.rs @@ -9,6 +9,7 @@ use tokio::process::{Child, Command}; use tokio::sync::Mutex; use crate::desktop_errors::DesktopProblem; +use crate::desktop_install::desktop_platform_support_message; use crate::desktop_types::{ DesktopActionResponse, DesktopDisplayInfoResponse, DesktopErrorInfo, DesktopKeyboardPressRequest, DesktopKeyboardTypeRequest, DesktopMouseButton, @@ -138,9 +139,7 @@ impl DesktopRuntime { let mut state = self.inner.lock().await; if !self.platform_supported() { - let problem = DesktopProblem::unsupported_platform( - "Desktop APIs are only supported on Linux hosts and sandboxes", - ); + let problem = DesktopProblem::unsupported_platform(desktop_platform_support_message()); self.record_problem_locked(&mut state, &problem); state.state = DesktopState::Failed; return Err(problem); @@ -182,6 +181,7 @@ impl DesktopRuntime { height, dpi: Some(dpi), }; + let environment = self.base_environment(&display)?; state.state = DesktopState::Starting; state.display_num = display_num; @@ -189,13 +189,21 @@ impl DesktopRuntime { state.resolution = Some(resolution.clone()); state.started_at = None; state.last_error = None; - state.environment = self.base_environment(&display)?; + state.environment = environment; state.install_command = None; - self.start_dbus_locked(&mut state).await?; - self.start_xvfb_locked(&mut state, &resolution).await?; - self.wait_for_socket(display_num).await?; - self.start_openbox_locked(&mut state).await?; + if let Err(problem) = self.start_dbus_locked(&mut state).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + if let Err(problem) = self.start_xvfb_locked(&mut state, &resolution).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + if let Err(problem) = self.wait_for_socket(display_num).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } + if let Err(problem) = self.start_openbox_locked(&mut state).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } let ready = DesktopReadyContext { display, @@ -203,23 +211,15 @@ impl DesktopRuntime { resolution, }; - let display_info = self - .query_display_info_locked(&state, &ready) - .await - .map_err(|problem| { - self.record_problem_locked(&mut state, &problem); - state.state = DesktopState::Failed; - problem - })?; + let display_info = match self.query_display_info_locked(&state, &ready).await { + Ok(display_info) => display_info, + Err(problem) => return Err(self.fail_start_locked(&mut state, problem).await), + }; state.resolution = Some(display_info.resolution.clone()); - self.capture_screenshot_locked(&state, None) - .await - .map_err(|problem| { - self.record_problem_locked(&mut state, &problem); - state.state = DesktopState::Failed; - problem - })?; + if let Err(problem) = self.capture_screenshot_locked(&state, None).await { + return Err(self.fail_start_locked(&mut state, problem).await); + } state.state = DesktopState::Active; state.started_at = Some(chrono::Utc::now().to_rfc3339()); @@ -403,12 +403,7 @@ impl DesktopRuntime { let mut state = self.inner.lock().await; let ready = self.ensure_ready_locked(&mut state).await?; - let args = vec![ - "type".to_string(), - "--delay".to_string(), - request.delay_ms.unwrap_or(10).to_string(), - request.text, - ]; + let args = type_text_args(request.text, request.delay_ms.unwrap_or(10)); self.run_input_command_locked(&state, &ready, args).await?; Ok(DesktopActionResponse { ok: true }) } @@ -423,7 +418,7 @@ impl DesktopRuntime { let mut state = self.inner.lock().await; let ready = self.ensure_ready_locked(&mut state).await?; - let args = vec!["key".to_string(), request.key]; + let args = press_key_args(request.key); self.run_input_command_locked(&state, &ready, args).await?; Ok(DesktopActionResponse { ok: true }) } @@ -496,10 +491,8 @@ impl DesktopRuntime { if !self.platform_supported() { state.state = DesktopState::Failed; state.last_error = Some( - DesktopProblem::unsupported_platform( - "Desktop APIs are only supported on Linux hosts and sandboxes", - ) - .to_error_info(), + DesktopProblem::unsupported_platform(desktop_platform_support_message()) + .to_error_info(), ); return; } @@ -527,6 +520,15 @@ impl DesktopRuntime { return; } + if state.state == DesktopState::Failed + && state.display.is_none() + && state.xvfb.is_none() + && state.openbox.is_none() + && state.dbus_pid.is_none() + { + return; + } + let Some(display) = state.display.clone() else { state.state = DesktopState::Failed; state.last_error = Some( @@ -737,6 +739,24 @@ impl DesktopRuntime { } } + async fn fail_start_locked( + &self, + state: &mut DesktopRuntimeStateData, + problem: DesktopProblem, + ) -> DesktopProblem { + self.record_problem_locked(state, &problem); + self.write_runtime_log_locked(state, "desktop runtime startup failed; cleaning up"); + self.stop_openbox_locked(state).await; + self.stop_xvfb_locked(state).await; + self.stop_dbus_locked(state); + state.state = DesktopState::Failed; + state.display = None; + state.resolution = None; + state.started_at = None; + state.environment.clear(); + problem + } + async fn capture_screenshot_locked( &self, state: &DesktopRuntimeStateData, @@ -1274,6 +1294,20 @@ fn parse_mouse_position(bytes: &[u8]) -> Result Vec { + vec![ + "type".to_string(), + "--delay".to_string(), + delay_ms.to_string(), + "--".to_string(), + text, + ] +} + +fn press_key_args(key: String) -> Vec { + vec!["key".to_string(), "--".to_string(), key] +} + fn validate_start_request(width: u32, height: u32, dpi: u32) -> Result<(), DesktopProblem> { if width == 0 || height == 0 { return Err(DesktopProblem::invalid_action( @@ -1356,4 +1390,16 @@ mod tests { let error = validate_png(b"not png").expect_err("validation should fail"); assert!(error.contains("PNG")); } + + #[test] + fn type_text_args_insert_double_dash_before_user_text() { + let args = type_text_args("--help".to_string(), 5); + assert_eq!(args, vec!["type", "--delay", "5", "--", "--help"]); + } + + #[test] + fn press_key_args_insert_double_dash_before_user_key() { + let args = press_key_args("--help".to_string()); + assert_eq!(args, vec!["key", "--", "--help"]); + } } diff --git a/server/packages/sandbox-agent/tests/support/docker.rs b/server/packages/sandbox-agent/tests/support/docker.rs index dd32a01..9305d95 100644 --- a/server/packages/sandbox-agent/tests/support/docker.rs +++ b/server/packages/sandbox-agent/tests/support/docker.rs @@ -10,10 +10,12 @@ use std::thread; use std::time::{Duration, SystemTime, UNIX_EPOCH}; use sandbox_agent::router::AuthConfig; +use serial_test::serial; use tempfile::TempDir; const CONTAINER_PORT: u16 = 3000; const DEFAULT_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"; +const DEFAULT_IMAGE_TAG: &str = "sandbox-agent-test:dev"; const STANDARD_PATHS: &[&str] = &[ "/usr/local/sbin", "/usr/local/bin", @@ -183,24 +185,26 @@ fn ensure_test_image() -> String { IMAGE_TAG .get_or_init(|| { let repo_root = repo_root(); - let script = repo_root - .join("scripts") - .join("test-rig") - .join("ensure-image.sh"); - let output = Command::new("/bin/bash") - .arg(&script) + let image_tag = std::env::var("SANDBOX_AGENT_TEST_IMAGE") + .unwrap_or_else(|_| DEFAULT_IMAGE_TAG.to_string()); + let output = Command::new(docker_bin()) + .args(["build", "--tag", &image_tag, "--file"]) + .arg( + repo_root + .join("docker") + .join("test-agent") + .join("Dockerfile"), + ) + .arg(&repo_root) .output() - .expect("run ensure-image.sh"); + .expect("build sandbox-agent test image"); if !output.status.success() { panic!( "failed to build sandbox-agent test image: {}", String::from_utf8_lossy(&output.stderr) ); } - String::from_utf8(output.stdout) - .expect("image tag utf8") - .trim() - .to_string() + image_tag }) .clone() } @@ -235,12 +239,6 @@ fn build_env( "LOCALAPPDATA".to_string(), layout.local_appdata.to_string_lossy().to_string(), ); - if let Some(value) = std::env::var_os("XDG_STATE_HOME") { - env.insert( - "XDG_STATE_HOME".to_string(), - PathBuf::from(value).to_string_lossy().to_string(), - ); - } for (key, value) in std::env::vars() { if key == "PATH" { @@ -549,3 +547,47 @@ fn docker_bin() -> &'static Path { }) .as_path() } + +#[cfg(test)] +mod tests { + use super::*; + + struct EnvVarGuard { + key: &'static str, + old: Option, + } + + impl EnvVarGuard { + fn set(key: &'static str, value: &Path) -> Self { + let old = std::env::var_os(key); + std::env::set_var(key, value); + Self { key, old } + } + } + + impl Drop for EnvVarGuard { + fn drop(&mut self) { + match self.old.as_ref() { + Some(value) => std::env::set_var(self.key, value), + None => std::env::remove_var(self.key), + } + } + } + + #[test] + #[serial] + fn build_env_keeps_test_local_xdg_state_home() { + let root = tempfile::tempdir().expect("create docker support tempdir"); + let host_state = tempfile::tempdir().expect("create host xdg state tempdir"); + let _guard = EnvVarGuard::set("XDG_STATE_HOME", host_state.path()); + + let layout = TestLayout::new(root.path()); + layout.create(); + + let env = build_env(&layout, &AuthConfig::disabled(), &TestAppOptions::default()); + assert_eq!( + env.get("XDG_STATE_HOME"), + Some(&layout.xdg_state_home.to_string_lossy().to_string()) + ); + } +}