feat: desktop computer-use APIs with windows, launch/open, and neko streaming

Adds desktop computer-use endpoints (windows, screenshots, mouse/keyboard,
launch/open), enhances neko-based streaming integration, updates inspector
UI with desktop debug tab, and adds common software test infrastructure.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Nathan Flurry 2026-03-17 02:35:52 -07:00
parent 2d8508d6e2
commit dff7614b11
17 changed files with 4045 additions and 136 deletions

View file

@ -113,6 +113,40 @@ impl DesktopProblem {
.with_processes(processes)
}
/// Builds the problem reported when a requested window cannot be located.
///
/// Passes HTTP status 404, the title "Window Not Found", the identifier
/// `window_not_found`, and the caller-supplied `message` to `Self::new`.
pub fn window_not_found(message: impl Into<String>) -> Self {
    let (status, title, code) = (404, "Window Not Found", "window_not_found");
    Self::new(status, title, code, message)
}
/// Builds the problem reported when no window currently holds focus.
///
/// Uses HTTP status 404 and a fixed message; takes no arguments.
pub fn no_focused_window() -> Self {
    let (status, title, code) = (404, "No Focused Window", "no_focused_window");
    let message = "No window currently has focus";
    Self::new(status, title, code, message)
}
/// Builds the problem reported when a stream is already running.
///
/// Passes HTTP status 409 (conflict), the title "Stream Already Active", the
/// identifier `stream_already_active`, and `message` to `Self::new`.
pub fn stream_already_active(message: impl Into<String>) -> Self {
    let (status, title, code) = (409, "Stream Already Active", "stream_already_active");
    Self::new(status, title, code, message)
}
/// Builds the problem reported when an operation needs a stream but none is
/// running.
///
/// Passes HTTP status 409 (conflict), the title "Stream Not Active", the
/// identifier `stream_not_active`, and `message` to `Self::new`.
pub fn stream_not_active(message: impl Into<String>) -> Self {
    let (status, title, code) = (409, "Stream Not Active", "stream_not_active");
    Self::new(status, title, code, message)
}
/// Builds the problem reported when reading or writing the clipboard fails.
///
/// Passes HTTP status 500, the title "Clipboard Failed", the identifier
/// `clipboard_failed`, and `message` to `Self::new`.
pub fn clipboard_failed(message: impl Into<String>) -> Self {
    let (status, title, code) = (500, "Clipboard Failed", "clipboard_failed");
    Self::new(status, title, code, message)
}
/// Builds the problem reported when an application to launch cannot be found.
///
/// Passes HTTP status 404, the title "App Not Found", the identifier
/// `app_not_found`, and `message` to `Self::new`.
pub fn app_not_found(message: impl Into<String>) -> Self {
    let (status, title, code) = (404, "App Not Found", "app_not_found");
    Self::new(status, title, code, message)
}
pub fn to_problem_details(&self) -> ProblemDetails {
let mut extensions = Map::new();
extensions.insert("code".to_string(), Value::String(self.code.to_string()));

View file

@ -74,6 +74,8 @@ struct DesktopRuntimeStateData {
xvfb: Option<ManagedDesktopProcess>,
openbox: Option<ManagedDesktopProcess>,
dbus_pid: Option<u32>,
streaming_config: Option<crate::desktop_streaming::StreamingConfig>,
recording_fps: Option<u32>,
}
#[derive(Debug)]
@ -138,26 +140,10 @@ impl DesktopScreenshotOptions {
impl Default for DesktopRuntimeConfig {
fn default() -> Self {
let display_num = std::env::var("SANDBOX_AGENT_DESKTOP_DISPLAY_NUM")
.ok()
.and_then(|value| value.parse::<i32>().ok())
.filter(|value| *value > 0)
.unwrap_or(DEFAULT_DISPLAY_NUM);
let state_dir = std::env::var("SANDBOX_AGENT_DESKTOP_STATE_DIR")
.ok()
.map(PathBuf::from)
.unwrap_or_else(default_state_dir);
let assume_linux_for_tests = std::env::var("SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX")
.ok()
.map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
.unwrap_or(false);
Self {
state_dir,
display_num,
assume_linux_for_tests,
state_dir: default_state_dir(),
display_num: DEFAULT_DISPLAY_NUM,
assume_linux_for_tests: false,
}
}
}
@ -189,6 +175,8 @@ impl DesktopRuntime {
xvfb: None,
openbox: None,
dbus_pid: None,
streaming_config: None,
recording_fps: None,
})),
config,
}
@ -200,6 +188,15 @@ impl DesktopRuntime {
let mut response = self.snapshot_locked(&state);
drop(state);
self.append_neko_process(&mut response).await;
// Include the current window list when the desktop is active so callers
// get windows for free when polling status (avoids a separate request).
if response.state == DesktopState::Active {
if let Ok(window_list) = self.list_windows().await {
response.windows = window_list.windows;
}
}
response
}
@ -248,7 +245,9 @@ impl DesktopRuntime {
let dpi = request.dpi.unwrap_or(DEFAULT_DPI);
validate_start_request(width, height, dpi)?;
let display_num = self.choose_display_num()?;
// Override display_num if provided in request
let display_num =
self.choose_display_num_from(request.display_num.unwrap_or(self.config.display_num))?;
let display = format!(":{display_num}");
let resolution = DesktopResolution {
width,
@ -257,6 +256,29 @@ impl DesktopRuntime {
};
let environment = self.base_environment(&display)?;
// Store streaming and recording config for later use
state.streaming_config = if request.stream_video_codec.is_some()
|| request.stream_audio_codec.is_some()
|| request.stream_frame_rate.is_some()
|| request.webrtc_port_range.is_some()
{
Some(crate::desktop_streaming::StreamingConfig {
video_codec: request
.stream_video_codec
.unwrap_or_else(|| "vp8".to_string()),
audio_codec: request
.stream_audio_codec
.unwrap_or_else(|| "opus".to_string()),
frame_rate: request.stream_frame_rate.unwrap_or(30).clamp(1, 60),
webrtc_port_range: request
.webrtc_port_range
.unwrap_or_else(|| "59050-59070".to_string()),
})
} else {
None
};
state.recording_fps = request.recording_fps.map(|fps| fps.clamp(1, 60));
state.state = DesktopState::Starting;
state.display_num = display_num;
state.display = Some(display.clone());
@ -344,6 +366,8 @@ impl DesktopRuntime {
state.missing_dependencies = self.detect_missing_dependencies();
state.install_command = self.install_command_for(&state.missing_dependencies);
state.environment.clear();
state.streaming_config = None;
state.recording_fps = None;
let mut response = self.snapshot_locked(&state);
drop(state);
@ -360,11 +384,17 @@ impl DesktopRuntime {
query: DesktopScreenshotQuery,
) -> Result<DesktopScreenshotData, DesktopProblem> {
let options = screenshot_options(query.format, query.quality, query.scale)?;
let show_cursor = query.show_cursor.unwrap_or(false);
let mut state = self.inner.lock().await;
let ready = self.ensure_ready_locked(&mut state).await?;
let bytes = self
let mut bytes = self
.capture_screenshot_locked(&state, Some(&ready), &options)
.await?;
if show_cursor {
bytes = self
.composite_cursor(&state, &ready, bytes, &options)
.await?;
}
Ok(DesktopScreenshotData {
bytes,
content_type: options.content_type(),
@ -377,12 +407,27 @@ impl DesktopRuntime {
) -> Result<DesktopScreenshotData, DesktopProblem> {
validate_region(&query)?;
let options = screenshot_options(query.format, query.quality, query.scale)?;
let show_cursor = query.show_cursor.unwrap_or(false);
let mut state = self.inner.lock().await;
let ready = self.ensure_ready_locked(&mut state).await?;
let crop = format!("{}x{}+{}+{}", query.width, query.height, query.x, query.y);
let bytes = self
let mut bytes = self
.capture_screenshot_with_crop_locked(&state, &ready, &crop, &options)
.await?;
if show_cursor {
bytes = self
.composite_cursor_region(
&state,
&ready,
bytes,
&options,
query.x,
query.y,
query.width,
query.height,
)
.await?;
}
Ok(DesktopScreenshotData {
bytes,
content_type: options.content_type(),
@ -598,6 +643,21 @@ impl DesktopRuntime {
let (x, y, width, height) = self
.window_geometry_locked(&state, &ready, &window_id)
.await?;
let is_active = active_window_id
.as_deref()
.map(|active| active == window_id)
.unwrap_or(false);
// Filter out noise: window-manager chrome, toolkit internals, and
// invisible helper windows. Always keep the active window so the
// caller can track focus even when the WM itself is focused.
if !is_active {
let trimmed = title.trim();
if trimmed.is_empty() || trimmed == "Openbox" || (width < 120 && height < 80) {
continue;
}
}
windows.push(DesktopWindowInfo {
id: window_id.clone(),
title,
@ -605,10 +665,7 @@ impl DesktopRuntime {
y,
width,
height,
is_active: active_window_id
.as_deref()
.map(|active| active == window_id)
.unwrap_or(false),
is_active,
});
}
Ok(DesktopWindowListResponse { windows })
@ -658,9 +715,10 @@ impl DesktopRuntime {
})?;
let environment = state.environment.clone();
let display = display.to_string();
let streaming_config = state.streaming_config.clone();
drop(state);
self.streaming_manager
.start(&display, resolution, &environment)
.start(&display, resolution, &environment, streaming_config, None)
.await
}
@ -1503,14 +1561,21 @@ impl DesktopRuntime {
}
fn choose_display_num(&self) -> Result<i32, DesktopProblem> {
self.choose_display_num_from(self.config.display_num)
}
fn choose_display_num_from(&self, start: i32) -> Result<i32, DesktopProblem> {
if start <= 0 {
return Err(DesktopProblem::invalid_action("displayNum must be > 0"));
}
for offset in 0..MAX_DISPLAY_PROBE {
let candidate = self.config.display_num + offset;
let candidate = start + offset;
if !socket_path(candidate).exists() {
return Ok(candidate);
}
}
Err(DesktopProblem::runtime_failed(
"unable to find an available X display starting at :99",
format!("unable to find an available X display starting at :{start}"),
None,
Vec::new(),
))
@ -1579,6 +1644,7 @@ impl DesktopRuntime {
install_command: state.install_command.clone(),
processes: self.processes_locked(state),
runtime_log_path: Some(state.runtime_log_path.to_string_lossy().to_string()),
windows: Vec::new(),
}
}
@ -1656,6 +1722,391 @@ impl DesktopRuntime {
.open(&state.runtime_log_path)
.and_then(|mut file| std::io::Write::write_all(&mut file, line.as_bytes()));
}
/// Reads the text content of an X selection by running `xclip -o`.
///
/// `selection` is forwarded to `xclip -selection`; when `None` it defaults to
/// `"clipboard"`. The selection name actually used is echoed in the response.
///
/// # Errors
///
/// Fails if the desktop is not ready, or with `clipboard_failed` when `xclip`
/// itself could not be run. A non-zero exit status from `xclip` is *not* an
/// error: it is reported as an empty clipboard.
pub async fn get_clipboard(
    &self,
    selection: Option<String>,
) -> Result<crate::desktop_types::DesktopClipboardResponse, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let sel = selection.unwrap_or_else(|| "clipboard".to_string());
    let xclip_args: Vec<String> = ["-selection", sel.as_str(), "-o"]
        .iter()
        .map(|arg| arg.to_string())
        .collect();

    let output =
        match run_command_output("xclip", &xclip_args, &ready.environment, INPUT_TIMEOUT).await {
            Ok(output) => output,
            Err(err) => {
                return Err(DesktopProblem::clipboard_failed(format!(
                    "failed to read clipboard: {err}"
                )));
            }
        };

    // Empty clipboard is not an error: any unsuccessful exit yields "".
    let text = if output.status.success() {
        String::from_utf8_lossy(&output.stdout).to_string()
    } else {
        String::new()
    };

    Ok(crate::desktop_types::DesktopClipboardResponse {
        text,
        selection: sel,
    })
}
/// Writes `request.text` to one or more X selections by piping it into
/// `xclip -i`.
///
/// `request.selection` defaults to `"clipboard"`. The special value `"both"`
/// writes to `clipboard` and then `primary`; any other value is forwarded to
/// `xclip -selection` unchanged.
///
/// # Errors
///
/// Fails if the desktop is not ready, or with `clipboard_failed` when `xclip`
/// cannot be run or exits unsuccessfully (its trimmed stderr is included in
/// the message). Writing stops at the first failing selection.
pub async fn set_clipboard(
    &self,
    request: crate::desktop_types::DesktopClipboardWriteRequest,
) -> Result<DesktopActionResponse, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let requested = request.selection.unwrap_or_else(|| "clipboard".to_string());
    let targets: Vec<String> = if requested != "both" {
        vec![requested]
    } else {
        vec!["clipboard".to_string(), "primary".to_string()]
    };

    for target in &targets {
        let xclip_args = vec![
            "-selection".to_string(),
            target.clone(),
            "-i".to_string(),
        ];
        let payload = request.text.as_bytes().to_vec();
        let outcome = run_command_output_with_stdin(
            "xclip",
            &xclip_args,
            &ready.environment,
            INPUT_TIMEOUT,
            payload,
        )
        .await;

        let output = match outcome {
            Ok(output) => output,
            Err(err) => {
                return Err(DesktopProblem::clipboard_failed(format!(
                    "failed to write clipboard: {err}"
                )));
            }
        };
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            return Err(DesktopProblem::clipboard_failed(format!(
                "clipboard write failed: {}",
                stderr.trim()
            )));
        }
    }

    Ok(DesktopActionResponse { ok: true })
}
/// Returns id, title and geometry of the window that currently has focus.
///
/// # Errors
///
/// Fails if the desktop is not ready, with `no_focused_window` when no active
/// window id is reported, or with whatever the title/geometry lookups return.
pub async fn focused_window(&self) -> Result<DesktopWindowInfo, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let Some(window_id) = self.active_window_id_locked(&guard, &ready).await? else {
        return Err(DesktopProblem::no_focused_window());
    };

    let title = self.window_title_locked(&guard, &ready, &window_id).await?;
    let geometry = self
        .window_geometry_locked(&guard, &ready, &window_id)
        .await?;
    let (x, y, width, height) = geometry;

    Ok(DesktopWindowInfo {
        id: window_id,
        title,
        x,
        y,
        width,
        height,
        // The window was obtained by asking for the active one.
        is_active: true,
    })
}
/// Activates and focuses the window `window_id`, then returns its refreshed
/// info.
///
/// Issues `windowactivate --sync <id> windowfocus --sync <id>` through
/// `run_input_command_locked` (presumably an xdotool invocation; confirm
/// against that helper).
///
/// # Errors
///
/// Fails if the desktop is not ready. Any failure of the input command is
/// reported as `window_not_found`, regardless of the underlying cause.
pub async fn focus_window(&self, window_id: &str) -> Result<DesktopWindowInfo, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let command_args: Vec<String> = [
        "windowactivate",
        "--sync",
        window_id,
        "windowfocus",
        "--sync",
        window_id,
    ]
    .iter()
    .map(|arg| arg.to_string())
    .collect();

    let outcome = self
        .run_input_command_locked(&guard, &ready, command_args)
        .await;
    if outcome.is_err() {
        return Err(DesktopProblem::window_not_found(format!(
            "Window {window_id} not found"
        )));
    }

    self.window_info_locked(&guard, &ready, window_id).await
}
/// Moves the window `window_id` to `(request.x, request.y)` and returns its
/// refreshed info.
///
/// Issues `windowmove <id> <x> <y>` through `run_input_command_locked`
/// (presumably an xdotool invocation; confirm against that helper).
///
/// # Errors
///
/// Fails if the desktop is not ready. Any failure of the input command is
/// reported as `window_not_found`, regardless of the underlying cause.
pub async fn move_window(
    &self,
    window_id: &str,
    request: crate::desktop_types::DesktopWindowMoveRequest,
) -> Result<DesktopWindowInfo, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let mut command_args = Vec::with_capacity(4);
    command_args.push("windowmove".to_string());
    command_args.push(window_id.to_string());
    command_args.push(request.x.to_string());
    command_args.push(request.y.to_string());

    let outcome = self
        .run_input_command_locked(&guard, &ready, command_args)
        .await;
    if outcome.is_err() {
        return Err(DesktopProblem::window_not_found(format!(
            "Window {window_id} not found"
        )));
    }

    self.window_info_locked(&guard, &ready, window_id).await
}
/// Resizes the window `window_id` to `request.width` x `request.height` and
/// returns its refreshed info.
///
/// Issues `windowsize <id> <width> <height>` through
/// `run_input_command_locked` (presumably an xdotool invocation; confirm
/// against that helper).
///
/// # Errors
///
/// Fails if the desktop is not ready. Any failure of the input command is
/// reported as `window_not_found`, regardless of the underlying cause.
pub async fn resize_window(
    &self,
    window_id: &str,
    request: crate::desktop_types::DesktopWindowResizeRequest,
) -> Result<DesktopWindowInfo, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let mut command_args = Vec::with_capacity(4);
    command_args.push("windowsize".to_string());
    command_args.push(window_id.to_string());
    command_args.push(request.width.to_string());
    command_args.push(request.height.to_string());

    let outcome = self
        .run_input_command_locked(&guard, &ready, command_args)
        .await;
    if outcome.is_err() {
        return Err(DesktopProblem::window_not_found(format!(
            "Window {window_id} not found"
        )));
    }

    self.window_info_locked(&guard, &ready, window_id).await
}
/// Collects title, geometry and focus state for `window_id`.
///
/// Takes the already-borrowed runtime state and ready context, so the caller
/// is expected to hold the runtime lock. Lookups run in this order: active
/// window id, title, geometry; the first failure is returned.
async fn window_info_locked(
    &self,
    state: &DesktopRuntimeStateData,
    ready: &DesktopReadyContext,
    window_id: &str,
) -> Result<DesktopWindowInfo, DesktopProblem> {
    let focused_id = self.active_window_id_locked(state, ready).await?;
    let title = self.window_title_locked(state, ready, window_id).await?;
    let geometry = self.window_geometry_locked(state, ready, window_id).await?;

    let (x, y, width, height) = geometry;
    // Active only when an active window exists and its id equals `window_id`.
    let is_active = focused_id.as_deref() == Some(window_id);

    Ok(DesktopWindowInfo {
        id: window_id.to_string(),
        title,
        x,
        y,
        width,
        height,
        is_active,
    })
}
/// Launches `request.app` on the managed desktop as a desktop-owned process.
///
/// The executable is first looked up with `find_binary`; if that finds
/// nothing, `which` is run inside the desktop environment as a second chance.
/// When `request.wait` is `true` and the started process has a pid, the
/// function polls `xdotool search --pid <pid>` every 200 ms for up to 5
/// seconds and returns the first window id printed. If none appears in time
/// the launch still succeeds with `window_id: None`.
///
/// # Errors
///
/// Fails if the desktop is not ready, with `app_not_found` when neither lookup
/// locates the executable, or with `runtime_failed` when the process cannot be
/// started.
pub async fn launch_app(
    &self,
    request: crate::desktop_types::DesktopLaunchRequest,
) -> Result<crate::desktop_types::DesktopLaunchResponse, DesktopProblem> {
    // NOTE(review): the runtime lock is held for the whole call, including the
    // optional window wait below — confirm this is acceptable for callers.
    let mut state = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut state).await?;

    // Verify the app exists.
    if find_binary(&request.app).is_none() {
        // Also try `which` via the desktop environment. A spawn error and a
        // non-zero exit both mean "not found".
        let found_by_which = run_command_output(
            "which",
            &[request.app.clone()],
            &ready.environment,
            INPUT_TIMEOUT,
        )
        .await
        .is_ok_and(|output| output.status.success());
        if !found_by_which {
            return Err(DesktopProblem::app_not_found(format!(
                "Application '{}' not found in PATH",
                request.app
            )));
        }
    }

    let args = request.args.unwrap_or_default();
    let snapshot = self
        .process_runtime
        .start_process(ProcessStartSpec {
            command: request.app.clone(),
            args,
            cwd: None,
            env: ready.environment.clone(),
            tty: false,
            interactive: false,
            owner: ProcessOwner::Desktop,
            restart_policy: None,
        })
        .await
        .map_err(|err| {
            DesktopProblem::runtime_failed(
                format!("failed to launch {}: {err}", request.app),
                None,
                self.processes_locked(&state),
            )
        })?;

    let mut window_id = None;
    if request.wait.unwrap_or(false) {
        if let Some(pid) = snapshot.pid {
            // Poll for a window owned by the new process to appear.
            let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
            let search_args = vec!["search".to_string(), "--pid".to_string(), pid.to_string()];
            loop {
                let output = run_command_output(
                    "xdotool",
                    &search_args,
                    &ready.environment,
                    INPUT_TIMEOUT,
                )
                .await;
                if let Ok(ref out) = output {
                    if out.status.success() {
                        // Only the first line of the search output is used.
                        let id = String::from_utf8_lossy(&out.stdout)
                            .lines()
                            .next()
                            .map(|s| s.trim().to_string());
                        if id.as_ref().is_some_and(|s| !s.is_empty()) {
                            window_id = id;
                            break;
                        }
                    }
                }
                // The deadline is checked after each attempt, so at least one
                // search always runs.
                if tokio::time::Instant::now() >= deadline {
                    break;
                }
                tokio::time::sleep(Duration::from_millis(200)).await;
            }
        }
    }

    Ok(crate::desktop_types::DesktopLaunchResponse {
        process_id: snapshot.id,
        pid: snapshot.pid,
        window_id,
    })
}
/// Opens `request.target` by starting `xdg-open <target>` as a desktop-owned
/// process and returns that process's identifiers.
///
/// # Errors
///
/// Fails if the desktop is not ready, or with `runtime_failed` (including the
/// current process list) when `xdg-open` cannot be started.
pub async fn open_target(
    &self,
    request: crate::desktop_types::DesktopOpenRequest,
) -> Result<crate::desktop_types::DesktopOpenResponse, DesktopProblem> {
    let mut guard = self.inner.lock().await;
    let ready = self.ensure_ready_locked(&mut guard).await?;

    let spec = ProcessStartSpec {
        command: "xdg-open".to_string(),
        args: vec![request.target],
        cwd: None,
        env: ready.environment.clone(),
        tty: false,
        interactive: false,
        owner: ProcessOwner::Desktop,
        restart_policy: None,
    };

    let snapshot = match self.process_runtime.start_process(spec).await {
        Ok(snapshot) => snapshot,
        Err(err) => {
            return Err(DesktopProblem::runtime_failed(
                format!("failed to open target: {err}"),
                None,
                self.processes_locked(&guard),
            ));
        }
    };

    Ok(crate::desktop_types::DesktopOpenResponse {
        process_id: snapshot.id,
        pid: snapshot.pid,
    })
}
/// Overlays a cursor marker on a full-screen screenshot.
///
/// Looks up the current mouse position and hands the image and that position
/// to `draw_cursor_on_image`.
async fn composite_cursor(
    &self,
    state: &DesktopRuntimeStateData,
    ready: &DesktopReadyContext,
    screenshot_bytes: Vec<u8>,
    options: &DesktopScreenshotOptions,
) -> Result<Vec<u8>, DesktopProblem> {
    let pointer = self.mouse_position_locked(state, ready).await?;
    let (x, y) = (pointer.x, pointer.y);
    let environment = &ready.environment;
    self.draw_cursor_on_image(screenshot_bytes, x, y, options, environment)
        .await
}
/// Overlays a cursor marker on a region screenshot.
///
/// The mouse position is translated into region-relative coordinates. If the
/// cursor lies outside the region on any side, the screenshot is returned
/// unchanged and ImageMagick is not run.
///
/// * `region_x`, `region_y` — top-left corner of the captured region.
/// * `region_width`, `region_height` — size of the captured region.
///
/// NOTE(review): coordinates are not adjusted for any scaling carried in
/// `options` — confirm whether scaled screenshots need the offset scaled too.
async fn composite_cursor_region(
    &self,
    state: &DesktopRuntimeStateData,
    ready: &DesktopReadyContext,
    screenshot_bytes: Vec<u8>,
    options: &DesktopScreenshotOptions,
    region_x: i32,
    region_y: i32,
    region_width: u32,
    region_height: u32,
) -> Result<Vec<u8>, DesktopProblem> {
    let pos = self.mouse_position_locked(state, ready).await?;

    // Widen before subtracting so extreme coordinates cannot overflow.
    let rel_x = i64::from(pos.x) - i64::from(region_x);
    let rel_y = i64::from(pos.y) - i64::from(region_y);

    // The cursor must lie inside [0, width) x [0, height) to be drawn.
    let inside = (0..i64::from(region_width)).contains(&rel_x)
        && (0..i64::from(region_height)).contains(&rel_y);
    if !inside {
        return Ok(screenshot_bytes);
    }

    // Narrowing can only fail for regions wider than `i32::MAX`; treat that
    // like "outside" rather than truncating.
    let (Ok(cursor_x), Ok(cursor_y)) = (i32::try_from(rel_x), i32::try_from(rel_y)) else {
        return Ok(screenshot_bytes);
    };

    self.draw_cursor_on_image(
        screenshot_bytes,
        cursor_x,
        cursor_y,
        options,
        &ready.environment,
    )
    .await
}
/// Draws a red crosshair centred on `(x, y)` onto `image_bytes` using
/// ImageMagick `convert`.
///
/// The image is piped to `convert` on stdin; the crosshair is two 2-pixel-wide
/// lines extending 10 pixels from the centre in each direction. The result is
/// written in the format given by `options.output_arg()`.
///
/// # Errors
///
/// Returns `screenshot_failed` when `convert` cannot be run. If `convert`
/// runs but exits unsuccessfully, the original image is returned unchanged.
async fn draw_cursor_on_image(
    &self,
    image_bytes: Vec<u8>,
    x: i32,
    y: i32,
    options: &DesktopScreenshotOptions,
    environment: &HashMap<String, String>,
) -> Result<Vec<u8>, DesktopProblem> {
    let (left, right) = (x - 10, x + 10);
    let (top, bottom) = (y - 10, y + 10);
    let crosshair = format!(
        "stroke red stroke-width 2 line {left},{y},{right},{y} line {x},{top},{x},{bottom}"
    );

    let convert_args = vec![
        // "-" makes convert read the image from stdin.
        "-".to_string(),
        "-draw".to_string(),
        crosshair,
        options.output_arg().to_string(),
    ];

    // A copy goes to the child so the original stays available as fallback.
    let piped = image_bytes.clone();
    let outcome = run_command_output_with_stdin(
        "convert",
        &convert_args,
        environment,
        SCREENSHOT_TIMEOUT,
        piped,
    )
    .await;

    let output = match outcome {
        Ok(output) => output,
        Err(err) => {
            return Err(DesktopProblem::screenshot_failed(
                format!("failed to composite cursor: {err}"),
                Vec::new(),
            ));
        }
    };

    if output.status.success() {
        Ok(output.stdout)
    } else {
        // Fall back to the original screenshot without a cursor.
        Ok(image_bytes)
    }
}
/// Reports the streaming status by delegating to the streaming manager.
pub async fn stream_status(&self) -> DesktopStreamStatusResponse {
    let manager = &self.streaming_manager;
    manager.status().await
}
}
fn desktop_problem_to_sandbox_error(problem: DesktopProblem) -> SandboxError {

View file

@ -21,13 +21,32 @@ const NEKO_READY_TIMEOUT: Duration = Duration::from_secs(15);
/// How long between readiness polls.
const NEKO_READY_POLL: Duration = Duration::from_millis(300);
/// Tunable parameters for the neko streaming subprocess.
///
/// `DesktopStreamingManager::start` falls back to `StreamingConfig::default()`
/// when no config is supplied.
#[derive(Debug, Clone)]
pub struct StreamingConfig {
/// Value passed to neko as `--capture.video.codec`.
pub video_codec: String,
/// Value passed to neko as `--capture.audio.codec`.
pub audio_codec: String,
/// Frame rate appended to the screen spec (`<width>x<height>@<frame_rate>`).
pub frame_rate: u32,
/// Value passed to neko as `--webrtc.epr`.
pub webrtc_port_range: String,
}
impl Default for StreamingConfig {
    /// Defaults: VP8 video, Opus audio, 30 frames per second, and the
    /// `NEKO_EPR` port range.
    fn default() -> Self {
        let video_codec = String::from("vp8");
        let audio_codec = String::from("opus");
        let webrtc_port_range = NEKO_EPR.to_string();
        Self {
            video_codec,
            audio_codec,
            frame_rate: 30,
            webrtc_port_range,
        }
    }
}
/// Manages the neko streaming subprocess and its status.
///
/// The type is `Clone`; both fields are `Arc`s, so clones share the same state
/// and process runtime.
#[derive(Debug, Clone)]
pub struct DesktopStreamingManager {
// Streaming state guarded by an async-aware mutex (it is locked with `.lock().await`).
inner: Arc<Mutex<DesktopStreamingState>>,
// Process runtime used to start the streaming subprocess.
process_runtime: Arc<ProcessRuntime>,
}
#[derive(Debug, Default)]
#[derive(Debug)]
struct DesktopStreamingState {
active: bool,
process_id: Option<String>,
@ -37,6 +56,23 @@ struct DesktopStreamingState {
neko_session_cookie: Option<String>,
display: Option<String>,
resolution: Option<DesktopResolution>,
streaming_config: StreamingConfig,
window_id: Option<String>,
}
impl Default for DesktopStreamingState {
    /// Inactive state: every optional field is `None` and the streaming
    /// config is `StreamingConfig::default()`.
    fn default() -> Self {
        let streaming_config = StreamingConfig::default();
        Self {
            active: false,
            streaming_config,
            process_id: None,
            window_id: None,
            display: None,
            resolution: None,
            neko_base_url: None,
            neko_session_cookie: None,
        }
    }
}
impl DesktopStreamingManager {
@ -53,11 +89,18 @@ impl DesktopStreamingManager {
display: &str,
resolution: DesktopResolution,
environment: &HashMap<String, String>,
config: Option<StreamingConfig>,
window_id: Option<String>,
) -> Result<DesktopStreamStatusResponse, SandboxError> {
let config = config.unwrap_or_default();
let mut state = self.inner.lock().await;
if state.active {
return Ok(DesktopStreamStatusResponse { active: true });
return Ok(DesktopStreamStatusResponse {
active: true,
window_id: state.window_id.clone(),
process_id: state.process_id.clone(),
});
}
// Stop any stale process.
@ -72,7 +115,10 @@ impl DesktopStreamingManager {
env.insert("DISPLAY".to_string(), display.to_string());
let bind_addr = format!("0.0.0.0:{}", NEKO_INTERNAL_PORT);
let screen = format!("{}x{}@30", resolution.width, resolution.height);
let screen = format!(
"{}x{}@{}",
resolution.width, resolution.height, config.frame_rate
);
let snapshot = self
.process_runtime
@ -89,11 +135,11 @@ impl DesktopStreamingManager {
"--capture.video.display".to_string(),
display.to_string(),
"--capture.video.codec".to_string(),
"vp8".to_string(),
config.video_codec.clone(),
"--capture.audio.codec".to_string(),
"opus".to_string(),
config.audio_codec.clone(),
"--webrtc.epr".to_string(),
NEKO_EPR.to_string(),
config.webrtc_port_range.clone(),
"--webrtc.icelite".to_string(),
"--webrtc.nat1to1".to_string(),
"127.0.0.1".to_string(),
@ -117,10 +163,13 @@ impl DesktopStreamingManager {
})?;
let neko_base = format!("http://127.0.0.1:{}", NEKO_INTERNAL_PORT);
let process_id_clone = snapshot.id.clone();
state.process_id = Some(snapshot.id.clone());
state.neko_base_url = Some(neko_base.clone());
state.display = Some(display.to_string());
state.resolution = Some(resolution);
state.streaming_config = config;
state.window_id = window_id;
state.active = true;
// Drop the lock before waiting for readiness.
@ -183,7 +232,15 @@ impl DesktopStreamingManager {
state.neko_session_cookie = Some(cookie.clone());
}
Ok(DesktopStreamStatusResponse { active: true })
let state = self.inner.lock().await;
let state_window_id = state.window_id.clone();
drop(state);
Ok(DesktopStreamStatusResponse {
active: true,
window_id: state_window_id,
process_id: Some(process_id_clone),
})
}
/// Stop streaming and tear down neko subprocess.
@ -200,7 +257,21 @@ impl DesktopStreamingManager {
state.neko_session_cookie = None;
state.display = None;
state.resolution = None;
DesktopStreamStatusResponse { active: false }
state.window_id = None;
DesktopStreamStatusResponse {
active: false,
window_id: None,
process_id: None,
}
}
/// Returns a snapshot of the current streaming state: whether a stream is
/// active, plus the stored window id and process id (if any).
pub async fn status(&self) -> DesktopStreamStatusResponse {
    let guard = self.inner.lock().await;
    let active = guard.active;
    let window_id = guard.window_id.clone();
    let process_id = guard.process_id.clone();
    DesktopStreamStatusResponse {
        active,
        window_id,
        process_id,
    }
}
pub async fn ensure_active(&self) -> Result<(), SandboxError> {

View file

@ -60,6 +60,9 @@ pub struct DesktopStatusResponse {
pub processes: Vec<DesktopProcessInfo>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub runtime_log_path: Option<String>,
/// Current visible windows (included when the desktop is active).
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub windows: Vec<DesktopWindowInfo>,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
@ -71,6 +74,20 @@ pub struct DesktopStartRequest {
pub height: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dpi: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub display_num: Option<i32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub state_dir: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stream_video_codec: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stream_audio_codec: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub stream_frame_rate: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub webrtc_port_range: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub recording_fps: Option<u32>,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
@ -82,6 +99,8 @@ pub struct DesktopScreenshotQuery {
pub quality: Option<u8>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scale: Option<f32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub show_cursor: Option<bool>,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
@ -105,6 +124,8 @@ pub struct DesktopRegionScreenshotQuery {
pub quality: Option<u8>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scale: Option<f32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub show_cursor: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
@ -299,4 +320,78 @@ pub struct DesktopRecordingListResponse {
#[serde(rename_all = "camelCase")]
/// Status of the desktop stream. Fields are serialized in camelCase.
pub struct DesktopStreamStatusResponse {
/// Whether a stream is currently active.
pub active: bool,
/// Window id associated with the stream, if one was supplied when it started. Omitted when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub window_id: Option<String>,
/// Id of the streaming process, if one is recorded. Omitted when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub process_id: Option<String>,
}
/// Result of reading the clipboard.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopClipboardResponse {
/// Clipboard text (lossily decoded as UTF-8); empty when the read exits unsuccessfully.
pub text: String,
/// Name of the selection that was read (`"clipboard"` when none was requested).
pub selection: String,
}
/// Query parameters for reading the clipboard.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
#[serde(rename_all = "camelCase")]
pub struct DesktopClipboardQuery {
/// Selection to read, forwarded to `xclip -selection`; defaults to `"clipboard"` when omitted.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selection: Option<String>,
}
/// Request body for writing to the clipboard.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopClipboardWriteRequest {
/// Text to place on the clipboard.
pub text: String,
/// Target selection; defaults to `"clipboard"`. `"both"` writes to `clipboard` and `primary`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub selection: Option<String>,
}
/// Request body for launching an application on the desktop.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopLaunchRequest {
/// Command to launch; it is looked up before starting and rejected if not found.
pub app: String,
/// Arguments passed to the command; treated as empty when omitted.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub args: Option<Vec<String>>,
/// When `true`, wait (up to about 5 seconds) for a window of the new process to appear.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub wait: Option<bool>,
}
/// Result of launching an application.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopLaunchResponse {
/// Id of the started process as reported by the process runtime.
pub process_id: String,
/// OS pid of the started process, when known. Omitted when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pid: Option<u32>,
/// First window id found for the process when `wait` was requested and a window appeared in time.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub window_id: Option<String>,
}
/// Request body for opening a target on the desktop.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopOpenRequest {
/// Target passed as the single argument to `xdg-open`.
pub target: String,
}
/// Result of opening a target: identifiers of the started `xdg-open` process.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopOpenResponse {
/// Id of the started process as reported by the process runtime.
pub process_id: String,
/// OS pid of the started process, when known. Omitted when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pid: Option<u32>,
}
/// Request body for moving a window; values are forwarded to the `windowmove` command.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopWindowMoveRequest {
/// Target X coordinate.
pub x: i32,
/// Target Y coordinate.
pub y: i32,
}
/// Request body for resizing a window; values are forwarded to the `windowsize` command.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopWindowResizeRequest {
/// Target width.
pub width: u32,
/// Target height.
pub height: u32,
}

View file

@ -216,6 +216,28 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
.route("/desktop/keyboard/up", post(post_v1_desktop_keyboard_up))
.route("/desktop/display/info", get(get_v1_desktop_display_info))
.route("/desktop/windows", get(get_v1_desktop_windows))
.route(
"/desktop/windows/focused",
get(get_v1_desktop_windows_focused),
)
.route(
"/desktop/windows/:id/focus",
post(post_v1_desktop_window_focus),
)
.route(
"/desktop/windows/:id/move",
post(post_v1_desktop_window_move),
)
.route(
"/desktop/windows/:id/resize",
post(post_v1_desktop_window_resize),
)
.route(
"/desktop/clipboard",
get(get_v1_desktop_clipboard).post(post_v1_desktop_clipboard),
)
.route("/desktop/launch", post(post_v1_desktop_launch))
.route("/desktop/open", post(post_v1_desktop_open))
.route(
"/desktop/recording/start",
post(post_v1_desktop_recording_start),
@ -235,6 +257,7 @@ pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>)
)
.route("/desktop/stream/start", post(post_v1_desktop_stream_start))
.route("/desktop/stream/stop", post(post_v1_desktop_stream_stop))
.route("/desktop/stream/status", get(get_v1_desktop_stream_status))
.route("/desktop/stream/signaling", get(get_v1_desktop_stream_ws))
.route("/agents", get(get_v1_agents))
.route("/agents/:agent", get(get_v1_agent))
@ -405,6 +428,15 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
post_v1_desktop_keyboard_up,
get_v1_desktop_display_info,
get_v1_desktop_windows,
get_v1_desktop_windows_focused,
post_v1_desktop_window_focus,
post_v1_desktop_window_move,
post_v1_desktop_window_resize,
get_v1_desktop_clipboard,
post_v1_desktop_clipboard,
post_v1_desktop_launch,
post_v1_desktop_open,
get_v1_desktop_stream_status,
post_v1_desktop_recording_start,
post_v1_desktop_recording_stop,
get_v1_desktop_recordings,
@ -483,6 +515,15 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
DesktopRecordingInfo,
DesktopRecordingListResponse,
DesktopStreamStatusResponse,
DesktopClipboardResponse,
DesktopClipboardQuery,
DesktopClipboardWriteRequest,
DesktopLaunchRequest,
DesktopLaunchResponse,
DesktopOpenRequest,
DesktopOpenResponse,
DesktopWindowMoveRequest,
DesktopWindowResizeRequest,
ServerStatus,
ServerStatusInfo,
AgentCapabilities,
@ -1029,6 +1070,193 @@ async fn get_v1_desktop_windows(
Ok(Json(windows))
}
/// Get the currently focused desktop window.
///
/// Returns information about the window that currently has input focus.
#[utoipa::path(
    get,
    path = "/v1/desktop/windows/focused",
    tag = "v1",
    responses(
        (status = 200, description = "Focused window info", body = DesktopWindowInfo),
        (status = 404, description = "No window is focused", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
    )
)]
async fn get_v1_desktop_windows_focused(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopWindowInfo>, ApiError> {
    // `?` converts the runtime's `DesktopProblem` into an `ApiError`.
    let runtime = state.desktop_runtime();
    let focused = runtime.focused_window().await?;
    Ok(Json(focused))
}
/// Focus a desktop window.
///
/// Brings the specified window to the foreground and gives it input focus.
#[utoipa::path(
    post,
    path = "/v1/desktop/windows/{id}/focus",
    tag = "v1",
    params(
        ("id" = String, Path, description = "X11 window ID")
    ),
    responses(
        (status = 200, description = "Window info after focus", body = DesktopWindowInfo),
        (status = 404, description = "Window not found", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
    )
)]
async fn post_v1_desktop_window_focus(
    State(state): State<Arc<AppState>>,
    Path(id): Path<String>,
) -> Result<Json<DesktopWindowInfo>, ApiError> {
    // `?` converts the runtime's `DesktopProblem` into an `ApiError`.
    let runtime = state.desktop_runtime();
    let focused = runtime.focus_window(&id).await?;
    Ok(Json(focused))
}
/// Move a desktop window.
///
/// Moves the specified window to the given position.
#[utoipa::path(
    post,
    path = "/v1/desktop/windows/{id}/move",
    tag = "v1",
    params(
        ("id" = String, Path, description = "X11 window ID")
    ),
    request_body = DesktopWindowMoveRequest,
    responses(
        (status = 200, description = "Window info after move", body = DesktopWindowInfo),
        (status = 404, description = "Window not found", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
    )
)]
async fn post_v1_desktop_window_move(
    State(state): State<Arc<AppState>>,
    Path(id): Path<String>,
    Json(body): Json<DesktopWindowMoveRequest>,
) -> Result<Json<DesktopWindowInfo>, ApiError> {
    // `?` converts the runtime's `DesktopProblem` into an `ApiError`.
    let runtime = state.desktop_runtime();
    let moved = runtime.move_window(&id, body).await?;
    Ok(Json(moved))
}
/// Resize a desktop window.
///
/// Resizes the specified window to the given dimensions.
#[utoipa::path(
    post,
    path = "/v1/desktop/windows/{id}/resize",
    tag = "v1",
    params(
        ("id" = String, Path, description = "X11 window ID")
    ),
    request_body = DesktopWindowResizeRequest,
    responses(
        (status = 200, description = "Window info after resize", body = DesktopWindowInfo),
        (status = 404, description = "Window not found", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
    )
)]
async fn post_v1_desktop_window_resize(
    State(state): State<Arc<AppState>>,
    Path(id): Path<String>,
    Json(body): Json<DesktopWindowResizeRequest>,
) -> Result<Json<DesktopWindowInfo>, ApiError> {
    // `?` converts the runtime's `DesktopProblem` into an `ApiError`.
    let runtime = state.desktop_runtime();
    let resized = runtime.resize_window(&id, body).await?;
    Ok(Json(resized))
}
/// Read the desktop clipboard.
///
/// Returns the current text content of the X11 clipboard.
#[utoipa::path(
get,
path = "/v1/desktop/clipboard",
tag = "v1",
params(DesktopClipboardQuery),
responses(
(status = 200, description = "Clipboard contents", body = DesktopClipboardResponse),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 500, description = "Clipboard read failed", body = ProblemDetails)
)
)]
async fn get_v1_desktop_clipboard(
    State(state): State<Arc<AppState>>,
    Query(query): Query<DesktopClipboardQuery>,
) -> Result<Json<DesktopClipboardResponse>, ApiError> {
    // Only the `selection` field of the query string is forwarded.
    let selection = query.selection;
    let contents = state.desktop_runtime().get_clipboard(selection).await?;
    Ok(Json(contents))
}
/// Write to the desktop clipboard.
///
/// Sets the text content of the X11 clipboard.
#[utoipa::path(
post,
path = "/v1/desktop/clipboard",
tag = "v1",
request_body = DesktopClipboardWriteRequest,
responses(
(status = 200, description = "Clipboard updated", body = DesktopActionResponse),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 500, description = "Clipboard write failed", body = ProblemDetails)
)
)]
async fn post_v1_desktop_clipboard(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopClipboardWriteRequest>,
) -> Result<Json<DesktopActionResponse>, ApiError> {
    // Runtime errors are propagated by `?`; on success the runtime's action
    // response is returned as JSON.
    let runtime = state.desktop_runtime();
    let outcome = runtime.set_clipboard(body).await?;
    Ok(Json(outcome))
}
/// Launch a desktop application.
///
/// Launches an application by name on the managed desktop, optionally waiting
/// for its window to appear.
#[utoipa::path(
post,
path = "/v1/desktop/launch",
tag = "v1",
request_body = DesktopLaunchRequest,
responses(
(status = 200, description = "Application launched", body = DesktopLaunchResponse),
(status = 404, description = "Application not found", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
)
)]
async fn post_v1_desktop_launch(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopLaunchRequest>,
) -> Result<Json<DesktopLaunchResponse>, ApiError> {
    // The launch request is forwarded as-is; failures surface through `?`.
    let runtime = state.desktop_runtime();
    let launched = runtime.launch_app(body).await?;
    Ok(Json(launched))
}
/// Open a file or URL with the default handler.
///
/// Opens a file path or URL using xdg-open on the managed desktop.
#[utoipa::path(
post,
path = "/v1/desktop/open",
tag = "v1",
request_body = DesktopOpenRequest,
responses(
(status = 200, description = "Target opened", body = DesktopOpenResponse),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
)
)]
async fn post_v1_desktop_open(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopOpenRequest>,
) -> Result<Json<DesktopOpenResponse>, ApiError> {
    // The open request is forwarded as-is; failures surface through `?`.
    let runtime = state.desktop_runtime();
    let opened = runtime.open_target(body).await?;
    Ok(Json(opened))
}
/// Start desktop recording.
///
/// Starts an ffmpeg x11grab recording against the managed desktop and returns
@ -1201,6 +1429,23 @@ async fn post_v1_desktop_stream_stop(
Ok(Json(state.desktop_runtime().stop_streaming().await))
}
/// Get desktop stream status.
///
/// Returns the current state of the desktop WebRTC streaming session.
#[utoipa::path(
get,
path = "/v1/desktop/stream/status",
tag = "v1",
responses(
(status = 200, description = "Desktop stream status", body = DesktopStreamStatusResponse)
)
)]
async fn get_v1_desktop_stream_status(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopStreamStatusResponse>, ApiError> {
    // `stream_status` is awaited without `?`, so this handler always
    // produces the `Ok` variant.
    let status = state.desktop_runtime().stream_status().await;
    Ok(Json(status))
}
/// Open a desktop WebRTC signaling session.
///
/// Upgrades the connection to a WebSocket used for WebRTC signaling between

View file

@ -0,0 +1,497 @@
/// Integration tests that verify all software documented in docs/common-software.mdx
/// is installed and working inside the sandbox.
///
/// These tests use `docker/test-common-software/Dockerfile` which extends the base
/// test-agent image with all documented software pre-installed.
///
/// KEEP IN SYNC with docs/common-software.mdx and docker/test-common-software/Dockerfile.
///
/// Run with:
/// cargo test -p sandbox-agent --test common_software
use reqwest::header::HeaderMap;
use reqwest::{Method, StatusCode};
use serde_json::{json, Value};
use serial_test::serial;
#[path = "support/docker_common_software.rs"]
mod docker_support;
use docker_support::TestApp;
/// Sends one HTTP request to the sandbox and returns the response status,
/// headers, and raw body bytes.
///
/// When `body` is `Some`, it is serialized to a string and sent with a
/// `content-type: application/json` header; otherwise no body is attached.
/// Panics if the request cannot be sent or the body cannot be read.
async fn send_request(
    app: &docker_support::DockerApp,
    method: Method,
    uri: &str,
    body: Option<Value>,
) -> (StatusCode, HeaderMap, Vec<u8>) {
    let request = reqwest::Client::new().request(method, app.http_url(uri));
    let request = match body {
        Some(payload) => request
            .header("content-type", "application/json")
            .body(payload.to_string()),
        None => request,
    };
    let response = request.send().await.expect("request");

    let status = response.status();
    let headers = response.headers().clone();
    let raw = response.bytes().await.expect("body").to_vec();
    (status, headers, raw)
}
/// Parses a response body as JSON, mapping an empty body to `Value::Null`.
///
/// Panics if a non-empty body is not valid JSON.
fn parse_json(bytes: &[u8]) -> Value {
    match bytes {
        [] => Value::Null,
        raw => serde_json::from_slice(raw).expect("valid json"),
    }
}
/// Runs `command` with `args` inside the sandbox using a 30 second timeout
/// and asserts that it exits with code 0.
///
/// Returns the parsed JSON response of the run request.
async fn run_ok(app: &docker_support::DockerApp, command: &str, args: &[&str]) -> Value {
    const DEFAULT_TIMEOUT_MS: u64 = 30_000;
    run_ok_with_timeout(app, command, args, DEFAULT_TIMEOUT_MS).await
}
/// Posts a run request to `/v1/processes/run` and asserts that the HTTP
/// status is 200 and the reported `exitCode` is 0.
///
/// `timeout_ms` is forwarded as the request's `timeoutMs` field. On failure
/// the panic message includes the raw body or the captured stdout/stderr.
/// Returns the parsed JSON response.
async fn run_ok_with_timeout(
    app: &docker_support::DockerApp,
    command: &str,
    args: &[&str],
    timeout_ms: u64,
) -> Value {
    let request_body = json!({
        "command": command,
        "args": args,
        "timeoutMs": timeout_ms
    });
    let (status, _headers, raw) =
        send_request(app, Method::POST, "/v1/processes/run", Some(request_body)).await;
    assert_eq!(
        status,
        StatusCode::OK,
        "run {command} failed: {}",
        String::from_utf8_lossy(&raw)
    );

    let response = parse_json(&raw);
    assert_eq!(
        response["exitCode"], 0,
        "{command} exited with non-zero code.\nstdout: {}\nstderr: {}",
        response["stdout"], response["stderr"]
    );
    response
}
// ---------------------------------------------------------------------------
// Browsers
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn chromium_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "chromium", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("Chromium"),
"expected Chromium version string, got: {stdout}"
);
}
#[tokio::test]
#[serial]
async fn firefox_esr_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "firefox-esr", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("Mozilla Firefox"),
"expected Firefox version string, got: {stdout}"
);
}
// ---------------------------------------------------------------------------
// Languages and runtimes
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn nodejs_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "node", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.starts_with('v'),
"expected node version string, got: {stdout}"
);
}
/// `npm --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn npm_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "npm", &["--version"]).await;
}
#[tokio::test]
#[serial]
async fn python3_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "python3", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("Python 3"),
"expected Python version string, got: {stdout}"
);
}
/// `pip3 --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn pip3_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "pip3", &["--version"]).await;
}
#[tokio::test]
#[serial]
async fn java_is_installed_and_runs() {
let test_app = TestApp::new();
// java --version prints to stdout on modern JDKs
let (status, _, body) = send_request(
&test_app.app,
Method::POST,
"/v1/processes/run",
Some(json!({
"command": "java",
"args": ["--version"],
"timeoutMs": 30000
})),
)
.await;
assert_eq!(status, StatusCode::OK);
let parsed = parse_json(&body);
assert_eq!(parsed["exitCode"], 0);
let combined = format!(
"{}{}",
parsed["stdout"].as_str().unwrap_or(""),
parsed["stderr"].as_str().unwrap_or("")
);
assert!(
combined.contains("openjdk") || combined.contains("OpenJDK") || combined.contains("java"),
"expected Java version string, got: {combined}"
);
}
#[tokio::test]
#[serial]
async fn ruby_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "ruby", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("ruby"),
"expected Ruby version string, got: {stdout}"
);
}
// ---------------------------------------------------------------------------
// Databases
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn sqlite3_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "sqlite3", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(!stdout.is_empty(), "expected sqlite3 version output");
}
#[tokio::test]
#[serial]
async fn redis_server_is_installed() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "redis-server", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("Redis") || stdout.contains("redis"),
"expected Redis version string, got: {stdout}"
);
}
// ---------------------------------------------------------------------------
// Build tools
// ---------------------------------------------------------------------------
/// `gcc --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn gcc_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "gcc", &["--version"]).await;
}
/// `make --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn make_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "make", &["--version"]).await;
}
/// `cmake --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn cmake_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "cmake", &["--version"]).await;
}
/// `pkg-config --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn pkg_config_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "pkg-config", &["--version"]).await;
}
// ---------------------------------------------------------------------------
// CLI tools
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn git_is_installed_and_runs() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "git", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("git version"),
"expected git version string, got: {stdout}"
);
}
#[tokio::test]
#[serial]
async fn jq_is_installed_and_runs() {
let test_app = TestApp::new();
// Pipe a simple JSON through jq
let result = run_ok(&test_app.app, "sh", &["-c", "echo '{\"a\":1}' | jq '.a'"]).await;
let stdout = result["stdout"].as_str().unwrap_or("").trim();
assert_eq!(stdout, "1", "jq did not parse JSON correctly: {stdout}");
}
/// `tmux -V` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn tmux_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "tmux", &["-V"]).await;
}
// ---------------------------------------------------------------------------
// Media and graphics
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn ffmpeg_is_installed_and_runs() {
let test_app = TestApp::new();
// ffmpeg prints version to stderr, so just check exit code via -version
let (status, _, body) = send_request(
&test_app.app,
Method::POST,
"/v1/processes/run",
Some(json!({
"command": "ffmpeg",
"args": ["-version"],
"timeoutMs": 10000
})),
)
.await;
assert_eq!(status, StatusCode::OK);
let parsed = parse_json(&body);
assert_eq!(parsed["exitCode"], 0);
let combined = format!(
"{}{}",
parsed["stdout"].as_str().unwrap_or(""),
parsed["stderr"].as_str().unwrap_or("")
);
assert!(
combined.contains("ffmpeg version"),
"expected ffmpeg version string, got: {combined}"
);
}
/// ImageMagick's `convert --version` must exit with code 0 inside the sandbox.
#[tokio::test]
#[serial]
async fn imagemagick_is_installed() {
    let sandbox = TestApp::new();
    let _response = run_ok(&sandbox.app, "convert", &["--version"]).await;
}
#[tokio::test]
#[serial]
async fn poppler_pdftoppm_is_installed() {
let test_app = TestApp::new();
// pdftoppm -v prints to stderr and exits 0
let (status, _, body) = send_request(
&test_app.app,
Method::POST,
"/v1/processes/run",
Some(json!({
"command": "pdftoppm",
"args": ["-v"],
"timeoutMs": 10000
})),
)
.await;
assert_eq!(status, StatusCode::OK);
let parsed = parse_json(&body);
assert_eq!(parsed["exitCode"], 0);
}
// ---------------------------------------------------------------------------
// Desktop applications (verify binary exists, don't launch GUI)
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn gimp_is_installed() {
let test_app = TestApp::new();
let result = run_ok(&test_app.app, "gimp", &["--version"]).await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("GIMP") || stdout.contains("gimp") || stdout.contains("Image Manipulation"),
"expected GIMP version string, got: {stdout}"
);
}
// ---------------------------------------------------------------------------
// Functional tests: verify tools actually work, not just that they're present
// ---------------------------------------------------------------------------
#[tokio::test]
#[serial]
async fn python3_can_run_script() {
let test_app = TestApp::new();
let result = run_ok(
&test_app.app,
"python3",
&["-c", "import json; print(json.dumps({'ok': True}))"],
)
.await;
let stdout = result["stdout"].as_str().unwrap_or("").trim();
let parsed: Value = serde_json::from_str(stdout).expect("python json output");
assert_eq!(parsed["ok"], true);
}
#[tokio::test]
#[serial]
async fn node_can_run_script() {
let test_app = TestApp::new();
let result = run_ok(
&test_app.app,
"node",
&["-e", "console.log(JSON.stringify({ok: true}))"],
)
.await;
let stdout = result["stdout"].as_str().unwrap_or("").trim();
let parsed: Value = serde_json::from_str(stdout).expect("node json output");
assert_eq!(parsed["ok"], true);
}
#[tokio::test]
#[serial]
async fn ruby_can_run_script() {
let test_app = TestApp::new();
let result = run_ok(
&test_app.app,
"ruby",
&["-e", "require 'json'; puts JSON.generate({ok: true})"],
)
.await;
let stdout = result["stdout"].as_str().unwrap_or("").trim();
let parsed: Value = serde_json::from_str(stdout).expect("ruby json output");
assert_eq!(parsed["ok"], true);
}
#[tokio::test]
#[serial]
async fn gcc_can_compile_and_run_hello_world() {
let test_app = TestApp::new();
// Write a C file
run_ok(
&test_app.app,
"sh",
&["-c", r#"printf '#include <stdio.h>\nint main(){printf("hello\\n");return 0;}\n' > /tmp/hello.c"#],
)
.await;
// Compile it
run_ok(&test_app.app, "gcc", &["-o", "/tmp/hello", "/tmp/hello.c"]).await;
// Run it
let result = run_ok(&test_app.app, "/tmp/hello", &[]).await;
let stdout = result["stdout"].as_str().unwrap_or("").trim();
assert_eq!(stdout, "hello");
}
/// Creates a table, inserts one row, and reads it back with the sqlite3 CLI.
#[tokio::test]
#[serial]
async fn sqlite3_can_create_and_query() {
    let test_app = TestApp::new();
    // The string literal is single-quoted: double-quoted string literals are a
    // legacy SQLite quirk that newer sqlite3 CLI builds reject by default
    // (the value would be treated as a column name). Passing the SQL as a
    // direct argument also removes the need for `sh -c` and nested quoting.
    let result = run_ok(
        &test_app.app,
        "sqlite3",
        &[
            "/tmp/test.db",
            "CREATE TABLE t(v TEXT); INSERT INTO t VALUES('ok'); SELECT v FROM t;",
        ],
    )
    .await;
    let stdout = result["stdout"].as_str().unwrap_or("").trim();
    assert_eq!(stdout, "ok");
}
/// Initializes a repository under /tmp, configures an identity, and creates
/// one commit; the whole shell chain must exit with code 0.
#[tokio::test]
#[serial]
async fn git_can_init_and_commit() {
    let sandbox = TestApp::new();
    let script = "cd /tmp && mkdir -p testrepo && cd testrepo && git init && git config user.email 'test@test.com' && git config user.name 'Test' && touch file && git add file && git commit -m 'init'";
    let _response = run_ok(&sandbox.app, "sh", &["-c", script]).await;
}
#[tokio::test]
#[serial]
async fn chromium_headless_can_dump_dom() {
let test_app = TestApp::new();
// Use headless mode to dump the DOM of a blank page
let result = run_ok_with_timeout(
&test_app.app,
"chromium",
&[
"--headless",
"--no-sandbox",
"--disable-gpu",
"--dump-dom",
"data:text/html,<h1>hello</h1>",
],
30_000,
)
.await;
let stdout = result["stdout"].as_str().unwrap_or("");
assert!(
stdout.contains("hello"),
"expected hello in DOM dump, got: {stdout}"
);
}

View file

@ -0,0 +1,332 @@
/// Docker support for common-software integration tests.
///
/// Builds the `docker/test-common-software/Dockerfile` image (which extends the
/// base test-agent image with pre-installed common software) and provides a
/// `TestApp` that runs a container from it.
///
/// KEEP IN SYNC with docs/common-software.mdx and docker/test-common-software/Dockerfile.
use std::collections::BTreeMap;
use std::io::{Read, Write};
use std::net::TcpStream;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::OnceLock;
use std::thread;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use tempfile::TempDir;
const CONTAINER_PORT: u16 = 3000;
const DEFAULT_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
const BASE_IMAGE_TAG: &str = "sandbox-agent-test:dev";
const COMMON_SOFTWARE_IMAGE_TAG: &str = "sandbox-agent-test-common-software:dev";
static IMAGE_TAG: OnceLock<String> = OnceLock::new();
static DOCKER_BIN: OnceLock<PathBuf> = OnceLock::new();
static CONTAINER_COUNTER: AtomicU64 = AtomicU64::new(0);
/// Handle to the HTTP endpoint of a running test container.
#[derive(Clone)]
pub struct DockerApp {
    // Scheme, host and port of the server, without a trailing path.
    base_url: String,
}

impl DockerApp {
    /// Returns `path` appended verbatim to the container's base URL.
    pub fn http_url(&self, path: &str) -> String {
        [self.base_url.as_str(), path].concat()
    }
}
/// A running common-software test container together with the temporary
/// directory that backs its mounted paths.
///
/// Dropping the value force-removes the container.
pub struct TestApp {
    pub app: DockerApp,
    // Held only so the temporary directory lives as long as the container.
    _root: TempDir,
    container_id: String,
}

impl TestApp {
    /// Creates the temporary layout, ensures the image exists, starts a
    /// container from it, and returns a handle once the container is running.
    pub fn new() -> Self {
        let workdir = tempfile::tempdir().expect("create docker test root");
        let layout = TestLayout::new(workdir.path());
        layout.create();

        let container_id = unique_container_id();
        let image = ensure_common_software_image();
        let base_url = {
            let env = build_env(&layout);
            let mounts = build_mounts(workdir.path());
            run_container(&container_id, &image, &mounts, &env)
        };

        Self {
            app: DockerApp { base_url },
            _root: workdir,
            container_id,
        }
    }
}

impl Drop for TestApp {
    fn drop(&mut self) {
        // Best effort: the outcome of `docker rm -f` is deliberately ignored.
        let _ignored = Command::new(docker_bin())
            .arg("rm")
            .arg("-f")
            .arg(&self.container_id)
            .output();
    }
}
/// Directory layout created under the test root: a home directory plus XDG
/// data and state directories.
struct TestLayout {
    home: PathBuf,
    xdg_data_home: PathBuf,
    xdg_state_home: PathBuf,
}

impl TestLayout {
    /// Computes the three directory paths beneath `root` without touching
    /// the filesystem.
    fn new(root: &Path) -> Self {
        fn under(root: &Path, name: &str) -> PathBuf {
            root.join(name)
        }
        Self {
            home: under(root, "home"),
            xdg_data_home: under(root, "xdg-data"),
            xdg_state_home: under(root, "xdg-state"),
        }
    }

    /// Creates every directory of the layout, including missing parents.
    /// Panics if any of them cannot be created.
    fn create(&self) {
        [&self.home, &self.xdg_data_home, &self.xdg_state_home]
            .iter()
            .for_each(|dir| std::fs::create_dir_all(dir).expect("create docker test dir"));
    }
}
/// Builds the base test-agent image from `docker/test-agent/Dockerfile` with
/// the repository root as build context, and returns the tag it was built as.
///
/// The tag comes from `SANDBOX_AGENT_TEST_IMAGE` when set, otherwise from
/// `BASE_IMAGE_TAG`. Panics if docker cannot be run or the build fails.
fn ensure_base_image() -> String {
    let context = repo_root();
    let tag = match std::env::var("SANDBOX_AGENT_TEST_IMAGE") {
        Ok(value) => value,
        Err(_) => BASE_IMAGE_TAG.to_string(),
    };
    let dockerfile = context.join("docker").join("test-agent").join("Dockerfile");

    let build = Command::new(docker_bin())
        .arg("build")
        .arg("--tag")
        .arg(&tag)
        .arg("--file")
        .arg(dockerfile)
        .arg(&context)
        .output()
        .expect("build base test image");

    if build.status.success() {
        return tag;
    }
    panic!(
        "failed to build base test image: {}",
        String::from_utf8_lossy(&build.stderr)
    );
}
/// Returns the tag of the common-software test image, building it (and the
/// base image it extends) the first time this is called in the process.
///
/// The tag comes from `SANDBOX_AGENT_TEST_COMMON_SOFTWARE_IMAGE` when set,
/// otherwise from `COMMON_SOFTWARE_IMAGE_TAG`. Panics if a build fails.
fn ensure_common_software_image() -> String {
    // Runs at most once per process; the result is cached in `IMAGE_TAG`.
    fn build_image() -> String {
        let base_image = ensure_base_image();
        let context = repo_root();
        let tag = match std::env::var("SANDBOX_AGENT_TEST_COMMON_SOFTWARE_IMAGE") {
            Ok(value) => value,
            Err(_) => COMMON_SOFTWARE_IMAGE_TAG.to_string(),
        };
        let dockerfile = context
            .join("docker")
            .join("test-common-software")
            .join("Dockerfile");

        let build = Command::new(docker_bin())
            .arg("build")
            .arg("--tag")
            .arg(&tag)
            .arg("--build-arg")
            .arg(format!("BASE_IMAGE={base_image}"))
            .arg("--file")
            .arg(dockerfile)
            .arg(&context)
            .output()
            .expect("build common-software test image");

        if build.status.success() {
            return tag;
        }
        panic!(
            "failed to build common-software test image: {}",
            String::from_utf8_lossy(&build.stderr)
        );
    }

    IMAGE_TAG.get_or_init(build_image).clone()
}
/// Builds the environment variables passed to the container: `HOME`,
/// `XDG_DATA_HOME` and `XDG_STATE_HOME` point at the layout's directories,
/// and `PATH` is set to `DEFAULT_PATH`.
fn build_env(layout: &TestLayout) -> BTreeMap<String, String> {
    fn lossy(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    BTreeMap::from([
        ("HOME".to_string(), lossy(&layout.home)),
        ("XDG_DATA_HOME".to_string(), lossy(&layout.xdg_data_home)),
        ("XDG_STATE_HOME".to_string(), lossy(&layout.xdg_state_home)),
        ("PATH".to_string(), DEFAULT_PATH.to_string()),
    ])
}
/// Returns the list of host paths to mount into the container: just `root`.
fn build_mounts(root: &Path) -> Vec<PathBuf> {
    Vec::from([root.to_owned()])
}
/// Starts a detached container from `image`, waits until its health endpoint
/// answers, and returns the base URL (`http://127.0.0.1:<port>`) to reach it.
///
/// * `container_id` - name given to the container via `--name`.
/// * `mounts` - host paths bind-mounted at the same path inside the container.
/// * `env` - environment variables passed with `-e`.
///
/// Panics if the container cannot be started, its published port cannot be
/// resolved, or the server does not become healthy. If a failure happens after
/// the container has started, the container is force-removed first so it does
/// not outlive the failed test.
fn run_container(
    container_id: &str,
    image: &str,
    mounts: &[PathBuf],
    env: &BTreeMap<String, String>,
) -> String {
    /// Force-removes the named container on drop while still armed. This
    /// covers panics raised between `docker run` succeeding and this function
    /// returning, when no owner exists yet to clean the container up.
    struct StartupGuard<'a> {
        container_id: &'a str,
        armed: bool,
    }

    impl<'a> Drop for StartupGuard<'a> {
        fn drop(&mut self) {
            if self.armed {
                // Best effort: there is nothing useful to do if removal fails.
                let _ = Command::new(docker_bin())
                    .args(["rm", "-f", self.container_id])
                    .output();
            }
        }
    }

    let mut args: Vec<String> = ["run", "-d", "--rm", "--name", container_id, "-p"]
        .iter()
        .map(|arg| arg.to_string())
        .collect();
    // An empty host port lets docker pick a free port on the loopback interface.
    args.push(format!("127.0.0.1::{CONTAINER_PORT}"));
    if cfg!(target_os = "linux") {
        args.push("--add-host".to_string());
        args.push("host.docker.internal:host-gateway".to_string());
    }
    for mount in mounts {
        args.push("-v".to_string());
        args.push(format!("{}:{}", mount.display(), mount.display()));
    }
    for (key, value) in env {
        args.push("-e".to_string());
        args.push(format!("{key}={value}"));
    }
    args.push(image.to_string());
    args.extend(
        ["server", "--host", "0.0.0.0", "--port"]
            .iter()
            .map(|arg| arg.to_string()),
    );
    args.push(CONTAINER_PORT.to_string());
    args.push("--no-token".to_string());

    let output = Command::new(docker_bin())
        .args(&args)
        .output()
        .expect("start docker test container");
    if !output.status.success() {
        panic!(
            "failed to start docker test container: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }

    // The container is running from here on; remove it if anything below panics.
    let mut guard = StartupGuard {
        container_id,
        armed: true,
    };

    let port_output = Command::new(docker_bin())
        .args(["port", container_id, &format!("{CONTAINER_PORT}/tcp")])
        .output()
        .expect("resolve mapped docker port");
    if !port_output.status.success() {
        panic!(
            "failed to resolve docker test port: {}",
            String::from_utf8_lossy(&port_output.stderr)
        );
    }
    let mapping = String::from_utf8(port_output.stdout)
        .expect("docker port utf8")
        .trim()
        .to_string();
    // The port is whatever follows the last ':' in the mapping. Validate it
    // here so an unexpected mapping fails immediately with a clear message
    // instead of surfacing later as a health-check timeout.
    let host_port: u16 = mapping
        .rsplit(':')
        .next()
        .and_then(|port| port.trim().parse().ok())
        .unwrap_or_else(|| panic!("unexpected docker port mapping: {mapping:?}"));

    let base_url = format!("http://127.0.0.1:{host_port}");
    wait_for_health(&base_url);

    // Startup succeeded: ownership of the container passes to the caller.
    guard.armed = false;
    base_url
}
fn wait_for_health(base_url: &str) {
let started = SystemTime::now();
loop {
if probe_health(base_url) {
return;
}
if started
.elapsed()
.unwrap_or_else(|_| Duration::from_secs(0))
.gt(&Duration::from_secs(60))
{
panic!("timed out waiting for common-software docker test server");
}
thread::sleep(Duration::from_millis(200));
}
}
/// Sends a single `GET /v1/health` request over a raw TCP connection and
/// reports whether the response starts with an HTTP 200 status line.
///
/// Any connection, write, or read failure is reported as `false`.
fn probe_health(base_url: &str) -> bool {
    // Performs the request and returns the full response text.
    fn fetch(address: &str) -> std::io::Result<String> {
        let mut stream = TcpStream::connect(address)?;
        // Failing to set a timeout is not treated as a probe failure.
        let _ = stream.set_read_timeout(Some(Duration::from_secs(2)));
        let _ = stream.set_write_timeout(Some(Duration::from_secs(2)));

        let request =
            format!("GET /v1/health HTTP/1.1\r\nHost: {address}\r\nConnection: close\r\n\r\n");
        stream.write_all(request.as_bytes())?;

        let mut response = String::new();
        stream.read_to_string(&mut response)?;
        Ok(response)
    }

    // The address is the URL without its scheme; other inputs are used as-is.
    let address = base_url.strip_prefix("http://").unwrap_or(base_url);
    match fetch(address) {
        Ok(response) => ["HTTP/1.1 200", "HTTP/1.0 200"]
            .iter()
            .any(|status| response.starts_with(*status)),
        Err(_) => false,
    }
}
/// Generates a container name from the process id, the current Unix time in
/// milliseconds (0 if the clock is before the epoch), and a per-process
/// counter.
fn unique_container_id() -> String {
    let millis = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        Err(_) => 0,
    };
    let counter = CONTAINER_COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    format!("sandbox-agent-common-sw-{pid}-{millis}-{counter}")
}
fn repo_root() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("../../..")
.canonicalize()
.expect("repo root")
}
/// Returns the docker executable to use, resolved once per process.
///
/// Resolution order: the path in `SANDBOX_AGENT_TEST_DOCKER_BIN` if it
/// exists, then the first existing well-known install location, and finally
/// the bare name `docker`.
fn docker_bin() -> &'static Path {
    const WELL_KNOWN: [&str; 3] = [
        "/usr/local/bin/docker",
        "/opt/homebrew/bin/docker",
        "/usr/bin/docker",
    ];

    fn locate() -> PathBuf {
        let from_env = std::env::var_os("SANDBOX_AGENT_TEST_DOCKER_BIN").map(PathBuf::from);
        // The chain is lazy, so candidates are probed in order and probing
        // stops at the first path that exists.
        from_env
            .into_iter()
            .chain(WELL_KNOWN.iter().map(|candidate| PathBuf::from(*candidate)))
            .find(|path| path.exists())
            .unwrap_or_else(|| PathBuf::from("docker"))
    }

    DOCKER_BIN.get_or_init(locate).as_path()
}