Add desktop runtime API and SDK support

This commit is contained in:
Nathan Flurry 2026-03-07 23:32:49 -08:00
parent 3d9476ed0b
commit 641597afe6
27 changed files with 5881 additions and 21 deletions

View file

@ -11,6 +11,9 @@ mod build_version {
include!(concat!(env!("OUT_DIR"), "/version.rs"));
}
use crate::desktop_install::{
install_desktop, DesktopInstallRequest, DesktopPackageManager,
};
use crate::router::{
build_router_with_state, shutdown_servers, AppState, AuthConfig, BrandingMode,
};
@ -75,6 +78,8 @@ pub enum Command {
Server(ServerArgs),
/// Call the HTTP API without writing client code.
Api(ApiArgs),
/// Install first-party runtime dependencies.
Install(InstallArgs),
/// EXPERIMENTAL: OpenCode compatibility layer (disabled until ACP Phase 7).
Opencode(OpencodeArgs),
/// Manage the sandbox-agent background daemon.
@ -115,6 +120,12 @@ pub struct ApiArgs {
command: ApiCommand,
}
// Arguments for the top-level `install` command (`Command::Install`).
// The struct only carries the nested subcommand that selects what to install.
#[derive(Args, Debug)]
pub struct InstallArgs {
// Parsed by clap as a subcommand; dispatched by `run_install`.
#[command(subcommand)]
command: InstallCommand,
}
#[derive(Args, Debug)]
pub struct OpencodeArgs {
#[arg(long, short = 'H', default_value = DEFAULT_HOST)]
@ -153,6 +164,12 @@ pub struct DaemonArgs {
command: DaemonCommand,
}
// Subcommands available under `install`. Each variant wraps the argument
// struct for one install target; `run_install` matches on this enum.
#[derive(Subcommand, Debug)]
pub enum InstallCommand {
/// Install desktop runtime dependencies.
Desktop(InstallDesktopArgs),
}
#[derive(Subcommand, Debug)]
pub enum DaemonCommand {
/// Start the daemon in the background.
@ -304,6 +321,18 @@ pub struct InstallAgentArgs {
agent_process_version: Option<String>,
}
// Flags for `install desktop`. `install_desktop_local` copies every field
// one-to-one into a `DesktopInstallRequest`.
#[derive(Args, Debug)]
pub struct InstallDesktopArgs {
// `--yes`: skip the interactive confirmation prompt.
#[arg(long, default_value_t = false)]
yes: bool,
// `--print-only`: print the package list and install command, then return without installing.
#[arg(long, default_value_t = false)]
print_only: bool,
// `--package-manager`: use this package manager instead of auto-detecting one.
#[arg(long, value_enum)]
package_manager: Option<DesktopPackageManager>,
// `--no-fonts`: leave the DejaVu font package out of the package list.
#[arg(long, default_value_t = false)]
no_fonts: bool,
}
#[derive(Args, Debug)]
pub struct CredentialsExtractArgs {
#[arg(long, short = 'a', value_enum)]
@ -399,6 +428,7 @@ pub fn run_command(command: &Command, cli: &CliConfig) -> Result<(), CliError> {
match command {
Command::Server(args) => run_server(cli, args),
Command::Api(subcommand) => run_api(&subcommand.command, cli),
Command::Install(subcommand) => run_install(&subcommand.command),
Command::Opencode(args) => run_opencode(cli, args),
Command::Daemon(subcommand) => run_daemon(&subcommand.command, cli),
Command::InstallAgent(args) => install_agent_local(args),
@ -406,6 +436,12 @@ pub fn run_command(command: &Command, cli: &CliConfig) -> Result<(), CliError> {
}
}
/// Dispatches an `install` subcommand to the function that implements it.
fn run_install(command: &InstallCommand) -> Result<(), CliError> {
    // `InstallCommand` has exactly one variant, so this pattern is irrefutable;
    // adding a variant makes this line fail to compile, just like a `match` would.
    let InstallCommand::Desktop(desktop_args) = command;
    install_desktop_local(desktop_args)
}
fn run_server(cli: &CliConfig, server: &ServerArgs) -> Result<(), CliError> {
let auth = if let Some(token) = cli.token.clone() {
AuthConfig::with_token(token)
@ -470,6 +506,17 @@ fn run_api(command: &ApiCommand, cli: &CliConfig) -> Result<(), CliError> {
}
}
/// Runs the desktop dependency installer with the options given on the CLI.
///
/// Installer failures (plain strings) are wrapped in `CliError::Server`.
fn install_desktop_local(args: &InstallDesktopArgs) -> Result<(), CliError> {
    let request = DesktopInstallRequest {
        yes: args.yes,
        print_only: args.print_only,
        package_manager: args.package_manager,
        no_fonts: args.no_fonts,
    };
    match install_desktop(request) {
        Ok(_) => Ok(()),
        Err(message) => Err(CliError::Server(message)),
    }
}
fn run_agents(command: &AgentsCommand, cli: &CliConfig) -> Result<(), CliError> {
match command {
AgentsCommand::List(args) => {

View file

@ -0,0 +1,158 @@
use sandbox_agent_error::ProblemDetails;
use serde_json::{json, Map, Value};
use crate::desktop_types::{DesktopErrorInfo, DesktopProcessInfo};
/// Error raised by the desktop runtime, convertible into an HTTP
/// `ProblemDetails` response (`to_problem_details`) or a compact
/// `DesktopErrorInfo` (`to_error_info`).
#[derive(Debug, Clone)]
pub struct DesktopProblem {
// HTTP status code placed in the problem response.
status: u16,
// Short human-readable title of the problem.
title: &'static str,
// Machine-readable error code; also used to build the problem `type` URN.
code: &'static str,
// Detailed message, emitted as the problem `detail`.
message: String,
// Names of missing dependencies; emitted as `missingDependencies` when non-empty.
missing_dependencies: Vec<String>,
// Suggested install command; emitted as `installCommand` when present.
install_command: Option<String>,
// Process snapshots attached to the problem; emitted as `processes` when non-empty.
processes: Vec<DesktopProcessInfo>,
}
impl DesktopProblem {
pub fn unsupported_platform(message: impl Into<String>) -> Self {
Self::new(501, "Desktop Unsupported", "desktop_unsupported_platform", message)
}
pub fn dependencies_missing(
missing_dependencies: Vec<String>,
install_command: Option<String>,
processes: Vec<DesktopProcessInfo>,
) -> Self {
let message = if missing_dependencies.is_empty() {
"Desktop dependencies are not installed".to_string()
} else {
format!(
"Desktop dependencies are not installed: {}",
missing_dependencies.join(", ")
)
};
Self::new(
503,
"Desktop Dependencies Missing",
"desktop_dependencies_missing",
message,
)
.with_missing_dependencies(missing_dependencies)
.with_install_command(install_command)
.with_processes(processes)
}
pub fn runtime_inactive(message: impl Into<String>) -> Self {
Self::new(409, "Desktop Runtime Inactive", "desktop_runtime_inactive", message)
}
pub fn runtime_starting(message: impl Into<String>) -> Self {
Self::new(409, "Desktop Runtime Starting", "desktop_runtime_starting", message)
}
pub fn runtime_failed(
message: impl Into<String>,
install_command: Option<String>,
processes: Vec<DesktopProcessInfo>,
) -> Self {
Self::new(503, "Desktop Runtime Failed", "desktop_runtime_failed", message)
.with_install_command(install_command)
.with_processes(processes)
}
pub fn invalid_action(message: impl Into<String>) -> Self {
Self::new(400, "Desktop Invalid Action", "desktop_invalid_action", message)
}
pub fn screenshot_failed(message: impl Into<String>, processes: Vec<DesktopProcessInfo>) -> Self {
Self::new(502, "Desktop Screenshot Failed", "desktop_screenshot_failed", message)
.with_processes(processes)
}
pub fn input_failed(message: impl Into<String>, processes: Vec<DesktopProcessInfo>) -> Self {
Self::new(502, "Desktop Input Failed", "desktop_input_failed", message)
.with_processes(processes)
}
pub fn to_problem_details(&self) -> ProblemDetails {
let mut extensions = Map::new();
extensions.insert("code".to_string(), Value::String(self.code.to_string()));
if !self.missing_dependencies.is_empty() {
extensions.insert(
"missingDependencies".to_string(),
Value::Array(
self.missing_dependencies
.iter()
.cloned()
.map(Value::String)
.collect(),
),
);
}
if let Some(install_command) = self.install_command.as_ref() {
extensions.insert(
"installCommand".to_string(),
Value::String(install_command.clone()),
);
}
if !self.processes.is_empty() {
extensions.insert(
"processes".to_string(),
json!(self.processes),
);
}
ProblemDetails {
type_: format!("urn:sandbox-agent:error:{}", self.code),
title: self.title.to_string(),
status: self.status,
detail: Some(self.message.clone()),
instance: None,
extensions,
}
}
pub fn to_error_info(&self) -> DesktopErrorInfo {
DesktopErrorInfo {
code: self.code.to_string(),
message: self.message.clone(),
}
}
pub fn code(&self) -> &'static str {
self.code
}
fn new(
status: u16,
title: &'static str,
code: &'static str,
message: impl Into<String>,
) -> Self {
Self {
status,
title,
code,
message: message.into(),
missing_dependencies: Vec::new(),
install_command: None,
processes: Vec::new(),
}
}
fn with_missing_dependencies(mut self, missing_dependencies: Vec<String>) -> Self {
self.missing_dependencies = missing_dependencies;
self
}
fn with_install_command(mut self, install_command: Option<String>) -> Self {
self.install_command = install_command;
self
}
fn with_processes(mut self, processes: Vec<DesktopProcessInfo>) -> Self {
self.processes = processes;
self
}
}

View file

@ -0,0 +1,282 @@
use std::fmt;
use std::io::{self, Write};
use std::path::PathBuf;
use std::process::Command as ProcessCommand;
use clap::ValueEnum;
// Package managers the desktop installer knows how to drive.
// Derives clap's `ValueEnum`, so it can be used directly as a CLI flag value;
// `Display` renders the variants as `apt`, `dnf` and `apk`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
pub enum DesktopPackageManager {
// Detected via the `apt-get` binary.
Apt,
// Detected via the `dnf` binary.
Dnf,
// Detected via the `apk` binary.
Apk,
}
/// Options accepted by `install_desktop`.
#[derive(Debug, Clone)]
pub struct DesktopInstallRequest {
/// Skip the interactive confirmation prompt.
pub yes: bool,
/// Print the package list and install command, then return without installing.
pub print_only: bool,
/// Package manager to use; `None` means auto-detect from `PATH`.
pub package_manager: Option<DesktopPackageManager>,
/// Exclude the DejaVu font package from the package list.
pub no_fonts: bool,
}
/// Installs the system packages required by the desktop runtime.
///
/// Resolves the package manager (explicit request, otherwise auto-detected
/// from `PATH`), prints the package list and the equivalent shell command,
/// and then, unless `print_only` is set, asks for confirmation (skipped when
/// `yes` is set) and runs the installation.
///
/// `print_only` never executes anything, so it does not require root or
/// `sudo`; the privilege check only applies to real installs.
///
/// # Errors
///
/// Returns a human-readable message when the host is not Linux, no supported
/// package manager can be detected, a real install is requested without root
/// or `sudo`, the user declines the prompt, or an install command fails.
pub fn install_desktop(request: DesktopInstallRequest) -> Result<(), String> {
    if std::env::consts::OS != "linux" {
        return Err("desktop installation is only supported on Linux hosts and sandboxes".to_string());
    }

    let package_manager = match request.package_manager {
        Some(value) => value,
        None => detect_package_manager().ok_or_else(|| {
            "could not detect a supported package manager (expected apt, dnf, or apk)".to_string()
        })?,
    };

    let packages = desktop_packages(package_manager, request.no_fonts);

    // Query the effective uid once and reuse the answer below.
    let is_root = running_as_root();
    let used_sudo = !is_root && find_binary("sudo").is_some();

    // Only a real install needs elevated privileges; printing the plan does not.
    if !request.print_only && !is_root && !used_sudo {
        return Err(
            "desktop installation requires root or sudo access; rerun as root or install dependencies manually"
                .to_string(),
        );
    }

    println!("Desktop package manager: {}", package_manager);
    println!("Desktop packages:");
    for package in &packages {
        println!(" - {package}");
    }
    println!("Install command:");
    println!(" {}", render_install_command(package_manager, used_sudo, &packages));

    if request.print_only {
        return Ok(());
    }

    if !request.yes && !prompt_yes_no("Proceed with desktop dependency installation? [y/N] ")? {
        return Err("installation cancelled".to_string());
    }

    run_install_commands(package_manager, used_sudo, &packages)?;
    println!("Desktop dependencies installed.");
    Ok(())
}
/// Picks the first supported package manager whose binary is found on `PATH`.
///
/// Probe order is `apt-get`, then `dnf`, then `apk`; returns `None` when none
/// of them is present.
fn detect_package_manager() -> Option<DesktopPackageManager> {
    let probes = [
        ("apt-get", DesktopPackageManager::Apt),
        ("dnf", DesktopPackageManager::Dnf),
        ("apk", DesktopPackageManager::Apk),
    ];
    probes
        .iter()
        .find(|(binary, _)| find_binary(binary).is_some())
        .map(|&(_, manager)| manager)
}
/// Returns the package names to install for `package_manager`.
///
/// When `no_fonts` is true the DejaVu font package is left out of the list.
fn desktop_packages(
    package_manager: DesktopPackageManager,
    no_fonts: bool,
) -> Vec<String> {
    // Font package names across all supported package managers.
    const FONT_PACKAGES: [&str; 3] = ["fonts-dejavu-core", "dejavu-sans-fonts", "ttf-dejavu"];

    let names: [&str; 8] = match package_manager {
        DesktopPackageManager::Apt => [
            "xvfb",
            "openbox",
            "xdotool",
            "imagemagick",
            "x11-xserver-utils",
            "dbus-x11",
            "xauth",
            "fonts-dejavu-core",
        ],
        DesktopPackageManager::Dnf => [
            "xorg-x11-server-Xvfb",
            "openbox",
            "xdotool",
            "ImageMagick",
            "xrandr",
            "dbus-x11",
            "xauth",
            "dejavu-sans-fonts",
        ],
        DesktopPackageManager::Apk => [
            "xvfb",
            "openbox",
            "xdotool",
            "imagemagick",
            "xrandr",
            "dbus",
            "xauth",
            "ttf-dejavu",
        ],
    };

    names
        .iter()
        .copied()
        .filter(|name| !(no_fonts && FONT_PACKAGES.contains(name)))
        .map(str::to_string)
        .collect()
}
/// Renders the shell command line equivalent to what `run_install_commands`
/// executes, for display to the user. Every invocation is prefixed with
/// `sudo ` when `used_sudo` is true.
fn render_install_command(
    package_manager: DesktopPackageManager,
    used_sudo: bool,
    packages: &[String],
) -> String {
    let joined = packages.join(" ");
    let sudo = match used_sudo {
        true => "sudo ",
        false => "",
    };
    match package_manager {
        DesktopPackageManager::Apt => format!(
            "{sudo}apt-get update && {sudo}env DEBIAN_FRONTEND=noninteractive apt-get install -y {}",
            joined
        ),
        DesktopPackageManager::Dnf => format!("{sudo}dnf install -y {}", joined),
        DesktopPackageManager::Apk => format!("{sudo}apk add --no-cache {}", joined),
    }
}
/// Executes the package-manager commands that install `packages`.
///
/// For apt an `apt-get update` runs first and the install is launched through
/// `env` so that `DEBIAN_FRONTEND=noninteractive` is set. The first failing
/// command aborts the sequence and its error is returned.
fn run_install_commands(
    package_manager: DesktopPackageManager,
    used_sudo: bool,
    packages: &[String],
) -> Result<(), String> {
    // Builds `<fixed leading args> <packages...>` as owned strings.
    let with_packages = |leading: &[&str]| -> Vec<String> {
        leading
            .iter()
            .map(|arg| arg.to_string())
            .chain(packages.iter().cloned())
            .collect()
    };

    let (program, args) = match package_manager {
        DesktopPackageManager::Apt => {
            run_command(command_with_privilege(
                used_sudo,
                "apt-get",
                vec!["update".to_string()],
            ))?;
            (
                "env",
                with_packages(&[
                    "DEBIAN_FRONTEND=noninteractive",
                    "apt-get",
                    "install",
                    "-y",
                ]),
            )
        }
        DesktopPackageManager::Dnf => ("dnf", with_packages(&["install", "-y"])),
        DesktopPackageManager::Apk => ("apk", with_packages(&["add", "--no-cache"])),
    };

    run_command(command_with_privilege(used_sudo, program, args))
}
/// Returns the `(program, args)` pair to execute, wrapping the invocation in
/// `sudo` when `used_sudo` is true (the original program becomes the first
/// argument to `sudo`).
fn command_with_privilege(
    used_sudo: bool,
    program: &str,
    args: Vec<String>,
) -> (String, Vec<String>) {
    if !used_sudo {
        return (program.to_string(), args);
    }
    let wrapped: Vec<String> = std::iter::once(program.to_string()).chain(args).collect();
    ("sudo".to_string(), wrapped)
}
/// Runs `program` with `args`, waiting for it to finish.
///
/// Returns an error message when the process cannot be spawned or exits with
/// a non-success status.
fn run_command((program, args): (String, Vec<String>)) -> Result<(), String> {
    let mut command = ProcessCommand::new(&program);
    command.args(&args);
    match command.status() {
        Err(err) => Err(format!("failed to run `{program}`: {err}")),
        Ok(status) if status.success() => Ok(()),
        Ok(status) => Err(format!(
            "command `{}` exited with status {}",
            format_command(&program, &args),
            status
        )),
    }
}
/// Prints `prompt` on stdout and reads one line from stdin.
///
/// Returns `Ok(true)` only when the trimmed, lowercased answer is `y` or
/// `yes`; any other input (including an empty line) is `Ok(false)`.
fn prompt_yes_no(prompt: &str) -> Result<bool, String> {
    print!("{prompt}");
    // stdout may be line-buffered, so flush to make the prompt visible before blocking on stdin.
    if let Err(err) = io::stdout().flush() {
        return Err(format!("failed to flush prompt: {err}"));
    }

    let mut answer = String::new();
    if let Err(err) = io::stdin().read_line(&mut answer) {
        return Err(format!("failed to read confirmation: {err}"));
    }

    let answer = answer.trim().to_ascii_lowercase();
    Ok(answer == "y" || answer == "yes")
}
/// Reports whether the process runs with an effective uid of 0.
/// Always `false` on non-unix targets.
fn running_as_root() -> bool {
    #[cfg(unix)]
    {
        // SAFETY: `geteuid` takes no arguments and only reads the effective
        // user id of the calling process.
        unsafe { libc::geteuid() == 0 }
    }
    #[cfg(not(unix))]
    {
        false
    }
}
/// Looks up `name` in the directories listed in `PATH`.
///
/// Returns the first candidate that is a regular file and, on unix, has at
/// least one execute permission bit set, so a non-executable file that merely
/// shares the name is not reported as an available binary. Returns `None`
/// when `PATH` is unset or no directory contains a match.
fn find_binary(name: &str) -> Option<PathBuf> {
    #[cfg(unix)]
    fn is_executable(metadata: &std::fs::Metadata) -> bool {
        use std::os::unix::fs::PermissionsExt;
        // Any of the user/group/other execute bits counts.
        metadata.permissions().mode() & 0o111 != 0
    }

    #[cfg(not(unix))]
    fn is_executable(_metadata: &std::fs::Metadata) -> bool {
        // No permission bits to inspect here; a regular file is accepted as before.
        true
    }

    let path_env = std::env::var_os("PATH")?;
    std::env::split_paths(&path_env)
        .map(|dir| dir.join(name))
        .find(|candidate| {
            // `metadata` follows symlinks, matching what `Path::is_file` checks.
            candidate
                .metadata()
                .map(|metadata| metadata.is_file() && is_executable(&metadata))
                .unwrap_or(false)
        })
}
/// Joins `program` and `args` with single spaces for display in messages.
/// No shell quoting is applied.
fn format_command(program: &str, args: &[String]) -> String {
    std::iter::once(program)
        .chain(args.iter().map(String::as_str))
        .collect::<Vec<&str>>()
        .join(" ")
}
/// Renders the package manager as its short lowercase name.
impl fmt::Display for DesktopPackageManager {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            DesktopPackageManager::Apt => "apt",
            DesktopPackageManager::Dnf => "dnf",
            DesktopPackageManager::Apk => "apk",
        };
        f.write_str(label)
    }
}
// Unit tests for the pure helpers of the desktop installer; nothing here
// touches the host package manager.
#[cfg(test)]
mod tests {
use super::*;
// `no_fonts` removes the font package but keeps the rest of the apt list.
#[test]
fn desktop_packages_support_no_fonts() {
let packages = desktop_packages(DesktopPackageManager::Apt, true);
assert!(!packages.iter().any(|value| value == "fonts-dejavu-core"));
assert!(packages.iter().any(|value| value == "xvfb"));
}
// Without sudo the apk command is rendered with no prefix.
#[test]
fn render_install_command_matches_package_manager() {
let packages = vec!["xvfb".to_string(), "openbox".to_string()];
let command = render_install_command(DesktopPackageManager::Apk, false, &packages);
assert_eq!(command, "apk add --no-cache xvfb openbox");
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,173 @@
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
// State of the desktop runtime as reported in `DesktopStatusResponse`.
// Serialized in snake_case (for example `install_required`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum DesktopState {
Inactive,
InstallRequired,
Starting,
Active,
Stopping,
Failed,
}
// Display resolution. Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopResolution {
pub width: u32,
pub height: u32,
// Optional; omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dpi: Option<u32>,
}
// Compact error description (code + message), produced from a
// `DesktopProblem` by `DesktopProblem::to_error_info`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopErrorInfo {
pub code: String,
pub message: String,
}
// Snapshot of one process associated with the desktop runtime.
// Serialized in camelCase (`logPath`).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopProcessInfo {
pub name: String,
// Optional; omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub pid: Option<u32>,
pub running: bool,
// Optional; omitted from the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub log_path: Option<String>,
}
// Status snapshot of the desktop runtime; the response body of the desktop
// status, start and stop endpoints. Serialized in camelCase.
// Optional fields are omitted when `None`; the two list fields are always
// serialized (possibly empty) and default to empty when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopStatusResponse {
pub state: DesktopState,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub display: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub resolution: Option<DesktopResolution>,
// NOTE(review): timestamp format is not visible here; confirm against the runtime.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub started_at: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub last_error: Option<DesktopErrorInfo>,
#[serde(default)]
pub missing_dependencies: Vec<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub install_command: Option<String>,
#[serde(default)]
pub processes: Vec<DesktopProcessInfo>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub runtime_log_path: Option<String>,
}
// Request body of the desktop start endpoint. Every field is optional and
// may be left out of the JSON body; `Default` yields all-`None`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct DesktopStartRequest {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub width: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub height: Option<u32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dpi: Option<u32>,
}
// Query parameters of the full-screenshot endpoint; currently has no fields.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, Default)]
#[serde(rename_all = "camelCase")]
pub struct DesktopScreenshotQuery {}
// Query parameters of the region-screenshot endpoint: the top-left corner
// (`x`, `y`) and size (`width`, `height`) of the requested region.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopRegionScreenshotQuery {
pub x: i32,
pub y: i32,
pub width: u32,
pub height: u32,
}
// Mouse position returned by the mouse position, move, click, drag and
// scroll endpoints. `screen` and `window` are omitted when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopMousePositionResponse {
pub x: i32,
pub y: i32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub screen: Option<i32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub window: Option<String>,
}
// Mouse button selector; serialized as `left`, `middle` or `right`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum DesktopMouseButton {
Left,
Middle,
Right,
}
// Request body of the mouse move endpoint: the target pointer coordinates.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopMouseMoveRequest {
pub x: i32,
pub y: i32,
}
// Request body of the mouse click endpoint. `button` and `clickCount` are
// optional; the defaults applied when they are absent are not visible here.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopMouseClickRequest {
pub x: i32,
pub y: i32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub button: Option<DesktopMouseButton>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub click_count: Option<u32>,
}
// Request body of the mouse drag endpoint: start and end coordinates plus an
// optional button. Serialized in camelCase (`startX`, `endY`, ...).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopMouseDragRequest {
pub start_x: i32,
pub start_y: i32,
pub end_x: i32,
pub end_y: i32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub button: Option<DesktopMouseButton>,
}
// Request body of the mouse scroll endpoint: pointer coordinates plus
// optional scroll deltas (`deltaX`, `deltaY` on the wire).
// NOTE(review): the unit and sign convention of the deltas are not visible here; confirm.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopMouseScrollRequest {
pub x: i32,
pub y: i32,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub delta_x: Option<i32>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub delta_y: Option<i32>,
}
// Request body of the keyboard type endpoint: the text to type and an
// optional delay (`delayMs` on the wire; presumably milliseconds, per the name).
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopKeyboardTypeRequest {
pub text: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub delay_ms: Option<u32>,
}
// Request body of the keyboard press endpoint: the key (or shortcut) to press.
// NOTE(review): the accepted key syntax is not visible here; the handler docs mention `xdotool key`.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
#[serde(rename_all = "camelCase")]
pub struct DesktopKeyboardPressRequest {
pub key: String,
}
// Result body returned by the keyboard type and press endpoints.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopActionResponse {
pub ok: bool,
}
// Response body of the display info endpoint: the display identifier and its
// resolution.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct DesktopDisplayInfoResponse {
pub display: String,
pub resolution: DesktopResolution,
}

View file

@ -1,6 +1,10 @@
//! Sandbox agent core utilities.
mod acp_proxy_runtime;
mod desktop_install;
mod desktop_errors;
mod desktop_runtime;
pub mod desktop_types;
pub mod cli;
pub mod daemon;
mod process_runtime;

View file

@ -37,6 +37,9 @@ use tracing::Span;
use utoipa::{Modify, OpenApi, ToSchema};
use crate::acp_proxy_runtime::{AcpProxyRuntime, ProxyPostOutcome};
use crate::desktop_errors::DesktopProblem;
use crate::desktop_runtime::DesktopRuntime;
use crate::desktop_types::*;
use crate::process_runtime::{
decode_input_bytes, ProcessLogFilter, ProcessLogFilterStream, ProcessRuntime,
ProcessRuntimeConfig, ProcessSnapshot, ProcessStartSpec, ProcessStatus, ProcessStream, RunSpec,
@ -87,6 +90,7 @@ pub struct AppState {
acp_proxy: Arc<AcpProxyRuntime>,
opencode_server_manager: Arc<OpenCodeServerManager>,
process_runtime: Arc<ProcessRuntime>,
desktop_runtime: Arc<DesktopRuntime>,
pub(crate) branding: BrandingMode,
version_cache: Mutex<HashMap<AgentId, CachedAgentVersion>>,
}
@ -111,12 +115,14 @@ impl AppState {
},
));
let process_runtime = Arc::new(ProcessRuntime::new());
let desktop_runtime = Arc::new(DesktopRuntime::new());
Self {
auth,
agent_manager,
acp_proxy,
opencode_server_manager,
process_runtime,
desktop_runtime,
branding,
version_cache: Mutex::new(HashMap::new()),
}
@ -138,6 +144,10 @@ impl AppState {
self.process_runtime.clone()
}
/// Returns a new shared handle to the desktop runtime.
pub(crate) fn desktop_runtime(&self) -> Arc<DesktopRuntime> {
    Arc::clone(&self.desktop_runtime)
}
/// Drops the cached version entry for `agent`, if one exists.
///
/// # Panics
///
/// Panics if the version cache mutex is poisoned.
pub(crate) fn purge_version_cache(&self, agent: AgentId) {
    let mut cache = self.version_cache.lock().unwrap();
    cache.remove(&agent);
}
@ -172,6 +182,22 @@ pub fn build_router(state: AppState) -> Router {
pub fn build_router_with_state(shared: Arc<AppState>) -> (Router, Arc<AppState>) {
let mut v1_router = Router::new()
.route("/health", get(get_v1_health))
.route("/desktop/status", get(get_v1_desktop_status))
.route("/desktop/start", post(post_v1_desktop_start))
.route("/desktop/stop", post(post_v1_desktop_stop))
.route("/desktop/screenshot", get(get_v1_desktop_screenshot))
.route(
"/desktop/screenshot/region",
get(get_v1_desktop_screenshot_region),
)
.route("/desktop/mouse/position", get(get_v1_desktop_mouse_position))
.route("/desktop/mouse/move", post(post_v1_desktop_mouse_move))
.route("/desktop/mouse/click", post(post_v1_desktop_mouse_click))
.route("/desktop/mouse/drag", post(post_v1_desktop_mouse_drag))
.route("/desktop/mouse/scroll", post(post_v1_desktop_mouse_scroll))
.route("/desktop/keyboard/type", post(post_v1_desktop_keyboard_type))
.route("/desktop/keyboard/press", post(post_v1_desktop_keyboard_press))
.route("/desktop/display/info", get(get_v1_desktop_display_info))
.route("/agents", get(get_v1_agents))
.route("/agents/:agent", get(get_v1_agent))
.route("/agents/:agent/install", post(post_v1_agent_install))
@ -316,12 +342,26 @@ async fn opencode_unavailable() -> Response {
/// Shuts down the background services owned by `state`.
///
/// The steps run sequentially, each awaited before the next starts: ACP
/// proxy, then the OpenCode server manager, then the desktop runtime.
pub async fn shutdown_servers(state: &Arc<AppState>) {
state.acp_proxy().shutdown_all().await;
state.opencode_server_manager().shutdown().await;
// Stop the desktop runtime last, after the other services have shut down.
state.desktop_runtime().shutdown().await;
}
#[derive(OpenApi)]
#[openapi(
paths(
get_v1_health,
get_v1_desktop_status,
post_v1_desktop_start,
post_v1_desktop_stop,
get_v1_desktop_screenshot,
get_v1_desktop_screenshot_region,
get_v1_desktop_mouse_position,
post_v1_desktop_mouse_move,
post_v1_desktop_mouse_click,
post_v1_desktop_mouse_drag,
post_v1_desktop_mouse_scroll,
post_v1_desktop_keyboard_type,
post_v1_desktop_keyboard_press,
get_v1_desktop_display_info,
get_v1_agents,
get_v1_agent,
post_v1_agent_install,
@ -360,6 +400,24 @@ pub async fn shutdown_servers(state: &Arc<AppState>) {
components(
schemas(
HealthResponse,
DesktopState,
DesktopResolution,
DesktopErrorInfo,
DesktopProcessInfo,
DesktopStatusResponse,
DesktopStartRequest,
DesktopScreenshotQuery,
DesktopRegionScreenshotQuery,
DesktopMousePositionResponse,
DesktopMouseButton,
DesktopMouseMoveRequest,
DesktopMouseClickRequest,
DesktopMouseDragRequest,
DesktopMouseScrollRequest,
DesktopKeyboardTypeRequest,
DesktopKeyboardPressRequest,
DesktopActionResponse,
DesktopDisplayInfoResponse,
ServerStatus,
ServerStatusInfo,
AgentCapabilities,
@ -438,6 +496,12 @@ impl From<ProblemDetails> for ApiError {
}
}
impl From<DesktopProblem> for ApiError {
fn from(value: DesktopProblem) -> Self {
Self::Problem(value.to_problem_details())
}
}
impl IntoResponse for ApiError {
fn into_response(self) -> Response {
let problem = match &self {
@ -476,6 +540,305 @@ async fn get_v1_health() -> Json<HealthResponse> {
})
}
/// Get desktop runtime status.
///
/// Returns the current desktop runtime state, dependency status, active
/// display metadata, and supervised process information.
#[utoipa::path(
get,
path = "/v1/desktop/status",
tag = "v1",
responses(
(status = 200, description = "Desktop runtime status", body = DesktopStatusResponse),
(status = 401, description = "Authentication required", body = ProblemDetails)
)
)]
async fn get_v1_desktop_status(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopStatusResponse>, ApiError> {
    // Reading the status cannot fail; the snapshot is returned as JSON.
    let status = state.desktop_runtime().status().await;
    Ok(Json(status))
}
/// Start the private desktop runtime.
///
/// Lazily launches the managed Xvfb/openbox stack, validates display health,
/// and returns the resulting desktop status snapshot.
#[utoipa::path(
post,
path = "/v1/desktop/start",
tag = "v1",
request_body = DesktopStartRequest,
responses(
(status = 200, description = "Desktop runtime status after start", body = DesktopStatusResponse),
(status = 400, description = "Invalid desktop start request", body = ProblemDetails),
(status = 409, description = "Desktop runtime is already transitioning", body = ProblemDetails),
(status = 501, description = "Desktop API unsupported on this platform", body = ProblemDetails),
(status = 503, description = "Desktop runtime could not be started", body = ProblemDetails)
)
)]
async fn post_v1_desktop_start(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopStartRequest>,
) -> Result<Json<DesktopStatusResponse>, ApiError> {
    // Start failures are converted into `ApiError` by `?`.
    Ok(Json(state.desktop_runtime().start(body).await?))
}
/// Stop the private desktop runtime.
///
/// Terminates the managed openbox/Xvfb/dbus processes owned by the desktop
/// runtime and returns the resulting status snapshot.
#[utoipa::path(
post,
path = "/v1/desktop/stop",
tag = "v1",
responses(
(status = 200, description = "Desktop runtime status after stop", body = DesktopStatusResponse),
(status = 409, description = "Desktop runtime is already transitioning", body = ProblemDetails)
)
)]
async fn post_v1_desktop_stop(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopStatusResponse>, ApiError> {
    // Stop failures are converted into `ApiError` by `?`.
    Ok(Json(state.desktop_runtime().stop().await?))
}
/// Capture a full desktop screenshot.
///
/// Performs a health-gated full-frame screenshot of the managed desktop and
/// returns PNG bytes.
#[utoipa::path(
get,
path = "/v1/desktop/screenshot",
tag = "v1",
responses(
(status = 200, description = "Desktop screenshot as PNG bytes"),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or screenshot capture failed", body = ProblemDetails)
)
)]
async fn get_v1_desktop_screenshot(
    State(state): State<Arc<AppState>>,
) -> Result<Response, ApiError> {
    let png = state.desktop_runtime().screenshot().await?;
    // The body is sent as raw bytes with an `image/png` content type.
    let headers = [(header::CONTENT_TYPE, "image/png")];
    Ok((headers, Bytes::from(png)).into_response())
}
/// Capture a desktop screenshot region.
///
/// Performs a health-gated screenshot crop against the managed desktop and
/// returns the requested PNG region bytes.
#[utoipa::path(
get,
path = "/v1/desktop/screenshot/region",
tag = "v1",
params(
("x" = i32, Query, description = "Region x coordinate"),
("y" = i32, Query, description = "Region y coordinate"),
("width" = u32, Query, description = "Region width"),
("height" = u32, Query, description = "Region height")
),
responses(
(status = 200, description = "Desktop screenshot region as PNG bytes"),
(status = 400, description = "Invalid screenshot region", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or screenshot capture failed", body = ProblemDetails)
)
)]
async fn get_v1_desktop_screenshot_region(
    State(state): State<Arc<AppState>>,
    Query(query): Query<DesktopRegionScreenshotQuery>,
) -> Result<Response, ApiError> {
    let png = state.desktop_runtime().screenshot_region(query).await?;
    // The body is sent as raw bytes with an `image/png` content type.
    let headers = [(header::CONTENT_TYPE, "image/png")];
    Ok((headers, Bytes::from(png)).into_response())
}
/// Get the current desktop mouse position.
///
/// Performs a health-gated mouse position query against the managed desktop.
#[utoipa::path(
get,
path = "/v1/desktop/mouse/position",
tag = "v1",
responses(
(status = 200, description = "Desktop mouse position", body = DesktopMousePositionResponse),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or input check failed", body = ProblemDetails)
)
)]
async fn get_v1_desktop_mouse_position(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopMousePositionResponse>, ApiError> {
    // Runtime errors are converted into `ApiError` by `?`.
    Ok(Json(state.desktop_runtime().mouse_position().await?))
}
/// Move the desktop mouse.
///
/// Performs a health-gated absolute pointer move on the managed desktop and
/// returns the resulting mouse position.
#[utoipa::path(
post,
path = "/v1/desktop/mouse/move",
tag = "v1",
request_body = DesktopMouseMoveRequest,
responses(
(status = 200, description = "Desktop mouse position after move", body = DesktopMousePositionResponse),
(status = 400, description = "Invalid mouse move request", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
)
)]
async fn post_v1_desktop_mouse_move(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopMouseMoveRequest>,
) -> Result<Json<DesktopMousePositionResponse>, ApiError> {
    // Responds with the mouse position reported by the runtime after the move.
    Ok(Json(state.desktop_runtime().move_mouse(body).await?))
}
/// Click on the desktop.
///
/// Performs a health-gated pointer move and click against the managed desktop
/// and returns the resulting mouse position.
#[utoipa::path(
post,
path = "/v1/desktop/mouse/click",
tag = "v1",
request_body = DesktopMouseClickRequest,
responses(
(status = 200, description = "Desktop mouse position after click", body = DesktopMousePositionResponse),
(status = 400, description = "Invalid mouse click request", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
)
)]
async fn post_v1_desktop_mouse_click(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopMouseClickRequest>,
) -> Result<Json<DesktopMousePositionResponse>, ApiError> {
    // Responds with the mouse position reported by the runtime after the click.
    Ok(Json(state.desktop_runtime().click_mouse(body).await?))
}
/// Drag the desktop mouse.
///
/// Performs a health-gated drag gesture against the managed desktop and
/// returns the resulting mouse position.
#[utoipa::path(
post,
path = "/v1/desktop/mouse/drag",
tag = "v1",
request_body = DesktopMouseDragRequest,
responses(
(status = 200, description = "Desktop mouse position after drag", body = DesktopMousePositionResponse),
(status = 400, description = "Invalid mouse drag request", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
)
)]
async fn post_v1_desktop_mouse_drag(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopMouseDragRequest>,
) -> Result<Json<DesktopMousePositionResponse>, ApiError> {
    // Responds with the mouse position reported by the runtime after the drag.
    Ok(Json(state.desktop_runtime().drag_mouse(body).await?))
}
/// Scroll the desktop mouse wheel.
///
/// Performs a health-gated scroll gesture at the requested coordinates and
/// returns the resulting mouse position.
#[utoipa::path(
post,
path = "/v1/desktop/mouse/scroll",
tag = "v1",
request_body = DesktopMouseScrollRequest,
responses(
(status = 200, description = "Desktop mouse position after scroll", body = DesktopMousePositionResponse),
(status = 400, description = "Invalid mouse scroll request", body = ProblemDetails),
(status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
(status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
)
)]
async fn post_v1_desktop_mouse_scroll(
    State(state): State<Arc<AppState>>,
    Json(body): Json<DesktopMouseScrollRequest>,
) -> Result<Json<DesktopMousePositionResponse>, ApiError> {
    // Responds with the mouse position reported by the runtime after the scroll.
    Ok(Json(state.desktop_runtime().scroll_mouse(body).await?))
}
/// Type desktop keyboard text.
///
/// Performs a health-gated `xdotool type` operation against the managed
/// desktop.
#[utoipa::path(
    post,
    path = "/v1/desktop/keyboard/type",
    tag = "v1",
    request_body = DesktopKeyboardTypeRequest,
    responses(
        (status = 200, description = "Desktop keyboard action result", body = DesktopActionResponse),
        (status = 400, description = "Invalid keyboard type request", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
        (status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
    )
)]
async fn post_v1_desktop_keyboard_type(
    State(state): State<Arc<AppState>>,
    Json(request): Json<DesktopKeyboardTypeRequest>,
) -> Result<Json<DesktopActionResponse>, ApiError> {
    // NOTE: the `///` lines above are left untouched on purpose; utoipa
    // publishes them as the operation summary/description.
    //
    // Hand the decoded request to the desktop runtime; `?` propagates any
    // failure as an `ApiError`, otherwise the action result is returned as JSON.
    Ok(Json(state.desktop_runtime().type_text(request).await?))
}
/// Press a desktop keyboard shortcut.
///
/// Performs a health-gated `xdotool key` operation against the managed
/// desktop.
#[utoipa::path(
    post,
    path = "/v1/desktop/keyboard/press",
    tag = "v1",
    request_body = DesktopKeyboardPressRequest,
    responses(
        (status = 200, description = "Desktop keyboard action result", body = DesktopActionResponse),
        (status = 400, description = "Invalid keyboard press request", body = ProblemDetails),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
        (status = 503, description = "Desktop runtime health or input failed", body = ProblemDetails)
    )
)]
async fn post_v1_desktop_keyboard_press(
    State(state): State<Arc<AppState>>,
    Json(request): Json<DesktopKeyboardPressRequest>,
) -> Result<Json<DesktopActionResponse>, ApiError> {
    // NOTE: the `///` lines above are left untouched on purpose; utoipa
    // publishes them as the operation summary/description.
    //
    // Hand the decoded request to the desktop runtime; `?` propagates any
    // failure as an `ApiError`, otherwise the action result is returned as JSON.
    Ok(Json(state.desktop_runtime().press_key(request).await?))
}
/// Get desktop display information.
///
/// Performs a health-gated display query against the managed desktop and
/// returns the current display identifier and resolution.
#[utoipa::path(
    get,
    path = "/v1/desktop/display/info",
    tag = "v1",
    responses(
        (status = 200, description = "Desktop display information", body = DesktopDisplayInfoResponse),
        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
        (status = 503, description = "Desktop runtime health or display query failed", body = ProblemDetails)
    )
)]
async fn get_v1_desktop_display_info(
    State(state): State<Arc<AppState>>,
) -> Result<Json<DesktopDisplayInfoResponse>, ApiError> {
    // NOTE: the `///` lines above are left untouched on purpose; utoipa
    // publishes them as the operation summary/description.
    //
    // Query the desktop runtime; `?` propagates any failure as an `ApiError`,
    // otherwise the display information is returned as JSON.
    Ok(Json(state.desktop_runtime().display_info().await?))
}
#[utoipa::path(
get,
path = "/v1/agents",

View file

@ -50,6 +50,15 @@ struct EnvVarGuard {
previous: Option<std::ffi::OsString>,
}
/// Keeps the fake desktop test environment alive for as long as it is held.
///
/// Returned by `setup_fake_desktop_env`. Every field is underscore-prefixed
/// because it is never read; the struct exists only to own the values until
/// the test finishes. Rust drops fields in declaration order, so keep the
/// order below stable unless the teardown sequence is meant to change.
struct FakeDesktopEnv {
// Temporary directory that contains the fake `bin`, `state` and
// `desktop-state` directories created by `setup_fake_desktop_env`.
_temp: TempDir,
// Guard for the `PATH` override that puts the fake `bin` directory first.
// NOTE(review): `EnvVarGuard` presumably restores the previous value on
// drop; its `Drop` impl is not visible here, so confirm.
_path: EnvVarGuard,
// Guard for the `XDG_STATE_HOME` override pointing into the temp directory.
_xdg_state_home: EnvVarGuard,
// Guard for `SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX` (set to "1").
_assume_linux: EnvVarGuard,
// Guard for `SANDBOX_AGENT_DESKTOP_DISPLAY_NUM` (set to "190").
_display_num: EnvVarGuard,
// Guard for `SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR`, which the fake `xdotool`
// script reads to locate its mouse state file.
_fake_state_dir: EnvVarGuard,
}
struct LiveServer {
address: SocketAddr,
shutdown_tx: Option<oneshot::Sender<()>>,
@ -167,6 +176,153 @@ exit 0
);
}
/// Builds a throwaway desktop toolchain made of shell-script stand-ins and
/// points the process environment at it.
///
/// A temporary directory is created with three sub-directories (`bin`,
/// `state`, `desktop-state`). Fake `Xvfb`, `openbox`, `dbus-launch`, `xrandr`,
/// `import` and `xdotool` executables are written into `bin`, which is then
/// placed at the front of `PATH`. The returned [`FakeDesktopEnv`] owns the
/// temp directory and the environment-variable guards, so it must be kept
/// alive for the duration of the test.
///
/// # Panics
///
/// Panics if the temp directory, any sub-directory, or the merged `PATH`
/// cannot be created.
fn setup_fake_desktop_env() -> FakeDesktopEnv {
    let sandbox = tempfile::tempdir().expect("create fake desktop tempdir");
    let bin_dir = sandbox.path().join("bin");
    let xdg_state_home = sandbox.path().join("state");
    let fake_state_dir = sandbox.path().join("desktop-state");

    fs::create_dir_all(&bin_dir).expect("create fake desktop bin dir");
    fs::create_dir_all(&xdg_state_home).expect("create xdg state home");
    fs::create_dir_all(&fake_state_dir).expect("create fake state dir");

    // (binary name, script body) pairs, installed in this exact order.
    // The script bodies are runtime data: keep them flush-left and unchanged.
    let fake_binaries: [(&str, &str); 6] = [
        (
            // Creates the X11 socket file for the requested display, then
            // idles until signalled; the trap removes the socket on exit.
            "Xvfb",
            r#"#!/usr/bin/env sh
set -eu
display="${1:-:99}"
number="${display#:}"
socket="/tmp/.X11-unix/X${number}"
mkdir -p /tmp/.X11-unix
touch "$socket"
cleanup() {
rm -f "$socket"
exit 0
}
trap cleanup INT TERM EXIT
while :; do
sleep 1
done
"#,
        ),
        (
            // Idles until it receives INT or TERM.
            "openbox",
            r#"#!/usr/bin/env sh
set -eu
trap 'exit 0' INT TERM
while :; do
sleep 1
done
"#,
        ),
        (
            // Prints a fixed session bus address and its own PID.
            "dbus-launch",
            r#"#!/usr/bin/env sh
set -eu
echo "DBUS_SESSION_BUS_ADDRESS=unix:path=/tmp/sandbox-agent-test-bus"
echo "DBUS_SESSION_BUS_PID=$$"
"#,
        ),
        (
            // Always reports a 1440 x 900 screen.
            "xrandr",
            r#"#!/usr/bin/env sh
set -eu
cat <<'EOF'
Screen 0: minimum 1 x 1, current 1440 x 900, maximum 32767 x 32767
EOF
"#,
        ),
        (
            // Emits a fixed byte sequence that begins with the PNG signature.
            "import",
            r#"#!/usr/bin/env sh
set -eu
printf '\211PNG\r\n\032\n\000\000\000\rIHDR\000\000\000\001\000\000\000\001\010\006\000\000\000\037\025\304\211\000\000\000\013IDATx\234c\000\001\000\000\005\000\001\r\n-\264\000\000\000\000IEND\256B`\202'
"#,
        ),
        (
            // Tracks the pointer position in "$state_dir/mouse": `mousemove`
            // chains update it, `getmouselocation` prints it, and every other
            // sub-command is accepted as a no-op.
            "xdotool",
            r#"#!/usr/bin/env sh
set -eu
state_dir="${SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR:?missing fake state dir}"
state_file="${state_dir}/mouse"
mkdir -p "$state_dir"
if [ ! -f "$state_file" ]; then
printf '0 0\n' > "$state_file"
fi
read_state() {
read -r x y < "$state_file"
}
write_state() {
printf '%s %s\n' "$1" "$2" > "$state_file"
}
command="${1:-}"
case "$command" in
getmouselocation)
read_state
printf 'X=%s\nY=%s\nSCREEN=0\nWINDOW=0\n' "$x" "$y"
;;
mousemove)
shift
x="${1:-0}"
y="${2:-0}"
shift 2 || true
while [ "$#" -gt 0 ]; do
token="$1"
shift
case "$token" in
mousemove)
x="${1:-0}"
y="${2:-0}"
shift 2 || true
;;
mousedown|mouseup)
shift 1 || true
;;
click)
if [ "${1:-}" = "--repeat" ]; then
shift 2 || true
fi
shift 1 || true
;;
esac
done
write_state "$x" "$y"
;;
type|key)
exit 0
;;
*)
exit 0
;;
esac
"#,
        ),
    ];

    for &(name, script) in fake_binaries.iter() {
        write_executable(&bin_dir.join(name), script);
    }

    // Put the fake bin directory ahead of whatever PATH the process inherited.
    let inherited_path = std::env::var_os("PATH").unwrap_or_default();
    let search_path = std::env::join_paths(
        std::iter::once(bin_dir).chain(std::env::split_paths(&inherited_path)),
    )
    .expect("join PATH");

    // Field initialisers run top to bottom, so the environment overrides are
    // applied in the order listed here.
    FakeDesktopEnv {
        _temp: sandbox,
        _path: EnvVarGuard::set_os("PATH", search_path.as_os_str()),
        _xdg_state_home: EnvVarGuard::set_os("XDG_STATE_HOME", xdg_state_home.as_os_str()),
        _assume_linux: EnvVarGuard::set("SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX", "1"),
        _display_num: EnvVarGuard::set("SANDBOX_AGENT_DESKTOP_DISPLAY_NUM", "190"),
        _fake_state_dir: EnvVarGuard::set_os(
            "SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR",
            fake_state_dir.as_os_str(),
        ),
    }
}
fn serve_registry_once(document: Value) -> String {
let listener = TcpListener::bind("127.0.0.1:0").expect("bind registry server");
let address = listener.local_addr().expect("registry address");
@ -375,5 +531,7 @@ mod acp_transport;
mod config_endpoints;
#[path = "v1_api/control_plane.rs"]
mod control_plane;
#[path = "v1_api/desktop.rs"]
mod desktop;
#[path = "v1_api/processes.rs"]
mod processes;

View file

@ -0,0 +1,222 @@
use super::*;
use serial_test::serial;
// With PATH pointing at a freshly created (empty) directory, the status
// endpoint must report `install_required`, list `Xvfb` among the missing
// dependencies, and suggest the desktop install command.
#[tokio::test]
#[serial]
async fn v1_desktop_status_reports_install_required_when_dependencies_are_missing() {
    // Nothing can be resolved from an empty PATH entry, so every desktop
    // binary lookup is expected to fail.
    let empty_bin = tempfile::tempdir().expect("create empty path tempdir");
    let _path_guard = EnvVarGuard::set_os("PATH", empty_bin.path().as_os_str());
    // NOTE(review): presumably makes the runtime treat the host as Linux so
    // the dependency check runs on any platform; confirm against the runtime.
    let _linux_guard = EnvVarGuard::set("SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX", "1");

    let test_app = TestApp::new(AuthConfig::disabled());
    let (status, _, body) = send_request(
        &test_app.app,
        Method::GET,
        "/v1/desktop/status",
        None,
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);

    let report = parse_json(&body);
    assert_eq!(report["state"], "install_required");

    let missing = report["missingDependencies"]
        .as_array()
        .expect("missingDependencies array");
    assert!(missing.iter().any(|dependency| dependency == "Xvfb"));

    assert_eq!(
        report["installCommand"],
        "sandbox-agent install desktop --yes"
    );
}
// End-to-end walk through the desktop API against the shell-script fakes:
// start -> screenshots -> display info -> mouse actions -> keyboard actions
// -> stop. The steps share state (pointer position, running desktop), so
// their order matters.
#[tokio::test]
#[serial]
async fn v1_desktop_lifecycle_and_actions_work_with_fake_runtime() {
    // Must outlive every request below: it owns the fake binaries and the
    // environment overrides.
    let _fake_env = setup_fake_desktop_env();
    let test_app = TestApp::new(AuthConfig::disabled());

    // --- start -----------------------------------------------------------
    let start_request = json!({
        "width": 1440,
        "height": 900,
        "dpi": 96
    });
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/start",
        Some(start_request),
        &[],
    )
    .await;
    assert_eq!(
        status,
        StatusCode::OK,
        "unexpected start response: {}",
        String::from_utf8_lossy(&body)
    );
    let started = parse_json(&body);
    assert_eq!(started["state"], "active");
    let display = started["display"]
        .as_str()
        .expect("desktop display")
        .to_string();
    assert!(display.starts_with(':'));
    assert_eq!(started["resolution"]["width"], 1440);
    assert_eq!(started["resolution"]["height"], 900);

    // --- full screenshot -------------------------------------------------
    let (status, headers, body) = send_request_raw(
        &test_app.app,
        Method::GET,
        "/v1/desktop/screenshot",
        None,
        &[],
        None,
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let content_type = headers
        .get(header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok());
    assert_eq!(content_type, Some("image/png"));
    // Only the PNG signature is checked, not the image contents.
    assert!(body.starts_with(b"\x89PNG\r\n\x1a\n"));

    // --- region screenshot -----------------------------------------------
    let (status, _, body) = send_request_raw(
        &test_app.app,
        Method::GET,
        "/v1/desktop/screenshot/region?x=10&y=20&width=30&height=40",
        None,
        &[],
        None,
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert!(body.starts_with(b"\x89PNG\r\n\x1a\n"));

    // --- display info ----------------------------------------------------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::GET,
        "/v1/desktop/display/info",
        None,
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let display_info = parse_json(&body);
    assert_eq!(display_info["display"], display);
    assert_eq!(display_info["resolution"]["width"], 1440);

    // --- mouse: move -----------------------------------------------------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/mouse/move",
        Some(json!({ "x": 400, "y": 300 })),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let moved = parse_json(&body);
    assert_eq!(moved["x"], 400);
    assert_eq!(moved["y"], 300);

    // --- mouse: drag (pointer should end at the drag's end point) --------
    let drag_request = json!({
        "startX": 100,
        "startY": 110,
        "endX": 220,
        "endY": 230,
        "button": "left"
    });
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/mouse/drag",
        Some(drag_request),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let dragged = parse_json(&body);
    assert_eq!(dragged["x"], 220);
    assert_eq!(dragged["y"], 230);

    // --- mouse: click ----------------------------------------------------
    let click_request = json!({
        "x": 220,
        "y": 230,
        "button": "left",
        "clickCount": 1
    });
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/mouse/click",
        Some(click_request),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let clicked = parse_json(&body);
    assert_eq!(clicked["x"], 220);
    assert_eq!(clicked["y"], 230);

    // --- mouse: scroll ---------------------------------------------------
    let scroll_request = json!({
        "x": 220,
        "y": 230,
        "deltaY": -3
    });
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/mouse/scroll",
        Some(scroll_request),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let scrolled = parse_json(&body);
    assert_eq!(scrolled["x"], 220);
    assert_eq!(scrolled["y"], 230);

    // --- mouse: position (unchanged by the click and scroll above) -------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::GET,
        "/v1/desktop/mouse/position",
        None,
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    let pointer = parse_json(&body);
    assert_eq!(pointer["x"], 220);
    assert_eq!(pointer["y"], 230);

    // --- keyboard: type --------------------------------------------------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/keyboard/type",
        Some(json!({ "text": "hello world", "delayMs": 5 })),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(parse_json(&body)["ok"], true);

    // --- keyboard: press -------------------------------------------------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/keyboard/press",
        Some(json!({ "key": "ctrl+l" })),
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(parse_json(&body)["ok"], true);

    // --- stop ------------------------------------------------------------
    let (status, _, body) = send_request(
        &test_app.app,
        Method::POST,
        "/v1/desktop/stop",
        None,
        &[],
    )
    .await;
    assert_eq!(status, StatusCode::OK);
    assert_eq!(parse_json(&body)["state"], "inactive");
}