Phase 2: snapshot - window tree + screenshot via xcap

- Add xcap and image dependencies
- DesktopBackend trait with all 17 methods for future extensibility
- X11Backend with real snapshot() using xcap Window::all() and
  Monitor::all() for z-ordered window enumeration and screenshot
- Stub implementations for input/window management (phases 4-6)
- Wire X11Backend into DaemonState (now returns Result)
- Real snapshot handler replacing placeholder, updates ref map
This commit is contained in:
Harivansh Rathi 2026-03-24 21:24:34 -04:00
parent dfaa339594
commit 79e6e0e25c
8 changed files with 3041 additions and 34 deletions

2791
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -15,3 +15,5 @@ anyhow = "1"
dirs = "6" dirs = "6"
libc = "0.2" libc = "0.2"
uuid = { version = "1", features = ["v4"] } uuid = { version = "1", features = ["v4"] }
xcap = "0.8"
image = { version = "0.25", features = ["png"] }

58
src/backend/mod.rs Normal file
View file

@ -0,0 +1,58 @@
pub mod x11;
use anyhow::Result;
use crate::core::types::Snapshot;
// NOTE(review): presumably allowed because most methods have no caller yet
// (only `snapshot` is wired into the request handler in this commit).
#[allow(dead_code)]
/// Operations a desktop-automation backend must provide: snapshotting,
/// window management, input simulation, and a few utility queries.
///
/// Every method is fallible and returns `anyhow::Result`. The `Send` bound
/// lets an implementation be moved across threads.
pub trait DesktopBackend: Send {
/// Capture a screenshot and return a z-ordered window tree with @wN refs.
///
/// NOTE(review): the meaning of `annotate` is not defined in this file;
/// presumably it requests ref labels drawn on the screenshot - confirm.
fn snapshot(&mut self, annotate: bool) -> Result<Snapshot>;
/// Focus a window by its X11 window ID.
fn focus_window(&mut self, xcb_id: u32) -> Result<()>;
/// Move a window (identified by its X11 window ID) to absolute coordinates.
fn move_window(&mut self, xcb_id: u32, x: i32, y: i32) -> Result<()>;
/// Resize a window (identified by its X11 window ID) to `w` x `h`.
fn resize_window(&mut self, xcb_id: u32, w: u32, h: u32) -> Result<()>;
/// Close a window gracefully.
fn close_window(&mut self, xcb_id: u32) -> Result<()>;
/// Click at absolute coordinates.
fn click(&mut self, x: i32, y: i32) -> Result<()>;
/// Double-click at absolute coordinates.
fn dblclick(&mut self, x: i32, y: i32) -> Result<()>;
/// Type text into the focused window.
fn type_text(&mut self, text: &str) -> Result<()>;
/// Press a single key by name.
///
/// NOTE(review): the accepted key-name vocabulary is not shown here.
fn press_key(&mut self, key: &str) -> Result<()>;
/// Send a hotkey combination.
///
/// NOTE(review): presumably `keys` are pressed together as one chord - confirm.
fn hotkey(&mut self, keys: &[String]) -> Result<()>;
/// Move the mouse cursor to absolute coordinates.
fn mouse_move(&mut self, x: i32, y: i32) -> Result<()>;
/// Scroll the mouse wheel.
///
/// NOTE(review): valid `axis` strings and the sign convention of `amount`
/// are not defined in this file - confirm against the implementation.
fn scroll(&mut self, amount: i32, axis: &str) -> Result<()>;
/// Drag from one position (`x1`, `y1`) to another (`x2`, `y2`).
fn drag(&mut self, x1: i32, y1: i32, x2: i32, y2: i32) -> Result<()>;
/// Get the screen resolution.
///
/// NOTE(review): presumably returned as `(width, height)` - confirm.
fn screen_size(&self) -> Result<(u32, u32)>;
/// Get the current mouse position.
///
/// NOTE(review): presumably returned as `(x, y)` - confirm.
fn mouse_position(&self) -> Result<(i32, i32)>;
/// Take a screenshot and save to a path (no window tree).
///
/// NOTE(review): the returned `String` is presumably the saved file path - confirm.
fn screenshot(&mut self, path: &str, annotate: bool) -> Result<String>;
/// Launch an application.
///
/// NOTE(review): the returned `u32` is presumably the child process ID - confirm.
fn launch(&self, command: &str, args: &[String]) -> Result<u32>;
}

148
src/backend/x11.rs Normal file
View file

@ -0,0 +1,148 @@
use anyhow::{Context, Result};
use crate::core::types::{Snapshot, WindowInfo};
/// X11 implementation of the desktop backend.
///
/// The struct currently has no fields; it exists so the daemon can own a
/// backend value and call trait methods on it.
pub struct X11Backend {
// enigo and x11rb connections added in later phases
}
impl X11Backend {
pub fn new() -> Result<Self> {
Ok(Self {})
}
}
impl super::DesktopBackend for X11Backend {
    /// Captures the primary monitor to a PNG under `/tmp` and lists the
    /// windows reported by xcap, giving each kept window a sequential `wN` ref.
    ///
    /// `_annotate` is accepted for interface compatibility but is not read.
    ///
    /// Windows whose title and app name are both empty are omitted. For the
    /// remaining windows, any metadata getter that fails falls back to a
    /// default value (`0`, `false`, or an empty string) instead of dropping
    /// the window.
    ///
    /// # Errors
    ///
    /// Returns an error if window or monitor enumeration fails, if no monitor
    /// is available, or if the screenshot cannot be captured or saved.
    fn snapshot(&mut self, _annotate: bool) -> Result<Snapshot> {
        // Window list from xcap; the refs below follow the order xcap returns.
        let windows = xcap::Window::all().context("Failed to enumerate windows")?;

        // Pick the monitor flagged as primary rather than relying on
        // enumeration order; fall back to the first monitor when none is
        // flagged (or the flag cannot be read).
        let monitors = xcap::Monitor::all().context("Failed to enumerate monitors")?;
        let monitor = monitors
            .iter()
            .find(|m| m.is_primary().unwrap_or(false))
            .or_else(|| monitors.first())
            .context("No monitor found")?;
        let image = monitor
            .capture_image()
            .context("Failed to capture screenshot")?;

        // Name the file after the current Unix time in milliseconds; a clock
        // set before the epoch yields 0 instead of an error.
        let timestamp = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_millis();
        let screenshot_path = format!("/tmp/desktop-ctl-{timestamp}.png");
        image
            .save(&screenshot_path)
            .context("Failed to save screenshot")?;

        // Build window info list
        let mut window_infos = Vec::with_capacity(windows.len());
        let mut ref_counter = 1usize;
        for win in &windows {
            // Each xcap getter returns XCapResult<T>; a failed getter is
            // replaced by a default value rather than aborting the snapshot.
            let title = win.title().unwrap_or_default();
            let app_name = win.app_name().unwrap_or_default();

            // Skip windows with empty titles and app names (desktop, panels, etc.)
            if title.is_empty() && app_name.is_empty() {
                continue;
            }

            let xcb_id = win.id().unwrap_or(0);
            let x = win.x().unwrap_or(0);
            let y = win.y().unwrap_or(0);
            let width = win.width().unwrap_or(0);
            let height = win.height().unwrap_or(0);
            let focused = win.is_focused().unwrap_or(false);
            let minimized = win.is_minimized().unwrap_or(false);

            // Refs are numbered only over windows that survive the filter.
            let ref_id = format!("w{ref_counter}");
            ref_counter += 1;

            window_infos.push(WindowInfo {
                ref_id,
                xcb_id,
                title,
                app_name,
                x,
                y,
                width,
                height,
                focused,
                minimized,
            });
        }

        Ok(Snapshot {
            screenshot: screenshot_path,
            windows: window_infos,
        })
    }

    // Stub implementations for methods added in later phases.
    // Each one returns an error naming the phase that will implement it.

    fn focus_window(&mut self, _xcb_id: u32) -> Result<()> {
        anyhow::bail!("Window management not yet implemented (Phase 5)")
    }

    fn move_window(&mut self, _xcb_id: u32, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Window management not yet implemented (Phase 5)")
    }

    fn resize_window(&mut self, _xcb_id: u32, _w: u32, _h: u32) -> Result<()> {
        anyhow::bail!("Window management not yet implemented (Phase 5)")
    }

    fn close_window(&mut self, _xcb_id: u32) -> Result<()> {
        anyhow::bail!("Window management not yet implemented (Phase 5)")
    }

    fn click(&mut self, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn dblclick(&mut self, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn type_text(&mut self, _text: &str) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn press_key(&mut self, _key: &str) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn hotkey(&mut self, _keys: &[String]) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn mouse_move(&mut self, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn scroll(&mut self, _amount: i32, _axis: &str) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn drag(&mut self, _x1: i32, _y1: i32, _x2: i32, _y2: i32) -> Result<()> {
        anyhow::bail!("Input simulation not yet implemented (Phase 4)")
    }

    fn screen_size(&self) -> Result<(u32, u32)> {
        anyhow::bail!("Utility commands not yet implemented (Phase 6)")
    }

    fn mouse_position(&self) -> Result<(i32, i32)> {
        anyhow::bail!("Utility commands not yet implemented (Phase 6)")
    }

    fn screenshot(&mut self, _path: &str, _annotate: bool) -> Result<String> {
        anyhow::bail!("Standalone screenshot not yet implemented (Phase 6)")
    }

    fn launch(&self, _command: &str, _args: &[String]) -> Result<u32> {
        anyhow::bail!("Launch not yet implemented (Phase 6)")
    }
}

View file

@ -1,31 +1,52 @@
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use crate::backend::DesktopBackend;
use crate::core::protocol::{Request, Response}; use crate::core::protocol::{Request, Response};
use crate::core::refs::RefEntry;
use super::state::DaemonState; use super::state::DaemonState;
pub async fn handle_request( pub async fn handle_request(
request: &Request, request: &Request,
_state: &Arc<Mutex<DaemonState>>, state: &Arc<Mutex<DaemonState>>,
) -> Response { ) -> Response {
match request.action.as_str() { match request.action.as_str() {
"snapshot" => { "snapshot" => handle_snapshot(request, state).await,
Response::ok(serde_json::json!({ action => Response::err(format!("Unknown action: {action}")),
"screenshot": "/tmp/desktop-ctl-placeholder.png", }
"windows": [ }
{
"ref_id": "w1", async fn handle_snapshot(
"xcb_id": 0, request: &Request,
"title": "Placeholder Window", state: &Arc<Mutex<DaemonState>>,
"app_name": "placeholder", ) -> Response {
"x": 0, "y": 0, "width": 1920, "height": 1080, let annotate = request.extra.get("annotate")
"focused": true, "minimized": false .and_then(|v| v.as_bool())
} .unwrap_or(false);
]
})) let mut state = state.lock().await;
}
action => { match state.backend.snapshot(annotate) {
Response::err(format!("Unknown action: {action}")) Ok(snapshot) => {
} // Update ref map
state.ref_map.clear();
for win in &snapshot.windows {
state.ref_map.insert(RefEntry {
xcb_id: win.xcb_id,
app_class: win.app_name.clone(),
title: win.title.clone(),
pid: 0, // xcap doesn't expose PID directly in snapshot
x: win.x,
y: win.y,
width: win.width,
height: win.height,
focused: win.focused,
minimized: win.minimized,
});
}
Response::ok(serde_json::to_value(&snapshot).unwrap_or_default())
}
Err(e) => Response::err(format!("Snapshot failed: {e}")),
} }
} }

View file

@ -46,7 +46,10 @@ async fn async_run() -> Result<()> {
.context(format!("Failed to bind socket: {}", socket_path.display()))?; .context(format!("Failed to bind socket: {}", socket_path.display()))?;
let session = std::env::var("DESKTOP_CTL_SESSION").unwrap_or_else(|_| "default".to_string()); let session = std::env::var("DESKTOP_CTL_SESSION").unwrap_or_else(|_| "default".to_string());
let state = Arc::new(Mutex::new(DaemonState::new(session, socket_path.clone()))); let state = Arc::new(Mutex::new(
DaemonState::new(session, socket_path.clone())
.context("Failed to initialize daemon state")?
));
let shutdown = Arc::new(tokio::sync::Notify::new()); let shutdown = Arc::new(tokio::sync::Notify::new());
let shutdown_clone = shutdown.clone(); let shutdown_clone = shutdown.clone();

View file

@ -1,4 +1,6 @@
use std::path::PathBuf; use std::path::PathBuf;
use crate::backend::x11::X11Backend;
use crate::core::refs::RefMap; use crate::core::refs::RefMap;
#[allow(dead_code)] #[allow(dead_code)]
@ -6,14 +8,17 @@ pub struct DaemonState {
pub session: String, pub session: String,
pub socket_path: PathBuf, pub socket_path: PathBuf,
pub ref_map: RefMap, pub ref_map: RefMap,
pub backend: X11Backend,
} }
impl DaemonState { impl DaemonState {
pub fn new(session: String, socket_path: PathBuf) -> Self { pub fn new(session: String, socket_path: PathBuf) -> anyhow::Result<Self> {
Self { let backend = X11Backend::new()?;
Ok(Self {
session, session,
socket_path, socket_path,
ref_map: RefMap::new(), ref_map: RefMap::new(),
} backend,
})
} }
} }

View file

@ -1,3 +1,4 @@
mod backend;
mod cli; mod cli;
mod core; mod core;
mod daemon; mod daemon;