Phase 4: mouse + keyboard input via enigo

- Add enigo 0.6 dependency (x11rb/XTest backend)
- Enigo field in X11Backend for input simulation
- Click, double-click at absolute coords or @wN ref centers
- Type text into focused window, press individual keys
- Hotkey combinations (modifier press, key click, modifier release)
- Mouse move, scroll (vertical/horizontal), drag operations
- parse_key() mapping human-readable names to enigo Key values
- Handler dispatchers with ref resolution and coord parsing
This commit is contained in:
Harivansh Rathi 2026-03-24 21:33:30 -04:00
parent 0072a260b8
commit 314a11bcba
4 changed files with 622 additions and 49 deletions

View file

@ -1,31 +1,34 @@
use anyhow::{Context, Result};
use enigo::{
Axis, Button, Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings,
};
use crate::core::types::{Snapshot, WindowInfo};
use super::annotate::annotate_screenshot;
use crate::core::types::{Snapshot, WindowInfo};
pub struct X11Backend {
// enigo and x11rb connections added in later phases
enigo: Enigo,
}
impl X11Backend {
pub fn new() -> Result<Self> {
Ok(Self {})
let enigo = Enigo::new(&Settings::default())
.map_err(|e| anyhow::anyhow!("Failed to initialize enigo: {e}"))?;
Ok(Self { enigo })
}
}
impl super::DesktopBackend for X11Backend {
fn snapshot(&mut self, annotate: bool) -> Result<Snapshot> {
// Get z-ordered window list via xcap (topmost first internally)
let windows = xcap::Window::all()
.context("Failed to enumerate windows")?;
let windows = xcap::Window::all().context("Failed to enumerate windows")?;
// Get primary monitor for screenshot
let monitors = xcap::Monitor::all()
.context("Failed to enumerate monitors")?;
let monitor = monitors.into_iter().next()
.context("No monitor found")?;
let monitors = xcap::Monitor::all().context("Failed to enumerate monitors")?;
let monitor = monitors.into_iter().next().context("No monitor found")?;
let mut image = monitor.capture_image()
let mut image = monitor
.capture_image()
.context("Failed to capture screenshot")?;
// Build window info list
@ -78,7 +81,8 @@ impl super::DesktopBackend for X11Backend {
.unwrap_or_default()
.as_millis();
let screenshot_path = format!("/tmp/desktop-ctl-{timestamp}.png");
image.save(&screenshot_path)
image
.save(&screenshot_path)
.context("Failed to save screenshot")?;
Ok(Snapshot {
@ -87,7 +91,7 @@ impl super::DesktopBackend for X11Backend {
})
}
// Stub implementations for methods added in later phases
// Phase 5: window management (stub)
/// Placeholder: always fails, reporting that window management is not implemented yet.
fn focus_window(&mut self, _xcb_id: u32) -> Result<()> {
    Err(anyhow::anyhow!(
        "Window management not yet implemented (Phase 5)"
    ))
}
@ -104,38 +108,120 @@ impl super::DesktopBackend for X11Backend {
anyhow::bail!("Window management not yet implemented (Phase 5)")
}
fn click(&mut self, _x: i32, _y: i32) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
// Phase 4: input simulation via enigo
/// Moves the pointer to the absolute position (`x`, `y`) and performs one
/// left-button click.
fn click(&mut self, x: i32, y: i32) -> Result<()> {
    let settle = std::time::Duration::from_millis(10);

    if let Err(err) = self.enigo.move_mouse(x, y, Coordinate::Abs) {
        anyhow::bail!("Mouse move failed: {err}");
    }
    // Short pause between the move and the button event.
    std::thread::sleep(settle);
    if let Err(err) = self.enigo.button(Button::Left, Direction::Click) {
        anyhow::bail!("Click failed: {err}");
    }
    Ok(())
}
fn dblclick(&mut self, _x: i32, _y: i32) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Moves the pointer to the absolute position (`x`, `y`) and performs two
/// left-button clicks separated by a 50 ms pause.
fn dblclick(&mut self, x: i32, y: i32) -> Result<()> {
    let settle = std::time::Duration::from_millis(10);
    let between_clicks = std::time::Duration::from_millis(50);

    if let Err(err) = self.enigo.move_mouse(x, y, Coordinate::Abs) {
        anyhow::bail!("Mouse move failed: {err}");
    }
    std::thread::sleep(settle);

    for (index, ordinal) in ["First", "Second"].iter().enumerate() {
        // The pause goes between the two clicks, not before the first one.
        if index > 0 {
            std::thread::sleep(between_clicks);
        }
        if let Err(err) = self.enigo.button(Button::Left, Direction::Click) {
            anyhow::bail!("{ordinal} click failed: {err}");
        }
    }
    Ok(())
}
fn type_text(&mut self, _text: &str) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Sends `text` through enigo's text-entry call.
fn type_text(&mut self, text: &str) -> Result<()> {
    if let Err(err) = self.enigo.text(text) {
        anyhow::bail!("Type failed: {err}");
    }
    Ok(())
}
fn press_key(&mut self, _key: &str) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Translates `key` with `parse_key` and clicks (press + release) that key once.
///
/// Fails if the name is not recognised or the key event cannot be sent.
fn press_key(&mut self, key: &str) -> Result<()> {
    let parsed = parse_key(key)?;
    if let Err(err) = self.enigo.key(parsed, Direction::Click) {
        anyhow::bail!("Key press failed: {err}");
    }
    Ok(())
}
fn hotkey(&mut self, _keys: &[String]) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
fn hotkey(&mut self, keys: &[String]) -> Result<()> {
// Press all modifier keys, click the last key, release modifiers in reverse
let parsed: Vec<Key> = keys
.iter()
.map(|k| parse_key(k))
.collect::<Result<Vec<_>>>()?;
if parsed.is_empty() {
anyhow::bail!("No keys specified for hotkey");
}
let (modifiers, tail) = parsed.split_at(parsed.len() - 1);
for m in modifiers {
self.enigo
.key(*m, Direction::Press)
.map_err(|e| anyhow::anyhow!("Modifier press failed: {e}"))?;
}
self.enigo
.key(tail[0], Direction::Click)
.map_err(|e| anyhow::anyhow!("Key click failed: {e}"))?;
for m in modifiers.iter().rev() {
self.enigo
.key(*m, Direction::Release)
.map_err(|e| anyhow::anyhow!("Modifier release failed: {e}"))?;
}
Ok(())
}
fn mouse_move(&mut self, _x: i32, _y: i32) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Moves the pointer to the absolute position (`x`, `y`) without clicking.
fn mouse_move(&mut self, x: i32, y: i32) -> Result<()> {
    if let Err(err) = self.enigo.move_mouse(x, y, Coordinate::Abs) {
        anyhow::bail!("Mouse move failed: {err}");
    }
    Ok(())
}
fn scroll(&mut self, _amount: i32, _axis: &str) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Scrolls by `amount` along the requested axis.
///
/// `axis` selects horizontal scrolling when it is `"horizontal"` or `"h"`;
/// every other value scrolls vertically.
fn scroll(&mut self, amount: i32, axis: &str) -> Result<()> {
    let direction = if matches!(axis, "horizontal" | "h") {
        Axis::Horizontal
    } else {
        Axis::Vertical
    };
    if let Err(err) = self.enigo.scroll(amount, direction) {
        anyhow::bail!("Scroll failed: {err}");
    }
    Ok(())
}
fn drag(&mut self, _x1: i32, _y1: i32, _x2: i32, _y2: i32) -> Result<()> {
anyhow::bail!("Input simulation not yet implemented (Phase 4)")
/// Drags with the left button from (`x1`, `y1`) to (`x2`, `y2`), both absolute.
///
/// Sequence: move to the start, press the left button, move to the target,
/// release. Once the button is down it is always released again, even when
/// the move to the target fails, so an error cannot leave the button held.
/// The first error encountered is the one returned.
fn drag(&mut self, x1: i32, y1: i32, x2: i32, y2: i32) -> Result<()> {
    let short_pause = std::time::Duration::from_millis(10);
    let hold_pause = std::time::Duration::from_millis(50);

    self.enigo
        .move_mouse(x1, y1, Coordinate::Abs)
        .map_err(|e| anyhow::anyhow!("Mouse move failed: {e}"))?;
    std::thread::sleep(short_pause);
    self.enigo
        .button(Button::Left, Direction::Press)
        .map_err(|e| anyhow::anyhow!("Button press failed: {e}"))?;
    std::thread::sleep(hold_pause);

    // From here on the button is down: record failures instead of returning
    // early so the release below always runs.
    let mut outcome: Result<()> = Ok(());
    match self.enigo.move_mouse(x2, y2, Coordinate::Abs) {
        Ok(_) => std::thread::sleep(short_pause),
        Err(e) => outcome = Err(anyhow::anyhow!("Mouse move to target failed: {e}")),
    }

    if let Err(e) = self.enigo.button(Button::Left, Direction::Release) {
        if outcome.is_ok() {
            outcome = Err(anyhow::anyhow!("Button release failed: {e}"));
        }
    }

    outcome
}
// Phase 6: utility stubs
/// Placeholder: always fails, reporting that utility commands are not implemented yet.
fn screen_size(&self) -> Result<(u32, u32)> {
    Err(anyhow::anyhow!(
        "Utility commands not yet implemented (Phase 6)"
    ))
}
@ -152,3 +238,52 @@ impl super::DesktopBackend for X11Backend {
anyhow::bail!("Launch not yet implemented (Phase 6)")
}
}
/// Maps a human-readable key name to an enigo [`Key`].
///
/// Matching is case-insensitive because the name is lowercased first. Named
/// keys (modifiers, navigation, arrows, F1-F12) are looked up in the table
/// below; any other input consisting of exactly one character after
/// lowercasing becomes `Key::Unicode` of that (lowercased) character.
///
/// # Errors
/// Returns `Unknown key: <name>` for the empty string and for any unrecognised
/// name longer than one character.
fn parse_key(name: &str) -> Result<Key> {
    let lowered = name.to_lowercase();
    let key = match lowered.as_str() {
        // Modifiers
        "ctrl" | "control" => Key::Control,
        "alt" => Key::Alt,
        "shift" => Key::Shift,
        "super" | "meta" | "win" => Key::Meta,
        // Navigation / editing
        "enter" | "return" => Key::Return,
        "tab" => Key::Tab,
        "escape" | "esc" => Key::Escape,
        "backspace" => Key::Backspace,
        "delete" | "del" => Key::Delete,
        "space" => Key::Space,
        // Arrow keys
        "up" => Key::UpArrow,
        "down" => Key::DownArrow,
        "left" => Key::LeftArrow,
        "right" => Key::RightArrow,
        // Page navigation
        "home" => Key::Home,
        "end" => Key::End,
        "pageup" => Key::PageUp,
        "pagedown" => Key::PageDown,
        // Function keys
        "f1" => Key::F1,
        "f2" => Key::F2,
        "f3" => Key::F3,
        "f4" => Key::F4,
        "f5" => Key::F5,
        "f6" => Key::F6,
        "f7" => Key::F7,
        "f8" => Key::F8,
        "f9" => Key::F9,
        "f10" => Key::F10,
        "f11" => Key::F11,
        "f12" => Key::F12,
        other => {
            // Count characters, not bytes: a single non-ASCII character such
            // as "é" occupies several bytes but is still one key.
            let mut chars = other.chars();
            match (chars.next(), chars.next()) {
                (Some(c), None) => Key::Unicode(c),
                _ => anyhow::bail!("Unknown key: {other}"),
            }
        }
    };
    Ok(key)
}

View file

@ -12,6 +12,14 @@ pub async fn handle_request(
) -> Response {
match request.action.as_str() {
"snapshot" => handle_snapshot(request, state).await,
"click" => handle_click(request, state).await,
"dblclick" => handle_dblclick(request, state).await,
"type" => handle_type(request, state).await,
"press" => handle_press(request, state).await,
"hotkey" => handle_hotkey(request, state).await,
"mouse-move" => handle_mouse_move(request, state).await,
"mouse-scroll" => handle_mouse_scroll(request, state).await,
"mouse-drag" => handle_mouse_drag(request, state).await,
action => Response::err(format!("Unknown action: {action}")),
}
}
@ -20,7 +28,9 @@ async fn handle_snapshot(
request: &Request,
state: &Arc<Mutex<DaemonState>>,
) -> Response {
let annotate = request.extra.get("annotate")
let annotate = request
.extra
.get("annotate")
.and_then(|v| v.as_bool())
.unwrap_or(false);
@ -50,3 +60,208 @@ async fn handle_snapshot(
Err(e) => Response::err(format!("Snapshot failed: {e}")),
}
}
/// Handles the `click` action.
///
/// Reads the string field `selector` from the request. A selector that parses
/// as `"x,y"` is clicked at those coordinates; otherwise it is resolved through
/// the ref map to a centre point. The success payload echoes the clicked point
/// and, for ref selectors, the ref itself.
async fn handle_click(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(selector) = request
        .extra
        .get("selector")
        .and_then(|v| v.as_str())
        .map(str::to_string)
    else {
        return Response::err("Missing 'selector' field");
    };

    let mut guard = state.lock().await;

    // Literal coordinates are tried first; window refs are the fallback.
    let (x, y, via_ref) = if let Some((x, y)) = parse_coords(&selector) {
        (x, y, false)
    } else if let Some((x, y)) = guard.ref_map.resolve_to_center(&selector) {
        (x, y, true)
    } else {
        return Response::err(format!("Could not resolve selector: {selector}"));
    };

    if let Err(e) = guard.backend.click(x, y) {
        return Response::err(format!("Click failed: {e}"));
    }

    let payload = if via_ref {
        serde_json::json!({"clicked": {"x": x, "y": y, "ref": selector}})
    } else {
        serde_json::json!({"clicked": {"x": x, "y": y}})
    };
    Response::ok(payload)
}
/// Handles the `dblclick` action.
///
/// Reads the string field `selector` from the request. A selector that parses
/// as `"x,y"` is double-clicked at those coordinates; otherwise it is resolved
/// through the ref map to a centre point. The success payload echoes the point
/// and, for ref selectors, the ref itself.
async fn handle_dblclick(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(selector) = request
        .extra
        .get("selector")
        .and_then(|v| v.as_str())
        .map(str::to_string)
    else {
        return Response::err("Missing 'selector' field");
    };

    let mut guard = state.lock().await;

    // Literal coordinates are tried first; window refs are the fallback.
    let (x, y, via_ref) = if let Some((x, y)) = parse_coords(&selector) {
        (x, y, false)
    } else if let Some((x, y)) = guard.ref_map.resolve_to_center(&selector) {
        (x, y, true)
    } else {
        return Response::err(format!("Could not resolve selector: {selector}"));
    };

    if let Err(e) = guard.backend.dblclick(x, y) {
        return Response::err(format!("Double-click failed: {e}"));
    }

    let payload = if via_ref {
        serde_json::json!({"double_clicked": {"x": x, "y": y, "ref": selector}})
    } else {
        serde_json::json!({"double_clicked": {"x": x, "y": y}})
    };
    Response::ok(payload)
}
/// Handles the `type` action: reads the string field `text` and passes it to
/// the backend's `type_text`, echoing the text back on success.
async fn handle_type(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(text) = request
        .extra
        .get("text")
        .and_then(|v| v.as_str())
        .map(str::to_string)
    else {
        return Response::err("Missing 'text' field");
    };

    let mut guard = state.lock().await;
    if let Err(e) = guard.backend.type_text(&text) {
        return Response::err(format!("Type failed: {e}"));
    }
    Response::ok(serde_json::json!({"typed": text}))
}
/// Handles the `press` action: reads the string field `key` and passes it to
/// the backend's `press_key`, echoing the key name back on success.
async fn handle_press(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(key) = request
        .extra
        .get("key")
        .and_then(|v| v.as_str())
        .map(str::to_string)
    else {
        return Response::err("Missing 'key' field");
    };

    let mut guard = state.lock().await;
    if let Err(e) = guard.backend.press_key(&key) {
        return Response::err(format!("Key press failed: {e}"));
    }
    Response::ok(serde_json::json!({"pressed": key}))
}
/// Handles the `hotkey` action.
///
/// Reads the array field `keys` and passes the key names to the backend's
/// `hotkey`. Every array element must be a string: a non-string entry is
/// rejected with an error rather than dropped, because silently skipping it
/// would execute a different chord than the one requested.
async fn handle_hotkey(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(raw) = request.extra.get("keys").and_then(|v| v.as_array()) else {
        return Response::err("Missing 'keys' field");
    };

    // Collecting into Option yields None as soon as one element is not a string.
    let Some(keys) = raw
        .iter()
        .map(|v| v.as_str().map(str::to_string))
        .collect::<Option<Vec<String>>>()
    else {
        return Response::err("'keys' must be an array of strings");
    };

    let mut state = state.lock().await;
    match state.backend.hotkey(&keys) {
        Ok(()) => Response::ok(serde_json::json!({"hotkey": keys})),
        Err(e) => Response::err(format!("Hotkey failed: {e}")),
    }
}
/// Handles the `mouse-move` action.
///
/// Reads the integer fields `x` and `y` and moves the pointer there via the
/// backend. A value that does not fit in `i32` is rejected with an error
/// instead of being wrapped by a lossy cast.
async fn handle_mouse_move(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    // Fetches one integer field and narrows it to i32 with a range check.
    let coord = |name: &str| -> std::result::Result<i32, String> {
        let raw = request
            .extra
            .get(name)
            .and_then(|v| v.as_i64())
            .ok_or_else(|| format!("Missing '{name}' field"))?;
        i32::try_from(raw).map_err(|_| format!("'{name}' is out of range: {raw}"))
    };

    let x = match coord("x") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };
    let y = match coord("y") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };

    let mut state = state.lock().await;
    match state.backend.mouse_move(x, y) {
        Ok(()) => Response::ok(serde_json::json!({"moved": {"x": x, "y": y}})),
        Err(e) => Response::err(format!("Mouse move failed: {e}")),
    }
}
/// Handles the `mouse-scroll` action.
///
/// Reads the integer field `amount` and the optional string field `axis`
/// (defaulting to `"vertical"`), then asks the backend to scroll. An `amount`
/// that does not fit in `i32` is rejected with an error instead of being
/// wrapped by a lossy cast.
async fn handle_mouse_scroll(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    let Some(raw_amount) = request.extra.get("amount").and_then(|v| v.as_i64()) else {
        return Response::err("Missing 'amount' field");
    };
    let Ok(amount) = i32::try_from(raw_amount) else {
        return Response::err(format!("'amount' is out of range: {raw_amount}"));
    };

    let axis = request
        .extra
        .get("axis")
        .and_then(|v| v.as_str())
        .unwrap_or("vertical")
        .to_string();

    let mut state = state.lock().await;
    match state.backend.scroll(amount, &axis) {
        Ok(()) => {
            Response::ok(serde_json::json!({"scrolled": {"amount": amount, "axis": axis}}))
        }
        Err(e) => Response::err(format!("Scroll failed: {e}")),
    }
}
/// Handles the `mouse-drag` action.
///
/// Reads the integer fields `x1`, `y1`, `x2`, `y2` and asks the backend to
/// drag from the first point to the second. Fields are validated in that
/// order; a value that does not fit in `i32` is rejected with an error
/// instead of being wrapped by a lossy cast.
async fn handle_mouse_drag(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
) -> Response {
    // Fetches one integer field and narrows it to i32 with a range check.
    let coord = |name: &str| -> std::result::Result<i32, String> {
        let raw = request
            .extra
            .get(name)
            .and_then(|v| v.as_i64())
            .ok_or_else(|| format!("Missing '{name}' field"))?;
        i32::try_from(raw).map_err(|_| format!("'{name}' is out of range: {raw}"))
    };

    let x1 = match coord("x1") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };
    let y1 = match coord("y1") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };
    let x2 = match coord("x2") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };
    let y2 = match coord("y2") {
        Ok(v) => v,
        Err(msg) => return Response::err(msg),
    };

    let mut state = state.lock().await;
    match state.backend.drag(x1, y1, x2, y2) {
        Ok(()) => Response::ok(serde_json::json!({
            "dragged": {
                "from": {"x": x1, "y": y1},
                "to": {"x": x2, "y": y2}
            }
        })),
        Err(e) => Response::err(format!("Drag failed: {e}")),
    }
}
/// Parses a selector of the form `"x,y"` into a pair of `i32` coordinates.
///
/// Whitespace around either number is ignored. Returns `None` when the input
/// does not contain exactly one comma or when either side is not a valid `i32`.
fn parse_coords(s: &str) -> Option<(i32, i32)> {
    // With more than one comma, the remainder still contains a comma and
    // therefore fails to parse as an integer below.
    let (left, right) = s.split_once(',')?;
    let x = left.trim().parse::<i32>().ok()?;
    let y = right.trim().parse::<i32>().ok()?;
    Some((x, y))
}