From 3dbd9ce52d09759b0ffa96fd60061fab5535cf89 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 00:07:03 -0400 Subject: [PATCH 01/35] init with runtime contract --- CONTRIBUTING.md | 2 +- README.md | 14 +- ...{runtime-output.md => runtime-contract.md} | 0 skills/SKILL.md | 149 ------------------ skills/deskctl/SKILL.md | 132 ++++++++++++++++ skills/deskctl/references/commands.md | 75 +++++++++ skills/deskctl/references/install.md | 75 +++++++++ skills/deskctl/references/sandbox-agent.md | 61 +++++++ .../deskctl/templates/install-deskctl-npm.sh | 27 ++++ .../templates/sandbox-agent-desktop-loop.sh | 7 + 10 files changed, 390 insertions(+), 152 deletions(-) rename docs/{runtime-output.md => runtime-contract.md} (100%) delete mode 100644 skills/SKILL.md create mode 100644 skills/deskctl/SKILL.md create mode 100644 skills/deskctl/references/commands.md create mode 100644 skills/deskctl/references/install.md create mode 100644 skills/deskctl/references/sandbox-agent.md create mode 100644 skills/deskctl/templates/install-deskctl-npm.sh create mode 100644 skills/deskctl/templates/sandbox-agent-desktop-loop.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bdbce4e..926c58a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -21,7 +21,7 @@ pnpm --dir site install - `src/` holds production code and unit tests - `tests/` holds integration tests - `tests/support/` holds shared X11 and daemon helpers for integration coverage -- `docs/runtime-output.md` is the stable-vs-best-effort runtime output contract for agent-facing CLI work +- `docs/runtime-contract.md` is the stable-vs-best-effort runtime output contract for agent-facing CLI work Keep integration-only helpers out of `src/`. diff --git a/README.md b/README.md index 036396a..db7d92f 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,16 @@ npx deskctl-cli --help `deskctl-cli` currently supports `linux-x64` and installs the `deskctl` command by downloading the matching GitHub Release asset. 
+### Installable skill + +For `skills.sh` / agent skill ecosystems: + +```bash +npx skills add harivansh-afk/deskctl -s deskctl +``` + +The installable skill lives under [`skills/deskctl`](skills/deskctl) and is designed for X11 sandboxes, VMs, and sandbox-agent desktop sessions. It points agents to the npm install path first so they can get `deskctl` without Cargo. + ### Nix ```bash @@ -133,7 +143,7 @@ deskctl doctor - `@wN` refs are short-lived handles assigned by `snapshot` and `list-windows` - `--json` output includes a stable `window_id` for programmatic targeting within the current daemon session - `list-windows` is a cheap read-only operation and does not capture or write a screenshot -- the stable runtime JSON/error contract is documented in [docs/runtime-output.md](docs/runtime-output.md) +- the stable runtime JSON/error contract is documented in [docs/runtime-contract.md](docs/runtime-contract.md) ## Read and Wait Surface @@ -189,7 +199,7 @@ Text mode is compact and follow-up-oriented, but JSON is the parsing contract. - rely on `window_id`, selector-related fields, grouped read payloads, and structured error `kind` values for stable automation - treat monitor naming, incidental whitespace, and default screenshot file names as best-effort -See [docs/runtime-output.md](docs/runtime-output.md) for the exact stable-vs-best-effort breakdown. +See [docs/runtime-contract.md](docs/runtime-contract.md) for the exact stable-vs-best-effort breakdown. ## Distribution diff --git a/docs/runtime-output.md b/docs/runtime-contract.md similarity index 100% rename from docs/runtime-output.md rename to docs/runtime-contract.md diff --git a/skills/SKILL.md b/skills/SKILL.md deleted file mode 100644 index efbd188..0000000 --- a/skills/SKILL.md +++ /dev/null @@ -1,149 +0,0 @@ ---- -name: deskctl -description: Desktop control CLI for AI agents -allowed-tools: Bash(deskctl:*) ---- - -# deskctl - -Desktop control CLI for AI agents on Linux X11. 
Provides a unified interface for screenshots, mouse/keyboard input, and window management with compact `@wN` window references. - -## Core Workflow - -1. **Snapshot** to see the desktop and get window refs -2. **Query / wait** using grouped `get` and `wait` commands -3. **Act** using refs, explicit selectors, or coordinates -4. **Repeat** as needed - -## Quick Reference - -### See the Desktop - -```bash -deskctl snapshot # Screenshot + window tree with @wN refs -deskctl snapshot --annotate # Screenshot with bounding boxes and labels -deskctl snapshot --json # Structured JSON output -deskctl list-windows # Window tree without screenshot -deskctl screenshot /tmp/s.png # Screenshot only (no window tree) -deskctl get active-window # Currently focused window -deskctl get monitors # Monitor geometry -deskctl get version # deskctl version + backend -deskctl get systeminfo # Runtime-scoped diagnostics -deskctl wait window --selector 'title=Firefox' --timeout 10 -deskctl wait focus --selector 'class=firefox' --timeout 5 -``` - -### Click and Type - -```bash -deskctl click @w1 # Click center of window @w1 -deskctl click 500,300 # Click absolute coordinates -deskctl dblclick @w2 # Double-click window @w2 -deskctl type "hello world" # Type text into focused window -deskctl press enter # Press a key -deskctl hotkey ctrl c # Send Ctrl+C -deskctl hotkey ctrl shift t # Send Ctrl+Shift+T -``` - -### Mouse Control - -```bash -deskctl mouse move 500 300 # Move cursor to coordinates -deskctl mouse scroll 3 # Scroll down 3 units -deskctl mouse scroll -3 # Scroll up 3 units -deskctl mouse drag 100 100 500 500 # Drag from (100,100) to (500,500) -``` - -### Window Management - -```bash -deskctl focus @w2 # Focus window by ref -deskctl focus 'title=Firefox' # Focus by explicit title selector -deskctl focus 'class=firefox' # Focus by explicit class selector -deskctl focus "firefox" # Fuzzy substring match (fails on ambiguity) -deskctl close @w3 # Close window gracefully -deskctl move-window 
@w1 100 200 # Move window to position -deskctl resize-window @w1 800 600 # Resize window -``` - -### Utilities - -```bash -deskctl doctor # Diagnose X11, screenshot, and daemon health -deskctl get-screen-size # Screen resolution -deskctl get-mouse-position # Current cursor position -deskctl launch firefox # Launch an application -deskctl launch code -- --new-window # Launch with arguments -``` - -### Daemon - -```bash -deskctl daemon start # Start daemon manually -deskctl daemon stop # Stop daemon -deskctl daemon status # Check daemon status -``` - -## Global Options - -- `--json` : Output as structured JSON (all commands) -- `--session NAME` : Session name for multiple daemon instances (default: "default") -- `--socket PATH` : Custom Unix socket path - -## Output Contract - -- Prefer `--json` when an agent needs strict parsing. -- Use `window_id` for stable targeting inside a live daemon session. -- Use `ref_id` / `@wN` for quick short-lived follow-up actions after `snapshot` or `list-windows`. -- Structured JSON failures expose machine-usable `kind` values for selector and wait failures. -- The exact text formatting is intentionally compact but not the parsing contract. See `docs/runtime-output.md` for the stable field policy. - -## Window Refs - -After `snapshot` or `list-windows`, windows are assigned short refs: -- `@w1` is the topmost (usually focused) window -- `@w2`, `@w3`, etc. follow z-order (front to back) -- Refs reset on each `snapshot` call -- Use `--json` to see stable `window_id` values for programmatic tracking within the current daemon session - -## Selector Contract - -Prefer explicit selectors when an agent needs deterministic targeting: - -```bash -ref=w1 -id=win1 -title=Firefox -class=firefox -focused -``` - -Bare selectors such as `firefox` still work as fuzzy substring matches, but they now fail with candidate windows if multiple matches exist. - -## Example Agent Workflow - -```bash -# 1. 
See what's on screen -deskctl snapshot --annotate - -# 2. Wait for the browser and focus it deterministically -deskctl wait window --selector 'class=firefox' --timeout 10 -deskctl focus 'class=firefox' - -# 3. Navigate to a URL -deskctl hotkey ctrl l -deskctl type "https://example.com" -deskctl press enter - -# 4. Take a new snapshot to see the result -deskctl snapshot -``` - -## Key Names for press/hotkey - -Modifiers: `ctrl`, `alt`, `shift`, `super` -Navigation: `enter`, `tab`, `escape`, `backspace`, `delete`, `space` -Arrows: `up`, `down`, `left`, `right` -Page: `home`, `end`, `pageup`, `pagedown` -Function: `f1` through `f12` -Characters: any single character (e.g. `a`, `1`, `/`) diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md new file mode 100644 index 0000000..1522703 --- /dev/null +++ b/skills/deskctl/SKILL.md @@ -0,0 +1,132 @@ +--- +name: deskctl +description: Desktop control CLI for AI agents on Linux X11. Use when operating an X11 desktop in a sandbox, VM, or sandbox-agent session via screenshots, grouped get/wait commands, selectors, and mouse or keyboard input. Prefer this skill when the task is "control the desktop", "inspect windows", "wait for a window", "click/type in the sandbox desktop", or "use deskctl inside sandbox-agent". +allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*), Bash(sandbox-agent:*) +--- + +# deskctl + +`deskctl` is a non-interactive desktop control CLI for Linux X11 agents. It works well inside sandbox-agent desktop environments because it gives agents a tight `observe -> wait -> act -> verify` loop. 
+ +## Install skill (optional) + +### npx + +```bash +npx skills add harivansh-afk/deskctl -s deskctl +``` + +### bunx + +```bash +bunx skills add harivansh-afk/deskctl -s deskctl +``` + +## Install the CLI + +Preferred install path: + +```bash +npm install -g deskctl-cli +deskctl --help +``` + +If global npm installs are not writable, use a user prefix: + +```bash +mkdir -p "$HOME/.local/bin" +npm install -g --prefix "$HOME/.local" deskctl-cli +export PATH="$HOME/.local/bin:$PATH" +deskctl --help +``` + +One-shot usage also works: + +```bash +npx deskctl-cli --help +``` + +For install details and fallback paths, see [references/install.md](references/install.md). + +## Sandbox-Agent Notes + +Before using `deskctl` inside sandbox-agent: + +1. Make sure the sandbox has desktop runtime packages installed. +2. Make sure the session is actually running X11. +3. Run `deskctl doctor` before trying to click or type. + +Typical sandbox-agent prep: + +```bash +sandbox-agent install desktop --yes +deskctl doctor +``` + +If `doctor` fails, inspect `DISPLAY`, `XDG_SESSION_TYPE`, and whether the sandbox actually has a desktop session. See [references/sandbox-agent.md](references/sandbox-agent.md). + +## Core Workflow + +Every desktop task should follow this loop: + +1. **Observe** +2. **Target** +3. **Wait** +4. **Act** +5. **Verify** + +```bash +deskctl doctor +deskctl snapshot --annotate +deskctl get active-window +deskctl wait window --selector 'class=firefox' --timeout 10 +deskctl focus 'class=firefox' +deskctl hotkey ctrl l +deskctl type "https://example.com" +deskctl press enter +deskctl snapshot +``` + +## What To Reach For First + +- `deskctl doctor` +- `deskctl snapshot --annotate` +- `deskctl list-windows` +- `deskctl get active-window` +- `deskctl wait window --selector ...` +- `deskctl wait focus --selector ...` + +Use `--json` when you need strict parsing. Use explicit selectors when you need deterministic targeting. 
+ +## Selector Rules + +Prefer explicit selectors: + +```bash +ref=w1 +id=win1 +title=Firefox +class=firefox +focused +``` + +Legacy refs still work: + +```bash +@w1 +w1 +win1 +``` + +Bare strings such as `firefox` are fuzzy substring selectors. They fail on ambiguity instead of silently picking the wrong window. + +## References + +- [references/install.md](references/install.md) - install paths, npm-first bootstrap, runtime prerequisites +- [references/commands.md](references/commands.md) - grouped reads, waits, selectors, and core action commands +- [references/sandbox-agent.md](references/sandbox-agent.md) - using `deskctl` inside sandbox-agent desktop sessions + +## Templates + +- [templates/install-deskctl-npm.sh](templates/install-deskctl-npm.sh) - install `deskctl-cli` into a user prefix +- [templates/sandbox-agent-desktop-loop.sh](templates/sandbox-agent-desktop-loop.sh) - minimal observe/wait/act loop for desktop tasks diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md new file mode 100644 index 0000000..2d2dc1f --- /dev/null +++ b/skills/deskctl/references/commands.md @@ -0,0 +1,75 @@ +# deskctl command guide + +## Observe + +```bash +deskctl doctor +deskctl snapshot +deskctl snapshot --annotate +deskctl list-windows +deskctl screenshot /tmp/current.png +deskctl get active-window +deskctl get monitors +deskctl get version +deskctl get systeminfo +``` + +Use `snapshot --annotate` when you need both the screenshot artifact and the short `@wN` labels. Use `list-windows` when you only need the window tree and do not want screenshot side effects. + +## Wait + +```bash +deskctl wait window --selector 'title=Firefox' --timeout 10 +deskctl wait focus --selector 'class=firefox' --timeout 5 +``` + +Wait commands return the matched window payload on success. In `--json` mode, failures include structured `kind` values so the caller can recover without string parsing. 
+ +## Selectors + +Prefer explicit selectors: + +```bash +ref=w1 +id=win1 +title=Firefox +class=firefox +focused +``` + +Legacy refs still work: + +```bash +@w1 +w1 +win1 +``` + +Bare fuzzy selectors such as `firefox` are supported, but they fail on ambiguity. + +## Act + +```bash +deskctl focus 'class=firefox' +deskctl click @w1 +deskctl dblclick @w2 +deskctl type "hello world" +deskctl press enter +deskctl hotkey ctrl shift t +deskctl mouse move 500 300 +deskctl mouse scroll 3 +deskctl mouse drag 100 100 500 500 +deskctl move-window @w1 100 120 +deskctl resize-window @w1 1280 720 +deskctl close @w3 +deskctl launch firefox +``` + +## Agent loop + +The safe pattern is: + +1. Observe with `snapshot`, `list-windows`, or `get ...` +2. Wait for the target window if needed +3. Act using explicit selectors or refs +4. Snapshot again to verify the result diff --git a/skills/deskctl/references/install.md b/skills/deskctl/references/install.md new file mode 100644 index 0000000..cb97a5c --- /dev/null +++ b/skills/deskctl/references/install.md @@ -0,0 +1,75 @@ +# Install `deskctl` + +`deskctl` is designed to be used non-interactively by agents. The easiest install path is the npm package because it installs the `deskctl` command directly from GitHub Release assets without needing Cargo on the target machine. + +## Preferred: npm global install + +```bash +npm install -g deskctl-cli +deskctl --help +``` + +This is the preferred path for sandboxes, VMs, and sandbox-agent sessions where Node/npm already exists. + +## User-prefix npm install + +If global npm installs are not writable: + +```bash +mkdir -p "$HOME/.local/bin" +npm install -g --prefix "$HOME/.local" deskctl-cli +export PATH="$HOME/.local/bin:$PATH" +deskctl --help +``` + +This avoids `sudo` and keeps the install inside the user home directory. + +## One-shot npm execution + +```bash +npx deskctl-cli --help +``` + +Use this for quick testing. 
For repeated desktop control, install the command once so the runtime is predictable. + +## Fallback: Cargo + +```bash +cargo install deskctl +``` + +Use this only when the machine already has a Rust toolchain or when you explicitly want a source build. + +## Fallback: local Docker build + +If you need a Linux binary from macOS or another non-Linux host: + +```bash +docker compose -f docker/docker-compose.yml run --rm build +``` + +Then copy `dist/deskctl-linux-x86_64` into the target machine. + +## Runtime prerequisites + +`deskctl` needs: + +- Linux +- X11 +- a valid `DISPLAY` +- a working desktop/window-manager session + +Quick verification: + +```bash +printenv DISPLAY +printenv XDG_SESSION_TYPE +deskctl doctor +``` + +Inside sandbox-agent, you may need to install desktop dependencies first: + +```bash +sandbox-agent install desktop --yes +deskctl doctor +``` diff --git a/skills/deskctl/references/sandbox-agent.md b/skills/deskctl/references/sandbox-agent.md new file mode 100644 index 0000000..d994062 --- /dev/null +++ b/skills/deskctl/references/sandbox-agent.md @@ -0,0 +1,61 @@ +# deskctl inside sandbox-agent + +Use `deskctl` when the sandbox-agent session includes a Linux desktop and you want a tight local desktop-control loop from the shell. + +## When it fits + +`deskctl` is a good fit when: + +- the sandbox already has an X11 desktop session +- you want fast local desktop control from inside the sandbox +- you want short-lived refs like `@w1` and grouped `get` or `wait` primitives + +It is not a replacement for sandbox-agent session orchestration itself. Use sandbox-agent to provision the sandbox and desktop runtime, then use `deskctl` inside that environment to control the GUI. 
+ +## Minimal bootstrap + +```bash +sandbox-agent install desktop --yes +npm install -g deskctl-cli +deskctl doctor +deskctl snapshot --annotate +``` + +If npm global installs are not writable: + +```bash +mkdir -p "$HOME/.local/bin" +npm install -g --prefix "$HOME/.local" deskctl-cli +export PATH="$HOME/.local/bin:$PATH" +deskctl doctor +``` + +## Expected environment + +Check: + +```bash +printenv DISPLAY +printenv XDG_SESSION_TYPE +deskctl --json get systeminfo +``` + +Healthy `deskctl` usage usually means: + +- `DISPLAY` is set +- `XDG_SESSION_TYPE=x11` +- `deskctl doctor` succeeds + +## Recommended workflow + +```bash +deskctl snapshot --annotate +deskctl wait window --selector 'class=firefox' --timeout 10 +deskctl focus 'class=firefox' +deskctl hotkey ctrl l +deskctl type "https://example.com" +deskctl press enter +deskctl snapshot +``` + +Prefer `--json` for strict machine parsing and explicit selectors for deterministic targeting. diff --git a/skills/deskctl/templates/install-deskctl-npm.sh b/skills/deskctl/templates/install-deskctl-npm.sh new file mode 100644 index 0000000..a0ab596 --- /dev/null +++ b/skills/deskctl/templates/install-deskctl-npm.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -euo pipefail + +if command -v deskctl >/dev/null 2>&1; then + echo "deskctl already installed: $(command -v deskctl)" + exit 0 +fi + +if ! command -v npm >/dev/null 2>&1; then + echo "npm is required for the preferred deskctl install path" + exit 1 +fi + +prefix="${DESKCTL_NPM_PREFIX:-$HOME/.local}" +bin_dir="$prefix/bin" + +mkdir -p "$bin_dir" +npm install -g --prefix "$prefix" deskctl-cli + +if ! 
command -v deskctl >/dev/null 2>&1; then + echo "deskctl installed to $bin_dir" + echo "add this to PATH if needed:" + echo "export PATH=\"$bin_dir:\$PATH\"" +fi + +"$bin_dir/deskctl" --help >/dev/null 2>&1 || true +echo "deskctl bootstrap complete" diff --git a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh new file mode 100644 index 0000000..f47dbb8 --- /dev/null +++ b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail + +deskctl doctor +deskctl snapshot --annotate +deskctl get active-window +deskctl wait window --selector "${1:-focused}" --timeout "${2:-5}" From c37589ccf403106ebba3414ceeb9263c19c96e4f Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 00:30:05 -0400 Subject: [PATCH 02/35] skill validated with workflows --- skills/deskctl/SKILL.md | 128 ++++-------------- skills/deskctl/references/commands.md | 64 ++++----- skills/deskctl/references/install.md | 75 ---------- skills/deskctl/references/runtime-contract.md | 1 + skills/deskctl/references/sandbox-agent.md | 61 --------- .../deskctl/templates/install-deskctl-npm.sh | 27 ---- .../templates/sandbox-agent-desktop-loop.sh | 7 - skills/deskctl/workflows/observe-act.sh | 37 +++++ skills/deskctl/workflows/poll-condition.sh | 42 ++++++ 9 files changed, 134 insertions(+), 308 deletions(-) delete mode 100644 skills/deskctl/references/install.md create mode 120000 skills/deskctl/references/runtime-contract.md delete mode 100644 skills/deskctl/references/sandbox-agent.md delete mode 100644 skills/deskctl/templates/install-deskctl-npm.sh delete mode 100644 skills/deskctl/templates/sandbox-agent-desktop-loop.sh create mode 100755 skills/deskctl/workflows/observe-act.sh create mode 100755 skills/deskctl/workflows/poll-condition.sh diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md index 1522703..81dea19 100644 --- a/skills/deskctl/SKILL.md +++ 
b/skills/deskctl/SKILL.md @@ -1,132 +1,54 @@ --- name: deskctl -description: Desktop control CLI for AI agents on Linux X11. Use when operating an X11 desktop in a sandbox, VM, or sandbox-agent session via screenshots, grouped get/wait commands, selectors, and mouse or keyboard input. Prefer this skill when the task is "control the desktop", "inspect windows", "wait for a window", "click/type in the sandbox desktop", or "use deskctl inside sandbox-agent". -allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*), Bash(sandbox-agent:*) +description: Non-interactive X11 desktop control for AI agents. Use when the task involves controlling a Linux desktop - clicking, typing, reading windows, waiting for UI state, or taking screenshots inside a sandbox or VM. +allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*) --- # deskctl -`deskctl` is a non-interactive desktop control CLI for Linux X11 agents. It works well inside sandbox-agent desktop environments because it gives agents a tight `observe -> wait -> act -> verify` loop. +Non-interactive desktop control CLI for Linux X11 agents. -## Install skill (optional) +All output follows the runtime contract defined in [references/runtime-contract.md](references/runtime-contract.md). Every command returns a stable JSON envelope when called with `--json`. Use `--json` whenever you need to parse output programmatically. 
-### npx - -```bash -npx skills add harivansh-afk/deskctl -s deskctl -``` - -### bunx - -```bash -bunx skills add harivansh-afk/deskctl -s deskctl -``` - -## Install the CLI - -Preferred install path: +## Quick start ```bash npm install -g deskctl-cli -deskctl --help -``` - -If global npm installs are not writable, use a user prefix: - -```bash -mkdir -p "$HOME/.local/bin" -npm install -g --prefix "$HOME/.local" deskctl-cli -export PATH="$HOME/.local/bin:$PATH" -deskctl --help -``` - -One-shot usage also works: - -```bash -npx deskctl-cli --help -``` - -For install details and fallback paths, see [references/install.md](references/install.md). - -## Sandbox-Agent Notes - -Before using `deskctl` inside sandbox-agent: - -1. Make sure the sandbox has desktop runtime packages installed. -2. Make sure the session is actually running X11. -3. Run `deskctl doctor` before trying to click or type. - -Typical sandbox-agent prep: - -```bash -sandbox-agent install desktop --yes -deskctl doctor -``` - -If `doctor` fails, inspect `DISPLAY`, `XDG_SESSION_TYPE`, and whether the sandbox actually has a desktop session. See [references/sandbox-agent.md](references/sandbox-agent.md). - -## Core Workflow - -Every desktop task should follow this loop: - -1. **Observe** -2. **Target** -3. **Wait** -4. **Act** -5. **Verify** - -```bash deskctl doctor deskctl snapshot --annotate -deskctl get active-window -deskctl wait window --selector 'class=firefox' --timeout 10 -deskctl focus 'class=firefox' -deskctl hotkey ctrl l -deskctl type "https://example.com" -deskctl press enter -deskctl snapshot ``` -## What To Reach For First +## Agent loop -- `deskctl doctor` -- `deskctl snapshot --annotate` -- `deskctl list-windows` -- `deskctl get active-window` -- `deskctl wait window --selector ...` -- `deskctl wait focus --selector ...` - -Use `--json` when you need strict parsing. Use explicit selectors when you need deterministic targeting. 
- -## Selector Rules - -Prefer explicit selectors: +Every desktop interaction follows: **observe -> wait -> act -> verify**. ```bash -ref=w1 -id=win1 -title=Firefox -class=firefox -focused +deskctl snapshot --annotate # observe +deskctl wait window --selector 'title=Firefox' --timeout 10 # wait +deskctl click 'title=Firefox' # act +deskctl snapshot # verify ``` -Legacy refs still work: +See [workflows/observe-act.sh](workflows/observe-act.sh) for a reusable script. See [workflows/poll-condition.sh](workflows/poll-condition.sh) for polling loops. + +## Selectors ```bash -@w1 -w1 -win1 +ref=w1 # snapshot ref (short-lived) +id=win1 # stable window ID (session-scoped) +title=Firefox # match by title +class=firefox # match by WM class +focused # currently focused window ``` -Bare strings such as `firefox` are fuzzy substring selectors. They fail on ambiguity instead of silently picking the wrong window. +Bare strings like `firefox` do fuzzy matching but fail on ambiguity. Prefer explicit selectors. 
## References -- [references/install.md](references/install.md) - install paths, npm-first bootstrap, runtime prerequisites -- [references/commands.md](references/commands.md) - grouped reads, waits, selectors, and core action commands -- [references/sandbox-agent.md](references/sandbox-agent.md) - using `deskctl` inside sandbox-agent desktop sessions +- [references/runtime-contract.md](references/runtime-contract.md) - output contract, stable fields, error kinds +- [references/commands.md](references/commands.md) - all available commands -## Templates +## Workflows -- [templates/install-deskctl-npm.sh](templates/install-deskctl-npm.sh) - install `deskctl-cli` into a user prefix -- [templates/sandbox-agent-desktop-loop.sh](templates/sandbox-agent-desktop-loop.sh) - minimal observe/wait/act loop for desktop tasks +- [workflows/observe-act.sh](workflows/observe-act.sh) - main observe-act loop +- [workflows/poll-condition.sh](workflows/poll-condition.sh) - poll for a condition on screen diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md index 2d2dc1f..d0e7c9f 100644 --- a/skills/deskctl/references/commands.md +++ b/skills/deskctl/references/commands.md @@ -1,21 +1,23 @@ -# deskctl command guide +# deskctl commands + +All commands support `--json` for machine-parseable output following the runtime contract. 
## Observe ```bash -deskctl doctor -deskctl snapshot -deskctl snapshot --annotate -deskctl list-windows -deskctl screenshot /tmp/current.png -deskctl get active-window -deskctl get monitors -deskctl get version -deskctl get systeminfo +deskctl doctor # check X11 runtime and daemon health +deskctl snapshot # screenshot + window list +deskctl snapshot --annotate # screenshot with @wN labels overlaid +deskctl list-windows # window list only (no screenshot) +deskctl screenshot /tmp/screen.png # screenshot to explicit path +deskctl get active-window # focused window info +deskctl get monitors # monitor geometry +deskctl get version # version and backend +deskctl get systeminfo # full runtime diagnostics +deskctl get-screen-size # screen resolution +deskctl get-mouse-position # cursor coordinates ``` -Use `snapshot --annotate` when you need both the screenshot artifact and the short `@wN` labels. Use `list-windows` when you only need the window tree and do not want screenshot side effects. - ## Wait ```bash @@ -23,29 +25,19 @@ deskctl wait window --selector 'title=Firefox' --timeout 10 deskctl wait focus --selector 'class=firefox' --timeout 5 ``` -Wait commands return the matched window payload on success. In `--json` mode, failures include structured `kind` values so the caller can recover without string parsing. +Returns the matched window payload on success. Failures include structured `kind` values in `--json` mode. ## Selectors -Prefer explicit selectors: - ```bash -ref=w1 -id=win1 -title=Firefox -class=firefox -focused +ref=w1 # snapshot ref (short-lived, from last snapshot) +id=win1 # stable window ID (session-scoped) +title=Firefox # match by window title +class=firefox # match by WM class +focused # currently focused window ``` -Legacy refs still work: - -```bash -@w1 -w1 -win1 -``` - -Bare fuzzy selectors such as `firefox` are supported, but they fail on ambiguity. +Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail on ambiguity. 
## Act @@ -58,6 +50,7 @@ deskctl press enter deskctl hotkey ctrl shift t deskctl mouse move 500 300 deskctl mouse scroll 3 +deskctl mouse scroll 3 --axis horizontal deskctl mouse drag 100 100 500 500 deskctl move-window @w1 100 120 deskctl resize-window @w1 1280 720 @@ -65,11 +58,12 @@ deskctl close @w3 deskctl launch firefox ``` -## Agent loop +## Daemon -The safe pattern is: +```bash +deskctl daemon start +deskctl daemon stop +deskctl daemon status +``` -1. Observe with `snapshot`, `list-windows`, or `get ...` -2. Wait for the target window if needed -3. Act using explicit selectors or refs -4. Snapshot again to verify the result +The daemon starts automatically on first command. Manual control is rarely needed. diff --git a/skills/deskctl/references/install.md b/skills/deskctl/references/install.md deleted file mode 100644 index cb97a5c..0000000 --- a/skills/deskctl/references/install.md +++ /dev/null @@ -1,75 +0,0 @@ -# Install `deskctl` - -`deskctl` is designed to be used non-interactively by agents. The easiest install path is the npm package because it installs the `deskctl` command directly from GitHub Release assets without needing Cargo on the target machine. - -## Preferred: npm global install - -```bash -npm install -g deskctl-cli -deskctl --help -``` - -This is the preferred path for sandboxes, VMs, and sandbox-agent sessions where Node/npm already exists. - -## User-prefix npm install - -If global npm installs are not writable: - -```bash -mkdir -p "$HOME/.local/bin" -npm install -g --prefix "$HOME/.local" deskctl-cli -export PATH="$HOME/.local/bin:$PATH" -deskctl --help -``` - -This avoids `sudo` and keeps the install inside the user home directory. - -## One-shot npm execution - -```bash -npx deskctl-cli --help -``` - -Use this for quick testing. For repeated desktop control, install the command once so the runtime is predictable. 
- -## Fallback: Cargo - -```bash -cargo install deskctl -``` - -Use this only when the machine already has a Rust toolchain or when you explicitly want a source build. - -## Fallback: local Docker build - -If you need a Linux binary from macOS or another non-Linux host: - -```bash -docker compose -f docker/docker-compose.yml run --rm build -``` - -Then copy `dist/deskctl-linux-x86_64` into the target machine. - -## Runtime prerequisites - -`deskctl` needs: - -- Linux -- X11 -- a valid `DISPLAY` -- a working desktop/window-manager session - -Quick verification: - -```bash -printenv DISPLAY -printenv XDG_SESSION_TYPE -deskctl doctor -``` - -Inside sandbox-agent, you may need to install desktop dependencies first: - -```bash -sandbox-agent install desktop --yes -deskctl doctor -``` diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md new file mode 120000 index 0000000..8de0781 --- /dev/null +++ b/skills/deskctl/references/runtime-contract.md @@ -0,0 +1 @@ +../../../docs/runtime-contract.md \ No newline at end of file diff --git a/skills/deskctl/references/sandbox-agent.md b/skills/deskctl/references/sandbox-agent.md deleted file mode 100644 index d994062..0000000 --- a/skills/deskctl/references/sandbox-agent.md +++ /dev/null @@ -1,61 +0,0 @@ -# deskctl inside sandbox-agent - -Use `deskctl` when the sandbox-agent session includes a Linux desktop and you want a tight local desktop-control loop from the shell. - -## When it fits - -`deskctl` is a good fit when: - -- the sandbox already has an X11 desktop session -- you want fast local desktop control from inside the sandbox -- you want short-lived refs like `@w1` and grouped `get` or `wait` primitives - -It is not a replacement for sandbox-agent session orchestration itself. Use sandbox-agent to provision the sandbox and desktop runtime, then use `deskctl` inside that environment to control the GUI. 
- -## Minimal bootstrap - -```bash -sandbox-agent install desktop --yes -npm install -g deskctl-cli -deskctl doctor -deskctl snapshot --annotate -``` - -If npm global installs are not writable: - -```bash -mkdir -p "$HOME/.local/bin" -npm install -g --prefix "$HOME/.local" deskctl-cli -export PATH="$HOME/.local/bin:$PATH" -deskctl doctor -``` - -## Expected environment - -Check: - -```bash -printenv DISPLAY -printenv XDG_SESSION_TYPE -deskctl --json get systeminfo -``` - -Healthy `deskctl` usage usually means: - -- `DISPLAY` is set -- `XDG_SESSION_TYPE=x11` -- `deskctl doctor` succeeds - -## Recommended workflow - -```bash -deskctl snapshot --annotate -deskctl wait window --selector 'class=firefox' --timeout 10 -deskctl focus 'class=firefox' -deskctl hotkey ctrl l -deskctl type "https://example.com" -deskctl press enter -deskctl snapshot -``` - -Prefer `--json` for strict machine parsing and explicit selectors for deterministic targeting. diff --git a/skills/deskctl/templates/install-deskctl-npm.sh b/skills/deskctl/templates/install-deskctl-npm.sh deleted file mode 100644 index a0ab596..0000000 --- a/skills/deskctl/templates/install-deskctl-npm.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -if command -v deskctl >/dev/null 2>&1; then - echo "deskctl already installed: $(command -v deskctl)" - exit 0 -fi - -if ! command -v npm >/dev/null 2>&1; then - echo "npm is required for the preferred deskctl install path" - exit 1 -fi - -prefix="${DESKCTL_NPM_PREFIX:-$HOME/.local}" -bin_dir="$prefix/bin" - -mkdir -p "$bin_dir" -npm install -g --prefix "$prefix" deskctl-cli - -if ! 
command -v deskctl >/dev/null 2>&1; then - echo "deskctl installed to $bin_dir" - echo "add this to PATH if needed:" - echo "export PATH=\"$bin_dir:\$PATH\"" -fi - -"$bin_dir/deskctl" --help >/dev/null 2>&1 || true -echo "deskctl bootstrap complete" diff --git a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh deleted file mode 100644 index f47dbb8..0000000 --- a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - -deskctl doctor -deskctl snapshot --annotate -deskctl get active-window -deskctl wait window --selector "${1:-focused}" --timeout "${2:-5}" diff --git a/skills/deskctl/workflows/observe-act.sh b/skills/deskctl/workflows/observe-act.sh new file mode 100755 index 0000000..0e336ae --- /dev/null +++ b/skills/deskctl/workflows/observe-act.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# observe-act.sh - main desktop interaction loop +# usage: ./observe-act.sh <selector> [action] [action-args...] +# example: ./observe-act.sh 'title=Firefox' click +# example: ./observe-act.sh 'class=terminal' type "ls -la" +set -euo pipefail + +SELECTOR="${1:?usage: observe-act.sh <selector> [action] [action-args...]}" +ACTION="${2:-click}" +shift 2 2>/dev/null || true + +# 1. observe - snapshot the desktop, get current state +echo "--- observe ---" +deskctl snapshot --annotate --json | head -1 +deskctl get active-window + +# 2. wait - ensure target exists +echo "--- wait ---" +deskctl wait window --selector "$SELECTOR" --timeout 10 + +# 3. 
act - perform the action on the target +echo "--- act ---" +case "$ACTION" in + click) deskctl click "$SELECTOR" ;; + dblclick) deskctl dblclick "$SELECTOR" ;; + focus) deskctl focus "$SELECTOR" ;; + type) deskctl focus "$SELECTOR" && deskctl type "$@" ;; + press) deskctl focus "$SELECTOR" && deskctl press "$@" ;; + hotkey) deskctl focus "$SELECTOR" && deskctl hotkey "$@" ;; + close) deskctl close "$SELECTOR" ;; + *) echo "unknown action: $ACTION"; exit 1 ;; +esac + +# 4. verify - snapshot again to confirm result +echo "--- verify ---" +sleep 0.5 +deskctl snapshot --json | head -1 diff --git a/skills/deskctl/workflows/poll-condition.sh b/skills/deskctl/workflows/poll-condition.sh new file mode 100755 index 0000000..e173bf5 --- /dev/null +++ b/skills/deskctl/workflows/poll-condition.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# poll-condition.sh - poll the desktop until a condition is met +# usage: ./poll-condition.sh <match-string> [interval-seconds] [max-attempts] +# example: ./poll-condition.sh "Tickets Available" 5 60 +# example: ./poll-condition.sh "Order Confirmed" 3 20 +# example: ./poll-condition.sh "Download Complete" 10 30 +# +# checks window titles for the match string every N seconds. +# exits 0 when found, exits 1 after max attempts. 
+set -euo pipefail + +MATCH="${1:?usage: poll-condition.sh <match-string> [interval] [max-attempts]}" +INTERVAL="${2:-5}" +MAX="${3:-60}" + +attempt=0 +while [ "$attempt" -lt "$MAX" ]; do + attempt=$((attempt + 1)) + + # list windows and check the JSON payload for the match string + windows=$(deskctl list-windows --json 2>/dev/null || echo '{"success":false}') + if echo "$windows" | grep -qi "$MATCH"; then + echo "FOUND: '$MATCH' detected on attempt $attempt" + deskctl snapshot --annotate + exit 0 + fi + + # also check the active window payload for the match string + active=$(deskctl get active-window --json 2>/dev/null || echo '{}') + if echo "$active" | grep -qi "$MATCH"; then + echo "FOUND: '$MATCH' in active window on attempt $attempt" + deskctl snapshot --annotate + exit 0 + fi + + echo "attempt $attempt/$MAX - '$MATCH' not found, waiting ${INTERVAL}s..." + sleep "$INTERVAL" +done + +echo "NOT FOUND: '$MATCH' after $MAX attempts" +deskctl snapshot --annotate +exit 1 From 14c89563211a8fec4b916bc4686ee1b4b86070d4 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 08:17:07 -0400 Subject: [PATCH 03/35] align docs and contract --- README.md | 268 ++++---------------------- docs/runtime-contract.md | 168 +++------------- site/src/pages/architecture.mdx | 104 ++++++---- site/src/pages/commands.mdx | 219 ++++++++------------- site/src/pages/index.astro | 57 +++++- site/src/pages/installation.mdx | 75 ++++--- site/src/pages/quick-start.mdx | 106 +++++----- site/src/pages/runtime-contract.mdx | 177 +++++++++++++++++ site/src/styles/base.css | 21 ++ skills/deskctl/references/commands.md | 52 +++-- 10 files changed, 590 insertions(+), 657 deletions(-) create mode 100644 site/src/pages/runtime-contract.mdx diff --git a/README.md b/README.md index db7d92f..32144f0 100644 --- a/README.md +++ b/README.md @@ -1,266 +1,68 @@ # deskctl -Desktop control CLI for AI agents on Linux X11. 
+[![npm](https://img.shields.io/npm/v/deskctl-cli?label=npm)](https://www.npmjs.com/package/deskctl-cli) +[![release](https://img.shields.io/github/v/release/harivansh-afk/deskctl?label=release)](https://github.com/harivansh-afk/deskctl/releases) +[![runtime](https://img.shields.io/badge/runtime-linux--x11-111827)](#support-boundary) +[![skill](https://img.shields.io/badge/skills.sh-deskctl-111827)](skills/deskctl) + +Non-interactive desktop control for AI agents on Linux X11. ## Install -### Cargo - -```bash -cargo install deskctl -``` - -Source builds on Linux require: - -- Rust 1.75+ -- `pkg-config` -- X11 development libraries for input and windowing, typically `libx11-dev` and `libxtst-dev` on Debian/Ubuntu - -### npm - ```bash npm install -g deskctl-cli -deskctl --help +deskctl doctor +deskctl snapshot --annotate ``` -One-shot execution is also supported: +One-shot execution also works: ```bash npx deskctl-cli --help ``` -`deskctl-cli` currently supports `linux-x64` and installs the `deskctl` command by downloading the matching GitHub Release asset. +`deskctl-cli` installs the `deskctl` command by downloading the matching GitHub Release asset for the supported runtime target. -### Installable skill - -For `skills.sh` / agent skill ecosystems: +## Installable skill ```bash npx skills add harivansh-afk/deskctl -s deskctl ``` -The installable skill lives under [`skills/deskctl`](skills/deskctl) and is designed for X11 sandboxes, VMs, and sandbox-agent desktop sessions. It points agents to the npm install path first so they can get `deskctl` without Cargo. +The installable skill lives in [`skills/deskctl`](skills/deskctl) and is built around the same observe -> wait -> act -> verify loop as the CLI. 
-### Nix +## Quick example + +```bash +deskctl doctor +deskctl snapshot --annotate +deskctl wait window --selector 'title=Firefox' --timeout 10 +deskctl focus 'title=Firefox' +deskctl type "hello world" +``` + +## Docs + +- runtime contract: [docs/runtime-contract.md](docs/runtime-contract.md) +- release flow: [docs/releasing.md](docs/releasing.md) +- installable skill: [skills/deskctl](skills/deskctl) +- contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md) + +## Other install paths + +Nix: ```bash nix run github:harivansh-afk/deskctl -- --help nix profile install github:harivansh-afk/deskctl ``` -The repo flake is the supported Nix install surface in this phase. - -### Docker Convenience - -Build a Linux binary locally with Docker: - -```bash -docker compose -f docker/docker-compose.yml run --rm build -``` - -This writes `dist/deskctl-linux-x86_64`. - -Copy it to an SSH machine where `scp` is unavailable: - -```bash -ssh -p 443 deskctl@ssh.agentcomputer.ai 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64 -``` - -Run it on an X11 session: - -```bash -DISPLAY=:1 XDG_SESSION_TYPE=x11 ~/deskctl --json snapshot --annotate -``` - -### Local Source Build +Source build: ```bash cargo build ``` -## Quick Start +## Support boundary -```bash -# Diagnose the environment first -deskctl doctor - -# See the desktop -deskctl snapshot - -# Query focused runtime state -deskctl get active-window -deskctl get monitors - -# Click a window -deskctl click @w1 - -# Type text -deskctl type "hello world" - -# Wait for a window or focus transition -deskctl wait window --selector 'title=Firefox' --timeout 10 -deskctl wait focus --selector 'class=firefox' --timeout 5 - -# Focus by explicit selector -deskctl focus 'title=Firefox' -``` - -## Architecture - -Client-daemon architecture over Unix sockets (NDJSON wire protocol). -The daemon starts automatically on first command and keeps the X11 connection alive for fast repeated calls. 
- -Source layout: - -- `src/lib.rs` exposes the shared library target -- `src/main.rs` is the thin CLI wrapper -- `src/` contains production code and unit tests -- `tests/` contains Linux/X11 integration tests -- `tests/support/` contains shared integration helpers - -## Runtime Requirements - -- Linux with X11 session -- Rust 1.75+ plus the source-build dependencies above when building from source - -The binary itself only links the standard glibc runtime on Linux (`libc`, `libm`, `libgcc_s`). - -For deskctl to be fully functional on a fresh VM you still need: - -- an X11 server and an active `DISPLAY` -- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment -- a window manager or desktop environment that exposes standard EWMH properties such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW` -- an X server with the extensions needed for input simulation and screen metadata, which is standard on normal desktop X11 setups - -If setup fails, run: - -```bash -deskctl doctor -``` - -## Contract Notes - -- `@wN` refs are short-lived handles assigned by `snapshot` and `list-windows` -- `--json` output includes a stable `window_id` for programmatic targeting within the current daemon session -- `list-windows` is a cheap read-only operation and does not capture or write a screenshot -- the stable runtime JSON/error contract is documented in [docs/runtime-contract.md](docs/runtime-contract.md) - -## Read and Wait Surface - -The grouped runtime reads are: - -```bash -deskctl get active-window -deskctl get monitors -deskctl get version -deskctl get systeminfo -``` - -The grouped runtime waits are: - -```bash -deskctl wait window --selector 'title=Firefox' --timeout 10 -deskctl wait focus --selector 'id=win3' --timeout 5 -``` - -Successful `get active-window`, `wait window`, and `wait focus` responses return a `window` payload with: -- `ref_id` -- `window_id` -- `title` -- `app_name` -- geometry (`x`, `y`, `width`, `height`) -- state flags (`focused`, 
`minimized`) - -`get monitors` returns: -- `count` -- `monitors[]` with geometry and primary/automatic flags - -`get version` returns: -- `version` -- `backend` - -`get systeminfo` stays runtime-scoped and returns: -- `backend` -- `display` -- `session_type` -- `session` -- `socket_path` -- `screen` -- `monitor_count` -- `monitors` - -Wait timeout and selector failures are structured in `--json` mode so agents can recover without string parsing. - -## Output Policy - -Text mode is compact and follow-up-oriented, but JSON is the parsing contract. - -- use `--json` when an agent needs strict parsing -- rely on `window_id`, selector-related fields, grouped read payloads, and structured error `kind` values for stable automation -- treat monitor naming, incidental whitespace, and default screenshot file names as best-effort - -See [docs/runtime-conract.md](docs/runtime-contract.md) for the exact stable-vs-best-effort breakdown. - -## Distribution - -- GitHub Releases are the canonical binary source -- crates.io package: `deskctl` -- npm package: `deskctl-cli` -- installed command on every channel: `deskctl` -- repo-owned Nix install path: `flake.nix` - -For maintainer publishing and release steps, see [docs/releasing.md](docs/releasing.md). - -## Selector Contract - -Explicit selector modes: - -```bash -ref=w1 -id=win1 -title=Firefox -class=firefox -focused -``` - -Legacy refs remain supported: - -```bash -@w1 -w1 -win1 -``` - -Bare selectors such as `firefox` are still supported as fuzzy substring matches, but they now fail on ambiguity and return candidate windows instead of silently picking the first match. - -## Support Boundary - -`deskctl` supports Linux X11 in this phase. Wayland and Hyprland are explicitly out of scope for the current runtime contract. 
- -## Workflow - -Local validation uses the root `Makefile`: - -```bash -make fmt-check -make lint -make test-unit -make test-integration -make site-format-check -make validate -``` - -`make validate` is the full repo-quality check and requires Linux with `xvfb-run` plus `pnpm --dir site install`. - -The repository standardizes on `pre-commit` for fast commit-time checks: - -```bash -pre-commit install -pre-commit run --all-files -``` - -See [CONTRIBUTING.md](CONTRIBUTING.md) for the full contributor guide. - -## Acknowledgements - -- [@barrettruth](github.com/barrettruth) - i stole the website from [vimdoc](https://github.com/barrettruth/vimdoc-language-server) +`deskctl` currently supports Linux X11. Use `--json` for stable machine parsing, use `window_id` for programmatic targeting inside a live session, and use `deskctl doctor` first when the runtime looks broken. diff --git a/docs/runtime-contract.md b/docs/runtime-contract.md index 7312357..0316c06 100644 --- a/docs/runtime-contract.md +++ b/docs/runtime-contract.md @@ -1,19 +1,6 @@ -# Runtime Output Contract +# deskctl runtime contract -This document defines the current output contract for `deskctl`. - -It is intentionally scoped to the current Linux X11 runtime surface. -It does not promise stability for future Wayland or window-manager-specific features. - -## Goals - -- Keep `deskctl` fully non-interactive -- Make text output actionable for quick terminal and agent loops -- Make `--json` safe for agent consumption without depending on incidental formatting - -## JSON Envelope - -Every runtime command uses the same top-level JSON envelope: +All commands support `--json` and use the same top-level envelope: ```json { @@ -23,22 +10,11 @@ Every runtime command uses the same top-level JSON envelope: } ``` -Stable top-level fields: +Use `--json` whenever you need to parse output programmatically. 
-- `success` -- `data` -- `error` +## Stable window fields -`success` is always the authoritative success/failure bit. -When `success` is `false`, the CLI exits non-zero in both text mode and `--json` mode. - -## Stable Fields - -These fields are stable for agent consumption in the current Phase 1 runtime contract. - -### Window Identity - -Whenever a runtime response includes a window payload, these fields are stable: +Whenever a response includes a window payload, these fields are stable: - `ref_id` - `window_id` @@ -51,128 +27,46 @@ Whenever a runtime response includes a window payload, these fields are stable: - `focused` - `minimized` -`window_id` is the stable public identifier for a live daemon session. -`ref_id` is a short-lived convenience handle for the current window snapshot/ref map. +Use `window_id` for stable targeting inside a live daemon session. Use +`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or +`list-windows`. -### Grouped Reads +## Stable grouped reads -`deskctl get active-window` +- `deskctl get active-window` -> `data.window` +- `deskctl get monitors` -> `data.count`, `data.monitors` +- `deskctl get version` -> `data.version`, `data.backend` +- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as + `backend`, `display`, `session_type`, `session`, `socket_path`, `screen`, + `monitor_count`, and `monitors` -- stable: `data.window` +## Stable waits -`deskctl get monitors` +- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`, + `data.window` +- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`, + `data.window` -- stable: `data.count` -- stable: `data.monitors` -- stable per monitor: - - `name` - - `x` - - `y` - - `width` - - `height` - - `width_mm` - - `height_mm` - - `primary` - - `automatic` +## Stable structured error kinds -`deskctl get version` - -- stable: `data.version` -- stable: `data.backend` - -`deskctl get systeminfo` - -- stable: `data.backend` -- 
stable: `data.display` -- stable: `data.session_type` -- stable: `data.session` -- stable: `data.socket_path` -- stable: `data.screen` -- stable: `data.monitor_count` -- stable: `data.monitors` - -### Waits - -`deskctl wait window` -`deskctl wait focus` - -- stable: `data.wait` -- stable: `data.selector` -- stable: `data.elapsed_ms` -- stable: `data.window` - -### Selector-Driven Action Success - -For selector-driven action commands that resolve a window target, these identifiers are stable when present: - -- `data.ref_id` -- `data.window_id` -- `data.title` -- `data.selector` - -This applies to: - -- `click` -- `dblclick` -- `focus` -- `close` -- `move-window` -- `resize-window` - -The exact human-readable text rendering of those commands is not part of the JSON contract. - -### Artifact-Producing Commands - -`snapshot` -`screenshot` - -- stable: `data.screenshot` - -When the command also returns windows, `data.windows` uses the stable window payload documented above. - -## Stable Structured Error Kinds - -When a runtime command returns structured JSON failure data, these error kinds are stable: +When a command fails with structured JSON data, these `kind` values are stable: - `selector_not_found` - `selector_ambiguous` - `selector_invalid` - `timeout` - `not_found` -- `window_not_focused` as `data.last_observation.kind` or equivalent observation payload -Stable structured failure fields include: +Wait failures may also include `window_not_focused` in the last observation +payload. 
-- `data.kind` -- `data.selector` when selector-related -- `data.mode` when selector-related -- `data.candidates` for ambiguous selector failures -- `data.message` for invalid selector failures -- `data.wait` -- `data.timeout_ms` -- `data.poll_ms` -- `data.last_observation` +## Best-effort fields -## Best-Effort Fields +Treat these as useful but non-contractual: -These values are useful but environment-dependent and should be treated as best-effort: +- exact monitor names +- incidental text formatting in non-JSON mode +- default screenshot file names when no explicit path was provided +- environment-dependent ordering details from the window manager -- exact monitor naming conventions -- EWMH/window-manager-dependent window ordering details -- cosmetic text formatting in non-JSON mode -- screenshot file names when the caller did not provide an explicit path -- command stderr wording outside the structured `kind` classifications above - -## Text Mode Expectations - -Text mode is intended to stay compact and follow-up-useful. - -The exact whitespace/alignment of text output is not stable. -The following expectations are stable at the behavioral level: - -- important runtime reads print actionable identifiers or geometry -- selector failures print enough detail to recover without `--json` -- artifact-producing commands print the artifact path -- window listings print both `@wN` refs and `window_id` values - -If an agent needs strict parsing, it should use `--json`. +For the full repo copy, see `docs/runtime-contract.md`. diff --git a/site/src/pages/architecture.mdx b/site/src/pages/architecture.mdx index 87b2b4e..9478246 100644 --- a/site/src/pages/architecture.mdx +++ b/site/src/pages/architecture.mdx @@ -6,73 +6,93 @@ toc: true # Architecture -## Client-daemon model +## Public model -deskctl uses a client-daemon architecture over Unix sockets. 
The daemon starts automatically on the first command and keeps the X11 connection alive so repeated calls skip the connection setup overhead. +`deskctl` is a thin, non-interactive X11 control primitive for agent loops. +The public flow is: -Each command opens a new connection to the daemon, sends a single NDJSON request, reads one NDJSON response, and exits. +- diagnose with `deskctl doctor` +- observe with `snapshot`, `list-windows`, and grouped `get` commands +- wait with grouped `wait` commands instead of shell `sleep` +- act with explicit selectors or coordinates +- verify with another read or snapshot -## Wire protocol +The tool stays intentionally narrow. It does not try to be a full desktop shell +or a speculative Wayland abstraction. + +## Client-daemon architecture + +The CLI talks to an auto-managed daemon over a Unix socket. The daemon keeps +the X11 connection alive so repeated commands stay fast and share the same +session-scoped window identity map. + +Each CLI invocation sends one request, reads one response, and exits. + +## Runtime contract Requests and responses are newline-delimited JSON (NDJSON) over a Unix socket. -**Request:** +All commands share the same JSON envelope: ```json -{ "id": "r123456", "action": "snapshot", "annotate": true } +{ + "success": true, + "data": {}, + "error": null +} ``` -**Response:** +For window payloads, the public identity is `window_id`, not an X11 handle. +That keeps the contract backend-neutral even though the current support +boundary is X11-only. -```json -{"success": true, "data": {"screenshot": "/tmp/deskctl-1234567890.png", "windows": [...]}} -``` +The complete stable-vs-best-effort policy lives on the +[runtime contract](/runtime-contract) page. -Error responses include an `error` field: +## Sessions and sockets -```json -{ "success": false, "error": "window not found: @w99" } -``` +Each session gets its own socket path, PID file, and live window mapping. 
-## Socket location +Public socket resolution order: -The daemon socket is resolved in this order: - -1. `--socket` flag (highest priority) -2. `$DESKCTL_SOCKET_DIR/{session}.sock` -3. `$XDG_RUNTIME_DIR/deskctl/{session}.sock` +1. `--socket` +2. `DESKCTL_SOCKET_DIR/{session}.sock` +3. `XDG_RUNTIME_DIR/deskctl/{session}.sock` 4. `~/.deskctl/{session}.sock` -PID files are stored alongside the socket. +Most users should let `deskctl` manage this automatically. `--session` is the +main public knob when you need isolated daemon instances. -## Sessions +## Diagnostics and failure handling -Multiple isolated daemon instances can run simultaneously using the `--session` flag: +`deskctl doctor` runs before daemon startup and checks: -```sh -deskctl --session workspace1 snapshot -deskctl --session workspace2 snapshot -``` +- display/session setup +- X11 connectivity +- basic window enumeration +- screenshot viability +- socket directory and stale-socket health -Each session has its own socket, PID file, and window ref map. +Selector and wait failures are structured in `--json` mode so clients can +recover without scraping text. -## Backend design +## Backend notes -The core is built around a `DesktopBackend` trait. The current implementation uses `x11rb` for X11 protocol operations and `enigo` for input simulation. +The backend is built around a `DesktopBackend` trait and currently ships with +an X11 implementation backed by `x11rb`. -The trait-based design means adding Wayland support is a single trait implementation with no changes to the core, CLI, or daemon code. +The important public guarantee is not "portable desktop automation." The +important guarantee is "a correct and unsurprising Linux X11 runtime contract." -## X11 integration +## X11 support boundary -Window detection uses EWMH properties: +This phase supports Linux X11 only. 
-| Property | Purpose | -| --------------------------- | ------------------------ | -| `_NET_CLIENT_LIST_STACKING` | Window stacking order | -| `_NET_ACTIVE_WINDOW` | Currently focused window | -| `_NET_WM_NAME` | Window title (UTF-8) | -| `_NET_WM_STATE_HIDDEN` | Minimized state | -| `_NET_CLOSE_WINDOW` | Graceful close | -| `WM_CLASS` | Application class/name | +That means: -Falls back to `XQueryTree` if `_NET_CLIENT_LIST_STACKING` is unavailable. +- EWMH/window-manager properties matter +- monitor naming and some ordering details are best-effort +- Wayland and Hyprland are out of scope for the current contract + +The runtime documents those boundaries explicitly instead of pretending the +surface is broader than it is. diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx index e1fc509..8a5132b 100644 --- a/site/src/pages/commands.mdx +++ b/site/src/pages/commands.mdx @@ -6,167 +6,101 @@ toc: true # Commands -## Snapshot - -Capture a screenshot and get the window tree: +## Observe ```sh +deskctl doctor deskctl snapshot deskctl snapshot --annotate -``` - -With `--annotate`, colored bounding boxes and `@wN` labels are drawn on the screenshot. Each window gets a unique color from an 8-color palette. Minimized windows are skipped. - -The screenshot is saved to `/tmp/deskctl-{timestamp}.png`. - -## Click - -Click the center of a window by ref, or click exact coordinates: - -```sh -deskctl click @w1 -deskctl click 960,540 -``` - -## Double click - -```sh -deskctl dblclick @w1 -deskctl dblclick 500,300 -``` - -## Type - -Type a string into the focused window: - -```sh -deskctl type "hello world" -``` - -## Press - -Press a single key: - -```sh -deskctl press enter -deskctl press tab -deskctl press escape -``` - -Supported key names: `enter`, `tab`, `escape`, `backspace`, `delete`, `space`, `up`, `down`, `left`, `right`, `home`, `end`, `pageup`, `pagedown`, `f1`-`f12`, or any single character. - -## Hotkey - -Send a key combination. 
List modifier keys first, then the target key: - -```sh -deskctl hotkey ctrl c -deskctl hotkey ctrl shift t -deskctl hotkey alt f4 -``` - -Modifier names: `ctrl`, `alt`, `shift`, `super` (also `meta` or `win`). - -## Mouse move - -Move the cursor to absolute coordinates: - -```sh -deskctl mouse move 100 200 -``` - -## Mouse scroll - -Scroll the mouse wheel. Positive values scroll down, negative scroll up: - -```sh -deskctl mouse scroll 3 -deskctl mouse scroll -5 -deskctl mouse scroll 3 --axis horizontal -``` - -## Mouse drag - -Drag from one position to another: - -```sh -deskctl mouse drag 100 200 500 600 -``` - -## Focus - -Focus a window by ref or by name (case-insensitive substring match): - -```sh -deskctl focus @w1 -deskctl focus "firefox" -``` - -## Close - -Close a window gracefully: - -```sh -deskctl close @w2 -deskctl close "terminal" -``` - -## Move window - -Move a window to an absolute position: - -```sh -deskctl move-window @w1 0 0 -deskctl move-window "firefox" 100 100 -``` - -## Resize window - -Resize a window: - -```sh -deskctl resize-window @w1 1280 720 -``` - -## List windows - -List all windows without taking a screenshot: - -```sh deskctl list-windows -``` - -## Get screen size - -```sh +deskctl screenshot +deskctl screenshot /tmp/screen.png +deskctl get active-window +deskctl get monitors +deskctl get version +deskctl get systeminfo deskctl get-screen-size -``` - -## Get mouse position - -```sh deskctl get-mouse-position ``` -## Screenshot +`doctor` checks the runtime before daemon startup. `snapshot` produces a +screenshot plus window refs. `list-windows` is the same window tree without the +side effect of writing a screenshot. -Take a screenshot without the window tree. 
Optionally specify a save path: +## Wait ```sh -deskctl screenshot -deskctl screenshot /tmp/my-screenshot.png -deskctl screenshot --annotate +deskctl wait window --selector 'title=Firefox' --timeout 10 +deskctl wait focus --selector 'id=win3' --timeout 5 +deskctl --json wait window --selector 'class=firefox' --poll-ms 100 ``` -## Launch +Wait commands return the matched window payload on success. In `--json` mode, +timeouts and selector failures expose structured `kind` values. -Launch an application: +## Act on a window ```sh deskctl launch firefox -deskctl launch code --args /path/to/project +deskctl focus @w1 +deskctl focus 'title=Firefox' +deskctl click @w1 +deskctl click 960,540 +deskctl dblclick @w2 +deskctl close @w3 +deskctl move-window @w1 100 120 +deskctl resize-window @w1 1280 720 ``` +Selector-driven actions accept refs, explicit selector modes, or absolute +coordinates where appropriate. + +## Input and mouse + +```sh +deskctl type "hello world" +deskctl press enter +deskctl hotkey ctrl shift t +deskctl mouse move 100 200 +deskctl mouse scroll 3 +deskctl mouse scroll 3 --axis horizontal +deskctl mouse drag 100 200 500 600 +``` + +Supported key names include `enter`, `tab`, `escape`, `backspace`, `delete`, +`space`, arrow keys, paging keys, `f1` through `f12`, and any single +character. + +## Launch + +```sh +deskctl launch firefox +deskctl launch code -- --new-window +``` + +## Selectors + +Prefer explicit selectors when the target matters: + +```sh +ref=w1 +id=win1 +title=Firefox +class=firefox +focused +``` + +Legacy shorthand is still supported: + +```sh +@w1 +w1 +win1 +``` + +Bare strings like `firefox` are fuzzy matches. They resolve when there is one +match and fail with candidate windows when there are multiple matches. 
+ ## Global options | Flag | Env | Description | @@ -174,3 +108,6 @@ deskctl launch code --args /path/to/project | `--json` | | Output as JSON | | `--socket <path>` | `DESKCTL_SOCKET` | Path to daemon Unix socket | | `--session <name>` | | Session name for multiple daemons (default: `default`) | + +`deskctl` manages the daemon automatically. Most users never need to think +about it beyond `--session` and `--socket`. diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index 9327dc5..4263549 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -8,17 +8,49 @@ import DocLayout from "../layouts/DocLayout.astro"; -

- Desktop control CLI for AI agents on Linux X11. Compact JSON output for - agent loops. Screenshot, click, type, scroll, drag, and manage windows - through a fast client-daemon architecture. 100% native Rust. +

non-interactive desktop control for AI agents

+ + + +

+ deskctl is a thin X11 control primitive for agent loops: diagnose + the runtime, observe the desktop, wait for state transitions, act deterministically, + then verify.

-

Getting started

+
npm install -g deskctl-cli
+deskctl doctor
+deskctl snapshot --annotate
+ +

Start here

Reference

@@ -28,14 +60,27 @@ import DocLayout from "../layouts/DocLayout.astro";
  • Architecture
  • +

    Agent skill

    + +

    + There is also an installable skill for `skills.sh`-style agent runtimes: +

    + +
    npx skills add harivansh-afk/deskctl -s deskctl
    +

    Links

    diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx index e05772d..985cf99 100644 --- a/site/src/pages/installation.mdx +++ b/site/src/pages/installation.mdx @@ -6,43 +6,68 @@ toc: true # Installation -## Cargo +## Default install ```sh -cargo install deskctl +npm install -g deskctl-cli +deskctl --help ``` -## From source +`deskctl-cli` is the default install path. It installs the `deskctl` command by +downloading the matching GitHub Release asset for the supported runtime target. + +## One-shot usage + +```sh +npx deskctl-cli --help +``` + +## Agent skill + +For `skills.sh`-style runtimes: + +```sh +npx skills add harivansh-afk/deskctl -s deskctl +``` + +The repo skill lives under `skills/deskctl` and is designed around the same +observe -> wait -> act -> verify loop as the CLI. + +## Other install paths + +### Nix + +```sh +nix run github:harivansh-afk/deskctl -- --help +nix profile install github:harivansh-afk/deskctl +``` + +### Build from source ```sh git clone https://github.com/harivansh-afk/deskctl cd deskctl -cargo build --release +cargo build ``` -## Docker (cross-compile for Linux) +Source builds on Linux require: -Build a static Linux binary from any platform: +- Rust 1.75+ +- `pkg-config` +- X11 development libraries such as `libx11-dev` and `libxtst-dev` -```sh -docker compose -f docker/docker-compose.yml run --rm build -``` - -This writes `dist/deskctl-linux-x86_64`. - -## Deploy to a remote machine - -Copy the binary over SSH when `scp` is not available: - -```sh -ssh -p 443 user@host 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64 -``` - -## Requirements +## Runtime requirements - Linux with an active X11 session -- `DISPLAY` environment variable set (e.g. 
`DISPLAY=:1`) -- `XDG_SESSION_TYPE=x11` -- A window manager that exposes EWMH properties (`_NET_CLIENT_LIST_STACKING`, `_NET_ACTIVE_WINDOW`) +- `DISPLAY` set to a usable X11 display, such as `DISPLAY=:1` +- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment +- a window manager or desktop environment that exposes standard EWMH properties + such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW` -No extra native libraries are needed beyond the standard glibc runtime (`libc`, `libm`, `libgcc_s`). +The binary itself only depends on the standard Linux glibc runtime. + +If setup fails, run: + +```sh +deskctl doctor +``` diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx index 7f3bc07..c783b9e 100644 --- a/site/src/pages/quick-start.mdx +++ b/site/src/pages/quick-start.mdx @@ -6,50 +6,72 @@ toc: true # Quick start -## Core workflow - -The typical agent loop is: snapshot the desktop, interpret the result, act on it. +## Install and diagnose ```sh -# 1. see the desktop -deskctl --json snapshot --annotate +npm install -g deskctl-cli +deskctl doctor +``` -# 2. click a window by its ref -deskctl click @w1 +Use `deskctl doctor` first. It checks X11 connectivity, basic enumeration, +screenshot viability, and socket health before you start driving the desktop. -# 3. type into the focused window -deskctl type "hello world" +## Observe -# 4. press a key +```sh +deskctl snapshot --annotate +deskctl list-windows +deskctl get active-window +deskctl get monitors +``` + +Use `snapshot` when you want a screenshot artifact plus window refs. Use +`list-windows` when you only need the current window tree without writing a +screenshot. + +## Target windows cleanly + +Prefer explicit selectors when you need deterministic targeting: + +```sh +ref=w1 +id=win1 +title=Firefox +class=firefox +focused +``` + +Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare +strings like `firefox` are fuzzy matches and now fail on ambiguity. 
+ +## Wait, act, verify + +The core loop is: + +```sh +# observe +deskctl snapshot --annotate + +# wait +deskctl wait window --selector 'title=Firefox' --timeout 10 + +# act +deskctl focus 'title=Firefox' +deskctl hotkey ctrl l +deskctl type "https://example.com" deskctl press enter + +# verify +deskctl wait focus --selector 'title=Firefox' --timeout 5 +deskctl snapshot ``` -The `--annotate` flag draws colored bounding boxes and `@wN` labels on the screenshot so agents can visually identify windows. +The wait commands return the matched window payload on success, so they compose +cleanly into the next action. -## Window refs +## Use `--json` when parsing matters -Every `snapshot` assigns refs like `@w1`, `@w2`, etc. to each visible window, ordered top-to-bottom by stacking order. Use these refs anywhere a selector is expected: - -```sh -deskctl click @w1 -deskctl focus @w3 -deskctl close @w2 -``` - -You can also select windows by name (case-insensitive substring match): - -```sh -deskctl focus "firefox" -deskctl close "terminal" -``` - -## JSON output - -Pass `--json` for machine-readable output. This is the primary mode for agent integrations: - -```sh -deskctl --json snapshot -``` +Every command supports `--json` and uses the same top-level envelope: ```json { @@ -59,7 +81,7 @@ deskctl --json snapshot "windows": [ { "ref_id": "w1", - "xcb_id": 12345678, + "window_id": "win1", "title": "Firefox", "app_name": "firefox", "x": 0, @@ -74,14 +96,8 @@ deskctl --json snapshot } ``` -## Daemon lifecycle +Use `window_id` for stable targeting inside a live daemon session. The exact +text formatting is intentionally compact, but JSON is the parsing contract. -The daemon starts automatically on the first command. It keeps the X11 connection alive so repeated calls are fast. You do not need to manage it manually. 
- -```sh -# check if the daemon is running -deskctl daemon status - -# stop it explicitly -deskctl daemon stop -``` +The full stable-vs-best-effort contract lives on the +[runtime contract](/runtime-contract) page. diff --git a/site/src/pages/runtime-contract.mdx b/site/src/pages/runtime-contract.mdx new file mode 100644 index 0000000..4fca14c --- /dev/null +++ b/site/src/pages/runtime-contract.mdx @@ -0,0 +1,177 @@ +--- +layout: ../layouts/DocLayout.astro +title: Runtime contract +toc: true +--- + +# Runtime contract + +This page defines the current public output contract for `deskctl`. + +It is intentionally scoped to the current Linux X11 runtime surface. It does +not promise stability for future Wayland or window-manager-specific features. + +## JSON envelope + +Every command supports `--json` and uses the same top-level envelope: + +```json +{ + "success": true, + "data": {}, + "error": null +} +``` + +Stable top-level fields: + +- `success` +- `data` +- `error` + +If `success` is `false`, the command exits non-zero in both text mode and JSON +mode. + +## Stable window fields + +Whenever a response includes a window payload, these fields are stable: + +- `ref_id` +- `window_id` +- `title` +- `app_name` +- `x` +- `y` +- `width` +- `height` +- `focused` +- `minimized` + +`window_id` is the public session-scoped identifier for programmatic targeting. +`ref_id` is a short-lived convenience handle from the current ref map. 
+ +## Stable grouped reads + +`deskctl get active-window` + +- stable: `data.window` + +`deskctl get monitors` + +- stable: `data.count` +- stable: `data.monitors` + +Stable per-monitor fields: + +- `name` +- `x` +- `y` +- `width` +- `height` +- `width_mm` +- `height_mm` +- `primary` +- `automatic` + +`deskctl get version` + +- stable: `data.version` +- stable: `data.backend` + +`deskctl get systeminfo` + +- stable: `data.backend` +- stable: `data.display` +- stable: `data.session_type` +- stable: `data.session` +- stable: `data.socket_path` +- stable: `data.screen` +- stable: `data.monitor_count` +- stable: `data.monitors` + +## Stable waits + +`deskctl wait window` +`deskctl wait focus` + +- stable: `data.wait` +- stable: `data.selector` +- stable: `data.elapsed_ms` +- stable: `data.window` + +## Stable selector-driven action fields + +When selector-driven actions return resolved window data, these fields are +stable when present: + +- `data.ref_id` +- `data.window_id` +- `data.title` +- `data.selector` + +This applies to: + +- `click` +- `dblclick` +- `focus` +- `close` +- `move-window` +- `resize-window` + +## Stable artifact fields + +For `snapshot` and `screenshot`: + +- stable: `data.screenshot` + +When a command also returns windows, `data.windows` uses the stable window +payload documented above. 
+ +## Stable structured error kinds + +When a command fails with structured JSON data, these error kinds are stable: + +- `selector_not_found` +- `selector_ambiguous` +- `selector_invalid` +- `timeout` +- `not_found` +- `window_not_focused` in `data.last_observation.kind` or an equivalent wait + observation payload + +Stable structured failure fields include: + +- `data.kind` +- `data.selector` +- `data.mode` +- `data.candidates` +- `data.message` +- `data.wait` +- `data.timeout_ms` +- `data.poll_ms` +- `data.last_observation` + +## Best-effort fields + +These values are useful but environment-dependent and should not be treated as +strict parsing guarantees: + +- exact monitor naming conventions +- EWMH/window-manager-dependent ordering details +- cosmetic text formatting in non-JSON mode +- default screenshot file names when no explicit path was provided +- stderr wording outside the structured `kind` classifications above + +## Text mode expectations + +Text mode is intended to stay compact and follow-up-useful. + +The exact whitespace and alignment are not stable. The stable behavioral +expectations are: + +- important reads print actionable identifiers or geometry +- selector failures print enough detail to recover without `--json` +- artifact-producing commands print the artifact path +- window listings print both `@wN` refs and `window_id` values + +If you need strict parsing, use `--json`. 
diff --git a/site/src/styles/base.css b/site/src/styles/base.css index 86fd6a8..f60c0e6 100644 --- a/site/src/styles/base.css +++ b/site/src/styles/base.css @@ -65,6 +65,23 @@ main { font-style: italic; } +.lede { + font-size: 1.05rem; + max-width: 42rem; +} + +.badges { + display: flex; + flex-wrap: wrap; + gap: 0.6rem; + margin-bottom: 1.25rem; +} + +.badges a, +.badges img { + display: block; +} + header { display: flex; align-items: center; @@ -117,6 +134,10 @@ a:hover { text-decoration-thickness: 2px; } +img { + max-width: 100%; +} + ul, ol { padding-left: 1.25em; diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md index d0e7c9f..77b9513 100644 --- a/skills/deskctl/references/commands.md +++ b/skills/deskctl/references/commands.md @@ -1,21 +1,22 @@ # deskctl commands -All commands support `--json` for machine-parseable output following the runtime contract. +All commands support `--json` for machine-parseable output following the +runtime contract. 
## Observe ```bash -deskctl doctor # check X11 runtime and daemon health -deskctl snapshot # screenshot + window list -deskctl snapshot --annotate # screenshot with @wN labels overlaid -deskctl list-windows # window list only (no screenshot) -deskctl screenshot /tmp/screen.png # screenshot to explicit path -deskctl get active-window # focused window info -deskctl get monitors # monitor geometry -deskctl get version # version and backend -deskctl get systeminfo # full runtime diagnostics -deskctl get-screen-size # screen resolution -deskctl get-mouse-position # cursor coordinates +deskctl doctor +deskctl snapshot +deskctl snapshot --annotate +deskctl list-windows +deskctl screenshot /tmp/screen.png +deskctl get active-window +deskctl get monitors +deskctl get version +deskctl get systeminfo +deskctl get-screen-size +deskctl get-mouse-position ``` ## Wait @@ -25,19 +26,21 @@ deskctl wait window --selector 'title=Firefox' --timeout 10 deskctl wait focus --selector 'class=firefox' --timeout 5 ``` -Returns the matched window payload on success. Failures include structured `kind` values in `--json` mode. +Returns the matched window payload on success. Failures include structured +`kind` values in `--json` mode. ## Selectors ```bash -ref=w1 # snapshot ref (short-lived, from last snapshot) -id=win1 # stable window ID (session-scoped) -title=Firefox # match by window title -class=firefox # match by WM class -focused # currently focused window +ref=w1 +id=win1 +title=Firefox +class=firefox +focused ``` -Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail on ambiguity. +Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail +on ambiguity. ## Act @@ -58,12 +61,5 @@ deskctl close @w3 deskctl launch firefox ``` -## Daemon - -```bash -deskctl daemon start -deskctl daemon stop -deskctl daemon status -``` - -The daemon starts automatically on first command. Manual control is rarely needed. 
+The daemon starts automatically on first command. In normal usage you should +not need to manage it directly. From 88f9ff85a3fa5b95028bb1e7811078416eaf43ae Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 08:18:57 -0400 Subject: [PATCH 04/35] clean --- site/src/pages/index.astro | 25 ------------------------- site/src/styles/base.css | 12 ------------ 2 files changed, 37 deletions(-) diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index 4263549..b8bf92b 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -10,31 +10,6 @@ import DocLayout from "../layouts/DocLayout.astro";

    non-interactive desktop control for AI agents

    - -

    deskctl is a thin X11 control primitive for agent loops: diagnose the runtime, observe the desktop, wait for state transitions, act deterministically, diff --git a/site/src/styles/base.css b/site/src/styles/base.css index f60c0e6..cd569a9 100644 --- a/site/src/styles/base.css +++ b/site/src/styles/base.css @@ -70,18 +70,6 @@ main { max-width: 42rem; } -.badges { - display: flex; - flex-wrap: wrap; - gap: 0.6rem; - margin-bottom: 1.25rem; -} - -.badges a, -.badges img { - display: block; -} - header { display: flex; align-items: center; From eac3a61ceb35002bf3957e6d0ebe4c2025ab1203 Mon Sep 17 00:00:00 2001 From: Hari <73809867+harivansh-afk@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:44:10 -0400 Subject: [PATCH 05/35] rename (#11) * align docs and contract * clean * rename from deskctl-cli to deskctl * runtime --- .github/workflows/ci.yml | 4 ++-- .github/workflows/publish.yml | 12 ++++++------ .gitignore | 4 ++-- CONTRIBUTING.md | 2 +- Makefile | 4 ++-- README.md | 9 +++++---- docs/releasing.md | 4 ++-- docs/runtime-contract.md | 2 -- npm/{deskctl-cli => deskctl}/README.md | 10 +++++----- npm/{deskctl-cli => deskctl}/bin/deskctl.js | 2 +- npm/{deskctl-cli => deskctl}/package.json | 4 ++-- npm/{deskctl-cli => deskctl}/scripts/postinstall.js | 2 +- npm/{deskctl-cli => deskctl}/scripts/support.js | 2 +- .../scripts/validate-package.js | 4 ++-- site/src/pages/index.astro | 11 +++-------- site/src/pages/installation.mdx | 6 +++--- site/src/pages/quick-start.mdx | 2 +- skills/deskctl/SKILL.md | 4 ++-- 18 files changed, 41 insertions(+), 47 deletions(-) rename npm/{deskctl-cli => deskctl}/README.md (67%) rename npm/{deskctl-cli => deskctl}/bin/deskctl.js (91%) rename npm/{deskctl-cli => deskctl}/package.json (86%) rename npm/{deskctl-cli => deskctl}/scripts/postinstall.js (94%) rename npm/{deskctl-cli => deskctl}/scripts/support.js (97%) rename npm/{deskctl-cli => deskctl}/scripts/validate-package.js (87%) diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index e95b27a..b7a4d6f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -189,7 +189,7 @@ jobs: NEW="${{ needs.changes.outputs.version }}" if [ "$CURRENT" != "$NEW" ]; then sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml - node -e 'const fs=require("node:fs"); const path="npm/deskctl-cli/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW" + node -e 'const fs=require("node:fs"); const path="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW" cargo generate-lockfile fi @@ -199,7 +199,7 @@ jobs: git config user.email "github-actions[bot]@users.noreply.github.com" if ! git diff --quiet; then - git add Cargo.toml Cargo.lock npm/deskctl-cli/package.json + git add Cargo.toml Cargo.lock npm/deskctl/package.json git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]" fi diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 329f151..c4b1ecf 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -8,7 +8,7 @@ on: required: true type: string publish_npm: - description: Publish deskctl-cli to npm + description: Publish deskctl to npm required: true type: boolean default: false @@ -51,7 +51,7 @@ jobs: TAG="${{ inputs.tag }}" VERSION="${TAG#v}" CARGO_VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - NPM_VERSION=$(node -p 'require("./npm/deskctl-cli/package.json").version') + NPM_VERSION=$(node -p 'require("./npm/deskctl/package.json").version') test "$VERSION" = "$CARGO_VERSION" test "$VERSION" = "$NPM_VERSION" @@ -62,7 +62,7 @@ jobs: VERSION="${{ inputs.tag }}" VERSION="${VERSION#v}" - if npm view "deskctl-cli@${VERSION}" version >/dev/null 2>&1; then + if npm view "deskctl@${VERSION}" 
version >/dev/null 2>&1; then echo "npm=true" >> "$GITHUB_OUTPUT" else echo "npm=false" >> "$GITHUB_OUTPUT" @@ -77,8 +77,8 @@ jobs: - name: Validate npm package run: | mkdir -p ./tmp/npm-pack - node npm/deskctl-cli/scripts/validate-package.js - npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null + node npm/deskctl/scripts/validate-package.js + npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null - name: Validate crate publish path run: cargo publish --dry-run --locked @@ -87,7 +87,7 @@ jobs: if: inputs.publish_npm && steps.published.outputs.npm != 'true' env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish ./npm/deskctl-cli --access public + run: npm publish ./npm/deskctl --access public - name: Publish crates.io if: inputs.publish_crates && steps.published.outputs.crates != 'true' diff --git a/.gitignore b/.gitignore index db552f7..40542a9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,5 @@ secret/ .claude/ .codex/ openspec/ -npm/deskctl-cli/vendor/ -npm/deskctl-cli/*.tgz +npm/deskctl/vendor/ +npm/deskctl/*.tgz diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 926c58a..97e8c7c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -70,7 +70,7 @@ The hook config intentionally stays small: Distribution support currently ships through: - crate: `deskctl` -- npm package: `deskctl-cli` +- npm package: `deskctl` - repo flake: `flake.nix` - command name on every channel: `deskctl` diff --git a/Makefile b/Makefile index 97857e3..7e1f852 100644 --- a/Makefile +++ b/Makefile @@ -38,10 +38,10 @@ npm-package-check: echo "npm is required for npm packaging validation."; \ exit 1; \ fi - node npm/deskctl-cli/scripts/validate-package.js + node npm/deskctl/scripts/validate-package.js rm -rf tmp/npm-pack tmp/npm-install mkdir -p tmp/npm-pack tmp/npm-install/bin - npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null + npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null @if [ "$$(uname -s)" != 
"Linux" ]; then \ echo "Skipping npm package runtime smoke test on non-Linux host."; \ else \ diff --git a/README.md b/README.md index 32144f0..4b42b5f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # deskctl -[![npm](https://img.shields.io/npm/v/deskctl-cli?label=npm)](https://www.npmjs.com/package/deskctl-cli) +[![npm](https://img.shields.io/npm/v/deskctl?label=npm)](https://www.npmjs.com/package/deskctl) [![release](https://img.shields.io/github/v/release/harivansh-afk/deskctl?label=release)](https://github.com/harivansh-afk/deskctl/releases) [![runtime](https://img.shields.io/badge/runtime-linux--x11-111827)](#support-boundary) [![skill](https://img.shields.io/badge/skills.sh-deskctl-111827)](skills/deskctl) @@ -10,7 +10,7 @@ Non-interactive desktop control for AI agents on Linux X11. ## Install ```bash -npm install -g deskctl-cli +npm install -g deskctl deskctl doctor deskctl snapshot --annotate ``` @@ -18,10 +18,11 @@ deskctl snapshot --annotate One-shot execution also works: ```bash -npx deskctl-cli --help +npx deskctl --help ``` -`deskctl-cli` installs the `deskctl` command by downloading the matching GitHub Release asset for the supported runtime target. +`deskctl` installs the command by downloading the matching GitHub Release asset for the supported runtime target. + ## Installable skill diff --git a/docs/releasing.md b/docs/releasing.md index 7271b83..8f39d3f 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -12,14 +12,14 @@ GitHub Releases are the canonical binary source. 
The npm package consumes those ## Package Names - crate: `deskctl` -- npm package: `deskctl-cli` +- npm package: `deskctl` - installed command: `deskctl` ## Prerequisites Before the first live publish on each registry: -- npm ownership for `deskctl-cli` +- npm ownership for `deskctl` - crates.io ownership for `deskctl` - repository secrets: - `NPM_TOKEN` diff --git a/docs/runtime-contract.md b/docs/runtime-contract.md index 0316c06..ee4727b 100644 --- a/docs/runtime-contract.md +++ b/docs/runtime-contract.md @@ -68,5 +68,3 @@ Treat these as useful but non-contractual: - incidental text formatting in non-JSON mode - default screenshot file names when no explicit path was provided - environment-dependent ordering details from the window manager - -For the full repo copy, see `docs/runtime-contract.md`. diff --git a/npm/deskctl-cli/README.md b/npm/deskctl/README.md similarity index 67% rename from npm/deskctl-cli/README.md rename to npm/deskctl/README.md index fd6f610..7bb42a9 100644 --- a/npm/deskctl-cli/README.md +++ b/npm/deskctl/README.md @@ -1,11 +1,11 @@ -# deskctl-cli +# deskctl -`deskctl-cli` installs the `deskctl` command for Linux X11 systems. +`deskctl` installs the command for Linux X11 systems. ## Install ```bash -npm install -g deskctl-cli +npm install -g deskctl ``` After install, run: @@ -17,7 +17,7 @@ deskctl --help One-shot usage is also supported: ```bash -npx deskctl-cli --help +npx deskctl --help ``` ## Runtime Support @@ -26,7 +26,7 @@ npx deskctl-cli --help - X11 session - currently packaged release asset: `linux-x64` -`deskctl-cli` downloads the matching GitHub Release binary during install. +`deskctl` downloads the matching GitHub Release binary during install. Unsupported targets fail during install with a clear runtime support error instead of installing a broken command. 
If you want the Rust source-install path instead, use: diff --git a/npm/deskctl-cli/bin/deskctl.js b/npm/deskctl/bin/deskctl.js similarity index 91% rename from npm/deskctl-cli/bin/deskctl.js rename to npm/deskctl/bin/deskctl.js index 9f9b480..b8514cf 100644 --- a/npm/deskctl-cli/bin/deskctl.js +++ b/npm/deskctl/bin/deskctl.js @@ -17,7 +17,7 @@ function main() { `Expected: ${binaryPath}`, `Package version: ${pkg.version}`, `Release tag: ${releaseTag(pkg)}`, - "Try reinstalling deskctl-cli or check that your target is supported." + "Try reinstalling deskctl or check that your target is supported." ].join("\n") ); process.exit(1); diff --git a/npm/deskctl-cli/package.json b/npm/deskctl/package.json similarity index 86% rename from npm/deskctl-cli/package.json rename to npm/deskctl/package.json index 84f27ee..4dbaba6 100644 --- a/npm/deskctl-cli/package.json +++ b/npm/deskctl/package.json @@ -1,7 +1,7 @@ { - "name": "deskctl-cli", + "name": "deskctl", "version": "0.1.6", - "description": "Installable deskctl CLI package for Linux X11 agents", + "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", "repository": { diff --git a/npm/deskctl-cli/scripts/postinstall.js b/npm/deskctl/scripts/postinstall.js similarity index 94% rename from npm/deskctl-cli/scripts/postinstall.js rename to npm/deskctl/scripts/postinstall.js index de1b1d0..1f43ad0 100644 --- a/npm/deskctl-cli/scripts/postinstall.js +++ b/npm/deskctl/scripts/postinstall.js @@ -44,6 +44,6 @@ async function main() { } main().catch((error) => { - console.error(`deskctl-cli install failed: ${error.message}`); + console.error(`deskctl install failed: ${error.message}`); process.exit(1); }); diff --git a/npm/deskctl-cli/scripts/support.js b/npm/deskctl/scripts/support.js similarity index 97% rename from npm/deskctl-cli/scripts/support.js rename to npm/deskctl/scripts/support.js index 8d41520..1fd0d47 100644 --- 
a/npm/deskctl-cli/scripts/support.js +++ b/npm/deskctl/scripts/support.js @@ -26,7 +26,7 @@ function supportedTarget(platform = process.platform, arch = process.arch) { } throw new Error( - `deskctl-cli currently supports linux-x64 only. Received ${platform}-${arch}.` + `deskctl currently supports linux-x64 only. Received ${platform}-${arch}.` ); } diff --git a/npm/deskctl-cli/scripts/validate-package.js b/npm/deskctl/scripts/validate-package.js similarity index 87% rename from npm/deskctl-cli/scripts/validate-package.js rename to npm/deskctl/scripts/validate-package.js index 46d3e87..450fd6c 100644 --- a/npm/deskctl-cli/scripts/validate-package.js +++ b/npm/deskctl/scripts/validate-package.js @@ -26,13 +26,13 @@ function main() { } if (pkg.bin?.deskctl !== "bin/deskctl.js") { - throw new Error("deskctl-cli must expose the deskctl bin entrypoint."); + throw new Error("deskctl must expose the deskctl bin entrypoint."); } const target = supportedTarget("linux", "x64"); const targetPath = vendorBinaryPath(target); const vendorDir = path.dirname(targetPath); - if (!vendorDir.endsWith(path.join("deskctl-cli", "vendor"))) { + if (!vendorDir.endsWith(path.join("deskctl", "vendor"))) { throw new Error("Vendor binary directory resolved unexpectedly."); } } diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index b8bf92b..8b8d4b4 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -16,10 +16,6 @@ import DocLayout from "../layouts/DocLayout.astro"; then verify.

    -
    npm install -g deskctl-cli
    -deskctl doctor
    -deskctl snapshot --annotate
    -

    Start here

      @@ -33,6 +29,7 @@ deskctl snapshot --annotate

      Agent skill

      @@ -47,15 +44,13 @@ deskctl snapshot --annotate diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx index 985cf99..df53fcc 100644 --- a/site/src/pages/installation.mdx +++ b/site/src/pages/installation.mdx @@ -9,17 +9,17 @@ toc: true ## Default install ```sh -npm install -g deskctl-cli +npm install -g deskctl deskctl --help ``` -`deskctl-cli` is the default install path. It installs the `deskctl` command by +`deskctl` is the default install path. It installs the command by downloading the matching GitHub Release asset for the supported runtime target. ## One-shot usage ```sh -npx deskctl-cli --help +npx deskctl --help ``` ## Agent skill diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx index c783b9e..10f3ec0 100644 --- a/site/src/pages/quick-start.mdx +++ b/site/src/pages/quick-start.mdx @@ -9,7 +9,7 @@ toc: true ## Install and diagnose ```sh -npm install -g deskctl-cli +npm install -g deskctl deskctl doctor ``` diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md index 81dea19..244a1fb 100644 --- a/skills/deskctl/SKILL.md +++ b/skills/deskctl/SKILL.md @@ -1,7 +1,7 @@ --- name: deskctl description: Non-interactive X11 desktop control for AI agents. Use when the task involves controlling a Linux desktop - clicking, typing, reading windows, waiting for UI state, or taking screenshots inside a sandbox or VM. -allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*) +allowed-tools: Bash(deskctl:*), Bash(npx deskctl:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*) --- # deskctl @@ -13,7 +13,7 @@ All output follows the runtime contract defined in [references/runtime-contract. 
## Quick start ```bash -npm install -g deskctl-cli +npm install -g deskctl deskctl doctor deskctl snapshot --annotate ``` From 86c36a3b509aac8ea5869feb02df931fbcf7d752 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 08:53:50 -0400 Subject: [PATCH 06/35] release: v0.1.7 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 71a9a54..6922004 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.6" +version = "0.1.7" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index b05507b..5872639 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.6" +version = "0.1.7" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 4dbaba6..6085bca 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.6", + "version": "0.1.7", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 1d72c7b852e4195f20b002a4aaf25e2b1a2b8e26 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:01:12 -0400 Subject: [PATCH 07/35] fix: add registry-url to setup-node for npm auth [skip ci] --- .github/workflows/publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c4b1ecf..1f6b282 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -34,6 +34,7 @@ jobs: - uses: actions/setup-node@v4 with: node-version: 22 + registry-url: https://registry.npmjs.org - name: Install system dependencies run: sudo 
apt-get update && sudo apt-get install -y libx11-dev libxtst-dev From deaffff45a574b1701482ac04043f7af557e46f5 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:07:56 -0400 Subject: [PATCH 08/35] major/minor/patch --- .github/workflows/publish.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1f6b282..31b3f4f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -3,15 +3,19 @@ name: Publish Registries on: workflow_dispatch: inputs: - tag: - description: Release tag to publish (for example v0.1.5) + bump: + description: Version bump type required: true - type: string + type: choice + options: + - patch + - minor + - major publish_npm: description: Publish deskctl to npm required: true type: boolean - default: false + default: true publish_crates: description: Publish deskctl to crates.io required: true From 47047e90641bf5e4b90f31aeb1157cd9b054868e Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:11:13 -0400 Subject: [PATCH 09/35] migrate update manifest job to publish workflow --- .github/workflows/ci.yml | 75 +++-------------------------------- .github/workflows/publish.yml | 54 +++++++++++++++++-------- 2 files changed, 43 insertions(+), 86 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7a4d6f..cb36e61 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,32 +52,13 @@ jobs: echo "rust=${{ steps.filter.outputs.rust }}" >> "$GITHUB_OUTPUT" fi - - name: Calculate next version + - name: Read current version id: version if: github.event_name != 'pull_request' && steps.check.outputs.rust == 'true' run: | - BASE=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - IFS='.' 
read -r MAJOR MINOR PATCH <<< "$BASE" - - LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1) - - if [ -z "$LATEST" ]; then - NEW="$BASE" - else - LATEST_VER="${LATEST#v}" - IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER" - NEW_PATCH=$((LATEST_PATCH + 1)) - NEW="${MAJOR}.${MINOR}.${NEW_PATCH}" - fi - - # Ensure the computed version does not already have a tag - while git rev-parse "v${NEW}" >/dev/null 2>&1; do - IFS='.' read -r MAJOR MINOR PATCH <<< "$NEW" - NEW="${MAJOR}.${MINOR}.$((PATCH + 1))" - done - - echo "version=${NEW}" >> "$GITHUB_OUTPUT" - echo "tag=v${NEW}" >> "$GITHUB_OUTPUT" + VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT" validate: name: Validate @@ -167,57 +148,13 @@ jobs: - name: Distribution validation run: make dist-validate - update-manifests: - name: Update Manifests - needs: [changes, validate, integration, distribution] - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - uses: dtolnay/rust-toolchain@stable - - - uses: actions/setup-node@v4 - with: - node-version: 22 - - - name: Update version in Cargo.toml - run: | - CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - NEW="${{ needs.changes.outputs.version }}" - if [ "$CURRENT" != "$NEW" ]; then - sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml - node -e 'const fs=require("node:fs"); const path="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW" - cargo generate-lockfile - fi - - - name: Commit, tag, and push - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - - if ! 
git diff --quiet; then - git add Cargo.toml Cargo.lock npm/deskctl/package.json - git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]" - fi - - if ! git rev-parse "${{ needs.changes.outputs.tag }}" >/dev/null 2>&1; then - git tag "${{ needs.changes.outputs.tag }}" - fi - git push origin main --tags - build: name: Build Release Asset - needs: [changes, update-manifests] + needs: [changes, validate, integration, distribution] if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - ref: ${{ needs.changes.outputs.tag }} - fetch-depth: 0 - uses: dtolnay/rust-toolchain@stable with: @@ -242,7 +179,7 @@ jobs: release: name: Release - needs: [changes, build, update-manifests] + needs: [changes, build] if: github.event_name != 'pull_request' runs-on: ubuntu-latest steps: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 31b3f4f..60aed4d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -28,10 +28,12 @@ permissions: jobs: publish: runs-on: ubuntu-latest + permissions: + contents: write steps: - uses: actions/checkout@v4 with: - ref: ${{ inputs.tag }} + fetch-depth: 0 - uses: dtolnay/rust-toolchain@stable @@ -43,29 +45,46 @@ jobs: - name: Install system dependencies run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - name: Verify release exists and contains canonical assets - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Compute next version + id: version run: | - gh release view "${{ inputs.tag }}" --json assets --jq '.assets[].name' > /tmp/release-assets.txt - grep -Fx "deskctl-linux-x86_64" /tmp/release-assets.txt >/dev/null - grep -Fx "checksums.txt" /tmp/release-assets.txt >/dev/null + CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + IFS='.' 
read -r MAJOR MINOR PATCH <<< "$CURRENT" - - name: Verify versions align with tag + case "${{ inputs.bump }}" in + major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;; + minor) MINOR=$((MINOR + 1)); PATCH=0 ;; + patch) PATCH=$((PATCH + 1)) ;; + esac + + NEW="${MAJOR}.${MINOR}.${PATCH}" + TAG="v${NEW}" + + echo "version=${NEW}" >> "$GITHUB_OUTPUT" + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "Bumping ${CURRENT} -> ${NEW} (${TAG})" + + - name: Bump versions run: | - TAG="${{ inputs.tag }}" - VERSION="${TAG#v}" - CARGO_VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - NPM_VERSION=$(node -p 'require("./npm/deskctl/package.json").version') + NEW="${{ steps.version.outputs.version }}" + CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml + node -e 'const fs=require("node:fs"); const p="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(p,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(p, JSON.stringify(pkg, null, 2)+"\n");' "$NEW" + cargo generate-lockfile - test "$VERSION" = "$CARGO_VERSION" - test "$VERSION" = "$NPM_VERSION" + - name: Commit, tag, and push + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add Cargo.toml Cargo.lock npm/deskctl/package.json + git commit -m "release: ${{ steps.version.outputs.tag }} [skip ci]" + git tag "${{ steps.version.outputs.tag }}" + git push origin main --tags - name: Check current published state id: published run: | - VERSION="${{ inputs.tag }}" - VERSION="${VERSION#v}" + VERSION="${{ steps.version.outputs.version }}" if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then echo "npm=true" >> "$GITHUB_OUTPUT" @@ -102,6 +121,7 @@ jobs: - name: Summary run: | - echo "tag=${{ inputs.tag }}" + echo "tag=${{ steps.version.outputs.tag }}" + echo "bump=${{ inputs.bump }}" echo "npm_already_published=${{ 
steps.published.outputs.npm }}" echo "crates_already_published=${{ steps.published.outputs.crates }}" From eedb5de2d478acebe6dbd75f17f716ccbb8f0d8c Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:13:10 -0400 Subject: [PATCH 10/35] refresh contributor cache [skip ci] From 2a8b51b4f5249969c6adb2a28ea0ef9238b84667 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:21:43 -0400 Subject: [PATCH 11/35] docs: tighten skill install docs and bundle Co-authored-by: Codex --- README.md | 7 +- site/src/pages/index.astro | 2 +- site/src/pages/installation.mdx | 8 +- skills/deskctl/agents/openai.yaml | 7 ++ skills/deskctl/references/runtime-contract.md | 74 ++++++++++++++++++- 5 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 skills/deskctl/agents/openai.yaml mode change 120000 => 100644 skills/deskctl/references/runtime-contract.md diff --git a/README.md b/README.md index 4b42b5f..f2e746f 100644 --- a/README.md +++ b/README.md @@ -27,10 +27,13 @@ npx deskctl --help ## Installable skill ```bash -npx skills add harivansh-afk/deskctl -s deskctl +npx skills add harivansh-afk/deskctl --skill deskctl -g ``` -The installable skill lives in [`skills/deskctl`](skills/deskctl) and is built around the same observe -> wait -> act -> verify loop as the CLI. +The installable skill lives in [`skills/deskctl`](skills/deskctl), follows the +standard `skills/` repo layout, and installs directly from this GitHub repo via +`npx skills add ...`. It is built around the same observe -> wait -> act -> +verify loop as the CLI. ## Quick example diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index 8b8d4b4..e97b599 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -38,7 +38,7 @@ import DocLayout from "../layouts/DocLayout.astro"; There is also an installable skill for `skills.sh`-style agent runtimes:

      -
      npx skills add harivansh-afk/deskctl -s deskctl
      +
      npx skills add harivansh-afk/deskctl --skill deskctl -g

      Links

      diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx index df53fcc..7754e6b 100644 --- a/site/src/pages/installation.mdx +++ b/site/src/pages/installation.mdx @@ -27,11 +27,13 @@ npx deskctl --help For `skills.sh`-style runtimes: ```sh -npx skills add harivansh-afk/deskctl -s deskctl +npx skills add harivansh-afk/deskctl --skill deskctl -g ``` -The repo skill lives under `skills/deskctl` and is designed around the same -observe -> wait -> act -> verify loop as the CLI. +The repo skill lives under `skills/deskctl`, so `skills` can install it +directly from this GitHub repo. It is designed around the same observe -> wait +-> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if +you want a project-local install. ## Other install paths diff --git a/skills/deskctl/agents/openai.yaml b/skills/deskctl/agents/openai.yaml new file mode 100644 index 0000000..8a5ca13 --- /dev/null +++ b/skills/deskctl/agents/openai.yaml @@ -0,0 +1,7 @@ +interface: + display_name: "deskctl" + short_description: "Control Linux X11 desktops from agent loops" + default_prompt: "Use $deskctl to diagnose the desktop, observe state, wait for UI changes, act deterministically, and verify the result." + +policy: + allow_implicit_invocation: true diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md deleted file mode 120000 index 8de0781..0000000 --- a/skills/deskctl/references/runtime-contract.md +++ /dev/null @@ -1 +0,0 @@ -../../../docs/runtime-contract.md \ No newline at end of file diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md new file mode 100644 index 0000000..6efd2bc --- /dev/null +++ b/skills/deskctl/references/runtime-contract.md @@ -0,0 +1,73 @@ +# deskctl runtime contract + +This copy ships inside the installable skill so `npx skills add ...` installs a +self-contained reference bundle. 
+ +All commands support `--json` and use the same top-level envelope: + +```json +{ + "success": true, + "data": {}, + "error": null +} +``` + +Use `--json` whenever you need to parse output programmatically. + +## Stable window fields + +Whenever a response includes a window payload, these fields are stable: + +- `ref_id` +- `window_id` +- `title` +- `app_name` +- `x` +- `y` +- `width` +- `height` +- `focused` +- `minimized` + +Use `window_id` for stable targeting inside a live daemon session. Use +`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or +`list-windows`. + +## Stable grouped reads + +- `deskctl get active-window` -> `data.window` +- `deskctl get monitors` -> `data.count`, `data.monitors` +- `deskctl get version` -> `data.version`, `data.backend` +- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as + `backend`, `display`, `session_type`, `session`, `socket_path`, `screen`, + `monitor_count`, and `monitors` + +## Stable waits + +- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`, + `data.window` +- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`, + `data.window` + +## Stable structured error kinds + +When a command fails with structured JSON data, these `kind` values are stable: + +- `selector_not_found` +- `selector_ambiguous` +- `selector_invalid` +- `timeout` +- `not_found` + +Wait failures may also include `window_not_focused` in the last observation +payload. 
+ +## Best-effort fields + +Treat these as useful but non-contractual: + +- exact monitor names +- incidental text formatting in non-JSON mode +- default screenshot file names when no explicit path was provided +- environment-dependent ordering details from the window manager From c907e800af804ad44dd844e09f1a0c02d36316a6 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:21:58 -0400 Subject: [PATCH 12/35] change client bin name --- src/cli/mod.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index bab44c9..b24465a 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -7,7 +7,12 @@ use std::path::PathBuf; use crate::core::protocol::{Request, Response}; #[derive(Parser)] -#[command(name = "deskctl", version, about = "Desktop control CLI for AI agents")] +#[command( + name = "deskctl", + bin_name = "deskctl", + version, + about = "Desktop control CLI for AI agents" +)] pub struct App { #[command(flatten)] pub global: GlobalOpts, @@ -988,6 +993,12 @@ mod tests { assert!(help.contains("deskctl snapshot --annotate")); } + #[test] + fn root_help_uses_public_bin_name() { + let help = App::command().render_help().to_string(); + assert!(help.contains("Usage: deskctl [OPTIONS] ")); + } + #[test] fn window_listing_text_includes_window_ids() { let lines = render_success_lines( From 3bfec9eecc890208d6f4f37b97a95534b2a982f5 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:27:43 -0400 Subject: [PATCH 13/35] edit docs --- site/src/pages/index.astro | 13 ------------- site/src/pages/installation.mdx | 15 --------------- 2 files changed, 28 deletions(-) diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index e97b599..b770178 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -21,7 +21,6 @@ import DocLayout from "../layouts/DocLayout.astro";

      Reference

      @@ -34,23 +33,11 @@ import DocLayout from "../layouts/DocLayout.astro";

      Agent skill

      -

      - There is also an installable skill for `skills.sh`-style agent runtimes: -

      - -
      npx skills add harivansh-afk/deskctl --skill deskctl -g
      -

      Links

      diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx index 7754e6b..ed4e737 100644 --- a/site/src/pages/installation.mdx +++ b/site/src/pages/installation.mdx @@ -10,26 +10,11 @@ toc: true ```sh npm install -g deskctl -deskctl --help ``` `deskctl` is the default install path. It installs the command by downloading the matching GitHub Release asset for the supported runtime target. -## One-shot usage - -```sh -npx deskctl --help -``` - -## Agent skill - -For `skills.sh`-style runtimes: - -```sh -npx skills add harivansh-afk/deskctl --skill deskctl -g -``` - The repo skill lives under `skills/deskctl`, so `skills` can install it directly from this GitHub repo. It is designed around the same observe -> wait -> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if From bf603671f95f28270e4ede426f03442c9203b328 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:28:18 -0400 Subject: [PATCH 14/35] rm: --- site/src/pages/index.astro | 2 -- 1 file changed, 2 deletions(-) diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index b770178..16a4b29 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -31,8 +31,6 @@ import DocLayout from "../layouts/DocLayout.astro";
    • Runtime contract
    -

    Agent skill

    -

    Links

      From 848ef97e87c321dffda0a6e4823c3ce8871569e1 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:34:10 -0400 Subject: [PATCH 15/35] edit readme --- README.md | 45 +++++++-------------------------------------- 1 file changed, 7 insertions(+), 38 deletions(-) diff --git a/README.md b/README.md index f2e746f..4bc24c8 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ # deskctl [![npm](https://img.shields.io/npm/v/deskctl?label=npm)](https://www.npmjs.com/package/deskctl) -[![release](https://img.shields.io/github/v/release/harivansh-afk/deskctl?label=release)](https://github.com/harivansh-afk/deskctl/releases) -[![runtime](https://img.shields.io/badge/runtime-linux--x11-111827)](#support-boundary) [![skill](https://img.shields.io/badge/skills.sh-deskctl-111827)](skills/deskctl) -Non-interactive desktop control for AI agents on Linux X11. +Desktop control cli for AI agents on Linux X11. ## Install @@ -15,44 +13,19 @@ deskctl doctor deskctl snapshot --annotate ``` -One-shot execution also works: +## Skill ```bash -npx deskctl --help -``` - -`deskctl` installs the command by downloading the matching GitHub Release asset for the supported runtime target. - - -## Installable skill - -```bash -npx skills add harivansh-afk/deskctl --skill deskctl -g -``` - -The installable skill lives in [`skills/deskctl`](skills/deskctl), follows the -standard `skills/` repo layout, and installs directly from this GitHub repo via -`npx skills add ...`. It is built around the same observe -> wait -> act -> -verify loop as the CLI. 
- -## Quick example - -```bash -deskctl doctor -deskctl snapshot --annotate -deskctl wait window --selector 'title=Firefox' --timeout 10 -deskctl focus 'title=Firefox' -deskctl type "hello world" +npx skills add harivansh-afk/deskctl ``` ## Docs - runtime contract: [docs/runtime-contract.md](docs/runtime-contract.md) -- release flow: [docs/releasing.md](docs/releasing.md) -- installable skill: [skills/deskctl](skills/deskctl) -- contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md) +- releasing: [docs/releasing.md](docs/releasing.md) +- contributing: [CONTRIBUTING.md](CONTRIBUTING.md) -## Other install paths +## Install paths Nix: @@ -61,12 +34,8 @@ nix run github:harivansh-afk/deskctl -- --help nix profile install github:harivansh-afk/deskctl ``` -Source build: +Rust: ```bash cargo build ``` - -## Support boundary - -`deskctl` currently supports Linux X11. Use `--json` for stable machine parsing, use `window_id` for programmatic targeting inside a live session, and use `deskctl doctor` first when the runtime looks broken. From 6c6f33040f8be0aec4855c5fd9eef33c6adef4c1 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 09:35:46 -0400 Subject: [PATCH 16/35] update readme --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4bc24c8..935f329 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,9 @@ Desktop control cli for AI agents on Linux X11. 
```bash npm install -g deskctl +``` + +```bash deskctl doctor deskctl snapshot --annotate ``` From 844f2f2bc6ddb989d1f29bea2725be3741737e53 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 13:37:41 +0000 Subject: [PATCH 17/35] release: v0.1.8 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6922004..3fb1666 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.7" +version = "0.1.8" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 5872639..fc7816c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.7" +version = "0.1.8" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 6085bca..45daefe 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.7", + "version": "0.1.8", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 2b02513d6ef22ba238e50fea98cc8133c94f2131 Mon Sep 17 00:00:00 2001 From: Hari <73809867+harivansh-afk@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:27:35 -0400 Subject: [PATCH 18/35] Improve docs structure and navigation (#12) * Improve docs structure and navigation Co-authored-by: Codex * rm * handwrite docs --------- Co-authored-by: Codex --- site/src/layouts/DocLayout.astro | 2 +- site/src/pages/architecture.mdx | 98 ----------------------------- site/src/pages/commands.mdx | 24 ++++--- site/src/pages/index.astro | 29 ++++++--- site/src/pages/installation.mdx | 30 ++++++--- site/src/pages/quick-start.mdx | 14 +++-- 
site/src/pages/runtime-contract.mdx | 4 +- site/src/styles/base.css | 10 +-- 8 files changed, 69 insertions(+), 142 deletions(-) delete mode 100644 site/src/pages/architecture.mdx diff --git a/site/src/layouts/DocLayout.astro b/site/src/layouts/DocLayout.astro index f2608de..afc8648 100644 --- a/site/src/layouts/DocLayout.astro +++ b/site/src/layouts/DocLayout.astro @@ -30,7 +30,7 @@ function formatTocText(text: string): string { { !isIndex && ( -
    diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx index ed4e737..e35f4eb 100644 --- a/site/src/pages/installation.mdx +++ b/site/src/pages/installation.mdx @@ -6,19 +6,30 @@ toc: true # Installation -## Default install +Install the public `deskctl` command first, then validate the desktop runtime +with `deskctl doctor` before trying to automate anything. + +## Recommended path ```sh npm install -g deskctl +deskctl doctor ``` `deskctl` is the default install path. It installs the command by downloading the matching GitHub Release asset for the supported runtime target. -The repo skill lives under `skills/deskctl`, so `skills` can install it -directly from this GitHub repo. It is designed around the same observe -> wait --> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if -you want a project-local install. +This path does not require a Rust toolchain. The installed command is always +`deskctl`, even though the release asset itself is target-specific. + +## Skill install + +The repo skill lives under `skills/deskctl`, so you can install it +directly using `skills.sh`: + +```sh +npx skills add harivansh-afk/deskctl +``` ## Other install paths @@ -29,7 +40,7 @@ nix run github:harivansh-afk/deskctl -- --help nix profile install github:harivansh-afk/deskctl ``` -### Build from source +### Rust ```sh git clone https://github.com/harivansh-afk/deskctl @@ -53,8 +64,13 @@ Source builds on Linux require: The binary itself only depends on the standard Linux glibc runtime. -If setup fails, run: +## Verification + +If setup fails for any reason, start here: ```sh deskctl doctor ``` + +`doctor` checks X11 connectivity, window enumeration, screenshot viability, and +daemon/socket health before normal command execution. 
diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx index 10f3ec0..7ecf5a7 100644 --- a/site/src/pages/quick-start.mdx +++ b/site/src/pages/quick-start.mdx @@ -6,17 +6,19 @@ toc: true # Quick start -## Install and diagnose +The fastest way to use `deskctl` is to follow the same four-step loop: observe, wait, act, verify. + +## 1. Install and diagnose ```sh npm install -g deskctl deskctl doctor ``` -Use `deskctl doctor` first. It checks X11 connectivity, basic enumeration, +Run `deskctl doctor` first. It checks X11 connectivity, basic enumeration, screenshot viability, and socket health before you start driving the desktop. -## Observe +## 2. Observe the desktop ```sh deskctl snapshot --annotate @@ -29,7 +31,7 @@ Use `snapshot` when you want a screenshot artifact plus window refs. Use `list-windows` when you only need the current window tree without writing a screenshot. -## Target windows cleanly +## 3. Pick selectors that stay readable Prefer explicit selectors when you need deterministic targeting: @@ -44,7 +46,7 @@ focused Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare strings like `firefox` are fuzzy matches and now fail on ambiguity. -## Wait, act, verify +## 4. Wait, act, verify The core loop is: @@ -69,7 +71,7 @@ deskctl snapshot The wait commands return the matched window payload on success, so they compose cleanly into the next action. -## Use `--json` when parsing matters +## 5. Use `--json` when parsing matters Every command supports `--json` and uses the same top-level envelope: diff --git a/site/src/pages/runtime-contract.mdx b/site/src/pages/runtime-contract.mdx index 4fca14c..e33e999 100644 --- a/site/src/pages/runtime-contract.mdx +++ b/site/src/pages/runtime-contract.mdx @@ -11,7 +11,7 @@ This page defines the current public output contract for `deskctl`. It is intentionally scoped to the current Linux X11 runtime surface. 
It does not promise stability for future Wayland or window-manager-specific features. -## JSON envelope +## Stable top-level envelope Every command supports `--json` and uses the same top-level envelope: @@ -32,7 +32,7 @@ Stable top-level fields: If `success` is `false`, the command exits non-zero in both text mode and JSON mode. -## Stable window fields +## Stable window payload Whenever a response includes a window payload, these fields are stable: diff --git a/site/src/styles/base.css b/site/src/styles/base.css index cd569a9..e05552e 100644 --- a/site/src/styles/base.css +++ b/site/src/styles/base.css @@ -224,30 +224,30 @@ hr { } } -nav { +.breadcrumbs { max-width: 50rem; margin: 0 auto; padding: 1.5rem clamp(1.25rem, 5vw, 3rem) 0; font-size: 0.9rem; } -nav a { +.breadcrumbs a { color: inherit; text-decoration: none; opacity: 0.6; transition: opacity 0.15s; } -nav a:hover { +.breadcrumbs a:hover { opacity: 1; } -nav .title { +.breadcrumbs .title { font-weight: 500; opacity: 1; } -nav .sep { +.breadcrumbs .sep { opacity: 0.3; margin: 0 0.5em; } From a64b46b479b45310adedf365888fffa458268bf3 Mon Sep 17 00:00:00 2001 From: Hari <73809867+harivansh-afk@users.noreply.github.com> Date: Thu, 26 Mar 2026 11:53:15 -0400 Subject: [PATCH 19/35] deskctl upgrade (#13) * deskctl upgrade * interactive update as well as --yes flag --- npm/deskctl/README.md | 12 + site/src/pages/commands.mdx | 5 +- skills/deskctl/SKILL.md | 6 + skills/deskctl/references/commands.md | 1 + src/cli/mod.rs | 116 ++++++- src/cli/upgrade.rs | 465 ++++++++++++++++++++++++++ 6 files changed, 603 insertions(+), 2 deletions(-) create mode 100644 src/cli/upgrade.rs diff --git a/npm/deskctl/README.md b/npm/deskctl/README.md index 7bb42a9..81f07f4 100644 --- a/npm/deskctl/README.md +++ b/npm/deskctl/README.md @@ -14,6 +14,18 @@ After install, run: deskctl --help ``` +To upgrade version: + +```bash +deskctl upgrade +``` + +For non-interactive use: + +```bash +deskctl upgrade --yes +``` + One-shot usage is also 
supported: ```bash diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx index dc9c578..934cdb8 100644 --- a/site/src/pages/commands.mdx +++ b/site/src/pages/commands.mdx @@ -13,6 +13,7 @@ reads, grouped waits, selector-driven actions, and a few input primitives. ```sh deskctl doctor +deskctl upgrade deskctl snapshot deskctl snapshot --annotate deskctl list-windows @@ -26,7 +27,9 @@ deskctl get-screen-size deskctl get-mouse-position ``` -`doctor` checks the runtime before daemon startup. `snapshot` produces a +`doctor` checks the runtime before daemon startup. `upgrade` checks for a newer +published version, shows a short confirmation prompt when an update is +available, and supports `--yes` for non-interactive use. `snapshot` produces a screenshot plus window refs. `list-windows` is the same window tree without the side effect of writing a screenshot. The grouped `get` commands are the preferred read surface for focused state queries. diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md index 244a1fb..67a77c5 100644 --- a/skills/deskctl/SKILL.md +++ b/skills/deskctl/SKILL.md @@ -18,6 +18,12 @@ deskctl doctor deskctl snapshot --annotate ``` +If `deskctl` was installed through npm, refresh it later with: + +```bash +deskctl upgrade --yes +``` + ## Agent loop Every desktop interaction follows: **observe -> wait -> act -> verify**. diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md index 77b9513..27b4310 100644 --- a/skills/deskctl/references/commands.md +++ b/skills/deskctl/references/commands.md @@ -7,6 +7,7 @@ runtime contract. 
```bash deskctl doctor +deskctl upgrade deskctl snapshot deskctl snapshot --annotate deskctl list-windows diff --git a/src/cli/mod.rs b/src/cli/mod.rs index b24465a..28092d7 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,4 +1,5 @@ pub mod connection; +pub mod upgrade; use anyhow::Result; use clap::{Args, Parser, Subcommand}; @@ -121,6 +122,9 @@ pub enum Command { /// Diagnose X11 runtime, screenshot, and daemon health #[command(after_help = DOCTOR_EXAMPLES)] Doctor, + /// Upgrade deskctl using the current install channel + #[command(after_help = UPGRADE_EXAMPLES)] + Upgrade(UpgradeOpts), /// Query runtime state #[command(subcommand)] Get(GetCmd), @@ -231,6 +235,8 @@ const GET_SCREEN_SIZE_EXAMPLES: &str = const GET_MOUSE_POSITION_EXAMPLES: &str = "Examples:\n deskctl get-mouse-position\n deskctl --json get-mouse-position"; const DOCTOR_EXAMPLES: &str = "Examples:\n deskctl doctor\n deskctl --json doctor"; +const UPGRADE_EXAMPLES: &str = + "Examples:\n deskctl upgrade\n deskctl upgrade --yes\n deskctl --json upgrade --yes"; const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Firefox' --timeout 10\n deskctl --json wait window --selector 'class=firefox' --poll-ms 100"; const WAIT_FOCUS_EXAMPLES: &str = "Examples:\n deskctl wait focus --selector 'id=win3' --timeout 5\n deskctl wait focus --selector focused --poll-ms 200"; const SCREENSHOT_EXAMPLES: &str = @@ -284,6 +290,13 @@ pub struct WaitSelectorOpts { pub poll_ms: u64, } +#[derive(Args)] +pub struct UpgradeOpts { + /// Skip confirmation and upgrade non-interactively + #[arg(long)] + pub yes: bool, +} + pub fn run() -> Result<()> { let app = App::parse(); @@ -300,6 +313,22 @@ pub fn run() -> Result<()> { return connection::run_doctor(&app.global); } + if let Command::Upgrade(ref upgrade_opts) = app.command { + let response = upgrade::run_upgrade(&app.global, upgrade_opts)?; + let success = response.success; + + if app.global.json { + println!("{}", 
serde_json::to_string_pretty(&response)?); + if !success { + std::process::exit(1); + } + } else { + print_response(&app.command, &response)?; + } + + return Ok(()); + } + // All other commands need a daemon connection let request = build_request(&app.command)?; let response = connection::send_command(&app.global, &request)?; @@ -363,6 +392,7 @@ fn build_request(cmd: &Command) -> Result { Command::GetScreenSize => Request::new("get-screen-size"), Command::GetMousePosition => Request::new("get-mouse-position"), Command::Doctor => unreachable!(), + Command::Upgrade(_) => unreachable!(), Command::Get(sub) => match sub { GetCmd::ActiveWindow => Request::new("get-active-window"), GetCmd::Monitors => Request::new("get-monitors"), @@ -422,6 +452,7 @@ fn render_success_lines(cmd: &Command, data: Option<&serde_json::Value>) -> Resu Command::Get(GetCmd::Systeminfo) => render_systeminfo_lines(data), Command::GetScreenSize => vec![render_screen_size_line(data)], Command::GetMousePosition => vec![render_mouse_position_line(data)], + Command::Upgrade(_) => render_upgrade_lines(data), Command::Screenshot { annotate, .. } => render_screenshot_lines(data, *annotate), Command::Click { .. } => vec![render_click_line(data, false)], Command::Dblclick { .. 
} => vec![render_click_line(data, true)], @@ -526,6 +557,41 @@ fn render_error_lines(response: &Response) -> Vec { lines.push("No focused window is available.".to_string()); } } + "upgrade_failed" => { + if let Some(reason) = data.get("io_error").and_then(|value| value.as_str()) { + lines.push(format!("Reason: {reason}")); + } + if let Some(reason) = data.get("reason").and_then(|value| value.as_str()) { + lines.push(format!("Reason: {reason}")); + } + if let Some(command) = data.get("command").and_then(|value| value.as_str()) { + lines.push(format!("Command: {command}")); + } + if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) { + lines.push(format!("Hint: {hint}")); + } + } + "upgrade_unsupported" => { + if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) { + lines.push(format!("Hint: {hint}")); + } + } + "upgrade_confirmation_required" => { + if let Some(current_version) = + data.get("current_version").and_then(|value| value.as_str()) + { + if let Some(latest_version) = + data.get("latest_version").and_then(|value| value.as_str()) + { + lines.push(format!( + "Update available: {current_version} -> {latest_version}" + )); + } + } + if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) { + lines.push(format!("Hint: {hint}")); + } + } _ => {} } @@ -723,6 +789,36 @@ fn render_screenshot_lines(data: &serde_json::Value, annotate: bool) -> Vec Vec { + match data.get("status").and_then(|value| value.as_str()) { + Some("up_to_date") => { + let version = data + .get("latest_version") + .and_then(|value| value.as_str()) + .or_else(|| data.get("current_version").and_then(|value| value.as_str())) + .unwrap_or("unknown"); + vec![format!( + "✔ You're already on the latest version! 
({version})" + )] + } + Some("upgraded") => { + let current_version = data + .get("current_version") + .and_then(|value| value.as_str()) + .unwrap_or("unknown"); + let latest_version = data + .get("latest_version") + .and_then(|value| value.as_str()) + .unwrap_or("unknown"); + vec![format!( + "✔ Upgraded deskctl from {current_version} -> {latest_version}" + )] + } + Some("cancelled") => vec!["No changes made.".to_string()], + _ => vec!["Upgrade completed.".to_string()], + } +} + fn render_click_line(data: &serde_json::Value, double: bool) -> String { let action = if double { "Double-clicked" } else { "Clicked" }; let key = if double { "double_clicked" } else { "clicked" }; @@ -978,7 +1074,7 @@ fn truncate_display(value: &str, max_chars: usize) -> String { mod tests { use super::{ render_error_lines, render_screen_size_line, render_success_lines, target_summary, - truncate_display, App, Command, Response, + truncate_display, App, Command, Response, UpgradeOpts, }; use clap::CommandFactory; use serde_json::json; @@ -1104,4 +1200,22 @@ mod tests { let input = format!("fire{}fox", '\u{00E9}'); assert_eq!(truncate_display(&input, 7), "fire..."); } + + #[test] + fn upgrade_success_text_is_neat() { + let lines = render_success_lines( + &Command::Upgrade(UpgradeOpts { yes: false }), + Some(&json!({ + "status": "up_to_date", + "current_version": "0.1.8", + "latest_version": "0.1.8" + })), + ) + .unwrap(); + + assert_eq!( + lines, + vec!["✔ You're already on the latest version! 
(0.1.8)"] + ); + } } diff --git a/src/cli/upgrade.rs b/src/cli/upgrade.rs new file mode 100644 index 0000000..acc844e --- /dev/null +++ b/src/cli/upgrade.rs @@ -0,0 +1,465 @@ +use std::io::{self, IsTerminal, Write}; +use std::path::{Path, PathBuf}; +use std::process::Command; + +use anyhow::{Context, Result}; +use serde_json::json; + +use crate::cli::{GlobalOpts, UpgradeOpts}; +use crate::core::protocol::Response; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum InstallMethod { + Npm, + Cargo, + Nix, + Source, + Unknown, +} + +impl InstallMethod { + fn as_str(self) -> &'static str { + match self { + Self::Npm => "npm", + Self::Cargo => "cargo", + Self::Nix => "nix", + Self::Source => "source", + Self::Unknown => "unknown", + } + } +} + +#[derive(Debug)] +struct UpgradePlan { + install_method: InstallMethod, + program: &'static str, + args: Vec<&'static str>, +} + +impl UpgradePlan { + fn command_line(&self) -> String { + std::iter::once(self.program) + .chain(self.args.iter().copied()) + .collect::>() + .join(" ") + } +} + +#[derive(Debug)] +struct VersionInfo { + current: String, + latest: String, +} + +pub fn run_upgrade(opts: &GlobalOpts, upgrade_opts: &UpgradeOpts) -> Result { + let current_exe = std::env::current_exe().context("Failed to determine executable path")?; + let install_method = detect_install_method(¤t_exe); + + let Some(plan) = upgrade_plan(install_method) else { + return Ok(Response::err_with_data( + format!( + "deskctl upgrade is not supported for {} installs.", + install_method.as_str() + ), + json!({ + "kind": "upgrade_unsupported", + "install_method": install_method.as_str(), + "current_exe": current_exe.display().to_string(), + "hint": upgrade_hint(install_method), + }), + )); + }; + + if !opts.json { + println!("- Checking for updates..."); + } + + let versions = match resolve_versions(&plan) { + Ok(versions) => versions, + Err(response) => return Ok(response), + }; + + if versions.current == versions.latest { + return 
Ok(Response::ok(json!({ + "action": "upgrade", + "status": "up_to_date", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + }))); + } + + if !upgrade_opts.yes { + if opts.json || !io::stdin().is_terminal() { + return Ok(Response::err_with_data( + format!( + "Upgrade confirmation required for {} -> {}.", + versions.current, versions.latest + ), + json!({ + "kind": "upgrade_confirmation_required", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + "command": plan.command_line(), + "hint": "Re-run with --yes to upgrade non-interactively.", + }), + )); + } + + if !confirm_upgrade(&versions)? { + return Ok(Response::ok(json!({ + "action": "upgrade", + "status": "cancelled", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + }))); + } + } + + if !opts.json { + println!( + "- Upgrading deskctl from {} -> {}...", + versions.current, versions.latest + ); + } + + let output = match Command::new(plan.program).args(&plan.args).output() { + Ok(output) => output, + Err(error) => return Ok(upgrade_spawn_error_response(&plan, &versions, &error)), + }; + + if output.status.success() { + return Ok(Response::ok(json!({ + "action": "upgrade", + "status": "upgraded", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + "command": plan.command_line(), + "exit_code": output.status.code(), + }))); + } + + Ok(upgrade_command_failed_response(&plan, &versions, &output)) +} + +fn resolve_versions(plan: &UpgradePlan) -> std::result::Result { + let current = env!("CARGO_PKG_VERSION").to_string(); + let latest = match plan.install_method { + InstallMethod::Npm => query_npm_latest_version()?, + InstallMethod::Cargo => query_cargo_latest_version()?, + InstallMethod::Nix | InstallMethod::Source | 
InstallMethod::Unknown => { + return Err(Response::err_with_data( + "Could not determine the latest published version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": plan.install_method.as_str(), + "reason": "Could not determine the latest published version for this install method.", + "command": plan.command_line(), + "hint": upgrade_hint(plan.install_method), + }), + )); + } + }; + + Ok(VersionInfo { current, latest }) +} + +fn query_npm_latest_version() -> std::result::Result { + let output = Command::new("npm") + .args(["view", "deskctl", "version", "--json"]) + .output() + .map_err(|error| { + Response::err_with_data( + "Failed to check the latest npm version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Npm.as_str(), + "reason": "Failed to run npm view deskctl version --json.", + "io_error": error.to_string(), + "command": "npm view deskctl version --json", + "hint": upgrade_hint(InstallMethod::Npm), + }), + ) + })?; + + if !output.status.success() { + return Err(Response::err_with_data( + "Failed to check the latest npm version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Npm.as_str(), + "reason": command_failure_reason(&output), + "command": "npm view deskctl version --json", + "hint": upgrade_hint(InstallMethod::Npm), + }), + )); + } + + serde_json::from_slice::(&output.stdout).map_err(|_| { + Response::err_with_data( + "Failed to parse the latest npm version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Npm.as_str(), + "reason": "npm view returned an unexpected version payload.", + "command": "npm view deskctl version --json", + "hint": upgrade_hint(InstallMethod::Npm), + }), + ) + }) +} + +fn query_cargo_latest_version() -> std::result::Result { + let output = Command::new("cargo") + .args(["search", "deskctl", "--limit", "1"]) + .output() + .map_err(|error| { + Response::err_with_data( + "Failed to check 
the latest crates.io version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Cargo.as_str(), + "reason": "Failed to run cargo search deskctl --limit 1.", + "io_error": error.to_string(), + "command": "cargo search deskctl --limit 1", + "hint": upgrade_hint(InstallMethod::Cargo), + }), + ) + })?; + + if !output.status.success() { + return Err(Response::err_with_data( + "Failed to check the latest crates.io version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Cargo.as_str(), + "reason": command_failure_reason(&output), + "command": "cargo search deskctl --limit 1", + "hint": upgrade_hint(InstallMethod::Cargo), + }), + )); + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let latest = stdout + .split('"') + .nth(1) + .map(str::to_string) + .filter(|value| !value.is_empty()); + + latest.ok_or_else(|| { + Response::err_with_data( + "Failed to determine the latest crates.io version.".to_string(), + json!({ + "kind": "upgrade_failed", + "install_method": InstallMethod::Cargo.as_str(), + "reason": "cargo search did not return a published deskctl crate version.", + "command": "cargo search deskctl --limit 1", + "hint": upgrade_hint(InstallMethod::Cargo), + }), + ) + }) +} + +fn confirm_upgrade(versions: &VersionInfo) -> Result { + print!( + "Upgrade deskctl from {} -> {}? 
[y/N] ", + versions.current, versions.latest + ); + io::stdout().flush()?; + + let mut input = String::new(); + io::stdin().read_line(&mut input)?; + + let trimmed = input.trim(); + Ok(matches!(trimmed, "y" | "Y" | "yes" | "YES" | "Yes")) +} + +fn upgrade_command_failed_response( + plan: &UpgradePlan, + versions: &VersionInfo, + output: &std::process::Output, +) -> Response { + Response::err_with_data( + format!("Upgrade command failed: {}", plan.command_line()), + json!({ + "kind": "upgrade_failed", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + "command": plan.command_line(), + "exit_code": output.status.code(), + "reason": command_failure_reason(output), + "hint": upgrade_hint(plan.install_method), + }), + ) +} + +fn upgrade_spawn_error_response( + plan: &UpgradePlan, + versions: &VersionInfo, + error: &std::io::Error, +) -> Response { + Response::err_with_data( + format!("Failed to run {}", plan.command_line()), + json!({ + "kind": "upgrade_failed", + "install_method": plan.install_method.as_str(), + "current_version": versions.current, + "latest_version": versions.latest, + "command": plan.command_line(), + "io_error": error.to_string(), + "hint": upgrade_hint(plan.install_method), + }), + ) +} + +fn command_failure_reason(output: &std::process::Output) -> String { + let stderr = String::from_utf8_lossy(&output.stderr); + let stdout = String::from_utf8_lossy(&output.stdout); + + stderr + .lines() + .chain(stdout.lines()) + .map(str::trim) + .find(|line| !line.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| { + output + .status + .code() + .map(|code| format!("Command exited with status {code}.")) + .unwrap_or_else(|| "Command exited unsuccessfully.".to_string()) + }) +} + +fn upgrade_plan(install_method: InstallMethod) -> Option { + match install_method { + InstallMethod::Npm => Some(UpgradePlan { + install_method, + program: "npm", + args: vec!["install", "-g", 
"deskctl@latest"], + }), + InstallMethod::Cargo => Some(UpgradePlan { + install_method, + program: "cargo", + args: vec!["install", "deskctl", "--locked"], + }), + InstallMethod::Nix | InstallMethod::Source | InstallMethod::Unknown => None, + } +} + +fn upgrade_hint(install_method: InstallMethod) -> &'static str { + match install_method { + InstallMethod::Nix => { + "Use nix profile upgrade or update the flake reference you installed from." + } + InstallMethod::Source => { + "Rebuild from source or reinstall deskctl through npm, cargo, or nix." + } + InstallMethod::Unknown => { + "Reinstall deskctl through a supported channel such as npm, cargo, or nix." + } + InstallMethod::Npm => "Retry with --yes or run npm install -g deskctl@latest directly.", + InstallMethod::Cargo => "Retry with --yes or run cargo install deskctl --locked directly.", + } +} + +fn detect_install_method(current_exe: &Path) -> InstallMethod { + if looks_like_npm_install(current_exe) { + return InstallMethod::Npm; + } + if looks_like_nix_install(current_exe) { + return InstallMethod::Nix; + } + if looks_like_cargo_install(current_exe) { + return InstallMethod::Cargo; + } + if looks_like_source_tree(current_exe) { + return InstallMethod::Source; + } + InstallMethod::Unknown +} + +fn looks_like_npm_install(path: &Path) -> bool { + let value = normalize(path); + value.contains("/node_modules/deskctl/") && value.contains("/vendor/") +} + +fn looks_like_nix_install(path: &Path) -> bool { + normalize(path).starts_with("/nix/store/") +} + +fn looks_like_cargo_install(path: &Path) -> bool { + let Some(home) = std::env::var_os("HOME") else { + return false; + }; + + let cargo_home = std::env::var_os("CARGO_HOME") + .map(PathBuf::from) + .unwrap_or_else(|| PathBuf::from(home).join(".cargo")); + path == cargo_home.join("bin").join("deskctl") +} + +fn looks_like_source_tree(path: &Path) -> bool { + let value = normalize(path); + value.contains("/target/debug/deskctl") || 
value.contains("/target/release/deskctl") +} + +fn normalize(path: &Path) -> String { + path.to_string_lossy().replace('\\', "/") +} + +#[cfg(test)] +mod tests { + use std::os::unix::process::ExitStatusExt; + use std::path::Path; + + use super::{command_failure_reason, detect_install_method, upgrade_plan, InstallMethod}; + + #[test] + fn detects_npm_install_path() { + let method = detect_install_method(Path::new( + "/usr/local/lib/node_modules/deskctl/vendor/deskctl-linux-x86_64", + )); + assert_eq!(method, InstallMethod::Npm); + } + + #[test] + fn detects_nix_install_path() { + let method = detect_install_method(Path::new("/nix/store/abc123-deskctl/bin/deskctl")); + assert_eq!(method, InstallMethod::Nix); + } + + #[test] + fn detects_source_tree_path() { + let method = + detect_install_method(Path::new("/Users/example/src/deskctl/target/debug/deskctl")); + assert_eq!(method, InstallMethod::Source); + } + + #[test] + fn npm_upgrade_plan_uses_global_install() { + let plan = upgrade_plan(InstallMethod::Npm).expect("npm installs should support upgrade"); + assert_eq!(plan.command_line(), "npm install -g deskctl@latest"); + } + + #[test] + fn nix_install_has_no_upgrade_plan() { + assert!(upgrade_plan(InstallMethod::Nix).is_none()); + } + + #[test] + fn failure_reason_prefers_stderr() { + let output = std::process::Output { + status: std::process::ExitStatus::from_raw(1 << 8), + stdout: b"".to_vec(), + stderr: b"boom\n".to_vec(), + }; + + assert_eq!(command_failure_reason(&output), "boom"); + } +} From e61c5bc33f7f51d8b43703cfc8e0c068f751e57a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 16:03:29 +0000 Subject: [PATCH 20/35] release: v0.1.9 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3fb1666..157dbc7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = 
"460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.8" +version = "0.1.9" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index fc7816c..2ebe138 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.8" +version = "0.1.9" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 45daefe..5dfeaa0 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.8", + "version": "0.1.9", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 07a478b0eed0e5df22cb5a1bd16989c3b8f57d33 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 14:21:41 -0400 Subject: [PATCH 21/35] couple CI with publish --- .github/workflows/ci.yml | 143 ++++++++++++++++++++++++++++++++-- .github/workflows/publish.yml | 127 ------------------------------ 2 files changed, 137 insertions(+), 133 deletions(-) delete mode 100644 .github/workflows/publish.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb36e61..bcb02b3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,23 @@ on: push: branches: [main] workflow_dispatch: + inputs: + bump: + description: Version bump type (only for workflow_dispatch) + type: choice + options: + - patch + - minor + - major + default: patch + publish_npm: + description: Publish to npm + type: boolean + default: true + publish_crates: + description: Publish to crates.io + type: boolean + default: false permissions: contents: write @@ -52,13 +69,34 @@ jobs: echo "rust=${{ steps.filter.outputs.rust }}" >> "$GITHUB_OUTPUT" fi - - name: Read current version + - name: Calculate next version id: version if: github.event_name != 
'pull_request' && steps.check.outputs.rust == 'true' run: | - VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - echo "version=${VERSION}" >> "$GITHUB_OUTPUT" - echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT" + CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT" + + BUMP="${{ inputs.bump || 'patch' }}" + case "$BUMP" in + major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;; + minor) MINOR=$((MINOR + 1)); PATCH=0 ;; + patch) + LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1) + if [ -z "$LATEST" ]; then + NEW_PATCH=$PATCH + else + LATEST_VER="${LATEST#v}" + IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER" + NEW_PATCH=$((LATEST_PATCH + 1)) + fi + PATCH=$NEW_PATCH + ;; + esac + + NEW="${MAJOR}.${MINOR}.${PATCH}" + echo "version=${NEW}" >> "$GITHUB_OUTPUT" + echo "tag=v${NEW}" >> "$GITHUB_OUTPUT" + echo "Computed version: ${NEW} (v${NEW})" validate: name: Validate @@ -177,10 +215,53 @@ jobs: path: target/release/deskctl retention-days: 7 + update-manifests: + name: Update Manifests + needs: [changes, build] + if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - uses: dtolnay/rust-toolchain@stable + + - uses: actions/setup-node@v4 + with: + node-version: 22 + + - name: Update versions + run: | + CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + NEW="${{ needs.changes.outputs.version }}" + if [ "$CURRENT" != "$NEW" ]; then + sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml + cargo generate-lockfile + fi + node -e ' + const fs = require("node:fs"); + const p = "npm/deskctl/package.json"; + const pkg = JSON.parse(fs.readFileSync(p, "utf8")); + pkg.version = process.argv[1]; + fs.writeFileSync(p, JSON.stringify(pkg, null, 2) + "\n"); + ' "$NEW" + + - name: Commit, tag, and push + run: | + git config 
user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add Cargo.toml Cargo.lock npm/deskctl/package.json + if ! git diff --cached --quiet; then + git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]" + fi + git tag "${{ needs.changes.outputs.tag }}" + git push origin main --tags + release: name: Release - needs: [changes, build] - if: github.event_name != 'pull_request' + needs: [changes, build, update-manifests] + if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -209,3 +290,53 @@ jobs: artifacts/deskctl-linux-x86_64 \ artifacts/checksums.txt fi + + publish: + name: Publish + needs: [changes, update-manifests, release] + if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.changes.outputs.tag }} + + - uses: dtolnay/rust-toolchain@stable + + - uses: actions/setup-node@v4 + with: + node-version: 22 + registry-url: https://registry.npmjs.org + + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev + + - name: Check current published state + id: published + run: | + VERSION="${{ needs.changes.outputs.version }}" + if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then + echo "npm=true" >> "$GITHUB_OUTPUT" + else + echo "npm=false" >> "$GITHUB_OUTPUT" + fi + if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then + echo "crates=true" >> "$GITHUB_OUTPUT" + else + echo "crates=false" >> "$GITHUB_OUTPUT" + fi + + - name: Validate npm package + run: node npm/deskctl/scripts/validate-package.js + + - name: Publish npm + if: steps.published.outputs.npm != 'true' + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: npm publish ./npm/deskctl --access public + + - name: Publish crates.io + if: 
inputs.publish_crates && steps.published.outputs.crates != 'true' + env: + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + run: cargo publish --locked diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index 60aed4d..0000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,127 +0,0 @@ -name: Publish Registries - -on: - workflow_dispatch: - inputs: - bump: - description: Version bump type - required: true - type: choice - options: - - patch - - minor - - major - publish_npm: - description: Publish deskctl to npm - required: true - type: boolean - default: true - publish_crates: - description: Publish deskctl to crates.io - required: true - type: boolean - default: false - -permissions: - contents: read - -jobs: - publish: - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - uses: dtolnay/rust-toolchain@stable - - - uses: actions/setup-node@v4 - with: - node-version: 22 - registry-url: https://registry.npmjs.org - - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - - name: Compute next version - id: version - run: | - CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - IFS='.' 
read -r MAJOR MINOR PATCH <<< "$CURRENT" - - case "${{ inputs.bump }}" in - major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;; - minor) MINOR=$((MINOR + 1)); PATCH=0 ;; - patch) PATCH=$((PATCH + 1)) ;; - esac - - NEW="${MAJOR}.${MINOR}.${PATCH}" - TAG="v${NEW}" - - echo "version=${NEW}" >> "$GITHUB_OUTPUT" - echo "tag=${TAG}" >> "$GITHUB_OUTPUT" - echo "Bumping ${CURRENT} -> ${NEW} (${TAG})" - - - name: Bump versions - run: | - NEW="${{ steps.version.outputs.version }}" - CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') - sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml - node -e 'const fs=require("node:fs"); const p="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(p,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(p, JSON.stringify(pkg, null, 2)+"\n");' "$NEW" - cargo generate-lockfile - - - name: Commit, tag, and push - run: | - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add Cargo.toml Cargo.lock npm/deskctl/package.json - git commit -m "release: ${{ steps.version.outputs.tag }} [skip ci]" - git tag "${{ steps.version.outputs.tag }}" - git push origin main --tags - - - name: Check current published state - id: published - run: | - VERSION="${{ steps.version.outputs.version }}" - - if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then - echo "npm=true" >> "$GITHUB_OUTPUT" - else - echo "npm=false" >> "$GITHUB_OUTPUT" - fi - - if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then - echo "crates=true" >> "$GITHUB_OUTPUT" - else - echo "crates=false" >> "$GITHUB_OUTPUT" - fi - - - name: Validate npm package - run: | - mkdir -p ./tmp/npm-pack - node npm/deskctl/scripts/validate-package.js - npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null - - - name: Validate crate publish path - run: cargo publish --dry-run --locked - - - name: Publish npm - if: 
inputs.publish_npm && steps.published.outputs.npm != 'true' - env: - NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} - run: npm publish ./npm/deskctl --access public - - - name: Publish crates.io - if: inputs.publish_crates && steps.published.outputs.crates != 'true' - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - run: cargo publish --locked - - - name: Summary - run: | - echo "tag=${{ steps.version.outputs.tag }}" - echo "bump=${{ inputs.bump }}" - echo "npm_already_published=${{ steps.published.outputs.npm }}" - echo "crates_already_published=${{ steps.published.outputs.crates }}" From 8d690a62b43a54d41f5b49c07f025bb4d419e3e6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 18:28:13 +0000 Subject: [PATCH 22/35] release: v0.1.10 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 157dbc7..9680966 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.9" +version = "0.1.10" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 2ebe138..cc6d11a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.9" +version = "0.1.10" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 5dfeaa0..adb142c 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.9", + "version": "0.1.10", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From a58912284b2a797c0d422182aa16c52ade05e580 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 26 Mar 2026 14:40:45 -0400 
Subject: [PATCH 23/35] reorder pipeline --- .github/workflows/ci.yml | 76 +++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 29 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bcb02b3..dcef6fb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,6 +28,9 @@ on: type: boolean default: false +env: + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true + permissions: contents: write @@ -186,38 +189,12 @@ jobs: - name: Distribution validation run: make dist-validate - build: - name: Build Release Asset - needs: [changes, validate, integration, distribution] - if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - with: - components: clippy - - - uses: Swatinem/rust-cache@v2 - - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - - name: Clippy - run: cargo clippy -- -D warnings - - - name: Build - run: cargo build --release --locked - - - uses: actions/upload-artifact@v4 - with: - name: deskctl-linux-x86_64 - path: target/release/deskctl - retention-days: 7 + # --- Release pipeline: update-manifests -> build -> release -> publish --- + # Version bump happens BEFORE build so the binary has the correct version. 
update-manifests: name: Update Manifests - needs: [changes, build] + needs: [changes, validate, integration, distribution] if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' runs-on: ubuntu-latest steps: @@ -258,6 +235,47 @@ jobs: git tag "${{ needs.changes.outputs.tag }}" git push origin main --tags + build: + name: Build Release Asset + needs: [changes, update-manifests] + if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.changes.outputs.tag }} + + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - uses: Swatinem/rust-cache@v2 + + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev + + - name: Verify version + run: | + CARGO_VER=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/') + EXPECTED="${{ needs.changes.outputs.version }}" + if [ "$CARGO_VER" != "$EXPECTED" ]; then + echo "Version mismatch: Cargo.toml=$CARGO_VER expected=$EXPECTED" + exit 1 + fi + echo "Building version $CARGO_VER" + + - name: Clippy + run: cargo clippy -- -D warnings + + - name: Build + run: cargo build --release --locked + + - uses: actions/upload-artifact@v4 + with: + name: deskctl-linux-x86_64 + path: target/release/deskctl + retention-days: 7 + release: name: Release needs: [changes, build, update-manifests] From 580ea79c276fb5eafb35300415441263bb523d04 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 18:47:09 +0000 Subject: [PATCH 24/35] release: v0.1.11 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9680966..b411e80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = 
"0.1.10" +version = "0.1.11" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index cc6d11a..59108df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.10" +version = "0.1.11" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index adb142c..60b8ed4 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.10", + "version": "0.1.11", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From ff26c570351fc9d3690bf20958291da5f0bf0e89 Mon Sep 17 00:00:00 2001 From: Hari <73809867+harivansh-afk@users.noreply.github.com> Date: Thu, 26 Mar 2026 15:25:40 -0400 Subject: [PATCH 25/35] replace firefox with chrome (#14) --- site/src/pages/commands.mdx | 14 +++--- site/src/pages/quick-start.mdx | 16 +++---- skills/deskctl/SKILL.md | 10 ++--- skills/deskctl/references/commands.md | 12 ++--- skills/deskctl/workflows/observe-act.sh | 2 +- src/cli/mod.rs | 58 ++++++++++++------------- src/core/refs.rs | 12 ++--- 7 files changed, 61 insertions(+), 63 deletions(-) diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx index 934cdb8..0696558 100644 --- a/site/src/pages/commands.mdx +++ b/site/src/pages/commands.mdx @@ -37,9 +37,9 @@ preferred read surface for focused state queries. ## Wait for state transitions ```sh -deskctl wait window --selector 'title=Firefox' --timeout 10 +deskctl wait window --selector 'title=Chromium' --timeout 10 deskctl wait focus --selector 'id=win3' --timeout 5 -deskctl --json wait window --selector 'class=firefox' --poll-ms 100 +deskctl --json wait window --selector 'class=chromium' --poll-ms 100 ``` Wait commands return the matched window payload on success. 
In `--json` mode, @@ -48,9 +48,9 @@ timeouts and selector failures expose structured `kind` values. ## Act on windows ```sh -deskctl launch firefox +deskctl launch chromium deskctl focus @w1 -deskctl focus 'title=Firefox' +deskctl focus 'title=Chromium' deskctl click @w1 deskctl click 960,540 deskctl dblclick @w2 @@ -86,8 +86,8 @@ more deterministic for automation, and easier to retry safely. ```sh ref=w1 id=win1 -title=Firefox -class=firefox +title=Chromium +class=chromium focused ``` @@ -99,7 +99,7 @@ w1 win1 ``` -Bare strings like `firefox` are fuzzy matches. They resolve when there is one +Bare strings like `chromium` are fuzzy matches. They resolve when there is one match and fail with candidate windows when there are multiple matches. ## Global options diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx index 7ecf5a7..4cc0e25 100644 --- a/site/src/pages/quick-start.mdx +++ b/site/src/pages/quick-start.mdx @@ -38,13 +38,13 @@ Prefer explicit selectors when you need deterministic targeting: ```sh ref=w1 id=win1 -title=Firefox -class=firefox +title=Chromium +class=chromium focused ``` Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare -strings like `firefox` are fuzzy matches and now fail on ambiguity. +strings like `chromium` are fuzzy matches and now fail on ambiguity. ## 4. 
Wait, act, verify @@ -55,16 +55,16 @@ The core loop is: deskctl snapshot --annotate # wait -deskctl wait window --selector 'title=Firefox' --timeout 10 +deskctl wait window --selector 'title=Chromium' --timeout 10 # act -deskctl focus 'title=Firefox' +deskctl focus 'title=Chromium' deskctl hotkey ctrl l deskctl type "https://example.com" deskctl press enter # verify -deskctl wait focus --selector 'title=Firefox' --timeout 5 +deskctl wait focus --selector 'title=Chromium' --timeout 5 deskctl snapshot ``` @@ -84,8 +84,8 @@ Every command supports `--json` and uses the same top-level envelope: { "ref_id": "w1", "window_id": "win1", - "title": "Firefox", - "app_name": "firefox", + "title": "Chromium", + "app_name": "chromium", "x": 0, "y": 0, "width": 1920, diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md index 67a77c5..c79ca21 100644 --- a/skills/deskctl/SKILL.md +++ b/skills/deskctl/SKILL.md @@ -30,8 +30,8 @@ Every desktop interaction follows: **observe -> wait -> act -> verify**. ```bash deskctl snapshot --annotate # observe -deskctl wait window --selector 'title=Firefox' --timeout 10 # wait -deskctl click 'title=Firefox' # act +deskctl wait window --selector 'title=Chromium' --timeout 10 # wait +deskctl click 'title=Chromium' # act deskctl snapshot # verify ``` @@ -42,12 +42,12 @@ See [workflows/observe-act.sh](workflows/observe-act.sh) for a reusable script. ```bash ref=w1 # snapshot ref (short-lived) id=win1 # stable window ID (session-scoped) -title=Firefox # match by title -class=firefox # match by WM class +title=Chromium # match by title +class=chromium # match by WM class focused # currently focused window ``` -Bare strings like `firefox` do fuzzy matching but fail on ambiguity. Prefer explicit selectors. +Bare strings like `chromium` do fuzzy matching but fail on ambiguity. Prefer explicit selectors. 
## References diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md index 27b4310..df69350 100644 --- a/skills/deskctl/references/commands.md +++ b/skills/deskctl/references/commands.md @@ -23,8 +23,8 @@ deskctl get-mouse-position ## Wait ```bash -deskctl wait window --selector 'title=Firefox' --timeout 10 -deskctl wait focus --selector 'class=firefox' --timeout 5 +deskctl wait window --selector 'title=Chromium' --timeout 10 +deskctl wait focus --selector 'class=chromium' --timeout 5 ``` Returns the matched window payload on success. Failures include structured @@ -35,8 +35,8 @@ Returns the matched window payload on success. Failures include structured ```bash ref=w1 id=win1 -title=Firefox -class=firefox +title=Chromium +class=chromium focused ``` @@ -46,7 +46,7 @@ on ambiguity. ## Act ```bash -deskctl focus 'class=firefox' +deskctl focus 'class=chromium' deskctl click @w1 deskctl dblclick @w2 deskctl type "hello world" @@ -59,7 +59,7 @@ deskctl mouse drag 100 100 500 500 deskctl move-window @w1 100 120 deskctl resize-window @w1 1280 720 deskctl close @w3 -deskctl launch firefox +deskctl launch chromium ``` The daemon starts automatically on first command. In normal usage you should diff --git a/skills/deskctl/workflows/observe-act.sh b/skills/deskctl/workflows/observe-act.sh index 0e336ae..8c3abc2 100755 --- a/skills/deskctl/workflows/observe-act.sh +++ b/skills/deskctl/workflows/observe-act.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # observe-act.sh - main desktop interaction loop # usage: ./observe-act.sh [action] [action-args...] 
-# example: ./observe-act.sh 'title=Firefox' click +# example: ./observe-act.sh 'title=Chromium' click # example: ./observe-act.sh 'class=terminal' type "ls -la" set -euo pipefail diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 28092d7..79008de 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -48,13 +48,13 @@ pub enum Command { /// Click a window ref or coordinates #[command(after_help = CLICK_EXAMPLES)] Click { - /// Selector (ref=w1, id=win1, title=Firefox, class=firefox, focused) or x,y coordinates + /// Selector (ref=w1, id=win1, title=Chromium, class=chromium, focused) or x,y coordinates selector: String, }, /// Double-click a window ref or coordinates #[command(after_help = DBLCLICK_EXAMPLES)] Dblclick { - /// Selector (ref=w1, id=win1, title=Firefox, class=firefox, focused) or x,y coordinates + /// Selector (ref=w1, id=win1, title=Chromium, class=chromium, focused) or x,y coordinates selector: String, }, /// Type text into the focused window @@ -81,19 +81,19 @@ pub enum Command { /// Focus a window by ref or name #[command(after_help = FOCUS_EXAMPLES)] Focus { - /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring + /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring selector: String, }, /// Close a window by ref or name #[command(after_help = CLOSE_EXAMPLES)] Close { - /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring + /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring selector: String, }, /// Move a window #[command(after_help = MOVE_WINDOW_EXAMPLES)] MoveWindow { - /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring + /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring selector: String, /// X position x: i32, @@ -103,7 +103,7 @@ pub enum Command { /// Resize a window #[command(after_help = 
RESIZE_WINDOW_EXAMPLES)] ResizeWindow { - /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring + /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring selector: String, /// Width w: u32, @@ -210,19 +210,19 @@ const SNAPSHOT_EXAMPLES: &str = const LIST_WINDOWS_EXAMPLES: &str = "Examples:\n deskctl list-windows\n deskctl --json list-windows"; const CLICK_EXAMPLES: &str = - "Examples:\n deskctl click @w1\n deskctl click 'title=Firefox'\n deskctl click 500,300"; + "Examples:\n deskctl click @w1\n deskctl click 'title=Chromium'\n deskctl click 500,300"; const DBLCLICK_EXAMPLES: &str = - "Examples:\n deskctl dblclick @w2\n deskctl dblclick 'class=firefox'\n deskctl dblclick 500,300"; + "Examples:\n deskctl dblclick @w2\n deskctl dblclick 'class=chromium'\n deskctl dblclick 500,300"; const TYPE_EXAMPLES: &str = "Examples:\n deskctl type \"hello world\"\n deskctl type \"https://example.com\""; const PRESS_EXAMPLES: &str = "Examples:\n deskctl press enter\n deskctl press escape"; const HOTKEY_EXAMPLES: &str = "Examples:\n deskctl hotkey ctrl l\n deskctl hotkey ctrl shift t"; const FOCUS_EXAMPLES: &str = - "Examples:\n deskctl focus @w1\n deskctl focus 'title=Firefox'\n deskctl focus focused"; + "Examples:\n deskctl focus @w1\n deskctl focus 'title=Chromium'\n deskctl focus focused"; const CLOSE_EXAMPLES: &str = - "Examples:\n deskctl close @w3\n deskctl close 'id=win2'\n deskctl close 'class=firefox'"; + "Examples:\n deskctl close @w3\n deskctl close 'id=win2'\n deskctl close 'class=chromium'"; const MOVE_WINDOW_EXAMPLES: &str = - "Examples:\n deskctl move-window @w1 100 200\n deskctl move-window 'title=Firefox' 0 0"; + "Examples:\n deskctl move-window @w1 100 200\n deskctl move-window 'title=Chromium' 0 0"; const RESIZE_WINDOW_EXAMPLES: &str = "Examples:\n deskctl resize-window @w1 1280 720\n deskctl resize-window 'id=win2' 800 600"; const GET_MONITORS_EXAMPLES: &str = @@ -237,12 +237,12 @@ const 
GET_MOUSE_POSITION_EXAMPLES: &str = const DOCTOR_EXAMPLES: &str = "Examples:\n deskctl doctor\n deskctl --json doctor"; const UPGRADE_EXAMPLES: &str = "Examples:\n deskctl upgrade\n deskctl upgrade --yes\n deskctl --json upgrade --yes"; -const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Firefox' --timeout 10\n deskctl --json wait window --selector 'class=firefox' --poll-ms 100"; +const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Chromium' --timeout 10\n deskctl --json wait window --selector 'class=chromium' --poll-ms 100"; const WAIT_FOCUS_EXAMPLES: &str = "Examples:\n deskctl wait focus --selector 'id=win3' --timeout 5\n deskctl wait focus --selector focused --poll-ms 200"; const SCREENSHOT_EXAMPLES: &str = "Examples:\n deskctl screenshot\n deskctl screenshot /tmp/screen.png\n deskctl screenshot --annotate"; const LAUNCH_EXAMPLES: &str = - "Examples:\n deskctl launch firefox\n deskctl launch code -- --new-window"; + "Examples:\n deskctl launch chromium\n deskctl launch code -- --new-window"; const MOUSE_MOVE_EXAMPLES: &str = "Examples:\n deskctl mouse move 500 300\n deskctl mouse move 0 0"; const MOUSE_SCROLL_EXAMPLES: &str = @@ -277,7 +277,7 @@ pub enum WaitCmd { #[derive(Args)] pub struct WaitSelectorOpts { - /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring + /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring #[arg(long)] pub selector: String, @@ -1103,8 +1103,8 @@ mod tests { "windows": [{ "ref_id": "w1", "window_id": "win1", - "title": "Firefox", - "app_name": "firefox", + "title": "Chromium", + "app_name": "chromium", "x": 0, "y": 0, "width": 1280, @@ -1125,37 +1125,37 @@ mod tests { fn action_text_includes_target_identity() { let lines = render_success_lines( &Command::Focus { - selector: "title=Firefox".to_string(), + selector: "title=Chromium".to_string(), }, Some(&json!({ "action": "focus", - "window": 
"Firefox", - "title": "Firefox", + "window": "Chromium", + "title": "Chromium", "ref_id": "w2", "window_id": "win7" })), ) .unwrap(); - assert_eq!(lines, vec!["Focused @w2 [win7] \"Firefox\""]); + assert_eq!(lines, vec!["Focused @w2 [win7] \"Chromium\""]); } #[test] fn timeout_errors_render_last_observation() { let lines = render_error_lines(&Response::err_with_data( - "Timed out waiting for focus to match selector: title=Firefox", + "Timed out waiting for focus to match selector: title=Chromium", json!({ "kind": "timeout", "wait": "focus", - "selector": "title=Firefox", + "selector": "title=Chromium", "timeout_ms": 1000, "last_observation": { "kind": "window_not_focused", "window": { "ref_id": "w1", "window_id": "win1", - "title": "Firefox", - "app_name": "firefox", + "title": "Chromium", + "app_name": "chromium", "x": 0, "y": 0, "width": 1280, @@ -1167,10 +1167,8 @@ mod tests { }), )); - assert!(lines - .iter() - .any(|line| line - .contains("Timed out after 1000ms waiting for focus selector title=Firefox"))); + assert!(lines.iter().any(|line| line + .contains("Timed out after 1000ms waiting for focus selector title=Chromium"))); assert!(lines .iter() .any(|line| line.contains("matching window exists but is not focused yet"))); @@ -1190,9 +1188,9 @@ mod tests { let summary = target_summary(&json!({ "ref_id": "w1", "window_id": "win1", - "title": "Firefox" + "title": "Chromium" })); - assert_eq!(summary.as_deref(), Some("@w1 [win1] \"Firefox\"")); + assert_eq!(summary.as_deref(), Some("@w1 [win1] \"Chromium\"")); } #[test] diff --git a/src/core/refs.rs b/src/core/refs.rs index 34e1ba7..7fd7b6c 100644 --- a/src/core/refs.rs +++ b/src/core/refs.rs @@ -412,8 +412,8 @@ mod tests { SelectorQuery::WindowId("win4".to_string()) ); assert_eq!( - SelectorQuery::parse("title=Firefox"), - SelectorQuery::Title("Firefox".to_string()) + SelectorQuery::parse("title=Chromium"), + SelectorQuery::Title("Chromium".to_string()) ); assert_eq!( SelectorQuery::parse("class=Navigator"), 
@@ -458,11 +458,11 @@ mod tests { fn fuzzy_resolution_fails_with_candidates_when_ambiguous() { let mut refs = RefMap::new(); refs.rebuild(&[ - sample_window(1, "Firefox"), + sample_window(1, "Chromium"), BackendWindow { native_id: 2, - title: "Firefox Settings".to_string(), - app_name: "Firefox".to_string(), + title: "Chromium Settings".to_string(), + app_name: "Chromium".to_string(), x: 0, y: 0, width: 10, @@ -472,7 +472,7 @@ mod tests { }, ]); - match refs.resolve("firefox") { + match refs.resolve("chromium") { ResolveResult::Ambiguous { mode, candidates, .. } => { From 3a8d9f90c1ac036cfd5bdb30daf7275909870dd9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 26 Mar 2026 19:31:47 +0000 Subject: [PATCH 26/35] release: v0.1.12 [skip ci] --- Cargo.lock | 2 +- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b411e80..4acd174 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.11" +version = "0.1.12" dependencies = [ "ab_glyph", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 59108df..d782ecd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.11" +version = "0.1.12" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 60b8ed4..1dd5bff 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.11", + "version": "0.1.12", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 3ca6c90eafc6020b99730904a70e5f1593ca8441 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Fri, 27 Mar 2026 00:20:37 -0400 Subject: [PATCH 27/35] fix 
termination bug --- src/daemon/mod.rs | 58 +++++++++++++++++++++++++++++++------------- tests/support/mod.rs | 30 +++++++++++++++++++++++ tests/x11_runtime.rs | 25 +++++++++++++++++++ 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs index 3df1d9a..9e7e931 100644 --- a/src/daemon/mod.rs +++ b/src/daemon/mod.rs @@ -1,6 +1,7 @@ mod handler; mod state; +use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{Context, Result}; @@ -12,6 +13,29 @@ use crate::core::paths::{pid_path_from_env, socket_path_from_env}; use crate::core::session; use state::DaemonState; +struct RuntimePathsGuard { + socket_path: PathBuf, + pid_path: Option, +} + +impl RuntimePathsGuard { + fn new(socket_path: PathBuf, pid_path: Option) -> Self { + Self { + socket_path, + pid_path, + } + } +} + +impl Drop for RuntimePathsGuard { + fn drop(&mut self) { + remove_runtime_path(&self.socket_path); + if let Some(ref pid_path) = self.pid_path { + remove_runtime_path(pid_path); + } + } +} + pub fn run() -> Result<()> { // Validate session before starting session::detect_session()?; @@ -25,7 +49,6 @@ pub fn run() -> Result<()> { async fn async_run() -> Result<()> { let socket_path = socket_path_from_env().context("DESKCTL_SOCKET_PATH not set")?; - let pid_path = pid_path_from_env(); // Clean up stale socket @@ -33,20 +56,21 @@ async fn async_run() -> Result<()> { std::fs::remove_file(&socket_path)?; } - // Write PID file - if let Some(ref pid_path) = pid_path { - std::fs::write(pid_path, std::process::id().to_string())?; - } - - let listener = UnixListener::bind(&socket_path) - .context(format!("Failed to bind socket: {}", socket_path.display()))?; - let session = std::env::var("DESKCTL_SESSION").unwrap_or_else(|_| "default".to_string()); let state = Arc::new(Mutex::new( DaemonState::new(session, socket_path.clone()) .context("Failed to initialize daemon state")?, )); + let listener = UnixListener::bind(&socket_path) + .context(format!("Failed 
to bind socket: {}", socket_path.display()))?; + let _runtime_paths = RuntimePathsGuard::new(socket_path.clone(), pid_path.clone()); + + // Write PID file only after the daemon is ready to serve requests. + if let Some(ref pid_path) = pid_path { + std::fs::write(pid_path, std::process::id().to_string())?; + } + let shutdown = Arc::new(tokio::sync::Notify::new()); let shutdown_clone = shutdown.clone(); @@ -75,14 +99,6 @@ async fn async_run() -> Result<()> { } } - // Cleanup - if socket_path.exists() { - let _ = std::fs::remove_file(&socket_path); - } - if let Some(ref pid_path) = pid_path { - let _ = std::fs::remove_file(pid_path); - } - Ok(()) } @@ -123,3 +139,11 @@ async fn handle_connection( Ok(()) } + +fn remove_runtime_path(path: &Path) { + if let Err(error) = std::fs::remove_file(path) { + if error.kind() != std::io::ErrorKind::NotFound { + eprintln!("Failed to remove runtime path {}: {error}", path.display()); + } + } +} diff --git a/tests/support/mod.rs b/tests/support/mod.rs index 5c6f0be..719334d 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -142,6 +142,10 @@ impl TestSession { .expect("TestSession always has an explicit socket path") } + pub fn pid_path(&self) -> PathBuf { + self.root.join("deskctl.pid") + } + pub fn create_stale_socket(&self) -> Result<()> { let listener = UnixListener::bind(self.socket_path()) .with_context(|| format!("Failed to bind {}", self.socket_path().display()))?; @@ -187,6 +191,29 @@ impl TestSession { ) }) } + + pub fn run_daemon(&self, env: I) -> Result + where + I: IntoIterator, + K: AsRef, + V: AsRef, + { + let mut command = Command::new(env!("CARGO_BIN_EXE_deskctl")); + command + .env("DESKCTL_DAEMON", "1") + .env("DESKCTL_SOCKET_PATH", self.socket_path()) + .env("DESKCTL_PID_PATH", self.pid_path()) + .env("DESKCTL_SESSION", &self.opts.session) + .envs(env); + + command.output().with_context(|| { + format!( + "Failed to run daemon {} against {}", + env!("CARGO_BIN_EXE_deskctl"), + 
self.socket_path().display() + ) + }) + } } impl Drop for TestSession { @@ -195,6 +222,9 @@ impl Drop for TestSession { if self.socket_path().exists() { let _ = std::fs::remove_file(self.socket_path()); } + if self.pid_path().exists() { + let _ = std::fs::remove_file(self.pid_path()); + } let _ = std::fs::remove_dir_all(&self.root); } } diff --git a/tests/x11_runtime.rs b/tests/x11_runtime.rs index 2aac58c..30308cb 100644 --- a/tests/x11_runtime.rs +++ b/tests/x11_runtime.rs @@ -114,6 +114,31 @@ fn daemon_start_recovers_from_stale_socket() -> Result<()> { Ok(()) } +#[test] +fn daemon_init_failure_cleans_runtime_state() -> Result<()> { + let _guard = env_lock_guard(); + let session = TestSession::new("daemon-init-failure")?; + + let output = session.run_daemon([("XDG_SESSION_TYPE", "x11"), ("DISPLAY", ":99999")])?; + assert!(!output.status.success(), "daemon startup should fail"); + + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("Failed to initialize daemon state"), + "unexpected stderr: {stderr}" + ); + assert!( + !session.socket_path().exists(), + "failed startup should remove the socket path" + ); + assert!( + !session.pid_path().exists(), + "failed startup should remove the pid path" + ); + + Ok(()) +} + #[test] fn wait_window_returns_matched_window_payload() -> Result<()> { let _guard = env_lock_guard(); From 9bfada8b4bb06a74d9e45a3f332efb949b02d2ff Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Fri, 27 Mar 2026 10:04:10 -0400 Subject: [PATCH 28/35] fix helper --- tests/support/mod.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/support/mod.rs b/tests/support/mod.rs index 719334d..07cc5a7 100644 --- a/tests/support/mod.rs +++ b/tests/support/mod.rs @@ -4,6 +4,7 @@ use std::os::unix::net::UnixListener; use std::path::{Path, PathBuf}; use std::process::{Command, Output}; use std::sync::{Mutex, OnceLock}; +use std::thread; use std::time::{SystemTime, UNIX_EPOCH}; use 
anyhow::{anyhow, bail, Context, Result}; @@ -60,8 +61,7 @@ pub struct FixtureWindow { impl FixtureWindow { pub fn create(title: &str, app_class: &str) -> Result { - let (conn, screen_num) = - x11rb::connect(None).context("Failed to connect to the integration test display")?; + let (conn, screen_num) = connect_to_test_display()?; let screen = &conn.setup().roots[screen_num]; let window = conn.generate_id()?; @@ -103,6 +103,26 @@ impl FixtureWindow { } } +fn connect_to_test_display() -> Result<(RustConnection, usize)> { + let max_attempts = 10; + let mut last_error = None; + + for attempt in 0..max_attempts { + match x11rb::connect(None) { + Ok(connection) => return Ok(connection), + Err(error) => { + last_error = Some(anyhow!(error)); + if attempt + 1 < max_attempts { + thread::sleep(std::time::Duration::from_millis(100 * (attempt + 1) as u64)); + } + } + } + } + + Err(last_error.expect("x11 connection attempts should capture an error")) + .context("Failed to connect to the integration test display") +} + impl Drop for FixtureWindow { fn drop(&mut self) { let _ = self.conn.destroy_window(self.window); From 85e191663547943b5a3468f48ecba45768271f74 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 27 Mar 2026 14:10:07 +0000 Subject: [PATCH 29/35] release: v0.1.13 [skip ci] --- Cargo.lock | 22 +++++++++++----------- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4acd174..c948da0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -241,9 +241,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.57" +version = "1.2.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" +checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1" dependencies = [ "find-msvc-tools", "jobserver", @@ -400,7 
+400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.12" +version = "0.1.13" dependencies = [ "ab_glyph", "anyhow", @@ -1039,9 +1039,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -1699,9 +1699,9 @@ dependencies = [ [[package]] name = "simd-adler32" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "simd_helpers" @@ -1861,9 +1861,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.22.0" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -2297,9 +2297,9 @@ dependencies = [ [[package]] name = "zune-jpeg" -version = "0.5.14" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7a1c0af6e5d8d1363f4994b7a091ccf963d8b694f7da5b0b9cceb82da2c0a6" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" dependencies = [ "zune-core", ] diff --git a/Cargo.toml b/Cargo.toml index d782ecd..2c4745c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.12" +version = "0.1.13" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff 
--git a/npm/deskctl/package.json b/npm/deskctl/package.json index 1dd5bff..327fb33 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.12", + "version": "0.1.13", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 2107449d9bf1425de3c43d1036465cbed69535cf Mon Sep 17 00:00:00 2001 From: Hari <73809867+harivansh-afk@users.noreply.github.com> Date: Fri, 27 Mar 2026 18:17:51 -0400 Subject: [PATCH 30/35] Update README with asset link and description change Added a link to GitHub assets and removed 'Linux' from description. --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 935f329..dccbe04 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,11 @@ # deskctl - [![npm](https://img.shields.io/npm/v/deskctl?label=npm)](https://www.npmjs.com/package/deskctl) [![skill](https://img.shields.io/badge/skills.sh-deskctl-111827)](skills/deskctl) -Desktop control cli for AI agents on Linux X11. +Desktop control cli for AI agents on X11. + +https://github.com/user-attachments/assets/e820787e-4d1a-463f-bdcf-a829588778bf + ## Install From 19669fb4c14462abd2cfc864328cb9c3c2143bf0 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Fri, 27 Mar 2026 19:25:26 -0400 Subject: [PATCH 31/35] demo --- demo/index.html | 969 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 969 insertions(+) create mode 100644 demo/index.html diff --git a/demo/index.html b/demo/index.html new file mode 100644 index 0000000..70ac230 --- /dev/null +++ b/demo/index.html @@ -0,0 +1,969 @@ + + + + + +deskctl - Desktop Control for AI Agents + + + + +
    +

    deskctl

    +

    desktop control CLI for AI agents

    +
    + +
    +
    +
    +
    +
    +
    +
    +
    +
    + + +
    +
    +
    + Files ~/reports +
    +
    +
    +
    + 📝 + task_brief.txt + 2.1 KB +
    +
    + 📊 + nvda_q1_data.csv + 48 KB +
    +
    + 📄 + prev_report.pdf + 1.2 MB +
    +
    + 📁 + archive/ + -- +
    +
    +
    + task: Prepare NVDA Q1 earnings summary
    + source: finance.yahoo.com, local csv
    + output: Google Docs report with chart +
    +
    +
    + + +
    +
    +
    + Chrome - Yahoo Finance +
    +
    +
    + NVDA + $924.68 + +3.42% + 1Y +
    +
    + + + + + + + + + + + + + $950 + $800 + $650 + +
    +
    +
    +
    + + +
    +
    +
    + Chrome - Google Docs +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + + + + + + + + + + + NVDA 1Y + +
    +
    +
    +
    + + +
    @w1
    +
    @w2
    +
    @w3
    + +
    + +
    + +
    +
    + + +
    +
    Files
    +
    Yahoo Finance
    +
    Google Docs
    +
    +
    +
    + +
    +
    +
    +
    +
    + agent computer +
    +
    +
    +
    + +
    +

    AI agent controlling a live desktop via deskctl

    + +
    + + + + From 2b3d422c7b0d40b5523a07d3f2c3c81b5fb42702 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Fri, 27 Mar 2026 19:40:24 -0400 Subject: [PATCH 32/35] crates.io --- .github/workflows/ci.yml | 54 +++++++++++++++++++++++++++----------- docs/releasing.md | 10 +++---- site/src/pages/index.astro | 3 +++ 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dcef6fb..0bcc90c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ on: publish_crates: description: Publish to crates.io type: boolean - default: false + default: true env: FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true @@ -309,27 +309,25 @@ jobs: artifacts/checksums.txt fi - publish: - name: Publish + publish-npm: + name: Publish npm needs: [changes, update-manifests, release] - if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true' + if: >- + github.event_name != 'pull_request' + && needs.changes.outputs.rust == 'true' + && (inputs.publish_npm == true || inputs.publish_npm == '') runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: ref: ${{ needs.changes.outputs.tag }} - - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-node@v4 with: node-version: 22 registry-url: https://registry.npmjs.org - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - - name: Check current published state + - name: Check if already published id: published run: | VERSION="${{ needs.changes.outputs.version }}" @@ -338,13 +336,9 @@ jobs: else echo "npm=false" >> "$GITHUB_OUTPUT" fi - if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then - echo "crates=true" >> "$GITHUB_OUTPUT" - else - echo "crates=false" >> "$GITHUB_OUTPUT" - fi - name: Validate npm package + if: steps.published.outputs.npm != 'true' run: node npm/deskctl/scripts/validate-package.js - name: Publish npm @@ -353,8 
+347,36 @@ jobs: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} run: npm publish ./npm/deskctl --access public + publish-crates: + name: Publish crates.io + needs: [changes, update-manifests, release] + if: >- + github.event_name != 'pull_request' + && needs.changes.outputs.rust == 'true' + && (inputs.publish_crates == true || inputs.publish_crates == '') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ needs.changes.outputs.tag }} + + - uses: dtolnay/rust-toolchain@stable + + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev + + - name: Check if already published + id: published + run: | + VERSION="${{ needs.changes.outputs.version }}" + if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then + echo "crates=true" >> "$GITHUB_OUTPUT" + else + echo "crates=false" >> "$GITHUB_OUTPUT" + fi + - name: Publish crates.io - if: inputs.publish_crates && steps.published.outputs.crates != 'true' + if: steps.published.outputs.crates != 'true' env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} run: cargo publish --locked diff --git a/docs/releasing.md b/docs/releasing.md index 8f39d3f..849d661 100644 --- a/docs/releasing.md +++ b/docs/releasing.md @@ -59,12 +59,12 @@ The repository release workflow: - publishes the canonical GitHub Release asset - uploads `checksums.txt` -The registry publish workflow: +The registry publish jobs (npm and crates.io run in parallel): -- targets an existing release tag -- checks that Cargo, npm, and the requested tag all agree on version -- checks whether that version is already published on npm and crates.io -- only publishes the channels explicitly requested +- target an existing release tag +- check whether that version is already published on the respective registry +- skip already-published versions +- both default to enabled; can be toggled via workflow_dispatch inputs ## Rerun Safety diff --git 
a/site/src/pages/index.astro b/site/src/pages/index.astro index b914e16..8dfde01 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -43,6 +43,9 @@ import DocLayout from "../layouts/DocLayout.astro";
  • GitHub
  • +
  • + crates.io +
  • npm
  • From 2b7de5fceff991d02308081161079fa1f6176aad Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 27 Mar 2026 23:46:33 +0000 Subject: [PATCH 33/35] release: v0.1.14 [skip ci] --- Cargo.lock | 22 +++++++++++----------- Cargo.toml | 2 +- npm/deskctl/package.json | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c948da0..eb0e2ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "deskctl" -version = "0.1.13" +version = "0.1.14" dependencies = [ "ab_glyph", "anyhow", @@ -911,9 +911,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.91" +version = "0.3.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995" dependencies = [ "once_cell", "wasm-bindgen", @@ -1907,9 +1907,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a" dependencies = [ "cfg-if", "once_cell", @@ -1920,9 +1920,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1930,9 +1930,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf" dependencies = [ "bumpalo", "proc-macro2", @@ -1943,9 +1943,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.114" +version = "0.2.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93" dependencies = [ "unicode-ident", ] diff --git a/Cargo.toml b/Cargo.toml index 2c4745c..be051c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "deskctl" -version = "0.1.13" +version = "0.1.14" edition = "2021" description = "X11 desktop control CLI for agents" license = "MIT" diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json index 327fb33..c676924 100644 --- a/npm/deskctl/package.json +++ b/npm/deskctl/package.json @@ -1,6 +1,6 @@ { "name": "deskctl", - "version": "0.1.13", + "version": "0.1.14", "description": "Installable deskctl package for Linux X11 agents", "license": "MIT", "homepage": "https://github.com/harivansh-afk/deskctl", From 13119eecf7cd96024ac4f0e3f435f4eb45d2759f Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Thu, 2 Apr 2026 14:53:56 -0400 Subject: [PATCH 34/35] update crates.io link --- site/src/pages/index.astro | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro index 8dfde01..478c7a2 100644 --- a/site/src/pages/index.astro +++ b/site/src/pages/index.astro @@ -44,7 +44,7 @@ import DocLayout from "../layouts/DocLayout.astro"; GitHub
  • - crates.io + crates.io
  • npm From 32c6d337f102a2d68de6e7d30ac9e2a8162ff010 Mon Sep 17 00:00:00 2001 From: Harivansh Rathi Date: Sun, 5 Apr 2026 11:46:45 -0400 Subject: [PATCH 35/35] ci: use self-hosted netty runners for validation jobs --- .github/workflows/ci.yml | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bcc90c..1c2e7f4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,9 +1,5 @@ name: CI -# Runners: uvacompute (https://uvacompute.com) -# To enable, set the UVA_RUNNER repo variable to the correct runner label. -# runs-on: ${{ vars.UVA_RUNNER || 'ubuntu-latest' }} - on: pull_request: branches: [main] @@ -37,7 +33,7 @@ permissions: jobs: changes: name: Changes - runs-on: ubuntu-latest + runs-on: [self-hosted, netty] outputs: rust: ${{ steps.check.outputs.rust }} version: ${{ steps.version.outputs.version }} @@ -105,7 +101,7 @@ jobs: name: Validate needs: changes if: needs.changes.outputs.rust == 'true' - runs-on: ubuntu-latest + runs-on: [self-hosted, netty] steps: - uses: actions/checkout@v4 @@ -129,9 +125,6 @@ jobs: - name: Install site dependencies run: pnpm --dir site install --frozen-lockfile - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - name: Format check run: make fmt-check @@ -148,7 +141,7 @@ jobs: name: Integration (Xvfb) needs: changes if: needs.changes.outputs.rust == 'true' - runs-on: ubuntu-latest + runs-on: [self-hosted, netty] steps: - uses: actions/checkout@v4 @@ -156,9 +149,6 @@ jobs: - uses: Swatinem/rust-cache@v2 - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev xvfb - - name: Xvfb integration tests run: make test-integration @@ -166,7 +156,7 @@ jobs: name: Distribution Validate needs: changes if: needs.changes.outputs.rust == 'true' - runs-on: ubuntu-latest + runs-on: [self-hosted, netty] steps: - 
uses: actions/checkout@v4 @@ -178,19 +168,11 @@ jobs: with: node-version: 22 - - uses: cachix/install-nix-action@v30 - with: - extra_nix_config: | - experimental-features = nix-command flakes - - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev - - name: Distribution validation run: make dist-validate # --- Release pipeline: update-manifests -> build -> release -> publish --- - # Version bump happens BEFORE build so the binary has the correct version. + # These stay on ubuntu-latest for artifact upload/download and registry publishing. update-manifests: name: Update Manifests