From 714e34ba1920e0267ce059c43074adfeabadc2d0 Mon Sep 17 00:00:00 2001
From: Hari <73809867+harivansh-afk@users.noreply.github.com>
Date: Wed, 25 Mar 2026 23:18:28 -0400
Subject: [PATCH 01/37] nix (#7)
npm
cargo
---
.github/workflows/ci.yml | 130 +++++++++++---------
.github/workflows/publish.yml | 102 +++++++++++++++
.gitignore | 2 +
CONTRIBUTING.md | 18 +++
Cargo.toml | 13 ++
Makefile | 32 ++++-
README.md | 53 +++++++-
docs/releasing.md | 110 +++++++++++++++++
flake.lock | 61 +++++++++
flake.nix | 77 ++++++++++++
npm/deskctl-cli/README.md | 36 ++++++
npm/deskctl-cli/bin/deskctl.js | 36 ++++++
npm/deskctl-cli/package.json | 36 ++++++
npm/deskctl-cli/scripts/postinstall.js | 49 ++++++++
npm/deskctl-cli/scripts/support.js | 120 ++++++++++++++++++
npm/deskctl-cli/scripts/validate-package.js | 40 ++++++
16 files changed, 849 insertions(+), 66 deletions(-)
create mode 100644 .github/workflows/publish.yml
create mode 100644 docs/releasing.md
create mode 100644 flake.lock
create mode 100644 flake.nix
create mode 100644 npm/deskctl-cli/README.md
create mode 100644 npm/deskctl-cli/bin/deskctl.js
create mode 100644 npm/deskctl-cli/package.json
create mode 100644 npm/deskctl-cli/scripts/postinstall.js
create mode 100644 npm/deskctl-cli/scripts/support.js
create mode 100644 npm/deskctl-cli/scripts/validate-package.js
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 18311e0..e95b27a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,7 +13,6 @@ on:
permissions:
contents: write
- packages: write
jobs:
changes:
@@ -37,7 +36,11 @@ jobs:
- 'tests/**'
- 'Cargo.toml'
- 'Cargo.lock'
+ - 'npm/**'
+ - 'flake.nix'
+ - 'flake.lock'
- 'docker/**'
+ - '.github/workflows/**'
- 'Makefile'
- name: Set outputs
@@ -137,72 +140,36 @@ jobs:
- name: Xvfb integration tests
run: make test-integration
- build:
- name: Build (${{ matrix.target }})
- needs: [changes, validate, integration]
- if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
+ distribution:
+ name: Distribution Validate
+ needs: changes
+ if: needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
- strategy:
- fail-fast: true
- matrix:
- target: [cargo, docker]
steps:
- uses: actions/checkout@v4
- # --- Cargo steps ---
- uses: dtolnay/rust-toolchain@stable
- if: matrix.target == 'cargo'
- with:
- components: clippy
- uses: Swatinem/rust-cache@v2
- if: matrix.target == 'cargo'
+
+ - uses: actions/setup-node@v4
+ with:
+ node-version: 22
+
+ - uses: cachix/install-nix-action@v30
+ with:
+ extra_nix_config: |
+ experimental-features = nix-command flakes
- name: Install system dependencies
- if: matrix.target == 'cargo'
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- - name: Clippy
- if: matrix.target == 'cargo'
- run: cargo clippy -- -D warnings
-
- - name: Build
- if: matrix.target == 'cargo'
- run: cargo build --release --locked
-
- - uses: actions/upload-artifact@v4
- if: matrix.target == 'cargo'
- with:
- name: deskctl-linux-x86_64
- path: target/release/deskctl
- retention-days: 7
-
- # --- Docker steps ---
- - uses: docker/setup-buildx-action@v3
- if: matrix.target == 'docker'
-
- - uses: docker/login-action@v3
- if: matrix.target == 'docker'
- with:
- registry: ghcr.io
- username: ${{ github.actor }}
- password: ${{ secrets.GITHUB_TOKEN }}
-
- - uses: docker/build-push-action@v6
- if: matrix.target == 'docker'
- with:
- context: .
- file: docker/Dockerfile
- push: true
- tags: |
- ghcr.io/${{ github.repository }}:latest
- ghcr.io/${{ github.repository }}:${{ needs.changes.outputs.tag }}
- cache-from: type=gha
- cache-to: type=gha,mode=max
+ - name: Distribution validation
+ run: make dist-validate
update-manifests:
name: Update Manifests
- needs: [changes, build]
+ needs: [changes, validate, integration, distribution]
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
@@ -212,12 +179,17 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
+ - uses: actions/setup-node@v4
+ with:
+ node-version: 22
+
- name: Update version in Cargo.toml
run: |
CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
NEW="${{ needs.changes.outputs.version }}"
if [ "$CURRENT" != "$NEW" ]; then
sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
+ node -e 'const fs=require("node:fs"); const path="npm/deskctl-cli/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
cargo generate-lockfile
fi
@@ -227,7 +199,7 @@ jobs:
git config user.email "github-actions[bot]@users.noreply.github.com"
if ! git diff --quiet; then
- git add Cargo.toml Cargo.lock
+ git add Cargo.toml Cargo.lock npm/deskctl-cli/package.json
git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]"
fi
@@ -236,6 +208,38 @@ jobs:
fi
git push origin main --tags
+ build:
+ name: Build Release Asset
+ needs: [changes, update-manifests]
+ if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ needs.changes.outputs.tag }}
+ fetch-depth: 0
+
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ components: clippy
+
+ - uses: Swatinem/rust-cache@v2
+
+ - name: Install system dependencies
+ run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
+
+ - name: Clippy
+ run: cargo clippy -- -D warnings
+
+ - name: Build
+ run: cargo build --release --locked
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: deskctl-linux-x86_64
+ path: target/release/deskctl
+ retention-days: 7
+
release:
name: Release
needs: [changes, build, update-manifests]
@@ -256,9 +260,15 @@ jobs:
chmod +x artifacts/deskctl
mv artifacts/deskctl artifacts/deskctl-linux-x86_64
cd artifacts && sha256sum deskctl-linux-x86_64 > checksums.txt && cd ..
-
- gh release create "${{ needs.changes.outputs.tag }}" \
- --title "${{ needs.changes.outputs.tag }}" \
- --generate-notes \
- artifacts/deskctl-linux-x86_64 \
- artifacts/checksums.txt
+ if gh release view "${{ needs.changes.outputs.tag }}" >/dev/null 2>&1; then
+ gh release upload "${{ needs.changes.outputs.tag }}" \
+ artifacts/deskctl-linux-x86_64 \
+ artifacts/checksums.txt \
+ --clobber
+ else
+ gh release create "${{ needs.changes.outputs.tag }}" \
+ --title "${{ needs.changes.outputs.tag }}" \
+ --generate-notes \
+ artifacts/deskctl-linux-x86_64 \
+ artifacts/checksums.txt
+ fi
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000..329f151
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,102 @@
+name: Publish Registries
+
+on:
+ workflow_dispatch:
+ inputs:
+ tag:
+ description: Release tag to publish (for example v0.1.5)
+ required: true
+ type: string
+ publish_npm:
+ description: Publish deskctl-cli to npm
+ required: true
+ type: boolean
+ default: false
+ publish_crates:
+ description: Publish deskctl to crates.io
+ required: true
+ type: boolean
+ default: false
+
+permissions:
+ contents: read
+
+jobs:
+ publish:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ inputs.tag }}
+
+ - uses: dtolnay/rust-toolchain@stable
+
+ - uses: actions/setup-node@v4
+ with:
+ node-version: 22
+
+ - name: Install system dependencies
+ run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
+
+ - name: Verify release exists and contains canonical assets
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
+ gh release view "${{ inputs.tag }}" --json assets --jq '.assets[].name' > /tmp/release-assets.txt
+ grep -Fx "deskctl-linux-x86_64" /tmp/release-assets.txt >/dev/null
+ grep -Fx "checksums.txt" /tmp/release-assets.txt >/dev/null
+
+ - name: Verify versions align with tag
+ run: |
+ TAG="${{ inputs.tag }}"
+ VERSION="${TAG#v}"
+ CARGO_VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ NPM_VERSION=$(node -p 'require("./npm/deskctl-cli/package.json").version')
+
+ test "$VERSION" = "$CARGO_VERSION"
+ test "$VERSION" = "$NPM_VERSION"
+
+ - name: Check current published state
+ id: published
+ run: |
+ VERSION="${{ inputs.tag }}"
+ VERSION="${VERSION#v}"
+
+ if npm view "deskctl-cli@${VERSION}" version >/dev/null 2>&1; then
+ echo "npm=true" >> "$GITHUB_OUTPUT"
+ else
+ echo "npm=false" >> "$GITHUB_OUTPUT"
+ fi
+
+ if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then
+ echo "crates=true" >> "$GITHUB_OUTPUT"
+ else
+ echo "crates=false" >> "$GITHUB_OUTPUT"
+ fi
+
+ - name: Validate npm package
+ run: |
+ mkdir -p ./tmp/npm-pack
+ node npm/deskctl-cli/scripts/validate-package.js
+ npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null
+
+ - name: Validate crate publish path
+ run: cargo publish --dry-run --locked
+
+      # actions/setup-node only writes an .npmrc that reads NODE_AUTH_TOKEN when
+      # `registry-url` is configured. This workflow does not set it, so the token
+      # is registered with npm explicitly before publishing.
+      - name: Publish npm
+        if: inputs.publish_npm && steps.published.outputs.npm != 'true'
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+        run: |
+          npm config set "//registry.npmjs.org/:_authToken=${NODE_AUTH_TOKEN}"
+          npm publish ./npm/deskctl-cli --access public
+
+ - name: Publish crates.io
+ if: inputs.publish_crates && steps.published.outputs.crates != 'true'
+ env:
+ CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+ run: cargo publish --locked
+
+ - name: Summary
+ run: |
+ echo "tag=${{ inputs.tag }}"
+ echo "npm_already_published=${{ steps.published.outputs.npm }}"
+ echo "crates_already_published=${{ steps.published.outputs.crates }}"
diff --git a/.gitignore b/.gitignore
index 7406874..db552f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ secret/
.claude/
.codex/
openspec/
+npm/deskctl-cli/vendor/
+npm/deskctl-cli/*.tgz
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 7a1a2a2..bdbce4e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -35,10 +35,15 @@ make lint
make test-unit
make test-integration
make site-format-check
+make cargo-publish-dry-run
+make npm-package-check
+make nix-flake-check
+make dist-validate
make validate
```
`make validate` runs the full Phase 2 validation stack. It requires Linux, `xvfb-run`, and site dependencies to be installed.
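+`make dist-validate` runs the distribution validation stack: unit tests, `cargo publish --dry-run`, the npm package check, and `nix flake check`. It requires `npm` and `nix`; the npm runtime smoke test only runs on Linux and is skipped on other hosts.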
## Pre-commit Hooks
@@ -60,6 +65,19 @@ The hook config intentionally stays small:
- Site files reuse the existing `site/` Prettier setup
- Slower checks stay in CI or `make validate`
+## Distribution Work
+
+Distribution support currently ships through:
+
+- crate: `deskctl`
+- npm package: `deskctl-cli`
+- repo flake: `flake.nix`
+- command name on every channel: `deskctl`
+
+For maintainer release and publish steps, see [docs/releasing.md](docs/releasing.md).
+
+Source-build and packaging work should keep Docker as a local Linux build convenience, not as the canonical registry release path.
+
## Integration Tests
Integration coverage is Linux/X11-only in this phase. The supported local entrypoint is:
diff --git a/Cargo.toml b/Cargo.toml
index 023e18a..f373679 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,19 @@ edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
repository = "https://github.com/harivansh-afk/deskctl"
+homepage = "https://github.com/harivansh-afk/deskctl"
+readme = "README.md"
+keywords = ["x11", "desktop", "automation", "cli", "agent"]
+categories = ["command-line-utilities"]
+rust-version = "1.75"
+include = [
+ "/Cargo.toml",
+ "/Cargo.lock",
+ "/README.md",
+ "/LICENCE",
+ "/assets/**",
+ "/src/**",
+]
[dependencies]
clap = { version = "4", features = ["derive", "env"] }
diff --git a/Makefile b/Makefile
index bb02037..97857e3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: fmt fmt-check lint test-unit test-integration site-format-check validate
+.PHONY: fmt fmt-check lint test-unit test-integration site-format-check cargo-publish-dry-run npm-package-check nix-flake-check dist-validate validate
fmt:
cargo fmt --all
@@ -30,4 +30,34 @@ site-format-check:
fi
pnpm --dir site format:check
+cargo-publish-dry-run:
+ cargo publish --dry-run --allow-dirty --locked
+
+npm-package-check:
+ @if ! command -v npm >/dev/null 2>&1; then \
+ echo "npm is required for npm packaging validation."; \
+ exit 1; \
+ fi
+ node npm/deskctl-cli/scripts/validate-package.js
+ rm -rf tmp/npm-pack tmp/npm-install
+ mkdir -p tmp/npm-pack tmp/npm-install/bin
+ npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null
+ @if [ "$$(uname -s)" != "Linux" ]; then \
+ echo "Skipping npm package runtime smoke test on non-Linux host."; \
+ else \
+ cargo build && \
+ PACK_TGZ=$$(ls ./tmp/npm-pack/*.tgz | head -n 1) && \
+ DESKCTL_BINARY_PATH="$$(pwd)/target/debug/deskctl" npm install --prefix ./tmp/npm-install "$${PACK_TGZ}" && \
+ ./tmp/npm-install/node_modules/.bin/deskctl --version; \
+ fi
+
+nix-flake-check:
+ @if ! command -v nix >/dev/null 2>&1; then \
+ echo "nix is required for flake validation."; \
+ exit 1; \
+ fi
+ nix flake check
+
+dist-validate: test-unit cargo-publish-dry-run npm-package-check nix-flake-check
+
validate: fmt-check lint test-unit test-integration site-format-check
diff --git a/README.md b/README.md
index 6920615..036396a 100644
--- a/README.md
+++ b/README.md
@@ -4,11 +4,45 @@ Desktop control CLI for AI agents on Linux X11.
## Install
+### Cargo
+
```bash
cargo install deskctl
```
-Build a Linux binary with Docker:
+Source builds on Linux require:
+
+- Rust 1.75+
+- `pkg-config`
+- X11 development libraries for input and windowing, typically `libx11-dev` and `libxtst-dev` on Debian/Ubuntu
+
+### npm
+
+```bash
+npm install -g deskctl-cli
+deskctl --help
+```
+
+One-shot execution is also supported:
+
+```bash
+npx deskctl-cli --help
+```
+
+`deskctl-cli` currently supports `linux-x64` and installs the `deskctl` command by downloading the matching GitHub Release asset.
+
+### Nix
+
+```bash
+nix run github:harivansh-afk/deskctl -- --help
+nix profile install github:harivansh-afk/deskctl
+```
+
+The repo flake is the supported Nix install surface in this phase.
+
+### Docker Convenience
+
+Build a Linux binary locally with Docker:
```bash
docker compose -f docker/docker-compose.yml run --rm build
@@ -28,13 +62,12 @@ Run it on an X11 session:
DISPLAY=:1 XDG_SESSION_TYPE=x11 ~/deskctl --json snapshot --annotate
```
-Local source build requirements:
+### Local Source Build
+
```bash
cargo build
```
-At the moment there are no extra native build dependencies beyond a Rust toolchain.
-
## Quick Start
```bash
@@ -78,7 +111,7 @@ Source layout:
## Runtime Requirements
- Linux with X11 session
-- Rust 1.75+ (for build)
+- Rust 1.75+ plus the source-build dependencies above when building from source
The binary itself only links the standard glibc runtime on Linux (`libc`, `libm`, `libgcc_s`).
@@ -158,6 +191,16 @@ Text mode is compact and follow-up-oriented, but JSON is the parsing contract.
See [docs/runtime-output.md](docs/runtime-output.md) for the exact stable-vs-best-effort breakdown.
+## Distribution
+
+- GitHub Releases are the canonical binary source
+- crates.io package: `deskctl`
+- npm package: `deskctl-cli`
+- installed command on every channel: `deskctl`
+- repo-owned Nix install path: `flake.nix`
+
+For maintainer publishing and release steps, see [docs/releasing.md](docs/releasing.md).
+
## Selector Contract
Explicit selector modes:
diff --git a/docs/releasing.md b/docs/releasing.md
new file mode 100644
index 0000000..7271b83
--- /dev/null
+++ b/docs/releasing.md
@@ -0,0 +1,110 @@
+# Releasing deskctl
+
+This document covers the operator flow for shipping `deskctl` across:
+
+- GitHub Releases
+- crates.io
+- npm
+- the repo flake
+
+GitHub Releases are the canonical binary source. The npm package consumes those release assets instead of building a separate binary.
+
+## Package Names
+
+- crate: `deskctl`
+- npm package: `deskctl-cli`
+- installed command: `deskctl`
+
+## Prerequisites
+
+Before the first live publish on each registry:
+
+- npm ownership for `deskctl-cli`
+- crates.io ownership for `deskctl`
+- repository secrets:
+ - `NPM_TOKEN`
+ - `CARGO_REGISTRY_TOKEN`
+
+These are user-owned prerequisites. The repo can validate and automate the rest, but it cannot create registry ownership for you.
+
+## Normal Release Flow
+
+1. Merge release-ready changes to `main`.
+2. Let CI run:
+ - validation
+ - integration
+ - distribution validation
+ - release asset build
+3. Confirm the GitHub Release exists for the version tag and includes:
+ - `deskctl-linux-x86_64`
+ - `checksums.txt`
+4. Trigger the `Publish Registries` workflow with:
+ - `tag`
+ - `publish_npm`
+ - `publish_crates`
+5. Confirm the publish summary for each channel.
+
+## What CI Validates
+
+The repository validates:
+
+- `cargo publish --dry-run --locked`
+- npm package metadata and packability
+- npm install smoke path on Linux using the packaged `deskctl` command
+- repo flake evaluation/build
+
+The repository release workflow:
+
+- builds the Linux release binary
+- publishes the canonical GitHub Release asset
+- uploads `checksums.txt`
+
+The registry publish workflow:
+
+- targets an existing release tag
+- checks that Cargo, npm, and the requested tag all agree on version
+- checks whether that version is already published on npm and crates.io
+- only publishes the channels explicitly requested
+
+## Rerun Safety
+
+Registry publishing is intentionally separate from release asset creation.
+
+If a partial failure happens:
+
+- GitHub Release assets remain the source of truth
+- rerun the `Publish Registries` workflow for the same tag
+- already-published channels are reported and skipped
+- remaining channels can still be published
+
+## Local Validation
+
+Run the distribution checks locally with:
+
+```bash
+make cargo-publish-dry-run
+make npm-package-check
+make nix-flake-check
+make dist-validate
+```
+
+Notes:
+
+- `make npm-package-check` does a runtime smoke test only on Linux
+- `make nix-flake-check` requires a local Nix installation
+- Docker remains a local Linux build convenience, not the canonical release path
+
+## Nix Boundary
+
+The repo-owned `flake.nix` is the supported Nix surface in this phase.
+
+In scope:
+
+- `nix run github:harivansh-afk/deskctl`
+- `nix profile install github:harivansh-afk/deskctl`
+- CI validation for the repo flake
+
+Out of scope for this phase:
+
+- `nixpkgs` upstreaming
+- extra distro packaging outside the repo
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..f194334
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,61 @@
+{
+ "nodes": {
+ "flake-utils": {
+ "inputs": {
+ "systems": "systems"
+ },
+ "locked": {
+ "lastModified": 1731533236,
+ "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+ "type": "github"
+ },
+ "original": {
+ "owner": "numtide",
+ "repo": "flake-utils",
+ "type": "github"
+ }
+ },
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1774386573,
+ "narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-unstable",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "flake-utils": "flake-utils",
+ "nixpkgs": "nixpkgs"
+ }
+ },
+ "systems": {
+ "locked": {
+ "lastModified": 1681028828,
+ "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+ "owner": "nix-systems",
+ "repo": "default",
+ "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+ "type": "github"
+ },
+ "original": {
+ "owner": "nix-systems",
+ "repo": "default",
+ "type": "github"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..1eafbaa
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,77 @@
+{
+ description = "deskctl - Desktop control CLI for AI agents on Linux X11";
+
+ inputs = {
+ nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
+ flake-utils.url = "github:numtide/flake-utils";
+ };
+
+ outputs =
+ { self, nixpkgs, flake-utils }:
+ flake-utils.lib.eachDefaultSystem (
+ system:
+ let
+ pkgs = import nixpkgs { inherit system; };
+ lib = pkgs.lib;
+ cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
+
+ deskctl =
+ pkgs.rustPlatform.buildRustPackage {
+ pname = cargoToml.package.name;
+ version = cargoToml.package.version;
+ src = ./.;
+ cargoLock.lockFile = ./Cargo.lock;
+ nativeBuildInputs = [ pkgs.pkg-config ];
+ buildInputs = lib.optionals pkgs.stdenv.isLinux [
+ pkgs.libx11
+ pkgs.libxtst
+ ];
+ doCheck = false;
+
+ meta = with lib; {
+ description = cargoToml.package.description;
+ homepage = cargoToml.package.homepage;
+ license = licenses.mit;
+ mainProgram = "deskctl";
+ platforms = platforms.linux;
+ };
+ };
+ in
+ {
+ formatter = pkgs.nixfmt;
+
+ packages = lib.optionalAttrs pkgs.stdenv.isLinux {
+ inherit deskctl;
+ default = deskctl;
+ };
+
+ apps = lib.optionalAttrs pkgs.stdenv.isLinux {
+ default = flake-utils.lib.mkApp { drv = deskctl; };
+ deskctl = flake-utils.lib.mkApp { drv = deskctl; };
+ };
+
+ checks = lib.optionalAttrs pkgs.stdenv.isLinux {
+ build = deskctl;
+ };
+
+ devShells.default = pkgs.mkShell {
+ packages =
+ [
+ pkgs.cargo
+ pkgs.clippy
+ pkgs.nodejs
+ pkgs.nixfmt
+ pkgs.pkg-config
+ pkgs.pnpm
+ pkgs.rustc
+ pkgs.rustfmt
+ ]
+ ++ lib.optionals pkgs.stdenv.isLinux [
+ pkgs.libx11
+ pkgs.libxtst
+ pkgs.xorg.xorgserver
+ ];
+ };
+ }
+ );
+}
diff --git a/npm/deskctl-cli/README.md b/npm/deskctl-cli/README.md
new file mode 100644
index 0000000..fd6f610
--- /dev/null
+++ b/npm/deskctl-cli/README.md
@@ -0,0 +1,36 @@
+# deskctl-cli
+
+`deskctl-cli` installs the `deskctl` command for Linux X11 systems.
+
+## Install
+
+```bash
+npm install -g deskctl-cli
+```
+
+After install, run:
+
+```bash
+deskctl --help
+```
+
+One-shot usage is also supported:
+
+```bash
+npx deskctl-cli --help
+```
+
+## Runtime Support
+
+- Linux
+- X11 session
+- currently packaged release asset: `linux-x64`
+
+`deskctl-cli` downloads the matching GitHub Release binary during install.
+Unsupported targets fail during install with a clear runtime support error instead of installing a broken command.
+
+If you want the Rust source-install path instead, use:
+
+```bash
+cargo install deskctl
+```
diff --git a/npm/deskctl-cli/bin/deskctl.js b/npm/deskctl-cli/bin/deskctl.js
new file mode 100644
index 0000000..9f9b480
--- /dev/null
+++ b/npm/deskctl-cli/bin/deskctl.js
@@ -0,0 +1,36 @@
+#!/usr/bin/env node
+
+const fs = require("node:fs");
+const { spawn } = require("node:child_process");
+
+const { readPackageJson, releaseTag, supportedTarget, vendorBinaryPath } = require("../scripts/support");
+
+function main() {
+ const pkg = readPackageJson();
+ const target = supportedTarget();
+ const binaryPath = vendorBinaryPath(target);
+
+ if (!fs.existsSync(binaryPath)) {
+ console.error(
+ [
+ "deskctl binary is missing from the npm package install.",
+ `Expected: ${binaryPath}`,
+ `Package version: ${pkg.version}`,
+ `Release tag: ${releaseTag(pkg)}`,
+ "Try reinstalling deskctl-cli or check that your target is supported."
+ ].join("\n")
+ );
+ process.exit(1);
+ }
+
+ const child = spawn(binaryPath, process.argv.slice(2), { stdio: "inherit" });
+ child.on("exit", (code, signal) => {
+ if (signal) {
+ process.kill(process.pid, signal);
+ return;
+ }
+ process.exit(code ?? 1);
+ });
+}
+
+main();
diff --git a/npm/deskctl-cli/package.json b/npm/deskctl-cli/package.json
new file mode 100644
index 0000000..c1cdbbc
--- /dev/null
+++ b/npm/deskctl-cli/package.json
@@ -0,0 +1,36 @@
+{
+ "name": "deskctl-cli",
+ "version": "0.1.5",
+ "description": "Installable deskctl CLI package for Linux X11 agents",
+ "license": "MIT",
+ "homepage": "https://github.com/harivansh-afk/deskctl",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/harivansh-afk/deskctl.git"
+ },
+ "bugs": {
+ "url": "https://github.com/harivansh-afk/deskctl/issues"
+ },
+ "engines": {
+ "node": ">=18"
+ },
+ "bin": {
+ "deskctl": "bin/deskctl.js"
+ },
+ "files": [
+ "README.md",
+ "bin",
+ "scripts"
+ ],
+ "scripts": {
+ "postinstall": "node scripts/postinstall.js",
+ "validate": "node scripts/validate-package.js"
+ },
+ "keywords": [
+ "deskctl",
+ "x11",
+ "desktop",
+ "automation",
+ "cli"
+ ]
+}
diff --git a/npm/deskctl-cli/scripts/postinstall.js b/npm/deskctl-cli/scripts/postinstall.js
new file mode 100644
index 0000000..de1b1d0
--- /dev/null
+++ b/npm/deskctl-cli/scripts/postinstall.js
@@ -0,0 +1,49 @@
+const fs = require("node:fs");
+
+const {
+ checksumsUrl,
+ checksumForAsset,
+ download,
+ ensureVendorDir,
+ installLocalBinary,
+ readPackageJson,
+ releaseAssetUrl,
+ releaseTag,
+ sha256,
+ supportedTarget,
+ vendorBinaryPath
+} = require("./support");
+
+async function main() {
+ const pkg = readPackageJson();
+ const target = supportedTarget();
+ const targetPath = vendorBinaryPath(target);
+
+ ensureVendorDir();
+
+ if (process.env.DESKCTL_BINARY_PATH) {
+ installLocalBinary(process.env.DESKCTL_BINARY_PATH, targetPath);
+ return;
+ }
+
+ const tag = releaseTag(pkg);
+ const assetUrl = releaseAssetUrl(tag, target.assetName);
+ const checksumText = (await download(checksumsUrl(tag))).toString("utf8");
+ const expectedSha = checksumForAsset(checksumText, target.assetName);
+ const asset = await download(assetUrl);
+ const actualSha = sha256(asset);
+
+ if (actualSha !== expectedSha) {
+ throw new Error(
+ `Checksum mismatch for ${target.assetName}. Expected ${expectedSha}, got ${actualSha}.`
+ );
+ }
+
+ fs.writeFileSync(targetPath, asset);
+ fs.chmodSync(targetPath, 0o755);
+}
+
+main().catch((error) => {
+ console.error(`deskctl-cli install failed: ${error.message}`);
+ process.exit(1);
+});
diff --git a/npm/deskctl-cli/scripts/support.js b/npm/deskctl-cli/scripts/support.js
new file mode 100644
index 0000000..8d41520
--- /dev/null
+++ b/npm/deskctl-cli/scripts/support.js
@@ -0,0 +1,120 @@
+const crypto = require("node:crypto");
+const fs = require("node:fs");
+const path = require("node:path");
+const https = require("node:https");
+
+const PACKAGE_ROOT = path.resolve(__dirname, "..");
+const VENDOR_DIR = path.join(PACKAGE_ROOT, "vendor");
+const PACKAGE_JSON = path.join(PACKAGE_ROOT, "package.json");
+
+function readPackageJson() {
+ return JSON.parse(fs.readFileSync(PACKAGE_JSON, "utf8"));
+}
+
+function releaseTag(pkg) {
+ return process.env.DESKCTL_RELEASE_TAG || `v${pkg.version}`;
+}
+
+function supportedTarget(platform = process.platform, arch = process.arch) {
+ if (platform === "linux" && arch === "x64") {
+ return {
+ platform,
+ arch,
+ assetName: "deskctl-linux-x86_64",
+ binaryName: "deskctl-linux-x86_64"
+ };
+ }
+
+ throw new Error(
+ `deskctl-cli currently supports linux-x64 only. Received ${platform}-${arch}.`
+ );
+}
+
+function vendorBinaryPath(target) {
+ return path.join(VENDOR_DIR, target.binaryName);
+}
+
+function releaseBaseUrl(tag) {
+ return (
+ process.env.DESKCTL_RELEASE_BASE_URL ||
+ `https://github.com/harivansh-afk/deskctl/releases/download/${tag}`
+ );
+}
+
+function releaseAssetUrl(tag, assetName) {
+ return process.env.DESKCTL_DOWNLOAD_URL || `${releaseBaseUrl(tag)}/${assetName}`;
+}
+
+function checksumsUrl(tag) {
+ return `${releaseBaseUrl(tag)}/checksums.txt`;
+}
+
+function ensureVendorDir() {
+ fs.mkdirSync(VENDOR_DIR, { recursive: true });
+}
+
+function checksumForAsset(contents, assetName) {
+ const line = contents
+ .split("\n")
+ .map((value) => value.trim())
+ .find((value) => value.endsWith(` ${assetName}`) || value.endsWith(` *${assetName}`));
+
+ if (!line) {
+ throw new Error(`Could not find checksum entry for ${assetName}.`);
+ }
+
+ return line.split(/\s+/)[0];
+}
+
+function sha256(buffer) {
+ return crypto.createHash("sha256").update(buffer).digest("hex");
+}
+
+function download(url) {
+ return new Promise((resolve, reject) => {
+ https
+ .get(url, (response) => {
+ if (
+ response.statusCode &&
+ response.statusCode >= 300 &&
+ response.statusCode < 400 &&
+ response.headers.location
+ ) {
+ response.resume();
+ resolve(download(response.headers.location));
+ return;
+ }
+
+ if (response.statusCode !== 200) {
+ reject(new Error(`Download failed for ${url}: HTTP ${response.statusCode}`));
+ return;
+ }
+
+ const chunks = [];
+ response.on("data", (chunk) => chunks.push(chunk));
+ response.on("end", () => resolve(Buffer.concat(chunks)));
+ })
+ .on("error", reject);
+ });
+}
+
+function installLocalBinary(sourcePath, targetPath) {
+ fs.copyFileSync(sourcePath, targetPath);
+ fs.chmodSync(targetPath, 0o755);
+}
+
+module.exports = {
+ PACKAGE_ROOT,
+ VENDOR_DIR,
+ checksumsUrl,
+ checksumForAsset,
+ download,
+ ensureVendorDir,
+ installLocalBinary,
+ readPackageJson,
+ releaseAssetUrl,
+ releaseTag,
+ sha256,
+ supportedTarget,
+ vendorBinaryPath
+};
diff --git a/npm/deskctl-cli/scripts/validate-package.js b/npm/deskctl-cli/scripts/validate-package.js
new file mode 100644
index 0000000..46d3e87
--- /dev/null
+++ b/npm/deskctl-cli/scripts/validate-package.js
@@ -0,0 +1,40 @@
+const fs = require("node:fs");
+const path = require("node:path");
+
+const { readPackageJson, supportedTarget, vendorBinaryPath } = require("./support");
+
+function readCargoVersion() {
+ const cargoToml = fs.readFileSync(
+ path.resolve(__dirname, "..", "..", "..", "Cargo.toml"),
+ "utf8"
+ );
+ const match = cargoToml.match(/^version = "([^"]+)"/m);
+ if (!match) {
+ throw new Error("Could not determine Cargo.toml version.");
+ }
+ return match[1];
+}
+
+function main() {
+ const pkg = readPackageJson();
+ const cargoVersion = readCargoVersion();
+
+ if (pkg.version !== cargoVersion) {
+ throw new Error(
+ `Version mismatch: npm package is ${pkg.version}, Cargo.toml is ${cargoVersion}.`
+ );
+ }
+
+ if (pkg.bin?.deskctl !== "bin/deskctl.js") {
+ throw new Error("deskctl-cli must expose the deskctl bin entrypoint.");
+ }
+
+ const target = supportedTarget("linux", "x64");
+ const targetPath = vendorBinaryPath(target);
+ const vendorDir = path.dirname(targetPath);
+ if (!vendorDir.endsWith(path.join("deskctl-cli", "vendor"))) {
+ throw new Error("Vendor binary directory resolved unexpectedly.");
+ }
+}
+
+main();
From 1092e503be7bcd3de55429b4c6894bdd1841598b Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 26 Mar 2026 03:25:14 +0000
Subject: [PATCH 02/37] release: v0.1.6 [skip ci]
---
Cargo.lock | 4 ++--
Cargo.toml | 2 +-
npm/deskctl-cli/package.json | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 1355d04..71a9a54 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
-version = 4
+version = 3
[[package]]
name = "ab_glyph"
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.5"
+version = "0.1.6"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index f373679..b05507b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.5"
+version = "0.1.6"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl-cli/package.json b/npm/deskctl-cli/package.json
index c1cdbbc..84f27ee 100644
--- a/npm/deskctl-cli/package.json
+++ b/npm/deskctl-cli/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl-cli",
- "version": "0.1.5",
+ "version": "0.1.6",
"description": "Installable deskctl CLI package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From 3dbd9ce52d09759b0ffa96fd60061fab5535cf89 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 00:07:03 -0400
Subject: [PATCH 03/37] init with runtime contract
---
CONTRIBUTING.md | 2 +-
README.md | 14 +-
...{runtime-output.md => runtime-contract.md} | 0
skills/SKILL.md | 149 ------------------
skills/deskctl/SKILL.md | 132 ++++++++++++++++
skills/deskctl/references/commands.md | 75 +++++++++
skills/deskctl/references/install.md | 75 +++++++++
skills/deskctl/references/sandbox-agent.md | 61 +++++++
.../deskctl/templates/install-deskctl-npm.sh | 27 ++++
.../templates/sandbox-agent-desktop-loop.sh | 7 +
10 files changed, 390 insertions(+), 152 deletions(-)
rename docs/{runtime-output.md => runtime-contract.md} (100%)
delete mode 100644 skills/SKILL.md
create mode 100644 skills/deskctl/SKILL.md
create mode 100644 skills/deskctl/references/commands.md
create mode 100644 skills/deskctl/references/install.md
create mode 100644 skills/deskctl/references/sandbox-agent.md
create mode 100644 skills/deskctl/templates/install-deskctl-npm.sh
create mode 100644 skills/deskctl/templates/sandbox-agent-desktop-loop.sh
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bdbce4e..926c58a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -21,7 +21,7 @@ pnpm --dir site install
- `src/` holds production code and unit tests
- `tests/` holds integration tests
- `tests/support/` holds shared X11 and daemon helpers for integration coverage
-- `docs/runtime-output.md` is the stable-vs-best-effort runtime output contract for agent-facing CLI work
+- `docs/runtime-contract.md` is the stable-vs-best-effort runtime output contract for agent-facing CLI work
Keep integration-only helpers out of `src/`.
diff --git a/README.md b/README.md
index 036396a..db7d92f 100644
--- a/README.md
+++ b/README.md
@@ -31,6 +31,16 @@ npx deskctl-cli --help
`deskctl-cli` currently supports `linux-x64` and installs the `deskctl` command by downloading the matching GitHub Release asset.
+### Installable skill
+
+For `skills.sh` / agent skill ecosystems:
+
+```bash
+npx skills add harivansh-afk/deskctl -s deskctl
+```
+
+The installable skill lives under [`skills/deskctl`](skills/deskctl) and is designed for X11 sandboxes, VMs, and sandbox-agent desktop sessions. It points agents to the npm install path first so they can get `deskctl` without Cargo.
+
### Nix
```bash
@@ -133,7 +143,7 @@ deskctl doctor
- `@wN` refs are short-lived handles assigned by `snapshot` and `list-windows`
- `--json` output includes a stable `window_id` for programmatic targeting within the current daemon session
- `list-windows` is a cheap read-only operation and does not capture or write a screenshot
-- the stable runtime JSON/error contract is documented in [docs/runtime-output.md](docs/runtime-output.md)
+- the stable runtime JSON/error contract is documented in [docs/runtime-contract.md](docs/runtime-contract.md)
## Read and Wait Surface
@@ -189,7 +199,7 @@ Text mode is compact and follow-up-oriented, but JSON is the parsing contract.
- rely on `window_id`, selector-related fields, grouped read payloads, and structured error `kind` values for stable automation
- treat monitor naming, incidental whitespace, and default screenshot file names as best-effort
-See [docs/runtime-output.md](docs/runtime-output.md) for the exact stable-vs-best-effort breakdown.
+See [docs/runtime-contract.md](docs/runtime-contract.md) for the exact stable-vs-best-effort breakdown.
## Distribution
diff --git a/docs/runtime-output.md b/docs/runtime-contract.md
similarity index 100%
rename from docs/runtime-output.md
rename to docs/runtime-contract.md
diff --git a/skills/SKILL.md b/skills/SKILL.md
deleted file mode 100644
index efbd188..0000000
--- a/skills/SKILL.md
+++ /dev/null
@@ -1,149 +0,0 @@
----
-name: deskctl
-description: Desktop control CLI for AI agents
-allowed-tools: Bash(deskctl:*)
----
-
-# deskctl
-
-Desktop control CLI for AI agents on Linux X11. Provides a unified interface for screenshots, mouse/keyboard input, and window management with compact `@wN` window references.
-
-## Core Workflow
-
-1. **Snapshot** to see the desktop and get window refs
-2. **Query / wait** using grouped `get` and `wait` commands
-3. **Act** using refs, explicit selectors, or coordinates
-4. **Repeat** as needed
-
-## Quick Reference
-
-### See the Desktop
-
-```bash
-deskctl snapshot # Screenshot + window tree with @wN refs
-deskctl snapshot --annotate # Screenshot with bounding boxes and labels
-deskctl snapshot --json # Structured JSON output
-deskctl list-windows # Window tree without screenshot
-deskctl screenshot /tmp/s.png # Screenshot only (no window tree)
-deskctl get active-window # Currently focused window
-deskctl get monitors # Monitor geometry
-deskctl get version # deskctl version + backend
-deskctl get systeminfo # Runtime-scoped diagnostics
-deskctl wait window --selector 'title=Firefox' --timeout 10
-deskctl wait focus --selector 'class=firefox' --timeout 5
-```
-
-### Click and Type
-
-```bash
-deskctl click @w1 # Click center of window @w1
-deskctl click 500,300 # Click absolute coordinates
-deskctl dblclick @w2 # Double-click window @w2
-deskctl type "hello world" # Type text into focused window
-deskctl press enter # Press a key
-deskctl hotkey ctrl c # Send Ctrl+C
-deskctl hotkey ctrl shift t # Send Ctrl+Shift+T
-```
-
-### Mouse Control
-
-```bash
-deskctl mouse move 500 300 # Move cursor to coordinates
-deskctl mouse scroll 3 # Scroll down 3 units
-deskctl mouse scroll -3 # Scroll up 3 units
-deskctl mouse drag 100 100 500 500 # Drag from (100,100) to (500,500)
-```
-
-### Window Management
-
-```bash
-deskctl focus @w2 # Focus window by ref
-deskctl focus 'title=Firefox' # Focus by explicit title selector
-deskctl focus 'class=firefox' # Focus by explicit class selector
-deskctl focus "firefox" # Fuzzy substring match (fails on ambiguity)
-deskctl close @w3 # Close window gracefully
-deskctl move-window @w1 100 200 # Move window to position
-deskctl resize-window @w1 800 600 # Resize window
-```
-
-### Utilities
-
-```bash
-deskctl doctor # Diagnose X11, screenshot, and daemon health
-deskctl get-screen-size # Screen resolution
-deskctl get-mouse-position # Current cursor position
-deskctl launch firefox # Launch an application
-deskctl launch code -- --new-window # Launch with arguments
-```
-
-### Daemon
-
-```bash
-deskctl daemon start # Start daemon manually
-deskctl daemon stop # Stop daemon
-deskctl daemon status # Check daemon status
-```
-
-## Global Options
-
-- `--json` : Output as structured JSON (all commands)
-- `--session NAME` : Session name for multiple daemon instances (default: "default")
-- `--socket PATH` : Custom Unix socket path
-
-## Output Contract
-
-- Prefer `--json` when an agent needs strict parsing.
-- Use `window_id` for stable targeting inside a live daemon session.
-- Use `ref_id` / `@wN` for quick short-lived follow-up actions after `snapshot` or `list-windows`.
-- Structured JSON failures expose machine-usable `kind` values for selector and wait failures.
-- The exact text formatting is intentionally compact but not the parsing contract. See `docs/runtime-output.md` for the stable field policy.
-
-## Window Refs
-
-After `snapshot` or `list-windows`, windows are assigned short refs:
-- `@w1` is the topmost (usually focused) window
-- `@w2`, `@w3`, etc. follow z-order (front to back)
-- Refs reset on each `snapshot` call
-- Use `--json` to see stable `window_id` values for programmatic tracking within the current daemon session
-
-## Selector Contract
-
-Prefer explicit selectors when an agent needs deterministic targeting:
-
-```bash
-ref=w1
-id=win1
-title=Firefox
-class=firefox
-focused
-```
-
-Bare selectors such as `firefox` still work as fuzzy substring matches, but they now fail with candidate windows if multiple matches exist.
-
-## Example Agent Workflow
-
-```bash
-# 1. See what's on screen
-deskctl snapshot --annotate
-
-# 2. Wait for the browser and focus it deterministically
-deskctl wait window --selector 'class=firefox' --timeout 10
-deskctl focus 'class=firefox'
-
-# 3. Navigate to a URL
-deskctl hotkey ctrl l
-deskctl type "https://example.com"
-deskctl press enter
-
-# 4. Take a new snapshot to see the result
-deskctl snapshot
-```
-
-## Key Names for press/hotkey
-
-Modifiers: `ctrl`, `alt`, `shift`, `super`
-Navigation: `enter`, `tab`, `escape`, `backspace`, `delete`, `space`
-Arrows: `up`, `down`, `left`, `right`
-Page: `home`, `end`, `pageup`, `pagedown`
-Function: `f1` through `f12`
-Characters: any single character (e.g. `a`, `1`, `/`)
diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md
new file mode 100644
index 0000000..1522703
--- /dev/null
+++ b/skills/deskctl/SKILL.md
@@ -0,0 +1,132 @@
+---
+name: deskctl
+description: Desktop control CLI for AI agents on Linux X11. Use when operating an X11 desktop in a sandbox, VM, or sandbox-agent session via screenshots, grouped get/wait commands, selectors, and mouse or keyboard input. Prefer this skill when the task is "control the desktop", "inspect windows", "wait for a window", "click/type in the sandbox desktop", or "use deskctl inside sandbox-agent".
+allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*), Bash(sandbox-agent:*)
+---
+
+# deskctl
+
+`deskctl` is a non-interactive desktop control CLI for Linux X11 agents. It works well inside sandbox-agent desktop environments because it gives agents a tight `observe -> wait -> act -> verify` loop.
+
+## Install skill (optional)
+
+### npx
+
+```bash
+npx skills add harivansh-afk/deskctl -s deskctl
+```
+
+### bunx
+
+```bash
+bunx skills add harivansh-afk/deskctl -s deskctl
+```
+
+## Install the CLI
+
+Preferred install path:
+
+```bash
+npm install -g deskctl-cli
+deskctl --help
+```
+
+If global npm installs are not writable, use a user prefix:
+
+```bash
+mkdir -p "$HOME/.local/bin"
+npm install -g --prefix "$HOME/.local" deskctl-cli
+export PATH="$HOME/.local/bin:$PATH"
+deskctl --help
+```
+
+One-shot usage also works:
+
+```bash
+npx deskctl-cli --help
+```
+
+For install details and fallback paths, see [references/install.md](references/install.md).
+
+## Sandbox-Agent Notes
+
+Before using `deskctl` inside sandbox-agent:
+
+1. Make sure the sandbox has desktop runtime packages installed.
+2. Make sure the session is actually running X11.
+3. Run `deskctl doctor` before trying to click or type.
+
+Typical sandbox-agent prep:
+
+```bash
+sandbox-agent install desktop --yes
+deskctl doctor
+```
+
+If `doctor` fails, inspect `DISPLAY`, `XDG_SESSION_TYPE`, and whether the sandbox actually has a desktop session. See [references/sandbox-agent.md](references/sandbox-agent.md).
+
+## Core Workflow
+
+Every desktop task should follow this loop:
+
+1. **Observe**
+2. **Target**
+3. **Wait**
+4. **Act**
+5. **Verify**
+
+```bash
+deskctl doctor
+deskctl snapshot --annotate
+deskctl get active-window
+deskctl wait window --selector 'class=firefox' --timeout 10
+deskctl focus 'class=firefox'
+deskctl hotkey ctrl l
+deskctl type "https://example.com"
+deskctl press enter
+deskctl snapshot
+```
+
+## What To Reach For First
+
+- `deskctl doctor`
+- `deskctl snapshot --annotate`
+- `deskctl list-windows`
+- `deskctl get active-window`
+- `deskctl wait window --selector ...`
+- `deskctl wait focus --selector ...`
+
+Use `--json` when you need strict parsing. Use explicit selectors when you need deterministic targeting.
+
+## Selector Rules
+
+Prefer explicit selectors:
+
+```bash
+ref=w1
+id=win1
+title=Firefox
+class=firefox
+focused
+```
+
+Legacy refs still work:
+
+```bash
+@w1
+w1
+win1
+```
+
+Bare strings such as `firefox` are fuzzy substring selectors. They fail on ambiguity instead of silently picking the wrong window.
+
+## References
+
+- [references/install.md](references/install.md) - install paths, npm-first bootstrap, runtime prerequisites
+- [references/commands.md](references/commands.md) - grouped reads, waits, selectors, and core action commands
+- [references/sandbox-agent.md](references/sandbox-agent.md) - using `deskctl` inside sandbox-agent desktop sessions
+
+## Templates
+
+- [templates/install-deskctl-npm.sh](templates/install-deskctl-npm.sh) - install `deskctl-cli` into a user prefix
+- [templates/sandbox-agent-desktop-loop.sh](templates/sandbox-agent-desktop-loop.sh) - minimal observe/wait/act loop for desktop tasks
diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md
new file mode 100644
index 0000000..2d2dc1f
--- /dev/null
+++ b/skills/deskctl/references/commands.md
@@ -0,0 +1,75 @@
+# deskctl command guide
+
+## Observe
+
+```bash
+deskctl doctor
+deskctl snapshot
+deskctl snapshot --annotate
+deskctl list-windows
+deskctl screenshot /tmp/current.png
+deskctl get active-window
+deskctl get monitors
+deskctl get version
+deskctl get systeminfo
+```
+
+Use `snapshot --annotate` when you need both the screenshot artifact and the short `@wN` labels. Use `list-windows` when you only need the window tree and do not want screenshot side effects.
+
+## Wait
+
+```bash
+deskctl wait window --selector 'title=Firefox' --timeout 10
+deskctl wait focus --selector 'class=firefox' --timeout 5
+```
+
+Wait commands return the matched window payload on success. In `--json` mode, failures include structured `kind` values so the caller can recover without string parsing.
+
+## Selectors
+
+Prefer explicit selectors:
+
+```bash
+ref=w1
+id=win1
+title=Firefox
+class=firefox
+focused
+```
+
+Legacy refs still work:
+
+```bash
+@w1
+w1
+win1
+```
+
+Bare fuzzy selectors such as `firefox` are supported, but they fail on ambiguity.
+
+## Act
+
+```bash
+deskctl focus 'class=firefox'
+deskctl click @w1
+deskctl dblclick @w2
+deskctl type "hello world"
+deskctl press enter
+deskctl hotkey ctrl shift t
+deskctl mouse move 500 300
+deskctl mouse scroll 3
+deskctl mouse drag 100 100 500 500
+deskctl move-window @w1 100 120
+deskctl resize-window @w1 1280 720
+deskctl close @w3
+deskctl launch firefox
+```
+
+## Agent loop
+
+The safe pattern is:
+
+1. Observe with `snapshot`, `list-windows`, or `get ...`
+2. Wait for the target window if needed
+3. Act using explicit selectors or refs
+4. Snapshot again to verify the result
diff --git a/skills/deskctl/references/install.md b/skills/deskctl/references/install.md
new file mode 100644
index 0000000..cb97a5c
--- /dev/null
+++ b/skills/deskctl/references/install.md
@@ -0,0 +1,75 @@
+# Install `deskctl`
+
+`deskctl` is designed to be used non-interactively by agents. The easiest install path is the npm package because it installs the `deskctl` command directly from GitHub Release assets without needing Cargo on the target machine.
+
+## Preferred: npm global install
+
+```bash
+npm install -g deskctl-cli
+deskctl --help
+```
+
+This is the preferred path for sandboxes, VMs, and sandbox-agent sessions where Node/npm already exists.
+
+## User-prefix npm install
+
+If global npm installs are not writable:
+
+```bash
+mkdir -p "$HOME/.local/bin"
+npm install -g --prefix "$HOME/.local" deskctl-cli
+export PATH="$HOME/.local/bin:$PATH"
+deskctl --help
+```
+
+This avoids `sudo` and keeps the install inside the user home directory.
+
+## One-shot npm execution
+
+```bash
+npx deskctl-cli --help
+```
+
+Use this for quick testing. For repeated desktop control, install the command once so the runtime is predictable.
+
+## Fallback: Cargo
+
+```bash
+cargo install deskctl
+```
+
+Use this only when the machine already has a Rust toolchain or when you explicitly want a source build.
+
+## Fallback: local Docker build
+
+If you need a Linux binary from macOS or another non-Linux host:
+
+```bash
+docker compose -f docker/docker-compose.yml run --rm build
+```
+
+Then copy `dist/deskctl-linux-x86_64` into the target machine.
+
+## Runtime prerequisites
+
+`deskctl` needs:
+
+- Linux
+- X11
+- a valid `DISPLAY`
+- a working desktop/window-manager session
+
+Quick verification:
+
+```bash
+printenv DISPLAY
+printenv XDG_SESSION_TYPE
+deskctl doctor
+```
+
+Inside sandbox-agent, you may need to install desktop dependencies first:
+
+```bash
+sandbox-agent install desktop --yes
+deskctl doctor
+```
diff --git a/skills/deskctl/references/sandbox-agent.md b/skills/deskctl/references/sandbox-agent.md
new file mode 100644
index 0000000..d994062
--- /dev/null
+++ b/skills/deskctl/references/sandbox-agent.md
@@ -0,0 +1,61 @@
+# deskctl inside sandbox-agent
+
+Use `deskctl` when the sandbox-agent session includes a Linux desktop and you want a tight local desktop-control loop from the shell.
+
+## When it fits
+
+`deskctl` is a good fit when:
+
+- the sandbox already has an X11 desktop session
+- you want fast local desktop control from inside the sandbox
+- you want short-lived refs like `@w1` and grouped `get` or `wait` primitives
+
+It is not a replacement for sandbox-agent session orchestration itself. Use sandbox-agent to provision the sandbox and desktop runtime, then use `deskctl` inside that environment to control the GUI.
+
+## Minimal bootstrap
+
+```bash
+sandbox-agent install desktop --yes
+npm install -g deskctl-cli
+deskctl doctor
+deskctl snapshot --annotate
+```
+
+If npm global installs are not writable:
+
+```bash
+mkdir -p "$HOME/.local/bin"
+npm install -g --prefix "$HOME/.local" deskctl-cli
+export PATH="$HOME/.local/bin:$PATH"
+deskctl doctor
+```
+
+## Expected environment
+
+Check:
+
+```bash
+printenv DISPLAY
+printenv XDG_SESSION_TYPE
+deskctl --json get systeminfo
+```
+
+Healthy `deskctl` usage usually means:
+
+- `DISPLAY` is set
+- `XDG_SESSION_TYPE=x11`
+- `deskctl doctor` succeeds
+
+## Recommended workflow
+
+```bash
+deskctl snapshot --annotate
+deskctl wait window --selector 'class=firefox' --timeout 10
+deskctl focus 'class=firefox'
+deskctl hotkey ctrl l
+deskctl type "https://example.com"
+deskctl press enter
+deskctl snapshot
+```
+
+Prefer `--json` for strict machine parsing and explicit selectors for deterministic targeting.
diff --git a/skills/deskctl/templates/install-deskctl-npm.sh b/skills/deskctl/templates/install-deskctl-npm.sh
new file mode 100644
index 0000000..a0ab596
--- /dev/null
+++ b/skills/deskctl/templates/install-deskctl-npm.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+if command -v deskctl >/dev/null 2>&1; then
+ echo "deskctl already installed: $(command -v deskctl)"
+ exit 0
+fi
+
+if ! command -v npm >/dev/null 2>&1; then
+ echo "npm is required for the preferred deskctl install path"
+ exit 1
+fi
+
+prefix="${DESKCTL_NPM_PREFIX:-$HOME/.local}"
+bin_dir="$prefix/bin"
+
+mkdir -p "$bin_dir"
+npm install -g --prefix "$prefix" deskctl-cli
+
+if ! command -v deskctl >/dev/null 2>&1; then
+ echo "deskctl installed to $bin_dir"
+ echo "add this to PATH if needed:"
+ echo "export PATH=\"$bin_dir:\$PATH\""
+fi
+
+"$bin_dir/deskctl" --help >/dev/null 2>&1 || true
+echo "deskctl bootstrap complete"
diff --git a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh
new file mode 100644
index 0000000..f47dbb8
--- /dev/null
+++ b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+deskctl doctor
+deskctl snapshot --annotate
+deskctl get active-window
+deskctl wait window --selector "${1:-focused}" --timeout "${2:-5}"
From c37589ccf403106ebba3414ceeb9263c19c96e4f Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 00:30:05 -0400
Subject: [PATCH 04/37] skill validated with workflows
---
skills/deskctl/SKILL.md | 128 ++++--------------
skills/deskctl/references/commands.md | 64 ++++-----
skills/deskctl/references/install.md | 75 ----------
skills/deskctl/references/runtime-contract.md | 1 +
skills/deskctl/references/sandbox-agent.md | 61 ---------
.../deskctl/templates/install-deskctl-npm.sh | 27 ----
.../templates/sandbox-agent-desktop-loop.sh | 7 -
skills/deskctl/workflows/observe-act.sh | 37 +++++
skills/deskctl/workflows/poll-condition.sh | 42 ++++++
9 files changed, 134 insertions(+), 308 deletions(-)
delete mode 100644 skills/deskctl/references/install.md
create mode 120000 skills/deskctl/references/runtime-contract.md
delete mode 100644 skills/deskctl/references/sandbox-agent.md
delete mode 100644 skills/deskctl/templates/install-deskctl-npm.sh
delete mode 100644 skills/deskctl/templates/sandbox-agent-desktop-loop.sh
create mode 100755 skills/deskctl/workflows/observe-act.sh
create mode 100755 skills/deskctl/workflows/poll-condition.sh
diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md
index 1522703..81dea19 100644
--- a/skills/deskctl/SKILL.md
+++ b/skills/deskctl/SKILL.md
@@ -1,132 +1,54 @@
---
name: deskctl
-description: Desktop control CLI for AI agents on Linux X11. Use when operating an X11 desktop in a sandbox, VM, or sandbox-agent session via screenshots, grouped get/wait commands, selectors, and mouse or keyboard input. Prefer this skill when the task is "control the desktop", "inspect windows", "wait for a window", "click/type in the sandbox desktop", or "use deskctl inside sandbox-agent".
-allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*), Bash(sandbox-agent:*)
+description: Non-interactive X11 desktop control for AI agents. Use when the task involves controlling a Linux desktop - clicking, typing, reading windows, waiting for UI state, or taking screenshots inside a sandbox or VM.
+allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*)
---
# deskctl
-`deskctl` is a non-interactive desktop control CLI for Linux X11 agents. It works well inside sandbox-agent desktop environments because it gives agents a tight `observe -> wait -> act -> verify` loop.
+Non-interactive desktop control CLI for Linux X11 agents.
-## Install skill (optional)
+All output follows the runtime contract defined in [references/runtime-contract.md](references/runtime-contract.md). Every command returns a stable JSON envelope when called with `--json`. Use `--json` whenever you need to parse output programmatically.
-### npx
-
-```bash
-npx skills add harivansh-afk/deskctl -s deskctl
-```
-
-### bunx
-
-```bash
-bunx skills add harivansh-afk/deskctl -s deskctl
-```
-
-## Install the CLI
-
-Preferred install path:
+## Quick start
```bash
npm install -g deskctl-cli
-deskctl --help
-```
-
-If global npm installs are not writable, use a user prefix:
-
-```bash
-mkdir -p "$HOME/.local/bin"
-npm install -g --prefix "$HOME/.local" deskctl-cli
-export PATH="$HOME/.local/bin:$PATH"
-deskctl --help
-```
-
-One-shot usage also works:
-
-```bash
-npx deskctl-cli --help
-```
-
-For install details and fallback paths, see [references/install.md](references/install.md).
-
-## Sandbox-Agent Notes
-
-Before using `deskctl` inside sandbox-agent:
-
-1. Make sure the sandbox has desktop runtime packages installed.
-2. Make sure the session is actually running X11.
-3. Run `deskctl doctor` before trying to click or type.
-
-Typical sandbox-agent prep:
-
-```bash
-sandbox-agent install desktop --yes
-deskctl doctor
-```
-
-If `doctor` fails, inspect `DISPLAY`, `XDG_SESSION_TYPE`, and whether the sandbox actually has a desktop session. See [references/sandbox-agent.md](references/sandbox-agent.md).
-
-## Core Workflow
-
-Every desktop task should follow this loop:
-
-1. **Observe**
-2. **Target**
-3. **Wait**
-4. **Act**
-5. **Verify**
-
-```bash
deskctl doctor
deskctl snapshot --annotate
-deskctl get active-window
-deskctl wait window --selector 'class=firefox' --timeout 10
-deskctl focus 'class=firefox'
-deskctl hotkey ctrl l
-deskctl type "https://example.com"
-deskctl press enter
-deskctl snapshot
```
-## What To Reach For First
+## Agent loop
-- `deskctl doctor`
-- `deskctl snapshot --annotate`
-- `deskctl list-windows`
-- `deskctl get active-window`
-- `deskctl wait window --selector ...`
-- `deskctl wait focus --selector ...`
-
-Use `--json` when you need strict parsing. Use explicit selectors when you need deterministic targeting.
-
-## Selector Rules
-
-Prefer explicit selectors:
+Every desktop interaction follows: **observe -> wait -> act -> verify**.
```bash
-ref=w1
-id=win1
-title=Firefox
-class=firefox
-focused
+deskctl snapshot --annotate # observe
+deskctl wait window --selector 'title=Firefox' --timeout 10 # wait
+deskctl click 'title=Firefox' # act
+deskctl snapshot # verify
```
-Legacy refs still work:
+See [workflows/observe-act.sh](workflows/observe-act.sh) for a reusable script. See [workflows/poll-condition.sh](workflows/poll-condition.sh) for polling loops.
+
+## Selectors
```bash
-@w1
-w1
-win1
+ref=w1 # snapshot ref (short-lived)
+id=win1 # stable window ID (session-scoped)
+title=Firefox # match by title
+class=firefox # match by WM class
+focused # currently focused window
```
-Bare strings such as `firefox` are fuzzy substring selectors. They fail on ambiguity instead of silently picking the wrong window.
+Bare strings like `firefox` do fuzzy matching but fail on ambiguity. Prefer explicit selectors.
## References
-- [references/install.md](references/install.md) - install paths, npm-first bootstrap, runtime prerequisites
-- [references/commands.md](references/commands.md) - grouped reads, waits, selectors, and core action commands
-- [references/sandbox-agent.md](references/sandbox-agent.md) - using `deskctl` inside sandbox-agent desktop sessions
+- [references/runtime-contract.md](references/runtime-contract.md) - output contract, stable fields, error kinds
+- [references/commands.md](references/commands.md) - all available commands
-## Templates
+## Workflows
-- [templates/install-deskctl-npm.sh](templates/install-deskctl-npm.sh) - install `deskctl-cli` into a user prefix
-- [templates/sandbox-agent-desktop-loop.sh](templates/sandbox-agent-desktop-loop.sh) - minimal observe/wait/act loop for desktop tasks
+- [workflows/observe-act.sh](workflows/observe-act.sh) - main observe-act loop
+- [workflows/poll-condition.sh](workflows/poll-condition.sh) - poll for a condition on screen
diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md
index 2d2dc1f..d0e7c9f 100644
--- a/skills/deskctl/references/commands.md
+++ b/skills/deskctl/references/commands.md
@@ -1,21 +1,23 @@
-# deskctl command guide
+# deskctl commands
+
+All commands support `--json` for machine-parseable output following the runtime contract.
## Observe
```bash
-deskctl doctor
-deskctl snapshot
-deskctl snapshot --annotate
-deskctl list-windows
-deskctl screenshot /tmp/current.png
-deskctl get active-window
-deskctl get monitors
-deskctl get version
-deskctl get systeminfo
+deskctl doctor # check X11 runtime and daemon health
+deskctl snapshot # screenshot + window list
+deskctl snapshot --annotate # screenshot with @wN labels overlaid
+deskctl list-windows # window list only (no screenshot)
+deskctl screenshot /tmp/screen.png # screenshot to explicit path
+deskctl get active-window # focused window info
+deskctl get monitors # monitor geometry
+deskctl get version # version and backend
+deskctl get systeminfo # full runtime diagnostics
+deskctl get-screen-size # screen resolution
+deskctl get-mouse-position # cursor coordinates
```
-Use `snapshot --annotate` when you need both the screenshot artifact and the short `@wN` labels. Use `list-windows` when you only need the window tree and do not want screenshot side effects.
-
## Wait
```bash
@@ -23,29 +25,19 @@ deskctl wait window --selector 'title=Firefox' --timeout 10
deskctl wait focus --selector 'class=firefox' --timeout 5
```
-Wait commands return the matched window payload on success. In `--json` mode, failures include structured `kind` values so the caller can recover without string parsing.
+Returns the matched window payload on success. Failures include structured `kind` values in `--json` mode.
## Selectors
-Prefer explicit selectors:
-
```bash
-ref=w1
-id=win1
-title=Firefox
-class=firefox
-focused
+ref=w1 # snapshot ref (short-lived, from last snapshot)
+id=win1 # stable window ID (session-scoped)
+title=Firefox # match by window title
+class=firefox # match by WM class
+focused # currently focused window
```
-Legacy refs still work:
-
-```bash
-@w1
-w1
-win1
-```
-
-Bare fuzzy selectors such as `firefox` are supported, but they fail on ambiguity.
+Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail on ambiguity.
## Act
@@ -58,6 +50,7 @@ deskctl press enter
deskctl hotkey ctrl shift t
deskctl mouse move 500 300
deskctl mouse scroll 3
+deskctl mouse scroll 3 --axis horizontal
deskctl mouse drag 100 100 500 500
deskctl move-window @w1 100 120
deskctl resize-window @w1 1280 720
@@ -65,11 +58,12 @@ deskctl close @w3
deskctl launch firefox
```
-## Agent loop
+## Daemon
-The safe pattern is:
+```bash
+deskctl daemon start
+deskctl daemon stop
+deskctl daemon status
+```
-1. Observe with `snapshot`, `list-windows`, or `get ...`
-2. Wait for the target window if needed
-3. Act using explicit selectors or refs
-4. Snapshot again to verify the result
+The daemon starts automatically on first command. Manual control is rarely needed.
diff --git a/skills/deskctl/references/install.md b/skills/deskctl/references/install.md
deleted file mode 100644
index cb97a5c..0000000
--- a/skills/deskctl/references/install.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# Install `deskctl`
-
-`deskctl` is designed to be used non-interactively by agents. The easiest install path is the npm package because it installs the `deskctl` command directly from GitHub Release assets without needing Cargo on the target machine.
-
-## Preferred: npm global install
-
-```bash
-npm install -g deskctl-cli
-deskctl --help
-```
-
-This is the preferred path for sandboxes, VMs, and sandbox-agent sessions where Node/npm already exists.
-
-## User-prefix npm install
-
-If global npm installs are not writable:
-
-```bash
-mkdir -p "$HOME/.local/bin"
-npm install -g --prefix "$HOME/.local" deskctl-cli
-export PATH="$HOME/.local/bin:$PATH"
-deskctl --help
-```
-
-This avoids `sudo` and keeps the install inside the user home directory.
-
-## One-shot npm execution
-
-```bash
-npx deskctl-cli --help
-```
-
-Use this for quick testing. For repeated desktop control, install the command once so the runtime is predictable.
-
-## Fallback: Cargo
-
-```bash
-cargo install deskctl
-```
-
-Use this only when the machine already has a Rust toolchain or when you explicitly want a source build.
-
-## Fallback: local Docker build
-
-If you need a Linux binary from macOS or another non-Linux host:
-
-```bash
-docker compose -f docker/docker-compose.yml run --rm build
-```
-
-Then copy `dist/deskctl-linux-x86_64` into the target machine.
-
-## Runtime prerequisites
-
-`deskctl` needs:
-
-- Linux
-- X11
-- a valid `DISPLAY`
-- a working desktop/window-manager session
-
-Quick verification:
-
-```bash
-printenv DISPLAY
-printenv XDG_SESSION_TYPE
-deskctl doctor
-```
-
-Inside sandbox-agent, you may need to install desktop dependencies first:
-
-```bash
-sandbox-agent install desktop --yes
-deskctl doctor
-```
diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md
new file mode 120000
index 0000000..8de0781
--- /dev/null
+++ b/skills/deskctl/references/runtime-contract.md
@@ -0,0 +1 @@
+../../../docs/runtime-contract.md
\ No newline at end of file
diff --git a/skills/deskctl/references/sandbox-agent.md b/skills/deskctl/references/sandbox-agent.md
deleted file mode 100644
index d994062..0000000
--- a/skills/deskctl/references/sandbox-agent.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# deskctl inside sandbox-agent
-
-Use `deskctl` when the sandbox-agent session includes a Linux desktop and you want a tight local desktop-control loop from the shell.
-
-## When it fits
-
-`deskctl` is a good fit when:
-
-- the sandbox already has an X11 desktop session
-- you want fast local desktop control from inside the sandbox
-- you want short-lived refs like `@w1` and grouped `get` or `wait` primitives
-
-It is not a replacement for sandbox-agent session orchestration itself. Use sandbox-agent to provision the sandbox and desktop runtime, then use `deskctl` inside that environment to control the GUI.
-
-## Minimal bootstrap
-
-```bash
-sandbox-agent install desktop --yes
-npm install -g deskctl-cli
-deskctl doctor
-deskctl snapshot --annotate
-```
-
-If npm global installs are not writable:
-
-```bash
-mkdir -p "$HOME/.local/bin"
-npm install -g --prefix "$HOME/.local" deskctl-cli
-export PATH="$HOME/.local/bin:$PATH"
-deskctl doctor
-```
-
-## Expected environment
-
-Check:
-
-```bash
-printenv DISPLAY
-printenv XDG_SESSION_TYPE
-deskctl --json get systeminfo
-```
-
-Healthy `deskctl` usage usually means:
-
-- `DISPLAY` is set
-- `XDG_SESSION_TYPE=x11`
-- `deskctl doctor` succeeds
-
-## Recommended workflow
-
-```bash
-deskctl snapshot --annotate
-deskctl wait window --selector 'class=firefox' --timeout 10
-deskctl focus 'class=firefox'
-deskctl hotkey ctrl l
-deskctl type "https://example.com"
-deskctl press enter
-deskctl snapshot
-```
-
-Prefer `--json` for strict machine parsing and explicit selectors for deterministic targeting.
diff --git a/skills/deskctl/templates/install-deskctl-npm.sh b/skills/deskctl/templates/install-deskctl-npm.sh
deleted file mode 100644
index a0ab596..0000000
--- a/skills/deskctl/templates/install-deskctl-npm.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-if command -v deskctl >/dev/null 2>&1; then
- echo "deskctl already installed: $(command -v deskctl)"
- exit 0
-fi
-
-if ! command -v npm >/dev/null 2>&1; then
- echo "npm is required for the preferred deskctl install path"
- exit 1
-fi
-
-prefix="${DESKCTL_NPM_PREFIX:-$HOME/.local}"
-bin_dir="$prefix/bin"
-
-mkdir -p "$bin_dir"
-npm install -g --prefix "$prefix" deskctl-cli
-
-if ! command -v deskctl >/dev/null 2>&1; then
- echo "deskctl installed to $bin_dir"
- echo "add this to PATH if needed:"
- echo "export PATH=\"$bin_dir:\$PATH\""
-fi
-
-"$bin_dir/deskctl" --help >/dev/null 2>&1 || true
-echo "deskctl bootstrap complete"
diff --git a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh b/skills/deskctl/templates/sandbox-agent-desktop-loop.sh
deleted file mode 100644
index f47dbb8..0000000
--- a/skills/deskctl/templates/sandbox-agent-desktop-loop.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-
-deskctl doctor
-deskctl snapshot --annotate
-deskctl get active-window
-deskctl wait window --selector "${1:-focused}" --timeout "${2:-5}"
diff --git a/skills/deskctl/workflows/observe-act.sh b/skills/deskctl/workflows/observe-act.sh
new file mode 100755
index 0000000..0e336ae
--- /dev/null
+++ b/skills/deskctl/workflows/observe-act.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# observe-act.sh - main desktop interaction loop
+# usage: ./observe-act.sh [action] [action-args...]
+# example: ./observe-act.sh 'title=Firefox' click
+# example: ./observe-act.sh 'class=terminal' type "ls -la"
+set -euo pipefail
+
+SELECTOR="${1:?usage: observe-act.sh [action] [action-args...]}"
+ACTION="${2:-click}"
+shift 2 2>/dev/null || true
+
+# 1. observe - snapshot the desktop, get current state
+echo "--- observe ---"
+deskctl snapshot --annotate --json | head -1
+deskctl get active-window
+
+# 2. wait - ensure target exists
+echo "--- wait ---"
+deskctl wait window --selector "$SELECTOR" --timeout 10
+
+# 3. act - perform the action on the target
+echo "--- act ---"
+case "$ACTION" in
+ click) deskctl click "$SELECTOR" ;;
+ dblclick) deskctl dblclick "$SELECTOR" ;;
+ focus) deskctl focus "$SELECTOR" ;;
+ type) deskctl focus "$SELECTOR" && deskctl type "$@" ;;
+ press) deskctl focus "$SELECTOR" && deskctl press "$@" ;;
+ hotkey) deskctl focus "$SELECTOR" && deskctl hotkey "$@" ;;
+ close) deskctl close "$SELECTOR" ;;
+ *) echo "unknown action: $ACTION"; exit 1 ;;
+esac
+
+# 4. verify - snapshot again to confirm result
+echo "--- verify ---"
+sleep 0.5
+deskctl snapshot --json | head -1
diff --git a/skills/deskctl/workflows/poll-condition.sh b/skills/deskctl/workflows/poll-condition.sh
new file mode 100755
index 0000000..e173bf5
--- /dev/null
+++ b/skills/deskctl/workflows/poll-condition.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# poll-condition.sh - poll the desktop until a condition is met
+# usage: ./poll-condition.sh [interval-seconds] [max-attempts]
+# example: ./poll-condition.sh "Tickets Available" 5 60
+# example: ./poll-condition.sh "Order Confirmed" 3 20
+# example: ./poll-condition.sh "Download Complete" 10 30
+#
+# checks window titles for the match string every N seconds.
+# exits 0 when found, exits 1 after max attempts.
+set -euo pipefail
+
+MATCH="${1:?usage: poll-condition.sh [interval] [max-attempts]}"
+INTERVAL="${2:-5}"
+MAX="${3:-60}"
+
+attempt=0
+while [ "$attempt" -lt "$MAX" ]; do
+ attempt=$((attempt + 1))
+
+ # snapshot and check window titles
+ windows=$(deskctl list-windows --json 2>/dev/null || echo '{"success":false}')
+ if echo "$windows" | grep -qi "$MATCH"; then
+ echo "FOUND: '$MATCH' detected on attempt $attempt"
+ deskctl snapshot --annotate
+ exit 0
+ fi
+
+ # also check screenshot text via active window title
+ active=$(deskctl get active-window --json 2>/dev/null || echo '{}')
+ if echo "$active" | grep -qi "$MATCH"; then
+ echo "FOUND: '$MATCH' in active window on attempt $attempt"
+ deskctl snapshot --annotate
+ exit 0
+ fi
+
+ echo "attempt $attempt/$MAX - '$MATCH' not found, waiting ${INTERVAL}s..."
+ sleep "$INTERVAL"
+done
+
+echo "NOT FOUND: '$MATCH' after $MAX attempts"
+deskctl snapshot --annotate
+exit 1
From 14c89563211a8fec4b916bc4686ee1b4b86070d4 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 08:17:07 -0400
Subject: [PATCH 05/37] align docs and contract
---
README.md | 268 ++++----------------------
docs/runtime-contract.md | 168 +++-------------
site/src/pages/architecture.mdx | 104 ++++++----
site/src/pages/commands.mdx | 219 ++++++++-------------
site/src/pages/index.astro | 57 +++++-
site/src/pages/installation.mdx | 75 ++++---
site/src/pages/quick-start.mdx | 106 +++++-----
site/src/pages/runtime-contract.mdx | 177 +++++++++++++++++
site/src/styles/base.css | 21 ++
skills/deskctl/references/commands.md | 52 +++--
10 files changed, 590 insertions(+), 657 deletions(-)
create mode 100644 site/src/pages/runtime-contract.mdx
diff --git a/README.md b/README.md
index db7d92f..32144f0 100644
--- a/README.md
+++ b/README.md
@@ -1,266 +1,68 @@
# deskctl
-Desktop control CLI for AI agents on Linux X11.
+[npm: deskctl-cli](https://www.npmjs.com/package/deskctl-cli)
+[GitHub releases](https://github.com/harivansh-afk/deskctl/releases)
+[runtime: Linux X11](#support-boundary)
+[skill: deskctl](skills/deskctl)
+
+Non-interactive desktop control for AI agents on Linux X11.
## Install
-### Cargo
-
-```bash
-cargo install deskctl
-```
-
-Source builds on Linux require:
-
-- Rust 1.75+
-- `pkg-config`
-- X11 development libraries for input and windowing, typically `libx11-dev` and `libxtst-dev` on Debian/Ubuntu
-
-### npm
-
```bash
npm install -g deskctl-cli
-deskctl --help
+deskctl doctor
+deskctl snapshot --annotate
```
-One-shot execution is also supported:
+One-shot execution also works:
```bash
npx deskctl-cli --help
```
-`deskctl-cli` currently supports `linux-x64` and installs the `deskctl` command by downloading the matching GitHub Release asset.
+`deskctl-cli` installs the `deskctl` command by downloading the matching GitHub Release asset for the supported runtime target.
-### Installable skill
-
-For `skills.sh` / agent skill ecosystems:
+## Installable skill
```bash
npx skills add harivansh-afk/deskctl -s deskctl
```
-The installable skill lives under [`skills/deskctl`](skills/deskctl) and is designed for X11 sandboxes, VMs, and sandbox-agent desktop sessions. It points agents to the npm install path first so they can get `deskctl` without Cargo.
+The installable skill lives in [`skills/deskctl`](skills/deskctl) and is built around the same observe -> wait -> act -> verify loop as the CLI.
-### Nix
+## Quick example
+
+```bash
+deskctl doctor
+deskctl snapshot --annotate
+deskctl wait window --selector 'title=Firefox' --timeout 10
+deskctl focus 'title=Firefox'
+deskctl type "hello world"
+```
+
+## Docs
+
+- runtime contract: [docs/runtime-contract.md](docs/runtime-contract.md)
+- release flow: [docs/releasing.md](docs/releasing.md)
+- installable skill: [skills/deskctl](skills/deskctl)
+- contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md)
+
+## Other install paths
+
+Nix:
```bash
nix run github:harivansh-afk/deskctl -- --help
nix profile install github:harivansh-afk/deskctl
```
-The repo flake is the supported Nix install surface in this phase.
-
-### Docker Convenience
-
-Build a Linux binary locally with Docker:
-
-```bash
-docker compose -f docker/docker-compose.yml run --rm build
-```
-
-This writes `dist/deskctl-linux-x86_64`.
-
-Copy it to an SSH machine where `scp` is unavailable:
-
-```bash
-ssh -p 443 deskctl@ssh.agentcomputer.ai 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64
-```
-
-Run it on an X11 session:
-
-```bash
-DISPLAY=:1 XDG_SESSION_TYPE=x11 ~/deskctl --json snapshot --annotate
-```
-
-### Local Source Build
+Source build:
```bash
cargo build
```
-## Quick Start
+## Support boundary
-```bash
-# Diagnose the environment first
-deskctl doctor
-
-# See the desktop
-deskctl snapshot
-
-# Query focused runtime state
-deskctl get active-window
-deskctl get monitors
-
-# Click a window
-deskctl click @w1
-
-# Type text
-deskctl type "hello world"
-
-# Wait for a window or focus transition
-deskctl wait window --selector 'title=Firefox' --timeout 10
-deskctl wait focus --selector 'class=firefox' --timeout 5
-
-# Focus by explicit selector
-deskctl focus 'title=Firefox'
-```
-
-## Architecture
-
-Client-daemon architecture over Unix sockets (NDJSON wire protocol).
-The daemon starts automatically on first command and keeps the X11 connection alive for fast repeated calls.
-
-Source layout:
-
-- `src/lib.rs` exposes the shared library target
-- `src/main.rs` is the thin CLI wrapper
-- `src/` contains production code and unit tests
-- `tests/` contains Linux/X11 integration tests
-- `tests/support/` contains shared integration helpers
-
-## Runtime Requirements
-
-- Linux with X11 session
-- Rust 1.75+ plus the source-build dependencies above when building from source
-
-The binary itself only links the standard glibc runtime on Linux (`libc`, `libm`, `libgcc_s`).
-
-For deskctl to be fully functional on a fresh VM you still need:
-
-- an X11 server and an active `DISPLAY`
-- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment
-- a window manager or desktop environment that exposes standard EWMH properties such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW`
-- an X server with the extensions needed for input simulation and screen metadata, which is standard on normal desktop X11 setups
-
-If setup fails, run:
-
-```bash
-deskctl doctor
-```
-
-## Contract Notes
-
-- `@wN` refs are short-lived handles assigned by `snapshot` and `list-windows`
-- `--json` output includes a stable `window_id` for programmatic targeting within the current daemon session
-- `list-windows` is a cheap read-only operation and does not capture or write a screenshot
-- the stable runtime JSON/error contract is documented in [docs/runtime-contract.md](docs/runtime-contract.md)
-
-## Read and Wait Surface
-
-The grouped runtime reads are:
-
-```bash
-deskctl get active-window
-deskctl get monitors
-deskctl get version
-deskctl get systeminfo
-```
-
-The grouped runtime waits are:
-
-```bash
-deskctl wait window --selector 'title=Firefox' --timeout 10
-deskctl wait focus --selector 'id=win3' --timeout 5
-```
-
-Successful `get active-window`, `wait window`, and `wait focus` responses return a `window` payload with:
-- `ref_id`
-- `window_id`
-- `title`
-- `app_name`
-- geometry (`x`, `y`, `width`, `height`)
-- state flags (`focused`, `minimized`)
-
-`get monitors` returns:
-- `count`
-- `monitors[]` with geometry and primary/automatic flags
-
-`get version` returns:
-- `version`
-- `backend`
-
-`get systeminfo` stays runtime-scoped and returns:
-- `backend`
-- `display`
-- `session_type`
-- `session`
-- `socket_path`
-- `screen`
-- `monitor_count`
-- `monitors`
-
-Wait timeout and selector failures are structured in `--json` mode so agents can recover without string parsing.
-
-## Output Policy
-
-Text mode is compact and follow-up-oriented, but JSON is the parsing contract.
-
-- use `--json` when an agent needs strict parsing
-- rely on `window_id`, selector-related fields, grouped read payloads, and structured error `kind` values for stable automation
-- treat monitor naming, incidental whitespace, and default screenshot file names as best-effort
-
-See [docs/runtime-conract.md](docs/runtime-contract.md) for the exact stable-vs-best-effort breakdown.
-
-## Distribution
-
-- GitHub Releases are the canonical binary source
-- crates.io package: `deskctl`
-- npm package: `deskctl-cli`
-- installed command on every channel: `deskctl`
-- repo-owned Nix install path: `flake.nix`
-
-For maintainer publishing and release steps, see [docs/releasing.md](docs/releasing.md).
-
-## Selector Contract
-
-Explicit selector modes:
-
-```bash
-ref=w1
-id=win1
-title=Firefox
-class=firefox
-focused
-```
-
-Legacy refs remain supported:
-
-```bash
-@w1
-w1
-win1
-```
-
-Bare selectors such as `firefox` are still supported as fuzzy substring matches, but they now fail on ambiguity and return candidate windows instead of silently picking the first match.
-
-## Support Boundary
-
-`deskctl` supports Linux X11 in this phase. Wayland and Hyprland are explicitly out of scope for the current runtime contract.
-
-## Workflow
-
-Local validation uses the root `Makefile`:
-
-```bash
-make fmt-check
-make lint
-make test-unit
-make test-integration
-make site-format-check
-make validate
-```
-
-`make validate` is the full repo-quality check and requires Linux with `xvfb-run` plus `pnpm --dir site install`.
-
-The repository standardizes on `pre-commit` for fast commit-time checks:
-
-```bash
-pre-commit install
-pre-commit run --all-files
-```
-
-See [CONTRIBUTING.md](CONTRIBUTING.md) for the full contributor guide.
-
-## Acknowledgements
-
-- [@barrettruth](github.com/barrettruth) - i stole the website from [vimdoc](https://github.com/barrettruth/vimdoc-language-server)
+`deskctl` currently supports Linux X11. Use `--json` for stable machine parsing, use `window_id` for programmatic targeting inside a live session, and use `deskctl doctor` first when the runtime looks broken.
diff --git a/docs/runtime-contract.md b/docs/runtime-contract.md
index 7312357..0316c06 100644
--- a/docs/runtime-contract.md
+++ b/docs/runtime-contract.md
@@ -1,19 +1,6 @@
-# Runtime Output Contract
+# deskctl runtime contract
-This document defines the current output contract for `deskctl`.
-
-It is intentionally scoped to the current Linux X11 runtime surface.
-It does not promise stability for future Wayland or window-manager-specific features.
-
-## Goals
-
-- Keep `deskctl` fully non-interactive
-- Make text output actionable for quick terminal and agent loops
-- Make `--json` safe for agent consumption without depending on incidental formatting
-
-## JSON Envelope
-
-Every runtime command uses the same top-level JSON envelope:
+All commands support `--json` and use the same top-level envelope:
```json
{
@@ -23,22 +10,11 @@ Every runtime command uses the same top-level JSON envelope:
}
```
-Stable top-level fields:
+Use `--json` whenever you need to parse output programmatically.
-- `success`
-- `data`
-- `error`
+## Stable window fields
-`success` is always the authoritative success/failure bit.
-When `success` is `false`, the CLI exits non-zero in both text mode and `--json` mode.
-
-## Stable Fields
-
-These fields are stable for agent consumption in the current Phase 1 runtime contract.
-
-### Window Identity
-
-Whenever a runtime response includes a window payload, these fields are stable:
+Whenever a response includes a window payload, these fields are stable:
- `ref_id`
- `window_id`
@@ -51,128 +27,46 @@ Whenever a runtime response includes a window payload, these fields are stable:
- `focused`
- `minimized`
-`window_id` is the stable public identifier for a live daemon session.
-`ref_id` is a short-lived convenience handle for the current window snapshot/ref map.
+Use `window_id` for stable targeting inside a live daemon session. Use
+`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or
+`list-windows`.
-### Grouped Reads
+## Stable grouped reads
-`deskctl get active-window`
+- `deskctl get active-window` -> `data.window`
+- `deskctl get monitors` -> `data.count`, `data.monitors`
+- `deskctl get version` -> `data.version`, `data.backend`
+- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as
+ `backend`, `display`, `session_type`, `session`, `socket_path`, `screen`,
+ `monitor_count`, and `monitors`
-- stable: `data.window`
+## Stable waits
-`deskctl get monitors`
+- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
+ `data.window`
+- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
+ `data.window`
-- stable: `data.count`
-- stable: `data.monitors`
-- stable per monitor:
- - `name`
- - `x`
- - `y`
- - `width`
- - `height`
- - `width_mm`
- - `height_mm`
- - `primary`
- - `automatic`
+## Stable structured error kinds
-`deskctl get version`
-
-- stable: `data.version`
-- stable: `data.backend`
-
-`deskctl get systeminfo`
-
-- stable: `data.backend`
-- stable: `data.display`
-- stable: `data.session_type`
-- stable: `data.session`
-- stable: `data.socket_path`
-- stable: `data.screen`
-- stable: `data.monitor_count`
-- stable: `data.monitors`
-
-### Waits
-
-`deskctl wait window`
-`deskctl wait focus`
-
-- stable: `data.wait`
-- stable: `data.selector`
-- stable: `data.elapsed_ms`
-- stable: `data.window`
-
-### Selector-Driven Action Success
-
-For selector-driven action commands that resolve a window target, these identifiers are stable when present:
-
-- `data.ref_id`
-- `data.window_id`
-- `data.title`
-- `data.selector`
-
-This applies to:
-
-- `click`
-- `dblclick`
-- `focus`
-- `close`
-- `move-window`
-- `resize-window`
-
-The exact human-readable text rendering of those commands is not part of the JSON contract.
-
-### Artifact-Producing Commands
-
-`snapshot`
-`screenshot`
-
-- stable: `data.screenshot`
-
-When the command also returns windows, `data.windows` uses the stable window payload documented above.
-
-## Stable Structured Error Kinds
-
-When a runtime command returns structured JSON failure data, these error kinds are stable:
+When a command fails with structured JSON data, these `kind` values are stable:
- `selector_not_found`
- `selector_ambiguous`
- `selector_invalid`
- `timeout`
- `not_found`
-- `window_not_focused` as `data.last_observation.kind` or equivalent observation payload
-Stable structured failure fields include:
+Wait failures may also include `window_not_focused` in the last observation
+payload.
-- `data.kind`
-- `data.selector` when selector-related
-- `data.mode` when selector-related
-- `data.candidates` for ambiguous selector failures
-- `data.message` for invalid selector failures
-- `data.wait`
-- `data.timeout_ms`
-- `data.poll_ms`
-- `data.last_observation`
+## Best-effort fields
-## Best-Effort Fields
+Treat these as useful but non-contractual:
-These values are useful but environment-dependent and should be treated as best-effort:
+- exact monitor names
+- incidental text formatting in non-JSON mode
+- default screenshot file names when no explicit path was provided
+- environment-dependent ordering details from the window manager
-- exact monitor naming conventions
-- EWMH/window-manager-dependent window ordering details
-- cosmetic text formatting in non-JSON mode
-- screenshot file names when the caller did not provide an explicit path
-- command stderr wording outside the structured `kind` classifications above
-
-## Text Mode Expectations
-
-Text mode is intended to stay compact and follow-up-useful.
-
-The exact whitespace/alignment of text output is not stable.
-The following expectations are stable at the behavioral level:
-
-- important runtime reads print actionable identifiers or geometry
-- selector failures print enough detail to recover without `--json`
-- artifact-producing commands print the artifact path
-- window listings print both `@wN` refs and `window_id` values
-
-If an agent needs strict parsing, it should use `--json`.
+This file is the canonical repo copy; `skills/deskctl/references/runtime-contract.md` is a symlink to it.
diff --git a/site/src/pages/architecture.mdx b/site/src/pages/architecture.mdx
index 87b2b4e..9478246 100644
--- a/site/src/pages/architecture.mdx
+++ b/site/src/pages/architecture.mdx
@@ -6,73 +6,93 @@ toc: true
# Architecture
-## Client-daemon model
+## Public model
-deskctl uses a client-daemon architecture over Unix sockets. The daemon starts automatically on the first command and keeps the X11 connection alive so repeated calls skip the connection setup overhead.
+`deskctl` is a thin, non-interactive X11 control primitive for agent loops.
+The public flow is:
-Each command opens a new connection to the daemon, sends a single NDJSON request, reads one NDJSON response, and exits.
+- diagnose with `deskctl doctor`
+- observe with `snapshot`, `list-windows`, and grouped `get` commands
+- wait with grouped `wait` commands instead of shell `sleep`
+- act with explicit selectors or coordinates
+- verify with another read or snapshot
-## Wire protocol
+The tool stays intentionally narrow. It does not try to be a full desktop shell
+or a speculative Wayland abstraction.
+
+## Client-daemon architecture
+
+The CLI talks to an auto-managed daemon over a Unix socket. The daemon keeps
+the X11 connection alive so repeated commands stay fast and share the same
+session-scoped window identity map.
+
+Each CLI invocation sends one request, reads one response, and exits.
+
+## Runtime contract
Requests and responses are newline-delimited JSON (NDJSON) over a Unix socket.
-**Request:**
+All commands share the same JSON envelope:
```json
-{ "id": "r123456", "action": "snapshot", "annotate": true }
+{
+ "success": true,
+ "data": {},
+ "error": null
+}
```
-**Response:**
+For window payloads, the public identity is `window_id`, not an X11 handle.
+That keeps the contract backend-neutral even though the current support
+boundary is X11-only.
-```json
-{"success": true, "data": {"screenshot": "/tmp/deskctl-1234567890.png", "windows": [...]}}
-```
+The complete stable-vs-best-effort policy lives on the
+[runtime contract](/runtime-contract) page.
-Error responses include an `error` field:
+## Sessions and sockets
-```json
-{ "success": false, "error": "window not found: @w99" }
-```
+Each session gets its own socket path, PID file, and live window mapping.
-## Socket location
+Public socket resolution order:
-The daemon socket is resolved in this order:
-
-1. `--socket` flag (highest priority)
-2. `$DESKCTL_SOCKET_DIR/{session}.sock`
-3. `$XDG_RUNTIME_DIR/deskctl/{session}.sock`
+1. `--socket`
+2. `DESKCTL_SOCKET_DIR/{session}.sock`
+3. `XDG_RUNTIME_DIR/deskctl/{session}.sock`
4. `~/.deskctl/{session}.sock`
-PID files are stored alongside the socket.
+Most users should let `deskctl` manage this automatically. `--session` is the
+main public knob when you need isolated daemon instances.
-## Sessions
+## Diagnostics and failure handling
-Multiple isolated daemon instances can run simultaneously using the `--session` flag:
+`deskctl doctor` runs before daemon startup and checks:
-```sh
-deskctl --session workspace1 snapshot
-deskctl --session workspace2 snapshot
-```
+- display/session setup
+- X11 connectivity
+- basic window enumeration
+- screenshot viability
+- socket directory and stale-socket health
-Each session has its own socket, PID file, and window ref map.
+Selector and wait failures are structured in `--json` mode so clients can
+recover without scraping text.
-## Backend design
+## Backend notes
-The core is built around a `DesktopBackend` trait. The current implementation uses `x11rb` for X11 protocol operations and `enigo` for input simulation.
+The backend is built around a `DesktopBackend` trait and currently ships with
+an X11 implementation backed by `x11rb`.
-The trait-based design means adding Wayland support is a single trait implementation with no changes to the core, CLI, or daemon code.
+The important public guarantee is not "portable desktop automation." The
+important guarantee is "a correct and unsurprising Linux X11 runtime contract."
-## X11 integration
+## X11 support boundary
-Window detection uses EWMH properties:
+This phase supports Linux X11 only.
-| Property | Purpose |
-| --------------------------- | ------------------------ |
-| `_NET_CLIENT_LIST_STACKING` | Window stacking order |
-| `_NET_ACTIVE_WINDOW` | Currently focused window |
-| `_NET_WM_NAME` | Window title (UTF-8) |
-| `_NET_WM_STATE_HIDDEN` | Minimized state |
-| `_NET_CLOSE_WINDOW` | Graceful close |
-| `WM_CLASS` | Application class/name |
+That means:
-Falls back to `XQueryTree` if `_NET_CLIENT_LIST_STACKING` is unavailable.
+- EWMH/window-manager properties matter
+- monitor naming and some ordering details are best-effort
+- Wayland and Hyprland are out of scope for the current contract
+
+The runtime documents those boundaries explicitly instead of pretending the
+surface is broader than it is.
diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx
index e1fc509..8a5132b 100644
--- a/site/src/pages/commands.mdx
+++ b/site/src/pages/commands.mdx
@@ -6,167 +6,101 @@ toc: true
# Commands
-## Snapshot
-
-Capture a screenshot and get the window tree:
+## Observe
```sh
+deskctl doctor
deskctl snapshot
deskctl snapshot --annotate
-```
-
-With `--annotate`, colored bounding boxes and `@wN` labels are drawn on the screenshot. Each window gets a unique color from an 8-color palette. Minimized windows are skipped.
-
-The screenshot is saved to `/tmp/deskctl-{timestamp}.png`.
-
-## Click
-
-Click the center of a window by ref, or click exact coordinates:
-
-```sh
-deskctl click @w1
-deskctl click 960,540
-```
-
-## Double click
-
-```sh
-deskctl dblclick @w1
-deskctl dblclick 500,300
-```
-
-## Type
-
-Type a string into the focused window:
-
-```sh
-deskctl type "hello world"
-```
-
-## Press
-
-Press a single key:
-
-```sh
-deskctl press enter
-deskctl press tab
-deskctl press escape
-```
-
-Supported key names: `enter`, `tab`, `escape`, `backspace`, `delete`, `space`, `up`, `down`, `left`, `right`, `home`, `end`, `pageup`, `pagedown`, `f1`-`f12`, or any single character.
-
-## Hotkey
-
-Send a key combination. List modifier keys first, then the target key:
-
-```sh
-deskctl hotkey ctrl c
-deskctl hotkey ctrl shift t
-deskctl hotkey alt f4
-```
-
-Modifier names: `ctrl`, `alt`, `shift`, `super` (also `meta` or `win`).
-
-## Mouse move
-
-Move the cursor to absolute coordinates:
-
-```sh
-deskctl mouse move 100 200
-```
-
-## Mouse scroll
-
-Scroll the mouse wheel. Positive values scroll down, negative scroll up:
-
-```sh
-deskctl mouse scroll 3
-deskctl mouse scroll -5
-deskctl mouse scroll 3 --axis horizontal
-```
-
-## Mouse drag
-
-Drag from one position to another:
-
-```sh
-deskctl mouse drag 100 200 500 600
-```
-
-## Focus
-
-Focus a window by ref or by name (case-insensitive substring match):
-
-```sh
-deskctl focus @w1
-deskctl focus "firefox"
-```
-
-## Close
-
-Close a window gracefully:
-
-```sh
-deskctl close @w2
-deskctl close "terminal"
-```
-
-## Move window
-
-Move a window to an absolute position:
-
-```sh
-deskctl move-window @w1 0 0
-deskctl move-window "firefox" 100 100
-```
-
-## Resize window
-
-Resize a window:
-
-```sh
-deskctl resize-window @w1 1280 720
-```
-
-## List windows
-
-List all windows without taking a screenshot:
-
-```sh
deskctl list-windows
-```
-
-## Get screen size
-
-```sh
+deskctl screenshot
+deskctl screenshot /tmp/screen.png
+deskctl get active-window
+deskctl get monitors
+deskctl get version
+deskctl get systeminfo
deskctl get-screen-size
-```
-
-## Get mouse position
-
-```sh
deskctl get-mouse-position
```
-## Screenshot
+`doctor` checks the runtime before daemon startup. `snapshot` produces a
+screenshot plus window refs. `list-windows` is the same window tree without the
+side effect of writing a screenshot.
-Take a screenshot without the window tree. Optionally specify a save path:
+## Wait
```sh
-deskctl screenshot
-deskctl screenshot /tmp/my-screenshot.png
-deskctl screenshot --annotate
+deskctl wait window --selector 'title=Firefox' --timeout 10
+deskctl wait focus --selector 'id=win3' --timeout 5
+deskctl --json wait window --selector 'class=firefox' --poll-ms 100
```
-## Launch
+Wait commands return the matched window payload on success. In `--json` mode,
+timeouts and selector failures expose structured `kind` values.
-Launch an application:
+## Act on a window
```sh
deskctl launch firefox
-deskctl launch code --args /path/to/project
+deskctl focus @w1
+deskctl focus 'title=Firefox'
+deskctl click @w1
+deskctl click 960,540
+deskctl dblclick @w2
+deskctl close @w3
+deskctl move-window @w1 100 120
+deskctl resize-window @w1 1280 720
```
+Selector-driven actions accept refs, explicit selector modes, or absolute
+coordinates where appropriate.
+
+## Input and mouse
+
+```sh
+deskctl type "hello world"
+deskctl press enter
+deskctl hotkey ctrl shift t
+deskctl mouse move 100 200
+deskctl mouse scroll 3
+deskctl mouse scroll 3 --axis horizontal
+deskctl mouse drag 100 200 500 600
+```
+
+Supported key names include `enter`, `tab`, `escape`, `backspace`, `delete`,
+`space`, arrow keys, paging keys, `f1` through `f12`, and any single
+character.
+
+## Launch
+
+```sh
+deskctl launch firefox
+deskctl launch code -- --new-window
+```
+
+## Selectors
+
+Prefer explicit selectors when the target matters:
+
+```sh
+ref=w1
+id=win1
+title=Firefox
+class=firefox
+focused
+```
+
+Legacy shorthand is still supported:
+
+```sh
+@w1
+w1
+win1
+```
+
+Bare strings like `firefox` are fuzzy matches. They resolve when there is one
+match and fail with candidate windows when there are multiple matches.
+
## Global options
| Flag | Env | Description |
@@ -174,3 +108,6 @@ deskctl launch code --args /path/to/project
| `--json` | | Output as JSON |
| `--socket <path>` | `DESKCTL_SOCKET` | Path to daemon Unix socket |
| `--session <name>` | | Session name for multiple daemons (default: `default`) |
+
+`deskctl` manages the daemon automatically. Most users never need to think
+about it beyond `--session` and `--socket`.
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index 9327dc5..4263549 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -8,17 +8,49 @@ import DocLayout from "../layouts/DocLayout.astro";
-
- Desktop control CLI for AI agents on Linux X11. Compact JSON output for
- agent loops. Screenshot, click, type, scroll, drag, and manage windows
- through a fast client-daemon architecture. 100% native Rust.
+
non-interactive desktop control for AI agents
+
+
+
+
+ deskctl is a thin X11 control primitive for agent loops: diagnose
+ the runtime, observe the desktop, wait for state transitions, act deterministically,
+ then verify.
- Getting started
+ npm install -g deskctl-cli
+deskctl doctor
+deskctl snapshot --annotate
+
+ Start here
Reference
@@ -28,14 +60,27 @@ import DocLayout from "../layouts/DocLayout.astro";
Architecture
+ Agent skill
+
+
+ There is also an installable skill for `skills.sh`-style agent runtimes:
+
+
+ npx skills add harivansh-afk/deskctl -s deskctl
+
Links
diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx
index e05772d..985cf99 100644
--- a/site/src/pages/installation.mdx
+++ b/site/src/pages/installation.mdx
@@ -6,43 +6,68 @@ toc: true
# Installation
-## Cargo
+## Default install
```sh
-cargo install deskctl
+npm install -g deskctl-cli
+deskctl --help
```
-## From source
+`deskctl-cli` is the default install path. It installs the `deskctl` command by
+downloading the matching GitHub Release asset for the supported runtime target.
+
+## One-shot usage
+
+```sh
+npx deskctl-cli --help
+```
+
+## Agent skill
+
+For `skills.sh`-style runtimes:
+
+```sh
+npx skills add harivansh-afk/deskctl -s deskctl
+```
+
+The repo skill lives under `skills/deskctl` and is designed around the same
+observe -> wait -> act -> verify loop as the CLI.
+
+## Other install paths
+
+### Nix
+
+```sh
+nix run github:harivansh-afk/deskctl -- --help
+nix profile install github:harivansh-afk/deskctl
+```
+
+### Build from source
```sh
git clone https://github.com/harivansh-afk/deskctl
cd deskctl
-cargo build --release
+cargo build
```
-## Docker (cross-compile for Linux)
+Source builds on Linux require:
-Build a static Linux binary from any platform:
+- Rust 1.75+
+- `pkg-config`
+- X11 development libraries such as `libx11-dev` and `libxtst-dev`
-```sh
-docker compose -f docker/docker-compose.yml run --rm build
-```
-
-This writes `dist/deskctl-linux-x86_64`.
-
-## Deploy to a remote machine
-
-Copy the binary over SSH when `scp` is not available:
-
-```sh
-ssh -p 443 user@host 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64
-```
-
-## Requirements
+## Runtime requirements
- Linux with an active X11 session
-- `DISPLAY` environment variable set (e.g. `DISPLAY=:1`)
-- `XDG_SESSION_TYPE=x11`
-- A window manager that exposes EWMH properties (`_NET_CLIENT_LIST_STACKING`, `_NET_ACTIVE_WINDOW`)
+- `DISPLAY` set to a usable X11 display, such as `DISPLAY=:1`
+- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment
+- a window manager or desktop environment that exposes standard EWMH properties
+ such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW`
-No extra native libraries are needed beyond the standard glibc runtime (`libc`, `libm`, `libgcc_s`).
+The binary itself only depends on the standard Linux glibc runtime.
+
+If setup fails, run:
+
+```sh
+deskctl doctor
+```
diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx
index 7f3bc07..c783b9e 100644
--- a/site/src/pages/quick-start.mdx
+++ b/site/src/pages/quick-start.mdx
@@ -6,50 +6,72 @@ toc: true
# Quick start
-## Core workflow
-
-The typical agent loop is: snapshot the desktop, interpret the result, act on it.
+## Install and diagnose
```sh
-# 1. see the desktop
-deskctl --json snapshot --annotate
+npm install -g deskctl-cli
+deskctl doctor
+```
-# 2. click a window by its ref
-deskctl click @w1
+Use `deskctl doctor` first. It checks X11 connectivity, basic enumeration,
+screenshot viability, and socket health before you start driving the desktop.
-# 3. type into the focused window
-deskctl type "hello world"
+## Observe
-# 4. press a key
+```sh
+deskctl snapshot --annotate
+deskctl list-windows
+deskctl get active-window
+deskctl get monitors
+```
+
+Use `snapshot` when you want a screenshot artifact plus window refs. Use
+`list-windows` when you only need the current window tree without writing a
+screenshot.
+
+## Target windows cleanly
+
+Prefer explicit selectors when you need deterministic targeting:
+
+```sh
+ref=w1
+id=win1
+title=Firefox
+class=firefox
+focused
+```
+
+Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare
+strings like `firefox` are fuzzy matches and now fail on ambiguity.
+
+## Wait, act, verify
+
+The core loop is:
+
+```sh
+# observe
+deskctl snapshot --annotate
+
+# wait
+deskctl wait window --selector 'title=Firefox' --timeout 10
+
+# act
+deskctl focus 'title=Firefox'
+deskctl hotkey ctrl l
+deskctl type "https://example.com"
deskctl press enter
+
+# verify
+deskctl wait focus --selector 'title=Firefox' --timeout 5
+deskctl snapshot
```
-The `--annotate` flag draws colored bounding boxes and `@wN` labels on the screenshot so agents can visually identify windows.
+The wait commands return the matched window payload on success, so they compose
+cleanly into the next action.
-## Window refs
+## Use `--json` when parsing matters
-Every `snapshot` assigns refs like `@w1`, `@w2`, etc. to each visible window, ordered top-to-bottom by stacking order. Use these refs anywhere a selector is expected:
-
-```sh
-deskctl click @w1
-deskctl focus @w3
-deskctl close @w2
-```
-
-You can also select windows by name (case-insensitive substring match):
-
-```sh
-deskctl focus "firefox"
-deskctl close "terminal"
-```
-
-## JSON output
-
-Pass `--json` for machine-readable output. This is the primary mode for agent integrations:
-
-```sh
-deskctl --json snapshot
-```
+Every command supports `--json` and uses the same top-level envelope:
```json
{
@@ -59,7 +81,7 @@ deskctl --json snapshot
"windows": [
{
"ref_id": "w1",
- "xcb_id": 12345678,
+ "window_id": "win1",
"title": "Firefox",
"app_name": "firefox",
"x": 0,
@@ -74,14 +96,8 @@ deskctl --json snapshot
}
```
-## Daemon lifecycle
+Use `window_id` for stable targeting inside a live daemon session. The exact
+text formatting is intentionally compact, but JSON is the parsing contract.
-The daemon starts automatically on the first command. It keeps the X11 connection alive so repeated calls are fast. You do not need to manage it manually.
-
-```sh
-# check if the daemon is running
-deskctl daemon status
-
-# stop it explicitly
-deskctl daemon stop
-```
+The full stable-vs-best-effort contract lives on the
+[runtime contract](/runtime-contract) page.
diff --git a/site/src/pages/runtime-contract.mdx b/site/src/pages/runtime-contract.mdx
new file mode 100644
index 0000000..4fca14c
--- /dev/null
+++ b/site/src/pages/runtime-contract.mdx
@@ -0,0 +1,177 @@
+---
+layout: ../layouts/DocLayout.astro
+title: Runtime contract
+toc: true
+---
+
+# Runtime contract
+
+This page defines the current public output contract for `deskctl`.
+
+It is intentionally scoped to the current Linux X11 runtime surface. It does
+not promise stability for future Wayland or window-manager-specific features.
+
+## JSON envelope
+
+Every command supports `--json` and uses the same top-level envelope:
+
+```json
+{
+ "success": true,
+ "data": {},
+ "error": null
+}
+```
+
+Stable top-level fields:
+
+- `success`
+- `data`
+- `error`
+
+If `success` is `false`, the command exits non-zero in both text mode and JSON
+mode.
+
+## Stable window fields
+
+Whenever a response includes a window payload, these fields are stable:
+
+- `ref_id`
+- `window_id`
+- `title`
+- `app_name`
+- `x`
+- `y`
+- `width`
+- `height`
+- `focused`
+- `minimized`
+
+`window_id` is the public session-scoped identifier for programmatic targeting.
+`ref_id` is a short-lived convenience handle from the current ref map.
+
+## Stable grouped reads
+
+`deskctl get active-window`
+
+- stable: `data.window`
+
+`deskctl get monitors`
+
+- stable: `data.count`
+- stable: `data.monitors`
+
+Stable per-monitor fields:
+
+- `name`
+- `x`
+- `y`
+- `width`
+- `height`
+- `width_mm`
+- `height_mm`
+- `primary`
+- `automatic`
+
+`deskctl get version`
+
+- stable: `data.version`
+- stable: `data.backend`
+
+`deskctl get systeminfo`
+
+- stable: `data.backend`
+- stable: `data.display`
+- stable: `data.session_type`
+- stable: `data.session`
+- stable: `data.socket_path`
+- stable: `data.screen`
+- stable: `data.monitor_count`
+- stable: `data.monitors`
+
+## Stable waits
+
+`deskctl wait window`
+`deskctl wait focus`
+
+- stable: `data.wait`
+- stable: `data.selector`
+- stable: `data.elapsed_ms`
+- stable: `data.window`
+
+## Stable selector-driven action fields
+
+When selector-driven actions return resolved window data, these fields are
+stable when present:
+
+- `data.ref_id`
+- `data.window_id`
+- `data.title`
+- `data.selector`
+
+This applies to:
+
+- `click`
+- `dblclick`
+- `focus`
+- `close`
+- `move-window`
+- `resize-window`
+
+## Stable artifact fields
+
+For `snapshot` and `screenshot`:
+
+- stable: `data.screenshot`
+
+When a command also returns windows, `data.windows` uses the stable window
+payload documented above.
+
+## Stable structured error kinds
+
+When a command fails with structured JSON data, these error kinds are stable:
+
+- `selector_not_found`
+- `selector_ambiguous`
+- `selector_invalid`
+- `timeout`
+- `not_found`
+- `window_not_focused` in `data.last_observation.kind` or an equivalent wait
+ observation payload
+
+Stable structured failure fields include:
+
+- `data.kind`
+- `data.selector`
+- `data.mode`
+- `data.candidates`
+- `data.message`
+- `data.wait`
+- `data.timeout_ms`
+- `data.poll_ms`
+- `data.last_observation`
+
+## Best-effort fields
+
+These values are useful but environment-dependent and should not be treated as
+strict parsing guarantees:
+
+- exact monitor naming conventions
+- EWMH/window-manager-dependent ordering details
+- cosmetic text formatting in non-JSON mode
+- default screenshot file names when no explicit path was provided
+- stderr wording outside the structured `kind` classifications above
+
+## Text mode expectations
+
+Text mode is intended to stay compact and useful for follow-up commands.
+
+The exact whitespace and alignment are not stable. The stable behavioral
+expectations are:
+
+- important reads print actionable identifiers or geometry
+- selector failures print enough detail to recover without `--json`
+- artifact-producing commands print the artifact path
+- window listings print both `@wN` refs and `window_id` values
+
+If you need strict parsing, use `--json`.
diff --git a/site/src/styles/base.css b/site/src/styles/base.css
index 86fd6a8..f60c0e6 100644
--- a/site/src/styles/base.css
+++ b/site/src/styles/base.css
@@ -65,6 +65,23 @@ main {
font-style: italic;
}
+.lede {
+ font-size: 1.05rem;
+ max-width: 42rem;
+}
+
+.badges {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 0.6rem;
+ margin-bottom: 1.25rem;
+}
+
+.badges a,
+.badges img {
+ display: block;
+}
+
header {
display: flex;
align-items: center;
@@ -117,6 +134,10 @@ a:hover {
text-decoration-thickness: 2px;
}
+img {
+ max-width: 100%;
+}
+
ul,
ol {
padding-left: 1.25em;
diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md
index d0e7c9f..77b9513 100644
--- a/skills/deskctl/references/commands.md
+++ b/skills/deskctl/references/commands.md
@@ -1,21 +1,22 @@
# deskctl commands
-All commands support `--json` for machine-parseable output following the runtime contract.
+All commands support `--json` for machine-parseable output following the
+runtime contract.
## Observe
```bash
-deskctl doctor # check X11 runtime and daemon health
-deskctl snapshot # screenshot + window list
-deskctl snapshot --annotate # screenshot with @wN labels overlaid
-deskctl list-windows # window list only (no screenshot)
-deskctl screenshot /tmp/screen.png # screenshot to explicit path
-deskctl get active-window # focused window info
-deskctl get monitors # monitor geometry
-deskctl get version # version and backend
-deskctl get systeminfo # full runtime diagnostics
-deskctl get-screen-size # screen resolution
-deskctl get-mouse-position # cursor coordinates
+deskctl doctor
+deskctl snapshot
+deskctl snapshot --annotate
+deskctl list-windows
+deskctl screenshot /tmp/screen.png
+deskctl get active-window
+deskctl get monitors
+deskctl get version
+deskctl get systeminfo
+deskctl get-screen-size
+deskctl get-mouse-position
```
## Wait
@@ -25,19 +26,21 @@ deskctl wait window --selector 'title=Firefox' --timeout 10
deskctl wait focus --selector 'class=firefox' --timeout 5
```
-Returns the matched window payload on success. Failures include structured `kind` values in `--json` mode.
+Returns the matched window payload on success. Failures include structured
+`kind` values in `--json` mode.
## Selectors
```bash
-ref=w1 # snapshot ref (short-lived, from last snapshot)
-id=win1 # stable window ID (session-scoped)
-title=Firefox # match by window title
-class=firefox # match by WM class
-focused # currently focused window
+ref=w1
+id=win1
+title=Firefox
+class=firefox
+focused
```
-Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail on ambiguity.
+Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail
+on ambiguity.
## Act
@@ -58,12 +61,5 @@ deskctl close @w3
deskctl launch firefox
```
-## Daemon
-
-```bash
-deskctl daemon start
-deskctl daemon stop
-deskctl daemon status
-```
-
-The daemon starts automatically on first command. Manual control is rarely needed.
+The daemon starts automatically on first command. In normal usage you should
+not need to manage it directly.
From 88f9ff85a3fa5b95028bb1e7811078416eaf43ae Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 08:18:57 -0400
Subject: [PATCH 06/37] clean
---
site/src/pages/index.astro | 25 -------------------------
site/src/styles/base.css | 12 ------------
2 files changed, 37 deletions(-)
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index 4263549..b8bf92b 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -10,31 +10,6 @@ import DocLayout from "../layouts/DocLayout.astro";
non-interactive desktop control for AI agents
-
-
deskctl is a thin X11 control primitive for agent loops: diagnose
the runtime, observe the desktop, wait for state transitions, act deterministically,
diff --git a/site/src/styles/base.css b/site/src/styles/base.css
index f60c0e6..cd569a9 100644
--- a/site/src/styles/base.css
+++ b/site/src/styles/base.css
@@ -70,18 +70,6 @@ main {
max-width: 42rem;
}
-.badges {
- display: flex;
- flex-wrap: wrap;
- gap: 0.6rem;
- margin-bottom: 1.25rem;
-}
-
-.badges a,
-.badges img {
- display: block;
-}
-
header {
display: flex;
align-items: center;
From eac3a61ceb35002bf3957e6d0ebe4c2025ab1203 Mon Sep 17 00:00:00 2001
From: Hari <73809867+harivansh-afk@users.noreply.github.com>
Date: Thu, 26 Mar 2026 08:44:10 -0400
Subject: [PATCH 07/37] rename (#11)
* align docs and contract
* clean
* rename from deskctl-cli to deskctl
* runtime
---
.github/workflows/ci.yml | 4 ++--
.github/workflows/publish.yml | 12 ++++++------
.gitignore | 4 ++--
CONTRIBUTING.md | 2 +-
Makefile | 4 ++--
README.md | 9 +++++----
docs/releasing.md | 4 ++--
docs/runtime-contract.md | 2 --
npm/{deskctl-cli => deskctl}/README.md | 10 +++++-----
npm/{deskctl-cli => deskctl}/bin/deskctl.js | 2 +-
npm/{deskctl-cli => deskctl}/package.json | 4 ++--
npm/{deskctl-cli => deskctl}/scripts/postinstall.js | 2 +-
npm/{deskctl-cli => deskctl}/scripts/support.js | 2 +-
.../scripts/validate-package.js | 4 ++--
site/src/pages/index.astro | 11 +++--------
site/src/pages/installation.mdx | 6 +++---
site/src/pages/quick-start.mdx | 2 +-
skills/deskctl/SKILL.md | 4 ++--
18 files changed, 41 insertions(+), 47 deletions(-)
rename npm/{deskctl-cli => deskctl}/README.md (67%)
rename npm/{deskctl-cli => deskctl}/bin/deskctl.js (91%)
rename npm/{deskctl-cli => deskctl}/package.json (86%)
rename npm/{deskctl-cli => deskctl}/scripts/postinstall.js (94%)
rename npm/{deskctl-cli => deskctl}/scripts/support.js (97%)
rename npm/{deskctl-cli => deskctl}/scripts/validate-package.js (87%)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e95b27a..b7a4d6f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -189,7 +189,7 @@ jobs:
NEW="${{ needs.changes.outputs.version }}"
if [ "$CURRENT" != "$NEW" ]; then
sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
- node -e 'const fs=require("node:fs"); const path="npm/deskctl-cli/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
+ node -e 'const fs=require("node:fs"); const path="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
cargo generate-lockfile
fi
@@ -199,7 +199,7 @@ jobs:
git config user.email "github-actions[bot]@users.noreply.github.com"
if ! git diff --quiet; then
- git add Cargo.toml Cargo.lock npm/deskctl-cli/package.json
+ git add Cargo.toml Cargo.lock npm/deskctl/package.json
git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]"
fi
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 329f151..c4b1ecf 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -8,7 +8,7 @@ on:
required: true
type: string
publish_npm:
- description: Publish deskctl-cli to npm
+ description: Publish deskctl to npm
required: true
type: boolean
default: false
@@ -51,7 +51,7 @@ jobs:
TAG="${{ inputs.tag }}"
VERSION="${TAG#v}"
CARGO_VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- NPM_VERSION=$(node -p 'require("./npm/deskctl-cli/package.json").version')
+ NPM_VERSION=$(node -p 'require("./npm/deskctl/package.json").version')
test "$VERSION" = "$CARGO_VERSION"
test "$VERSION" = "$NPM_VERSION"
@@ -62,7 +62,7 @@ jobs:
VERSION="${{ inputs.tag }}"
VERSION="${VERSION#v}"
- if npm view "deskctl-cli@${VERSION}" version >/dev/null 2>&1; then
+ if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then
echo "npm=true" >> "$GITHUB_OUTPUT"
else
echo "npm=false" >> "$GITHUB_OUTPUT"
@@ -77,8 +77,8 @@ jobs:
- name: Validate npm package
run: |
mkdir -p ./tmp/npm-pack
- node npm/deskctl-cli/scripts/validate-package.js
- npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null
+ node npm/deskctl/scripts/validate-package.js
+ npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null
- name: Validate crate publish path
run: cargo publish --dry-run --locked
@@ -87,7 +87,7 @@ jobs:
if: inputs.publish_npm && steps.published.outputs.npm != 'true'
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- run: npm publish ./npm/deskctl-cli --access public
+ run: npm publish ./npm/deskctl --access public
- name: Publish crates.io
if: inputs.publish_crates && steps.published.outputs.crates != 'true'
diff --git a/.gitignore b/.gitignore
index db552f7..40542a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,5 +5,5 @@ secret/
.claude/
.codex/
openspec/
-npm/deskctl-cli/vendor/
-npm/deskctl-cli/*.tgz
+npm/deskctl/vendor/
+npm/deskctl/*.tgz
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 926c58a..97e8c7c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -70,7 +70,7 @@ The hook config intentionally stays small:
Distribution support currently ships through:
- crate: `deskctl`
-- npm package: `deskctl-cli`
+- npm package: `deskctl`
- repo flake: `flake.nix`
- command name on every channel: `deskctl`
diff --git a/Makefile b/Makefile
index 97857e3..7e1f852 100644
--- a/Makefile
+++ b/Makefile
@@ -38,10 +38,10 @@ npm-package-check:
echo "npm is required for npm packaging validation."; \
exit 1; \
fi
- node npm/deskctl-cli/scripts/validate-package.js
+ node npm/deskctl/scripts/validate-package.js
rm -rf tmp/npm-pack tmp/npm-install
mkdir -p tmp/npm-pack tmp/npm-install/bin
- npm pack ./npm/deskctl-cli --pack-destination ./tmp/npm-pack >/dev/null
+ npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null
@if [ "$$(uname -s)" != "Linux" ]; then \
echo "Skipping npm package runtime smoke test on non-Linux host."; \
else \
diff --git a/README.md b/README.md
index 32144f0..4b42b5f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# deskctl
-[](https://www.npmjs.com/package/deskctl-cli)
+[](https://www.npmjs.com/package/deskctl)
[](https://github.com/harivansh-afk/deskctl/releases)
[](#support-boundary)
[](skills/deskctl)
@@ -10,7 +10,7 @@ Non-interactive desktop control for AI agents on Linux X11.
## Install
```bash
-npm install -g deskctl-cli
+npm install -g deskctl
deskctl doctor
deskctl snapshot --annotate
```
@@ -18,10 +18,11 @@ deskctl snapshot --annotate
One-shot execution also works:
```bash
-npx deskctl-cli --help
+npx deskctl --help
```
-`deskctl-cli` installs the `deskctl` command by downloading the matching GitHub Release asset for the supported runtime target.
+The `deskctl` npm package installs the `deskctl` command by downloading the matching GitHub Release asset for the supported runtime target.
+
## Installable skill
diff --git a/docs/releasing.md b/docs/releasing.md
index 7271b83..8f39d3f 100644
--- a/docs/releasing.md
+++ b/docs/releasing.md
@@ -12,14 +12,14 @@ GitHub Releases are the canonical binary source. The npm package consumes those
## Package Names
- crate: `deskctl`
-- npm package: `deskctl-cli`
+- npm package: `deskctl`
- installed command: `deskctl`
## Prerequisites
Before the first live publish on each registry:
-- npm ownership for `deskctl-cli`
+- npm ownership for `deskctl`
- crates.io ownership for `deskctl`
- repository secrets:
- `NPM_TOKEN`
diff --git a/docs/runtime-contract.md b/docs/runtime-contract.md
index 0316c06..ee4727b 100644
--- a/docs/runtime-contract.md
+++ b/docs/runtime-contract.md
@@ -68,5 +68,3 @@ Treat these as useful but non-contractual:
- incidental text formatting in non-JSON mode
- default screenshot file names when no explicit path was provided
- environment-dependent ordering details from the window manager
-
-For the full repo copy, see `docs/runtime-contract.md`.
diff --git a/npm/deskctl-cli/README.md b/npm/deskctl/README.md
similarity index 67%
rename from npm/deskctl-cli/README.md
rename to npm/deskctl/README.md
index fd6f610..7bb42a9 100644
--- a/npm/deskctl-cli/README.md
+++ b/npm/deskctl/README.md
@@ -1,11 +1,11 @@
-# deskctl-cli
+# deskctl
-`deskctl-cli` installs the `deskctl` command for Linux X11 systems.
+This package installs the `deskctl` command for Linux X11 systems.
## Install
```bash
-npm install -g deskctl-cli
+npm install -g deskctl
```
After install, run:
@@ -17,7 +17,7 @@ deskctl --help
One-shot usage is also supported:
```bash
-npx deskctl-cli --help
+npx deskctl --help
```
## Runtime Support
@@ -26,7 +26,7 @@ npx deskctl-cli --help
- X11 session
- currently packaged release asset: `linux-x64`
-`deskctl-cli` downloads the matching GitHub Release binary during install.
+The package downloads the matching GitHub Release binary during install.
Unsupported targets fail during install with a clear runtime support error instead of installing a broken command.
If you want the Rust source-install path instead, use:
diff --git a/npm/deskctl-cli/bin/deskctl.js b/npm/deskctl/bin/deskctl.js
similarity index 91%
rename from npm/deskctl-cli/bin/deskctl.js
rename to npm/deskctl/bin/deskctl.js
index 9f9b480..b8514cf 100644
--- a/npm/deskctl-cli/bin/deskctl.js
+++ b/npm/deskctl/bin/deskctl.js
@@ -17,7 +17,7 @@ function main() {
`Expected: ${binaryPath}`,
`Package version: ${pkg.version}`,
`Release tag: ${releaseTag(pkg)}`,
- "Try reinstalling deskctl-cli or check that your target is supported."
+ "Try reinstalling deskctl or check that your target is supported."
].join("\n")
);
process.exit(1);
diff --git a/npm/deskctl-cli/package.json b/npm/deskctl/package.json
similarity index 86%
rename from npm/deskctl-cli/package.json
rename to npm/deskctl/package.json
index 84f27ee..4dbaba6 100644
--- a/npm/deskctl-cli/package.json
+++ b/npm/deskctl/package.json
@@ -1,7 +1,7 @@
{
- "name": "deskctl-cli",
+ "name": "deskctl",
"version": "0.1.6",
- "description": "Installable deskctl CLI package for Linux X11 agents",
+ "description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
"repository": {
diff --git a/npm/deskctl-cli/scripts/postinstall.js b/npm/deskctl/scripts/postinstall.js
similarity index 94%
rename from npm/deskctl-cli/scripts/postinstall.js
rename to npm/deskctl/scripts/postinstall.js
index de1b1d0..1f43ad0 100644
--- a/npm/deskctl-cli/scripts/postinstall.js
+++ b/npm/deskctl/scripts/postinstall.js
@@ -44,6 +44,6 @@ async function main() {
}
main().catch((error) => {
- console.error(`deskctl-cli install failed: ${error.message}`);
+ console.error(`deskctl install failed: ${error.message}`);
process.exit(1);
});
diff --git a/npm/deskctl-cli/scripts/support.js b/npm/deskctl/scripts/support.js
similarity index 97%
rename from npm/deskctl-cli/scripts/support.js
rename to npm/deskctl/scripts/support.js
index 8d41520..1fd0d47 100644
--- a/npm/deskctl-cli/scripts/support.js
+++ b/npm/deskctl/scripts/support.js
@@ -26,7 +26,7 @@ function supportedTarget(platform = process.platform, arch = process.arch) {
}
throw new Error(
- `deskctl-cli currently supports linux-x64 only. Received ${platform}-${arch}.`
+ `deskctl currently supports linux-x64 only. Received ${platform}-${arch}.`
);
}
diff --git a/npm/deskctl-cli/scripts/validate-package.js b/npm/deskctl/scripts/validate-package.js
similarity index 87%
rename from npm/deskctl-cli/scripts/validate-package.js
rename to npm/deskctl/scripts/validate-package.js
index 46d3e87..450fd6c 100644
--- a/npm/deskctl-cli/scripts/validate-package.js
+++ b/npm/deskctl/scripts/validate-package.js
@@ -26,13 +26,13 @@ function main() {
}
if (pkg.bin?.deskctl !== "bin/deskctl.js") {
- throw new Error("deskctl-cli must expose the deskctl bin entrypoint.");
+ throw new Error("deskctl must expose the deskctl bin entrypoint.");
}
const target = supportedTarget("linux", "x64");
const targetPath = vendorBinaryPath(target);
const vendorDir = path.dirname(targetPath);
- if (!vendorDir.endsWith(path.join("deskctl-cli", "vendor"))) {
+ if (!vendorDir.endsWith(path.join("deskctl", "vendor"))) {
throw new Error("Vendor binary directory resolved unexpectedly.");
}
}
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index b8bf92b..8b8d4b4 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -16,10 +16,6 @@ import DocLayout from "../layouts/DocLayout.astro";
then verify.
- npm install -g deskctl-cli
-deskctl doctor
-deskctl snapshot --annotate
-
Start here
@@ -33,6 +29,7 @@ deskctl snapshot --annotate
Agent skill
@@ -47,15 +44,13 @@ deskctl snapshot --annotate
diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx
index 985cf99..df53fcc 100644
--- a/site/src/pages/installation.mdx
+++ b/site/src/pages/installation.mdx
@@ -9,17 +9,17 @@ toc: true
## Default install
```sh
-npm install -g deskctl-cli
+npm install -g deskctl
deskctl --help
```
-`deskctl-cli` is the default install path. It installs the `deskctl` command by
+The `deskctl` npm package is the default install path. It installs the command by
downloading the matching GitHub Release asset for the supported runtime target.
## One-shot usage
```sh
-npx deskctl-cli --help
+npx deskctl --help
```
## Agent skill
diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx
index c783b9e..10f3ec0 100644
--- a/site/src/pages/quick-start.mdx
+++ b/site/src/pages/quick-start.mdx
@@ -9,7 +9,7 @@ toc: true
## Install and diagnose
```sh
-npm install -g deskctl-cli
+npm install -g deskctl
deskctl doctor
```
diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md
index 81dea19..244a1fb 100644
--- a/skills/deskctl/SKILL.md
+++ b/skills/deskctl/SKILL.md
@@ -1,7 +1,7 @@
---
name: deskctl
description: Non-interactive X11 desktop control for AI agents. Use when the task involves controlling a Linux desktop - clicking, typing, reading windows, waiting for UI state, or taking screenshots inside a sandbox or VM.
-allowed-tools: Bash(deskctl:*), Bash(npx deskctl-cli:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*)
+allowed-tools: Bash(deskctl:*), Bash(npx deskctl:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*)
---
# deskctl
@@ -13,7 +13,7 @@ All output follows the runtime contract defined in [references/runtime-contract.
## Quick start
```bash
-npm install -g deskctl-cli
+npm install -g deskctl
deskctl doctor
deskctl snapshot --annotate
```
From 86c36a3b509aac8ea5869feb02df931fbcf7d752 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 08:53:50 -0400
Subject: [PATCH 08/37] release: v0.1.7 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 71a9a54..6922004 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.6"
+version = "0.1.7"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index b05507b..5872639 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.6"
+version = "0.1.7"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index 4dbaba6..6085bca 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.6",
+ "version": "0.1.7",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From 1d72c7b852e4195f20b002a4aaf25e2b1a2b8e26 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:01:12 -0400
Subject: [PATCH 09/37] fix: add registry-url to setup-node for npm auth [skip
ci]
---
.github/workflows/publish.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index c4b1ecf..1f6b282 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -34,6 +34,7 @@ jobs:
- uses: actions/setup-node@v4
with:
node-version: 22
+ registry-url: https://registry.npmjs.org
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
From deaffff45a574b1701482ac04043f7af557e46f5 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:07:56 -0400
Subject: [PATCH 10/37] major/minor/patch
---
.github/workflows/publish.yml | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 1f6b282..31b3f4f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -3,15 +3,19 @@ name: Publish Registries
on:
workflow_dispatch:
inputs:
- tag:
- description: Release tag to publish (for example v0.1.5)
+ bump:
+ description: Version bump type
required: true
- type: string
+ type: choice
+ options:
+ - patch
+ - minor
+ - major
publish_npm:
description: Publish deskctl to npm
required: true
type: boolean
- default: false
+ default: true
publish_crates:
description: Publish deskctl to crates.io
required: true
From 47047e90641bf5e4b90f31aeb1157cd9b054868e Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:11:13 -0400
Subject: [PATCH 11/37] migrate update manifest job to publish workflow
---
.github/workflows/ci.yml | 75 +++--------------------------------
.github/workflows/publish.yml | 54 +++++++++++++++++--------
2 files changed, 43 insertions(+), 86 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b7a4d6f..cb36e61 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -52,32 +52,13 @@ jobs:
echo "rust=${{ steps.filter.outputs.rust }}" >> "$GITHUB_OUTPUT"
fi
- - name: Calculate next version
+ - name: Read current version
id: version
if: github.event_name != 'pull_request' && steps.check.outputs.rust == 'true'
run: |
- BASE=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- IFS='.' read -r MAJOR MINOR PATCH <<< "$BASE"
-
- LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1)
-
- if [ -z "$LATEST" ]; then
- NEW="$BASE"
- else
- LATEST_VER="${LATEST#v}"
- IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER"
- NEW_PATCH=$((LATEST_PATCH + 1))
- NEW="${MAJOR}.${MINOR}.${NEW_PATCH}"
- fi
-
- # Ensure the computed version does not already have a tag
- while git rev-parse "v${NEW}" >/dev/null 2>&1; do
- IFS='.' read -r MAJOR MINOR PATCH <<< "$NEW"
- NEW="${MAJOR}.${MINOR}.$((PATCH + 1))"
- done
-
- echo "version=${NEW}" >> "$GITHUB_OUTPUT"
- echo "tag=v${NEW}" >> "$GITHUB_OUTPUT"
+ VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
+ echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT"
validate:
name: Validate
@@ -167,57 +148,13 @@ jobs:
- name: Distribution validation
run: make dist-validate
- update-manifests:
- name: Update Manifests
- needs: [changes, validate, integration, distribution]
- if: github.event_name != 'pull_request'
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - uses: dtolnay/rust-toolchain@stable
-
- - uses: actions/setup-node@v4
- with:
- node-version: 22
-
- - name: Update version in Cargo.toml
- run: |
- CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- NEW="${{ needs.changes.outputs.version }}"
- if [ "$CURRENT" != "$NEW" ]; then
- sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
- node -e 'const fs=require("node:fs"); const path="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(path,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(path, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
- cargo generate-lockfile
- fi
-
- - name: Commit, tag, and push
- run: |
- git config user.name "github-actions[bot]"
- git config user.email "github-actions[bot]@users.noreply.github.com"
-
- if ! git diff --quiet; then
- git add Cargo.toml Cargo.lock npm/deskctl/package.json
- git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]"
- fi
-
- if ! git rev-parse "${{ needs.changes.outputs.tag }}" >/dev/null 2>&1; then
- git tag "${{ needs.changes.outputs.tag }}"
- fi
- git push origin main --tags
-
build:
name: Build Release Asset
- needs: [changes, update-manifests]
+ needs: [changes, validate, integration, distribution]
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- with:
- ref: ${{ needs.changes.outputs.tag }}
- fetch-depth: 0
- uses: dtolnay/rust-toolchain@stable
with:
@@ -242,7 +179,7 @@ jobs:
release:
name: Release
- needs: [changes, build, update-manifests]
+ needs: [changes, build]
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 31b3f4f..60aed4d 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -28,10 +28,12 @@ permissions:
jobs:
publish:
runs-on: ubuntu-latest
+ permissions:
+ contents: write
steps:
- uses: actions/checkout@v4
with:
- ref: ${{ inputs.tag }}
+ fetch-depth: 0
- uses: dtolnay/rust-toolchain@stable
@@ -43,29 +45,46 @@ jobs:
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- - name: Verify release exists and contains canonical assets
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: Compute next version
+ id: version
run: |
- gh release view "${{ inputs.tag }}" --json assets --jq '.assets[].name' > /tmp/release-assets.txt
- grep -Fx "deskctl-linux-x86_64" /tmp/release-assets.txt >/dev/null
- grep -Fx "checksums.txt" /tmp/release-assets.txt >/dev/null
+ CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
- - name: Verify versions align with tag
+ case "${{ inputs.bump }}" in
+ major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
+ minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
+ patch) PATCH=$((PATCH + 1)) ;;
+ esac
+
+ NEW="${MAJOR}.${MINOR}.${PATCH}"
+ TAG="v${NEW}"
+
+ echo "version=${NEW}" >> "$GITHUB_OUTPUT"
+ echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
+ echo "Bumping ${CURRENT} -> ${NEW} (${TAG})"
+
+ - name: Bump versions
run: |
- TAG="${{ inputs.tag }}"
- VERSION="${TAG#v}"
- CARGO_VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- NPM_VERSION=$(node -p 'require("./npm/deskctl/package.json").version')
+ NEW="${{ steps.version.outputs.version }}"
+ CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
+ node -e 'const fs=require("node:fs"); const p="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(p,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(p, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
+ cargo generate-lockfile
- test "$VERSION" = "$CARGO_VERSION"
- test "$VERSION" = "$NPM_VERSION"
+ - name: Commit, tag, and push
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add Cargo.toml Cargo.lock npm/deskctl/package.json
+ git commit -m "release: ${{ steps.version.outputs.tag }} [skip ci]"
+ git tag "${{ steps.version.outputs.tag }}"
+ git push origin main --tags
- name: Check current published state
id: published
run: |
- VERSION="${{ inputs.tag }}"
- VERSION="${VERSION#v}"
+ VERSION="${{ steps.version.outputs.version }}"
if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then
echo "npm=true" >> "$GITHUB_OUTPUT"
@@ -102,6 +121,7 @@ jobs:
- name: Summary
run: |
- echo "tag=${{ inputs.tag }}"
+ echo "tag=${{ steps.version.outputs.tag }}"
+ echo "bump=${{ inputs.bump }}"
echo "npm_already_published=${{ steps.published.outputs.npm }}"
echo "crates_already_published=${{ steps.published.outputs.crates }}"
From eedb5de2d478acebe6dbd75f17f716ccbb8f0d8c Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:13:10 -0400
Subject: [PATCH 12/37] refresh contributor cache [skip ci]
From 2a8b51b4f5249969c6adb2a28ea0ef9238b84667 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:21:43 -0400
Subject: [PATCH 13/37] docs: tighten skill install docs and bundle
Co-authored-by: Codex
---
README.md | 7 +-
site/src/pages/index.astro | 2 +-
site/src/pages/installation.mdx | 8 +-
skills/deskctl/agents/openai.yaml | 7 ++
skills/deskctl/references/runtime-contract.md | 74 ++++++++++++++++++-
5 files changed, 91 insertions(+), 7 deletions(-)
create mode 100644 skills/deskctl/agents/openai.yaml
mode change 120000 => 100644 skills/deskctl/references/runtime-contract.md
diff --git a/README.md b/README.md
index 4b42b5f..f2e746f 100644
--- a/README.md
+++ b/README.md
@@ -27,10 +27,13 @@ npx deskctl --help
## Installable skill
```bash
-npx skills add harivansh-afk/deskctl -s deskctl
+npx skills add harivansh-afk/deskctl --skill deskctl -g
```
-The installable skill lives in [`skills/deskctl`](skills/deskctl) and is built around the same observe -> wait -> act -> verify loop as the CLI.
+The installable skill lives in [`skills/deskctl`](skills/deskctl), follows the
+standard `skills/` repo layout, and installs directly from this GitHub repo via
+`npx skills add ...`. It is built around the same observe -> wait -> act ->
+verify loop as the CLI.
## Quick example
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index 8b8d4b4..e97b599 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -38,7 +38,7 @@ import DocLayout from "../layouts/DocLayout.astro";
There is also an installable skill for `skills.sh`-style agent runtimes:
- npx skills add harivansh-afk/deskctl -s deskctl
+ npx skills add harivansh-afk/deskctl --skill deskctl -g
Links
diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx
index df53fcc..7754e6b 100644
--- a/site/src/pages/installation.mdx
+++ b/site/src/pages/installation.mdx
@@ -27,11 +27,13 @@ npx deskctl --help
For `skills.sh`-style runtimes:
```sh
-npx skills add harivansh-afk/deskctl -s deskctl
+npx skills add harivansh-afk/deskctl --skill deskctl -g
```
-The repo skill lives under `skills/deskctl` and is designed around the same
-observe -> wait -> act -> verify loop as the CLI.
+The repo skill lives under `skills/deskctl`, so `skills` can install it
+directly from this GitHub repo. It is designed around the same observe -> wait
+-> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if
+you want a project-local install.
## Other install paths
diff --git a/skills/deskctl/agents/openai.yaml b/skills/deskctl/agents/openai.yaml
new file mode 100644
index 0000000..8a5ca13
--- /dev/null
+++ b/skills/deskctl/agents/openai.yaml
@@ -0,0 +1,7 @@
+interface:
+ display_name: "deskctl"
+ short_description: "Control Linux X11 desktops from agent loops"
+ default_prompt: "Use $deskctl to diagnose the desktop, observe state, wait for UI changes, act deterministically, and verify the result."
+
+policy:
+ allow_implicit_invocation: true
diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md
deleted file mode 120000
index 8de0781..0000000
--- a/skills/deskctl/references/runtime-contract.md
+++ /dev/null
@@ -1 +0,0 @@
-../../../docs/runtime-contract.md
\ No newline at end of file
diff --git a/skills/deskctl/references/runtime-contract.md b/skills/deskctl/references/runtime-contract.md
new file mode 100644
index 0000000..6efd2bc
--- /dev/null
+++ b/skills/deskctl/references/runtime-contract.md
@@ -0,0 +1,73 @@
+# deskctl runtime contract
+
+This copy ships inside the installable skill so `npx skills add ...` installs a
+self-contained reference bundle.
+
+All commands support `--json` and use the same top-level envelope:
+
+```json
+{
+ "success": true,
+ "data": {},
+ "error": null
+}
+```
+
+Use `--json` whenever you need to parse output programmatically.
+
+## Stable window fields
+
+Whenever a response includes a window payload, these fields are stable:
+
+- `ref_id`
+- `window_id`
+- `title`
+- `app_name`
+- `x`
+- `y`
+- `width`
+- `height`
+- `focused`
+- `minimized`
+
+Use `window_id` for stable targeting inside a live daemon session. Use
+`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or
+`list-windows`.
+
+## Stable grouped reads
+
+- `deskctl get active-window` -> `data.window`
+- `deskctl get monitors` -> `data.count`, `data.monitors`
+- `deskctl get version` -> `data.version`, `data.backend`
+- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as
+ `backend`, `display`, `session_type`, `session`, `socket_path`, `screen`,
+ `monitor_count`, and `monitors`
+
+## Stable waits
+
+- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
+ `data.window`
+- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
+ `data.window`
+
+## Stable structured error kinds
+
+When a command fails with structured JSON data, these `kind` values are stable:
+
+- `selector_not_found`
+- `selector_ambiguous`
+- `selector_invalid`
+- `timeout`
+- `not_found`
+
+Wait failures may also include `window_not_focused` in the last observation
+payload.
+
+## Best-effort fields
+
+Treat these as useful but non-contractual:
+
+- exact monitor names
+- incidental text formatting in non-JSON mode
+- default screenshot file names when no explicit path was provided
+- environment-dependent ordering details from the window manager
From c907e800af804ad44dd844e09f1a0c02d36316a6 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:21:58 -0400
Subject: [PATCH 14/37] change client bin name
---
src/cli/mod.rs | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index bab44c9..b24465a 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -7,7 +7,12 @@ use std::path::PathBuf;
use crate::core::protocol::{Request, Response};
#[derive(Parser)]
-#[command(name = "deskctl", version, about = "Desktop control CLI for AI agents")]
+#[command(
+ name = "deskctl",
+ bin_name = "deskctl",
+ version,
+ about = "Desktop control CLI for AI agents"
+)]
pub struct App {
#[command(flatten)]
pub global: GlobalOpts,
@@ -988,6 +993,12 @@ mod tests {
assert!(help.contains("deskctl snapshot --annotate"));
}
+ #[test]
+ fn root_help_uses_public_bin_name() {
+ let help = App::command().render_help().to_string();
+ assert!(help.contains("Usage: deskctl [OPTIONS] "));
+ }
+
#[test]
fn window_listing_text_includes_window_ids() {
let lines = render_success_lines(
From 3bfec9eecc890208d6f4f37b97a95534b2a982f5 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:27:43 -0400
Subject: [PATCH 15/37] edit docs
---
site/src/pages/index.astro | 13 -------------
site/src/pages/installation.mdx | 15 ---------------
2 files changed, 28 deletions(-)
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index e97b599..b770178 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -21,7 +21,6 @@ import DocLayout from "../layouts/DocLayout.astro";
Reference
@@ -34,23 +33,11 @@ import DocLayout from "../layouts/DocLayout.astro";
Agent skill
-
- There is also an installable skill for `skills.sh`-style agent runtimes:
-
-
- npx skills add harivansh-afk/deskctl --skill deskctl -g
-
Links
diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx
index 7754e6b..ed4e737 100644
--- a/site/src/pages/installation.mdx
+++ b/site/src/pages/installation.mdx
@@ -10,26 +10,11 @@ toc: true
```sh
npm install -g deskctl
-deskctl --help
```
`deskctl` is the default install path. It installs the command by
downloading the matching GitHub Release asset for the supported runtime target.
-## One-shot usage
-
-```sh
-npx deskctl --help
-```
-
-## Agent skill
-
-For `skills.sh`-style runtimes:
-
-```sh
-npx skills add harivansh-afk/deskctl --skill deskctl -g
-```
-
The repo skill lives under `skills/deskctl`, so `skills` can install it
directly from this GitHub repo. It is designed around the same observe -> wait
-> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if
From bf603671f95f28270e4ede426f03442c9203b328 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:28:18 -0400
Subject: [PATCH 16/37] rm:
---
site/src/pages/index.astro | 2 --
1 file changed, 2 deletions(-)
diff --git a/site/src/pages/index.astro b/site/src/pages/index.astro
index b770178..16a4b29 100644
--- a/site/src/pages/index.astro
+++ b/site/src/pages/index.astro
@@ -31,8 +31,6 @@ import DocLayout from "../layouts/DocLayout.astro";
Runtime contract
- Agent skill
-
Links
From 848ef97e87c321dffda0a6e4823c3ce8871569e1 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:34:10 -0400
Subject: [PATCH 17/37] edit readme
---
README.md | 45 +++++++--------------------------------------
1 file changed, 7 insertions(+), 38 deletions(-)
diff --git a/README.md b/README.md
index f2e746f..4bc24c8 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,9 @@
# deskctl
[](https://www.npmjs.com/package/deskctl)
-[](https://github.com/harivansh-afk/deskctl/releases)
-[](#support-boundary)
[](skills/deskctl)
-Non-interactive desktop control for AI agents on Linux X11.
+Desktop control cli for AI agents on Linux X11.
## Install
@@ -15,44 +13,19 @@ deskctl doctor
deskctl snapshot --annotate
```
-One-shot execution also works:
+## Skill
```bash
-npx deskctl --help
-```
-
-`deskctl` installs the command by downloading the matching GitHub Release asset for the supported runtime target.
-
-
-## Installable skill
-
-```bash
-npx skills add harivansh-afk/deskctl --skill deskctl -g
-```
-
-The installable skill lives in [`skills/deskctl`](skills/deskctl), follows the
-standard `skills/` repo layout, and installs directly from this GitHub repo via
-`npx skills add ...`. It is built around the same observe -> wait -> act ->
-verify loop as the CLI.
-
-## Quick example
-
-```bash
-deskctl doctor
-deskctl snapshot --annotate
-deskctl wait window --selector 'title=Firefox' --timeout 10
-deskctl focus 'title=Firefox'
-deskctl type "hello world"
+npx skills add harivansh-afk/deskctl
```
## Docs
- runtime contract: [docs/runtime-contract.md](docs/runtime-contract.md)
-- release flow: [docs/releasing.md](docs/releasing.md)
-- installable skill: [skills/deskctl](skills/deskctl)
-- contributor workflow: [CONTRIBUTING.md](CONTRIBUTING.md)
+- releasing: [docs/releasing.md](docs/releasing.md)
+- contributing: [CONTRIBUTING.md](CONTRIBUTING.md)
-## Other install paths
+## Install paths
Nix:
@@ -61,12 +34,8 @@ nix run github:harivansh-afk/deskctl -- --help
nix profile install github:harivansh-afk/deskctl
```
-Source build:
+Rust:
```bash
cargo build
```
-
-## Support boundary
-
-`deskctl` currently supports Linux X11. Use `--json` for stable machine parsing, use `window_id` for programmatic targeting inside a live session, and use `deskctl doctor` first when the runtime looks broken.
From 6c6f33040f8be0aec4855c5fd9eef33c6adef4c1 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 09:35:46 -0400
Subject: [PATCH 18/37] update readme
---
README.md | 3 +++
1 file changed, 3 insertions(+)
diff --git a/README.md b/README.md
index 4bc24c8..935f329 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,9 @@ Desktop control cli for AI agents on Linux X11.
```bash
npm install -g deskctl
+```
+
+```bash
deskctl doctor
deskctl snapshot --annotate
```
From 844f2f2bc6ddb989d1f29bea2725be3741737e53 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 26 Mar 2026 13:37:41 +0000
Subject: [PATCH 19/37] release: v0.1.8 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 6922004..3fb1666 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.7"
+version = "0.1.8"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 5872639..fc7816c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.7"
+version = "0.1.8"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index 6085bca..45daefe 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.7",
+ "version": "0.1.8",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From 2b02513d6ef22ba238e50fea98cc8133c94f2131 Mon Sep 17 00:00:00 2001
From: Hari <73809867+harivansh-afk@users.noreply.github.com>
Date: Thu, 26 Mar 2026 11:27:35 -0400
Subject: [PATCH 20/37] Improve docs structure and navigation (#12)
* Improve docs structure and navigation
Co-authored-by: Codex
* rm
* handwrite docs
---------
Co-authored-by: Codex
---
site/src/layouts/DocLayout.astro | 2 +-
site/src/pages/architecture.mdx | 98 -----------------------------
site/src/pages/commands.mdx | 24 ++++---
site/src/pages/index.astro | 29 ++++++---
site/src/pages/installation.mdx | 30 ++++++---
site/src/pages/quick-start.mdx | 14 +++--
site/src/pages/runtime-contract.mdx | 4 +-
site/src/styles/base.css | 10 +--
8 files changed, 69 insertions(+), 142 deletions(-)
delete mode 100644 site/src/pages/architecture.mdx
diff --git a/site/src/layouts/DocLayout.astro b/site/src/layouts/DocLayout.astro
index f2608de..afc8648 100644
--- a/site/src/layouts/DocLayout.astro
+++ b/site/src/layouts/DocLayout.astro
@@ -30,7 +30,7 @@ function formatTocText(text: string): string {
{
!isIndex && (
-
diff --git a/site/src/pages/installation.mdx b/site/src/pages/installation.mdx
index ed4e737..e35f4eb 100644
--- a/site/src/pages/installation.mdx
+++ b/site/src/pages/installation.mdx
@@ -6,19 +6,30 @@ toc: true
# Installation
-## Default install
+Install the public `deskctl` command first, then validate the desktop runtime
+with `deskctl doctor` before trying to automate anything.
+
+## Recommended path
```sh
npm install -g deskctl
+deskctl doctor
```
`deskctl` is the default install path. It installs the command by
downloading the matching GitHub Release asset for the supported runtime target.
-The repo skill lives under `skills/deskctl`, so `skills` can install it
-directly from this GitHub repo. It is designed around the same observe -> wait
--> act -> verify loop as the CLI. `-g` installs it globally; omit that flag if
-you want a project-local install.
+This path does not require a Rust toolchain. The installed command is always
+`deskctl`, even though the release asset itself is target-specific.
+
+## Skill install
+
+The repo skill lives under `skills/deskctl`, so you can install it
+directly using `skills.sh`:
+
+```sh
+npx skills add harivansh-afk/deskctl
+```
## Other install paths
@@ -29,7 +40,7 @@ nix run github:harivansh-afk/deskctl -- --help
nix profile install github:harivansh-afk/deskctl
```
-### Build from source
+### Rust
```sh
git clone https://github.com/harivansh-afk/deskctl
@@ -53,8 +64,13 @@ Source builds on Linux require:
The binary itself only depends on the standard Linux glibc runtime.
-If setup fails, run:
+## Verification
+
+If setup fails for any reason, start here:
```sh
deskctl doctor
```
+
+`doctor` checks X11 connectivity, window enumeration, screenshot viability, and
+daemon/socket health before normal command execution.
diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx
index 10f3ec0..7ecf5a7 100644
--- a/site/src/pages/quick-start.mdx
+++ b/site/src/pages/quick-start.mdx
@@ -6,17 +6,19 @@ toc: true
# Quick start
-## Install and diagnose
+The fastest way to use `deskctl` is to follow the same four-step loop: observe, wait, act, verify.
+
+## 1. Install and diagnose
```sh
npm install -g deskctl
deskctl doctor
```
-Use `deskctl doctor` first. It checks X11 connectivity, basic enumeration,
+Run `deskctl doctor` first. It checks X11 connectivity, basic enumeration,
screenshot viability, and socket health before you start driving the desktop.
-## Observe
+## 2. Observe the desktop
```sh
deskctl snapshot --annotate
@@ -29,7 +31,7 @@ Use `snapshot` when you want a screenshot artifact plus window refs. Use
`list-windows` when you only need the current window tree without writing a
screenshot.
-## Target windows cleanly
+## 3. Pick selectors that stay readable
Prefer explicit selectors when you need deterministic targeting:
@@ -44,7 +46,7 @@ focused
Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare
strings like `firefox` are fuzzy matches and now fail on ambiguity.
-## Wait, act, verify
+## 4. Wait, act, verify
The core loop is:
@@ -69,7 +71,7 @@ deskctl snapshot
The wait commands return the matched window payload on success, so they compose
cleanly into the next action.
-## Use `--json` when parsing matters
+## 5. Use `--json` when parsing matters
Every command supports `--json` and uses the same top-level envelope:
diff --git a/site/src/pages/runtime-contract.mdx b/site/src/pages/runtime-contract.mdx
index 4fca14c..e33e999 100644
--- a/site/src/pages/runtime-contract.mdx
+++ b/site/src/pages/runtime-contract.mdx
@@ -11,7 +11,7 @@ This page defines the current public output contract for `deskctl`.
It is intentionally scoped to the current Linux X11 runtime surface. It does
not promise stability for future Wayland or window-manager-specific features.
-## JSON envelope
+## Stable top-level envelope
Every command supports `--json` and uses the same top-level envelope:
@@ -32,7 +32,7 @@ Stable top-level fields:
If `success` is `false`, the command exits non-zero in both text mode and JSON
mode.
-## Stable window fields
+## Stable window payload
Whenever a response includes a window payload, these fields are stable:
diff --git a/site/src/styles/base.css b/site/src/styles/base.css
index cd569a9..e05552e 100644
--- a/site/src/styles/base.css
+++ b/site/src/styles/base.css
@@ -224,30 +224,30 @@ hr {
}
}
-nav {
+.breadcrumbs {
max-width: 50rem;
margin: 0 auto;
padding: 1.5rem clamp(1.25rem, 5vw, 3rem) 0;
font-size: 0.9rem;
}
-nav a {
+.breadcrumbs a {
color: inherit;
text-decoration: none;
opacity: 0.6;
transition: opacity 0.15s;
}
-nav a:hover {
+.breadcrumbs a:hover {
opacity: 1;
}
-nav .title {
+.breadcrumbs .title {
font-weight: 500;
opacity: 1;
}
-nav .sep {
+.breadcrumbs .sep {
opacity: 0.3;
margin: 0 0.5em;
}
From a64b46b479b45310adedf365888fffa458268bf3 Mon Sep 17 00:00:00 2001
From: Hari <73809867+harivansh-afk@users.noreply.github.com>
Date: Thu, 26 Mar 2026 11:53:15 -0400
Subject: [PATCH 21/37] deskctl upgrade (#13)
* deskctl upgrade
* interactive update
as well as --yes flag
---
npm/deskctl/README.md | 12 +
site/src/pages/commands.mdx | 5 +-
skills/deskctl/SKILL.md | 6 +
skills/deskctl/references/commands.md | 1 +
src/cli/mod.rs | 116 ++++++-
src/cli/upgrade.rs | 465 ++++++++++++++++++++++++++
6 files changed, 603 insertions(+), 2 deletions(-)
create mode 100644 src/cli/upgrade.rs
diff --git a/npm/deskctl/README.md b/npm/deskctl/README.md
index 7bb42a9..81f07f4 100644
--- a/npm/deskctl/README.md
+++ b/npm/deskctl/README.md
@@ -14,6 +14,18 @@ After install, run:
deskctl --help
```
+To upgrade to the latest version:
+
+```bash
+deskctl upgrade
+```
+
+For non-interactive use:
+
+```bash
+deskctl upgrade --yes
+```
+
One-shot usage is also supported:
```bash
diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx
index dc9c578..934cdb8 100644
--- a/site/src/pages/commands.mdx
+++ b/site/src/pages/commands.mdx
@@ -13,6 +13,7 @@ reads, grouped waits, selector-driven actions, and a few input primitives.
```sh
deskctl doctor
+deskctl upgrade
deskctl snapshot
deskctl snapshot --annotate
deskctl list-windows
@@ -26,7 +27,9 @@ deskctl get-screen-size
deskctl get-mouse-position
```
-`doctor` checks the runtime before daemon startup. `snapshot` produces a
+`doctor` checks the runtime before daemon startup. `upgrade` checks for a newer
+published version, shows a short confirmation prompt when an update is
+available, and supports `--yes` for non-interactive use. `snapshot` produces a
screenshot plus window refs. `list-windows` is the same window tree without the
side effect of writing a screenshot. The grouped `get` commands are the
preferred read surface for focused state queries.
diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md
index 244a1fb..67a77c5 100644
--- a/skills/deskctl/SKILL.md
+++ b/skills/deskctl/SKILL.md
@@ -18,6 +18,12 @@ deskctl doctor
deskctl snapshot --annotate
```
+If `deskctl` was installed through npm, refresh it later with:
+
+```bash
+deskctl upgrade --yes
+```
+
## Agent loop
Every desktop interaction follows: **observe -> wait -> act -> verify**.
diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md
index 77b9513..27b4310 100644
--- a/skills/deskctl/references/commands.md
+++ b/skills/deskctl/references/commands.md
@@ -7,6 +7,7 @@ runtime contract.
```bash
deskctl doctor
+deskctl upgrade
deskctl snapshot
deskctl snapshot --annotate
deskctl list-windows
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index b24465a..28092d7 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -1,4 +1,5 @@
pub mod connection;
+pub mod upgrade;
use anyhow::Result;
use clap::{Args, Parser, Subcommand};
@@ -121,6 +122,9 @@ pub enum Command {
/// Diagnose X11 runtime, screenshot, and daemon health
#[command(after_help = DOCTOR_EXAMPLES)]
Doctor,
+ /// Upgrade deskctl using the current install channel
+ #[command(after_help = UPGRADE_EXAMPLES)]
+ Upgrade(UpgradeOpts),
/// Query runtime state
#[command(subcommand)]
Get(GetCmd),
@@ -231,6 +235,8 @@ const GET_SCREEN_SIZE_EXAMPLES: &str =
const GET_MOUSE_POSITION_EXAMPLES: &str =
"Examples:\n deskctl get-mouse-position\n deskctl --json get-mouse-position";
const DOCTOR_EXAMPLES: &str = "Examples:\n deskctl doctor\n deskctl --json doctor";
+const UPGRADE_EXAMPLES: &str =
+ "Examples:\n deskctl upgrade\n deskctl upgrade --yes\n deskctl --json upgrade --yes";
const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Firefox' --timeout 10\n deskctl --json wait window --selector 'class=firefox' --poll-ms 100";
const WAIT_FOCUS_EXAMPLES: &str = "Examples:\n deskctl wait focus --selector 'id=win3' --timeout 5\n deskctl wait focus --selector focused --poll-ms 200";
const SCREENSHOT_EXAMPLES: &str =
@@ -284,6 +290,13 @@ pub struct WaitSelectorOpts {
pub poll_ms: u64,
}
+#[derive(Args)]
+pub struct UpgradeOpts {
+ /// Skip confirmation and upgrade non-interactively
+ #[arg(long)]
+ pub yes: bool,
+}
+
pub fn run() -> Result<()> {
let app = App::parse();
@@ -300,6 +313,22 @@ pub fn run() -> Result<()> {
return connection::run_doctor(&app.global);
}
+ if let Command::Upgrade(ref upgrade_opts) = app.command {
+ let response = upgrade::run_upgrade(&app.global, upgrade_opts)?;
+ let success = response.success;
+
+ if app.global.json {
+ println!("{}", serde_json::to_string_pretty(&response)?);
+ if !success {
+ std::process::exit(1);
+ }
+ } else {
+ print_response(&app.command, &response)?;
+ }
+
+ return Ok(());
+ }
+
// All other commands need a daemon connection
let request = build_request(&app.command)?;
let response = connection::send_command(&app.global, &request)?;
@@ -363,6 +392,7 @@ fn build_request(cmd: &Command) -> Result {
Command::GetScreenSize => Request::new("get-screen-size"),
Command::GetMousePosition => Request::new("get-mouse-position"),
Command::Doctor => unreachable!(),
+ Command::Upgrade(_) => unreachable!(),
Command::Get(sub) => match sub {
GetCmd::ActiveWindow => Request::new("get-active-window"),
GetCmd::Monitors => Request::new("get-monitors"),
@@ -422,6 +452,7 @@ fn render_success_lines(cmd: &Command, data: Option<&serde_json::Value>) -> Resu
Command::Get(GetCmd::Systeminfo) => render_systeminfo_lines(data),
Command::GetScreenSize => vec![render_screen_size_line(data)],
Command::GetMousePosition => vec![render_mouse_position_line(data)],
+ Command::Upgrade(_) => render_upgrade_lines(data),
Command::Screenshot { annotate, .. } => render_screenshot_lines(data, *annotate),
Command::Click { .. } => vec![render_click_line(data, false)],
Command::Dblclick { .. } => vec![render_click_line(data, true)],
@@ -526,6 +557,41 @@ fn render_error_lines(response: &Response) -> Vec {
lines.push("No focused window is available.".to_string());
}
}
+ "upgrade_failed" => {
+ if let Some(reason) = data.get("io_error").and_then(|value| value.as_str()) {
+ lines.push(format!("Reason: {reason}"));
+ }
+ if let Some(reason) = data.get("reason").and_then(|value| value.as_str()) {
+ lines.push(format!("Reason: {reason}"));
+ }
+ if let Some(command) = data.get("command").and_then(|value| value.as_str()) {
+ lines.push(format!("Command: {command}"));
+ }
+ if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) {
+ lines.push(format!("Hint: {hint}"));
+ }
+ }
+ "upgrade_unsupported" => {
+ if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) {
+ lines.push(format!("Hint: {hint}"));
+ }
+ }
+ "upgrade_confirmation_required" => {
+ if let Some(current_version) =
+ data.get("current_version").and_then(|value| value.as_str())
+ {
+ if let Some(latest_version) =
+ data.get("latest_version").and_then(|value| value.as_str())
+ {
+ lines.push(format!(
+ "Update available: {current_version} -> {latest_version}"
+ ));
+ }
+ }
+ if let Some(hint) = data.get("hint").and_then(|value| value.as_str()) {
+ lines.push(format!("Hint: {hint}"));
+ }
+ }
_ => {}
}
@@ -723,6 +789,36 @@ fn render_screenshot_lines(data: &serde_json::Value, annotate: bool) -> Vec<Stri
+fn render_upgrade_lines(data: &serde_json::Value) -> Vec<String> {
+ match data.get("status").and_then(|value| value.as_str()) {
+ Some("up_to_date") => {
+ let version = data
+ .get("latest_version")
+ .and_then(|value| value.as_str())
+ .or_else(|| data.get("current_version").and_then(|value| value.as_str()))
+ .unwrap_or("unknown");
+ vec![format!(
+ "✔ You're already on the latest version! ({version})"
+ )]
+ }
+ Some("upgraded") => {
+ let current_version = data
+ .get("current_version")
+ .and_then(|value| value.as_str())
+ .unwrap_or("unknown");
+ let latest_version = data
+ .get("latest_version")
+ .and_then(|value| value.as_str())
+ .unwrap_or("unknown");
+ vec![format!(
+ "✔ Upgraded deskctl from {current_version} -> {latest_version}"
+ )]
+ }
+ Some("cancelled") => vec!["No changes made.".to_string()],
+ _ => vec!["Upgrade completed.".to_string()],
+ }
+}
+
fn render_click_line(data: &serde_json::Value, double: bool) -> String {
let action = if double { "Double-clicked" } else { "Clicked" };
let key = if double { "double_clicked" } else { "clicked" };
@@ -978,7 +1074,7 @@ fn truncate_display(value: &str, max_chars: usize) -> String {
mod tests {
use super::{
render_error_lines, render_screen_size_line, render_success_lines, target_summary,
- truncate_display, App, Command, Response,
+ truncate_display, App, Command, Response, UpgradeOpts,
};
use clap::CommandFactory;
use serde_json::json;
@@ -1104,4 +1200,22 @@ mod tests {
let input = format!("fire{}fox", '\u{00E9}');
assert_eq!(truncate_display(&input, 7), "fire...");
}
+
+ #[test]
+ fn upgrade_success_text_is_neat() {
+ let lines = render_success_lines(
+ &Command::Upgrade(UpgradeOpts { yes: false }),
+ Some(&json!({
+ "status": "up_to_date",
+ "current_version": "0.1.8",
+ "latest_version": "0.1.8"
+ })),
+ )
+ .unwrap();
+
+ assert_eq!(
+ lines,
+ vec!["✔ You're already on the latest version! (0.1.8)"]
+ );
+ }
}
diff --git a/src/cli/upgrade.rs b/src/cli/upgrade.rs
new file mode 100644
index 0000000..acc844e
--- /dev/null
+++ b/src/cli/upgrade.rs
@@ -0,0 +1,465 @@
+use std::io::{self, IsTerminal, Write};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use anyhow::{Context, Result};
+use serde_json::json;
+
+use crate::cli::{GlobalOpts, UpgradeOpts};
+use crate::core::protocol::Response;
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum InstallMethod {
+ Npm,
+ Cargo,
+ Nix,
+ Source,
+ Unknown,
+}
+
+impl InstallMethod {
+ fn as_str(self) -> &'static str {
+ match self {
+ Self::Npm => "npm",
+ Self::Cargo => "cargo",
+ Self::Nix => "nix",
+ Self::Source => "source",
+ Self::Unknown => "unknown",
+ }
+ }
+}
+
+#[derive(Debug)]
+struct UpgradePlan {
+ install_method: InstallMethod,
+ program: &'static str,
+ args: Vec<&'static str>,
+}
+
+impl UpgradePlan {
+ fn command_line(&self) -> String {
+ std::iter::once(self.program)
+ .chain(self.args.iter().copied())
+ .collect::<Vec<_>>()
+ .join(" ")
+ }
+}
+
+#[derive(Debug)]
+struct VersionInfo {
+ current: String,
+ latest: String,
+}
+
+pub fn run_upgrade(opts: &GlobalOpts, upgrade_opts: &UpgradeOpts) -> Result<Response> {
+ let current_exe = std::env::current_exe().context("Failed to determine executable path")?;
+ let install_method = detect_install_method(&current_exe);
+
+ let Some(plan) = upgrade_plan(install_method) else {
+ return Ok(Response::err_with_data(
+ format!(
+ "deskctl upgrade is not supported for {} installs.",
+ install_method.as_str()
+ ),
+ json!({
+ "kind": "upgrade_unsupported",
+ "install_method": install_method.as_str(),
+ "current_exe": current_exe.display().to_string(),
+ "hint": upgrade_hint(install_method),
+ }),
+ ));
+ };
+
+ if !opts.json {
+ println!("- Checking for updates...");
+ }
+
+ let versions = match resolve_versions(&plan) {
+ Ok(versions) => versions,
+ Err(response) => return Ok(response),
+ };
+
+ if versions.current == versions.latest {
+ return Ok(Response::ok(json!({
+ "action": "upgrade",
+ "status": "up_to_date",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ })));
+ }
+
+ if !upgrade_opts.yes {
+ if opts.json || !io::stdin().is_terminal() {
+ return Ok(Response::err_with_data(
+ format!(
+ "Upgrade confirmation required for {} -> {}.",
+ versions.current, versions.latest
+ ),
+ json!({
+ "kind": "upgrade_confirmation_required",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ "command": plan.command_line(),
+ "hint": "Re-run with --yes to upgrade non-interactively.",
+ }),
+ ));
+ }
+
+ if !confirm_upgrade(&versions)? {
+ return Ok(Response::ok(json!({
+ "action": "upgrade",
+ "status": "cancelled",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ })));
+ }
+ }
+
+ if !opts.json {
+ println!(
+ "- Upgrading deskctl from {} -> {}...",
+ versions.current, versions.latest
+ );
+ }
+
+ let output = match Command::new(plan.program).args(&plan.args).output() {
+ Ok(output) => output,
+ Err(error) => return Ok(upgrade_spawn_error_response(&plan, &versions, &error)),
+ };
+
+ if output.status.success() {
+ return Ok(Response::ok(json!({
+ "action": "upgrade",
+ "status": "upgraded",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ "command": plan.command_line(),
+ "exit_code": output.status.code(),
+ })));
+ }
+
+ Ok(upgrade_command_failed_response(&plan, &versions, &output))
+}
+
+fn resolve_versions(plan: &UpgradePlan) -> std::result::Result<VersionInfo, Response> {
+ let current = env!("CARGO_PKG_VERSION").to_string();
+ let latest = match plan.install_method {
+ InstallMethod::Npm => query_npm_latest_version()?,
+ InstallMethod::Cargo => query_cargo_latest_version()?,
+ InstallMethod::Nix | InstallMethod::Source | InstallMethod::Unknown => {
+ return Err(Response::err_with_data(
+ "Could not determine the latest published version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": plan.install_method.as_str(),
+ "reason": "Could not determine the latest published version for this install method.",
+ "command": plan.command_line(),
+ "hint": upgrade_hint(plan.install_method),
+ }),
+ ));
+ }
+ };
+
+ Ok(VersionInfo { current, latest })
+}
+
+fn query_npm_latest_version() -> std::result::Result<String, Response> {
+ let output = Command::new("npm")
+ .args(["view", "deskctl", "version", "--json"])
+ .output()
+ .map_err(|error| {
+ Response::err_with_data(
+ "Failed to check the latest npm version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Npm.as_str(),
+ "reason": "Failed to run npm view deskctl version --json.",
+ "io_error": error.to_string(),
+ "command": "npm view deskctl version --json",
+ "hint": upgrade_hint(InstallMethod::Npm),
+ }),
+ )
+ })?;
+
+ if !output.status.success() {
+ return Err(Response::err_with_data(
+ "Failed to check the latest npm version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Npm.as_str(),
+ "reason": command_failure_reason(&output),
+ "command": "npm view deskctl version --json",
+ "hint": upgrade_hint(InstallMethod::Npm),
+ }),
+ ));
+ }
+
+ serde_json::from_slice::<String>(&output.stdout).map_err(|_| {
+ Response::err_with_data(
+ "Failed to parse the latest npm version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Npm.as_str(),
+ "reason": "npm view returned an unexpected version payload.",
+ "command": "npm view deskctl version --json",
+ "hint": upgrade_hint(InstallMethod::Npm),
+ }),
+ )
+ })
+}
+
+fn query_cargo_latest_version() -> std::result::Result<String, Response> {
+ let output = Command::new("cargo")
+ .args(["search", "deskctl", "--limit", "1"])
+ .output()
+ .map_err(|error| {
+ Response::err_with_data(
+ "Failed to check the latest crates.io version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Cargo.as_str(),
+ "reason": "Failed to run cargo search deskctl --limit 1.",
+ "io_error": error.to_string(),
+ "command": "cargo search deskctl --limit 1",
+ "hint": upgrade_hint(InstallMethod::Cargo),
+ }),
+ )
+ })?;
+
+ if !output.status.success() {
+ return Err(Response::err_with_data(
+ "Failed to check the latest crates.io version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Cargo.as_str(),
+ "reason": command_failure_reason(&output),
+ "command": "cargo search deskctl --limit 1",
+ "hint": upgrade_hint(InstallMethod::Cargo),
+ }),
+ ));
+ }
+
+ let stdout = String::from_utf8_lossy(&output.stdout);
+ let latest = stdout
+ .split('"')
+ .nth(1)
+ .map(str::to_string)
+ .filter(|value| !value.is_empty());
+
+ latest.ok_or_else(|| {
+ Response::err_with_data(
+ "Failed to determine the latest crates.io version.".to_string(),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": InstallMethod::Cargo.as_str(),
+ "reason": "cargo search did not return a published deskctl crate version.",
+ "command": "cargo search deskctl --limit 1",
+ "hint": upgrade_hint(InstallMethod::Cargo),
+ }),
+ )
+ })
+}
+
+fn confirm_upgrade(versions: &VersionInfo) -> Result<bool> {
+ print!(
+ "Upgrade deskctl from {} -> {}? [y/N] ",
+ versions.current, versions.latest
+ );
+ io::stdout().flush()?;
+
+ let mut input = String::new();
+ io::stdin().read_line(&mut input)?;
+
+ let trimmed = input.trim();
+ Ok(matches!(trimmed, "y" | "Y" | "yes" | "YES" | "Yes"))
+}
+
+fn upgrade_command_failed_response(
+ plan: &UpgradePlan,
+ versions: &VersionInfo,
+ output: &std::process::Output,
+) -> Response {
+ Response::err_with_data(
+ format!("Upgrade command failed: {}", plan.command_line()),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ "command": plan.command_line(),
+ "exit_code": output.status.code(),
+ "reason": command_failure_reason(output),
+ "hint": upgrade_hint(plan.install_method),
+ }),
+ )
+}
+
+fn upgrade_spawn_error_response(
+ plan: &UpgradePlan,
+ versions: &VersionInfo,
+ error: &std::io::Error,
+) -> Response {
+ Response::err_with_data(
+ format!("Failed to run {}", plan.command_line()),
+ json!({
+ "kind": "upgrade_failed",
+ "install_method": plan.install_method.as_str(),
+ "current_version": versions.current,
+ "latest_version": versions.latest,
+ "command": plan.command_line(),
+ "io_error": error.to_string(),
+ "hint": upgrade_hint(plan.install_method),
+ }),
+ )
+}
+
+fn command_failure_reason(output: &std::process::Output) -> String {
+ let stderr = String::from_utf8_lossy(&output.stderr);
+ let stdout = String::from_utf8_lossy(&output.stdout);
+
+ stderr
+ .lines()
+ .chain(stdout.lines())
+ .map(str::trim)
+ .find(|line| !line.is_empty())
+ .map(str::to_string)
+ .unwrap_or_else(|| {
+ output
+ .status
+ .code()
+ .map(|code| format!("Command exited with status {code}."))
+ .unwrap_or_else(|| "Command exited unsuccessfully.".to_string())
+ })
+}
+
+fn upgrade_plan(install_method: InstallMethod) -> Option<UpgradePlan> {
+ match install_method {
+ InstallMethod::Npm => Some(UpgradePlan {
+ install_method,
+ program: "npm",
+ args: vec!["install", "-g", "deskctl@latest"],
+ }),
+ InstallMethod::Cargo => Some(UpgradePlan {
+ install_method,
+ program: "cargo",
+ args: vec!["install", "deskctl", "--locked"],
+ }),
+ InstallMethod::Nix | InstallMethod::Source | InstallMethod::Unknown => None,
+ }
+}
+
+fn upgrade_hint(install_method: InstallMethod) -> &'static str {
+ match install_method {
+ InstallMethod::Nix => {
+ "Use nix profile upgrade or update the flake reference you installed from."
+ }
+ InstallMethod::Source => {
+ "Rebuild from source or reinstall deskctl through npm, cargo, or nix."
+ }
+ InstallMethod::Unknown => {
+ "Reinstall deskctl through a supported channel such as npm, cargo, or nix."
+ }
+ InstallMethod::Npm => "Retry with --yes or run npm install -g deskctl@latest directly.",
+ InstallMethod::Cargo => "Retry with --yes or run cargo install deskctl --locked directly.",
+ }
+}
+
+fn detect_install_method(current_exe: &Path) -> InstallMethod {
+ if looks_like_npm_install(current_exe) {
+ return InstallMethod::Npm;
+ }
+ if looks_like_nix_install(current_exe) {
+ return InstallMethod::Nix;
+ }
+ if looks_like_cargo_install(current_exe) {
+ return InstallMethod::Cargo;
+ }
+ if looks_like_source_tree(current_exe) {
+ return InstallMethod::Source;
+ }
+ InstallMethod::Unknown
+}
+
+fn looks_like_npm_install(path: &Path) -> bool {
+ let value = normalize(path);
+ value.contains("/node_modules/deskctl/") && value.contains("/vendor/")
+}
+
+fn looks_like_nix_install(path: &Path) -> bool {
+ normalize(path).starts_with("/nix/store/")
+}
+
+fn looks_like_cargo_install(path: &Path) -> bool {
+ let Some(home) = std::env::var_os("HOME") else {
+ return false;
+ };
+
+ let cargo_home = std::env::var_os("CARGO_HOME")
+ .map(PathBuf::from)
+ .unwrap_or_else(|| PathBuf::from(home).join(".cargo"));
+ path == cargo_home.join("bin").join("deskctl")
+}
+
+fn looks_like_source_tree(path: &Path) -> bool {
+ let value = normalize(path);
+ value.contains("/target/debug/deskctl") || value.contains("/target/release/deskctl")
+}
+
+fn normalize(path: &Path) -> String {
+ path.to_string_lossy().replace('\\', "/")
+}
+
+#[cfg(test)]
+mod tests {
+ use std::os::unix::process::ExitStatusExt;
+ use std::path::Path;
+
+ use super::{command_failure_reason, detect_install_method, upgrade_plan, InstallMethod};
+
+ #[test]
+ fn detects_npm_install_path() {
+ let method = detect_install_method(Path::new(
+ "/usr/local/lib/node_modules/deskctl/vendor/deskctl-linux-x86_64",
+ ));
+ assert_eq!(method, InstallMethod::Npm);
+ }
+
+ #[test]
+ fn detects_nix_install_path() {
+ let method = detect_install_method(Path::new("/nix/store/abc123-deskctl/bin/deskctl"));
+ assert_eq!(method, InstallMethod::Nix);
+ }
+
+ #[test]
+ fn detects_source_tree_path() {
+ let method =
+ detect_install_method(Path::new("/Users/example/src/deskctl/target/debug/deskctl"));
+ assert_eq!(method, InstallMethod::Source);
+ }
+
+ #[test]
+ fn npm_upgrade_plan_uses_global_install() {
+ let plan = upgrade_plan(InstallMethod::Npm).expect("npm installs should support upgrade");
+ assert_eq!(plan.command_line(), "npm install -g deskctl@latest");
+ }
+
+ #[test]
+ fn nix_install_has_no_upgrade_plan() {
+ assert!(upgrade_plan(InstallMethod::Nix).is_none());
+ }
+
+ #[test]
+ fn failure_reason_prefers_stderr() {
+ let output = std::process::Output {
+ status: std::process::ExitStatus::from_raw(1 << 8),
+ stdout: b"".to_vec(),
+ stderr: b"boom\n".to_vec(),
+ };
+
+ assert_eq!(command_failure_reason(&output), "boom");
+ }
+}
From e61c5bc33f7f51d8b43703cfc8e0c068f751e57a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 16:03:29 +0000
Subject: [PATCH 22/37] release: v0.1.9 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 3fb1666..157dbc7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.8"
+version = "0.1.9"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index fc7816c..2ebe138 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.8"
+version = "0.1.9"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index 45daefe..5dfeaa0 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.8",
+ "version": "0.1.9",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From 07a478b0eed0e5df22cb5a1bd16989c3b8f57d33 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 14:21:41 -0400
Subject: [PATCH 23/37] couple CI with publish
---
.github/workflows/ci.yml | 143 ++++++++++++++++++++++++++++++++--
.github/workflows/publish.yml | 127 ------------------------------
2 files changed, 137 insertions(+), 133 deletions(-)
delete mode 100644 .github/workflows/publish.yml
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cb36e61..bcb02b3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,6 +10,23 @@ on:
push:
branches: [main]
workflow_dispatch:
+ inputs:
+ bump:
+ description: Version bump type (only for workflow_dispatch)
+ type: choice
+ options:
+ - patch
+ - minor
+ - major
+ default: patch
+ publish_npm:
+ description: Publish to npm
+ type: boolean
+ default: true
+ publish_crates:
+ description: Publish to crates.io
+ type: boolean
+ default: false
permissions:
contents: write
@@ -52,13 +69,34 @@ jobs:
echo "rust=${{ steps.filter.outputs.rust }}" >> "$GITHUB_OUTPUT"
fi
- - name: Read current version
+ - name: Calculate next version
id: version
if: github.event_name != 'pull_request' && steps.check.outputs.rust == 'true'
run: |
- VERSION=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
- echo "tag=v${VERSION}" >> "$GITHUB_OUTPUT"
+ CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
+
+ BUMP="${{ inputs.bump || 'patch' }}"
+ case "$BUMP" in
+ major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
+ minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
+ patch)
+ LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1)
+ if [ -z "$LATEST" ]; then
+ NEW_PATCH=$PATCH
+ else
+ LATEST_VER="${LATEST#v}"
+ IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER"
+ NEW_PATCH=$((LATEST_PATCH + 1))
+ fi
+ PATCH=$NEW_PATCH
+ ;;
+ esac
+
+ NEW="${MAJOR}.${MINOR}.${PATCH}"
+ echo "version=${NEW}" >> "$GITHUB_OUTPUT"
+ echo "tag=v${NEW}" >> "$GITHUB_OUTPUT"
+ echo "Computed version: ${NEW} (v${NEW})"
validate:
name: Validate
@@ -177,10 +215,53 @@ jobs:
path: target/release/deskctl
retention-days: 7
+ update-manifests:
+ name: Update Manifests
+ needs: [changes, build]
+ if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - uses: dtolnay/rust-toolchain@stable
+
+ - uses: actions/setup-node@v4
+ with:
+ node-version: 22
+
+ - name: Update versions
+ run: |
+ CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ NEW="${{ needs.changes.outputs.version }}"
+ if [ "$CURRENT" != "$NEW" ]; then
+ sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
+ cargo generate-lockfile
+ fi
+ node -e '
+ const fs = require("node:fs");
+ const p = "npm/deskctl/package.json";
+ const pkg = JSON.parse(fs.readFileSync(p, "utf8"));
+ pkg.version = process.argv[1];
+ fs.writeFileSync(p, JSON.stringify(pkg, null, 2) + "\n");
+ ' "$NEW"
+
+ - name: Commit, tag, and push
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+ git add Cargo.toml Cargo.lock npm/deskctl/package.json
+ if ! git diff --cached --quiet; then
+ git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]"
+ fi
+ git tag "${{ needs.changes.outputs.tag }}"
+ git push origin main --tags
+
release:
name: Release
- needs: [changes, build]
- if: github.event_name != 'pull_request'
+ needs: [changes, build, update-manifests]
+ if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@@ -209,3 +290,53 @@ jobs:
artifacts/deskctl-linux-x86_64 \
artifacts/checksums.txt
fi
+
+ publish:
+ name: Publish
+ needs: [changes, update-manifests, release]
+ if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ needs.changes.outputs.tag }}
+
+ - uses: dtolnay/rust-toolchain@stable
+
+ - uses: actions/setup-node@v4
+ with:
+ node-version: 22
+ registry-url: https://registry.npmjs.org
+
+ - name: Install system dependencies
+ run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
+
+ - name: Check current published state
+ id: published
+ run: |
+ VERSION="${{ needs.changes.outputs.version }}"
+ if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then
+ echo "npm=true" >> "$GITHUB_OUTPUT"
+ else
+ echo "npm=false" >> "$GITHUB_OUTPUT"
+ fi
+ if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then
+ echo "crates=true" >> "$GITHUB_OUTPUT"
+ else
+ echo "crates=false" >> "$GITHUB_OUTPUT"
+ fi
+
+ - name: Validate npm package
+ run: node npm/deskctl/scripts/validate-package.js
+
+ - name: Publish npm
+ if: steps.published.outputs.npm != 'true'
+ env:
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+ run: npm publish ./npm/deskctl --access public
+
+ - name: Publish crates.io
+ if: inputs.publish_crates && steps.published.outputs.crates != 'true'
+ env:
+ CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
+ run: cargo publish --locked
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
deleted file mode 100644
index 60aed4d..0000000
--- a/.github/workflows/publish.yml
+++ /dev/null
@@ -1,127 +0,0 @@
-name: Publish Registries
-
-on:
- workflow_dispatch:
- inputs:
- bump:
- description: Version bump type
- required: true
- type: choice
- options:
- - patch
- - minor
- - major
- publish_npm:
- description: Publish deskctl to npm
- required: true
- type: boolean
- default: true
- publish_crates:
- description: Publish deskctl to crates.io
- required: true
- type: boolean
- default: false
-
-permissions:
- contents: read
-
-jobs:
- publish:
- runs-on: ubuntu-latest
- permissions:
- contents: write
- steps:
- - uses: actions/checkout@v4
- with:
- fetch-depth: 0
-
- - uses: dtolnay/rust-toolchain@stable
-
- - uses: actions/setup-node@v4
- with:
- node-version: 22
- registry-url: https://registry.npmjs.org
-
- - name: Install system dependencies
- run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
-
- - name: Compute next version
- id: version
- run: |
- CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
-
- case "${{ inputs.bump }}" in
- major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
- minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
- patch) PATCH=$((PATCH + 1)) ;;
- esac
-
- NEW="${MAJOR}.${MINOR}.${PATCH}"
- TAG="v${NEW}"
-
- echo "version=${NEW}" >> "$GITHUB_OUTPUT"
- echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
- echo "Bumping ${CURRENT} -> ${NEW} (${TAG})"
-
- - name: Bump versions
- run: |
- NEW="${{ steps.version.outputs.version }}"
- CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
- sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
- node -e 'const fs=require("node:fs"); const p="npm/deskctl/package.json"; const pkg=JSON.parse(fs.readFileSync(p,"utf8")); pkg.version=process.argv[1]; fs.writeFileSync(p, JSON.stringify(pkg, null, 2)+"\n");' "$NEW"
- cargo generate-lockfile
-
- - name: Commit, tag, and push
- run: |
- git config user.name "github-actions[bot]"
- git config user.email "github-actions[bot]@users.noreply.github.com"
- git add Cargo.toml Cargo.lock npm/deskctl/package.json
- git commit -m "release: ${{ steps.version.outputs.tag }} [skip ci]"
- git tag "${{ steps.version.outputs.tag }}"
- git push origin main --tags
-
- - name: Check current published state
- id: published
- run: |
- VERSION="${{ steps.version.outputs.version }}"
-
- if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then
- echo "npm=true" >> "$GITHUB_OUTPUT"
- else
- echo "npm=false" >> "$GITHUB_OUTPUT"
- fi
-
- if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then
- echo "crates=true" >> "$GITHUB_OUTPUT"
- else
- echo "crates=false" >> "$GITHUB_OUTPUT"
- fi
-
- - name: Validate npm package
- run: |
- mkdir -p ./tmp/npm-pack
- node npm/deskctl/scripts/validate-package.js
- npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null
-
- - name: Validate crate publish path
- run: cargo publish --dry-run --locked
-
- - name: Publish npm
- if: inputs.publish_npm && steps.published.outputs.npm != 'true'
- env:
- NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- run: npm publish ./npm/deskctl --access public
-
- - name: Publish crates.io
- if: inputs.publish_crates && steps.published.outputs.crates != 'true'
- env:
- CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
- run: cargo publish --locked
-
- - name: Summary
- run: |
- echo "tag=${{ steps.version.outputs.tag }}"
- echo "bump=${{ inputs.bump }}"
- echo "npm_already_published=${{ steps.published.outputs.npm }}"
- echo "crates_already_published=${{ steps.published.outputs.crates }}"
From 8d690a62b43a54d41f5b49c07f025bb4d419e3e6 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 18:28:13 +0000
Subject: [PATCH 24/37] release: v0.1.10 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 157dbc7..9680966 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.9"
+version = "0.1.10"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 2ebe138..cc6d11a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.9"
+version = "0.1.10"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index 5dfeaa0..adb142c 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.9",
+ "version": "0.1.10",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From a58912284b2a797c0d422182aa16c52ade05e580 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Thu, 26 Mar 2026 14:40:45 -0400
Subject: [PATCH 25/37] reorder pipeline
---
.github/workflows/ci.yml | 76 +++++++++++++++++++++++++---------------
1 file changed, 47 insertions(+), 29 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index bcb02b3..dcef6fb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,6 +28,9 @@ on:
type: boolean
default: false
+env:
+ FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
+
permissions:
contents: write
@@ -186,38 +189,12 @@ jobs:
- name: Distribution validation
run: make dist-validate
- build:
- name: Build Release Asset
- needs: [changes, validate, integration, distribution]
- if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - uses: dtolnay/rust-toolchain@stable
- with:
- components: clippy
-
- - uses: Swatinem/rust-cache@v2
-
- - name: Install system dependencies
- run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
-
- - name: Clippy
- run: cargo clippy -- -D warnings
-
- - name: Build
- run: cargo build --release --locked
-
- - uses: actions/upload-artifact@v4
- with:
- name: deskctl-linux-x86_64
- path: target/release/deskctl
- retention-days: 7
+ # --- Release pipeline: update-manifests -> build -> release -> publish ---
+ # Version bump happens BEFORE build so the binary has the correct version.
update-manifests:
name: Update Manifests
- needs: [changes, build]
+ needs: [changes, validate, integration, distribution]
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
@@ -258,6 +235,47 @@ jobs:
git tag "${{ needs.changes.outputs.tag }}"
git push origin main --tags
+ build:
+ name: Build Release Asset
+ needs: [changes, update-manifests]
+ if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ ref: ${{ needs.changes.outputs.tag }}
+
+ - uses: dtolnay/rust-toolchain@stable
+ with:
+ components: clippy
+
+ - uses: Swatinem/rust-cache@v2
+
+ - name: Install system dependencies
+ run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
+
+ - name: Verify version
+ run: |
+ CARGO_VER=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
+ EXPECTED="${{ needs.changes.outputs.version }}"
+ if [ "$CARGO_VER" != "$EXPECTED" ]; then
+ echo "Version mismatch: Cargo.toml=$CARGO_VER expected=$EXPECTED"
+ exit 1
+ fi
+ echo "Building version $CARGO_VER"
+
+ - name: Clippy
+ run: cargo clippy -- -D warnings
+
+ - name: Build
+ run: cargo build --release --locked
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: deskctl-linux-x86_64
+ path: target/release/deskctl
+ retention-days: 7
+
release:
name: Release
needs: [changes, build, update-manifests]
From 580ea79c276fb5eafb35300415441263bb523d04 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Thu, 26 Mar 2026 18:47:09 +0000
Subject: [PATCH 26/37] release: v0.1.11 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 9680966..b411e80 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.10"
+version = "0.1.11"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index cc6d11a..59108df 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.10"
+version = "0.1.11"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index adb142c..60b8ed4 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.10",
+ "version": "0.1.11",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From ff26c570351fc9d3690bf20958291da5f0bf0e89 Mon Sep 17 00:00:00 2001
From: Hari <73809867+harivansh-afk@users.noreply.github.com>
Date: Thu, 26 Mar 2026 15:25:40 -0400
Subject: [PATCH 27/37] replace firefox with chrome (#14)
---
site/src/pages/commands.mdx | 14 +++---
site/src/pages/quick-start.mdx | 16 +++----
skills/deskctl/SKILL.md | 10 ++---
skills/deskctl/references/commands.md | 12 ++---
skills/deskctl/workflows/observe-act.sh | 2 +-
src/cli/mod.rs | 58 ++++++++++++-------------
src/core/refs.rs | 12 ++---
7 files changed, 61 insertions(+), 63 deletions(-)
diff --git a/site/src/pages/commands.mdx b/site/src/pages/commands.mdx
index 934cdb8..0696558 100644
--- a/site/src/pages/commands.mdx
+++ b/site/src/pages/commands.mdx
@@ -37,9 +37,9 @@ preferred read surface for focused state queries.
## Wait for state transitions
```sh
-deskctl wait window --selector 'title=Firefox' --timeout 10
+deskctl wait window --selector 'title=Chromium' --timeout 10
deskctl wait focus --selector 'id=win3' --timeout 5
-deskctl --json wait window --selector 'class=firefox' --poll-ms 100
+deskctl --json wait window --selector 'class=chromium' --poll-ms 100
```
Wait commands return the matched window payload on success. In `--json` mode,
@@ -48,9 +48,9 @@ timeouts and selector failures expose structured `kind` values.
## Act on windows
```sh
-deskctl launch firefox
+deskctl launch chromium
deskctl focus @w1
-deskctl focus 'title=Firefox'
+deskctl focus 'title=Chromium'
deskctl click @w1
deskctl click 960,540
deskctl dblclick @w2
@@ -86,8 +86,8 @@ more deterministic for automation, and easier to retry safely.
```sh
ref=w1
id=win1
-title=Firefox
-class=firefox
+title=Chromium
+class=chromium
focused
```
@@ -99,7 +99,7 @@ w1
win1
```
-Bare strings like `firefox` are fuzzy matches. They resolve when there is one
+Bare strings like `chromium` are fuzzy matches. They resolve when there is one
match and fail with candidate windows when there are multiple matches.
## Global options
diff --git a/site/src/pages/quick-start.mdx b/site/src/pages/quick-start.mdx
index 7ecf5a7..4cc0e25 100644
--- a/site/src/pages/quick-start.mdx
+++ b/site/src/pages/quick-start.mdx
@@ -38,13 +38,13 @@ Prefer explicit selectors when you need deterministic targeting:
```sh
ref=w1
id=win1
-title=Firefox
-class=firefox
+title=Chromium
+class=chromium
focused
```
Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare
-strings like `firefox` are fuzzy matches and now fail on ambiguity.
+strings like `chromium` are fuzzy matches and now fail on ambiguity.
## 4. Wait, act, verify
@@ -55,16 +55,16 @@ The core loop is:
deskctl snapshot --annotate
# wait
-deskctl wait window --selector 'title=Firefox' --timeout 10
+deskctl wait window --selector 'title=Chromium' --timeout 10
# act
-deskctl focus 'title=Firefox'
+deskctl focus 'title=Chromium'
deskctl hotkey ctrl l
deskctl type "https://example.com"
deskctl press enter
# verify
-deskctl wait focus --selector 'title=Firefox' --timeout 5
+deskctl wait focus --selector 'title=Chromium' --timeout 5
deskctl snapshot
```
@@ -84,8 +84,8 @@ Every command supports `--json` and uses the same top-level envelope:
{
"ref_id": "w1",
"window_id": "win1",
- "title": "Firefox",
- "app_name": "firefox",
+ "title": "Chromium",
+ "app_name": "chromium",
"x": 0,
"y": 0,
"width": 1920,
diff --git a/skills/deskctl/SKILL.md b/skills/deskctl/SKILL.md
index 67a77c5..c79ca21 100644
--- a/skills/deskctl/SKILL.md
+++ b/skills/deskctl/SKILL.md
@@ -30,8 +30,8 @@ Every desktop interaction follows: **observe -> wait -> act -> verify**.
```bash
deskctl snapshot --annotate # observe
-deskctl wait window --selector 'title=Firefox' --timeout 10 # wait
-deskctl click 'title=Firefox' # act
+deskctl wait window --selector 'title=Chromium' --timeout 10 # wait
+deskctl click 'title=Chromium' # act
deskctl snapshot # verify
```
@@ -42,12 +42,12 @@ See [workflows/observe-act.sh](workflows/observe-act.sh) for a reusable script.
```bash
ref=w1 # snapshot ref (short-lived)
id=win1 # stable window ID (session-scoped)
-title=Firefox # match by title
-class=firefox # match by WM class
+title=Chromium # match by title
+class=chromium # match by WM class
focused # currently focused window
```
-Bare strings like `firefox` do fuzzy matching but fail on ambiguity. Prefer explicit selectors.
+Bare strings like `chromium` do fuzzy matching but fail on ambiguity. Prefer explicit selectors.
## References
diff --git a/skills/deskctl/references/commands.md b/skills/deskctl/references/commands.md
index 27b4310..df69350 100644
--- a/skills/deskctl/references/commands.md
+++ b/skills/deskctl/references/commands.md
@@ -23,8 +23,8 @@ deskctl get-mouse-position
## Wait
```bash
-deskctl wait window --selector 'title=Firefox' --timeout 10
-deskctl wait focus --selector 'class=firefox' --timeout 5
+deskctl wait window --selector 'title=Chromium' --timeout 10
+deskctl wait focus --selector 'class=chromium' --timeout 5
```
Returns the matched window payload on success. Failures include structured
@@ -35,8 +35,8 @@ Returns the matched window payload on success. Failures include structured
```bash
ref=w1
id=win1
-title=Firefox
-class=firefox
+title=Chromium
+class=chromium
focused
```
@@ -46,7 +46,7 @@ on ambiguity.
## Act
```bash
-deskctl focus 'class=firefox'
+deskctl focus 'class=chromium'
deskctl click @w1
deskctl dblclick @w2
deskctl type "hello world"
@@ -59,7 +59,7 @@ deskctl mouse drag 100 100 500 500
deskctl move-window @w1 100 120
deskctl resize-window @w1 1280 720
deskctl close @w3
-deskctl launch firefox
+deskctl launch chromium
```
The daemon starts automatically on first command. In normal usage you should
diff --git a/skills/deskctl/workflows/observe-act.sh b/skills/deskctl/workflows/observe-act.sh
index 0e336ae..8c3abc2 100755
--- a/skills/deskctl/workflows/observe-act.sh
+++ b/skills/deskctl/workflows/observe-act.sh
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
# observe-act.sh - main desktop interaction loop
# usage: ./observe-act.sh [action] [action-args...]
-# example: ./observe-act.sh 'title=Firefox' click
+# example: ./observe-act.sh 'title=Chromium' click
# example: ./observe-act.sh 'class=terminal' type "ls -la"
set -euo pipefail
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index 28092d7..79008de 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -48,13 +48,13 @@ pub enum Command {
/// Click a window ref or coordinates
#[command(after_help = CLICK_EXAMPLES)]
Click {
- /// Selector (ref=w1, id=win1, title=Firefox, class=firefox, focused) or x,y coordinates
+ /// Selector (ref=w1, id=win1, title=Chromium, class=chromium, focused) or x,y coordinates
selector: String,
},
/// Double-click a window ref or coordinates
#[command(after_help = DBLCLICK_EXAMPLES)]
Dblclick {
- /// Selector (ref=w1, id=win1, title=Firefox, class=firefox, focused) or x,y coordinates
+ /// Selector (ref=w1, id=win1, title=Chromium, class=chromium, focused) or x,y coordinates
selector: String,
},
/// Type text into the focused window
@@ -81,19 +81,19 @@ pub enum Command {
/// Focus a window by ref or name
#[command(after_help = FOCUS_EXAMPLES)]
Focus {
- /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring
+ /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring
selector: String,
},
/// Close a window by ref or name
#[command(after_help = CLOSE_EXAMPLES)]
Close {
- /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring
+ /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring
selector: String,
},
/// Move a window
#[command(after_help = MOVE_WINDOW_EXAMPLES)]
MoveWindow {
- /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring
+ /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring
selector: String,
/// X position
x: i32,
@@ -103,7 +103,7 @@ pub enum Command {
/// Resize a window
#[command(after_help = RESIZE_WINDOW_EXAMPLES)]
ResizeWindow {
- /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring
+ /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring
selector: String,
/// Width
w: u32,
@@ -210,19 +210,19 @@ const SNAPSHOT_EXAMPLES: &str =
const LIST_WINDOWS_EXAMPLES: &str =
"Examples:\n deskctl list-windows\n deskctl --json list-windows";
const CLICK_EXAMPLES: &str =
- "Examples:\n deskctl click @w1\n deskctl click 'title=Firefox'\n deskctl click 500,300";
+ "Examples:\n deskctl click @w1\n deskctl click 'title=Chromium'\n deskctl click 500,300";
const DBLCLICK_EXAMPLES: &str =
- "Examples:\n deskctl dblclick @w2\n deskctl dblclick 'class=firefox'\n deskctl dblclick 500,300";
+ "Examples:\n deskctl dblclick @w2\n deskctl dblclick 'class=chromium'\n deskctl dblclick 500,300";
const TYPE_EXAMPLES: &str =
"Examples:\n deskctl type \"hello world\"\n deskctl type \"https://example.com\"";
const PRESS_EXAMPLES: &str = "Examples:\n deskctl press enter\n deskctl press escape";
const HOTKEY_EXAMPLES: &str = "Examples:\n deskctl hotkey ctrl l\n deskctl hotkey ctrl shift t";
const FOCUS_EXAMPLES: &str =
- "Examples:\n deskctl focus @w1\n deskctl focus 'title=Firefox'\n deskctl focus focused";
+ "Examples:\n deskctl focus @w1\n deskctl focus 'title=Chromium'\n deskctl focus focused";
const CLOSE_EXAMPLES: &str =
- "Examples:\n deskctl close @w3\n deskctl close 'id=win2'\n deskctl close 'class=firefox'";
+ "Examples:\n deskctl close @w3\n deskctl close 'id=win2'\n deskctl close 'class=chromium'";
const MOVE_WINDOW_EXAMPLES: &str =
- "Examples:\n deskctl move-window @w1 100 200\n deskctl move-window 'title=Firefox' 0 0";
+ "Examples:\n deskctl move-window @w1 100 200\n deskctl move-window 'title=Chromium' 0 0";
const RESIZE_WINDOW_EXAMPLES: &str =
"Examples:\n deskctl resize-window @w1 1280 720\n deskctl resize-window 'id=win2' 800 600";
const GET_MONITORS_EXAMPLES: &str =
@@ -237,12 +237,12 @@ const GET_MOUSE_POSITION_EXAMPLES: &str =
const DOCTOR_EXAMPLES: &str = "Examples:\n deskctl doctor\n deskctl --json doctor";
const UPGRADE_EXAMPLES: &str =
"Examples:\n deskctl upgrade\n deskctl upgrade --yes\n deskctl --json upgrade --yes";
-const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Firefox' --timeout 10\n deskctl --json wait window --selector 'class=firefox' --poll-ms 100";
+const WAIT_WINDOW_EXAMPLES: &str = "Examples:\n deskctl wait window --selector 'title=Chromium' --timeout 10\n deskctl --json wait window --selector 'class=chromium' --poll-ms 100";
const WAIT_FOCUS_EXAMPLES: &str = "Examples:\n deskctl wait focus --selector 'id=win3' --timeout 5\n deskctl wait focus --selector focused --poll-ms 200";
const SCREENSHOT_EXAMPLES: &str =
"Examples:\n deskctl screenshot\n deskctl screenshot /tmp/screen.png\n deskctl screenshot --annotate";
const LAUNCH_EXAMPLES: &str =
- "Examples:\n deskctl launch firefox\n deskctl launch code -- --new-window";
+ "Examples:\n deskctl launch chromium\n deskctl launch code -- --new-window";
const MOUSE_MOVE_EXAMPLES: &str =
"Examples:\n deskctl mouse move 500 300\n deskctl mouse move 0 0";
const MOUSE_SCROLL_EXAMPLES: &str =
@@ -277,7 +277,7 @@ pub enum WaitCmd {
#[derive(Args)]
pub struct WaitSelectorOpts {
- /// Selector: ref=w1, id=win1, title=Firefox, class=firefox, focused, or a fuzzy substring
+ /// Selector: ref=w1, id=win1, title=Chromium, class=chromium, focused, or a fuzzy substring
#[arg(long)]
pub selector: String,
@@ -1103,8 +1103,8 @@ mod tests {
"windows": [{
"ref_id": "w1",
"window_id": "win1",
- "title": "Firefox",
- "app_name": "firefox",
+ "title": "Chromium",
+ "app_name": "chromium",
"x": 0,
"y": 0,
"width": 1280,
@@ -1125,37 +1125,37 @@ mod tests {
fn action_text_includes_target_identity() {
let lines = render_success_lines(
&Command::Focus {
- selector: "title=Firefox".to_string(),
+ selector: "title=Chromium".to_string(),
},
Some(&json!({
"action": "focus",
- "window": "Firefox",
- "title": "Firefox",
+ "window": "Chromium",
+ "title": "Chromium",
"ref_id": "w2",
"window_id": "win7"
})),
)
.unwrap();
- assert_eq!(lines, vec!["Focused @w2 [win7] \"Firefox\""]);
+ assert_eq!(lines, vec!["Focused @w2 [win7] \"Chromium\""]);
}
#[test]
fn timeout_errors_render_last_observation() {
let lines = render_error_lines(&Response::err_with_data(
- "Timed out waiting for focus to match selector: title=Firefox",
+ "Timed out waiting for focus to match selector: title=Chromium",
json!({
"kind": "timeout",
"wait": "focus",
- "selector": "title=Firefox",
+ "selector": "title=Chromium",
"timeout_ms": 1000,
"last_observation": {
"kind": "window_not_focused",
"window": {
"ref_id": "w1",
"window_id": "win1",
- "title": "Firefox",
- "app_name": "firefox",
+ "title": "Chromium",
+ "app_name": "chromium",
"x": 0,
"y": 0,
"width": 1280,
@@ -1167,10 +1167,8 @@ mod tests {
}),
));
- assert!(lines
- .iter()
- .any(|line| line
- .contains("Timed out after 1000ms waiting for focus selector title=Firefox")));
+ assert!(lines.iter().any(|line| line
+ .contains("Timed out after 1000ms waiting for focus selector title=Chromium")));
assert!(lines
.iter()
.any(|line| line.contains("matching window exists but is not focused yet")));
@@ -1190,9 +1188,9 @@ mod tests {
let summary = target_summary(&json!({
"ref_id": "w1",
"window_id": "win1",
- "title": "Firefox"
+ "title": "Chromium"
}));
- assert_eq!(summary.as_deref(), Some("@w1 [win1] \"Firefox\""));
+ assert_eq!(summary.as_deref(), Some("@w1 [win1] \"Chromium\""));
}
#[test]
diff --git a/src/core/refs.rs b/src/core/refs.rs
index 34e1ba7..7fd7b6c 100644
--- a/src/core/refs.rs
+++ b/src/core/refs.rs
@@ -412,8 +412,8 @@ mod tests {
SelectorQuery::WindowId("win4".to_string())
);
assert_eq!(
- SelectorQuery::parse("title=Firefox"),
- SelectorQuery::Title("Firefox".to_string())
+ SelectorQuery::parse("title=Chromium"),
+ SelectorQuery::Title("Chromium".to_string())
);
assert_eq!(
SelectorQuery::parse("class=Navigator"),
@@ -458,11 +458,11 @@ mod tests {
fn fuzzy_resolution_fails_with_candidates_when_ambiguous() {
let mut refs = RefMap::new();
refs.rebuild(&[
- sample_window(1, "Firefox"),
+ sample_window(1, "Chromium"),
BackendWindow {
native_id: 2,
- title: "Firefox Settings".to_string(),
- app_name: "Firefox".to_string(),
+ title: "Chromium Settings".to_string(),
+ app_name: "Chromium".to_string(),
x: 0,
y: 0,
width: 10,
@@ -472,7 +472,7 @@ mod tests {
},
]);
- match refs.resolve("firefox") {
+ match refs.resolve("chromium") {
ResolveResult::Ambiguous {
mode, candidates, ..
} => {
From 3a8d9f90c1ac036cfd5bdb30daf7275909870dd9 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 26 Mar 2026 19:31:47 +0000
Subject: [PATCH 28/37] release: v0.1.12 [skip ci]
---
Cargo.lock | 2 +-
Cargo.toml | 2 +-
npm/deskctl/package.json | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index b411e80..4acd174 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
-version = "0.1.11"
+version = "0.1.12"
dependencies = [
"ab_glyph",
"anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 59108df..d782ecd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "deskctl"
-version = "0.1.11"
+version = "0.1.12"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
diff --git a/npm/deskctl/package.json b/npm/deskctl/package.json
index 60b8ed4..1dd5bff 100644
--- a/npm/deskctl/package.json
+++ b/npm/deskctl/package.json
@@ -1,6 +1,6 @@
{
"name": "deskctl",
- "version": "0.1.11",
+ "version": "0.1.12",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
From 3ca6c90eafc6020b99730904a70e5f1593ca8441 Mon Sep 17 00:00:00 2001
From: Harivansh Rathi
Date: Fri, 27 Mar 2026 00:20:37 -0400
Subject: [PATCH 29/37] fix termination bug
---
src/daemon/mod.rs | 58 +++++++++++++++++++++++++++++++-------------
tests/support/mod.rs | 30 +++++++++++++++++++++++
tests/x11_runtime.rs | 25 +++++++++++++++++++
3 files changed, 96 insertions(+), 17 deletions(-)
diff --git a/src/daemon/mod.rs b/src/daemon/mod.rs
index 3df1d9a..9e7e931 100644
--- a/src/daemon/mod.rs
+++ b/src/daemon/mod.rs
@@ -1,6 +1,7 @@
mod handler;
mod state;
+use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::{Context, Result};
@@ -12,6 +13,29 @@ use crate::core::paths::{pid_path_from_env, socket_path_from_env};
use crate::core::session;
use state::DaemonState;
+struct RuntimePathsGuard {
+ socket_path: PathBuf,
+ pid_path: Option<PathBuf>,
+}
+
+impl RuntimePathsGuard {
+ fn new(socket_path: PathBuf, pid_path: Option<PathBuf>) -> Self {
+ Self {
+ socket_path,
+ pid_path,
+ }
+ }
+}
+
+impl Drop for RuntimePathsGuard {
+ fn drop(&mut self) {
+ remove_runtime_path(&self.socket_path);
+ if let Some(ref pid_path) = self.pid_path {
+ remove_runtime_path(pid_path);
+ }
+ }
+}
+
pub fn run() -> Result<()> {
// Validate session before starting
session::detect_session()?;
@@ -25,7 +49,6 @@ pub fn run() -> Result<()> {
async fn async_run() -> Result<()> {
let socket_path = socket_path_from_env().context("DESKCTL_SOCKET_PATH not set")?;
-
let pid_path = pid_path_from_env();
// Clean up stale socket
@@ -33,20 +56,21 @@ async fn async_run() -> Result<()> {
std::fs::remove_file(&socket_path)?;
}
- // Write PID file
- if let Some(ref pid_path) = pid_path {
- std::fs::write(pid_path, std::process::id().to_string())?;
- }
-
- let listener = UnixListener::bind(&socket_path)
- .context(format!("Failed to bind socket: {}", socket_path.display()))?;
-
let session = std::env::var("DESKCTL_SESSION").unwrap_or_else(|_| "default".to_string());
let state = Arc::new(Mutex::new(
DaemonState::new(session, socket_path.clone())
.context("Failed to initialize daemon state")?,
));
+ let listener = UnixListener::bind(&socket_path)
+ .context(format!("Failed to bind socket: {}", socket_path.display()))?;
+ let _runtime_paths = RuntimePathsGuard::new(socket_path.clone(), pid_path.clone());
+
+ // Write PID file only after the daemon is ready to serve requests.
+ if let Some(ref pid_path) = pid_path {
+ std::fs::write(pid_path, std::process::id().to_string())?;
+ }
+
let shutdown = Arc::new(tokio::sync::Notify::new());
let shutdown_clone = shutdown.clone();
@@ -75,14 +99,6 @@ async fn async_run() -> Result<()> {
}
}
- // Cleanup
- if socket_path.exists() {
- let _ = std::fs::remove_file(&socket_path);
- }
- if let Some(ref pid_path) = pid_path {
- let _ = std::fs::remove_file(pid_path);
- }
-
Ok(())
}
@@ -123,3 +139,11 @@ async fn handle_connection(
Ok(())
}
+
+fn remove_runtime_path(path: &Path) {
+ if let Err(error) = std::fs::remove_file(path) {
+ if error.kind() != std::io::ErrorKind::NotFound {
+ eprintln!("Failed to remove runtime path {}: {error}", path.display());
+ }
+ }
+}
diff --git a/tests/support/mod.rs b/tests/support/mod.rs
index 5c6f0be..719334d 100644
--- a/tests/support/mod.rs
+++ b/tests/support/mod.rs
@@ -142,6 +142,10 @@ impl TestSession {
.expect("TestSession always has an explicit socket path")
}
+ pub fn pid_path(&self) -> PathBuf {
+ self.root.join("deskctl.pid")
+ }
+
pub fn create_stale_socket(&self) -> Result<()> {
let listener = UnixListener::bind(self.socket_path())
.with_context(|| format!("Failed to bind {}", self.socket_path().display()))?;
@@ -187,6 +191,29 @@ impl TestSession {
)
})
}
+
+ pub fn run_daemon(&self, env: I) -> Result