Compare commits

..

41 commits
v0.1.3 ... main

Author SHA1 Message Date
32c6d337f1 ci: use self-hosted netty runners for validation jobs
Some checks failed
CI / Changes (push) Has been cancelled
CI / Validate (push) Has been cancelled
CI / Integration (Xvfb) (push) Has been cancelled
CI / Distribution Validate (push) Has been cancelled
CI / Update Manifests (push) Has been cancelled
CI / Build Release Asset (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Publish npm (push) Has been cancelled
CI / Publish crates.io (push) Has been cancelled
2026-04-05 11:46:45 -04:00
13119eecf7 update crates.io link
Some checks failed
CI / Changes (push) Has been cancelled
CI / Validate (push) Has been cancelled
CI / Integration (Xvfb) (push) Has been cancelled
CI / Distribution Validate (push) Has been cancelled
CI / Update Manifests (push) Has been cancelled
CI / Build Release Asset (push) Has been cancelled
CI / Release (push) Has been cancelled
CI / Publish npm (push) Has been cancelled
CI / Publish crates.io (push) Has been cancelled
2026-04-02 14:53:56 -04:00
github-actions[bot]
2b7de5fcef release: v0.1.14 [skip ci] 2026-03-27 23:46:33 +00:00
2b3d422c7b crates.io 2026-03-27 19:40:24 -04:00
19669fb4c1 demo 2026-03-27 19:25:26 -04:00
Hari
2107449d9b
Update README with asset link and description change
Added a link to GitHub assets and removed 'Linux' from description.
2026-03-27 18:17:51 -04:00
github-actions[bot]
85e1916635 release: v0.1.13 [skip ci] 2026-03-27 14:10:07 +00:00
9bfada8b4b fix helper 2026-03-27 10:04:10 -04:00
3ca6c90eaf fix termination bug 2026-03-27 00:20:37 -04:00
github-actions[bot]
3a8d9f90c1 release: v0.1.12 [skip ci] 2026-03-26 19:31:47 +00:00
Hari
ff26c57035
replace firefox with chrome (#14) 2026-03-26 15:25:40 -04:00
github-actions[bot]
580ea79c27 release: v0.1.11 [skip ci] 2026-03-26 18:47:09 +00:00
a58912284b reorder pipeline 2026-03-26 14:40:45 -04:00
github-actions[bot]
8d690a62b4 release: v0.1.10 [skip ci] 2026-03-26 18:28:13 +00:00
07a478b0ee couple CI with publish 2026-03-26 14:21:41 -04:00
github-actions[bot]
e61c5bc33f release: v0.1.9 [skip ci] 2026-03-26 16:03:29 +00:00
Hari
a64b46b479
deskctl upgrade (#13)
* deskctl upgrade

* interactive update
as well as --yes flag
2026-03-26 11:53:15 -04:00
Hari
2b02513d6e
Improve docs structure and navigation (#12)
* Improve docs structure and navigation

Co-authored-by: Codex <noreply@openai.com>

* rm

* handwrite docs

---------

Co-authored-by: Codex <noreply@openai.com>
2026-03-26 11:27:35 -04:00
github-actions[bot]
844f2f2bc6 release: v0.1.8 [skip ci] 2026-03-26 13:37:41 +00:00
6c6f33040f update readme 2026-03-26 09:35:46 -04:00
848ef97e87 edit readme 2026-03-26 09:34:10 -04:00
bf603671f9 rm: 2026-03-26 09:28:18 -04:00
3bfec9eecc edit docs 2026-03-26 09:27:43 -04:00
c907e800af change client bin name 2026-03-26 09:21:58 -04:00
2a8b51b4f5 docs: tighten skill install docs and bundle
Co-authored-by: Codex <noreply@openai.com>
2026-03-26 09:21:43 -04:00
eedb5de2d4 refresh contributor cache [skip ci] 2026-03-26 09:13:10 -04:00
47047e9064 migrate update manifest job to publish workflow 2026-03-26 09:11:13 -04:00
deaffff45a major/minor/patch 2026-03-26 09:07:56 -04:00
1d72c7b852 fix: add registry-url to setup-node for npm auth [skip ci] 2026-03-26 09:01:12 -04:00
86c36a3b50 release: v0.1.7 [skip ci] 2026-03-26 08:53:50 -04:00
Hari
eac3a61ceb rename (#11)
* align docs and contract

* clean

* rename from deskctl-cli to deskctl

* runtime
2026-03-26 08:51:15 -04:00
88f9ff85a3 clean 2026-03-26 08:37:52 -04:00
14c8956321 align docs and contract 2026-03-26 08:37:52 -04:00
c37589ccf4 skill validated with workflows 2026-03-26 00:33:27 -04:00
3dbd9ce52d init with runtime contract 2026-03-26 00:33:27 -04:00
github-actions[bot]
1092e503be release: v0.1.6 [skip ci] 2026-03-26 03:25:14 +00:00
Hari
714e34ba19
nix (#7)
npm
cargo
2026-03-25 23:18:28 -04:00
github-actions[bot]
425a71095a release: v0.1.5 [skip ci] 2026-03-26 02:04:56 +00:00
Hari
543d41c3a2
runtime contract enforcement (#6) 2026-03-25 22:00:16 -04:00
github-actions[bot]
61f4738311 release: v0.1.4 [skip ci] 2026-03-26 01:16:24 +00:00
Hari
a4cf9e32dd
grouped runtime reads and waits selector modes (#5)
- grouped runtime reads and waits
selector modes
- Fix wait command client timeouts and test failures
2026-03-25 21:11:30 -04:00
45 changed files with 5195 additions and 787 deletions

View file

@ -1,24 +1,39 @@
name: CI
# Runners: uvacompute (https://uvacompute.com)
# To enable, set the UVA_RUNNER repo variable to the correct runner label.
# runs-on: ${{ vars.UVA_RUNNER || 'ubuntu-latest' }}
on:
pull_request:
branches: [main]
push:
branches: [main]
workflow_dispatch:
inputs:
bump:
description: Version bump type (only for workflow_dispatch)
type: choice
options:
- patch
- minor
- major
default: patch
publish_npm:
description: Publish to npm
type: boolean
default: true
publish_crates:
description: Publish to crates.io
type: boolean
default: true
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
permissions:
contents: write
packages: write
jobs:
changes:
name: Changes
runs-on: ubuntu-latest
runs-on: [self-hosted, netty]
outputs:
rust: ${{ steps.check.outputs.rust }}
version: ${{ steps.version.outputs.version }}
@ -37,7 +52,11 @@ jobs:
- 'tests/**'
- 'Cargo.toml'
- 'Cargo.lock'
- 'npm/**'
- 'flake.nix'
- 'flake.lock'
- 'docker/**'
- '.github/workflows/**'
- 'Makefile'
- name: Set outputs
@ -53,34 +72,36 @@ jobs:
id: version
if: github.event_name != 'pull_request' && steps.check.outputs.rust == 'true'
run: |
BASE=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
IFS='.' read -r MAJOR MINOR PATCH <<< "$BASE"
CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
IFS='.' read -r MAJOR MINOR PATCH <<< "$CURRENT"
LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1)
if [ -z "$LATEST" ]; then
NEW="$BASE"
else
LATEST_VER="${LATEST#v}"
IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER"
NEW_PATCH=$((LATEST_PATCH + 1))
NEW="${MAJOR}.${MINOR}.${NEW_PATCH}"
fi
# Ensure the computed version does not already have a tag
while git rev-parse "v${NEW}" >/dev/null 2>&1; do
IFS='.' read -r MAJOR MINOR PATCH <<< "$NEW"
NEW="${MAJOR}.${MINOR}.$((PATCH + 1))"
done
BUMP="${{ inputs.bump || 'patch' }}"
case "$BUMP" in
major) MAJOR=$((MAJOR + 1)); MINOR=0; PATCH=0 ;;
minor) MINOR=$((MINOR + 1)); PATCH=0 ;;
patch)
LATEST=$(git tag -l "v${MAJOR}.${MINOR}.*" | sort -V | tail -1)
if [ -z "$LATEST" ]; then
NEW_PATCH=$PATCH
else
LATEST_VER="${LATEST#v}"
IFS='.' read -r _ _ LATEST_PATCH <<< "$LATEST_VER"
NEW_PATCH=$((LATEST_PATCH + 1))
fi
PATCH=$NEW_PATCH
;;
esac
NEW="${MAJOR}.${MINOR}.${PATCH}"
echo "version=${NEW}" >> "$GITHUB_OUTPUT"
echo "tag=v${NEW}" >> "$GITHUB_OUTPUT"
echo "Computed version: ${NEW} (v${NEW})"
validate:
name: Validate
needs: changes
if: needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
runs-on: [self-hosted, netty]
steps:
- uses: actions/checkout@v4
@ -104,9 +125,6 @@ jobs:
- name: Install site dependencies
run: pnpm --dir site install --frozen-lockfile
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- name: Format check
run: make fmt-check
@ -123,7 +141,7 @@ jobs:
name: Integration (Xvfb)
needs: changes
if: needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
runs-on: [self-hosted, netty]
steps:
- uses: actions/checkout@v4
@ -131,79 +149,35 @@ jobs:
- uses: Swatinem/rust-cache@v2
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev xvfb
- name: Xvfb integration tests
run: make test-integration
build:
name: Build (${{ matrix.target }})
needs: [changes, validate, integration]
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
target: [cargo, docker]
distribution:
name: Distribution Validate
needs: changes
if: needs.changes.outputs.rust == 'true'
runs-on: [self-hosted, netty]
steps:
- uses: actions/checkout@v4
# --- Cargo steps ---
- uses: dtolnay/rust-toolchain@stable
if: matrix.target == 'cargo'
with:
components: clippy
- uses: Swatinem/rust-cache@v2
if: matrix.target == 'cargo'
- name: Install system dependencies
if: matrix.target == 'cargo'
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- name: Clippy
if: matrix.target == 'cargo'
run: cargo clippy -- -D warnings
- name: Build
if: matrix.target == 'cargo'
run: cargo build --release --locked
- uses: actions/upload-artifact@v4
if: matrix.target == 'cargo'
- uses: actions/setup-node@v4
with:
name: deskctl-linux-x86_64
path: target/release/deskctl
retention-days: 7
node-version: 22
# --- Docker steps ---
- uses: docker/setup-buildx-action@v3
if: matrix.target == 'docker'
- name: Distribution validation
run: make dist-validate
- uses: docker/login-action@v3
if: matrix.target == 'docker'
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- uses: docker/build-push-action@v6
if: matrix.target == 'docker'
with:
context: .
file: docker/Dockerfile
push: true
tags: |
ghcr.io/${{ github.repository }}:latest
ghcr.io/${{ github.repository }}:${{ needs.changes.outputs.tag }}
cache-from: type=gha
cache-to: type=gha,mode=max
# --- Release pipeline: update-manifests -> build -> release -> publish ---
# These stay on ubuntu-latest for artifact upload/download and registry publishing.
update-manifests:
name: Update Manifests
needs: [changes, build]
if: github.event_name != 'pull_request'
needs: [changes, validate, integration, distribution]
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@ -212,7 +186,11 @@ jobs:
- uses: dtolnay/rust-toolchain@stable
- name: Update version in Cargo.toml
- uses: actions/setup-node@v4
with:
node-version: 22
- name: Update versions
run: |
CURRENT=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
NEW="${{ needs.changes.outputs.version }}"
@ -220,26 +198,70 @@ jobs:
sed -i "0,/^version = \"${CURRENT}\"/s//version = \"${NEW}\"/" Cargo.toml
cargo generate-lockfile
fi
node -e '
const fs = require("node:fs");
const p = "npm/deskctl/package.json";
const pkg = JSON.parse(fs.readFileSync(p, "utf8"));
pkg.version = process.argv[1];
fs.writeFileSync(p, JSON.stringify(pkg, null, 2) + "\n");
' "$NEW"
- name: Commit, tag, and push
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
if ! git diff --quiet; then
git add Cargo.toml Cargo.lock
git add Cargo.toml Cargo.lock npm/deskctl/package.json
if ! git diff --cached --quiet; then
git commit -m "release: ${{ needs.changes.outputs.tag }} [skip ci]"
fi
if ! git rev-parse "${{ needs.changes.outputs.tag }}" >/dev/null 2>&1; then
git tag "${{ needs.changes.outputs.tag }}"
fi
git tag "${{ needs.changes.outputs.tag }}"
git push origin main --tags
build:
name: Build Release Asset
needs: [changes, update-manifests]
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.changes.outputs.tag }}
- uses: dtolnay/rust-toolchain@stable
with:
components: clippy
- uses: Swatinem/rust-cache@v2
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- name: Verify version
run: |
CARGO_VER=$(grep '^version' Cargo.toml | head -1 | sed 's/.*"\(.*\)"/\1/')
EXPECTED="${{ needs.changes.outputs.version }}"
if [ "$CARGO_VER" != "$EXPECTED" ]; then
echo "Version mismatch: Cargo.toml=$CARGO_VER expected=$EXPECTED"
exit 1
fi
echo "Building version $CARGO_VER"
- name: Clippy
run: cargo clippy -- -D warnings
- name: Build
run: cargo build --release --locked
- uses: actions/upload-artifact@v4
with:
name: deskctl-linux-x86_64
path: target/release/deskctl
retention-days: 7
release:
name: Release
needs: [changes, build, update-manifests]
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && needs.changes.outputs.rust == 'true'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
@ -256,9 +278,87 @@ jobs:
chmod +x artifacts/deskctl
mv artifacts/deskctl artifacts/deskctl-linux-x86_64
cd artifacts && sha256sum deskctl-linux-x86_64 > checksums.txt && cd ..
if gh release view "${{ needs.changes.outputs.tag }}" >/dev/null 2>&1; then
gh release upload "${{ needs.changes.outputs.tag }}" \
artifacts/deskctl-linux-x86_64 \
artifacts/checksums.txt \
--clobber
else
gh release create "${{ needs.changes.outputs.tag }}" \
--title "${{ needs.changes.outputs.tag }}" \
--generate-notes \
artifacts/deskctl-linux-x86_64 \
artifacts/checksums.txt
fi
gh release create "${{ needs.changes.outputs.tag }}" \
--title "${{ needs.changes.outputs.tag }}" \
--generate-notes \
artifacts/deskctl-linux-x86_64 \
artifacts/checksums.txt
publish-npm:
name: Publish npm
needs: [changes, update-manifests, release]
if: >-
github.event_name != 'pull_request'
&& needs.changes.outputs.rust == 'true'
&& (inputs.publish_npm == true || inputs.publish_npm == '')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.changes.outputs.tag }}
- uses: actions/setup-node@v4
with:
node-version: 22
registry-url: https://registry.npmjs.org
- name: Check if already published
id: published
run: |
VERSION="${{ needs.changes.outputs.version }}"
if npm view "deskctl@${VERSION}" version >/dev/null 2>&1; then
echo "npm=true" >> "$GITHUB_OUTPUT"
else
echo "npm=false" >> "$GITHUB_OUTPUT"
fi
- name: Validate npm package
if: steps.published.outputs.npm != 'true'
run: node npm/deskctl/scripts/validate-package.js
- name: Publish npm
if: steps.published.outputs.npm != 'true'
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
run: npm publish ./npm/deskctl --access public
publish-crates:
name: Publish crates.io
needs: [changes, update-manifests, release]
if: >-
github.event_name != 'pull_request'
&& needs.changes.outputs.rust == 'true'
&& (inputs.publish_crates == true || inputs.publish_crates == '')
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
ref: ${{ needs.changes.outputs.tag }}
- uses: dtolnay/rust-toolchain@stable
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libx11-dev libxtst-dev
- name: Check if already published
id: published
run: |
VERSION="${{ needs.changes.outputs.version }}"
if curl -fsSL "https://crates.io/api/v1/crates/deskctl/${VERSION}" >/dev/null 2>&1; then
echo "crates=true" >> "$GITHUB_OUTPUT"
else
echo "crates=false" >> "$GITHUB_OUTPUT"
fi
- name: Publish crates.io
if: steps.published.outputs.crates != 'true'
env:
CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: cargo publish --locked

2
.gitignore vendored
View file

@ -5,3 +5,5 @@ secret/
.claude/
.codex/
openspec/
npm/deskctl/vendor/
npm/deskctl/*.tgz

View file

@ -21,6 +21,7 @@ pnpm --dir site install
- `src/` holds production code and unit tests
- `tests/` holds integration tests
- `tests/support/` holds shared X11 and daemon helpers for integration coverage
- `docs/runtime-contract.md` is the stable-vs-best-effort runtime output contract for agent-facing CLI work
Keep integration-only helpers out of `src/`.
@ -34,10 +35,15 @@ make lint
make test-unit
make test-integration
make site-format-check
make cargo-publish-dry-run
make npm-package-check
make nix-flake-check
make dist-validate
make validate
```
`make validate` runs the full Phase 2 validation stack. It requires Linux, `xvfb-run`, and site dependencies to be installed.
`make dist-validate` runs the distribution validation stack. It requires `npm` and `nix`; the npm runtime smoke test additionally requires Linux and is skipped on other hosts.
## Pre-commit Hooks
@ -59,6 +65,19 @@ The hook config intentionally stays small:
- Site files reuse the existing `site/` Prettier setup
- Slower checks stay in CI or `make validate`
## Distribution Work
Distribution support currently ships through:
- crate: `deskctl`
- npm package: `deskctl`
- repo flake: `flake.nix`
- command name on every channel: `deskctl`
For maintainer release and publish steps, see [docs/releasing.md](docs/releasing.md).
Source-build and packaging work should keep Docker as a local Linux build convenience, not as the canonical registry release path.
## Integration Tests
Integration coverage is Linux/X11-only in this phase. The supported local entrypoint is:

44
Cargo.lock generated
View file

@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
version = 3
[[package]]
name = "ab_glyph"
@ -241,9 +241,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
[[package]]
name = "cc"
version = "1.2.57"
version = "1.2.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423"
checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
dependencies = [
"find-msvc-tools",
"jobserver",
@ -400,7 +400,7 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
[[package]]
name = "deskctl"
version = "0.1.3"
version = "0.1.14"
dependencies = [
"ab_glyph",
"anyhow",
@ -911,9 +911,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.91"
version = "0.3.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c"
checksum = "cc4c90f45aa2e6eacbe8645f77fdea542ac97a494bcd117a67df9ff4d611f995"
dependencies = [
"once_cell",
"wasm-bindgen",
@ -1039,9 +1039,9 @@ dependencies = [
[[package]]
name = "mio"
version = "1.1.1"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc"
checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
dependencies = [
"libc",
"wasi",
@ -1699,9 +1699,9 @@ dependencies = [
[[package]]
name = "simd-adler32"
version = "0.3.8"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
[[package]]
name = "simd_helpers"
@ -1861,9 +1861,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.22.0"
version = "1.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68d3c8f01c0cfa54a75291d83601161799e4a89a39e0929f4b0354d88757a37"
checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9"
dependencies = [
"getrandom 0.4.2",
"js-sys",
@ -1907,9 +1907,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e"
checksum = "6523d69017b7633e396a89c5efab138161ed5aafcbc8d3e5c5a42ae38f50495a"
dependencies = [
"cfg-if",
"once_cell",
@ -1920,9 +1920,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6"
checksum = "4e3a6c758eb2f701ed3d052ff5737f5bfe6614326ea7f3bbac7156192dc32e67"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@ -1930,9 +1930,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3"
checksum = "921de2737904886b52bcbb237301552d05969a6f9c40d261eb0533c8b055fedf"
dependencies = [
"bumpalo",
"proc-macro2",
@ -1943,9 +1943,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.114"
version = "0.2.115"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16"
checksum = "a93e946af942b58934c604527337bad9ae33ba1d5c6900bbb41c2c07c2364a93"
dependencies = [
"unicode-ident",
]
@ -2297,9 +2297,9 @@ dependencies = [
[[package]]
name = "zune-jpeg"
version = "0.5.14"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7a1c0af6e5d8d1363f4994b7a091ccf963d8b694f7da5b0b9cceb82da2c0a6"
checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296"
dependencies = [
"zune-core",
]

View file

@ -1,10 +1,23 @@
[package]
name = "deskctl"
version = "0.1.3"
version = "0.1.14"
edition = "2021"
description = "X11 desktop control CLI for agents"
license = "MIT"
repository = "https://github.com/harivansh-afk/deskctl"
homepage = "https://github.com/harivansh-afk/deskctl"
readme = "README.md"
keywords = ["x11", "desktop", "automation", "cli", "agent"]
categories = ["command-line-utilities"]
rust-version = "1.75"
include = [
"/Cargo.toml",
"/Cargo.lock",
"/README.md",
"/LICENCE",
"/assets/**",
"/src/**",
]
[dependencies]
clap = { version = "4", features = ["derive", "env"] }

View file

@ -1,4 +1,4 @@
.PHONY: fmt fmt-check lint test-unit test-integration site-format-check validate
.PHONY: fmt fmt-check lint test-unit test-integration site-format-check cargo-publish-dry-run npm-package-check nix-flake-check dist-validate validate
fmt:
cargo fmt --all
@ -30,4 +30,34 @@ site-format-check:
fi
pnpm --dir site format:check
cargo-publish-dry-run:
cargo publish --dry-run --allow-dirty --locked
npm-package-check:
@if ! command -v npm >/dev/null 2>&1; then \
echo "npm is required for npm packaging validation."; \
exit 1; \
fi
node npm/deskctl/scripts/validate-package.js
rm -rf tmp/npm-pack tmp/npm-install
mkdir -p tmp/npm-pack tmp/npm-install/bin
npm pack ./npm/deskctl --pack-destination ./tmp/npm-pack >/dev/null
@if [ "$$(uname -s)" != "Linux" ]; then \
echo "Skipping npm package runtime smoke test on non-Linux host."; \
else \
cargo build && \
PACK_TGZ=$$(ls ./tmp/npm-pack/*.tgz | head -n 1) && \
DESKCTL_BINARY_PATH="$$(pwd)/target/debug/deskctl" npm install --prefix ./tmp/npm-install "$${PACK_TGZ}" && \
./tmp/npm-install/node_modules/.bin/deskctl --version; \
fi
nix-flake-check:
@if ! command -v nix >/dev/null 2>&1; then \
echo "nix is required for flake validation."; \
exit 1; \
fi
nix flake check
dist-validate: test-unit cargo-publish-dry-run npm-package-check nix-flake-check
validate: fmt-check lint test-unit test-integration site-format-check

128
README.md
View file

@ -1,126 +1,46 @@
# deskctl
[![npm](https://img.shields.io/npm/v/deskctl?label=npm)](https://www.npmjs.com/package/deskctl)
[![skill](https://img.shields.io/badge/skills.sh-deskctl-111827)](skills/deskctl)
Desktop control CLI for AI agents on X11.
https://github.com/user-attachments/assets/e820787e-4d1a-463f-bdcf-a829588778bf
Desktop control CLI for AI agents on Linux X11.
## Install
```bash
cargo install deskctl
npm install -g deskctl
```
Build a Linux binary with Docker:
```bash
docker compose -f docker/docker-compose.yml run --rm build
deskctl doctor
deskctl snapshot --annotate
```
This writes `dist/deskctl-linux-x86_64`.
Copy it to an SSH machine where `scp` is unavailable:
## Skill
```bash
ssh -p 443 deskctl@ssh.agentcomputer.ai 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64
npx skills add harivansh-afk/deskctl
```
Run it on an X11 session:
## Docs
- runtime contract: [docs/runtime-contract.md](docs/runtime-contract.md)
- releasing: [docs/releasing.md](docs/releasing.md)
- contributing: [CONTRIBUTING.md](CONTRIBUTING.md)
## Install paths
Nix:
```bash
DISPLAY=:1 XDG_SESSION_TYPE=x11 ~/deskctl --json snapshot --annotate
nix run github:harivansh-afk/deskctl -- --help
nix profile install github:harivansh-afk/deskctl
```
Local source build requirements:
Rust:
```bash
cargo build
```
At the moment there are no extra native build dependencies beyond a Rust toolchain.
## Quick Start
```bash
# Diagnose the environment first
deskctl doctor
# See the desktop
deskctl snapshot
# Click a window
deskctl click @w1
# Type text
deskctl type "hello world"
# Focus by name
deskctl focus "firefox"
```
## Architecture
Client-daemon architecture over Unix sockets (NDJSON wire protocol).
The daemon starts automatically on first command and keeps the X11 connection alive for fast repeated calls.
Source layout:
- `src/lib.rs` exposes the shared library target
- `src/main.rs` is the thin CLI wrapper
- `src/` contains production code and unit tests
- `tests/` contains Linux/X11 integration tests
- `tests/support/` contains shared integration helpers
## Runtime Requirements
- Linux with X11 session
- Rust 1.75+ (for build)
The binary itself only links the standard glibc runtime on Linux (`libc`, `libm`, `libgcc_s`).
For deskctl to be fully functional on a fresh VM you still need:
- an X11 server and an active `DISPLAY`
- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment
- a window manager or desktop environment that exposes standard EWMH properties such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW`
- an X server with the extensions needed for input simulation and screen metadata, which is standard on normal desktop X11 setups
If setup fails, run:
```bash
deskctl doctor
```
## Contract Notes
- `@wN` refs are short-lived handles assigned by `snapshot` and `list-windows`
- `--json` output includes a stable `window_id` for programmatic targeting within the current daemon session
- `list-windows` is a cheap read-only operation and does not capture or write a screenshot
## Support Boundary
`deskctl` supports Linux X11 in this phase. Wayland and Hyprland are explicitly out of scope for the current runtime contract.
## Workflow
Local validation uses the root `Makefile`:
```bash
make fmt-check
make lint
make test-unit
make test-integration
make site-format-check
make validate
```
`make validate` is the full repo-quality check and requires Linux with `xvfb-run` plus `pnpm --dir site install`.
The repository standardizes on `pre-commit` for fast commit-time checks:
```bash
pre-commit install
pre-commit run --all-files
```
See [CONTRIBUTING.md](CONTRIBUTING.md) for the full contributor guide.
## Acknowledgements
- [@barrettruth](https://github.com/barrettruth) - I stole the website from [vimdoc](https://github.com/barrettruth/vimdoc-language-server)

969
demo/index.html Normal file
View file

@ -0,0 +1,969 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>deskctl - Desktop Control for AI Agents</title>
<style>
*, *::before, *::after { margin: 0; padding: 0; box-sizing: border-box; }
:root {
/* cozybox light */
--page-bg: #f2f2f2;
--bg: #e7e7e7;
--surface: #dcdcdc;
--surface-2: #e1e1e1;
--border: #c3c7c9;
--text: #282828;
--text-dim: #504945;
--text-muted: #928374;
--selection: #c3c7c9;
--accent: #4261a5;
--green: #427b58;
--red: #c5524a;
--yellow: #d79921;
--orange: #af3a03;
--purple: #8f3f71;
--aqua: #427b58;
--cyan: #3c7678;
--gray: #928374;
--mono: 'Berkeley Mono', 'JetBrains Mono', 'Fira Code', 'SF Mono', Consolas, monospace;
--sans: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
}
body {
font-family: var(--sans);
background: var(--page-bg);
color: var(--text);
overflow: hidden;
height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
}
.hero {
text-align: center;
margin-bottom: 28px;
z-index: 10;
}
.hero h1 {
font-family: var(--mono);
font-size: 28px;
font-weight: 700;
letter-spacing: -0.5px;
margin-bottom: 4px;
}
.hero p { font-size: 14px; color: var(--text-dim); }
.demo-container {
display: flex;
gap: 16px;
width: 1140px;
max-width: 96vw;
height: 580px;
}
/* ── Desktop ──────────────────────────────────────── */
.desktop-panel {
flex: 1;
position: relative;
border-radius: 12px;
overflow: hidden;
background: var(--bg);
box-shadow: 0 4px 24px rgba(0,0,0,0.08), 0 0 0 1px rgba(0,0,0,0.04);
}
.desktop-titlebar {
height: 30px;
background: var(--surface);
display: flex;
align-items: center;
padding: 0 12px;
gap: 6px;
}
.dot { width: 10px; height: 10px; border-radius: 50%; }
.viewport {
position: relative;
height: calc(100% - 30px);
background: var(--bg);
overflow: hidden;
}
.wallpaper {
position: absolute;
inset: 0;
background:
radial-gradient(ellipse at 25% 35%, rgba(66,97,165,0.04) 0%, transparent 55%),
radial-gradient(ellipse at 75% 65%, rgba(66,123,88,0.03) 0%, transparent 55%),
var(--bg);
}
/* ── Taskbar ──────────────────────────────────────── */
.taskbar {
position: absolute;
bottom: 0; left: 0; right: 0;
height: 28px;
background: var(--surface);
display: flex;
align-items: center;
padding: 0 8px;
gap: 2px;
z-index: 15;
}
.tb-item {
height: 20px;
padding: 0 10px;
font-family: var(--mono);
font-size: 9px;
color: var(--text-dim);
display: flex;
align-items: center;
border-radius: 3px;
opacity: 0;
transform: translateX(-4px);
transition: opacity 0.3s, transform 0.3s, background 0.15s;
}
.tb-item.visible { opacity: 1; transform: translateX(0); }
.tb-item.active { background: rgba(0,0,0,0.06); color: var(--text); }
/* ── Windows ──────────────────────────────────────── */
.win {
position: absolute;
border-radius: 8px;
overflow: hidden;
box-shadow: 0 2px 12px rgba(0,0,0,0.08), 0 0 0 1px rgba(0,0,0,0.04);
transition: box-shadow 0.2s, opacity 0.4s ease, transform 0.4s ease;
opacity: 0;
transform: scale(0.92) translateY(14px);
}
.win.visible { opacity: 1; transform: scale(1) translateY(0); }
.win.focused { box-shadow: 0 4px 20px rgba(0,0,0,0.12), 0 0 0 1px rgba(66,97,165,0.15); z-index: 10; }
.wbar {
height: 26px;
background: var(--surface);
display: flex;
align-items: center;
padding: 0 8px;
gap: 5px;
font-size: 10px;
font-family: var(--mono);
color: var(--text-dim);
}
.wbar .dots { display: flex; gap: 3px; }
.wbar .dots span { width: 7px; height: 7px; border-radius: 50%; }
.wbody {
background: #f8f8f8;
height: calc(100% - 26px);
overflow: hidden;
position: relative;
}
/* ── File Manager ─────────────────────────────────── */
.file-list { padding: 8px; }
.file-row {
display: flex;
align-items: center;
gap: 8px;
padding: 5px 8px;
border-radius: 4px;
font-family: var(--mono);
font-size: 10px;
color: var(--text-dim);
transition: background 0.15s;
}
.file-row.selected { background: var(--selection); color: var(--text); }
.file-row .ficon { font-size: 13px; width: 18px; text-align: center; }
.file-row .fmeta { margin-left: auto; font-size: 8px; color: var(--text-muted); }
.file-preview {
position: absolute;
bottom: 0; left: 0; right: 0;
height: 0;
background: var(--surface);
overflow: hidden;
transition: height 0.3s ease;
font-family: var(--mono);
font-size: 9px;
line-height: 1.5;
color: var(--text-dim);
padding: 0 10px;
}
.file-preview.open { height: 58px; padding: 8px 10px; }
/* ── Stock Chart ──────────────────────────────────── */
.chart-header {
display: flex;
align-items: baseline;
gap: 8px;
padding: 8px 12px 2px;
font-family: var(--mono);
}
.chart-ticker { font-size: 14px; font-weight: 700; color: var(--text); }
.chart-price { font-size: 12px; color: var(--green); }
.chart-change { font-size: 9px; color: var(--green); }
.chart-period { font-size: 8px; color: var(--text-muted); margin-left: auto; }
.chart-area { padding: 4px 12px 8px; height: calc(100% - 60px); }
.chart-area svg { width: 100%; height: 100%; }
.chart-vol {
display: flex;
align-items: flex-end;
gap: 2px;
height: 20px;
padding: 0 12px;
}
.chart-vol div {
flex: 1;
background: var(--border);
border-radius: 1px 1px 0 0;
min-height: 2px;
}
/* ── Google Docs ──────────────────────────────────── */
.gdoc-toolbar {
height: 24px;
background: #f1f3f4;
display: flex;
align-items: center;
padding: 0 8px;
gap: 3px;
}
.gdoc-toolbar .tb { width: 16px; height: 12px; background: #dadce0; border-radius: 2px; }
.gdoc-toolbar .tb.wide { width: 28px; }
.gdoc-toolbar .sep { width: 1px; height: 14px; background: #dadce0; margin: 0 3px; }
.gdoc-page {
background: #ffffff;
margin: 10px auto;
width: 88%;
height: calc(100% - 44px);
border-radius: 2px;
box-shadow: 0 1px 4px rgba(0,0,0,0.06);
padding: 20px 24px;
overflow: hidden;
}
.gdoc-title { font-family: var(--sans); font-size: 16px; font-weight: 700; color: #202124; min-height: 22px; margin-bottom: 4px; }
.gdoc-subtitle { font-family: var(--sans); font-size: 9px; color: #5f6368; margin-bottom: 10px; min-height: 12px; }
.gdoc-body { font-family: var(--sans); font-size: 9px; line-height: 1.6; color: #3c4043; min-height: 14px; }
.gdoc-chart-img {
margin-top: 8px;
width: 100%;
height: 80px;
background: #fafafa;
border: 1px solid #e0e0e0;
border-radius: 4px;
overflow: hidden;
opacity: 0;
transform: scale(0.95);
transition: opacity 0.3s, transform 0.3s;
display: flex;
align-items: center;
justify-content: center;
}
.gdoc-chart-img.visible { opacity: 1; transform: scale(1); }
.gdoc-chart-img svg { width: 95%; height: 80%; }
@keyframes blink { 50% { opacity: 0; } }
/* ── Annotations ──────────────────────────────────── */
.annot {
position: absolute;
border: 2px solid;
border-radius: 4px;
pointer-events: none;
opacity: 0;
transition: opacity 0.3s;
z-index: 20;
}
.annot.visible { opacity: 1; }
.annot-label {
position: absolute;
top: -16px;
left: -2px;
font-family: var(--mono);
font-size: 8px;
font-weight: 700;
padding: 1px 5px;
border-radius: 3px 3px 0 0;
color: #fff;
}
.annot.c1 { border-color: var(--accent); }
.annot.c1 .annot-label { background: var(--accent); }
.annot.c2 { border-color: var(--green); }
.annot.c2 .annot-label { background: var(--green); }
.annot.c3 { border-color: var(--orange); }
.annot.c3 .annot-label { background: var(--orange); }
.flash {
position: absolute;
inset: 0;
background: white;
opacity: 0;
pointer-events: none;
z-index: 50;
transition: opacity 0.05s;
}
.flash.fire { opacity: 0.3; }
/* ── Cursor ───────────────────────────────────────── */
.agent-cursor {
position: absolute;
width: 18px;
height: 22px;
z-index: 100;
pointer-events: none;
filter: drop-shadow(0 1px 2px rgba(0,0,0,0.15));
}
.agent-cursor svg { width: 100%; height: 100%; }
.agent-cursor.clicking { transform: scale(0.85); transition: transform 0.06s ease-out; }
.click-ripple {
position: absolute;
width: 24px; height: 24px;
border-radius: 50%;
border: 2px solid var(--accent);
opacity: 0;
pointer-events: none;
z-index: 99;
transform: translate(-50%, -50%) scale(0.3);
}
.click-ripple.animate { animation: ripple 0.4s ease-out forwards; }
@keyframes ripple {
0% { opacity: 0.6; transform: translate(-50%, -50%) scale(0.3); }
100% { opacity: 0; transform: translate(-50%, -50%) scale(1.5); }
}
/* ── Command Panel (light) ────────────────────────── */
.cmd-panel {
width: 340px;
border-radius: 12px;
overflow: hidden;
background: var(--bg);
box-shadow: 0 4px 24px rgba(0,0,0,0.08), 0 0 0 1px rgba(0,0,0,0.04);
display: flex;
flex-direction: column;
}
.cmd-titlebar {
height: 30px;
background: var(--surface);
display: flex;
align-items: center;
padding: 0 12px;
gap: 6px;
}
.cmd-titlebar .label {
font-family: var(--mono);
font-size: 10px;
color: var(--text-dim);
margin-left: 6px;
}
.cmd-body {
flex: 1;
padding: 12px;
font-family: var(--mono);
font-size: 11px;
line-height: 1.7;
overflow-y: auto;
scrollbar-width: none;
}
.cmd-body::-webkit-scrollbar { display: none; }
.cmd-line {
opacity: 0;
transform: translateY(4px);
transition: opacity 0.25s, transform 0.25s;
margin-bottom: 2px;
}
.cmd-line.visible { opacity: 1; transform: translateY(0); }
.cmd-line .ps { color: var(--green); user-select: none; }
.cmd-line .c { color: var(--text); }
.cmd-line .f { color: var(--orange); }
.cmd-line .s { color: var(--accent); }
.cmd-line .o { color: var(--text-dim); font-size: 10px; padding-left: 2px; }
.cmd-line .ok { color: var(--green); }
.cmd-line .jk { color: var(--purple); }
.cmd-line .jv { color: var(--accent); }
.cmd-line .link { color: var(--accent); text-decoration: underline; }
.cmd-line .agent-msg { color: var(--text); font-size: 10px; line-height: 1.5; padding-left: 2px; }
.cmd-divider {
height: 1px;
background: var(--border);
margin: 8px 0;
opacity: 0;
transition: opacity 0.3s;
}
.cmd-divider.visible { opacity: 1; }
.step-ind {
display: flex;
align-items: center;
gap: 6px;
margin-bottom: 4px;
opacity: 0;
transform: translateY(4px);
transition: opacity 0.25s, transform 0.25s;
}
.step-ind.visible { opacity: 1; transform: translateY(0); }
.badge {
font-size: 8px;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.5px;
padding: 2px 5px;
border-radius: 3px;
font-family: var(--mono);
}
.badge.observe { background: rgba(66,97,165,0.12); color: var(--accent); }
.badge.act { background: rgba(66,123,88,0.12); color: var(--green); }
.badge.wait { background: rgba(175,58,3,0.1); color: var(--orange); }
.badge.verify { background: rgba(143,63,113,0.1); color: var(--purple); }
.badge.done { background: rgba(66,123,88,0.15); color: var(--green); }
.step-lbl { font-size: 9px; color: var(--text-muted); }
.caption {
text-align: center;
margin-top: 20px;
z-index: 10;
}
.caption p { font-size: 11px; color: var(--text-muted); font-family: var(--mono); }
.caption .replay-btn {
display: inline-flex;
align-items: center;
gap: 6px;
margin-top: 8px;
padding: 5px 12px;
border-radius: 6px;
border: 1px solid var(--border);
background: #fff;
color: var(--text-dim);
font-family: var(--mono);
font-size: 10px;
cursor: pointer;
transition: color 0.2s, background 0.2s;
}
.caption .replay-btn:hover { background: var(--bg); color: var(--text); }
</style>
</head>
<body>
<div class="hero">
<h1>deskctl</h1>
<p>desktop control CLI for AI agents</p>
</div>
<div class="demo-container">
<div class="desktop-panel">
<div class="desktop-titlebar">
<div class="dot" style="background:#c5524a"></div>
<div class="dot" style="background:#d79921"></div>
<div class="dot" style="background:#427b58"></div>
</div>
<div class="viewport" id="vp">
<div class="wallpaper"></div>
<!-- File Manager -->
<div class="win" id="w-files" style="left:16px; top:16px; width:200px; height:220px;">
<div class="wbar">
<div class="dots"><span style="background:#c5524a"></span><span style="background:#d79921"></span><span style="background:#427b58"></span></div>
<span>Files ~/reports</span>
</div>
<div class="wbody">
<div class="file-list">
<div class="file-row" id="f-notes">
<span class="ficon">&#128221;</span>
<span>task_brief.txt</span>
<span class="fmeta">2.1 KB</span>
</div>
<div class="file-row" id="f-csv">
<span class="ficon">&#128202;</span>
<span>nvda_q1_data.csv</span>
<span class="fmeta">48 KB</span>
</div>
<div class="file-row" id="f-prev">
<span class="ficon">&#128196;</span>
<span>prev_report.pdf</span>
<span class="fmeta">1.2 MB</span>
</div>
<div class="file-row">
<span class="ficon">&#128193;</span>
<span>archive/</span>
<span class="fmeta">--</span>
</div>
</div>
<div class="file-preview" id="file-preview">
<span style="color:#427b58">task:</span> Prepare NVDA Q1 earnings summary<br>
<span style="color:#427b58">source:</span> finance.yahoo.com, local csv<br>
<span style="color:#427b58">output:</span> Google Docs report with chart
</div>
</div>
</div>
<!-- Stock Chart -->
<div class="win" id="w-chart" style="left:140px; top:40px; width:380px; height:260px;">
<div class="wbar">
<div class="dots"><span style="background:#c5524a"></span><span style="background:#d79921"></span><span style="background:#427b58"></span></div>
<span>Chrome - Yahoo Finance</span>
</div>
<div class="wbody">
<div class="chart-header">
<span class="chart-ticker">NVDA</span>
<span class="chart-price">$924.68</span>
<span class="chart-change">+3.42%</span>
<span class="chart-period">1Y</span>
</div>
<div class="chart-area">
<svg viewBox="0 0 360 140" preserveAspectRatio="none">
<defs>
<linearGradient id="cg" x1="0" y1="0" x2="0" y2="1">
<stop offset="0%" stop-color="#427b58" stop-opacity="0.2"/>
<stop offset="100%" stop-color="#427b58" stop-opacity="0"/>
</linearGradient>
</defs>
<line x1="0" y1="35" x2="360" y2="35" stroke="#dcdcdc" stroke-width="0.5"/>
<line x1="0" y1="70" x2="360" y2="70" stroke="#dcdcdc" stroke-width="0.5"/>
<line x1="0" y1="105" x2="360" y2="105" stroke="#dcdcdc" stroke-width="0.5"/>
<path d="M0,120 L20,115 40,118 60,110 80,105 100,95 120,100 140,85 160,75 180,80 200,65 220,55 240,60 260,45 280,35 300,40 320,28 340,22 360,18 L360,140 L0,140 Z" fill="url(#cg)"/>
<path d="M0,120 L20,115 40,118 60,110 80,105 100,95 120,100 140,85 160,75 180,80 200,65 220,55 240,60 260,45 280,35 300,40 320,28 340,22 360,18" fill="none" stroke="#427b58" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"/>
<text x="352" y="33" fill="#928374" font-size="7" font-family="monospace" text-anchor="end">$950</text>
<text x="352" y="68" fill="#928374" font-size="7" font-family="monospace" text-anchor="end">$800</text>
<text x="352" y="103" fill="#928374" font-size="7" font-family="monospace" text-anchor="end">$650</text>
</svg>
</div>
<div class="chart-vol" id="chart-vol"></div>
</div>
</div>
<!-- Google Docs -->
<div class="win" id="w-docs" style="left:80px; top:60px; width:440px; height:340px;">
<div class="wbar">
<div class="dots"><span style="background:#c5524a"></span><span style="background:#d79921"></span><span style="background:#427b58"></span></div>
<span>Chrome - Google Docs</span>
</div>
<div class="wbody" style="background:#f1f3f4">
<div class="gdoc-toolbar">
<div class="tb"></div><div class="tb"></div><div class="tb wide"></div>
<div class="sep"></div>
<div class="tb"></div><div class="tb"></div><div class="tb"></div>
<div class="sep"></div>
<div class="tb wide"></div><div class="tb"></div>
</div>
<div class="gdoc-page">
<div class="gdoc-title" id="doc-title"></div>
<div class="gdoc-subtitle" id="doc-subtitle"></div>
<div class="gdoc-body" id="doc-body"></div>
<div class="gdoc-chart-img" id="doc-chart">
<svg viewBox="0 0 360 80" preserveAspectRatio="none">
<defs>
<linearGradient id="cg2" x1="0" y1="0" x2="0" y2="1">
<stop offset="0%" stop-color="#427b58" stop-opacity="0.15"/>
<stop offset="100%" stop-color="#427b58" stop-opacity="0"/>
</linearGradient>
</defs>
<rect width="360" height="80" fill="#fafafa"/>
<path d="M0,65 L20,62 40,64 60,58 80,55 100,48 120,52 140,42 160,36 180,39 200,30 220,24 240,27 260,19 280,14 300,17 320,10 340,7 360,5 L360,80 L0,80 Z" fill="url(#cg2)"/>
<path d="M0,65 L20,62 40,64 60,58 80,55 100,48 120,52 140,42 160,36 180,39 200,30 220,24 240,27 260,19 280,14 300,17 320,10 340,7 360,5" fill="none" stroke="#427b58" stroke-width="1.5" stroke-linecap="round" stroke-linejoin="round"/>
<text x="8" y="12" fill="#928374" font-size="7" font-family="monospace">NVDA 1Y</text>
</svg>
</div>
</div>
</div>
</div>
<!-- Annotations -->
<div class="annot c1" id="a1"><div class="annot-label">@w1</div></div>
<div class="annot c2" id="a2"><div class="annot-label">@w2</div></div>
<div class="annot c3" id="a3"><div class="annot-label">@w3</div></div>
<div class="flash" id="flash"></div>
<div class="agent-cursor" id="cur" style="left:380px; top:260px;">
<svg viewBox="0 0 24 24" fill="none"><path d="M5.5 3.21V20.8c0 .45.54.67.85.35l4.86-4.86a.5.5 0 0 1 .35-.15h6.87a.5.5 0 0 0 .35-.85L6.35 2.86a.5.5 0 0 0-.85.35Z" fill="#282828" stroke="#fff" stroke-width="1"/></svg>
</div>
<div class="click-ripple" id="rip"></div>
<!-- Taskbar -->
<div class="taskbar">
<div class="tb-item" id="tb-files">Files</div>
<div class="tb-item" id="tb-chart">Yahoo Finance</div>
<div class="tb-item" id="tb-docs">Google Docs</div>
</div>
</div>
</div>
<div class="cmd-panel">
<div class="cmd-titlebar">
<div class="dot" style="background:#c5524a"></div>
<div class="dot" style="background:#d79921"></div>
<div class="dot" style="background:#427b58"></div>
<span class="label">agent computer</span>
</div>
<div class="cmd-body" id="cb"></div>
</div>
</div>
<div class="caption">
<p>AI agent controlling a live desktop via deskctl</p>
<button class="replay-btn" id="replay" style="display:none" onclick="run()">&#8634; Replay</button>
</div>
<script>
// Shorthand for document.getElementById.
const $ = s => document.getElementById(s);
// Returns a promise that resolves after `ms` milliseconds (used as `await W(ms)`).
const W = ms => new Promise(r => setTimeout(r, ms));
// Agent cursor, click ripple, and command-panel body elements.
const cur = $('cur'), rip = $('rip'), cb = $('cb');
// Last known cursor position in px; matches the cursor element's initial inline left/top.
let cx = 380, cy = 260;
// Build the volume strip under the stock chart: one bar per entry, height in px.
(() => {
  const volumeStrip = $('chart-vol');
  const barHeights = [8,12,6,14,10,18,8,15,20,12,7,16,10,22,14,8,18,12,9,16];
  for (const height of barHeights) {
    const bar = document.createElement('div');
    bar.style.height = `${height}px`;
    volumeStrip.appendChild(bar);
  }
})();
/**
 * Animates the agent cursor from its current position to (x, y).
 *
 * Uses a cubic ease-out along the straight line plus a small upward arc on the
 * vertical axis (at most 25px, scaled by the vertical distance).
 *
 * @param {number} x   Target left offset in px.
 * @param {number} y   Target top offset in px.
 * @param {number} dur Animation duration in ms.
 * @returns {Promise<void>} Resolves once the cursor has arrived; cx/cy are updated then.
 */
function move(x, y, dur = 500) {
  return new Promise(resolve => {
    const startX = cx;
    const startY = cy;
    const deltaX = x - startX;
    const deltaY = y - startY;
    const startedAt = performance.now();

    const frame = now => {
      const progress = Math.min((now - startedAt) / dur, 1);
      const eased = 1 - (1 - progress) ** 3;
      const lift = -Math.sin(progress * Math.PI) * Math.min(Math.abs(deltaY) * 0.25, 25);
      cur.style.left = `${startX + deltaX * eased}px`;
      cur.style.top = `${startY + deltaY * eased + lift}px`;
      if (progress < 1) {
        requestAnimationFrame(frame);
        return;
      }
      // Record the final position so the next move starts from here.
      cx = x;
      cy = y;
      resolve();
    };

    frame(performance.now());
  });
}
/**
 * Plays a click at the current cursor position: shrinks the cursor briefly
 * and restarts the ripple animation just below/right of the cursor origin.
 */
async function clk() {
  cur.classList.add('clicking');

  rip.style.left = `${cx + 4}px`;
  rip.style.top = `${cy + 4}px`;

  // Remove the class, force a reflow, then re-add it so the animation restarts.
  rip.classList.remove('animate');
  void rip.offsetWidth;
  rip.classList.add('animate');

  await W(80);
  cur.classList.remove('clicking');
}
/** Briefly shows the white screenshot-flash overlay for 80ms. */
async function flash() {
  const overlay = $('flash');
  overlay.classList.add('fire');
  await W(80);
  overlay.classList.remove('fire');
}
/** Adds the `visible` class to the element with the given id. */
function show(id) {
  const el = $(id);
  el.classList.add('visible');
}
/** Removes the `visible` class from the element with the given id. */
function hide(id) {
  const el = $(id);
  el.classList.remove('visible');
}
/** Reveals a taskbar item by adding the `visible` class to it. */
function tbShow(id) {
  $(id).classList.add('visible');
}
/**
 * Marks one taskbar item as active and clears the state on all others.
 * A falsy id just clears every item.
 */
function tbActive(id) {
  for (const item of document.querySelectorAll('.tb-item')) {
    item.classList.remove('active');
  }
  if (!id) return;
  $(id).classList.add('active');
}
/**
 * Gives one demo window focus: clears `focused` and the inline z-index on every
 * `.win`, then marks the requested window and raises it to z-index 10.
 * A falsy id just unfocuses everything.
 */
function focus(id) {
  for (const win of document.querySelectorAll('.win')) {
    win.classList.remove('focused');
    win.style.zIndex = '';
  }
  if (!id) return;
  const target = $(id);
  target.classList.add('focused');
  target.style.zIndex = '10';
}
/**
 * Positions an annotation box over a demo window.
 *
 * Geometry is read from the window's inline `style` (left/top/width/height in px),
 * so the target window must declare those inline.
 *
 * @param {string} aid Id of the annotation element to position.
 * @param {string} wid Id of the window element to outline.
 */
function posAnnot(aid, wid) {
  const winStyle = $(wid).style;
  const annotStyle = $(aid).style;
  // Always parse as decimal; the values are CSS lengths such as "140px".
  const px = value => parseInt(value, 10);

  // The box extends 2px past the window on the left and right.
  annotStyle.left = `${px(winStyle.left) - 2}px`;
  annotStyle.width = `${px(winStyle.width) + 4}px`;
  // NOTE(review): the +26 / -22 offsets presumably skip the window title bar — confirm against the .wbar height.
  annotStyle.top = `${px(winStyle.top) + 26}px`;
  annotStyle.height = `${px(winStyle.height) - 22}px`;
}
/** Hides every annotation box by removing its `visible` class. */
function hideAnnots() {
  for (const annot of document.querySelectorAll('.annot')) {
    annot.classList.remove('visible');
  }
}
/**
 * Simulates typing by appending `text` to an element one character at a time.
 *
 * Declared as an async function rather than wrapping an async executor in
 * `new Promise`, so an exception while typing rejects the returned promise
 * instead of leaving it pending forever.
 *
 * @param {Element} el   Element whose textContent receives the characters.
 * @param {string}  text Text to type.
 * @param {number}  ms   Delay after each character, in ms.
 * @returns {Promise<void>} Resolves after the last character and its delay.
 */
async function typeEl(el, text, ms = 40) {
  for (const ch of text) {
    el.textContent += ch;
    await W(ms);
  }
}
/**
 * Shared implementation for every row added to the command panel: creates a
 * <div>, appends it, fades it in, and keeps the panel scrolled to the bottom.
 *
 * @param {string} className Class for the new element (its CSS starts hidden).
 * @param {string} [html]    Inner HTML; omitted for elements with no content.
 */
function appendToPanel(className, html) {
  const el = document.createElement('div');
  el.className = className;
  if (html !== undefined) el.innerHTML = html;
  cb.appendChild(el);
  // Force a reflow so the hidden state is applied before `visible` triggers the transition.
  void el.offsetWidth;
  el.classList.add('visible');
  cb.scrollTop = cb.scrollHeight;
}

/** Adds a step indicator: a coloured badge (`type`) followed by a short label. */
function step(type, label) {
  appendToPanel('step-ind', `<span class="badge ${type}">${type}</span><span class="step-lbl">${label}</span>`);
}

/** Adds a command line prefixed with the `$ ` prompt. */
function ln(html) {
  appendToPanel('cmd-line', `<span class="ps">$ </span>${html}`);
}

/** Adds a dimmed command-output line. */
function out(html) {
  appendToPanel('cmd-line', `<span class="o">${html}</span>`);
}

/** Adds a line of the agent's closing summary. */
function agentMsg(html) {
  appendToPanel('cmd-line', `<span class="agent-msg">${html}</span>`);
}

/** Adds a horizontal divider between demo steps. */
function div() {
  appendToPanel('cmd-divider');
}
/**
 * Builds the highlighted HTML for a command line.
 *
 * @param {string} c   Command text (always rendered).
 * @param {string} [f] Flag text; skipped when falsy.
 * @param {string} [s] Argument/selector text; skipped when falsy.
 * @returns {string} The spans, separated by single spaces.
 */
function cm(c, f, s) {
  const parts = [`<span class="c">${c}</span>`];
  if (f) parts.push(`<span class="f">${f}</span>`);
  if (s) parts.push(`<span class="s">${s}</span>`);
  return parts.join(' ');
}
/**
 * Plays the scripted demo from start to finish.
 *
 * First resets everything a previous run may have changed (command panel,
 * windows, taskbar, annotations, file selection, document text, cursor), then
 * walks through the numbered steps below, interleaving command-panel output
 * with desktop animations. The Replay button is hidden for the duration and
 * shown again once the cursor has exited.
 *
 * All timings are fixed delays; statement order is the choreography, so keep
 * the sequence intact when editing.
 */
async function run() {
// Reset state left over from a previous run.
$('replay').style.display = 'none';
cb.innerHTML = '';
['w-files','w-chart','w-docs'].forEach(id => { hide(id); $(id).classList.remove('focused'); $(id).style.zIndex = ''; });
document.querySelectorAll('.tb-item').forEach(t => { t.classList.remove('visible','active'); });
hideAnnots();
$('f-notes').classList.remove('selected');
$('f-csv').classList.remove('selected');
$('file-preview').classList.remove('open');
$('doc-title').textContent = '';
$('doc-subtitle').textContent = '';
$('doc-body').textContent = '';
$('doc-chart').classList.remove('visible');
// Park the cursor at its start position, invisible, and resync the tracked coordinates.
cur.style.left = '380px'; cur.style.top = '260px'; cur.style.opacity = '0';
cx = 380; cy = 260;
await W(500);
// Fade the cursor in, then drop the transition again once the fade has finished.
cur.style.transition = 'opacity 0.3s'; cur.style.opacity = '1';
await W(400); cur.style.transition = 'none';
// 1: Empty desktop
step('observe', 'Scan desktop');
await W(250);
ln(cm('deskctl snapshot'));
await W(400);
out('<span class="jk">"windows"</span>: <span class="o">[]</span>');
out('<span class="ok">empty desktop</span>');
await W(400); div();
// 2: Launch file manager
step('act', 'Open local files');
await W(250);
ln(cm('deskctl launch', '', 'nautilus ~/reports'));
await W(350);
show('w-files'); focus('w-files');
tbShow('tb-files'); tbActive('tb-files');
await W(300);
out('<span class="ok">launched nautilus (pid 3841)</span>');
await W(300);
step('wait', 'Wait for window');
ln(cm('deskctl wait window', "--selector 'title=Files'", '--timeout 5'));
await W(500);
out('<span class="ok">window ready: "Files ~/reports"</span>');
await W(300); div();
// 3: Read task brief
step('observe', 'Read task brief');
await W(250);
ln(cm('deskctl click', '', "'title=Files'"));
await move(100, 62, 450);
await clk();
$('f-notes').classList.add('selected');
await W(200);
out('<span class="ok">clicked "task_brief.txt"</span>');
await W(200);
ln(cm('deskctl hotkey', '', 'space'));
await W(300);
$('file-preview').classList.add('open');
await W(400);
out('<span class="o">task: Prepare NVDA Q1 earnings summary</span>');
out('<span class="o">source: finance.yahoo.com, local csv</span>');
out('<span class="o">output: Google Docs report with chart</span>');
await W(500); div();
// 4: Launch browser
step('act', 'Research stock data');
await W(250);
ln(cm('deskctl launch', '', 'google-chrome finance.yahoo.com/NVDA'));
await W(400);
show('w-chart'); focus('w-chart');
tbShow('tb-chart'); tbActive('tb-chart');
await W(350);
out('<span class="ok">launched chrome (pid 3912)</span>');
step('wait', 'Wait for page');
ln(cm('deskctl wait window', "--selector 'title=Yahoo'", '--timeout 8'));
await W(600);
out('<span class="ok">window ready: "Yahoo Finance - NVDA"</span>');
await W(300); div();
// 5: Snapshot chart
step('observe', 'Capture chart screenshot');
await W(250);
ln(cm('deskctl snapshot', '--annotate'));
await W(300);
await flash();
// Outline the two windows that are open at this point.
posAnnot('a1', 'w-files'); posAnnot('a2', 'w-chart');
show('a1'); show('a2');
await W(200);
out('<span class="jk">"windows"</span>: [');
out('&nbsp;&nbsp;{ <span class="jv">"@w1"</span>: <span class="jv">"Files"</span> }');
out('&nbsp;&nbsp;{ <span class="jv">"@w2"</span>: <span class="jv">"Yahoo Finance"</span> }');
out(']');
out('<span class="ok">screenshot saved: chart_nvda.png</span>');
await W(600);
hideAnnots(); div();
// 6: Open Google Docs
step('act', 'Create report document');
await W(250);
ln(cm('deskctl hotkey', '', 'ctrl t'));
await W(300);
out('<span class="ok">new tab opened</span>');
await W(200);
ln(cm('deskctl type', '', '"docs.google.com/document/new"'));
await W(200);
ln(cm('deskctl press', '', 'enter'));
await W(400);
show('w-docs'); focus('w-docs');
tbShow('tb-docs'); tbActive('tb-docs');
await W(350);
out('<span class="ok">navigated to Google Docs</span>');
step('wait', 'Wait for Docs');
ln(cm('deskctl wait window', "--selector 'title=Google Docs'", '--timeout 8'));
await W(500);
out('<span class="ok">document ready</span>');
await W(300); div();
// 7: Type title
step('act', 'Write report');
await W(250);
await move(310, 140, 450);
await clk();
await W(200);
ln(cm('deskctl type', '', '"NVDA Q1 2025 Earnings Summary"'));
await W(200);
await typeEl($('doc-title'), 'NVDA Q1 2025 Earnings Summary', 35);
out('<span class="ok">typed title</span>');
await W(200);
ln(cm('deskctl press', '', 'enter'));
await W(150);
ln(cm('deskctl type', '', '"Prepared by AI Agent via deskctl"'));
await W(200);
await typeEl($('doc-subtitle'), 'Prepared by AI Agent via deskctl', 28);
await W(200);
ln(cm('deskctl press', '', 'enter enter'));
await W(200); div();
// 8: Type body
step('act', 'Write analysis');
await W(250);
// The character count printed after typing is hard-coded; update it if this text changes.
const body = "NVIDIA reported strong Q1 results driven by data center revenue growth of 427% YoY. The stock is up 3.42% today at $924.68. Key drivers include H100/H200 GPU demand from hyperscalers and continued AI infrastructure buildout.";
ln(cm('deskctl type', '', '"NVIDIA reported strong Q1..."'));
await W(200);
await typeEl($('doc-body'), body, 12);
out('<span class="ok">typed analysis (224 chars)</span>');
await W(400); div();
// 9: Paste chart
step('act', 'Insert chart screenshot');
await W(250);
ln(cm('deskctl press', '', 'enter enter'));
await W(200);
ln(cm('deskctl hotkey', '', 'ctrl v'));
await W(400);
$('doc-chart').classList.add('visible');
await W(300);
out('<span class="ok">pasted chart_nvda.png into document</span>');
await W(500); div();
// 10: Final verify
step('verify', 'Verify completed report');
await W(250);
ln(cm('deskctl snapshot', '--annotate'));
await W(300);
await flash();
// All three windows are open now, so all three annotations are placed.
posAnnot('a1', 'w-files'); posAnnot('a2', 'w-chart'); posAnnot('a3', 'w-docs');
show('a1'); show('a2'); show('a3');
await W(200);
out('<span class="jk">"windows"</span>: [');
out('&nbsp;&nbsp;{ <span class="jv">"@w1"</span>: <span class="jv">"Files"</span>, <span class="jv">"@w2"</span>: <span class="jv">"Yahoo Finance"</span>, <span class="jv">"@w3"</span>: <span class="jv">"Google Docs"</span> }');
out(']');
await W(600);
hideAnnots();
await W(300); div();
// 11: Agent summary (Claude-style)
step('done', 'Task complete');
await W(400);
agentMsg('I\'ve completed the NVDA Q1 earnings report.');
await W(300);
agentMsg('');
await W(100);
agentMsg('Here\'s what I did:');
await W(200);
agentMsg(' - Read task_brief.txt from ~/reports for context');
await W(150);
agentMsg(' - Pulled the NVDA 1Y chart from Yahoo Finance');
await W(150);
agentMsg(' - Created a new Google Doc with title, analysis,');
await W(100);
agentMsg(' and embedded the stock chart screenshot');
await W(300);
agentMsg('');
agentMsg('Document: <span class="link">docs.google.com/d/1xK9m...r4/edit</span>');
// Cursor exits
await W(500);
await move(600, 10, 700);
cur.style.transition = 'opacity 0.5s'; cur.style.opacity = '0';
await W(600);
// The demo is over: offer the Replay button again.
$('replay').style.display = 'inline-flex';
}
// Start the demo 300ms after the page's load event.
window.addEventListener('load', () => setTimeout(run, 300));
</script>
</body>
</html>

110
docs/releasing.md Normal file
View file

@ -0,0 +1,110 @@
# Releasing deskctl
This document covers the operator flow for shipping `deskctl` across:
- GitHub Releases
- crates.io
- npm
- the repo flake
GitHub Releases are the canonical binary source. The npm package consumes those release assets instead of building a separate binary.
## Package Names
- crate: `deskctl`
- npm package: `deskctl`
- installed command: `deskctl`
## Prerequisites
Before the first live publish on each registry:
- npm ownership for `deskctl`
- crates.io ownership for `deskctl`
- repository secrets:
- `NPM_TOKEN`
- `CARGO_REGISTRY_TOKEN`
These are user-owned prerequisites. The repo can validate and automate the rest, but it cannot create registry ownership for you.
## Normal Release Flow
1. Merge release-ready changes to `main`.
2. Let CI run:
- validation
- integration
- distribution validation
- release asset build
3. Confirm the GitHub Release exists for the version tag and includes:
- `deskctl-linux-x86_64`
- `checksums.txt`
4. Trigger the `Publish Registries` workflow with:
- `tag`
- `publish_npm`
- `publish_crates`
5. Confirm the publish summary for each channel.
## What CI Validates
The repository validates:
- `cargo publish --dry-run --locked`
- npm package metadata and packability
- npm install smoke path on Linux using the packaged `deskctl` command
- repo flake evaluation/build
The repository release workflow:
- builds the Linux release binary
- publishes the canonical GitHub Release asset
- uploads `checksums.txt`
The registry publish jobs (npm and crates.io run in parallel):
- target an existing release tag
- check whether that version is already published on the respective registry
- skip already-published versions
- are both enabled by default and can be toggled via `workflow_dispatch` inputs
## Rerun Safety
Registry publishing is intentionally separate from release asset creation.
If a partial failure happens:
- GitHub Release assets remain the source of truth
- rerun the `Publish Registries` workflow for the same tag
- already-published channels are reported and skipped
- remaining channels can still be published
## Local Validation
Run the distribution checks locally with:
```bash
make cargo-publish-dry-run
make npm-package-check
make nix-flake-check
make dist-validate
```
Notes:
- `make npm-package-check` does a runtime smoke test only on Linux
- `make nix-flake-check` requires a local Nix installation
- Docker remains a local Linux build convenience, not the canonical release path
## Nix Boundary
The repo-owned `flake.nix` is the supported Nix surface in this phase.
In scope:
- `nix run github:harivansh-afk/deskctl`
- `nix profile install github:harivansh-afk/deskctl`
- CI validation for the repo flake
Out of scope for this phase:
- `nixpkgs` upstreaming
- extra distro packaging outside the repo

70
docs/runtime-contract.md Normal file
View file

@ -0,0 +1,70 @@
# deskctl runtime contract
All commands support `--json` and use the same top-level envelope:
```json
{
"success": true,
"data": {},
"error": null
}
```
Use `--json` whenever you need to parse output programmatically.
## Stable window fields
Whenever a response includes a window payload, these fields are stable:
- `ref_id`
- `window_id`
- `title`
- `app_name`
- `x`
- `y`
- `width`
- `height`
- `focused`
- `minimized`
Use `window_id` for stable targeting inside a live daemon session. Use
`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or
`list-windows`.
## Stable grouped reads
- `deskctl get active-window` -> `data.window`
- `deskctl get monitors` -> `data.count`, `data.monitors`
- `deskctl get version` -> `data.version`, `data.backend`
- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as
`backend`, `display`, `session_type`, `session`, `socket_path`, `screen`,
`monitor_count`, and `monitors`
## Stable waits
- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
`data.window`
- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
`data.window`
## Stable structured error kinds
When a command fails with structured JSON data, these `kind` values are stable:
- `selector_not_found`
- `selector_ambiguous`
- `selector_invalid`
- `timeout`
- `not_found`
Wait failures may also include `window_not_focused` in the last observation
payload.
## Best-effort fields
Treat these as useful but non-contractual:
- exact monitor names
- incidental text formatting in non-JSON mode
- default screenshot file names when no explicit path was provided
- environment-dependent ordering details from the window manager

61
flake.lock generated Normal file
View file

@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1774386573,
"narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

77
flake.nix Normal file
View file

@ -0,0 +1,77 @@
# Flake for deskctl: defines the package, a runnable app, a build check, a
# formatter, and a development shell.
{
description = "deskctl - Desktop control CLI for AI agents on Linux X11";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs =
{ self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (
system:
let
pkgs = import nixpkgs { inherit system; };
lib = pkgs.lib;
# Package name, version, description, and homepage are read from Cargo.toml
# so they are not duplicated here.
cargoToml = builtins.fromTOML (builtins.readFile ./Cargo.toml);
deskctl =
pkgs.rustPlatform.buildRustPackage {
pname = cargoToml.package.name;
version = cargoToml.package.version;
src = ./.;
cargoLock.lockFile = ./Cargo.lock;
nativeBuildInputs = [ pkgs.pkg-config ];
# The X11 libraries are only added as build inputs on Linux.
buildInputs = lib.optionals pkgs.stdenv.isLinux [
pkgs.libx11
pkgs.libxtst
];
# NOTE(review): tests are skipped in the Nix build — presumably because they
# need an X11 display; confirm.
doCheck = false;
meta = with lib; {
description = cargoToml.package.description;
homepage = cargoToml.package.homepage;
license = licenses.mit;
mainProgram = "deskctl";
platforms = platforms.linux;
};
};
in
{
formatter = pkgs.nixfmt;
# Packages, apps, and checks are exposed only on Linux; on other systems
# these attribute sets are empty.
packages = lib.optionalAttrs pkgs.stdenv.isLinux {
inherit deskctl;
default = deskctl;
};
apps = lib.optionalAttrs pkgs.stdenv.isLinux {
default = flake-utils.lib.mkApp { drv = deskctl; };
deskctl = flake-utils.lib.mkApp { drv = deskctl; };
};
checks = lib.optionalAttrs pkgs.stdenv.isLinux {
build = deskctl;
};
# The dev shell is defined for every system; the X11 libraries and X server
# package are added only on Linux.
devShells.default = pkgs.mkShell {
packages =
[
pkgs.cargo
pkgs.clippy
pkgs.nodejs
pkgs.nixfmt
pkgs.pkg-config
pkgs.pnpm
pkgs.rustc
pkgs.rustfmt
]
++ lib.optionals pkgs.stdenv.isLinux [
pkgs.libx11
pkgs.libxtst
pkgs.xorg.xorgserver
];
};
}
);
}

48
npm/deskctl/README.md Normal file
View file

@ -0,0 +1,48 @@
# deskctl
This package installs the `deskctl` command on Linux X11 systems.
## Install
```bash
npm install -g deskctl
```
After install, run:
```bash
deskctl --help
```
To upgrade to the latest version:
```bash
deskctl upgrade
```
For non-interactive use:
```bash
deskctl upgrade --yes
```
One-shot usage is also supported:
```bash
npx deskctl --help
```
## Runtime Support
- Linux
- X11 session
- currently packaged release asset: `linux-x64`
`deskctl` downloads the matching GitHub Release binary during install.
Unsupported targets fail during install with a clear runtime support error instead of installing a broken command.
If you want the Rust source-install path instead, use:
```bash
cargo install deskctl
```

View file

@ -0,0 +1,36 @@
#!/usr/bin/env node
const fs = require("node:fs");
const { spawn } = require("node:child_process");
const { readPackageJson, releaseTag, supportedTarget, vendorBinaryPath } = require("../scripts/support");
/**
 * Entry point of the npm `deskctl` launcher.
 *
 * Resolves the vendored binary for the current target, then runs it with the
 * caller's arguments and inherited stdio. The launcher mirrors the child's
 * outcome: it exits with the child's exit code, or re-raises the signal that
 * terminated the child.
 *
 * Exits with status 1 when the vendored binary is missing or cannot be started.
 */
function main() {
  const pkg = readPackageJson();
  const target = supportedTarget();
  const binaryPath = vendorBinaryPath(target);

  if (!fs.existsSync(binaryPath)) {
    console.error(
      [
        "deskctl binary is missing from the npm package install.",
        `Expected: ${binaryPath}`,
        `Package version: ${pkg.version}`,
        `Release tag: ${releaseTag(pkg)}`,
        "Try reinstalling deskctl or check that your target is supported."
      ].join("\n")
    );
    process.exit(1);
  }

  const child = spawn(binaryPath, process.argv.slice(2), { stdio: "inherit" });

  // A spawn failure (for example a binary that is not executable) is delivered as
  // an 'error' event; without a listener Node raises it as an uncaught exception.
  child.on("error", (error) => {
    console.error(`deskctl failed to start ${binaryPath}: ${error.message}`);
    process.exit(1);
  });

  child.on("exit", (code, signal) => {
    if (signal) {
      // Re-raise the signal on this process so the caller sees the same termination.
      process.kill(process.pid, signal);
      return;
    }
    process.exit(code ?? 1);
  });
}
main();

36
npm/deskctl/package.json Normal file
View file

@ -0,0 +1,36 @@
{
"name": "deskctl",
"version": "0.1.14",
"description": "Installable deskctl package for Linux X11 agents",
"license": "MIT",
"homepage": "https://github.com/harivansh-afk/deskctl",
"repository": {
"type": "git",
"url": "git+https://github.com/harivansh-afk/deskctl.git"
},
"bugs": {
"url": "https://github.com/harivansh-afk/deskctl/issues"
},
"engines": {
"node": ">=18"
},
"bin": {
"deskctl": "bin/deskctl.js"
},
"files": [
"README.md",
"bin",
"scripts"
],
"scripts": {
"postinstall": "node scripts/postinstall.js",
"validate": "node scripts/validate-package.js"
},
"keywords": [
"deskctl",
"x11",
"desktop",
"automation",
"cli"
]
}

View file

@ -0,0 +1,49 @@
const fs = require("node:fs");
const {
checksumsUrl,
checksumForAsset,
download,
ensureVendorDir,
installLocalBinary,
readPackageJson,
releaseAssetUrl,
releaseTag,
sha256,
supportedTarget,
vendorBinaryPath
} = require("./support");
/**
 * Postinstall step: places the deskctl binary for the current target in the
 * package's vendor directory.
 *
 * When DESKCTL_BINARY_PATH is set, that path is handed to installLocalBinary
 * and nothing is downloaded. Otherwise the release checksums and the release
 * asset are downloaded, the asset's SHA-256 is compared with the published
 * value, and the asset is written with mode 0755.
 *
 * @throws {Error} On an unsupported target, a download failure, a missing
 *   checksum entry, or a checksum mismatch.
 */
async function main() {
  const manifest = readPackageJson();
  const target = supportedTarget();
  const destination = vendorBinaryPath(target);
  ensureVendorDir();

  const localBinary = process.env.DESKCTL_BINARY_PATH;
  if (localBinary) {
    installLocalBinary(localBinary, destination);
    return;
  }

  const tag = releaseTag(manifest);
  const { assetName } = target;

  // Fetch the checksum list first, then the binary itself.
  const checksumBytes = await download(checksumsUrl(tag));
  const expectedSha = checksumForAsset(checksumBytes.toString("utf8"), assetName);
  const assetBytes = await download(releaseAssetUrl(tag, assetName));
  const actualSha = sha256(assetBytes);

  if (actualSha !== expectedSha) {
    throw new Error(
      `Checksum mismatch for ${assetName}. Expected ${expectedSha}, got ${actualSha}.`
    );
  }

  fs.writeFileSync(destination, assetBytes);
  fs.chmodSync(destination, 0o755);
}
main().catch((error) => {
console.error(`deskctl install failed: ${error.message}`);
process.exit(1);
});

View file

@ -0,0 +1,120 @@
const crypto = require("node:crypto");
const fs = require("node:fs");
const path = require("node:path");
const https = require("node:https");
const PACKAGE_ROOT = path.resolve(__dirname, "..");
const VENDOR_DIR = path.join(PACKAGE_ROOT, "vendor");
const PACKAGE_JSON = path.join(PACKAGE_ROOT, "package.json");
/**
 * Read the file at PACKAGE_JSON as UTF-8 and parse it as JSON.
 *
 * @returns {object} The parsed package manifest.
 */
function readPackageJson() {
  const rawManifest = fs.readFileSync(PACKAGE_JSON, "utf8");
  return JSON.parse(rawManifest);
}
/**
 * Resolve the release tag to install from.
 *
 * A non-empty DESKCTL_RELEASE_TAG environment variable wins; otherwise the tag
 * is the package version prefixed with "v".
 *
 * @param {{version: string}} pkg Parsed package manifest.
 * @returns {string} The release tag.
 */
function releaseTag(pkg) {
  const override = process.env.DESKCTL_RELEASE_TAG;
  if (override) {
    return override;
  }
  return `v${pkg.version}`;
}
/**
 * Describe the release target for a platform/architecture pair.
 *
 * Only linux + x64 is accepted; every other combination throws.
 *
 * @param {string} [platform=process.platform]
 * @param {string} [arch=process.arch]
 * @returns {{platform: string, arch: string, assetName: string, binaryName: string}}
 * @throws {Error} When the pair is not linux-x64.
 */
function supportedTarget(platform = process.platform, arch = process.arch) {
  if (platform !== "linux" || arch !== "x64") {
    throw new Error(
      `deskctl currently supports linux-x64 only. Received ${platform}-${arch}.`
    );
  }

  const artifact = "deskctl-linux-x86_64";
  return { platform, arch, assetName: artifact, binaryName: artifact };
}
/**
 * Build the path of the target's binary inside VENDOR_DIR.
 *
 * @param {{binaryName: string}} target Target descriptor.
 * @returns {string} VENDOR_DIR joined with the target's binary name.
 */
function vendorBinaryPath(target) {
  const { binaryName } = target;
  return path.join(VENDOR_DIR, binaryName);
}
/**
 * Base URL under which a release's assets are published.
 *
 * A non-empty DESKCTL_RELEASE_BASE_URL environment variable replaces the
 * default GitHub release download URL for `tag`.
 *
 * @param {string} tag Release tag.
 * @returns {string} Base URL without a trailing asset name.
 */
function releaseBaseUrl(tag) {
  const override = process.env.DESKCTL_RELEASE_BASE_URL;
  if (override) {
    return override;
  }
  return `https://github.com/harivansh-afk/deskctl/releases/download/${tag}`;
}
/**
 * URL of a release asset.
 *
 * A non-empty DESKCTL_DOWNLOAD_URL environment variable is returned as-is;
 * otherwise the asset name is appended to the release base URL for `tag`.
 *
 * @param {string} tag Release tag.
 * @param {string} assetName File name of the asset.
 * @returns {string} Download URL for the asset.
 */
function releaseAssetUrl(tag, assetName) {
  const override = process.env.DESKCTL_DOWNLOAD_URL;
  if (override) {
    return override;
  }
  const base = releaseBaseUrl(tag);
  return `${base}/${assetName}`;
}
/**
 * URL of the checksums.txt file published with a release.
 *
 * @param {string} tag Release tag.
 * @returns {string} The release base URL followed by "/checksums.txt".
 */
function checksumsUrl(tag) {
  const base = releaseBaseUrl(tag);
  return `${base}/checksums.txt`;
}
/**
 * Create VENDOR_DIR, including missing parent directories.
 */
function ensureVendorDir() {
  const mkdirOptions = { recursive: true };
  fs.mkdirSync(VENDOR_DIR, mkdirOptions);
}
/**
 * Look up the checksum recorded for an asset in a checksums listing.
 *
 * Each line is trimmed, and the first line ending in " <assetName>" or
 * " *<assetName>" is selected; its first whitespace-separated field is
 * returned.
 *
 * @param {string} contents Text of the checksums file.
 * @param {string} assetName Asset file name to look up.
 * @returns {string} The first field of the matching line.
 * @throws {Error} When no line matches the asset name.
 */
function checksumForAsset(contents, assetName) {
  const plainSuffix = ` ${assetName}`;
  const starredSuffix = ` *${assetName}`;

  for (const rawLine of contents.split("\n")) {
    const entry = rawLine.trim();
    if (entry.endsWith(plainSuffix) || entry.endsWith(starredSuffix)) {
      return entry.split(/\s+/)[0];
    }
  }

  throw new Error(`Could not find checksum entry for ${assetName}.`);
}
/**
 * Compute the SHA-256 digest of `buffer`.
 *
 * @param {Buffer|string} buffer Data to hash.
 * @returns {string} Hex-encoded digest.
 */
function sha256(buffer) {
  const hasher = crypto.createHash("sha256");
  hasher.update(buffer);
  return hasher.digest("hex");
}
/**
 * Download `url` over HTTPS and resolve with the whole response body.
 *
 * Redirect responses (3xx with a Location header) are followed, at most
 * `redirectsLeft` times, so a redirect loop ends in a rejection instead of
 * recursing forever. A relative Location is resolved against the URL that
 * produced it. Any final status other than 200 rejects.
 *
 * @param {string} url Absolute https URL to fetch.
 * @param {number} [redirectsLeft=5] Remaining redirects that may be followed.
 * @returns {Promise<Buffer>} The concatenated response body.
 */
function download(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    https
      .get(url, (response) => {
        const status = response.statusCode;
        const location = response.headers.location;

        if (status && status >= 300 && status < 400 && location) {
          // Discard the redirect body so the socket is released.
          response.resume();
          if (redirectsLeft <= 0) {
            reject(new Error(`Download failed for ${url}: too many redirects`));
            return;
          }
          let nextUrl;
          try {
            // Location may be relative; resolve it against the current URL.
            nextUrl = new URL(location, url).toString();
          } catch (error) {
            reject(
              new Error(`Download failed for ${url}: invalid redirect location ${location}`)
            );
            return;
          }
          resolve(download(nextUrl, redirectsLeft - 1));
          return;
        }

        if (status !== 200) {
          // Drain the unwanted body before reporting the failure.
          response.resume();
          reject(new Error(`Download failed for ${url}: HTTP ${status}`));
          return;
        }

        const chunks = [];
        response.on("data", (chunk) => chunks.push(chunk));
        response.on("end", () => resolve(Buffer.concat(chunks)));
        // Route a failure while streaming the body to the promise.
        response.on("error", reject);
      })
      .on("error", reject);
  });
}
/**
 * Copy a local binary to `targetPath` and set its mode to 0o755.
 *
 * @param {string} sourcePath Path of the binary to copy.
 * @param {string} targetPath Destination path.
 */
function installLocalBinary(sourcePath, targetPath) {
  const executableMode = 0o755;
  fs.copyFileSync(sourcePath, targetPath);
  fs.chmodSync(targetPath, executableMode);
}
// Exported surface of this module: the two path constants and the helper
// functions. releaseBaseUrl and PACKAGE_JSON are not exported.
module.exports = {
PACKAGE_ROOT,
VENDOR_DIR,
checksumsUrl,
checksumForAsset,
download,
ensureVendorDir,
installLocalBinary,
readPackageJson,
releaseAssetUrl,
releaseTag,
sha256,
supportedTarget,
vendorBinaryPath
};

View file

@ -0,0 +1,40 @@
const fs = require("node:fs");
const path = require("node:path");
const { readPackageJson, supportedTarget, vendorBinaryPath } = require("./support");
/**
 * Read the version from the Cargo.toml three directories above this script.
 *
 * The version is taken from the first line of the form `version = "..."`.
 *
 * @returns {string} The version string between the quotes.
 * @throws {Error} When no such line exists.
 */
function readCargoVersion() {
  const manifestPath = path.resolve(__dirname, "..", "..", "..", "Cargo.toml");
  const manifest = fs.readFileSync(manifestPath, "utf8");

  const found = /^version = "([^"]+)"/m.exec(manifest);
  if (found === null) {
    throw new Error("Could not determine Cargo.toml version.");
  }
  return found[1];
}
/**
 * Validate the npm package; throws on the first check that fails.
 *
 * Checks, in order: the package.json version equals the Cargo.toml version,
 * the `deskctl` bin entry is "bin/deskctl.js", and the linux-x64 vendor binary
 * resolves into a directory ending in deskctl/vendor.
 *
 * @throws {Error} Describing the failed check.
 */
function main() {
  const manifest = readPackageJson();
  const cargoVersion = readCargoVersion();
  if (manifest.version !== cargoVersion) {
    throw new Error(
      `Version mismatch: npm package is ${manifest.version}, Cargo.toml is ${cargoVersion}.`
    );
  }

  const binEntry = manifest.bin?.deskctl;
  if (binEntry !== "bin/deskctl.js") {
    throw new Error("deskctl must expose the deskctl bin entrypoint.");
  }

  const linuxTarget = supportedTarget("linux", "x64");
  const vendorDir = path.dirname(vendorBinaryPath(linuxTarget));
  const expectedSuffix = path.join("deskctl", "vendor");
  if (!vendorDir.endsWith(expectedSuffix)) {
    throw new Error("Vendor binary directory resolved unexpectedly.");
  }
}

main();

View file

@ -30,7 +30,7 @@ function formatTocText(text: string): string {
<body>
{
!isIndex && (
<nav>
<nav class="breadcrumbs">
<a class="title" href="/">
deskctl
</a>

View file

@ -1,78 +0,0 @@
---
layout: ../layouts/DocLayout.astro
title: Architecture
toc: true
---
# Architecture
## Client-daemon model
deskctl uses a client-daemon architecture over Unix sockets. The daemon starts automatically on the first command and keeps the X11 connection alive so repeated calls skip the connection setup overhead.
Each command opens a new connection to the daemon, sends a single NDJSON request, reads one NDJSON response, and exits.
## Wire protocol
Requests and responses are newline-delimited JSON (NDJSON) over a Unix socket.
**Request:**
```json
{ "id": "r123456", "action": "snapshot", "annotate": true }
```
**Response:**
```json
{"success": true, "data": {"screenshot": "/tmp/deskctl-1234567890.png", "windows": [...]}}
```
Error responses include an `error` field:
```json
{ "success": false, "error": "window not found: @w99" }
```
## Socket location
The daemon socket is resolved in this order:
1. `--socket` flag (highest priority)
2. `$DESKCTL_SOCKET_DIR/{session}.sock`
3. `$XDG_RUNTIME_DIR/deskctl/{session}.sock`
4. `~/.deskctl/{session}.sock`
PID files are stored alongside the socket.
## Sessions
Multiple isolated daemon instances can run simultaneously using the `--session` flag:
```sh
deskctl --session workspace1 snapshot
deskctl --session workspace2 snapshot
```
Each session has its own socket, PID file, and window ref map.
## Backend design
The core is built around a `DesktopBackend` trait. The current implementation uses `x11rb` for X11 protocol operations and `enigo` for input simulation.
The trait-based design means adding Wayland support is a single trait implementation with no changes to the core, CLI, or daemon code.
## X11 integration
Window detection uses EWMH properties:
| Property | Purpose |
| --------------------------- | ------------------------ |
| `_NET_CLIENT_LIST_STACKING` | Window stacking order |
| `_NET_ACTIVE_WINDOW` | Currently focused window |
| `_NET_WM_NAME` | Window title (UTF-8) |
| `_NET_WM_STATE_HIDDEN` | Minimized state |
| `_NET_CLOSE_WINDOW` | Graceful close |
| `WM_CLASS` | Application class/name |
Falls back to `XQueryTree` if `_NET_CLIENT_LIST_STACKING` is unavailable.

View file

@ -6,167 +6,102 @@ toc: true
# Commands
## Snapshot
The public CLI is intentionally small. Most workflows boil down to grouped
reads, grouped waits, selector-driven actions, and a few input primitives.
Capture a screenshot and get the window tree:
## Observe and inspect
```sh
deskctl doctor
deskctl upgrade
deskctl snapshot
deskctl snapshot --annotate
```
With `--annotate`, colored bounding boxes and `@wN` labels are drawn on the screenshot. Each window gets a unique color from an 8-color palette. Minimized windows are skipped.
The screenshot is saved to `/tmp/deskctl-{timestamp}.png`.
## Click
Click the center of a window by ref, or click exact coordinates:
```sh
deskctl click @w1
deskctl click 960,540
```
## Double click
```sh
deskctl dblclick @w1
deskctl dblclick 500,300
```
## Type
Type a string into the focused window:
```sh
deskctl type "hello world"
```
## Press
Press a single key:
```sh
deskctl press enter
deskctl press tab
deskctl press escape
```
Supported key names: `enter`, `tab`, `escape`, `backspace`, `delete`, `space`, `up`, `down`, `left`, `right`, `home`, `end`, `pageup`, `pagedown`, `f1`-`f12`, or any single character.
## Hotkey
Send a key combination. List modifier keys first, then the target key:
```sh
deskctl hotkey ctrl c
deskctl hotkey ctrl shift t
deskctl hotkey alt f4
```
Modifier names: `ctrl`, `alt`, `shift`, `super` (also `meta` or `win`).
## Mouse move
Move the cursor to absolute coordinates:
```sh
deskctl mouse move 100 200
```
## Mouse scroll
Scroll the mouse wheel. Positive values scroll down, negative scroll up:
```sh
deskctl mouse scroll 3
deskctl mouse scroll -5
deskctl mouse scroll 3 --axis horizontal
```
## Mouse drag
Drag from one position to another:
```sh
deskctl mouse drag 100 200 500 600
```
## Focus
Focus a window by ref or by name (case-insensitive substring match):
```sh
deskctl focus @w1
deskctl focus "firefox"
```
## Close
Close a window gracefully:
```sh
deskctl close @w2
deskctl close "terminal"
```
## Move window
Move a window to an absolute position:
```sh
deskctl move-window @w1 0 0
deskctl move-window "firefox" 100 100
```
## Resize window
Resize a window:
```sh
deskctl resize-window @w1 1280 720
```
## List windows
List all windows without taking a screenshot:
```sh
deskctl list-windows
```
## Get screen size
```sh
deskctl screenshot
deskctl screenshot /tmp/screen.png
deskctl get active-window
deskctl get monitors
deskctl get version
deskctl get systeminfo
deskctl get-screen-size
```
## Get mouse position
```sh
deskctl get-mouse-position
```
## Screenshot
`doctor` checks the runtime before daemon startup. `upgrade` checks for a newer
published version, shows a short confirmation prompt when an update is
available, and supports `--yes` for non-interactive use. `snapshot` produces a
screenshot plus window refs. `list-windows` is the same window tree without the
side effect of writing a screenshot. The grouped `get` commands are the
preferred read surface for focused state queries.
Take a screenshot without the window tree. Optionally specify a save path:
## Wait for state transitions
```sh
deskctl screenshot
deskctl screenshot /tmp/my-screenshot.png
deskctl screenshot --annotate
deskctl wait window --selector 'title=Chromium' --timeout 10
deskctl wait focus --selector 'id=win3' --timeout 5
deskctl --json wait window --selector 'class=chromium' --poll-ms 100
```
## Launch
Wait commands return the matched window payload on success. In `--json` mode,
timeouts and selector failures expose structured `kind` values.
Launch an application:
## Act on windows
```sh
deskctl launch firefox
deskctl launch code --args /path/to/project
deskctl launch chromium
deskctl focus @w1
deskctl focus 'title=Chromium'
deskctl click @w1
deskctl click 960,540
deskctl dblclick @w2
deskctl close @w3
deskctl move-window @w1 100 120
deskctl resize-window @w1 1280 720
```
Selector-driven actions accept refs, explicit selector modes, or absolute
coordinates where appropriate.
## Keyboard and mouse input
```sh
deskctl type "hello world"
deskctl press enter
deskctl hotkey ctrl shift t
deskctl mouse move 100 200
deskctl mouse scroll 3
deskctl mouse scroll 3 --axis horizontal
deskctl mouse drag 100 200 500 600
```
Supported key names include `enter`, `tab`, `escape`, `backspace`, `delete`,
`space`, arrow keys, paging keys, `f1` through `f12`, and any single
character.
## Selectors
Prefer explicit selectors when the target matters. They are clearer in logs,
more deterministic for automation, and easier to retry safely.
```sh
ref=w1
id=win1
title=Chromium
class=chromium
focused
```
Legacy shorthand is still supported:
```sh
@w1
w1
win1
```
Bare strings like `chromium` are fuzzy matches. They resolve when there is one
match and fail with candidate windows when there are multiple matches.
## Global options
| Flag | Env | Description |
@ -174,3 +109,6 @@ deskctl launch code --args /path/to/project
| `--json` | | Output as JSON |
| `--socket <path>` | `DESKCTL_SOCKET` | Path to daemon Unix socket |
| `--session <name>` | | Session name for multiple daemons (default: `default`) |
`deskctl` manages the daemon automatically. Most users never need to think
about it beyond `--session` and `--socket`.

View file

@ -8,24 +8,33 @@ import DocLayout from "../layouts/DocLayout.astro";
<img src="/favicon.svg" alt="" width="40" height="40" />
</header>
<p>
Desktop control CLI for AI agents on Linux X11. Compact JSON output for
agent loops. Screenshot, click, type, scroll, drag, and manage windows
through a fast client-daemon architecture. 100% native Rust.
<p class="tagline">non-interactive desktop control cli for AI agents</p>
<p class="lede">
A thin X11 control primitive for agent loops: diagnose the runtime, observe
the desktop, wait for state transitions, act deterministically, then verify.
</p>
<h2>Getting started</h2>
<h2>Start</h2>
<ul>
<li><a href="/installation">Installation</a></li>
<li><a href="/quick-start">Quick start</a></li>
<li>
<a href="/installation">Installation</a>
</li>
<li>
<a href="/quick-start">Quick start</a>
</li>
</ul>
<h2>Reference</h2>
<ul>
<li><a href="/commands">Commands</a></li>
<li><a href="/architecture">Architecture</a></li>
<li>
<a href="/commands">Commands</a>
</li>
<li>
<a href="/runtime-contract">Runtime contract</a>
</li>
</ul>
<h2>Links</h2>
@ -37,5 +46,8 @@ import DocLayout from "../layouts/DocLayout.astro";
<li>
<a href="https://crates.io/crates/deskctl">crates.io</a>
</li>
<li>
<a href="https://www.npmjs.com/package/deskctl">npm</a>
</li>
</ul>
</DocLayout>

View file

@ -6,43 +6,71 @@ toc: true
# Installation
## Cargo
Install the public `deskctl` command first, then validate the desktop runtime
with `deskctl doctor` before trying to automate anything.
## Recommended path
```sh
cargo install deskctl
npm install -g deskctl
deskctl doctor
```
## From source
The `deskctl` npm package is the default install path. It installs the command by
downloading the matching GitHub Release asset for the supported runtime target.
This path does not require a Rust toolchain. The installed command is always
`deskctl`, even though the release asset itself is target-specific.
## Skill install
The repo skill lives under `skills/deskctl`, so you can install it
directly using `skills.sh`:
```sh
npx skills add harivansh-afk/deskctl
```
## Other install paths
### Nix
```sh
nix run github:harivansh-afk/deskctl -- --help
nix profile install github:harivansh-afk/deskctl
```
### Rust
```sh
git clone https://github.com/harivansh-afk/deskctl
cd deskctl
cargo build --release
cargo build
```
## Docker (cross-compile for Linux)
Source builds on Linux require:
Build a static Linux binary from any platform:
- Rust 1.75+
- `pkg-config`
- X11 development libraries such as `libx11-dev` and `libxtst-dev`
```sh
docker compose -f docker/docker-compose.yml run --rm build
```
This writes `dist/deskctl-linux-x86_64`.
## Deploy to a remote machine
Copy the binary over SSH when `scp` is not available:
```sh
ssh -p 443 user@host 'cat > ~/deskctl && chmod +x ~/deskctl' < dist/deskctl-linux-x86_64
```
## Requirements
## Runtime requirements
- Linux with an active X11 session
- `DISPLAY` environment variable set (e.g. `DISPLAY=:1`)
- `XDG_SESSION_TYPE=x11`
- A window manager that exposes EWMH properties (`_NET_CLIENT_LIST_STACKING`, `_NET_ACTIVE_WINDOW`)
- `DISPLAY` set to a usable X11 display, such as `DISPLAY=:1`
- `XDG_SESSION_TYPE=x11` or an equivalent X11 session environment
- a window manager or desktop environment that exposes standard EWMH properties
such as `_NET_CLIENT_LIST_STACKING` and `_NET_ACTIVE_WINDOW`
No extra native libraries are needed beyond the standard glibc runtime (`libc`, `libm`, `libgcc_s`).
The binary itself only depends on the standard Linux glibc runtime.
## Verification
If setup fails for any reason, start here:
```sh
deskctl doctor
```
`doctor` checks X11 connectivity, window enumeration, screenshot viability, and
daemon/socket health before normal command execution.

View file

@ -6,50 +6,74 @@ toc: true
# Quick start
## Core workflow
The fastest way to use `deskctl` is to follow the same four-step loop: observe, wait, act, verify.
The typical agent loop is: snapshot the desktop, interpret the result, act on it.
## 1. Install and diagnose
```sh
# 1. see the desktop
deskctl --json snapshot --annotate
npm install -g deskctl
deskctl doctor
```
# 2. click a window by its ref
deskctl click @w1
Run `deskctl doctor` first. It checks X11 connectivity, basic enumeration,
screenshot viability, and socket health before you start driving the desktop.
# 3. type into the focused window
deskctl type "hello world"
## 2. Observe the desktop
# 4. press a key
```sh
deskctl snapshot --annotate
deskctl list-windows
deskctl get active-window
deskctl get monitors
```
Use `snapshot` when you want a screenshot artifact plus window refs. Use
`list-windows` when you only need the current window tree without writing a
screenshot.
## 3. Pick selectors that stay readable
Prefer explicit selectors when you need deterministic targeting:
```sh
ref=w1
id=win1
title=Chromium
class=chromium
focused
```
Legacy refs such as `@w1` still work after `snapshot` or `list-windows`. Bare
strings like `chromium` are fuzzy matches and now fail on ambiguity.
## 4. Wait, act, verify
The core loop is:
```sh
# observe
deskctl snapshot --annotate
# wait
deskctl wait window --selector 'title=Chromium' --timeout 10
# act
deskctl focus 'title=Chromium'
deskctl hotkey ctrl l
deskctl type "https://example.com"
deskctl press enter
# verify
deskctl wait focus --selector 'title=Chromium' --timeout 5
deskctl snapshot
```
The `--annotate` flag draws colored bounding boxes and `@wN` labels on the screenshot so agents can visually identify windows.
The wait commands return the matched window payload on success, so they compose
cleanly into the next action.
## Window refs
## 5. Use `--json` when parsing matters
Every `snapshot` assigns refs like `@w1`, `@w2`, etc. to each visible window, ordered top-to-bottom by stacking order. Use these refs anywhere a selector is expected:
```sh
deskctl click @w1
deskctl focus @w3
deskctl close @w2
```
You can also select windows by name (case-insensitive substring match):
```sh
deskctl focus "firefox"
deskctl close "terminal"
```
## JSON output
Pass `--json` for machine-readable output. This is the primary mode for agent integrations:
```sh
deskctl --json snapshot
```
Every command supports `--json` and uses the same top-level envelope:
```json
{
@ -59,9 +83,9 @@ deskctl --json snapshot
"windows": [
{
"ref_id": "w1",
"xcb_id": 12345678,
"title": "Firefox",
"app_name": "firefox",
"window_id": "win1",
"title": "Chromium",
"app_name": "chromium",
"x": 0,
"y": 0,
"width": 1920,
@ -74,14 +98,8 @@ deskctl --json snapshot
}
```
## Daemon lifecycle
Use `window_id` for stable targeting inside a live daemon session. The exact
text formatting is intentionally compact, but JSON is the parsing contract.
The daemon starts automatically on the first command. It keeps the X11 connection alive so repeated calls are fast. You do not need to manage it manually.
```sh
# check if the daemon is running
deskctl daemon status
# stop it explicitly
deskctl daemon stop
```
The full stable-vs-best-effort contract lives on the
[runtime contract](/runtime-contract) page.

View file

@ -0,0 +1,177 @@
---
layout: ../layouts/DocLayout.astro
title: Runtime contract
toc: true
---
# Runtime contract
This page defines the current public output contract for `deskctl`.
It is intentionally scoped to the current Linux X11 runtime surface. It does
not promise stability for future Wayland or window-manager-specific features.
## Stable top-level envelope
Every command supports `--json` and uses the same top-level envelope:
```json
{
"success": true,
"data": {},
"error": null
}
```
Stable top-level fields:
- `success`
- `data`
- `error`
If `success` is `false`, the command exits non-zero in both text mode and JSON
mode.
## Stable window payload
Whenever a response includes a window payload, these fields are stable:
- `ref_id`
- `window_id`
- `title`
- `app_name`
- `x`
- `y`
- `width`
- `height`
- `focused`
- `minimized`
`window_id` is the public session-scoped identifier for programmatic targeting.
`ref_id` is a short-lived convenience handle from the current ref map.
## Stable grouped reads
`deskctl get active-window`
- stable: `data.window`
`deskctl get monitors`
- stable: `data.count`
- stable: `data.monitors`
Stable per-monitor fields:
- `name`
- `x`
- `y`
- `width`
- `height`
- `width_mm`
- `height_mm`
- `primary`
- `automatic`
`deskctl get version`
- stable: `data.version`
- stable: `data.backend`
`deskctl get systeminfo`
- stable: `data.backend`
- stable: `data.display`
- stable: `data.session_type`
- stable: `data.session`
- stable: `data.socket_path`
- stable: `data.screen`
- stable: `data.monitor_count`
- stable: `data.monitors`
## Stable waits
`deskctl wait window`
`deskctl wait focus`
- stable: `data.wait`
- stable: `data.selector`
- stable: `data.elapsed_ms`
- stable: `data.window`
## Stable selector-driven action fields
When selector-driven actions return resolved window data, these fields are
stable when present:
- `data.ref_id`
- `data.window_id`
- `data.title`
- `data.selector`
This applies to:
- `click`
- `dblclick`
- `focus`
- `close`
- `move-window`
- `resize-window`
## Stable artifact fields
For `snapshot` and `screenshot`:
- stable: `data.screenshot`
When a command also returns windows, `data.windows` uses the stable window
payload documented above.
## Stable structured error kinds
When a command fails with structured JSON data, these error kinds are stable:
- `selector_not_found`
- `selector_ambiguous`
- `selector_invalid`
- `timeout`
- `not_found`
- `window_not_focused` in `data.last_observation.kind` or an equivalent wait
observation payload
Stable structured failure fields include:
- `data.kind`
- `data.selector`
- `data.mode`
- `data.candidates`
- `data.message`
- `data.wait`
- `data.timeout_ms`
- `data.poll_ms`
- `data.last_observation`
## Best-effort fields
These values are useful but environment-dependent and should not be treated as
strict parsing guarantees:
- exact monitor naming conventions
- EWMH/window-manager-dependent ordering details
- cosmetic text formatting in non-JSON mode
- default screenshot file names when no explicit path was provided
- stderr wording outside the structured `kind` classifications above
## Text mode expectations
Text mode is intended to stay compact and follow-up-useful.
The exact whitespace and alignment are not stable. The stable behavioral
expectations are:
- important reads print actionable identifiers or geometry
- selector failures print enough detail to recover without `--json`
- artifact-producing commands print the artifact path
- window listings print both `@wN` refs and `window_id` values
If you need strict parsing, use `--json`.

View file

@ -65,6 +65,11 @@ main {
font-style: italic;
}
.lede {
font-size: 1.05rem;
max-width: 42rem;
}
header {
display: flex;
align-items: center;
@ -117,6 +122,10 @@ a:hover {
text-decoration-thickness: 2px;
}
img {
max-width: 100%;
}
ul,
ol {
padding-left: 1.25em;
@ -215,30 +224,30 @@ hr {
}
}
nav {
.breadcrumbs {
max-width: 50rem;
margin: 0 auto;
padding: 1.5rem clamp(1.25rem, 5vw, 3rem) 0;
font-size: 0.9rem;
}
nav a {
.breadcrumbs a {
color: inherit;
text-decoration: none;
opacity: 0.6;
transition: opacity 0.15s;
}
nav a:hover {
.breadcrumbs a:hover {
opacity: 1;
}
nav .title {
.breadcrumbs .title {
font-weight: 500;
opacity: 1;
}
nav .sep {
.breadcrumbs .sep {
opacity: 0.3;
margin: 0 0.5em;
}

View file

@ -1,117 +0,0 @@
---
name: deskctl
description: Desktop control CLI for AI agents
allowed-tools: Bash(deskctl:*)
---
# deskctl
Desktop control CLI for AI agents on Linux X11. Provides a unified interface for screenshots, mouse/keyboard input, and window management with compact `@wN` window references.
## Core Workflow
1. **Snapshot** to see the desktop and get window refs
2. **Act** using refs or coordinates (click, type, focus)
3. **Repeat** as needed
## Quick Reference
### See the Desktop
```bash
deskctl snapshot # Screenshot + window tree with @wN refs
deskctl snapshot --annotate # Screenshot with bounding boxes and labels
deskctl snapshot --json # Structured JSON output
deskctl list-windows # Window tree without screenshot
deskctl screenshot /tmp/s.png # Screenshot only (no window tree)
```
### Click and Type
```bash
deskctl click @w1 # Click center of window @w1
deskctl click 500,300 # Click absolute coordinates
deskctl dblclick @w2 # Double-click window @w2
deskctl type "hello world" # Type text into focused window
deskctl press enter # Press a key
deskctl hotkey ctrl c # Send Ctrl+C
deskctl hotkey ctrl shift t # Send Ctrl+Shift+T
```
### Mouse Control
```bash
deskctl mouse move 500 300 # Move cursor to coordinates
deskctl mouse scroll 3 # Scroll down 3 units
deskctl mouse scroll -3 # Scroll up 3 units
deskctl mouse drag 100 100 500 500 # Drag from (100,100) to (500,500)
```
### Window Management
```bash
deskctl focus @w2 # Focus window by ref
deskctl focus "firefox" # Focus window by name (substring match)
deskctl close @w3 # Close window gracefully
deskctl move-window @w1 100 200 # Move window to position
deskctl resize-window @w1 800 600 # Resize window
```
### Utilities
```bash
deskctl doctor # Diagnose X11, screenshot, and daemon health
deskctl get-screen-size # Screen resolution
deskctl get-mouse-position # Current cursor position
deskctl launch firefox # Launch an application
deskctl launch code -- --new-window # Launch with arguments
```
### Daemon
```bash
deskctl daemon start # Start daemon manually
deskctl daemon stop # Stop daemon
deskctl daemon status # Check daemon status
```
## Global Options
- `--json` : Output as structured JSON (all commands)
- `--session NAME` : Session name for multiple daemon instances (default: "default")
- `--socket PATH` : Custom Unix socket path
## Window Refs
After `snapshot` or `list-windows`, windows are assigned short refs:
- `@w1` is the topmost (usually focused) window
- `@w2`, `@w3`, etc. follow z-order (front to back)
- Refs reset on each `snapshot` call
- Use `--json` to see stable `window_id` values for programmatic tracking within the current daemon session
## Example Agent Workflow
```bash
# 1. See what's on screen
deskctl snapshot --annotate
# 2. Focus the browser
deskctl focus "firefox"
# 3. Navigate to a URL
deskctl hotkey ctrl l
deskctl type "https://example.com"
deskctl press enter
# 4. Take a new snapshot to see the result
deskctl snapshot
```
## Key Names for press/hotkey
Modifiers: `ctrl`, `alt`, `shift`, `super`
Navigation: `enter`, `tab`, `escape`, `backspace`, `delete`, `space`
Arrows: `up`, `down`, `left`, `right`
Page: `home`, `end`, `pageup`, `pagedown`
Function: `f1` through `f12`
Characters: any single character (e.g. `a`, `1`, `/`)

60
skills/deskctl/SKILL.md Normal file
View file

@ -0,0 +1,60 @@
---
name: deskctl
description: Non-interactive X11 desktop control for AI agents. Use when the task involves controlling a Linux desktop - clicking, typing, reading windows, waiting for UI state, or taking screenshots inside a sandbox or VM.
allowed-tools: Bash(deskctl:*), Bash(npx deskctl:*), Bash(npm:*), Bash(which:*), Bash(printenv:*), Bash(echo:*)
---
# deskctl
Non-interactive desktop control CLI for Linux X11 agents.
All output follows the runtime contract defined in [references/runtime-contract.md](references/runtime-contract.md). Every command returns a stable JSON envelope when called with `--json`. Use `--json` whenever you need to parse output programmatically.
## Quick start
```bash
npm install -g deskctl
deskctl doctor
deskctl snapshot --annotate
```
If `deskctl` was installed through npm, refresh it later with:
```bash
deskctl upgrade --yes
```
## Agent loop
Every desktop interaction follows: **observe -> wait -> act -> verify**.
```bash
deskctl snapshot --annotate # observe
deskctl wait window --selector 'title=Chromium' --timeout 10 # wait
deskctl click 'title=Chromium' # act
deskctl snapshot # verify
```
See [workflows/observe-act.sh](workflows/observe-act.sh) for a reusable script. See [workflows/poll-condition.sh](workflows/poll-condition.sh) for polling loops.
## Selectors
```bash
ref=w1 # snapshot ref (short-lived)
id=win1 # stable window ID (session-scoped)
title=Chromium # match by title
class=chromium # match by WM class
focused # currently focused window
```
Bare strings like `chromium` do fuzzy matching but fail on ambiguity. Prefer explicit selectors.
## References
- [references/runtime-contract.md](references/runtime-contract.md) - output contract, stable fields, error kinds
- [references/commands.md](references/commands.md) - all available commands
## Workflows
- [workflows/observe-act.sh](workflows/observe-act.sh) - main observe-act loop
- [workflows/poll-condition.sh](workflows/poll-condition.sh) - poll for a condition on screen

View file

@ -0,0 +1,7 @@
interface:
display_name: "deskctl"
short_description: "Control Linux X11 desktops from agent loops"
default_prompt: "Use $deskctl to diagnose the desktop, observe state, wait for UI changes, act deterministically, and verify the result."
policy:
allow_implicit_invocation: true

View file

@ -0,0 +1,66 @@
# deskctl commands
All commands support `--json` for machine-parseable output following the
runtime contract.
## Observe
```bash
deskctl doctor
deskctl upgrade
deskctl snapshot
deskctl snapshot --annotate
deskctl list-windows
deskctl screenshot /tmp/screen.png
deskctl get active-window
deskctl get monitors
deskctl get version
deskctl get systeminfo
deskctl get-screen-size
deskctl get-mouse-position
```
## Wait
```bash
deskctl wait window --selector 'title=Chromium' --timeout 10
deskctl wait focus --selector 'class=chromium' --timeout 5
```
Returns the matched window payload on success. Failures include structured
`kind` values in `--json` mode.
## Selectors
```bash
ref=w1
id=win1
title=Chromium
class=chromium
focused
```
Legacy shorthand: `@w1`, `w1`, `win1`. Bare strings do fuzzy matching but fail
on ambiguity.
## Act
```bash
deskctl focus 'class=chromium'
deskctl click @w1
deskctl dblclick @w2
deskctl type "hello world"
deskctl press enter
deskctl hotkey ctrl shift t
deskctl mouse move 500 300
deskctl mouse scroll 3
deskctl mouse scroll 3 --axis horizontal
deskctl mouse drag 100 100 500 500
deskctl move-window @w1 100 120
deskctl resize-window @w1 1280 720
deskctl close @w3
deskctl launch chromium
```
The daemon starts automatically on first command. In normal usage you should
not need to manage it directly.

View file

@ -0,0 +1,73 @@
# deskctl runtime contract
This copy ships inside the installable skill so `npx skills add ...` installs a
self-contained reference bundle.
All commands support `--json` and use the same top-level envelope:
```json
{
"success": true,
"data": {},
"error": null
}
```
Use `--json` whenever you need to parse output programmatically.
## Stable window fields
Whenever a response includes a window payload, these fields are stable:
- `ref_id`
- `window_id`
- `title`
- `app_name`
- `x`
- `y`
- `width`
- `height`
- `focused`
- `minimized`
Use `window_id` for stable targeting inside a live daemon session. Use
`ref_id` or `@wN` for short-lived follow-up actions after `snapshot` or
`list-windows`.
## Stable grouped reads
- `deskctl get active-window` -> `data.window`
- `deskctl get monitors` -> `data.count`, `data.monitors`
- `deskctl get version` -> `data.version`, `data.backend`
- `deskctl get systeminfo` -> runtime-scoped diagnostic fields such as
`backend`, `display`, `session_type`, `session`, `socket_path`, `screen`,
`monitor_count`, and `monitors`
## Stable waits
- `deskctl wait window` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
`data.window`
- `deskctl wait focus` -> `data.wait`, `data.selector`, `data.elapsed_ms`,
`data.window`
## Stable structured error kinds
When a command fails with structured JSON data, these `kind` values are stable:
- `selector_not_found`
- `selector_ambiguous`
- `selector_invalid`
- `timeout`
- `not_found`
Wait failures may also include `window_not_focused` in the last observation
payload.
## Best-effort fields
Treat these as useful but non-contractual:
- exact monitor names
- incidental text formatting in non-JSON mode
- default screenshot file names when no explicit path was provided
- environment-dependent ordering details from the window manager

View file

@ -0,0 +1,37 @@
#!/usr/bin/env bash
# observe-act.sh - main desktop interaction loop
# usage: ./observe-act.sh <selector> [action] [action-args...]
# example: ./observe-act.sh 'title=Chromium' click
# example: ./observe-act.sh 'class=terminal' type "ls -la"
#
# Stages: observe (snapshot + active window), wait for the target window,
# act on it, then snapshot again to verify the result.
set -euo pipefail

SELECTOR="${1:?usage: observe-act.sh <selector> [action] [action-args...]}"
ACTION="${2:-click}"

# Drop the selector and, when present, the action so "$@" holds only the
# action arguments. A bare `shift 2` fails (and shifts nothing) when only the
# selector was given.
shift
if [ "$#" -gt 0 ]; then
  shift
fi

# Validate the action before touching the desktop so a typo fails immediately
# instead of after the observe and wait stages.
case "$ACTION" in
  click | dblclick | focus | close) ;;
  type | press | hotkey)
    if [ "$#" -eq 0 ]; then
      echo "action '$ACTION' requires at least one argument" >&2
      exit 1
    fi
    ;;
  *)
    echo "unknown action: $ACTION" >&2
    exit 1
    ;;
esac

# Run a command and print only the first line of its output.
# The output is captured instead of piped into `head`: with `pipefail`, an
# early-exiting `head` can kill the producer with SIGPIPE and abort the script.
first_line_of() {
  local output
  output="$("$@")"
  if [ -n "$output" ]; then
    printf '%s\n' "${output%%$'\n'*}"
  fi
}

# 1. observe - snapshot the desktop, get current state
echo "--- observe ---"
first_line_of deskctl snapshot --annotate --json
deskctl get active-window

# 2. wait - ensure target exists
echo "--- wait ---"
deskctl wait window --selector "$SELECTOR" --timeout 10

# 3. act - perform the action on the target
echo "--- act ---"
case "$ACTION" in
  click) deskctl click "$SELECTOR" ;;
  dblclick) deskctl dblclick "$SELECTOR" ;;
  focus) deskctl focus "$SELECTOR" ;;
  type) deskctl focus "$SELECTOR" && deskctl type "$@" ;;
  press) deskctl focus "$SELECTOR" && deskctl press "$@" ;;
  hotkey) deskctl focus "$SELECTOR" && deskctl hotkey "$@" ;;
  close) deskctl close "$SELECTOR" ;;
esac

# 4. verify - snapshot again to confirm result
echo "--- verify ---"
sleep 0.5
first_line_of deskctl snapshot --json

View file

@ -0,0 +1,42 @@
#!/usr/bin/env bash
# poll-condition.sh - poll the desktop until a condition is met
# usage: ./poll-condition.sh <match-string> [interval-seconds] [max-attempts]
# example: ./poll-condition.sh "Tickets Available" 5 60
# example: ./poll-condition.sh "Order Confirmed" 3 20
# example: ./poll-condition.sh "Download Complete" 10 30
#
# checks the window list and the active window for the match string every N
# seconds. the match is a case-insensitive literal substring (not a regex).
# exits 0 when found, exits 1 after max attempts, exits 2 on bad arguments.
#
# NOTE: the match runs against the raw JSON output, so a match string that
# equals a JSON field name (for example "title" or "success") always matches.
set -euo pipefail

MATCH="${1:?usage: poll-condition.sh <match-string> [interval] [max-attempts]}"
INTERVAL="${2:-5}"
MAX="${3:-60}"

# Reject a non-numeric attempt count up front; otherwise the loop test errors
# out and the script reports NOT FOUND without ever polling.
case "$MAX" in
  '' | *[!0-9]*)
    echo "max-attempts must be a non-negative integer: '$MAX'" >&2
    exit 2
    ;;
esac

# Succeed when the text in $1 contains MATCH.
# -F keeps MATCH literal, `--` protects a MATCH starting with '-', and the
# here-string avoids a pipe: with `pipefail`, `echo ... | grep -q` can report
# failure when grep exits early and echo receives SIGPIPE.
contains_match() {
  grep -qiF -- "$MATCH" <<<"$1"
}

# The annotated snapshot is a convenience; it must not change the exit code.
snapshot_best_effort() {
  deskctl snapshot --annotate || true
}

attempt=0
while [ "$attempt" -lt "$MAX" ]; do
  attempt=$((attempt + 1))

  # check every window reported by the window list
  windows=$(deskctl list-windows --json 2>/dev/null || echo '{"success":false}')
  if contains_match "$windows"; then
    echo "FOUND: '$MATCH' detected on attempt $attempt"
    snapshot_best_effort
    exit 0
  fi

  # also check the active window payload
  active=$(deskctl get active-window --json 2>/dev/null || echo '{}')
  if contains_match "$active"; then
    echo "FOUND: '$MATCH' in active window on attempt $attempt"
    snapshot_best_effort
    exit 0
  fi

  # no point sleeping after the final attempt
  if [ "$attempt" -lt "$MAX" ]; then
    echo "attempt $attempt/$MAX - '$MATCH' not found, waiting ${INTERVAL}s..."
    sleep "$INTERVAL"
  else
    echo "attempt $attempt/$MAX - '$MATCH' not found"
  fi
done

echo "NOT FOUND: '$MATCH' after $MAX attempts"
snapshot_best_effort
exit 1

View file

@ -17,11 +17,30 @@ pub struct BackendWindow {
pub minimized: bool,
}
/// Geometry and metadata for a single monitor as reported by a desktop backend.
#[derive(Debug, Clone)]
pub struct BackendMonitor {
    /// Monitor name supplied by the backend.
    pub name: String,
    /// Horizontal position of the monitor's origin.
    pub x: i32,
    /// Vertical position of the monitor's origin.
    pub y: i32,
    /// Width of the monitor area.
    pub width: u32,
    /// Height of the monitor area.
    pub height: u32,
    /// Physical width in millimeters.
    pub width_mm: u32,
    /// Physical height in millimeters.
    pub height_mm: u32,
    /// Whether the backend flags this monitor as the primary one.
    pub primary: bool,
    /// Backend-provided "automatic" flag.
    // NOTE(review): presumably mirrors the RandR monitor `automatic` field - confirm.
    pub automatic: bool,
}
#[allow(dead_code)]
pub trait DesktopBackend: Send {
/// Collect z-ordered windows for read-only queries and targeting.
fn list_windows(&mut self) -> Result<Vec<BackendWindow>>;
/// Get the currently focused window, if one is known.
fn active_window(&mut self) -> Result<Option<BackendWindow>>;
/// Collect monitor geometry and metadata.
fn list_monitors(&self) -> Result<Vec<BackendMonitor>>;
/// Capture the current desktop image without writing it to disk.
fn capture_screenshot(&mut self) -> Result<RgbaImage>;
@ -69,4 +88,7 @@ pub trait DesktopBackend: Send {
/// Launch an application.
fn launch(&self, command: &str, args: &[String]) -> Result<u32>;
/// Human-readable backend name for diagnostics and runtime queries.
fn backend_name(&self) -> &'static str;
}

View file

@ -2,6 +2,7 @@ use anyhow::{Context, Result};
use enigo::{Axis, Button, Coordinate, Direction, Enigo, Key, Keyboard, Mouse, Settings};
use image::RgbaImage;
use x11rb::connection::Connection;
use x11rb::protocol::randr::ConnectionExt as RandrConnectionExt;
use x11rb::protocol::xproto::{
Atom, AtomEnum, ClientMessageData, ClientMessageEvent, ConfigureWindowAux,
ConnectionExt as XprotoConnectionExt, EventMask, GetPropertyReply, ImageFormat, ImageOrder,
@ -9,7 +10,7 @@ use x11rb::protocol::xproto::{
};
use x11rb::rust_connection::RustConnection;
use crate::backend::BackendWindow;
use crate::backend::{BackendMonitor, BackendWindow};
struct Atoms {
client_list_stacking: Atom,
@ -103,6 +104,74 @@ impl X11Backend {
Ok(window_infos)
}
/// Describe the currently active window, if there is one.
///
/// Returns `Ok(None)` when no window is active or when the active window has
/// neither a title nor an application name. Title, application name and the
/// minimized flag are best-effort (lookup errors fall back to defaults);
/// a geometry lookup failure is returned as an error.
fn active_window_info(&self) -> Result<Option<BackendWindow>> {
    let native_id = match self.active_window()? {
        Some(id) => id,
        None => return Ok(None),
    };

    let title = self.window_title(native_id).unwrap_or_default();
    let app_name = self.window_app_name(native_id).unwrap_or_default();
    let is_anonymous = title.is_empty() && app_name.is_empty();
    if is_anonymous {
        return Ok(None);
    }

    let (x, y, width, height) = self.window_geometry(native_id)?;
    let minimized = self.window_is_minimized(native_id).unwrap_or(false);

    let window = BackendWindow {
        native_id,
        title,
        app_name,
        x,
        y,
        width,
        height,
        // This is the active window by construction.
        focused: true,
        minimized,
    };
    Ok(Some(window))
}
/// Query RandR for the active monitors on the root window.
///
/// A monitor whose name atom cannot be read is named `monitor<N>` (1-based).
/// When RandR reports no monitors at all, a single synthetic `screen` entry
/// covering the root geometry is returned instead.
fn collect_monitors(&self) -> Result<Vec<BackendMonitor>> {
    let reply = self
        .conn
        .randr_get_monitors(self.root, true)?
        .reply()
        .context("Failed to query RANDR monitors")?;

    let mut monitors: Vec<BackendMonitor> = reply
        .monitors
        .into_iter()
        .enumerate()
        .map(|(index, monitor)| {
            let name = match self.atom_name(monitor.name) {
                Ok(name) => name,
                Err(_) => format!("monitor{}", index + 1),
            };
            BackendMonitor {
                name,
                x: i32::from(monitor.x),
                y: i32::from(monitor.y),
                width: u32::from(monitor.width),
                height: u32::from(monitor.height),
                width_mm: monitor.width_in_millimeters,
                height_mm: monitor.height_in_millimeters,
                primary: monitor.primary,
                automatic: monitor.automatic,
            }
        })
        .collect();

    if !monitors.is_empty() {
        return Ok(monitors);
    }

    // No monitors reported: describe the whole root window as one screen.
    let (width, height) = self.root_geometry()?;
    monitors.push(BackendMonitor {
        name: "screen".to_string(),
        x: 0,
        y: 0,
        width,
        height,
        width_mm: 0,
        height_mm: 0,
        primary: true,
        automatic: true,
    });
    Ok(monitors)
}
fn capture_root_image(&self) -> Result<RgbaImage> {
let (width, height) = self.root_geometry()?;
let reply = self
@ -224,6 +293,14 @@ impl X11Backend {
.reply()
.with_context(|| format!("Failed to read property {property} from window {window}"))
}
/// Look up the name of an X11 atom, decoding it lossily as UTF-8.
fn atom_name(&self, atom: Atom) -> Result<String> {
    let reply = self
        .conn
        .get_atom_name(atom)?
        .reply()
        .with_context(|| format!("Failed to read atom name for {atom}"))?;
    Ok(String::from_utf8_lossy(&reply.name).into_owned())
}
}
impl super::DesktopBackend for X11Backend {
@ -231,6 +308,30 @@ impl super::DesktopBackend for X11Backend {
self.collect_window_infos()
}
/// Report the active window by delegating to `active_window_info`.
fn active_window(&mut self) -> Result<Option<BackendWindow>> {
    self.active_window_info()
}
/// List monitors, falling back to a single root-sized `screen` entry when the
/// monitor query fails.
///
/// The query error itself is discarded; only a failure to read the root
/// geometry during the fallback is returned to the caller.
fn list_monitors(&self) -> Result<Vec<BackendMonitor>> {
    self.collect_monitors().or_else(|_| {
        let (width, height) = self.root_geometry()?;
        let whole_screen = BackendMonitor {
            name: "screen".to_string(),
            x: 0,
            y: 0,
            width,
            height,
            width_mm: 0,
            height_mm: 0,
            primary: true,
            automatic: true,
        };
        Ok(vec![whole_screen])
    })
}
fn capture_screenshot(&mut self) -> Result<RgbaImage> {
self.capture_root_image()
}
@ -452,6 +553,10 @@ impl super::DesktopBackend for X11Backend {
.with_context(|| format!("Failed to launch: {command}"))?;
Ok(child.id())
}
/// Name of this backend; always `"x11"`.
fn backend_name(&self) -> &'static str {
    "x11"
}
}
fn parse_key(name: &str) -> Result<Key> {

View file

@ -79,8 +79,23 @@ fn spawn_daemon(opts: &GlobalOpts) -> Result<()> {
Ok(())
}
/// Pick the socket read timeout for a request.
///
/// `wait-window` and `wait-focus` requests get their own `timeout_ms` value
/// (10 000 ms when absent or not an unsigned integer) plus a 5 000 ms margin.
/// Every other action uses 30 seconds.
fn request_read_timeout(request: &Request) -> Duration {
    let is_wait = matches!(request.action.as_str(), "wait-window" | "wait-focus");
    if !is_wait {
        return Duration::from_secs(30);
    }

    let requested_ms = request
        .extra
        .get("timeout_ms")
        .and_then(|value| value.as_u64())
        .unwrap_or(10_000);
    // saturating_add keeps an absurdly large timeout from overflowing.
    Duration::from_millis(requested_ms.saturating_add(5_000))
}
fn send_request_over_stream(mut stream: UnixStream, request: &Request) -> Result<Response> {
stream.set_read_timeout(Some(Duration::from_secs(30)))?;
stream.set_read_timeout(Some(request_read_timeout(request)))?;
stream.set_write_timeout(Some(Duration::from_secs(5)))?;
let json = serde_json::to_string(request)?;

File diff suppressed because it is too large Load diff

465
src/cli/upgrade.rs Normal file
View file

@ -0,0 +1,465 @@
use std::io::{self, IsTerminal, Write};
use std::path::{Path, PathBuf};
use std::process::Command;
use anyhow::{Context, Result};
use serde_json::json;
use crate::cli::{GlobalOpts, UpgradeOpts};
use crate::core::protocol::Response;
/// How the running `deskctl` binary appears to have been installed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum InstallMethod {
    /// Installed through the npm package.
    Npm,
    /// Installed with `cargo install`.
    Cargo,
    /// Installed into the Nix store.
    Nix,
    /// Running from a Cargo `target/` build directory.
    Source,
    /// None of the known layouts matched.
    Unknown,
}

impl InstallMethod {
    /// Stable lowercase label used in JSON payloads and messages.
    fn as_str(self) -> &'static str {
        let label: &'static str = match self {
            InstallMethod::Npm => "npm",
            InstallMethod::Cargo => "cargo",
            InstallMethod::Nix => "nix",
            InstallMethod::Source => "source",
            InstallMethod::Unknown => "unknown",
        };
        label
    }
}
/// The external command that upgrades deskctl for a given install method.
#[derive(Debug)]
struct UpgradePlan {
    /// Install method this plan was built for.
    install_method: InstallMethod,
    /// Executable to run.
    program: &'static str,
    /// Arguments passed to `program`.
    args: Vec<&'static str>,
}

impl UpgradePlan {
    /// Render the plan as a single space-separated command line for display.
    fn command_line(&self) -> String {
        let mut words: Vec<&str> = Vec::with_capacity(self.args.len() + 1);
        words.push(self.program);
        words.extend(self.args.iter().copied());
        words.join(" ")
    }
}
/// Version pair used to decide whether an upgrade is needed.
#[derive(Debug)]
struct VersionInfo {
    /// Version of the running binary.
    current: String,
    /// Latest version reported by the package registry.
    latest: String,
}
/// Run `deskctl upgrade`.
///
/// Detects how the running binary was installed, looks up the latest
/// published version, asks for confirmation (unless `--yes` was given) and
/// runs the package manager's install command.
///
/// Expected failures (unsupported install method, registry lookup failure,
/// missing confirmation, failed install command) are reported as
/// `Ok(Response)` values with `success: false` and a structured `kind`.
///
/// # Errors
///
/// Returns `Err` only when the executable path cannot be determined or the
/// interactive confirmation prompt fails on I/O.
pub fn run_upgrade(opts: &GlobalOpts, upgrade_opts: &UpgradeOpts) -> Result<Response> {
    let current_exe = std::env::current_exe().context("Failed to determine executable path")?;
    let install_method = detect_install_method(&current_exe);

    let Some(plan) = upgrade_plan(install_method) else {
        return Ok(Response::err_with_data(
            format!(
                "deskctl upgrade is not supported for {} installs.",
                install_method.as_str()
            ),
            json!({
                "kind": "upgrade_unsupported",
                "install_method": install_method.as_str(),
                "current_exe": current_exe.display().to_string(),
                "hint": upgrade_hint(install_method),
            }),
        ));
    };

    if !opts.json {
        println!("- Checking for updates...");
    }

    let versions = match resolve_versions(&plan) {
        Ok(versions) => versions,
        Err(response) => return Ok(response),
    };

    // Compare numerically so a local build that is ahead of the registry is
    // reported as up to date instead of being "upgraded" to an older release.
    if !upgrade_available(&versions.current, &versions.latest) {
        return Ok(Response::ok(json!({
            "action": "upgrade",
            "status": "up_to_date",
            "install_method": plan.install_method.as_str(),
            "current_version": versions.current,
            "latest_version": versions.latest,
        })));
    }

    if !upgrade_opts.yes {
        // Never prompt when output is machine-readable or stdin is not a TTY.
        if opts.json || !io::stdin().is_terminal() {
            return Ok(Response::err_with_data(
                format!(
                    "Upgrade confirmation required for {} -> {}.",
                    versions.current, versions.latest
                ),
                json!({
                    "kind": "upgrade_confirmation_required",
                    "install_method": plan.install_method.as_str(),
                    "current_version": versions.current,
                    "latest_version": versions.latest,
                    "command": plan.command_line(),
                    "hint": "Re-run with --yes to upgrade non-interactively.",
                }),
            ));
        }

        if !confirm_upgrade(&versions)? {
            return Ok(Response::ok(json!({
                "action": "upgrade",
                "status": "cancelled",
                "install_method": plan.install_method.as_str(),
                "current_version": versions.current,
                "latest_version": versions.latest,
            })));
        }
    }

    if !opts.json {
        println!(
            "- Upgrading deskctl from {} -> {}...",
            versions.current, versions.latest
        );
    }

    let output = match Command::new(plan.program).args(&plan.args).output() {
        Ok(output) => output,
        Err(error) => return Ok(upgrade_spawn_error_response(&plan, &versions, &error)),
    };

    if output.status.success() {
        return Ok(Response::ok(json!({
            "action": "upgrade",
            "status": "upgraded",
            "install_method": plan.install_method.as_str(),
            "current_version": versions.current,
            "latest_version": versions.latest,
            "command": plan.command_line(),
            "exit_code": output.status.code(),
        })));
    }

    Ok(upgrade_command_failed_response(&plan, &versions, &output))
}

/// Decide whether `latest` is an upgrade over `current`.
///
/// Purely numeric dotted versions are compared component by component. When
/// either side has another shape (for example a pre-release suffix), any
/// difference counts as an available upgrade.
fn upgrade_available(current: &str, latest: &str) -> bool {
    match (
        parse_release_version(current),
        parse_release_version(latest),
    ) {
        (Some(current), Some(latest)) => latest > current,
        _ => current != latest,
    }
}

/// Parse a version such as `0.1.14` into numeric components, or `None` when
/// any component is not a plain unsigned integer.
fn parse_release_version(version: &str) -> Option<Vec<u64>> {
    version
        .trim()
        .split('.')
        .map(|part| part.parse::<u64>().ok())
        .collect()
}
/// Determine the running version and the latest published version for `plan`.
///
/// The current version is the crate version baked in at compile time. The
/// latest version comes from the registry matching the install method. On
/// failure the `Err` value is a ready-to-return structured error response.
fn resolve_versions(plan: &UpgradePlan) -> std::result::Result<VersionInfo, Response> {
    let lookup = match plan.install_method {
        InstallMethod::Npm => query_npm_latest_version(),
        InstallMethod::Cargo => query_cargo_latest_version(),
        InstallMethod::Nix | InstallMethod::Source | InstallMethod::Unknown => {
            Err(Response::err_with_data(
                "Could not determine the latest published version.".to_string(),
                json!({
                    "kind": "upgrade_failed",
                    "install_method": plan.install_method.as_str(),
                    "reason": "Could not determine the latest published version for this install method.",
                    "command": plan.command_line(),
                    "hint": upgrade_hint(plan.install_method),
                }),
            ))
        }
    };
    let latest = lookup?;
    let current = env!("CARGO_PKG_VERSION").to_string();
    Ok(VersionInfo { current, latest })
}
/// Ask npm for the latest published `deskctl` version.
///
/// Runs `npm view deskctl version --json` and expects stdout to be a single
/// JSON string. Spawn failures, a non-zero exit and an unexpected payload are
/// each mapped to an `upgrade_failed` error response.
fn query_npm_latest_version() -> std::result::Result<String, Response> {
    let spawned = Command::new("npm")
        .args(["view", "deskctl", "version", "--json"])
        .output();

    let output = match spawned {
        Ok(output) => output,
        Err(error) => {
            return Err(Response::err_with_data(
                "Failed to check the latest npm version.".to_string(),
                json!({
                    "kind": "upgrade_failed",
                    "install_method": InstallMethod::Npm.as_str(),
                    "reason": "Failed to run npm view deskctl version --json.",
                    "io_error": error.to_string(),
                    "command": "npm view deskctl version --json",
                    "hint": upgrade_hint(InstallMethod::Npm),
                }),
            ));
        }
    };

    if !output.status.success() {
        return Err(Response::err_with_data(
            "Failed to check the latest npm version.".to_string(),
            json!({
                "kind": "upgrade_failed",
                "install_method": InstallMethod::Npm.as_str(),
                "reason": command_failure_reason(&output),
                "command": "npm view deskctl version --json",
                "hint": upgrade_hint(InstallMethod::Npm),
            }),
        ));
    }

    match serde_json::from_slice::<String>(&output.stdout) {
        Ok(version) => Ok(version),
        Err(_) => Err(Response::err_with_data(
            "Failed to parse the latest npm version.".to_string(),
            json!({
                "kind": "upgrade_failed",
                "install_method": InstallMethod::Npm.as_str(),
                "reason": "npm view returned an unexpected version payload.",
                "command": "npm view deskctl version --json",
                "hint": upgrade_hint(InstallMethod::Npm),
            }),
        )),
    }
}
/// Ask crates.io (through `cargo search`) for the latest `deskctl` version.
///
/// Runs `cargo search deskctl --limit 1` and accepts only a result line of
/// the form `deskctl = "<version>"`. A top hit for a differently named crate
/// is rejected instead of being mistaken for deskctl's version. Spawn
/// failures, a non-zero exit and a missing result are each mapped to an
/// `upgrade_failed` error response.
fn query_cargo_latest_version() -> std::result::Result<String, Response> {
    let output = Command::new("cargo")
        .args(["search", "deskctl", "--limit", "1"])
        .output()
        .map_err(|error| {
            Response::err_with_data(
                "Failed to check the latest crates.io version.".to_string(),
                json!({
                    "kind": "upgrade_failed",
                    "install_method": InstallMethod::Cargo.as_str(),
                    "reason": "Failed to run cargo search deskctl --limit 1.",
                    "io_error": error.to_string(),
                    "command": "cargo search deskctl --limit 1",
                    "hint": upgrade_hint(InstallMethod::Cargo),
                }),
            )
        })?;

    if !output.status.success() {
        return Err(Response::err_with_data(
            "Failed to check the latest crates.io version.".to_string(),
            json!({
                "kind": "upgrade_failed",
                "install_method": InstallMethod::Cargo.as_str(),
                "reason": command_failure_reason(&output),
                "command": "cargo search deskctl --limit 1",
                "hint": upgrade_hint(InstallMethod::Cargo),
            }),
        ));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    parse_cargo_search_version(&stdout, "deskctl").ok_or_else(|| {
        Response::err_with_data(
            "Failed to determine the latest crates.io version.".to_string(),
            json!({
                "kind": "upgrade_failed",
                "install_method": InstallMethod::Cargo.as_str(),
                "reason": "cargo search did not return a published deskctl crate version.",
                "command": "cargo search deskctl --limit 1",
                "hint": upgrade_hint(InstallMethod::Cargo),
            }),
        )
    })
}

/// Extract the version of `crate_name` from `cargo search` output, whose
/// result lines look like `name = "1.2.3"    # description`.
fn parse_cargo_search_version(stdout: &str, crate_name: &str) -> Option<String> {
    stdout.lines().find_map(|line| {
        let (name, rest) = line.split_once('=')?;
        if name.trim() != crate_name {
            return None;
        }
        rest.split('"')
            .nth(1)
            .filter(|version| !version.is_empty())
            .map(str::to_string)
    })
}
/// Prompt on stdout and read a yes/no answer from stdin.
///
/// Only `y`, `Y`, `yes`, `YES` and `Yes` (after trimming whitespace) count as
/// consent; anything else, including an empty line, declines.
fn confirm_upgrade(versions: &VersionInfo) -> Result<bool> {
    const AFFIRMATIVE: [&str; 5] = ["y", "Y", "yes", "YES", "Yes"];

    print!(
        "Upgrade deskctl from {} -> {}? [y/N] ",
        versions.current, versions.latest
    );
    // The prompt has no trailing newline, so flush to make it visible.
    io::stdout().flush()?;

    let mut answer = String::new();
    io::stdin().read_line(&mut answer)?;
    Ok(AFFIRMATIVE.contains(&answer.trim()))
}
/// Build the error response for an upgrade command that ran but exited
/// unsuccessfully.
fn upgrade_command_failed_response(
    plan: &UpgradePlan,
    versions: &VersionInfo,
    output: &std::process::Output,
) -> Response {
    let command = plan.command_line();
    let message = format!("Upgrade command failed: {}", command);
    let details = json!({
        "kind": "upgrade_failed",
        "install_method": plan.install_method.as_str(),
        "current_version": versions.current,
        "latest_version": versions.latest,
        "command": command,
        "exit_code": output.status.code(),
        "reason": command_failure_reason(output),
        "hint": upgrade_hint(plan.install_method),
    });
    Response::err_with_data(message, details)
}
/// Build the error response for an upgrade command that could not be started.
fn upgrade_spawn_error_response(
    plan: &UpgradePlan,
    versions: &VersionInfo,
    error: &std::io::Error,
) -> Response {
    let command = plan.command_line();
    let message = format!("Failed to run {}", command);
    let details = json!({
        "kind": "upgrade_failed",
        "install_method": plan.install_method.as_str(),
        "current_version": versions.current,
        "latest_version": versions.latest,
        "command": command,
        "io_error": error.to_string(),
        "hint": upgrade_hint(plan.install_method),
    });
    Response::err_with_data(message, details)
}
/// Summarize why a command failed in one line.
///
/// Returns the first non-blank line (trimmed) of stderr, then of stdout. When
/// both are blank it falls back to a message built from the exit code, or a
/// generic message when there is no exit code.
fn command_failure_reason(output: &std::process::Output) -> String {
    let first_line = |bytes: &[u8]| -> Option<String> {
        String::from_utf8_lossy(bytes)
            .lines()
            .map(str::trim)
            .find(|line| !line.is_empty())
            .map(str::to_string)
    };

    if let Some(line) = first_line(&output.stderr).or_else(|| first_line(&output.stdout)) {
        return line;
    }

    match output.status.code() {
        Some(code) => format!("Command exited with status {code}."),
        None => "Command exited unsuccessfully.".to_string(),
    }
}
/// Build the upgrade command for an install method, or `None` when deskctl
/// cannot upgrade itself for that method (nix, source and unknown installs).
fn upgrade_plan(install_method: InstallMethod) -> Option<UpgradePlan> {
    let (program, args): (&'static str, Vec<&'static str>) = match install_method {
        InstallMethod::Npm => ("npm", vec!["install", "-g", "deskctl@latest"]),
        InstallMethod::Cargo => ("cargo", vec!["install", "deskctl", "--locked"]),
        InstallMethod::Nix | InstallMethod::Source | InstallMethod::Unknown => return None,
    };
    Some(UpgradePlan {
        install_method,
        program,
        args,
    })
}
/// Human-readable advice on how to upgrade (or retry) for an install method.
fn upgrade_hint(install_method: InstallMethod) -> &'static str {
    match install_method {
        // Methods deskctl can upgrade itself.
        InstallMethod::Npm => "Retry with --yes or run npm install -g deskctl@latest directly.",
        InstallMethod::Cargo => "Retry with --yes or run cargo install deskctl --locked directly.",
        // Methods that need manual action.
        InstallMethod::Nix => {
            "Use nix profile upgrade or update the flake reference you installed from."
        }
        InstallMethod::Source => {
            "Rebuild from source or reinstall deskctl through npm, cargo, or nix."
        }
        InstallMethod::Unknown => {
            "Reinstall deskctl through a supported channel such as npm, cargo, or nix."
        }
    }
}
fn detect_install_method(current_exe: &Path) -> InstallMethod {
if looks_like_npm_install(current_exe) {
return InstallMethod::Npm;
}
if looks_like_nix_install(current_exe) {
return InstallMethod::Nix;
}
if looks_like_cargo_install(current_exe) {
return InstallMethod::Cargo;
}
if looks_like_source_tree(current_exe) {
return InstallMethod::Source;
}
InstallMethod::Unknown
}
/// True when the path lies inside an npm `node_modules/deskctl` package and
/// also contains a `vendor` directory component.
fn looks_like_npm_install(path: &Path) -> bool {
    let exe = normalize(path);
    ["/node_modules/deskctl/", "/vendor/"]
        .iter()
        .all(|marker| exe.contains(*marker))
}
/// True when the path is located under `/nix/store/`.
fn looks_like_nix_install(path: &Path) -> bool {
    let exe = normalize(path);
    exe.starts_with("/nix/store/")
}
/// True when the path is exactly `<cargo home>/bin/deskctl`.
///
/// The cargo home is `$CARGO_HOME` when set and non-empty, otherwise
/// `$HOME/.cargo`. `HOME` is consulted only as that fallback, so an
/// environment that sets `CARGO_HOME` but not `HOME` is still detected.
/// Returns `false` when neither variable yields a directory.
fn looks_like_cargo_install(path: &Path) -> bool {
    let non_empty_dir = |name: &str| {
        std::env::var_os(name)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
    };

    let cargo_home =
        non_empty_dir("CARGO_HOME").or_else(|| non_empty_dir("HOME").map(|home| home.join(".cargo")));

    match cargo_home {
        Some(dir) => path == dir.join("bin").join("deskctl"),
        None => false,
    }
}
/// True when the path points at a `deskctl` binary inside a Cargo
/// `target/debug` or `target/release` directory.
fn looks_like_source_tree(path: &Path) -> bool {
    let exe = normalize(path);
    ["/target/debug/deskctl", "/target/release/deskctl"]
        .iter()
        .any(|marker| exe.contains(*marker))
}
/// Render a path as a string (lossily) with every backslash turned into a
/// forward slash, so substring checks can use a single separator.
fn normalize(path: &Path) -> String {
    let text = path.to_string_lossy();
    text.split('\\').collect::<Vec<_>>().join("/")
}
// Unit tests for install-method detection, upgrade planning and failure
// reporting. They use Unix-only `ExitStatusExt` to fabricate exit statuses.
#[cfg(test)]
mod tests {
    use std::os::unix::process::ExitStatusExt;
    use std::path::Path;

    use super::{command_failure_reason, detect_install_method, upgrade_plan, InstallMethod};

    // A binary under node_modules/deskctl/vendor is classified as an npm install.
    #[test]
    fn detects_npm_install_path() {
        let method = detect_install_method(Path::new(
            "/usr/local/lib/node_modules/deskctl/vendor/deskctl-linux-x86_64",
        ));
        assert_eq!(method, InstallMethod::Npm);
    }

    // Anything under /nix/store/ is classified as a nix install.
    #[test]
    fn detects_nix_install_path() {
        let method = detect_install_method(Path::new("/nix/store/abc123-deskctl/bin/deskctl"));
        assert_eq!(method, InstallMethod::Nix);
    }

    // A target/debug binary is classified as a source build.
    #[test]
    fn detects_source_tree_path() {
        let method =
            detect_install_method(Path::new("/Users/example/src/deskctl/target/debug/deskctl"));
        assert_eq!(method, InstallMethod::Source);
    }

    // The npm plan renders as a global install of the latest tag.
    #[test]
    fn npm_upgrade_plan_uses_global_install() {
        let plan = upgrade_plan(InstallMethod::Npm).expect("npm installs should support upgrade");
        assert_eq!(plan.command_line(), "npm install -g deskctl@latest");
    }

    // Nix installs cannot be upgraded by deskctl itself.
    #[test]
    fn nix_install_has_no_upgrade_plan() {
        assert!(upgrade_plan(InstallMethod::Nix).is_none());
    }

    // With empty stdout, the first stderr line is the reported reason.
    #[test]
    fn failure_reason_prefers_stderr() {
        let output = std::process::Output {
            // Raw wait status: exit code 1 lives in the high byte.
            status: std::process::ExitStatus::from_raw(1 << 8),
            stdout: b"".to_vec(),
            stderr: b"boom\n".to_vec(),
        };
        assert_eq!(command_failure_reason(&output), "boom");
    }
}

View file

@ -58,4 +58,12 @@ impl Response {
error: Some(msg.into()),
}
}
/// Build a failed response that carries both an error message and a
/// structured `data` payload.
pub fn err_with_data(msg: impl Into<String>, data: Value) -> Self {
    let error = Some(msg.into());
    let data = Some(data);
    Self {
        success: false,
        data,
        error,
    }
}
}

View file

@ -7,6 +7,7 @@ use crate::core::types::WindowInfo;
#[derive(Debug, Clone, Serialize, Deserialize)]
#[allow(dead_code)]
pub struct RefEntry {
pub ref_id: String,
pub window_id: String,
pub backend_window_id: u32,
pub app_class: String,
@ -30,6 +31,35 @@ pub struct RefMap {
next_window: usize,
}
/// A window selector parsed into its lookup mode.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SelectorQuery {
    /// Lookup by ref id, written `@w1` or `ref=w1`.
    Ref(String),
    /// Lookup by window id, written `id=win1`.
    WindowId(String),
    /// Title match, written `title=...`; compared ASCII case-insensitively.
    Title(String),
    /// Application class match, written `class=...`; compared ASCII case-insensitively.
    Class(String),
    /// The literal selector `focused`.
    Focused,
    /// Any other text: tried as a ref id, then a window id, then as a
    /// case-insensitive substring of the app class or title.
    Fuzzy(String),
}
/// Outcome of resolving a selector against the ref map.
///
/// In the non-match variants, `selector` is the original selector text and
/// `mode` names the lookup mode that was used (for example `"ref"`, `"id"`,
/// `"title"`, `"class"`, `"focused"` or `"fuzzy"`).
#[derive(Debug, Clone)]
pub enum ResolveResult {
    /// Exactly one window matched.
    Match(RefEntry),
    /// No window matched.
    NotFound {
        selector: String,
        mode: &'static str,
    },
    /// More than one window matched; `candidates` lists them.
    Ambiguous {
        selector: String,
        mode: &'static str,
        candidates: Vec<WindowInfo>,
    },
    /// The selector itself is malformed; `message` explains why.
    Invalid {
        selector: String,
        mode: &'static str,
        message: String,
    },
}
#[allow(dead_code)]
impl RefMap {
pub fn new() -> Self {
@ -65,6 +95,7 @@ impl RefMap {
let window_id = self.window_id_for_backend(window.native_id);
let entry = RefEntry {
ref_id: ref_id.clone(),
window_id: window_id.clone(),
backend_window_id: window.native_id,
app_class: window.app_name.clone(),
@ -110,48 +141,205 @@ impl RefMap {
window_id
}
/// Resolve a selector to a RefEntry.
/// Accepts: "@w1", "w1", "ref=w1", "win1", "id=win1", or a substring match on app_class/title.
pub fn resolve(&self, selector: &str) -> Option<&RefEntry> {
let normalized = selector
.strip_prefix('@')
.or_else(|| selector.strip_prefix("ref="))
.unwrap_or(selector);
if let Some(entry) = self.refs.get(normalized) {
return Some(entry);
}
let window_id = selector.strip_prefix("id=").unwrap_or(normalized);
if let Some(ref_id) = self.window_id_to_ref.get(window_id) {
return self.refs.get(ref_id);
}
let lower = selector.to_lowercase();
self.refs.values().find(|entry| {
entry.app_class.to_lowercase().contains(&lower)
|| entry.title.to_lowercase().contains(&lower)
})
pub fn resolve(&self, selector: &str) -> ResolveResult {
self.resolve_query(SelectorQuery::parse(selector), selector)
}
/// Resolve a selector to the center coordinates of the window.
pub fn resolve_to_center(&self, selector: &str) -> Option<(i32, i32)> {
self.resolve(selector).map(|entry| {
(
entry.x + entry.width as i32 / 2,
entry.y + entry.height as i32 / 2,
)
})
pub fn resolve_to_center(&self, selector: &str) -> ResolveResult {
self.resolve(selector)
}
pub fn entries(&self) -> impl Iterator<Item = (&String, &RefEntry)> {
self.refs.iter()
}
/// Resolve an already-parsed selector against the current ref map.
///
/// `selector` is the original selector text and is echoed back in every
/// non-match result. Ref and window-id lookups are direct map lookups. The
/// focused, title, class and fuzzy modes collect every matching entry, so they
/// can report ambiguity. Empty `title=`, `class=` and fuzzy selectors are
/// rejected as `Invalid`.
fn resolve_query(&self, query: SelectorQuery, selector: &str) -> ResolveResult {
    match query {
        SelectorQuery::Ref(ref_id) => self
            .refs
            .get(&ref_id)
            .cloned()
            .map(ResolveResult::Match)
            .unwrap_or_else(|| ResolveResult::NotFound {
                selector: selector.to_string(),
                mode: "ref",
            }),
        SelectorQuery::WindowId(window_id) => self
            .window_id_to_ref
            .get(&window_id)
            .and_then(|ref_id| self.refs.get(ref_id))
            .cloned()
            .map(ResolveResult::Match)
            .unwrap_or_else(|| ResolveResult::NotFound {
                selector: selector.to_string(),
                mode: "id",
            }),
        SelectorQuery::Focused => self.resolve_candidates(
            selector,
            "focused",
            self.refs
                .values()
                .filter(|entry| entry.focused)
                .cloned()
                .collect(),
        ),
        SelectorQuery::Title(title) => {
            if title.is_empty() {
                return ResolveResult::Invalid {
                    selector: selector.to_string(),
                    mode: "title",
                    message: "title selectors must not be empty".to_string(),
                };
            }
            self.resolve_candidates(
                selector,
                "title",
                self.refs
                    .values()
                    .filter(|entry| entry.title.eq_ignore_ascii_case(&title))
                    .cloned()
                    .collect(),
            )
        }
        SelectorQuery::Class(app_class) => {
            if app_class.is_empty() {
                return ResolveResult::Invalid {
                    selector: selector.to_string(),
                    mode: "class",
                    message: "class selectors must not be empty".to_string(),
                };
            }
            self.resolve_candidates(
                selector,
                "class",
                self.refs
                    .values()
                    .filter(|entry| entry.app_class.eq_ignore_ascii_case(&app_class))
                    .cloned()
                    .collect(),
            )
        }
        SelectorQuery::Fuzzy(value) => {
            // The empty string is a substring of every title, so without this
            // guard an empty selector would match whichever window happens to
            // be the only one open.
            if value.is_empty() {
                return ResolveResult::Invalid {
                    selector: selector.to_string(),
                    mode: "fuzzy",
                    message: "selectors must not be empty".to_string(),
                };
            }
            // A bare ref id ("w1") or window id ("win1") wins over substring matching.
            if let Some(entry) = self.refs.get(&value).cloned() {
                return ResolveResult::Match(entry);
            }
            if let Some(entry) = self
                .window_id_to_ref
                .get(&value)
                .and_then(|ref_id| self.refs.get(ref_id))
                .cloned()
            {
                return ResolveResult::Match(entry);
            }
            let lower = value.to_lowercase();
            self.resolve_candidates(
                selector,
                "fuzzy",
                self.refs
                    .values()
                    .filter(|entry| {
                        entry.app_class.to_lowercase().contains(&lower)
                            || entry.title.to_lowercase().contains(&lower)
                    })
                    .cloned()
                    .collect(),
            )
        }
    }
}
fn resolve_candidates(
&self,
selector: &str,
mode: &'static str,
mut candidates: Vec<RefEntry>,
) -> ResolveResult {
candidates.sort_by(|left, right| left.ref_id.cmp(&right.ref_id));
match candidates.len() {
0 => ResolveResult::NotFound {
selector: selector.to_string(),
mode,
},
1 => ResolveResult::Match(candidates.remove(0)),
_ => ResolveResult::Ambiguous {
selector: selector.to_string(),
mode,
candidates: candidates
.into_iter()
.map(|entry| entry.to_window_info())
.collect(),
},
}
}
}
impl SelectorQuery {
    /// Parse selector text into a query.
    ///
    /// Recognized forms are `@<ref>`, `ref=<ref>`, `id=<window id>`,
    /// `title=<title>`, `class=<class>` and the literal `focused`. Anything
    /// else becomes a fuzzy query over the whole text.
    pub fn parse(selector: &str) -> Self {
        if selector == "focused" {
            return Self::Focused;
        }

        // Prefixes are tried in this order; the first one that matches wins.
        let prefixed: [(&str, fn(String) -> Self); 5] = [
            ("@", Self::Ref),
            ("ref=", Self::Ref),
            ("id=", Self::WindowId),
            ("title=", Self::Title),
            ("class=", Self::Class),
        ];
        for (prefix, build) in prefixed {
            if let Some(rest) = selector.strip_prefix(prefix) {
                return build(rest.to_string());
            }
        }

        Self::Fuzzy(selector.to_string())
    }

    /// Whether the query is anything other than a ref lookup.
    pub fn needs_live_refresh(&self) -> bool {
        match self {
            Self::Ref(_) => false,
            _ => true,
        }
    }
}
impl RefEntry {
    /// Center point of the window: origin plus half of each dimension
    /// (integer division).
    pub fn center(&self) -> (i32, i32) {
        let half_width = self.width as i32 / 2;
        let half_height = self.height as i32 / 2;
        (self.x + half_width, self.y + half_height)
    }

    /// Copy this entry into the public `WindowInfo` shape; `app_class` is
    /// exposed as `app_name`.
    pub fn to_window_info(&self) -> WindowInfo {
        let ref_id = self.ref_id.clone();
        let window_id = self.window_id.clone();
        let title = self.title.clone();
        let app_name = self.app_class.clone();
        WindowInfo {
            ref_id,
            window_id,
            title,
            app_name,
            x: self.x,
            y: self.y,
            width: self.width,
            height: self.height,
            focused: self.focused,
            minimized: self.minimized,
        }
    }
}
impl ResolveResult {
    /// Borrow the matched entry, or `None` for every non-match outcome.
    pub fn matched_entry(&self) -> Option<&RefEntry> {
        if let Self::Match(entry) = self {
            Some(entry)
        } else {
            None
        }
    }
}
#[cfg(test)]
mod tests {
use super::RefMap;
use super::{RefMap, ResolveResult, SelectorQuery};
use crate::backend::BackendWindow;
fn sample_window(native_id: u32, title: &str) -> BackendWindow {
@ -184,12 +372,18 @@ mod tests {
let public = refs.rebuild(&[sample_window(42, "Editor")]);
let window_id = public[0].window_id.clone();
assert_eq!(refs.resolve("@w1").unwrap().window_id, window_id);
assert_eq!(refs.resolve(&window_id).unwrap().backend_window_id, 42);
assert_eq!(
refs.resolve(&format!("id={window_id}")).unwrap().title,
"Editor"
);
match refs.resolve("@w1") {
ResolveResult::Match(entry) => assert_eq!(entry.window_id, window_id),
other => panic!("unexpected resolve result: {other:?}"),
}
match refs.resolve(&window_id) {
ResolveResult::Match(entry) => assert_eq!(entry.backend_window_id, 42),
other => panic!("unexpected resolve result: {other:?}"),
}
match refs.resolve(&format!("id={window_id}")) {
ResolveResult::Match(entry) => assert_eq!(entry.title, "Editor"),
other => panic!("unexpected resolve result: {other:?}"),
}
}
#[test]
@ -197,6 +391,95 @@ mod tests {
let mut refs = RefMap::new();
refs.rebuild(&[sample_window(7, "Browser")]);
assert_eq!(refs.resolve_to_center("w1"), Some((160, 120)));
match refs.resolve_to_center("w1") {
ResolveResult::Match(entry) => assert_eq!(entry.center(), (160, 120)),
other => panic!("unexpected resolve result: {other:?}"),
}
}
#[test]
fn selector_query_parses_explicit_modes() {
assert_eq!(
SelectorQuery::parse("@w1"),
SelectorQuery::Ref("w1".to_string())
);
assert_eq!(
SelectorQuery::parse("ref=w2"),
SelectorQuery::Ref("w2".to_string())
);
assert_eq!(
SelectorQuery::parse("id=win4"),
SelectorQuery::WindowId("win4".to_string())
);
assert_eq!(
SelectorQuery::parse("title=Chromium"),
SelectorQuery::Title("Chromium".to_string())
);
assert_eq!(
SelectorQuery::parse("class=Navigator"),
SelectorQuery::Class("Navigator".to_string())
);
assert_eq!(SelectorQuery::parse("focused"), SelectorQuery::Focused);
}
#[test]
fn resolve_supports_exact_title_class_and_focused_modes() {
let mut refs = RefMap::new();
refs.rebuild(&[
sample_window(1, "Browser"),
BackendWindow {
native_id: 2,
title: "Editor".to_string(),
app_name: "Code".to_string(),
x: 0,
y: 0,
width: 10,
height: 10,
focused: false,
minimized: false,
},
]);
match refs.resolve("focused") {
ResolveResult::Match(entry) => assert_eq!(entry.title, "Browser"),
other => panic!("unexpected resolve result: {other:?}"),
}
match refs.resolve("title=Editor") {
ResolveResult::Match(entry) => assert_eq!(entry.app_class, "Code"),
other => panic!("unexpected resolve result: {other:?}"),
}
match refs.resolve("class=code") {
ResolveResult::Match(entry) => assert_eq!(entry.title, "Editor"),
other => panic!("unexpected resolve result: {other:?}"),
}
}
#[test]
fn fuzzy_resolution_fails_with_candidates_when_ambiguous() {
let mut refs = RefMap::new();
refs.rebuild(&[
sample_window(1, "Chromium"),
BackendWindow {
native_id: 2,
title: "Chromium Settings".to_string(),
app_name: "Chromium".to_string(),
x: 0,
y: 0,
width: 10,
height: 10,
focused: false,
minimized: false,
},
]);
match refs.resolve("chromium") {
ResolveResult::Ambiguous {
mode, candidates, ..
} => {
assert_eq!(mode, "fuzzy");
assert_eq!(candidates.len(), 2);
}
other => panic!("unexpected resolve result: {other:?}"),
}
}
}

View file

@ -8,7 +8,7 @@ pub struct Snapshot {
}
#[allow(dead_code)]
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WindowInfo {
pub ref_id: String,
pub window_id: String,
@ -22,6 +22,47 @@ pub struct WindowInfo {
pub minimized: bool,
}
/// Serializable description of a single monitor.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MonitorInfo {
    /// Monitor name.
    pub name: String,
    /// Horizontal position of the monitor's origin.
    pub x: i32,
    /// Vertical position of the monitor's origin.
    pub y: i32,
    /// Width of the monitor area.
    pub width: u32,
    /// Height of the monitor area.
    pub height: u32,
    /// Physical width in millimeters.
    pub width_mm: u32,
    /// Physical height in millimeters.
    pub height_mm: u32,
    /// Whether this is the primary monitor.
    pub primary: bool,
    /// Backend-provided "automatic" flag.
    // NOTE(review): presumably copied straight from the backend monitor record - confirm.
    pub automatic: bool,
}
/// Serializable width/height pair describing a screen.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScreenSize {
    /// Screen width.
    pub width: u32,
    /// Screen height.
    pub height: u32,
}
/// Serializable version report: the deskctl version and the backend name.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VersionInfo {
    /// deskctl version string.
    pub version: String,
    /// Name of the desktop backend in use.
    pub backend: String,
}
/// Serializable runtime diagnostics for the current session.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SystemInfo {
    /// Name of the desktop backend in use.
    pub backend: String,
    /// Display identifier, when known.
    // NOTE(review): presumably the DISPLAY environment variable - confirm against the handler.
    pub display: Option<String>,
    /// Session type, when known.
    // NOTE(review): presumably XDG_SESSION_TYPE - confirm against the handler.
    pub session_type: Option<String>,
    /// deskctl session name.
    pub session: String,
    /// Path of the daemon's socket.
    pub socket_path: String,
    /// Overall screen size.
    pub screen: ScreenSize,
    /// Number of monitors reported.
    pub monitor_count: usize,
    /// Per-monitor details.
    pub monitors: Vec<MonitorInfo>,
}
impl std::fmt::Display for WindowInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let state = if self.focused {
@ -47,9 +88,21 @@ impl std::fmt::Display for WindowInfo {
#[allow(dead_code)]
fn truncate(s: &str, max: usize) -> String {
if s.len() <= max {
if s.chars().count() <= max {
s.to_string()
} else {
format!("{}...", &s[..max - 3])
let truncated: String = s.chars().take(max.saturating_sub(3)).collect();
format!("{truncated}...")
}
}
// Regression test for `truncate` on input containing a multi-byte character.
#[cfg(test)]
mod tests {
    use super::truncate;

    // "fire\u{00E9}fox" is 8 characters but 9 bytes; truncating to 7 must keep
    // the first 4 characters and append "..." without slicing inside the
    // two-byte character.
    #[test]
    fn truncate_is_char_safe() {
        let input = format!("fire{}fox", '\u{00E9}');
        assert_eq!(truncate(&input, 7), "fire...");
    }
}

View file

@ -2,11 +2,13 @@ use std::sync::Arc;
use anyhow::{Context, Result};
use tokio::sync::Mutex;
use tokio::time::{sleep, Duration, Instant};
use super::state::DaemonState;
use crate::backend::annotate::annotate_screenshot;
use crate::core::protocol::{Request, Response};
use crate::core::types::{Snapshot, WindowInfo};
use crate::core::refs::{ResolveResult, SelectorQuery};
use crate::core::types::{MonitorInfo, ScreenSize, Snapshot, SystemInfo, VersionInfo, WindowInfo};
pub async fn handle_request(request: &Request, state: &Arc<Mutex<DaemonState>>) -> Response {
match request.action.as_str() {
@ -27,6 +29,12 @@ pub async fn handle_request(request: &Request, state: &Arc<Mutex<DaemonState>>)
"list-windows" => handle_list_windows(state).await,
"get-screen-size" => handle_get_screen_size(state).await,
"get-mouse-position" => handle_get_mouse_position(state).await,
"get-active-window" => handle_get_active_window(state).await,
"get-monitors" => handle_get_monitors(state).await,
"get-version" => handle_get_version(state).await,
"get-systeminfo" => handle_get_systeminfo(state).await,
"wait-window" => handle_wait(request, state, WaitKind::Window).await,
"wait-focus" => handle_wait(request, state, WaitKind::Focus).await,
"screenshot" => handle_screenshot(request, state).await,
"launch" => handle_launch(request, state).await,
action => Response::err(format!("Unknown action: {action}")),
@ -54,6 +62,7 @@ async fn handle_click(request: &Request, state: &Arc<Mutex<DaemonState>>) -> Res
};
let mut state = state.lock().await;
let selector_query = SelectorQuery::parse(&selector);
if let Some((x, y)) = parse_coords(&selector) {
return match state.backend.click(x, y) {
@ -62,14 +71,27 @@ async fn handle_click(request: &Request, state: &Arc<Mutex<DaemonState>>) -> Res
};
}
if selector_query.needs_live_refresh() {
if let Err(error) = refresh_windows(&mut state) {
return Response::err(format!("Click failed: {error}"));
}
}
match state.ref_map.resolve_to_center(&selector) {
Some((x, y)) => match state.backend.click(x, y) {
Ok(()) => {
Response::ok(serde_json::json!({"clicked": {"x": x, "y": y, "ref": selector}}))
ResolveResult::Match(entry) => {
let (x, y) = entry.center();
match state.backend.click(x, y) {
Ok(()) => Response::ok(serde_json::json!({
"clicked": {"x": x, "y": y},
"selector": selector,
"ref_id": entry.ref_id,
"window_id": entry.window_id,
"title": entry.title,
})),
Err(error) => Response::err(format!("Click failed: {error}")),
}
Err(error) => Response::err(format!("Click failed: {error}")),
},
None => Response::err(format!("Could not resolve selector: {selector}")),
}
outcome => selector_failure_response(outcome),
}
}
@ -80,6 +102,7 @@ async fn handle_dblclick(request: &Request, state: &Arc<Mutex<DaemonState>>) ->
};
let mut state = state.lock().await;
let selector_query = SelectorQuery::parse(&selector);
if let Some((x, y)) = parse_coords(&selector) {
return match state.backend.dblclick(x, y) {
@ -88,14 +111,27 @@ async fn handle_dblclick(request: &Request, state: &Arc<Mutex<DaemonState>>) ->
};
}
if selector_query.needs_live_refresh() {
if let Err(error) = refresh_windows(&mut state) {
return Response::err(format!("Double-click failed: {error}"));
}
}
match state.ref_map.resolve_to_center(&selector) {
Some((x, y)) => match state.backend.dblclick(x, y) {
Ok(()) => Response::ok(
serde_json::json!({"double_clicked": {"x": x, "y": y, "ref": selector}}),
),
Err(error) => Response::err(format!("Double-click failed: {error}")),
},
None => Response::err(format!("Could not resolve selector: {selector}")),
ResolveResult::Match(entry) => {
let (x, y) = entry.center();
match state.backend.dblclick(x, y) {
Ok(()) => Response::ok(serde_json::json!({
"double_clicked": {"x": x, "y": y},
"selector": selector,
"ref_id": entry.ref_id,
"window_id": entry.window_id,
"title": entry.title,
})),
Err(error) => Response::err(format!("Double-click failed: {error}")),
}
}
outcome => selector_failure_response(outcome),
}
}
@ -218,9 +254,15 @@ async fn handle_window_action(
};
let mut state = state.lock().await;
let selector_query = SelectorQuery::parse(&selector);
if selector_query.needs_live_refresh() {
if let Err(error) = refresh_windows(&mut state) {
return Response::err(format!("{action} failed: {error}"));
}
}
let entry = match state.ref_map.resolve(&selector) {
Some(entry) => entry.clone(),
None => return Response::err(format!("Could not resolve window: {selector}")),
ResolveResult::Match(entry) => entry,
outcome => return selector_failure_response(outcome),
};
let result = match action {
@ -233,7 +275,10 @@ async fn handle_window_action(
Ok(()) => Response::ok(serde_json::json!({
"action": action,
"window": entry.title,
"title": entry.title,
"ref_id": entry.ref_id,
"window_id": entry.window_id,
"selector": selector,
})),
Err(error) => Response::err(format!("{action} failed: {error}")),
}
@ -248,15 +293,24 @@ async fn handle_move_window(request: &Request, state: &Arc<Mutex<DaemonState>>)
let y = request.extra.get("y").and_then(|v| v.as_i64()).unwrap_or(0) as i32;
let mut state = state.lock().await;
let selector_query = SelectorQuery::parse(&selector);
if selector_query.needs_live_refresh() {
if let Err(error) = refresh_windows(&mut state) {
return Response::err(format!("Move failed: {error}"));
}
}
let entry = match state.ref_map.resolve(&selector) {
Some(entry) => entry.clone(),
None => return Response::err(format!("Could not resolve window: {selector}")),
ResolveResult::Match(entry) => entry,
outcome => return selector_failure_response(outcome),
};
match state.backend.move_window(entry.backend_window_id, x, y) {
Ok(()) => Response::ok(serde_json::json!({
"moved": entry.title,
"title": entry.title,
"ref_id": entry.ref_id,
"window_id": entry.window_id,
"selector": selector,
"x": x,
"y": y,
})),
@ -281,9 +335,15 @@ async fn handle_resize_window(request: &Request, state: &Arc<Mutex<DaemonState>>
.unwrap_or(600) as u32;
let mut state = state.lock().await;
let selector_query = SelectorQuery::parse(&selector);
if selector_query.needs_live_refresh() {
if let Err(error) = refresh_windows(&mut state) {
return Response::err(format!("Resize failed: {error}"));
}
}
let entry = match state.ref_map.resolve(&selector) {
Some(entry) => entry.clone(),
None => return Response::err(format!("Could not resolve window: {selector}")),
ResolveResult::Match(entry) => entry,
outcome => return selector_failure_response(outcome),
};
match state
@ -292,7 +352,10 @@ async fn handle_resize_window(request: &Request, state: &Arc<Mutex<DaemonState>>
{
Ok(()) => Response::ok(serde_json::json!({
"resized": entry.title,
"title": entry.title,
"ref_id": entry.ref_id,
"window_id": entry.window_id,
"selector": selector,
"width": width,
"height": height,
})),
@ -324,6 +387,185 @@ async fn handle_get_mouse_position(state: &Arc<Mutex<DaemonState>>) -> Response
}
}
/// Handles `get-active-window`: reports the currently active window.
///
/// The backend is asked for its active window first, then the window list is
/// refreshed. The active window is matched against the refreshed ref map by
/// backend window id; if that yields nothing (or the backend reported no
/// active window) the first window flagged `focused` in the refreshed list is
/// used instead. When neither lookup succeeds a structured `not_found` error
/// is returned.
async fn handle_get_active_window(state: &Arc<Mutex<DaemonState>>) -> Response {
    let mut state = state.lock().await;

    let reported_active = match state.backend.active_window() {
        Err(error) => return Response::err(format!("Failed: {error}")),
        Ok(window) => window,
    };
    let windows = match refresh_windows(&mut state) {
        Err(error) => return Response::err(format!("Failed: {error}")),
        Ok(windows) => windows,
    };

    // Fallback shared by both "no active window" and "active window not in the ref map".
    let first_focused = || windows.iter().find(|window| window.focused).cloned();

    let active_window = reported_active
        .and_then(|active| {
            state.ref_map.entries().find_map(|(_, entry)| {
                (entry.backend_window_id == active.native_id).then(|| entry.to_window_info())
            })
        })
        .or_else(first_focused);

    match active_window {
        Some(window) => Response::ok(serde_json::json!({"window": window})),
        None => Response::err_with_data(
            "No focused window is available",
            serde_json::json!({"kind": "not_found", "mode": "focused"}),
        ),
    }
}
async fn handle_get_monitors(state: &Arc<Mutex<DaemonState>>) -> Response {
let state = state.lock().await;
match state.backend.list_monitors() {
Ok(monitors) => {
let monitors: Vec<MonitorInfo> = monitors.into_iter().map(Into::into).collect();
Response::ok(serde_json::json!({
"count": monitors.len(),
"monitors": monitors,
}))
}
Err(error) => Response::err(format!("Failed: {error}")),
}
}
/// Handles `get-version`: reports the crate version and the active backend name.
///
/// If serializing the payload fails, the response carries the default JSON value.
async fn handle_get_version(state: &Arc<Mutex<DaemonState>>) -> Response {
    let state = state.lock().await;

    let version = env!("CARGO_PKG_VERSION").to_string();
    let backend = state.backend.backend_name().to_string();
    let payload = serde_json::to_value(VersionInfo { version, backend });

    Response::ok(payload.unwrap_or_default())
}
async fn handle_get_systeminfo(state: &Arc<Mutex<DaemonState>>) -> Response {
let state = state.lock().await;
let screen = match state.backend.screen_size() {
Ok((width, height)) => ScreenSize { width, height },
Err(error) => return Response::err(format!("Failed: {error}")),
};
let monitors = match state.backend.list_monitors() {
Ok(monitors) => monitors.into_iter().map(Into::into).collect::<Vec<_>>(),
Err(error) => return Response::err(format!("Failed: {error}")),
};
let info = SystemInfo {
backend: state.backend.backend_name().to_string(),
display: std::env::var("DISPLAY")
.ok()
.filter(|value| !value.is_empty()),
session_type: std::env::var("XDG_SESSION_TYPE")
.ok()
.filter(|value| !value.is_empty()),
session: state.session.clone(),
socket_path: state.socket_path.display().to_string(),
screen,
monitor_count: monitors.len(),
monitors,
};
Response::ok(serde_json::to_value(info).unwrap_or_default())
}
/// Handles `wait-window` / `wait-focus`: polls until `selector` satisfies `wait_kind`.
///
/// Request fields (read from `request.extra`):
/// * `selector` (string, required) – the window selector to resolve.
/// * `timeout_ms` (u64, default 10 000) – overall time budget.
/// * `poll_ms` (u64, default 250) – pause between polls.
///
/// Each iteration takes the state lock only long enough to refresh the window
/// list and evaluate the selector, then releases it before sleeping.
///
/// Returns:
/// * success with the matched window and `elapsed_ms` once the condition holds;
/// * the failure response produced by `observe_wait` for non-retryable outcomes;
/// * `Wait failed: …` if refreshing the window list fails;
/// * a structured `timeout` error (including the last retryable observation)
///   once the deadline has passed.
async fn handle_wait(
    request: &Request,
    state: &Arc<Mutex<DaemonState>>,
    wait_kind: WaitKind,
) -> Response {
    let selector = match request.extra.get("selector").and_then(|v| v.as_str()) {
        Some(selector) => selector.to_string(),
        None => return Response::err("Missing 'selector' field"),
    };
    let timeout_ms = request
        .extra
        .get("timeout_ms")
        .and_then(|v| v.as_u64())
        .unwrap_or(10_000);
    let poll_ms = request
        .extra
        .get("poll_ms")
        .and_then(|v| v.as_u64())
        .unwrap_or(250);
    // A zero interval would make the sleep between polls zero-length; keep a
    // minimal pause instead. The requested `poll_ms` is still what gets reported.
    let poll_interval = Duration::from_millis(poll_ms.max(1));

    let start = Instant::now();
    // `Instant + Duration` panics on overflow and `timeout_ms` is client-supplied,
    // so use the checked form. `None` means the deadline is unrepresentably far
    // away, i.e. effectively "no deadline".
    let deadline = start.checked_add(Duration::from_millis(timeout_ms));

    loop {
        let outcome = {
            let mut state = state.lock().await;
            if let Err(error) = refresh_windows(&mut state) {
                return Response::err(format!("Wait failed: {error}"));
            }
            observe_wait(&state, &selector, wait_kind)
        };

        let last_observation = match outcome {
            WaitObservation::Satisfied(window) => {
                let elapsed_ms = start.elapsed().as_millis() as u64;
                return Response::ok(serde_json::json!({
                    "wait": wait_kind.as_str(),
                    "selector": selector,
                    "elapsed_ms": elapsed_ms,
                    "window": window,
                }));
            }
            WaitObservation::Failure(response) => return response,
            WaitObservation::Retry { observation } => observation,
        };

        let remaining = deadline.map(|deadline| deadline.saturating_duration_since(Instant::now()));
        if remaining == Some(Duration::ZERO) {
            return Response::err_with_data(
                format!(
                    "Timed out waiting for {} to match selector: {}",
                    wait_kind.as_str(),
                    selector
                ),
                serde_json::json!({
                    "kind": "timeout",
                    "wait": wait_kind.as_str(),
                    "selector": selector,
                    "timeout_ms": timeout_ms,
                    "poll_ms": poll_ms,
                    "last_observation": last_observation,
                }),
            );
        }

        // Never sleep past the deadline: a poll interval longer than the
        // remaining budget would otherwise overshoot the requested timeout.
        let pause = remaining.map_or(poll_interval, |left| left.min(poll_interval));
        sleep(pause).await;
    }
}
/// The condition a wait request polls for.
#[derive(Clone, Copy)]
enum WaitKind {
    /// Satisfied as soon as the selector resolves to a window.
    Window,
    /// Satisfied once the resolved window is also focused.
    Focus,
}

impl WaitKind {
    /// Stable lowercase label used in response payloads and messages.
    fn as_str(self) -> &'static str {
        match self {
            WaitKind::Focus => "focus",
            WaitKind::Window => "window",
        }
    }
}
/// Result of evaluating a wait condition once (produced by `observe_wait`).
enum WaitObservation {
/// The condition holds; carries the matched window.
Satisfied(WindowInfo),
/// The condition does not hold yet; `observation` describes what was seen
/// and is echoed back as `last_observation` if the wait times out.
Retry { observation: serde_json::Value },
/// Polling should stop; carries the response to return as-is.
Failure(Response),
}
async fn handle_screenshot(request: &Request, state: &Arc<Mutex<DaemonState>>) -> Response {
let annotate = request
.extra
@ -387,6 +629,97 @@ fn refresh_windows(state: &mut DaemonState) -> Result<Vec<WindowInfo>> {
Ok(state.ref_map.rebuild(&windows))
}
/// Converts a non-matching selector resolution into a structured error response.
///
/// Every response carries a `kind` discriminator (`selector_not_found`,
/// `selector_ambiguous` or `selector_invalid`) plus the selector and mode;
/// ambiguous results add the `candidates`, invalid ones the `message`.
///
/// # Panics
///
/// Panics if called with `ResolveResult::Match`; callers are expected to
/// handle the success case themselves.
fn selector_failure_response(result: ResolveResult) -> Response {
    let (summary, details) = match result {
        ResolveResult::Match(_) => unreachable!(),
        ResolveResult::NotFound { selector, mode } => (
            format!("Could not resolve selector: {selector}"),
            serde_json::json!({
                "kind": "selector_not_found",
                "selector": selector,
                "mode": mode,
            }),
        ),
        ResolveResult::Ambiguous {
            selector,
            mode,
            candidates,
        } => (
            format!("Selector is ambiguous: {selector}"),
            serde_json::json!({
                "kind": "selector_ambiguous",
                "selector": selector,
                "mode": mode,
                "candidates": candidates,
            }),
        ),
        ResolveResult::Invalid {
            selector,
            mode,
            message,
        } => (
            format!("Invalid selector '{selector}': {message}"),
            serde_json::json!({
                "kind": "selector_invalid",
                "selector": selector,
                "mode": mode,
                "message": message,
            }),
        ),
    };

    Response::err_with_data(summary, details)
}
fn observe_wait(state: &DaemonState, selector: &str, wait_kind: WaitKind) -> WaitObservation {
match state.ref_map.resolve(selector) {
ResolveResult::Match(entry) => {
let window = entry.to_window_info();
match wait_kind {
WaitKind::Window => WaitObservation::Satisfied(window),
WaitKind::Focus if window.focused => WaitObservation::Satisfied(window),
WaitKind::Focus => WaitObservation::Retry {
observation: serde_json::json!({
"kind": "window_not_focused",
"window": window,
}),
},
}
}
ResolveResult::NotFound { selector, mode } => WaitObservation::Retry {
observation: serde_json::json!({
"kind": "selector_not_found",
"selector": selector,
"mode": mode,
}),
},
ResolveResult::Ambiguous {
selector,
mode,
candidates,
} => WaitObservation::Failure(Response::err_with_data(
format!("Selector is ambiguous: {selector}"),
serde_json::json!({
"kind": "selector_ambiguous",
"selector": selector,
"mode": mode,
"candidates": candidates,
}),
)),
ResolveResult::Invalid {
selector,
mode,
message,
} => WaitObservation::Failure(Response::err_with_data(
format!("Invalid selector '{selector}': {message}"),
serde_json::json!({
"kind": "selector_invalid",
"selector": selector,
"mode": mode,
"message": message,
}),
)),
}
}
fn capture_snapshot(
state: &mut DaemonState,
annotate: bool,
@ -438,3 +771,19 @@ fn parse_coords(value: &str) -> Option<(i32, i32)> {
let y = parts[1].trim().parse().ok()?;
Some((x, y))
}
impl From<crate::backend::BackendMonitor> for MonitorInfo {
fn from(value: crate::backend::BackendMonitor) -> Self {
Self {
name: value.name,
x: value.x,
y: value.y,
width: value.width,
height: value.height,
width_mm: value.width_mm,
height_mm: value.height_mm,
primary: value.primary,
automatic: value.automatic,
}
}
}

View file

@ -1,6 +1,7 @@
mod handler;
mod state;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use anyhow::{Context, Result};
@ -12,6 +13,29 @@ use crate::core::paths::{pid_path_from_env, socket_path_from_env};
use crate::core::session;
use state::DaemonState;
/// Holds the daemon's runtime file paths so they can be removed when the
/// guard goes out of scope (see the `Drop` impl).
struct RuntimePathsGuard {
    /// Path of the daemon socket.
    socket_path: PathBuf,
    /// Path of the PID file, if one is configured.
    pid_path: Option<PathBuf>,
}

impl RuntimePathsGuard {
    /// Creates a guard for the given socket path and optional PID-file path.
    fn new(socket_path: PathBuf, pid_path: Option<PathBuf>) -> Self {
        RuntimePathsGuard {
            socket_path,
            pid_path,
        }
    }
}
/// Removes the socket and then, if configured, the PID file when the guard is dropped.
impl Drop for RuntimePathsGuard {
    fn drop(&mut self) {
        // Socket first, PID file second.
        let runtime_paths = std::iter::once(&self.socket_path).chain(self.pid_path.iter());
        for path in runtime_paths {
            remove_runtime_path(path);
        }
    }
}
pub fn run() -> Result<()> {
// Validate session before starting
session::detect_session()?;
@ -25,7 +49,6 @@ pub fn run() -> Result<()> {
async fn async_run() -> Result<()> {
let socket_path = socket_path_from_env().context("DESKCTL_SOCKET_PATH not set")?;
let pid_path = pid_path_from_env();
// Clean up stale socket
@ -33,20 +56,21 @@ async fn async_run() -> Result<()> {
std::fs::remove_file(&socket_path)?;
}
// Write PID file
if let Some(ref pid_path) = pid_path {
std::fs::write(pid_path, std::process::id().to_string())?;
}
let listener = UnixListener::bind(&socket_path)
.context(format!("Failed to bind socket: {}", socket_path.display()))?;
let session = std::env::var("DESKCTL_SESSION").unwrap_or_else(|_| "default".to_string());
let state = Arc::new(Mutex::new(
DaemonState::new(session, socket_path.clone())
.context("Failed to initialize daemon state")?,
));
let listener = UnixListener::bind(&socket_path)
.context(format!("Failed to bind socket: {}", socket_path.display()))?;
let _runtime_paths = RuntimePathsGuard::new(socket_path.clone(), pid_path.clone());
// Write PID file only after the daemon is ready to serve requests.
if let Some(ref pid_path) = pid_path {
std::fs::write(pid_path, std::process::id().to_string())?;
}
let shutdown = Arc::new(tokio::sync::Notify::new());
let shutdown_clone = shutdown.clone();
@ -75,14 +99,6 @@ async fn async_run() -> Result<()> {
}
}
// Cleanup
if socket_path.exists() {
let _ = std::fs::remove_file(&socket_path);
}
if let Some(ref pid_path) = pid_path {
let _ = std::fs::remove_file(pid_path);
}
Ok(())
}
@ -123,3 +139,11 @@ async fn handle_connection(
Ok(())
}
/// Best-effort removal of a runtime file.
///
/// A missing file is not an error; any other failure is logged to stderr and
/// otherwise ignored.
fn remove_runtime_path(path: &Path) {
    match std::fs::remove_file(path) {
        Ok(()) => {}
        // Already gone: nothing to clean up.
        Err(error) if error.kind() == std::io::ErrorKind::NotFound => {}
        Err(error) => {
            eprintln!("Failed to remove runtime path {}: {error}", path.display());
        }
    }
}

View file

@ -4,6 +4,7 @@ use std::os::unix::net::UnixListener;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::sync::{Mutex, OnceLock};
use std::thread;
use std::time::{SystemTime, UNIX_EPOCH};
use anyhow::{anyhow, bail, Context, Result};
@ -21,6 +22,13 @@ pub fn env_lock() -> &'static Mutex<()> {
LOCK.get_or_init(|| Mutex::new(()))
}
/// Acquires the shared environment lock, recovering the guard even if the
/// mutex was poisoned by a panic in another holder.
pub fn env_lock_guard() -> std::sync::MutexGuard<'static, ()> {
    env_lock()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}
pub struct SessionEnvGuard {
old_session_type: Option<String>,
}
@ -53,8 +61,7 @@ pub struct FixtureWindow {
impl FixtureWindow {
pub fn create(title: &str, app_class: &str) -> Result<Self> {
let (conn, screen_num) =
x11rb::connect(None).context("Failed to connect to the integration test display")?;
let (conn, screen_num) = connect_to_test_display()?;
let screen = &conn.setup().roots[screen_num];
let window = conn.generate_id()?;
@ -96,6 +103,26 @@ impl FixtureWindow {
}
}
fn connect_to_test_display() -> Result<(RustConnection, usize)> {
let max_attempts = 10;
let mut last_error = None;
for attempt in 0..max_attempts {
match x11rb::connect(None) {
Ok(connection) => return Ok(connection),
Err(error) => {
last_error = Some(anyhow!(error));
if attempt + 1 < max_attempts {
thread::sleep(std::time::Duration::from_millis(100 * (attempt + 1) as u64));
}
}
}
}
Err(last_error.expect("x11 connection attempts should capture an error"))
.context("Failed to connect to the integration test display")
}
impl Drop for FixtureWindow {
fn drop(&mut self) {
let _ = self.conn.destroy_window(self.window);
@ -135,6 +162,10 @@ impl TestSession {
.expect("TestSession always has an explicit socket path")
}
/// Path of the daemon PID file inside this session's root directory.
pub fn pid_path(&self) -> PathBuf {
    const PID_FILE_NAME: &str = "deskctl.pid";
    self.root.join(PID_FILE_NAME)
}
pub fn create_stale_socket(&self) -> Result<()> {
let listener = UnixListener::bind(self.socket_path())
.with_context(|| format!("Failed to bind {}", self.socket_path().display()))?;
@ -180,6 +211,29 @@ impl TestSession {
)
})
}
pub fn run_daemon<I, K, V>(&self, env: I) -> Result<Output>
where
I: IntoIterator<Item = (K, V)>,
K: AsRef<std::ffi::OsStr>,
V: AsRef<std::ffi::OsStr>,
{
let mut command = Command::new(env!("CARGO_BIN_EXE_deskctl"));
command
.env("DESKCTL_DAEMON", "1")
.env("DESKCTL_SOCKET_PATH", self.socket_path())
.env("DESKCTL_PID_PATH", self.pid_path())
.env("DESKCTL_SESSION", &self.opts.session)
.envs(env);
command.output().with_context(|| {
format!(
"Failed to run daemon {} against {}",
env!("CARGO_BIN_EXE_deskctl"),
self.socket_path().display()
)
})
}
}
impl Drop for TestSession {
@ -188,6 +242,9 @@ impl Drop for TestSession {
if self.socket_path().exists() {
let _ = std::fs::remove_file(self.socket_path());
}
if self.pid_path().exists() {
let _ = std::fs::remove_file(self.pid_path());
}
let _ = std::fs::remove_dir_all(&self.root);
}
}
@ -218,3 +275,7 @@ pub fn successful_json_response(output: Output) -> Result<serde_json::Value> {
serde_json::from_slice(&output.stdout).context("Failed to parse JSON output from deskctl")
}
/// Parses the process's stdout as JSON without checking its exit status.
pub fn json_response(output: &Output) -> Result<serde_json::Value> {
    let stdout = output.stdout.as_slice();
    serde_json::from_slice::<serde_json::Value>(stdout)
        .context("Failed to parse JSON output from deskctl")
}

View file

@ -8,13 +8,13 @@ use deskctl::core::doctor;
use deskctl::core::protocol::Request;
use self::support::{
deskctl_tmp_screenshot_count, env_lock, successful_json_response, FixtureWindow,
SessionEnvGuard, TestSession,
deskctl_tmp_screenshot_count, env_lock_guard, json_response, successful_json_response,
FixtureWindow, SessionEnvGuard, TestSession,
};
#[test]
fn doctor_reports_healthy_x11_environment() -> Result<()> {
let _guard = env_lock().lock().unwrap();
let _guard = env_lock_guard();
let Some(_env) = SessionEnvGuard::prepare() else {
eprintln!("Skipping X11 integration test because DISPLAY is not set");
return Ok(());
@ -46,7 +46,7 @@ fn doctor_reports_healthy_x11_environment() -> Result<()> {
#[test]
fn list_windows_is_side_effect_free() -> Result<()> {
let _guard = env_lock().lock().unwrap();
let _guard = env_lock_guard();
let Some(_env) = SessionEnvGuard::prepare() else {
eprintln!("Skipping X11 integration test because DISPLAY is not set");
return Ok(());
@ -84,7 +84,7 @@ fn list_windows_is_side_effect_free() -> Result<()> {
#[test]
fn daemon_start_recovers_from_stale_socket() -> Result<()> {
let _guard = env_lock().lock().unwrap();
let _guard = env_lock_guard();
let Some(_env) = SessionEnvGuard::prepare() else {
eprintln!("Skipping X11 integration test because DISPLAY is not set");
return Ok(());
@ -113,3 +113,148 @@ fn daemon_start_recovers_from_stale_socket() -> Result<()> {
Ok(())
}
/// A daemon that fails during state initialization must exit unsuccessfully
/// and leave neither its socket nor its PID file behind.
#[test]
fn daemon_init_failure_cleans_runtime_state() -> Result<()> {
    let _guard = env_lock_guard();
    let session = TestSession::new("daemon-init-failure")?;

    // Environment handed to the daemon for this failing-startup scenario.
    let daemon_env = [("XDG_SESSION_TYPE", "x11"), ("DISPLAY", ":99999")];
    let output = session.run_daemon(daemon_env)?;
    assert!(!output.status.success(), "daemon startup should fail");

    let stderr = String::from_utf8_lossy(&output.stderr);
    let reports_init_failure = stderr.contains("Failed to initialize daemon state");
    assert!(reports_init_failure, "unexpected stderr: {stderr}");

    let socket_left_behind = session.socket_path().exists();
    assert!(
        !socket_left_behind,
        "failed startup should remove the socket path"
    );
    let pid_left_behind = session.pid_path().exists();
    assert!(
        !pid_left_behind,
        "failed startup should remove the pid path"
    );

    Ok(())
}
/// `wait window` on an existing fixture window succeeds and returns both the
/// matched window and the wait kind in its payload.
#[test]
fn wait_window_returns_matched_window_payload() -> Result<()> {
    let _guard = env_lock_guard();
    let Some(_env) = SessionEnvGuard::prepare() else {
        eprintln!("Skipping X11 integration test because DISPLAY is not set");
        return Ok(());
    };

    let title = "deskctl wait window test";
    let _window = FixtureWindow::create(title, "DeskctlWait")?;
    let session = TestSession::new("wait-window-success")?;

    let selector = format!("title={title}");
    let cli_output = session.run_cli([
        "--json",
        "wait",
        "window",
        "--selector",
        selector.as_str(),
        "--timeout",
        "1",
        "--poll-ms",
        "50",
    ])?;
    let response = successful_json_response(cli_output)?;

    let window = response
        .pointer("/data/window")
        .expect("wait window should return a matched window");
    let reported_title = window.get("title").and_then(|value| value.as_str());
    assert_eq!(reported_title, Some(title));

    let reported_wait = response
        .pointer("/data/wait")
        .and_then(|value| value.as_str());
    assert_eq!(reported_wait, Some("window"));

    Ok(())
}
/// A fuzzy selector matching two fixture windows must fail with a structured
/// `selector_ambiguous` error that lists at least both candidates.
#[test]
fn ambiguous_fuzzy_selector_returns_candidates() -> Result<()> {
    let _guard = env_lock_guard();
    let Some(_env) = SessionEnvGuard::prepare() else {
        eprintln!("Skipping X11 integration test because DISPLAY is not set");
        return Ok(());
    };

    let _window_one = FixtureWindow::create("deskctl ambiguity alpha", "DeskctlAmbiguous")?;
    let _window_two = FixtureWindow::create("deskctl ambiguity beta", "DeskctlAmbiguous")?;
    let session = TestSession::new("selector-ambiguity")?;

    let output = session.run_cli(["--json", "focus", "ambiguity"])?;
    let response = json_response(&output)?;
    assert!(!output.status.success());

    let success_flag = response.get("success").and_then(|value| value.as_bool());
    assert_eq!(success_flag, Some(false));

    let error_kind = response
        .pointer("/data/kind")
        .and_then(|value| value.as_str());
    assert_eq!(error_kind, Some("selector_ambiguous"));

    let lists_both_candidates = response
        .pointer("/data/candidates")
        .and_then(|value| value.as_array())
        .map_or(false, |candidates| candidates.len() >= 2);
    assert!(lists_both_candidates);

    Ok(())
}
/// `wait focus` on a selector that never matches must fail with a structured
/// `timeout` error whose last observation is `selector_not_found`.
#[test]
fn wait_focus_timeout_is_structured() -> Result<()> {
    let _guard = env_lock_guard();
    let Some(_env) = SessionEnvGuard::prepare() else {
        eprintln!("Skipping X11 integration test because DISPLAY is not set");
        return Ok(());
    };

    let session = TestSession::new("wait-focus-timeout")?;
    let cli_args = [
        "--json",
        "wait",
        "focus",
        "--selector",
        "title=missing-window-for-wait-focus",
        "--timeout",
        "1",
        "--poll-ms",
        "50",
    ];
    let output = session.run_cli(cli_args)?;
    let response = json_response(&output)?;
    assert!(!output.status.success());

    let error_kind = response
        .pointer("/data/kind")
        .and_then(|value| value.as_str());
    assert_eq!(error_kind, Some("timeout"));

    let last_observed_kind = response
        .pointer("/data/last_observation")
        .and_then(|value| value.get("kind"))
        .and_then(|value| value.as_str());
    assert_eq!(last_observed_kind, Some("selector_not_found"));

    Ok(())
}