Add desktop runtime API and SDK support

This commit is contained in:
Nathan Flurry 2026-03-07 23:32:49 -08:00
parent 3d9476ed0b
commit 641597afe6
27 changed files with 5881 additions and 21 deletions

View file

@ -1,6 +1,6 @@
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { existsSync } from "node:fs";
import { mkdtempSync, rmSync } from "node:fs";
import { chmodSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { join } from "node:path";
import { fileURLToPath } from "node:url";
@ -150,21 +150,166 @@ function nodeCommand(source: string): { command: string; args: string[] } {
};
}
function writeExecutable(path: string, source: string): void {
writeFileSync(path, source, "utf8");
chmodSync(path, 0o755);
}
function prepareFakeDesktopEnv(root: string): Record<string, string> {
const binDir = join(root, "bin");
const xdgStateHome = join(root, "xdg-state");
const fakeStateDir = join(root, "fake-state");
mkdirSync(binDir, { recursive: true });
mkdirSync(xdgStateHome, { recursive: true });
mkdirSync(fakeStateDir, { recursive: true });
writeExecutable(
join(binDir, "Xvfb"),
`#!/usr/bin/env sh
set -eu
display="\${1:-:191}"
number="\${display#:}"
socket="/tmp/.X11-unix/X\${number}"
mkdir -p /tmp/.X11-unix
touch "$socket"
cleanup() {
rm -f "$socket"
exit 0
}
trap cleanup INT TERM EXIT
while :; do
sleep 1
done
`,
);
writeExecutable(
join(binDir, "openbox"),
`#!/usr/bin/env sh
set -eu
trap 'exit 0' INT TERM
while :; do
sleep 1
done
`,
);
writeExecutable(
join(binDir, "dbus-launch"),
`#!/usr/bin/env sh
set -eu
echo "DBUS_SESSION_BUS_ADDRESS=unix:path=/tmp/sandbox-agent-test-bus"
echo "DBUS_SESSION_BUS_PID=$$"
`,
);
writeExecutable(
join(binDir, "xrandr"),
`#!/usr/bin/env sh
set -eu
cat <<'EOF'
Screen 0: minimum 1 x 1, current 1440 x 900, maximum 32767 x 32767
EOF
`,
);
writeExecutable(
join(binDir, "import"),
`#!/usr/bin/env sh
set -eu
printf '\\211PNG\\r\\n\\032\\n\\000\\000\\000\\rIHDR\\000\\000\\000\\001\\000\\000\\000\\001\\010\\006\\000\\000\\000\\037\\025\\304\\211\\000\\000\\000\\013IDATx\\234c\\000\\001\\000\\000\\005\\000\\001\\r\\n-\\264\\000\\000\\000\\000IEND\\256B\`\\202'
`,
);
writeExecutable(
join(binDir, "xdotool"),
`#!/usr/bin/env sh
set -eu
state_dir="\${SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR:?missing fake state dir}"
state_file="\${state_dir}/mouse"
mkdir -p "$state_dir"
if [ ! -f "$state_file" ]; then
printf '0 0\\n' > "$state_file"
fi
read_state() {
read -r x y < "$state_file"
}
write_state() {
printf '%s %s\\n' "$1" "$2" > "$state_file"
}
command="\${1:-}"
case "$command" in
getmouselocation)
read_state
printf 'X=%s\\nY=%s\\nSCREEN=0\\nWINDOW=0\\n' "$x" "$y"
;;
mousemove)
shift
x="\${1:-0}"
y="\${2:-0}"
shift 2 || true
while [ "$#" -gt 0 ]; do
token="$1"
shift
case "$token" in
mousemove)
x="\${1:-0}"
y="\${2:-0}"
shift 2 || true
;;
mousedown|mouseup)
shift 1 || true
;;
click)
if [ "\${1:-}" = "--repeat" ]; then
shift 2 || true
fi
shift 1 || true
;;
esac
done
write_state "$x" "$y"
;;
type|key)
exit 0
;;
*)
exit 0
;;
esac
`,
);
return {
SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX: "1",
SANDBOX_AGENT_DESKTOP_DISPLAY_NUM: "191",
SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR: fakeStateDir,
XDG_STATE_HOME: xdgStateHome,
PATH: `${binDir}:${process.env.PATH ?? ""}`,
};
}
describe("Integration: TypeScript SDK flat session API", () => {
let handle: SandboxAgentSpawnHandle;
let baseUrl: string;
let token: string;
let dataHome: string;
let desktopHome: string;
beforeAll(async () => {
dataHome = mkdtempSync(join(tmpdir(), "sdk-integration-"));
desktopHome = mkdtempSync(join(tmpdir(), "sdk-desktop-"));
const agentEnv = prepareMockAgentDataHome(dataHome);
const desktopEnv = prepareFakeDesktopEnv(desktopHome);
handle = await spawnSandboxAgent({
enabled: true,
log: "silent",
timeoutMs: 30000,
env: agentEnv,
env: { ...agentEnv, ...desktopEnv },
});
baseUrl = handle.baseUrl;
token = handle.token;
@ -173,6 +318,7 @@ describe("Integration: TypeScript SDK flat session API", () => {
afterAll(async () => {
await handle.dispose();
rmSync(dataHome, { recursive: true, force: true });
rmSync(desktopHome, { recursive: true, force: true });
});
it("detects Node.js runtime", () => {
@ -882,4 +1028,91 @@ describe("Integration: TypeScript SDK flat session API", () => {
await sdk.dispose();
}
});
it("covers desktop status, screenshot, display, mouse, and keyboard helpers", async () => {
const sdk = await SandboxAgent.connect({
baseUrl,
token,
});
try {
const initialStatus = await sdk.getDesktopStatus();
expect(initialStatus.state).toBe("inactive");
const started = await sdk.startDesktop({
width: 1440,
height: 900,
dpi: 96,
});
expect(started.state).toBe("active");
expect(started.display?.startsWith(":")).toBe(true);
expect(started.missingDependencies).toEqual([]);
const displayInfo = await sdk.getDesktopDisplayInfo();
expect(displayInfo.display).toBe(started.display);
expect(displayInfo.resolution.width).toBe(1440);
expect(displayInfo.resolution.height).toBe(900);
const screenshot = await sdk.takeDesktopScreenshot();
expect(Buffer.from(screenshot.subarray(0, 8)).equals(Buffer.from("\x89PNG\r\n\x1a\n", "binary"))).toBe(true);
const region = await sdk.takeDesktopRegionScreenshot({
x: 10,
y: 20,
width: 40,
height: 50,
});
expect(Buffer.from(region.subarray(0, 8)).equals(Buffer.from("\x89PNG\r\n\x1a\n", "binary"))).toBe(true);
const moved = await sdk.moveDesktopMouse({ x: 40, y: 50 });
expect(moved.x).toBe(40);
expect(moved.y).toBe(50);
const dragged = await sdk.dragDesktopMouse({
startX: 40,
startY: 50,
endX: 80,
endY: 90,
button: "left",
});
expect(dragged.x).toBe(80);
expect(dragged.y).toBe(90);
const clicked = await sdk.clickDesktop({
x: 80,
y: 90,
button: "left",
clickCount: 1,
});
expect(clicked.x).toBe(80);
expect(clicked.y).toBe(90);
const scrolled = await sdk.scrollDesktop({
x: 80,
y: 90,
deltaY: -2,
});
expect(scrolled.x).toBe(80);
expect(scrolled.y).toBe(90);
const position = await sdk.getDesktopMousePosition();
expect(position.x).toBe(80);
expect(position.y).toBe(90);
const typed = await sdk.typeDesktopText({
text: "hello desktop",
delayMs: 5,
});
expect(typed.ok).toBe(true);
const pressed = await sdk.pressDesktopKey({ key: "ctrl+l" });
expect(pressed.ok).toBe(true);
const stopped = await sdk.stopDesktop();
expect(stopped.state).toBe("inactive");
} finally {
await sdk.stopDesktop().catch(() => {});
await sdk.dispose();
}
});
});