(null);
// Active recording tracking
const activeRecording = useMemo(() => recordings.find((r) => r.status === "recording"), [recordings]);
+ // Windows worth listing in the UI. The focused window is always kept; any other window
+ // needs a non-blank title other than "Openbox" and must be at least 120x80.
+ const visibleWindows = useMemo(
+   () =>
+     windows.filter((candidate) => {
+       const trimmedTitle = candidate.title.trim();
+       if (candidate.isActive) return true;
+       const hasRealTitle = trimmedTitle !== "" && trimmedTitle !== "Openbox";
+       return hasRealTitle && candidate.width >= 120 && candidate.height >= 80;
+     }),
+   [windows],
+ );
const revokeScreenshotUrl = useCallback(() => {
setScreenshotUrl((current) => {
if (current?.startsWith("blob:") && typeof URL.revokeObjectURL === "function") {
@@ -103,6 +173,11 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
try {
const next = await getClient().getDesktopStatus();
setStatus(next);
+ // Status response now includes windows; sync them so we get window
+ // updates for free every time status is polled.
+ if (next.state === "active" && next.windows?.length) {
+ setWindows(next.windows);
+ }
return next;
} catch (loadError) {
setError(extractErrorMessage(loadError, "Unable to load desktop status."));
@@ -118,16 +193,36 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
setScreenshotLoading(true);
setScreenshotError(null);
try {
- const bytes = await getClient().takeDesktopScreenshot();
+ const quality = Number.parseInt(screenshotQuality, 10);
+ const scale = Number.parseFloat(screenshotScale);
+ const request: DesktopScreenshotRequest = {
+ format: screenshotFormat !== "png" ? screenshotFormat : undefined,
+ quality: screenshotFormat !== "png" && Number.isFinite(quality) ? quality : undefined,
+ scale: Number.isFinite(scale) && scale !== 1.0 ? scale : undefined,
+ showCursor: showCursor || undefined,
+ };
+ const bytes = await getClient().takeDesktopScreenshot(request);
revokeScreenshotUrl();
- setScreenshotUrl(await createScreenshotUrl(bytes));
+ const mimeType = screenshotFormat === "jpeg" ? "image/jpeg" : screenshotFormat === "webp" ? "image/webp" : "image/png";
+ setScreenshotUrl(await createScreenshotUrl(bytes, mimeType));
} catch (captureError) {
revokeScreenshotUrl();
setScreenshotError(extractErrorMessage(captureError, "Unable to capture desktop screenshot."));
} finally {
setScreenshotLoading(false);
}
- }, [getClient, revokeScreenshotUrl]);
+ }, [getClient, revokeScreenshotUrl, screenshotFormat, screenshotQuality, screenshotScale, showCursor]);
+ // Fetches the current pointer coordinates from the client. Any failure clears the
+ // stored position; the loading flag is reset on both paths.
+ const loadMousePosition = useCallback(async () => {
+   setMousePosLoading(true);
+   try {
+     const { x, y } = await getClient().getDesktopMousePosition();
+     setMousePos({ x, y });
+   } catch {
+     setMousePos(null);
+   } finally {
+     setMousePosLoading(false);
+   }
+ }, [getClient]);
const loadRecordings = useCallback(async () => {
setRecordingLoading(true);
setRecordingError(null);
@@ -140,15 +235,88 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
setRecordingLoading(false);
}
}, [getClient]);
+ // Reads the clipboard for the currently chosen selection and stores its text.
+ // A failed read surfaces through clipboardError; the previous error is cleared first.
+ const loadClipboard = useCallback(async () => {
+   setClipboardLoading(true);
+   setClipboardError(null);
+   try {
+     const { text } = await getClient().getDesktopClipboard({ selection: clipboardSelection });
+     setClipboardText(text);
+   } catch (readError) {
+     setClipboardError(extractErrorMessage(readError, "Unable to read clipboard."));
+   } finally {
+     setClipboardLoading(false);
+   }
+ }, [clipboardSelection, getClient]);
+ // Fetches the window list from the client and replaces the stored list.
+ // Clears windowsError up front, so a successful reload also dismisses an older error.
+ const loadWindows = useCallback(async () => {
+   setWindowsLoading(true);
+   setWindowsError(null);
+   try {
+     const { windows: listedWindows } = await getClient().listDesktopWindows();
+     setWindows(listedWindows);
+   } catch (listError) {
+     setWindowsError(extractErrorMessage(listError, "Unable to list windows."));
+   } finally {
+     setWindowsLoading(false);
+   }
+ }, [getClient]);
+ // Asks the client to focus the given window, then reloads the window list.
+ // windowActing holds the window id while the request is in flight.
+ const handleFocusWindow = async (windowId: string) => {
+   setWindowActing(windowId);
+   try {
+     const client = getClient();
+     await client.focusDesktopWindow(windowId);
+     await loadWindows();
+   } catch (focusError) {
+     setWindowsError(extractErrorMessage(focusError, "Unable to focus window."));
+   } finally {
+     setWindowActing(null);
+   }
+ };
+ // Moves a window to the X/Y typed into the inline editor, then reloads the window list.
+ // Both fields are parsed as base-10 integers. Input that does not parse is reported
+ // through windowsError rather than dropped silently, so the action never appears to be ignored.
+ const handleMoveWindow = async (windowId: string) => {
+   const x = Number.parseInt(editX, 10);
+   const y = Number.parseInt(editY, 10);
+   if (!Number.isFinite(x) || !Number.isFinite(y)) {
+     setWindowsError("Enter whole-number X and Y coordinates to move the window.");
+     return;
+   }
+   setWindowActing(windowId);
+   try {
+     await getClient().moveDesktopWindow(windowId, { x, y });
+     // Close the inline editor only once the move request has succeeded.
+     setEditingWindow(null);
+     await loadWindows();
+   } catch (err) {
+     setWindowsError(extractErrorMessage(err, "Unable to move window."));
+   } finally {
+     setWindowActing(null);
+   }
+ };
+ // Resizes a window to the width/height typed into the inline editor, then reloads the
+ // window list. Both fields are parsed as base-10 integers and must be positive. Invalid
+ // input is reported through windowsError rather than dropped silently.
+ const handleResizeWindow = async (windowId: string) => {
+   const nextWidth = Number.parseInt(editW, 10);
+   const nextHeight = Number.parseInt(editH, 10);
+   const sizeIsValid = Number.isFinite(nextWidth) && Number.isFinite(nextHeight) && nextWidth > 0 && nextHeight > 0;
+   if (!sizeIsValid) {
+     setWindowsError("Enter a positive whole-number width and height to resize the window.");
+     return;
+   }
+   setWindowActing(windowId);
+   try {
+     await getClient().resizeDesktopWindow(windowId, { width: nextWidth, height: nextHeight });
+     // Close the inline editor only once the resize request has succeeded.
+     setEditingWindow(null);
+     await loadWindows();
+   } catch (err) {
+     setWindowsError(extractErrorMessage(err, "Unable to resize window."));
+   } finally {
+     setWindowActing(null);
+   }
+ };
useEffect(() => {
void loadStatus();
}, [loadStatus]);
+ // While the desktop is active, re-poll status every 5 seconds. The status handler also
+ // syncs the window list, so windows refresh on the same cadence. The timer is cleared
+ // when the state changes or the component unmounts.
+ useEffect(() => {
+   if (status?.state === "active") {
+     const pollTimer = setInterval(() => {
+       void loadStatus("refresh");
+     }, 5000);
+     return () => {
+       clearInterval(pollTimer);
+     };
+   }
+   return undefined;
+ }, [status?.state, loadStatus]);
useEffect(() => {
if (status?.state === "active") {
void loadRecordings();
} else {
revokeScreenshotUrl();
setLiveViewActive(false);
+ setMousePos(null);
+ setEditingWindow(null);
}
}, [status?.state, loadRecordings, revokeScreenshotUrl]);
useEffect(() => {
@@ -160,19 +328,35 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
const interval = setInterval(() => void loadRecordings(), 3000);
return () => clearInterval(interval);
}, [activeRecording, loadRecordings]);
+ // Keeps the window list in step with the runtime state: cleared whenever the desktop
+ // is not active, loaded once when it becomes active.
+ useEffect(() => {
+   if (status?.state === "active") {
+     // Initial load only; later updates arrive through the status auto-refresh.
+     void loadWindows();
+   } else {
+     setWindows([]);
+   }
+ }, [status?.state, loadWindows]);
const handleStart = async () => {
const parsedWidth = Number.parseInt(width, 10);
const parsedHeight = Number.parseInt(height, 10);
const parsedDpi = Number.parseInt(dpi, 10);
+ const parsedFrameRate = Number.parseInt(streamFrameRate, 10);
+ const parsedRecordingFps = Number.parseInt(defaultRecordingFps, 10);
setActing("start");
setError(null);
const startedAt = Date.now();
try {
- const next = await getClient().startDesktop({
+ const request: DesktopStartRequestWithAdvanced = {
width: Number.isFinite(parsedWidth) ? parsedWidth : undefined,
height: Number.isFinite(parsedHeight) ? parsedHeight : undefined,
dpi: Number.isFinite(parsedDpi) ? parsedDpi : undefined,
- });
+ streamVideoCodec: streamVideoCodec !== "vp8" ? streamVideoCodec : undefined,
+ streamAudioCodec: streamAudioCodec !== "opus" ? streamAudioCodec : undefined,
+ streamFrameRate: Number.isFinite(parsedFrameRate) && parsedFrameRate !== 30 ? parsedFrameRate : undefined,
+ webrtcPortRange: webrtcPortRange !== "59050-59070" ? webrtcPortRange : undefined,
+ recordingFps: Number.isFinite(parsedRecordingFps) && parsedRecordingFps !== 30 ? parsedRecordingFps : undefined,
+ };
+ const next = await getClient().startDesktop(request);
setStatus(next);
} catch (startError) {
setError(extractErrorMessage(startError, "Unable to start desktop runtime."));
@@ -205,6 +389,18 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
setActing(null);
}
};
+ // Writes the drafted text to the chosen clipboard selection. On success the displayed
+ // clipboard contents are updated locally to the text that was written.
+ const handleWriteClipboard = async () => {
+   setClipboardWriting(true);
+   setClipboardError(null);
+   try {
+     const payload = { text: clipboardWriteText, selection: clipboardSelection };
+     await getClient().setDesktopClipboard(payload);
+     setClipboardText(clipboardWriteText);
+   } catch (writeError) {
+     setClipboardError(extractErrorMessage(writeError, "Unable to write clipboard."));
+   } finally {
+     setClipboardWriting(false);
+   }
+ };
const handleStartRecording = async () => {
const fps = Number.parseInt(recordingFps, 10);
setRecordingActing("start");
@@ -262,6 +458,41 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
setDownloadingRecordingId(null);
}
};
+ // Launches the application named in the form and reports the resulting process id
+ // (plus window id when one is returned), then reloads the window list.
+ // Does nothing when the app name is blank. Arguments are split on runs of whitespace,
+ // so quoted arguments containing spaces are not kept together.
+ const handleLaunchApp = async () => {
+   const appName = launchApp.trim();
+   if (!appName) return;
+   setLaunching(true);
+   setLaunchError(null);
+   setLaunchResult(null);
+   try {
+     const rawArgs = launchArgs.trim();
+     const args = rawArgs ? rawArgs.split(/\s+/) : undefined;
+     const launched = await getClient().launchDesktopApp({
+       app: appName,
+       args,
+       wait: launchWait || undefined,
+     });
+     const windowSuffix = launched.windowId ? ` (window: ${launched.windowId})` : "";
+     setLaunchResult(`Started ${launched.processId}${windowSuffix}`);
+     await loadWindows();
+   } catch (launchFailure) {
+     setLaunchError(extractErrorMessage(launchFailure, "Unable to launch app."));
+   } finally {
+     setLaunching(false);
+   }
+ };
+ // Opens the file path or URL typed into the form and reports the process id that
+ // handled it, then reloads the window list. Does nothing when the target is blank.
+ const handleOpenTarget = async () => {
+   const target = openTarget.trim();
+   if (!target) return;
+   setOpening(true);
+   setOpenError(null);
+   setOpenResult(null);
+   try {
+     const opened = await getClient().openDesktopTarget({ target });
+     setOpenResult(`Opened via ${opened.processId}`);
+     await loadWindows();
+   } catch (openFailure) {
+     setOpenError(extractErrorMessage(openFailure, "Unable to open target."));
+   } finally {
+     setOpening(false);
+   }
+ };
const canRefreshScreenshot = status?.state === "active";
const isActive = status?.state === "active";
const resolutionLabel = useMemo(() => {
@@ -284,6 +515,48 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
)}
+ {isActive && !liveViewActive && (
+
+ )}
{error && {error}
}
{screenshotError && {screenshotError}
}
{/* ========== Runtime Section ========== */}
@@ -329,6 +602,56 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
setDpi(event.target.value)} inputMode="numeric" />
+
+ {showAdvancedStart && (
+
+
+
+
+
+
+
+
+
+
+
+ setStreamFrameRate(event.target.value)}
+ inputMode="numeric"
+ disabled={isActive}
+ />
+
+
+
+ setWebrtcPortRange(event.target.value)} disabled={isActive} />
+
+
+
+ setDefaultRecordingFps(event.target.value)}
+ inputMode="numeric"
+ disabled={isActive}
+ />
+
+
+ )}
{isActive ? (
)}
{!isActive && Start the desktop runtime to enable live view.
}
- {isActive && liveViewActive && }
+ {isActive && liveViewActive && (
+ <>
+
+ Right click to open window
+ {status?.resolution && (
+
+ {status.resolution.width}x{status.resolution.height}
+
+ )}
+
+
+ >
+ )}
{isActive && !liveViewActive && (
<>
{screenshotUrl ? (
@@ -428,7 +773,313 @@ const DesktopTab = ({ getClient }: { getClient: () => SandboxAgent }) => {
)}
>
)}
+ {isActive && (
+
+
+ {mousePos && (
+
+ ({mousePos.x}, {mousePos.y})
+
+ )}
+
+ )}
+ {isActive && (
+
+
+
+
+ Clipboard
+
+
+
+
+
+
+ {clipboardError && (
+
+ {clipboardError}
+
+ )}
+
+
Current contents
+
+ {clipboardText ? clipboardText : (empty)}
+
+
+
+
+ )}
+ {isActive && (
+
+
+
+
+ Windows
+
+
+
+ {windowsError && (
+
+ {windowsError}
+
+ )}
+ {visibleWindows.length > 0 ? (
+
+ {windows.length !== visibleWindows.length && (
+
+ Showing {visibleWindows.length} top-level windows ({windows.length - visibleWindows.length} helper entries hidden)
+
+ )}
+ {visibleWindows.map((win) => (
+
+
+
+
{win.title || "(untitled)"}
+ {win.isActive && (
+
+ focused
+
+ )}
+
+ id: {win.id}
+ {" \u00b7 "}
+ {win.x},{win.y}
+ {" \u00b7 "}
+ {win.width}x{win.height}
+
+
+
+
+
+
+
+
+ {editingWindow?.id === win.id && editingWindow.action === "move" && (
+
+ setEditX(event.target.value)}
+ style={{ width: 60 }}
+ inputMode="numeric"
+ />
+ setEditY(event.target.value)}
+ style={{ width: 60 }}
+ inputMode="numeric"
+ />
+
+
+
+ )}
+ {editingWindow?.id === win.id && editingWindow.action === "resize" && (
+
+ setEditW(event.target.value)}
+ style={{ width: 60 }}
+ inputMode="numeric"
+ />
+ setEditH(event.target.value)}
+ style={{ width: 60 }}
+ inputMode="numeric"
+ />
+
+
+
+ )}
+
+ ))}
+
+ ) : (
+
{windowsLoading ? "Loading..." : "No windows detected. Click refresh to update."}
+ )}
+
+ )}
+ {isActive && (
+
+
+
+
+ Launch / Open
+
+
+
Launch application
+
+ {launchError && (
+
+ {launchError}
+
+ )}
+ {launchResult && (
+
+ {launchResult}
+
+ )}
+
+ Open file or URL
+
+
+ setOpenTarget(event.target.value)}
+ style={{ flex: 1 }}
+ />
+
+
+ {openError && (
+
+ {openError}
+
+ )}
+ {openResult && (
+
+ {openResult}
+
+ )}
+
+ )}
{/* ========== Recording Section ========== */}
diff --git a/sdks/react/src/DesktopViewer.tsx b/sdks/react/src/DesktopViewer.tsx
index 5f55ed4..55e0794 100644
--- a/sdks/react/src/DesktopViewer.tsx
+++ b/sdks/react/src/DesktopViewer.tsx
@@ -14,6 +14,7 @@ export interface DesktopViewerProps {
style?: CSSProperties;
imageStyle?: CSSProperties;
height?: number | string;
+ showStatusBar?: boolean;
onConnect?: (status: DesktopStreamReadyStatus) => void;
onDisconnect?: () => void;
onError?: (error: DesktopStreamErrorStatus | Error) => void;
@@ -76,7 +77,17 @@ const getStatusColor = (state: ConnectionState): string => {
}
};
-export const DesktopViewer = ({ client, className, style, imageStyle, height = 480, onConnect, onDisconnect, onError }: DesktopViewerProps) => {
+export const DesktopViewer = ({
+ client,
+ className,
+ style,
+ imageStyle,
+ height = 480,
+ showStatusBar = true,
+ onConnect,
+ onDisconnect,
+ onError,
+}: DesktopViewerProps) => {
const wrapperRef = useRef
(null);
const videoRef = useRef(null);
const sessionRef = useRef | null>(null);
@@ -194,10 +205,12 @@ export const DesktopViewer = ({ client, className, style, imageStyle, height = 4
return (
-
- {statusMessage}
- {resolution ? `${resolution.width}×${resolution.height}` : "Awaiting stream"}
-
+ {showStatusBar ? (
+
+ {statusMessage}
+ {resolution ? `${resolution.width}×${resolution.height}` : "Awaiting stream"}
+
+ ) : null}
{
+ return this.requestJson("GET", `${API_PREFIX}/desktop/windows/focused`);
+ }
+
+ /**
+  * Focus a desktop window.
+  * Issues a POST to `${API_PREFIX}/desktop/windows/{windowId}/focus`; the window id is
+  * URL-encoded before being placed in the path.
+  */
+ async focusDesktopWindow(windowId: string): Promise
{
+ return this.requestJson("POST", `${API_PREFIX}/desktop/windows/${encodeURIComponent(windowId)}/focus`);
+ }
+
+ /**
+  * Move a desktop window.
+  * Issues a POST to `${API_PREFIX}/desktop/windows/{windowId}/move` with `request` as the
+  * request body; the window id is URL-encoded before being placed in the path.
+  */
+ async moveDesktopWindow(windowId: string, request: DesktopWindowMoveRequest): Promise {
+ return this.requestJson("POST", `${API_PREFIX}/desktop/windows/${encodeURIComponent(windowId)}/move`, {
+ body: request,
+ });
+ }
+
+ /**
+  * Resize a desktop window.
+  * Issues a POST to `${API_PREFIX}/desktop/windows/{windowId}/resize` with `request` as the
+  * request body; the window id is URL-encoded before being placed in the path.
+  */
+ async resizeDesktopWindow(windowId: string, request: DesktopWindowResizeRequest): Promise {
+ return this.requestJson("POST", `${API_PREFIX}/desktop/windows/${encodeURIComponent(windowId)}/resize`, {
+ body: request,
+ });
+ }
+
+ /**
+  * Read the desktop clipboard.
+  * Issues a GET to `${API_PREFIX}/desktop/clipboard`, passing `query` through as the
+  * request's query options. `query` defaults to an empty object.
+  */
+ async getDesktopClipboard(query: DesktopClipboardQuery = {}): Promise {
+ return this.requestJson("GET", `${API_PREFIX}/desktop/clipboard`, {
+ query,
+ });
+ }
+
+ /**
+  * Write to the desktop clipboard.
+  * Issues a POST to `${API_PREFIX}/desktop/clipboard` with `request` as the request body.
+  */
+ async setDesktopClipboard(request: DesktopClipboardWriteRequest): Promise {
+ return this.requestJson("POST", `${API_PREFIX}/desktop/clipboard`, {
+ body: request,
+ });
+ }
+
+ /**
+  * Launch a desktop application.
+  * Issues a POST to `${API_PREFIX}/desktop/launch` with `request` as the request body.
+  */
+ async launchDesktopApp(request: DesktopLaunchRequest): Promise {
+ return this.requestJson("POST", `${API_PREFIX}/desktop/launch`, {
+ body: request,
+ });
+ }
+
+ /**
+  * Open a file or URL on the desktop.
+  * Issues a POST to `${API_PREFIX}/desktop/open` with `request` as the request body.
+  */
+ async openDesktopTarget(request: DesktopOpenRequest): Promise {
+ return this.requestJson("POST", `${API_PREFIX}/desktop/open`, {
+ body: request,
+ });
+ }
+
+ /**
+  * Get the desktop stream status.
+  * Issues a GET to `${API_PREFIX}/desktop/stream/status` with no query or body.
+  */
+ async getDesktopStreamStatus(): Promise {
+ return this.requestJson("GET", `${API_PREFIX}/desktop/stream/status`);
+ }
+
async startDesktopRecording(request: DesktopRecordingStartRequest = {}): Promise {
return this.requestJson("POST", `${API_PREFIX}/desktop/recording/start`, {
body: request,
diff --git a/sdks/typescript/src/generated/openapi.ts b/sdks/typescript/src/generated/openapi.ts
index 195c481..537102f 100644
--- a/sdks/typescript/src/generated/openapi.ts
+++ b/sdks/typescript/src/generated/openapi.ts
@@ -31,6 +31,18 @@ export interface paths {
put: operations["put_v1_config_skills"];
delete: operations["delete_v1_config_skills"];
};
+ "/v1/desktop/clipboard": {
+ /**
+ * Read the desktop clipboard.
+ * @description Returns the current text content of the X11 clipboard.
+ */
+ get: operations["get_v1_desktop_clipboard"];
+ /**
+ * Write to the desktop clipboard.
+ * @description Sets the text content of the X11 clipboard.
+ */
+ post: operations["post_v1_desktop_clipboard"];
+ };
"/v1/desktop/display/info": {
/**
* Get desktop display information.
@@ -71,6 +83,14 @@ export interface paths {
*/
post: operations["post_v1_desktop_keyboard_up"];
};
+ "/v1/desktop/launch": {
+ /**
+ * Launch a desktop application.
+ * @description Launches an application by name on the managed desktop, optionally waiting
+ * for its window to appear.
+ */
+ post: operations["post_v1_desktop_launch"];
+ };
"/v1/desktop/mouse/click": {
/**
* Click on the desktop.
@@ -126,6 +146,13 @@ export interface paths {
*/
post: operations["post_v1_desktop_mouse_up"];
};
+ "/v1/desktop/open": {
+ /**
+ * Open a file or URL with the default handler.
+ * @description Opens a file path or URL using xdg-open on the managed desktop.
+ */
+ post: operations["post_v1_desktop_open"];
+ };
"/v1/desktop/recording/start": {
/**
* Start desktop recording.
@@ -208,6 +235,15 @@ export interface paths {
*/
post: operations["post_v1_desktop_stop"];
};
+ "/v1/desktop/stream/signaling": {
+ /**
+ * Open a desktop WebRTC signaling session.
+ * @description Upgrades the connection to a WebSocket used for WebRTC signaling between
+ * the browser client and the desktop streaming process. Also accepts mouse
+ * and keyboard input frames as a fallback transport.
+ */
+ get: operations["get_v1_desktop_stream_ws"];
+ };
"/v1/desktop/stream/start": {
/**
* Start desktop streaming.
@@ -215,6 +251,13 @@ export interface paths {
*/
post: operations["post_v1_desktop_stream_start"];
};
+ "/v1/desktop/stream/status": {
+ /**
+ * Get desktop stream status.
+ * @description Returns the current state of the desktop WebRTC streaming session.
+ */
+ get: operations["get_v1_desktop_stream_status"];
+ };
"/v1/desktop/stream/stop": {
/**
* Stop desktop streaming.
@@ -222,14 +265,6 @@ export interface paths {
*/
post: operations["post_v1_desktop_stream_stop"];
};
- "/v1/desktop/stream/ws": {
- /**
- * Open a desktop websocket streaming session.
- * @description Upgrades the connection to a websocket that streams JPEG desktop frames and
- * accepts mouse and keyboard control frames.
- */
- get: operations["get_v1_desktop_stream_ws"];
- };
"/v1/desktop/windows": {
/**
* List visible desktop windows.
@@ -238,6 +273,34 @@ export interface paths {
*/
get: operations["get_v1_desktop_windows"];
};
+ "/v1/desktop/windows/focused": {
+ /**
+ * Get the currently focused desktop window.
+ * @description Returns information about the window that currently has input focus.
+ */
+ get: operations["get_v1_desktop_windows_focused"];
+ };
+ "/v1/desktop/windows/{id}/focus": {
+ /**
+ * Focus a desktop window.
+ * @description Brings the specified window to the foreground and gives it input focus.
+ */
+ post: operations["post_v1_desktop_window_focus"];
+ };
+ "/v1/desktop/windows/{id}/move": {
+ /**
+ * Move a desktop window.
+ * @description Moves the specified window to the given position.
+ */
+ post: operations["post_v1_desktop_window_move"];
+ };
+ "/v1/desktop/windows/{id}/resize": {
+ /**
+ * Resize a desktop window.
+ * @description Resizes the specified window to the given dimensions.
+ */
+ post: operations["post_v1_desktop_window_resize"];
+ };
"/v1/fs/entries": {
get: operations["get_v1_fs_entries"];
};
@@ -443,6 +506,17 @@ export interface components {
DesktopActionResponse: {
ok: boolean;
};
+ DesktopClipboardQuery: {
+ selection?: string | null;
+ };
+ DesktopClipboardResponse: {
+ selection: string;
+ text: string;
+ };
+ DesktopClipboardWriteRequest: {
+ selection?: string | null;
+ text: string;
+ };
DesktopDisplayInfoResponse: {
display: string;
resolution: components["schemas"]["DesktopResolution"];
@@ -472,6 +546,17 @@ export interface components {
DesktopKeyboardUpRequest: {
key: string;
};
+ DesktopLaunchRequest: {
+ app: string;
+ args?: string[] | null;
+ wait?: boolean | null;
+ };
+ DesktopLaunchResponse: {
+ /** Format: int32 */
+ pid?: number | null;
+ processId: string;
+ windowId?: string | null;
+ };
/** @enum {string} */
DesktopMouseButton: "left" | "middle" | "right";
DesktopMouseClickRequest: {
@@ -533,6 +618,14 @@ export interface components {
/** Format: int32 */
y?: number | null;
};
+ DesktopOpenRequest: {
+ target: string;
+ };
+ DesktopOpenResponse: {
+ /** Format: int32 */
+ pid?: number | null;
+ processId: string;
+ };
DesktopProcessInfo: {
logPath?: string | null;
name: string;
@@ -567,6 +660,7 @@ export interface components {
quality?: number | null;
/** Format: float */
scale?: number | null;
+ showCursor?: boolean | null;
/** Format: int32 */
width: number;
/** Format: int32 */
@@ -590,13 +684,24 @@ export interface components {
quality?: number | null;
/** Format: float */
scale?: number | null;
+ showCursor?: boolean | null;
};
DesktopStartRequest: {
+ /** Format: int32 */
+ displayNum?: number | null;
/** Format: int32 */
dpi?: number | null;
/** Format: int32 */
height?: number | null;
/** Format: int32 */
+ recordingFps?: number | null;
+ stateDir?: string | null;
+ streamAudioCodec?: string | null;
+ /** Format: int32 */
+ streamFrameRate?: number | null;
+ streamVideoCodec?: string | null;
+ webrtcPortRange?: string | null;
+ /** Format: int32 */
width?: number | null;
};
/** @enum {string} */
@@ -611,9 +716,13 @@ export interface components {
runtimeLogPath?: string | null;
startedAt?: string | null;
state: components["schemas"]["DesktopState"];
+ /** @description Current visible windows (included when the desktop is active). */
+ windows?: components["schemas"]["DesktopWindowInfo"][];
};
DesktopStreamStatusResponse: {
active: boolean;
+ processId?: string | null;
+ windowId?: string | null;
};
DesktopWindowInfo: {
/** Format: int32 */
@@ -631,6 +740,18 @@ export interface components {
DesktopWindowListResponse: {
windows: components["schemas"]["DesktopWindowInfo"][];
};
+ DesktopWindowMoveRequest: {
+ /** Format: int32 */
+ x: number;
+ /** Format: int32 */
+ y: number;
+ };
+ DesktopWindowResizeRequest: {
+ /** Format: int32 */
+ height: number;
+ /** Format: int32 */
+ width: number;
+ };
/** @enum {string} */
ErrorType:
| "invalid_request"
@@ -1231,6 +1352,68 @@ export interface operations {
};
};
};
+ /**
+ * Read the desktop clipboard.
+ * @description Returns the current text content of the X11 clipboard.
+ */
+ get_v1_desktop_clipboard: {
+ parameters: {
+ query?: {
+ selection?: string | null;
+ };
+ };
+ responses: {
+ /** @description Clipboard contents */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopClipboardResponse"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Clipboard read failed */
+ 500: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
+ /**
+ * Write to the desktop clipboard.
+ * @description Sets the text content of the X11 clipboard.
+ */
+ post_v1_desktop_clipboard: {
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["DesktopClipboardWriteRequest"];
+ };
+ };
+ responses: {
+ /** @description Clipboard updated */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopActionResponse"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Clipboard write failed */
+ 500: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
/**
* Get desktop display information.
* @description Performs a health-gated display query against the managed desktop and
@@ -1410,6 +1593,38 @@ export interface operations {
};
};
};
+ /**
+ * Launch a desktop application.
+ * @description Launches an application by name on the managed desktop, optionally waiting
+ * for its window to appear.
+ */
+ post_v1_desktop_launch: {
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["DesktopLaunchRequest"];
+ };
+ };
+ responses: {
+ /** @description Application launched */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopLaunchResponse"];
+ };
+ };
+ /** @description Application not found */
+ 404: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
/**
* Click on the desktop.
* @description Performs a health-gated pointer move and click against the managed desktop
@@ -1664,6 +1879,31 @@ export interface operations {
};
};
};
+ /**
+ * Open a file or URL with the default handler.
+ * @description Opens a file path or URL using xdg-open on the managed desktop.
+ */
+ post_v1_desktop_open: {
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["DesktopOpenRequest"];
+ };
+ };
+ responses: {
+ /** @description Target opened */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopOpenResponse"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
/**
* Start desktop recording.
* @description Starts an ffmpeg x11grab recording against the managed desktop and returns
@@ -1834,6 +2074,7 @@ export interface operations {
format?: components["schemas"]["DesktopScreenshotFormat"] | null;
quality?: number | null;
scale?: number | null;
+ showCursor?: boolean | null;
};
};
responses: {
@@ -1876,6 +2117,7 @@ export interface operations {
format?: components["schemas"]["DesktopScreenshotFormat"] | null;
quality?: number | null;
scale?: number | null;
+ showCursor?: boolean | null;
};
};
responses: {
@@ -1990,37 +2232,10 @@ export interface operations {
};
};
/**
- * Start desktop streaming.
- * @description Enables desktop websocket streaming for the managed desktop.
- */
- post_v1_desktop_stream_start: {
- responses: {
- /** @description Desktop streaming started */
- 200: {
- content: {
- "application/json": components["schemas"]["DesktopStreamStatusResponse"];
- };
- };
- };
- };
- /**
- * Stop desktop streaming.
- * @description Disables desktop websocket streaming for the managed desktop.
- */
- post_v1_desktop_stream_stop: {
- responses: {
- /** @description Desktop streaming stopped */
- 200: {
- content: {
- "application/json": components["schemas"]["DesktopStreamStatusResponse"];
- };
- };
- };
- };
- /**
- * Open a desktop websocket streaming session.
- * @description Upgrades the connection to a websocket that streams JPEG desktop frames and
- * accepts mouse and keyboard control frames.
+ * Open a desktop WebRTC signaling session.
+ * @description Upgrades the connection to a WebSocket used for WebRTC signaling between
+ * the browser client and the desktop streaming process. Also accepts mouse
+ * and keyboard input frames as a fallback transport.
*/
get_v1_desktop_stream_ws: {
parameters: {
@@ -2048,6 +2263,48 @@ export interface operations {
};
};
};
+ /**
+ * Start desktop streaming.
+ * @description Enables desktop websocket streaming for the managed desktop.
+ */
+ post_v1_desktop_stream_start: {
+ responses: {
+ /** @description Desktop streaming started */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopStreamStatusResponse"];
+ };
+ };
+ };
+ };
+ /**
+ * Get desktop stream status.
+ * @description Returns the current state of the desktop WebRTC streaming session.
+ */
+ get_v1_desktop_stream_status: {
+ responses: {
+ /** @description Desktop stream status */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopStreamStatusResponse"];
+ };
+ };
+ };
+ };
+ /**
+ * Stop desktop streaming.
+ * @description Disables desktop websocket streaming for the managed desktop.
+ */
+ post_v1_desktop_stream_stop: {
+ responses: {
+ /** @description Desktop streaming stopped */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopStreamStatusResponse"];
+ };
+ };
+ };
+ };
/**
* List visible desktop windows.
* @description Performs a health-gated visible-window enumeration against the managed
@@ -2075,6 +2332,138 @@ export interface operations {
};
};
};
+ /**
+ * Get the currently focused desktop window.
+ * @description Returns information about the window that currently has input focus.
+ */
+ get_v1_desktop_windows_focused: {
+ responses: {
+ /** @description Focused window info */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowInfo"];
+ };
+ };
+ /** @description No window is focused */
+ 404: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
+ /**
+ * Focus a desktop window.
+ * @description Brings the specified window to the foreground and gives it input focus.
+ */
+ post_v1_desktop_window_focus: {
+ parameters: {
+ path: {
+ /** @description X11 window ID */
+ id: string;
+ };
+ };
+ responses: {
+ /** @description Window info after focus */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowInfo"];
+ };
+ };
+ /** @description Window not found */
+ 404: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
+ /**
+ * Move a desktop window.
+ * @description Moves the specified window to the given position.
+ */
+ post_v1_desktop_window_move: {
+ parameters: {
+ path: {
+ /** @description X11 window ID */
+ id: string;
+ };
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowMoveRequest"];
+ };
+ };
+ responses: {
+ /** @description Window info after move */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowInfo"];
+ };
+ };
+ /** @description Window not found */
+ 404: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
+ /**
+ * Resize a desktop window.
+ * @description Resizes the specified window to the given dimensions.
+ */
+ post_v1_desktop_window_resize: {
+ parameters: {
+ path: {
+ /** @description X11 window ID */
+ id: string;
+ };
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowResizeRequest"];
+ };
+ };
+ responses: {
+ /** @description Window info after resize */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["DesktopWindowInfo"];
+ };
+ };
+ /** @description Window not found */
+ 404: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ /** @description Desktop runtime is not ready */
+ 409: {
+ content: {
+ "application/json": components["schemas"]["ProblemDetails"];
+ };
+ };
+ };
+ };
get_v1_fs_entries: {
parameters: {
query?: {
diff --git a/sdks/typescript/src/types.ts b/sdks/typescript/src/types.ts
index 080e62c..d56a829 100644
--- a/sdks/typescript/src/types.ts
+++ b/sdks/typescript/src/types.ts
@@ -36,6 +36,16 @@ export type DesktopRecordingStatus = components["schemas"]["DesktopRecordingStat
export type DesktopRecordingInfo = JsonResponse;
export type DesktopRecordingListResponse = JsonResponse;
export type DesktopStreamStatusResponse = JsonResponse;
+export type DesktopClipboardResponse = JsonResponse;
+export type DesktopClipboardQuery =
+ QueryParams extends never ? Record : QueryParams;
+export type DesktopClipboardWriteRequest = JsonRequestBody;
+export type DesktopLaunchRequest = JsonRequestBody;
+export type DesktopLaunchResponse = JsonResponse;
+export type DesktopOpenRequest = JsonRequestBody;
+export type DesktopOpenResponse = JsonResponse;
+export type DesktopWindowMoveRequest = JsonRequestBody;
+export type DesktopWindowResizeRequest = JsonRequestBody;
export type AgentListResponse = JsonResponse;
export type AgentInfo = components["schemas"]["AgentInfo"];
export type AgentQuery = QueryParams;
diff --git a/server/packages/sandbox-agent/src/desktop_errors.rs b/server/packages/sandbox-agent/src/desktop_errors.rs
index 67f99b9..ce57e9e 100644
--- a/server/packages/sandbox-agent/src/desktop_errors.rs
+++ b/server/packages/sandbox-agent/src/desktop_errors.rs
@@ -113,6 +113,40 @@ impl DesktopProblem {
.with_processes(processes)
}
+ /// Builds the problem used when a window cannot be acted on: passes 404, the
+ /// title "Window Not Found", the code `window_not_found` and the caller's
+ /// `message` to `Self::new`.
+ pub fn window_not_found(message: impl Into) -> Self {
+ Self::new(404, "Window Not Found", "window_not_found", message)
+ }
+
+ /// Builds the problem used when no active window can be determined: passes
+ /// 404, the title "No Focused Window", the code `no_focused_window` and a
+ /// fixed message to `Self::new`.
+ pub fn no_focused_window() -> Self {
+ Self::new(
+ 404,
+ "No Focused Window",
+ "no_focused_window",
+ "No window currently has focus",
+ )
+ }
+
+ /// Builds a 409 problem with the title "Stream Already Active" and the code
+ /// `stream_already_active`, carrying the caller's `message`.
+ pub fn stream_already_active(message: impl Into) -> Self {
+ Self::new(
+ 409,
+ "Stream Already Active",
+ "stream_already_active",
+ message,
+ )
+ }
+
+ /// Builds a 409 problem with the title "Stream Not Active" and the code
+ /// `stream_not_active`, carrying the caller's `message`.
+ pub fn stream_not_active(message: impl Into) -> Self {
+ Self::new(409, "Stream Not Active", "stream_not_active", message)
+ }
+
+ /// Builds a 500 problem with the title "Clipboard Failed" and the code
+ /// `clipboard_failed`, carrying the caller's `message`.
+ pub fn clipboard_failed(message: impl Into) -> Self {
+ Self::new(500, "Clipboard Failed", "clipboard_failed", message)
+ }
+
+ /// Builds a 404 problem with the title "App Not Found" and the code
+ /// `app_not_found`, carrying the caller's `message`.
+ pub fn app_not_found(message: impl Into) -> Self {
+ Self::new(404, "App Not Found", "app_not_found", message)
+ }
+
pub fn to_problem_details(&self) -> ProblemDetails {
let mut extensions = Map::new();
extensions.insert("code".to_string(), Value::String(self.code.to_string()));
diff --git a/server/packages/sandbox-agent/src/desktop_runtime.rs b/server/packages/sandbox-agent/src/desktop_runtime.rs
index 66da141..29a84dc 100644
--- a/server/packages/sandbox-agent/src/desktop_runtime.rs
+++ b/server/packages/sandbox-agent/src/desktop_runtime.rs
@@ -74,6 +74,8 @@ struct DesktopRuntimeStateData {
xvfb: Option,
openbox: Option,
dbus_pid: Option,
+ streaming_config: Option,
+ recording_fps: Option,
}
#[derive(Debug)]
@@ -138,26 +140,10 @@ impl DesktopScreenshotOptions {
impl Default for DesktopRuntimeConfig {
fn default() -> Self {
- let display_num = std::env::var("SANDBOX_AGENT_DESKTOP_DISPLAY_NUM")
- .ok()
- .and_then(|value| value.parse::().ok())
- .filter(|value| *value > 0)
- .unwrap_or(DEFAULT_DISPLAY_NUM);
-
- let state_dir = std::env::var("SANDBOX_AGENT_DESKTOP_STATE_DIR")
- .ok()
- .map(PathBuf::from)
- .unwrap_or_else(default_state_dir);
-
- let assume_linux_for_tests = std::env::var("SANDBOX_AGENT_DESKTOP_TEST_ASSUME_LINUX")
- .ok()
- .map(|value| value == "1" || value.eq_ignore_ascii_case("true"))
- .unwrap_or(false);
-
Self {
- state_dir,
- display_num,
- assume_linux_for_tests,
+ state_dir: default_state_dir(),
+ display_num: DEFAULT_DISPLAY_NUM,
+ assume_linux_for_tests: false,
}
}
}
@@ -189,6 +175,8 @@ impl DesktopRuntime {
xvfb: None,
openbox: None,
dbus_pid: None,
+ streaming_config: None,
+ recording_fps: None,
})),
config,
}
@@ -200,6 +188,15 @@ impl DesktopRuntime {
let mut response = self.snapshot_locked(&state);
drop(state);
self.append_neko_process(&mut response).await;
+
+ // Include the current window list when the desktop is active so callers
+ // get windows for free when polling status (avoids a separate request).
+ if response.state == DesktopState::Active {
+ if let Ok(window_list) = self.list_windows().await {
+ response.windows = window_list.windows;
+ }
+ }
+
response
}
@@ -248,7 +245,9 @@ impl DesktopRuntime {
let dpi = request.dpi.unwrap_or(DEFAULT_DPI);
validate_start_request(width, height, dpi)?;
- let display_num = self.choose_display_num()?;
+ // Override display_num if provided in request
+ let display_num =
+ self.choose_display_num_from(request.display_num.unwrap_or(self.config.display_num))?;
let display = format!(":{display_num}");
let resolution = DesktopResolution {
width,
@@ -257,6 +256,29 @@ impl DesktopRuntime {
};
let environment = self.base_environment(&display)?;
+ // Store streaming and recording config for later use
+ state.streaming_config = if request.stream_video_codec.is_some()
+ || request.stream_audio_codec.is_some()
+ || request.stream_frame_rate.is_some()
+ || request.webrtc_port_range.is_some()
+ {
+ Some(crate::desktop_streaming::StreamingConfig {
+ video_codec: request
+ .stream_video_codec
+ .unwrap_or_else(|| "vp8".to_string()),
+ audio_codec: request
+ .stream_audio_codec
+ .unwrap_or_else(|| "opus".to_string()),
+ frame_rate: request.stream_frame_rate.unwrap_or(30).clamp(1, 60),
+ webrtc_port_range: request
+ .webrtc_port_range
+ .unwrap_or_else(|| "59050-59070".to_string()),
+ })
+ } else {
+ None
+ };
+ state.recording_fps = request.recording_fps.map(|fps| fps.clamp(1, 60));
+
state.state = DesktopState::Starting;
state.display_num = display_num;
state.display = Some(display.clone());
@@ -344,6 +366,8 @@ impl DesktopRuntime {
state.missing_dependencies = self.detect_missing_dependencies();
state.install_command = self.install_command_for(&state.missing_dependencies);
state.environment.clear();
+ state.streaming_config = None;
+ state.recording_fps = None;
let mut response = self.snapshot_locked(&state);
drop(state);
@@ -360,11 +384,17 @@ impl DesktopRuntime {
query: DesktopScreenshotQuery,
) -> Result {
let options = screenshot_options(query.format, query.quality, query.scale)?;
+ let show_cursor = query.show_cursor.unwrap_or(false);
let mut state = self.inner.lock().await;
let ready = self.ensure_ready_locked(&mut state).await?;
- let bytes = self
+ let mut bytes = self
.capture_screenshot_locked(&state, Some(&ready), &options)
.await?;
+ if show_cursor {
+ bytes = self
+ .composite_cursor(&state, &ready, bytes, &options)
+ .await?;
+ }
Ok(DesktopScreenshotData {
bytes,
content_type: options.content_type(),
@@ -377,12 +407,27 @@ impl DesktopRuntime {
) -> Result {
validate_region(&query)?;
let options = screenshot_options(query.format, query.quality, query.scale)?;
+ let show_cursor = query.show_cursor.unwrap_or(false);
let mut state = self.inner.lock().await;
let ready = self.ensure_ready_locked(&mut state).await?;
let crop = format!("{}x{}+{}+{}", query.width, query.height, query.x, query.y);
- let bytes = self
+ let mut bytes = self
.capture_screenshot_with_crop_locked(&state, &ready, &crop, &options)
.await?;
+ if show_cursor {
+ bytes = self
+ .composite_cursor_region(
+ &state,
+ &ready,
+ bytes,
+ &options,
+ query.x,
+ query.y,
+ query.width,
+ query.height,
+ )
+ .await?;
+ }
Ok(DesktopScreenshotData {
bytes,
content_type: options.content_type(),
@@ -598,6 +643,21 @@ impl DesktopRuntime {
let (x, y, width, height) = self
.window_geometry_locked(&state, &ready, &window_id)
.await?;
+ let is_active = active_window_id
+ .as_deref()
+ .map(|active| active == window_id)
+ .unwrap_or(false);
+
+ // Filter out noise: window-manager chrome, toolkit internals, and
+ // invisible helper windows. Always keep the active window so the
+ // caller can track focus even when the WM itself is focused.
+ if !is_active {
+ let trimmed = title.trim();
+ if trimmed.is_empty() || trimmed == "Openbox" || (width < 120 && height < 80) {
+ continue;
+ }
+ }
+
windows.push(DesktopWindowInfo {
id: window_id.clone(),
title,
@@ -605,10 +665,7 @@ impl DesktopRuntime {
y,
width,
height,
- is_active: active_window_id
- .as_deref()
- .map(|active| active == window_id)
- .unwrap_or(false),
+ is_active,
});
}
Ok(DesktopWindowListResponse { windows })
@@ -658,9 +715,10 @@ impl DesktopRuntime {
})?;
let environment = state.environment.clone();
let display = display.to_string();
+ let streaming_config = state.streaming_config.clone();
drop(state);
self.streaming_manager
- .start(&display, resolution, &environment)
+ .start(&display, resolution, &environment, streaming_config, None)
.await
}
@@ -1503,14 +1561,21 @@ impl DesktopRuntime {
}
fn choose_display_num(&self) -> Result {
+ self.choose_display_num_from(self.config.display_num)
+ }
+
+ fn choose_display_num_from(&self, start: i32) -> Result {
+ if start <= 0 {
+ return Err(DesktopProblem::invalid_action("displayNum must be > 0"));
+ }
for offset in 0..MAX_DISPLAY_PROBE {
- let candidate = self.config.display_num + offset;
+ let candidate = start + offset;
if !socket_path(candidate).exists() {
return Ok(candidate);
}
}
Err(DesktopProblem::runtime_failed(
- "unable to find an available X display starting at :99",
+ format!("unable to find an available X display starting at :{start}"),
None,
Vec::new(),
))
@@ -1579,6 +1644,7 @@ impl DesktopRuntime {
install_command: state.install_command.clone(),
processes: self.processes_locked(state),
runtime_log_path: Some(state.runtime_log_path.to_string_lossy().to_string()),
+ windows: Vec::new(),
}
}
@@ -1656,6 +1722,391 @@ impl DesktopRuntime {
.open(&state.runtime_log_path)
.and_then(|mut file| std::io::Write::write_all(&mut file, line.as_bytes()));
}
+
+    /// Reads the text of an X selection by running `xclip -selection <sel> -o`.
+    ///
+    /// `selection` falls back to `"clipboard"` when `None`. The response echoes the
+    /// selection that was queried. A failure to run `xclip` is reported as
+    /// `clipboard_failed`; a non-zero exit status is reported as empty text instead.
+    pub async fn get_clipboard(
+        &self,
+        selection: Option,
+    ) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let sel = selection.unwrap_or_else(|| "clipboard".to_string());
+        let xclip_args = vec![String::from("-selection"), sel.clone(), String::from("-o")];
+        let output = run_command_output("xclip", &xclip_args, &ready.environment, INPUT_TIMEOUT)
+            .await
+            .map_err(|err| {
+                DesktopProblem::clipboard_failed(format!("failed to read clipboard: {err}"))
+            })?;
+        // Empty clipboard is not an error: any unsuccessful exit yields empty text.
+        let text = if output.status.success() {
+            String::from_utf8_lossy(&output.stdout).into_owned()
+        } else {
+            String::new()
+        };
+        Ok(crate::desktop_types::DesktopClipboardResponse {
+            text,
+            selection: sel,
+        })
+    }
+
+    /// Writes `request.text` into one or more X selections via `xclip -selection <sel> -i`.
+    ///
+    /// A missing selection means `"clipboard"`; the value `"both"` writes to
+    /// `"clipboard"` and then `"primary"`. The first failing write aborts with
+    /// `clipboard_failed`, so an earlier selection may already have been updated.
+    pub async fn set_clipboard(
+        &self,
+        request: crate::desktop_types::DesktopClipboardWriteRequest,
+    ) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let targets = match request.selection {
+            Some(sel) if sel == "both" => vec!["clipboard".to_string(), "primary".to_string()],
+            Some(sel) => vec![sel],
+            None => vec!["clipboard".to_string()],
+        };
+        for target in targets {
+            let xclip_args = vec![String::from("-selection"), target, String::from("-i")];
+            // The text is fed to xclip on stdin; each invocation gets its own copy.
+            let payload = request.text.clone().into_bytes();
+            let output = run_command_output_with_stdin(
+                "xclip",
+                &xclip_args,
+                &ready.environment,
+                INPUT_TIMEOUT,
+                payload,
+            )
+            .await
+            .map_err(|err| {
+                DesktopProblem::clipboard_failed(format!("failed to write clipboard: {err}"))
+            })?;
+            if output.status.success() {
+                continue;
+            }
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return Err(DesktopProblem::clipboard_failed(format!(
+                "clipboard write failed: {}",
+                stderr.trim()
+            )));
+        }
+        Ok(DesktopActionResponse { ok: true })
+    }
+
+    /// Returns title and geometry of the currently active window.
+    ///
+    /// Fails with `no_focused_window` when no active window ID is reported.
+    /// The returned info always has `is_active` set to `true`.
+    pub async fn focused_window(&self) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let active = self.active_window_id_locked(&guard, &ready).await?;
+        let window_id = active.ok_or_else(DesktopProblem::no_focused_window)?;
+        let title = self.window_title_locked(&guard, &ready, &window_id).await?;
+        let geometry = self
+            .window_geometry_locked(&guard, &ready, &window_id)
+            .await?;
+        let (x, y, width, height) = geometry;
+        Ok(DesktopWindowInfo {
+            id: window_id,
+            title,
+            x,
+            y,
+            width,
+            height,
+            is_active: true,
+        })
+    }
+
+    /// Activates and focuses `window_id`, then returns its refreshed info.
+    ///
+    /// Issues `windowactivate --sync <id> windowfocus --sync <id>` through
+    /// `run_input_command_locked`. Any failure of that command is reported as
+    /// `window_not_found`, regardless of the underlying cause.
+    pub async fn focus_window(&self, window_id: &str) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let target = window_id.to_owned();
+        let args = vec![
+            String::from("windowactivate"),
+            String::from("--sync"),
+            target.clone(),
+            String::from("windowfocus"),
+            String::from("--sync"),
+            target,
+        ];
+        let outcome = self.run_input_command_locked(&guard, &ready, args).await;
+        outcome.map_err(|_| {
+            DesktopProblem::window_not_found(format!("Window {window_id} not found"))
+        })?;
+        self.window_info_locked(&guard, &ready, window_id).await
+    }
+
+    /// Moves `window_id` to (`request.x`, `request.y`) and returns its refreshed info.
+    ///
+    /// Issues `windowmove <id> <x> <y>` through `run_input_command_locked`. Any
+    /// failure of that command is reported as `window_not_found`.
+    pub async fn move_window(
+        &self,
+        window_id: &str,
+        request: crate::desktop_types::DesktopWindowMoveRequest,
+    ) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let args = vec![
+            String::from("windowmove"),
+            window_id.to_owned(),
+            request.x.to_string(),
+            request.y.to_string(),
+        ];
+        let outcome = self.run_input_command_locked(&guard, &ready, args).await;
+        outcome.map_err(|_| {
+            DesktopProblem::window_not_found(format!("Window {window_id} not found"))
+        })?;
+        self.window_info_locked(&guard, &ready, window_id).await
+    }
+
+    /// Resizes `window_id` to `request.width` x `request.height` and returns its
+    /// refreshed info.
+    ///
+    /// Issues `windowsize <id> <width> <height>` through `run_input_command_locked`.
+    /// Any failure of that command is reported as `window_not_found`.
+    pub async fn resize_window(
+        &self,
+        window_id: &str,
+        request: crate::desktop_types::DesktopWindowResizeRequest,
+    ) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+        let args = vec![
+            String::from("windowsize"),
+            window_id.to_owned(),
+            request.width.to_string(),
+            request.height.to_string(),
+        ];
+        let outcome = self.run_input_command_locked(&guard, &ready, args).await;
+        outcome.map_err(|_| {
+            DesktopProblem::window_not_found(format!("Window {window_id} not found"))
+        })?;
+        self.window_info_locked(&guard, &ready, window_id).await
+    }
+
+    /// Collects title, geometry and active flag for `window_id`.
+    ///
+    /// The caller passes the already-held runtime state and ready context.
+    /// `is_active` is true only when the reported active window ID equals `window_id`.
+    async fn window_info_locked(
+        &self,
+        state: &DesktopRuntimeStateData,
+        ready: &DesktopReadyContext,
+        window_id: &str,
+    ) -> Result {
+        let active_id = self.active_window_id_locked(state, ready).await?;
+        let title = self.window_title_locked(state, ready, window_id).await?;
+        let geometry = self.window_geometry_locked(state, ready, window_id).await?;
+        let (x, y, width, height) = geometry;
+        let is_active = active_id.as_deref() == Some(window_id);
+        Ok(DesktopWindowInfo {
+            id: window_id.to_string(),
+            title,
+            x,
+            y,
+            width,
+            height,
+            is_active,
+        })
+    }
+
+    /// Launches `request.app` as a desktop-owned process and optionally waits for
+    /// its first window.
+    ///
+    /// The binary is looked up with `find_binary`; if that finds nothing, `which`
+    /// is run in the desktop environment, and `app_not_found` is returned when
+    /// neither lookup succeeds. A failure to start the process is reported as
+    /// `runtime_failed`.
+    ///
+    /// When `request.wait` is true and the process has a pid, `xdotool search --pid`
+    /// is polled every 200 ms until a window ID is printed or a 5 second deadline
+    /// passes; `window_id` stays `None` if nothing was found. The runtime lock is
+    /// released before polling so other desktop requests are not blocked meanwhile.
+    pub async fn launch_app(
+        &self,
+        request: crate::desktop_types::DesktopLaunchRequest,
+    ) -> Result {
+        let mut state = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut state).await?;
+
+        // Verify the app exists
+        if find_binary(&request.app).is_none() {
+            // Also try which via the desktop environment
+            let located = run_command_output(
+                "which",
+                &[request.app.clone()],
+                &ready.environment,
+                INPUT_TIMEOUT,
+            )
+            .await
+            .is_ok_and(|out| out.status.success());
+            if !located {
+                return Err(DesktopProblem::app_not_found(format!(
+                    "Application '{}' not found in PATH",
+                    request.app
+                )));
+            }
+        }
+
+        let args = request.args.unwrap_or_default();
+        let snapshot = self
+            .process_runtime
+            .start_process(ProcessStartSpec {
+                command: request.app.clone(),
+                args,
+                cwd: None,
+                env: ready.environment.clone(),
+                tty: false,
+                interactive: false,
+                owner: ProcessOwner::Desktop,
+                restart_policy: None,
+            })
+            .await
+            .map_err(|err| {
+                DesktopProblem::runtime_failed(
+                    format!("failed to launch {}: {err}", request.app),
+                    None,
+                    self.processes_locked(&state),
+                )
+            })?;
+
+        // The window poll below only needs the environment held by `ready`; drop
+        // the runtime lock so it is not held for the whole wait.
+        drop(state);
+
+        let mut window_id = None;
+        if request.wait.unwrap_or(false) {
+            if let Some(pid) = snapshot.pid {
+                // Poll for window to appear
+                let deadline = tokio::time::Instant::now() + Duration::from_secs(5);
+                let search_args = vec!["search".to_string(), "--pid".to_string(), pid.to_string()];
+                loop {
+                    let output = run_command_output(
+                        "xdotool",
+                        &search_args,
+                        &ready.environment,
+                        INPUT_TIMEOUT,
+                    )
+                    .await;
+                    if let Ok(ref out) = output {
+                        if out.status.success() {
+                            // Only the first printed line is used as the window ID.
+                            let id = String::from_utf8_lossy(&out.stdout)
+                                .lines()
+                                .next()
+                                .map(|s| s.trim().to_string());
+                            if id.as_ref().is_some_and(|s| !s.is_empty()) {
+                                window_id = id;
+                                break;
+                            }
+                        }
+                    }
+                    if tokio::time::Instant::now() >= deadline {
+                        break;
+                    }
+                    tokio::time::sleep(Duration::from_millis(200)).await;
+                }
+            }
+        }
+
+        Ok(crate::desktop_types::DesktopLaunchResponse {
+            process_id: snapshot.id,
+            pid: snapshot.pid,
+            window_id,
+        })
+    }
+
+    /// Opens `request.target` by starting `xdg-open <target>` as a desktop-owned
+    /// process in the desktop environment.
+    ///
+    /// Returns the process ID and pid from the start snapshot; a failure to start
+    /// the process is reported as `runtime_failed`.
+    pub async fn open_target(
+        &self,
+        request: crate::desktop_types::DesktopOpenRequest,
+    ) -> Result {
+        let mut guard = self.inner.lock().await;
+        let ready = self.ensure_ready_locked(&mut guard).await?;
+
+        let spec = ProcessStartSpec {
+            command: String::from("xdg-open"),
+            args: vec![request.target],
+            cwd: None,
+            env: ready.environment.clone(),
+            tty: false,
+            interactive: false,
+            owner: ProcessOwner::Desktop,
+            restart_policy: None,
+        };
+        let started = self.process_runtime.start_process(spec).await;
+        let snapshot = started.map_err(|err| {
+            DesktopProblem::runtime_failed(
+                format!("failed to open target: {err}"),
+                None,
+                self.processes_locked(&guard),
+            )
+        })?;
+
+        Ok(crate::desktop_types::DesktopOpenResponse {
+            process_id: snapshot.id,
+            pid: snapshot.pid,
+        })
+    }
+
+    /// Draws the cursor marker onto a full-screen screenshot at the current mouse
+    /// position reported by `mouse_position_locked`.
+    async fn composite_cursor(
+        &self,
+        state: &DesktopRuntimeStateData,
+        ready: &DesktopReadyContext,
+        screenshot_bytes: Vec,
+        options: &DesktopScreenshotOptions,
+    ) -> Result, DesktopProblem> {
+        let pointer = self.mouse_position_locked(state, ready).await?;
+        let (cursor_x, cursor_y) = (pointer.x, pointer.y);
+        self.draw_cursor_on_image(
+            screenshot_bytes,
+            cursor_x,
+            cursor_y,
+            options,
+            &ready.environment,
+        )
+        .await
+    }
+
+    /// Draws the cursor marker onto a region screenshot.
+    ///
+    /// The mouse position is translated into region-relative coordinates by
+    /// subtracting the region origin. When the cursor lies outside the region on
+    /// any side (left/top of the origin, or at/after `region_width`/`region_height`),
+    /// the screenshot is returned unchanged.
+    async fn composite_cursor_region(
+        &self,
+        state: &DesktopRuntimeStateData,
+        ready: &DesktopReadyContext,
+        screenshot_bytes: Vec,
+        options: &DesktopScreenshotOptions,
+        region_x: i32,
+        region_y: i32,
+        region_width: u32,
+        region_height: u32,
+    ) -> Result, DesktopProblem> {
+        let pos = self.mouse_position_locked(state, ready).await?;
+        // Adjust cursor position relative to the region
+        let cursor_x = pos.x - region_x;
+        let cursor_y = pos.y - region_y;
+        // Widen to i64 so the u32 extents compare against the i32 offsets without casts
+        // that could truncate.
+        let outside = cursor_x < 0
+            || cursor_y < 0
+            || i64::from(cursor_x) >= i64::from(region_width)
+            || i64::from(cursor_y) >= i64::from(region_height);
+        if outside {
+            // Cursor is outside the region, return screenshot as-is
+            return Ok(screenshot_bytes);
+        }
+        // NOTE(review): the offsets are in unscaled desktop pixels; if `options`
+        // carries a scale factor that is applied to the captured image, the marker
+        // position may need the same scaling -- confirm against the capture helpers.
+        self.draw_cursor_on_image(
+            screenshot_bytes,
+            cursor_x,
+            cursor_y,
+            options,
+            &ready.environment,
+        )
+        .await
+    }
+
+    /// Overlays a red crosshair centred on (`x`, `y`) by piping the image through
+    /// ImageMagick `convert`.
+    ///
+    /// The image is sent on stdin (`-`) and the output target comes from
+    /// `options.output_arg()`. A failure to run `convert` is reported as
+    /// `screenshot_failed`; a non-zero exit falls back to the untouched input bytes.
+    async fn draw_cursor_on_image(
+        &self,
+        image_bytes: Vec,
+        x: i32,
+        y: i32,
+        options: &DesktopScreenshotOptions,
+        environment: &HashMap,
+    ) -> Result, DesktopProblem> {
+        // Each crosshair arm extends 10 pixels from the centre.
+        let (left, right) = (x - 10, x + 10);
+        let (top, bottom) = (y - 10, y + 10);
+        let draw_cmd = format!(
+            "stroke red stroke-width 2 line {left},{y},{right},{y} line {x},{top},{x},{bottom}"
+        );
+        let convert_args = vec![
+            String::from("-"), // read from stdin
+            String::from("-draw"),
+            draw_cmd,
+            options.output_arg().to_string(),
+        ];
+        // The command consumes a copy so the original stays available as a fallback.
+        let result = run_command_output_with_stdin(
+            "convert",
+            &convert_args,
+            environment,
+            SCREENSHOT_TIMEOUT,
+            image_bytes.clone(),
+        )
+        .await;
+        let output = result.map_err(|err| {
+            DesktopProblem::screenshot_failed(
+                format!("failed to composite cursor: {err}"),
+                Vec::new(),
+            )
+        })?;
+        if output.status.success() {
+            Ok(output.stdout)
+        } else {
+            // Fall back to returning the original screenshot without cursor
+            Ok(image_bytes)
+        }
+    }
+
+ /// Returns the streaming manager's current status; this is a direct
+ /// delegation to `streaming_manager.status()`.
+ pub async fn stream_status(&self) -> DesktopStreamStatusResponse {
+ self.streaming_manager.status().await
+ }
}
fn desktop_problem_to_sandbox_error(problem: DesktopProblem) -> SandboxError {
diff --git a/server/packages/sandbox-agent/src/desktop_streaming.rs b/server/packages/sandbox-agent/src/desktop_streaming.rs
index f2e7b15..8629132 100644
--- a/server/packages/sandbox-agent/src/desktop_streaming.rs
+++ b/server/packages/sandbox-agent/src/desktop_streaming.rs
@@ -21,13 +21,32 @@ const NEKO_READY_TIMEOUT: Duration = Duration::from_secs(15);
/// How long between readiness polls.
const NEKO_READY_POLL: Duration = Duration::from_millis(300);
+/// Tunable parameters handed to the neko process when streaming starts.
+#[derive(Debug, Clone)]
+pub struct StreamingConfig {
+ /// Value passed to neko's `--capture.video.codec` flag.
+ pub video_codec: String,
+ /// Value passed to neko's `--capture.audio.codec` flag.
+ pub audio_codec: String,
+ /// Refresh rate appended to the screen spec (`<width>x<height>@<frame_rate>`).
+ pub frame_rate: u32,
+ /// Value passed to neko's `--webrtc.epr` flag.
+ pub webrtc_port_range: String,
+}
+
+impl Default for StreamingConfig {
+    /// Defaults: VP8 video, Opus audio, 30 frames per second and the `NEKO_EPR`
+    /// port range.
+    fn default() -> Self {
+        let video_codec = String::from("vp8");
+        let audio_codec = String::from("opus");
+        let webrtc_port_range = NEKO_EPR.to_string();
+        Self {
+            video_codec,
+            audio_codec,
+            frame_rate: 30,
+            webrtc_port_range,
+        }
+    }
+}
+
#[derive(Debug, Clone)]
pub struct DesktopStreamingManager {
inner: Arc>,
process_runtime: Arc,
}
-#[derive(Debug, Default)]
+#[derive(Debug)]
struct DesktopStreamingState {
active: bool,
process_id: Option,
@@ -37,6 +56,23 @@ struct DesktopStreamingState {
neko_session_cookie: Option,
display: Option,
resolution: Option,
+ streaming_config: StreamingConfig,
+ window_id: Option,
+}
+
+// Initial state: streaming inactive, no process, neko URL, session cookie,
+// display, resolution or window recorded, and the default `StreamingConfig`.
+impl Default for DesktopStreamingState {
+ fn default() -> Self {
+ Self {
+ active: false,
+ process_id: None,
+ neko_base_url: None,
+ neko_session_cookie: None,
+ display: None,
+ resolution: None,
+ streaming_config: StreamingConfig::default(),
+ window_id: None,
+ }
+ }
}
impl DesktopStreamingManager {
@@ -53,11 +89,18 @@ impl DesktopStreamingManager {
display: &str,
resolution: DesktopResolution,
environment: &HashMap,
+ config: Option,
+ window_id: Option,
) -> Result {
+ let config = config.unwrap_or_default();
let mut state = self.inner.lock().await;
if state.active {
- return Ok(DesktopStreamStatusResponse { active: true });
+ return Ok(DesktopStreamStatusResponse {
+ active: true,
+ window_id: state.window_id.clone(),
+ process_id: state.process_id.clone(),
+ });
}
// Stop any stale process.
@@ -72,7 +115,10 @@ impl DesktopStreamingManager {
env.insert("DISPLAY".to_string(), display.to_string());
let bind_addr = format!("0.0.0.0:{}", NEKO_INTERNAL_PORT);
- let screen = format!("{}x{}@30", resolution.width, resolution.height);
+ let screen = format!(
+ "{}x{}@{}",
+ resolution.width, resolution.height, config.frame_rate
+ );
let snapshot = self
.process_runtime
@@ -89,11 +135,11 @@ impl DesktopStreamingManager {
"--capture.video.display".to_string(),
display.to_string(),
"--capture.video.codec".to_string(),
- "vp8".to_string(),
+ config.video_codec.clone(),
"--capture.audio.codec".to_string(),
- "opus".to_string(),
+ config.audio_codec.clone(),
"--webrtc.epr".to_string(),
- NEKO_EPR.to_string(),
+ config.webrtc_port_range.clone(),
"--webrtc.icelite".to_string(),
"--webrtc.nat1to1".to_string(),
"127.0.0.1".to_string(),
@@ -117,10 +163,13 @@ impl DesktopStreamingManager {
})?;
let neko_base = format!("http://127.0.0.1:{}", NEKO_INTERNAL_PORT);
+ let process_id_clone = snapshot.id.clone();
state.process_id = Some(snapshot.id.clone());
state.neko_base_url = Some(neko_base.clone());
state.display = Some(display.to_string());
state.resolution = Some(resolution);
+ state.streaming_config = config;
+ state.window_id = window_id;
state.active = true;
// Drop the lock before waiting for readiness.
@@ -183,7 +232,15 @@ impl DesktopStreamingManager {
state.neko_session_cookie = Some(cookie.clone());
}
- Ok(DesktopStreamStatusResponse { active: true })
+ let state = self.inner.lock().await;
+ let state_window_id = state.window_id.clone();
+ drop(state);
+
+ Ok(DesktopStreamStatusResponse {
+ active: true,
+ window_id: state_window_id,
+ process_id: Some(process_id_clone),
+ })
}
/// Stop streaming and tear down neko subprocess.
@@ -200,7 +257,21 @@ impl DesktopStreamingManager {
state.neko_session_cookie = None;
state.display = None;
state.resolution = None;
- DesktopStreamStatusResponse { active: false }
+ state.window_id = None;
+ DesktopStreamStatusResponse {
+ active: false,
+ window_id: None,
+ process_id: None,
+ }
+ }
+
+    /// Reports whether streaming is active together with the recorded window ID
+    /// and process ID, read under the state lock.
+    pub async fn status(&self) -> DesktopStreamStatusResponse {
+        let guard = self.inner.lock().await;
+        let active = guard.active;
+        let window_id = guard.window_id.clone();
+        let process_id = guard.process_id.clone();
+        DesktopStreamStatusResponse {
+            active,
+            window_id,
+            process_id,
+        }
     }
pub async fn ensure_active(&self) -> Result<(), SandboxError> {
diff --git a/server/packages/sandbox-agent/src/desktop_types.rs b/server/packages/sandbox-agent/src/desktop_types.rs
index 7f813da..912e62f 100644
--- a/server/packages/sandbox-agent/src/desktop_types.rs
+++ b/server/packages/sandbox-agent/src/desktop_types.rs
@@ -60,6 +60,9 @@ pub struct DesktopStatusResponse {
pub processes: Vec,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub runtime_log_path: Option,
+ /// Current visible windows (included when the desktop is active).
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
+ pub windows: Vec,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
@@ -71,6 +74,20 @@ pub struct DesktopStartRequest {
pub height: Option,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub dpi: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub display_num: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub state_dir: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub stream_video_codec: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub stream_audio_codec: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub stream_frame_rate: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub webrtc_port_range: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub recording_fps: Option,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
@@ -82,6 +99,8 @@ pub struct DesktopScreenshotQuery {
pub quality: Option,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scale: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub show_cursor: Option,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
@@ -105,6 +124,8 @@ pub struct DesktopRegionScreenshotQuery {
pub quality: Option,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scale: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub show_cursor: Option,
}
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, PartialEq, Eq)]
@@ -299,4 +320,78 @@ pub struct DesktopRecordingListResponse {
#[serde(rename_all = "camelCase")]
pub struct DesktopStreamStatusResponse {
pub active: bool,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub window_id: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub process_id: Option,
+}
+
+// Clipboard read result: the text that was read and the selection it came from.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopClipboardResponse {
+ pub text: String,
+ pub selection: String,
+}
+
+// Query parameters for reading the clipboard; `selection` is optional and is
+// omitted from serialized output when `None`.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema, IntoParams, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopClipboardQuery {
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub selection: Option,
+}
+
+// Body for writing the clipboard: the text to store and an optional target
+// selection.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopClipboardWriteRequest {
+ pub text: String,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub selection: Option,
+}
+
+// Body for launching an application: the command, optional arguments, and an
+// optional `wait` flag.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopLaunchRequest {
+ pub app: String,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub args: Option>,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub wait: Option,
+}
+
+// Launch result: the managed process ID plus, when available, the pid and a
+// window ID. Optional fields are omitted from JSON when `None`.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopLaunchResponse {
+ pub process_id: String,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub pid: Option,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub window_id: Option,
+}
+
+// Body for the open endpoint: the single target to open.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopOpenRequest {
+ pub target: String,
+}
+
+// Open result: the managed process ID and, when available, its pid.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopOpenResponse {
+ pub process_id: String,
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub pid: Option,
+}
+
+// Body for moving a window: the target position as signed coordinates.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopWindowMoveRequest {
+ pub x: i32,
+ pub y: i32,
+}
+
+// Body for resizing a window: the requested width and height (unsigned).
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct DesktopWindowResizeRequest {
+ pub width: u32,
+ pub height: u32,
}
diff --git a/server/packages/sandbox-agent/src/router.rs b/server/packages/sandbox-agent/src/router.rs
index 35f7212..195a5cd 100644
--- a/server/packages/sandbox-agent/src/router.rs
+++ b/server/packages/sandbox-agent/src/router.rs
@@ -216,6 +216,28 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc)
.route("/desktop/keyboard/up", post(post_v1_desktop_keyboard_up))
.route("/desktop/display/info", get(get_v1_desktop_display_info))
.route("/desktop/windows", get(get_v1_desktop_windows))
+ .route(
+ "/desktop/windows/focused",
+ get(get_v1_desktop_windows_focused),
+ )
+ .route(
+ "/desktop/windows/:id/focus",
+ post(post_v1_desktop_window_focus),
+ )
+ .route(
+ "/desktop/windows/:id/move",
+ post(post_v1_desktop_window_move),
+ )
+ .route(
+ "/desktop/windows/:id/resize",
+ post(post_v1_desktop_window_resize),
+ )
+ .route(
+ "/desktop/clipboard",
+ get(get_v1_desktop_clipboard).post(post_v1_desktop_clipboard),
+ )
+ .route("/desktop/launch", post(post_v1_desktop_launch))
+ .route("/desktop/open", post(post_v1_desktop_open))
.route(
"/desktop/recording/start",
post(post_v1_desktop_recording_start),
@@ -235,6 +257,7 @@ pub fn build_router_with_state(shared: Arc) -> (Router, Arc)
)
.route("/desktop/stream/start", post(post_v1_desktop_stream_start))
.route("/desktop/stream/stop", post(post_v1_desktop_stream_stop))
+ .route("/desktop/stream/status", get(get_v1_desktop_stream_status))
.route("/desktop/stream/signaling", get(get_v1_desktop_stream_ws))
.route("/agents", get(get_v1_agents))
.route("/agents/:agent", get(get_v1_agent))
@@ -405,6 +428,15 @@ pub async fn shutdown_servers(state: &Arc) {
post_v1_desktop_keyboard_up,
get_v1_desktop_display_info,
get_v1_desktop_windows,
+ get_v1_desktop_windows_focused,
+ post_v1_desktop_window_focus,
+ post_v1_desktop_window_move,
+ post_v1_desktop_window_resize,
+ get_v1_desktop_clipboard,
+ post_v1_desktop_clipboard,
+ post_v1_desktop_launch,
+ post_v1_desktop_open,
+ get_v1_desktop_stream_status,
post_v1_desktop_recording_start,
post_v1_desktop_recording_stop,
get_v1_desktop_recordings,
@@ -483,6 +515,15 @@ pub async fn shutdown_servers(state: &Arc) {
DesktopRecordingInfo,
DesktopRecordingListResponse,
DesktopStreamStatusResponse,
+ DesktopClipboardResponse,
+ DesktopClipboardQuery,
+ DesktopClipboardWriteRequest,
+ DesktopLaunchRequest,
+ DesktopLaunchResponse,
+ DesktopOpenRequest,
+ DesktopOpenResponse,
+ DesktopWindowMoveRequest,
+ DesktopWindowResizeRequest,
ServerStatus,
ServerStatusInfo,
AgentCapabilities,
@@ -1029,6 +1070,193 @@ async fn get_v1_desktop_windows(
Ok(Json(windows))
}
+/// Get the currently focused desktop window.
+///
+/// Returns information about the window that currently has input focus.
+#[utoipa::path(
+    get,
+    path = "/v1/desktop/windows/focused",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Focused window info", body = DesktopWindowInfo),
+        (status = 404, description = "No window is focused", body = ProblemDetails),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn get_v1_desktop_windows_focused(
+    State(state): State>,
+) -> Result, ApiError> {
+    // `?` hands any runtime failure back to the caller as this handler's `ApiError`.
+    Ok(Json(state.desktop_runtime().focused_window().await?))
+}
+
+/// Focus a desktop window.
+///
+/// Brings the specified window to the foreground and gives it input focus.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/windows/{id}/focus",
+    tag = "v1",
+    params(
+        ("id" = String, Path, description = "X11 window ID")
+    ),
+    responses(
+        (status = 200, description = "Window info after focus", body = DesktopWindowInfo),
+        (status = 404, description = "Window not found", body = ProblemDetails),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_window_focus(
+    State(state): State>,
+    Path(window_id): Path,
+) -> Result, ApiError> {
+    // The runtime's answer is returned as the JSON body; errors propagate through `?`.
+    Ok(Json(state.desktop_runtime().focus_window(&window_id).await?))
+}
+
+/// Move a desktop window.
+///
+/// Moves the specified window to the given position.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/windows/{id}/move",
+    tag = "v1",
+    params(
+        ("id" = String, Path, description = "X11 window ID")
+    ),
+    request_body = DesktopWindowMoveRequest,
+    responses(
+        (status = 200, description = "Window info after move", body = DesktopWindowInfo),
+        (status = 404, description = "Window not found", body = ProblemDetails),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_window_move(
+    State(state): State>,
+    Path(window_id): Path,
+    Json(request): Json,
+) -> Result, ApiError> {
+    // Path id and JSON body are forwarded unchanged to the desktop runtime.
+    Ok(Json(
+        state
+            .desktop_runtime()
+            .move_window(&window_id, request)
+            .await?,
+    ))
+}
+
+/// Resize a desktop window.
+///
+/// Resizes the specified window to the given dimensions.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/windows/{id}/resize",
+    tag = "v1",
+    params(
+        ("id" = String, Path, description = "X11 window ID")
+    ),
+    request_body = DesktopWindowResizeRequest,
+    responses(
+        (status = 200, description = "Window info after resize", body = DesktopWindowInfo),
+        (status = 404, description = "Window not found", body = ProblemDetails),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_window_resize(
+    State(state): State>,
+    Path(window_id): Path,
+    Json(request): Json,
+) -> Result, ApiError> {
+    // Path id and JSON body are forwarded unchanged to the desktop runtime.
+    Ok(Json(
+        state
+            .desktop_runtime()
+            .resize_window(&window_id, request)
+            .await?,
+    ))
+}
+
+/// Read the desktop clipboard.
+///
+/// Returns the current text content of the X11 clipboard.
+#[utoipa::path(
+    get,
+    path = "/v1/desktop/clipboard",
+    tag = "v1",
+    params(DesktopClipboardQuery),
+    responses(
+        (status = 200, description = "Clipboard contents", body = DesktopClipboardResponse),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
+        (status = 500, description = "Clipboard read failed", body = ProblemDetails)
+    )
+)]
+async fn get_v1_desktop_clipboard(
+    State(state): State>,
+    Query(query): Query,
+) -> Result, ApiError> {
+    // Only the `selection` field of the query string is passed on to the runtime.
+    let selection = query.selection;
+    Ok(Json(state.desktop_runtime().get_clipboard(selection).await?))
+}
+
+/// Write to the desktop clipboard.
+///
+/// Sets the text content of the X11 clipboard.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/clipboard",
+    tag = "v1",
+    request_body = DesktopClipboardWriteRequest,
+    responses(
+        (status = 200, description = "Clipboard updated", body = DesktopActionResponse),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails),
+        (status = 500, description = "Clipboard write failed", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_clipboard(
+    State(state): State>,
+    Json(request): Json,
+) -> Result, ApiError> {
+    // The request body is handed to the runtime as-is; failures propagate through `?`.
+    Ok(Json(state.desktop_runtime().set_clipboard(request).await?))
+}
+
+/// Launch a desktop application.
+///
+/// Launches an application by name on the managed desktop, optionally waiting
+/// for its window to appear.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/launch",
+    tag = "v1",
+    request_body = DesktopLaunchRequest,
+    responses(
+        (status = 200, description = "Application launched", body = DesktopLaunchResponse),
+        (status = 404, description = "Application not found", body = ProblemDetails),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_launch(
+    State(state): State>,
+    Json(request): Json,
+) -> Result, ApiError> {
+    // The request body is handed to the runtime as-is; failures propagate through `?`.
+    Ok(Json(state.desktop_runtime().launch_app(request).await?))
+}
+
+/// Open a file or URL with the default handler.
+///
+/// Opens a file path or URL using xdg-open on the managed desktop.
+#[utoipa::path(
+    post,
+    path = "/v1/desktop/open",
+    tag = "v1",
+    request_body = DesktopOpenRequest,
+    responses(
+        (status = 200, description = "Target opened", body = DesktopOpenResponse),
+        (status = 409, description = "Desktop runtime is not ready", body = ProblemDetails)
+    )
+)]
+async fn post_v1_desktop_open(
+    State(state): State>,
+    Json(request): Json,
+) -> Result, ApiError> {
+    // The request body is handed to the runtime as-is; failures propagate through `?`.
+    Ok(Json(state.desktop_runtime().open_target(request).await?))
+}
+
/// Start desktop recording.
///
/// Starts an ffmpeg x11grab recording against the managed desktop and returns
@@ -1201,6 +1429,23 @@ async fn post_v1_desktop_stream_stop(
Ok(Json(state.desktop_runtime().stop_streaming().await))
}
+/// Get desktop stream status.
+///
+/// Returns the current state of the desktop WebRTC streaming session.
+#[utoipa::path(
+    get,
+    path = "/v1/desktop/stream/status",
+    tag = "v1",
+    responses(
+        (status = 200, description = "Desktop stream status", body = DesktopStreamStatusResponse)
+    )
+)]
+async fn get_v1_desktop_stream_status(
+    State(state): State>,
+) -> Result, ApiError> {
+    // This handler has no error path of its own: the awaited status is wrapped directly.
+    let status = state.desktop_runtime().stream_status().await;
+    Ok(Json(status))
+}
+
/// Open a desktop WebRTC signaling session.
///
/// Upgrades the connection to a WebSocket used for WebRTC signaling between
diff --git a/server/packages/sandbox-agent/tests/common_software.rs b/server/packages/sandbox-agent/tests/common_software.rs
new file mode 100644
index 0000000..c40eefc
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/common_software.rs
@@ -0,0 +1,497 @@
+//! Integration tests that verify all software documented in docs/common-software.mdx
+//! is installed and working inside the sandbox.
+//!
+//! These tests use `docker/test-common-software/Dockerfile`, which extends the base
+//! test-agent image with all documented software pre-installed.
+//!
+//! KEEP IN SYNC with docs/common-software.mdx and docker/test-common-software/Dockerfile.
+//!
+//! Run with:
+//!   cargo test -p sandbox-agent --test common_software
+use reqwest::header::HeaderMap;
+use reqwest::{Method, StatusCode};
+use serde_json::{json, Value};
+use serial_test::serial;
+
+#[path = "support/docker_common_software.rs"]
+mod docker_support;
+use docker_support::TestApp;
+
+/// Sends one HTTP request to the sandbox server and returns the raw response parts.
+///
+/// When `body` is `Some`, it is serialized to text and sent with a
+/// `content-type: application/json` header; otherwise the request has no body.
+/// Returns the status code, a copy of the response headers, and the body bytes.
+///
+/// Panics if the request cannot be sent or the response body cannot be read.
+async fn send_request(
+    app: &docker_support::DockerApp,
+    method: Method,
+    uri: &str,
+    body: Option<Value>,
+) -> (StatusCode, HeaderMap, Vec<u8>) {
+    let client = reqwest::Client::new();
+    let mut builder = client.request(method, app.http_url(uri));
+
+    // Attach the JSON payload first so both cases share a single send path.
+    if let Some(body) = body {
+        builder = builder
+            .header("content-type", "application/json")
+            .body(body.to_string());
+    }
+
+    let response = builder.send().await.expect("request");
+    let status = response.status();
+    let headers = response.headers().clone();
+    let bytes = response.bytes().await.expect("body");
+    (status, headers, bytes.to_vec())
+}
+
+/// Decodes a response body as JSON, mapping an empty body to `Value::Null`.
+///
+/// Panics if a non-empty body is not valid JSON.
+fn parse_json(bytes: &[u8]) -> Value {
+    match bytes {
+        [] => Value::Null,
+        payload => serde_json::from_slice(payload).expect("valid json"),
+    }
+}
+
+/// Runs `command` with `args` inside the sandbox using a 30 second timeout.
+///
+/// Delegates to `run_ok_with_timeout`, so it asserts an HTTP 200 response and a
+/// zero exit code, and returns the parsed JSON response.
+async fn run_ok(app: &docker_support::DockerApp, command: &str, args: &[&str]) -> Value {
+    const DEFAULT_TIMEOUT_MS: u64 = 30_000;
+    run_ok_with_timeout(app, command, args, DEFAULT_TIMEOUT_MS).await
+}
+
+/// Runs `command` with `args` through `POST /v1/processes/run` and returns the parsed response.
+///
+/// `timeout_ms` is sent as the request's `timeoutMs` field. Panics unless the
+/// endpoint answers 200 and the response's `exitCode` is 0; the failure message
+/// includes the response body or the captured stdout/stderr.
+async fn run_ok_with_timeout(
+    app: &docker_support::DockerApp,
+    command: &str,
+    args: &[&str],
+    timeout_ms: u64,
+) -> Value {
+    let payload = json!({
+        "command": command,
+        "args": args,
+        "timeoutMs": timeout_ms
+    });
+    let (status, _headers, raw) =
+        send_request(app, Method::POST, "/v1/processes/run", Some(payload)).await;
+    assert_eq!(
+        status,
+        StatusCode::OK,
+        "run {command} failed: {}",
+        String::from_utf8_lossy(&raw)
+    );
+
+    let parsed = parse_json(&raw);
+    assert_eq!(
+        parsed["exitCode"], 0,
+        "{command} exited with non-zero code.\nstdout: {}\nstderr: {}",
+        parsed["stdout"], parsed["stderr"]
+    );
+    parsed
+}
+
+// ---------------------------------------------------------------------------
+// Browsers
+// ---------------------------------------------------------------------------
+
+/// `chromium --version` must exit 0 and print a banner containing "Chromium".
+#[tokio::test]
+#[serial]
+async fn chromium_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "chromium", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.contains("Chromium"),
+        "expected Chromium version string, got: {stdout}"
+    );
+}
+
+/// `firefox-esr --version` must exit 0 and print a banner containing "Mozilla Firefox".
+#[tokio::test]
+#[serial]
+async fn firefox_esr_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "firefox-esr", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.contains("Mozilla Firefox"),
+        "expected Firefox version string, got: {stdout}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Languages and runtimes
+// ---------------------------------------------------------------------------
+
+/// `node --version` must exit 0 and print a version that starts with `v`.
+#[tokio::test]
+#[serial]
+async fn nodejs_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "node", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.starts_with('v'),
+        "expected node version string, got: {stdout}"
+    );
+}
+
+/// `npm --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn npm_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "npm", &["--version"]).await;
+}
+
+/// `python3 --version` must exit 0 and print a banner containing "Python 3".
+#[tokio::test]
+#[serial]
+async fn python3_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "python3", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.contains("Python 3"),
+        "expected Python version string, got: {stdout}"
+    );
+}
+
+/// `pip3 --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn pip3_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "pip3", &["--version"]).await;
+}
+
+/// `java --version` must exit 0 and mention Java/OpenJDK on stdout or stderr.
+#[tokio::test]
+#[serial]
+async fn java_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    // Both output streams are searched, so the check does not depend on which
+    // one the JDK writes its banner to.
+    let payload = json!({
+        "command": "java",
+        "args": ["--version"],
+        "timeoutMs": 30000
+    });
+    let (status, _headers, raw) = send_request(
+        &sandbox.app,
+        Method::POST,
+        "/v1/processes/run",
+        Some(payload),
+    )
+    .await;
+    assert_eq!(status, StatusCode::OK);
+
+    let parsed = parse_json(&raw);
+    assert_eq!(parsed["exitCode"], 0);
+
+    let combined = [
+        parsed["stdout"].as_str().unwrap_or_default(),
+        parsed["stderr"].as_str().unwrap_or_default(),
+    ]
+    .concat();
+    let mentions_java = ["openjdk", "OpenJDK", "java"]
+        .iter()
+        .any(|needle| combined.contains(*needle));
+    assert!(
+        mentions_java,
+        "expected Java version string, got: {combined}"
+    );
+}
+
+/// `ruby --version` must exit 0 and print a banner containing "ruby".
+#[tokio::test]
+#[serial]
+async fn ruby_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "ruby", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.contains("ruby"),
+        "expected Ruby version string, got: {stdout}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Databases
+// ---------------------------------------------------------------------------
+
+/// `sqlite3 --version` must exit 0 and print something on stdout.
+#[tokio::test]
+#[serial]
+async fn sqlite3_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "sqlite3", &["--version"]).await;
+    let printed_nothing = response["stdout"].as_str().unwrap_or_default().is_empty();
+    assert!(!printed_nothing, "expected sqlite3 version output");
+}
+
+/// `redis-server --version` must exit 0 and mention Redis in either letter case.
+#[tokio::test]
+#[serial]
+async fn redis_server_is_installed() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "redis-server", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    let mentions_redis = ["Redis", "redis"]
+        .iter()
+        .any(|needle| stdout.contains(*needle));
+    assert!(
+        mentions_redis,
+        "expected Redis version string, got: {stdout}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Build tools
+// ---------------------------------------------------------------------------
+
+/// `gcc --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn gcc_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "gcc", &["--version"]).await;
+}
+
+/// `make --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn make_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "make", &["--version"]).await;
+}
+
+/// `cmake --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn cmake_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "cmake", &["--version"]).await;
+}
+
+/// `pkg-config --version` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn pkg_config_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "pkg-config", &["--version"]).await;
+}
+
+// ---------------------------------------------------------------------------
+// CLI tools
+// ---------------------------------------------------------------------------
+
+/// `git --version` must exit 0 and print a banner containing "git version".
+#[tokio::test]
+#[serial]
+async fn git_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "git", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    assert!(
+        stdout.contains("git version"),
+        "expected git version string, got: {stdout}"
+    );
+}
+
+/// jq must extract `.a` from a small JSON document piped to it by a shell.
+#[tokio::test]
+#[serial]
+async fn jq_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    // Goes through `sh -c` because the check relies on a shell pipeline.
+    let pipeline = "echo '{\"a\":1}' | jq '.a'";
+    let response = run_ok(&sandbox.app, "sh", &["-c", pipeline]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default().trim();
+    assert_eq!(stdout, "1", "jq did not parse JSON correctly: {stdout}");
+}
+
+/// `tmux -V` must exit 0; the output itself is not inspected.
+#[tokio::test]
+#[serial]
+async fn tmux_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "tmux", &["-V"]).await;
+}
+
+// ---------------------------------------------------------------------------
+// Media and graphics
+// ---------------------------------------------------------------------------
+
+/// `ffmpeg -version` must exit 0 and print "ffmpeg version" on stdout or stderr.
+#[tokio::test]
+#[serial]
+async fn ffmpeg_is_installed_and_runs() {
+    let sandbox = TestApp::new();
+    // Both output streams are searched, so the check does not depend on which
+    // one carries the version banner.
+    let payload = json!({
+        "command": "ffmpeg",
+        "args": ["-version"],
+        "timeoutMs": 10000
+    });
+    let (status, _headers, raw) = send_request(
+        &sandbox.app,
+        Method::POST,
+        "/v1/processes/run",
+        Some(payload),
+    )
+    .await;
+    assert_eq!(status, StatusCode::OK);
+
+    let parsed = parse_json(&raw);
+    assert_eq!(parsed["exitCode"], 0);
+
+    let combined = [
+        parsed["stdout"].as_str().unwrap_or_default(),
+        parsed["stderr"].as_str().unwrap_or_default(),
+    ]
+    .concat();
+    assert!(
+        combined.contains("ffmpeg version"),
+        "expected ffmpeg version string, got: {combined}"
+    );
+}
+
+/// ImageMagick's `convert --version` must exit 0; the output is not inspected.
+#[tokio::test]
+#[serial]
+async fn imagemagick_is_installed() {
+    let sandbox = TestApp::new();
+    let _response = run_ok(&sandbox.app, "convert", &["--version"]).await;
+}
+
+/// Poppler's `pdftoppm -v` must be runnable and exit 0.
+#[tokio::test]
+#[serial]
+async fn poppler_pdftoppm_is_installed() {
+    let sandbox = TestApp::new();
+    // Only the HTTP status and exit code are checked; the output is ignored.
+    let payload = json!({
+        "command": "pdftoppm",
+        "args": ["-v"],
+        "timeoutMs": 10000
+    });
+    let (status, _headers, raw) = send_request(
+        &sandbox.app,
+        Method::POST,
+        "/v1/processes/run",
+        Some(payload),
+    )
+    .await;
+    assert_eq!(status, StatusCode::OK);
+    assert_eq!(parse_json(&raw)["exitCode"], 0);
+}
+
+// ---------------------------------------------------------------------------
+// Desktop applications (verify binary exists, don't launch GUI)
+// ---------------------------------------------------------------------------
+
+/// `gimp --version` must exit 0 and identify itself as GIMP on stdout.
+#[tokio::test]
+#[serial]
+async fn gimp_is_installed() {
+    let sandbox = TestApp::new();
+    let response = run_ok(&sandbox.app, "gimp", &["--version"]).await;
+    let stdout = response["stdout"].as_str().unwrap_or_default();
+    let mentions_gimp = ["GIMP", "gimp", "Image Manipulation"]
+        .iter()
+        .any(|needle| stdout.contains(*needle));
+    assert!(
+        mentions_gimp,
+        "expected GIMP version string, got: {stdout}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Functional tests: verify tools actually work, not just that they're present
+// ---------------------------------------------------------------------------
+
+/// Python must run an inline script whose stdout parses as JSON with `ok == true`.
+#[tokio::test]
+#[serial]
+async fn python3_can_run_script() {
+    let sandbox = TestApp::new();
+    let script = "import json; print(json.dumps({'ok': True}))";
+    let response = run_ok(&sandbox.app, "python3", &["-c", script]).await;
+    let printed = response["stdout"].as_str().unwrap_or_default().trim();
+    let decoded: Value = serde_json::from_str(printed).expect("python json output");
+    assert_eq!(decoded["ok"], true);
+}
+
+/// Node must run an inline script whose stdout parses as JSON with `ok == true`.
+#[tokio::test]
+#[serial]
+async fn node_can_run_script() {
+    let sandbox = TestApp::new();
+    let script = "console.log(JSON.stringify({ok: true}))";
+    let response = run_ok(&sandbox.app, "node", &["-e", script]).await;
+    let printed = response["stdout"].as_str().unwrap_or_default().trim();
+    let decoded: Value = serde_json::from_str(printed).expect("node json output");
+    assert_eq!(decoded["ok"], true);
+}
+
+/// Ruby must run an inline script whose stdout parses as JSON with `ok == true`.
+#[tokio::test]
+#[serial]
+async fn ruby_can_run_script() {
+    let sandbox = TestApp::new();
+    let script = "require 'json'; puts JSON.generate({ok: true})";
+    let response = run_ok(&sandbox.app, "ruby", &["-e", script]).await;
+    let printed = response["stdout"].as_str().unwrap_or_default().trim();
+    let decoded: Value = serde_json::from_str(printed).expect("ruby json output");
+    assert_eq!(decoded["ok"], true);
+}
+
+/// gcc must compile a minimal C program, and the resulting binary must print "hello".
+///
+/// Runs three sandbox commands in sequence: write `/tmp/hello.c`, compile it to
+/// `/tmp/hello`, then execute the binary and compare its trimmed stdout.
+#[tokio::test]
+#[serial]
+async fn gcc_can_compile_and_run_hello_world() {
+    let test_app = TestApp::new();
+
+    // Write a C file. The program calls printf, so it must include <stdio.h>;
+    // a bare `#include` with no header name does not compile.
+    run_ok(
+        &test_app.app,
+        "sh",
+        &["-c", r#"printf '#include <stdio.h>\nint main(){printf("hello\\n");return 0;}\n' > /tmp/hello.c"#],
+    )
+    .await;
+
+    // Compile it
+    run_ok(&test_app.app, "gcc", &["-o", "/tmp/hello", "/tmp/hello.c"]).await;
+
+    // Run it
+    let result = run_ok(&test_app.app, "/tmp/hello", &[]).await;
+    let stdout = result["stdout"].as_str().unwrap_or("").trim();
+    assert_eq!(stdout, "hello");
+}
+
+/// sqlite3 must create a table, insert a row, and read that row back.
+#[tokio::test]
+#[serial]
+async fn sqlite3_can_create_and_query() {
+    let sandbox = TestApp::new();
+    // Goes through `sh -c` so the whole SQL script is passed as one quoted argument.
+    let shell_line = "sqlite3 /tmp/test.db 'CREATE TABLE t(v TEXT); INSERT INTO t VALUES(\"ok\"); SELECT v FROM t;'";
+    let response = run_ok(&sandbox.app, "sh", &["-c", shell_line]).await;
+    let printed = response["stdout"].as_str().unwrap_or_default().trim();
+    assert_eq!(printed, "ok");
+}
+
+/// git must initialise a repository under /tmp and record a first commit.
+#[tokio::test]
+#[serial]
+async fn git_can_init_and_commit() {
+    let sandbox = TestApp::new();
+    // A single `&&` chain: any failing step makes the shell exit non-zero,
+    // which `run_ok` turns into a test failure.
+    let shell_line = "cd /tmp && mkdir -p testrepo && cd testrepo && git init && git config user.email 'test@test.com' && git config user.name 'Test' && touch file && git add file && git commit -m 'init'";
+    let _response = run_ok(&sandbox.app, "sh", &["-c", shell_line]).await;
+}
+
+/// Headless Chromium must load a page and print its DOM to stdout.
+///
+/// The page is an inline `data:` URL, so no network access is involved; the
+/// test passes when the dumped DOM contains the text "hello".
+#[tokio::test]
+#[serial]
+async fn chromium_headless_can_dump_dom() {
+    let test_app = TestApp::new();
+    // Use headless mode to dump the DOM of a tiny inline page. The URL is kept
+    // on one line so the string literal contains no embedded newline.
+    let result = run_ok_with_timeout(
+        &test_app.app,
+        "chromium",
+        &[
+            "--headless",
+            "--no-sandbox",
+            "--disable-gpu",
+            "--dump-dom",
+            "data:text/html,<h1>hello</h1>",
+        ],
+        30_000,
+    )
+    .await;
+    let stdout = result["stdout"].as_str().unwrap_or("");
+    assert!(
+        stdout.contains("hello"),
+        "expected hello in DOM dump, got: {stdout}"
+    );
+}
diff --git a/server/packages/sandbox-agent/tests/support/docker_common_software.rs b/server/packages/sandbox-agent/tests/support/docker_common_software.rs
new file mode 100644
index 0000000..4b29ed6
--- /dev/null
+++ b/server/packages/sandbox-agent/tests/support/docker_common_software.rs
@@ -0,0 +1,332 @@
+//! Docker support for common-software integration tests.
+//!
+//! Builds the `docker/test-common-software/Dockerfile` image (which extends the
+//! base test-agent image with pre-installed common software) and provides a
+//! `TestApp` that runs a container from it.
+//!
+//! KEEP IN SYNC with docs/common-software.mdx and docker/test-common-software/Dockerfile.
+use std::collections::BTreeMap;
+use std::io::{Read, Write};
+use std::net::TcpStream;
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::OnceLock;
+use std::thread;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+use tempfile::TempDir;
+
+/// Port the server is told to listen on inside the container.
+const CONTAINER_PORT: u16 = 3000;
+/// `PATH` value injected into the container environment.
+const DEFAULT_PATH: &str = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
+/// Base image tag used when `SANDBOX_AGENT_TEST_IMAGE` is not set.
+const BASE_IMAGE_TAG: &str = "sandbox-agent-test:dev";
+/// Common-software image tag used when `SANDBOX_AGENT_TEST_COMMON_SOFTWARE_IMAGE` is not set.
+const COMMON_SOFTWARE_IMAGE_TAG: &str = "sandbox-agent-test-common-software:dev";
+
+/// Tag of the common-software image; initialised once, on first use.
+static IMAGE_TAG: OnceLock<String> = OnceLock::new();
+/// Path of the docker executable; resolved once, on first use.
+static DOCKER_BIN: OnceLock<PathBuf> = OnceLock::new();
+/// Counter mixed into container names so they are unique within this process.
+static CONTAINER_COUNTER: AtomicU64 = AtomicU64::new(0);
+
+/// Handle to the HTTP endpoint of a running test container.
+#[derive(Clone)]
+pub struct DockerApp {
+    /// Scheme, host and port of the server, without a trailing path.
+    base_url: String,
+}
+
+impl DockerApp {
+    /// Returns the absolute URL for `path` by appending it to the base URL.
+    ///
+    /// No separator is inserted, so `path` is expected to start with `/`.
+    pub fn http_url(&self, path: &str) -> String {
+        [self.base_url.as_str(), path].concat()
+    }
+}
+
+/// A running common-software container plus the host resources backing it.
+///
+/// Dropping the value force-removes the container.
+pub struct TestApp {
+    /// HTTP handle used by tests to talk to the server in the container.
+    pub app: DockerApp,
+    /// Held so the temporary directory mounted into the container lives as long as the app.
+    _root: TempDir,
+    /// Name passed to `docker run --name`; used again on drop to remove the container.
+    container_id: String,
+}
+
+impl TestApp {
+    /// Creates the host directories, makes sure the image exists, and starts a container.
+    ///
+    /// Panics if any of those steps fails.
+    pub fn new() -> Self {
+        let root = tempfile::tempdir().expect("create docker test root");
+        let host_root = root.path();
+
+        let layout = TestLayout::new(host_root);
+        layout.create();
+
+        let container_id = unique_container_id();
+        let image = ensure_common_software_image();
+        let base_url = run_container(
+            &container_id,
+            &image,
+            &build_mounts(host_root),
+            &build_env(&layout),
+        );
+
+        Self {
+            app: DockerApp { base_url },
+            _root: root,
+            container_id,
+        }
+    }
+}
+
+impl Drop for TestApp {
+    /// Best-effort `docker rm -f`; a failure to remove the container is ignored.
+    fn drop(&mut self) {
+        let mut remove = Command::new(docker_bin());
+        remove.arg("rm").arg("-f").arg(&self.container_id);
+        let _ = remove.output();
+    }
+}
+
+/// Host directories that are exposed to the container as HOME and the XDG dirs.
+struct TestLayout {
+    home: PathBuf,
+    xdg_data_home: PathBuf,
+    xdg_state_home: PathBuf,
+}
+
+impl TestLayout {
+    /// Computes the directory paths under `root` without touching the filesystem.
+    fn new(root: &Path) -> Self {
+        let child = |name: &str| root.join(name);
+        Self {
+            home: child("home"),
+            xdg_data_home: child("xdg-data"),
+            xdg_state_home: child("xdg-state"),
+        }
+    }
+
+    /// Creates every directory of the layout, panicking on the first failure.
+    fn create(&self) {
+        [&self.home, &self.xdg_data_home, &self.xdg_state_home]
+            .into_iter()
+            .for_each(|dir| std::fs::create_dir_all(dir).expect("create docker test dir"));
+    }
+}
+
+/// Builds the base test-agent image with `docker build` and returns its tag.
+///
+/// The tag comes from `SANDBOX_AGENT_TEST_IMAGE` when set, otherwise from
+/// `BASE_IMAGE_TAG`. The repository root is used as the build context.
+/// Panics, including docker's stderr in the message, if the build fails.
+fn ensure_base_image() -> String {
+    let workspace = repo_root();
+    let dockerfile = workspace
+        .join("docker")
+        .join("test-agent")
+        .join("Dockerfile");
+    let image_tag =
+        std::env::var("SANDBOX_AGENT_TEST_IMAGE").unwrap_or_else(|_| BASE_IMAGE_TAG.to_string());
+
+    let mut build = Command::new(docker_bin());
+    build
+        .arg("build")
+        .arg("--tag")
+        .arg(&image_tag)
+        .arg("--file")
+        .arg(dockerfile)
+        .arg(&workspace);
+    let output = build.output().expect("build base test image");
+
+    assert!(
+        output.status.success(),
+        "failed to build base test image: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+    image_tag
+}
+
+/// Returns the tag of the common-software image, building it on first call.
+///
+/// The first caller builds the base image, then builds
+/// `docker/test-common-software/Dockerfile` with `BASE_IMAGE` pointing at it.
+/// The resulting tag is cached in `IMAGE_TAG`, so later calls only clone it.
+/// The tag comes from `SANDBOX_AGENT_TEST_COMMON_SOFTWARE_IMAGE` when set.
+/// Panics, including docker's stderr in the message, if the build fails.
+fn ensure_common_software_image() -> String {
+    let build_once = || {
+        let base_image = ensure_base_image();
+        let workspace = repo_root();
+        let image_tag = std::env::var("SANDBOX_AGENT_TEST_COMMON_SOFTWARE_IMAGE")
+            .unwrap_or_else(|_| COMMON_SOFTWARE_IMAGE_TAG.to_string());
+        let base_image_arg = format!("BASE_IMAGE={base_image}");
+        let dockerfile = workspace
+            .join("docker")
+            .join("test-common-software")
+            .join("Dockerfile");
+
+        let mut build = Command::new(docker_bin());
+        build
+            .arg("build")
+            .arg("--tag")
+            .arg(&image_tag)
+            .arg("--build-arg")
+            .arg(&base_image_arg)
+            .arg("--file")
+            .arg(dockerfile)
+            .arg(&workspace);
+        let output = build.output().expect("build common-software test image");
+
+        assert!(
+            output.status.success(),
+            "failed to build common-software test image: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+        image_tag
+    };
+
+    IMAGE_TAG.get_or_init(build_once).clone()
+}
+
+/// Builds the environment variables passed to the container.
+///
+/// Sets `HOME`, `XDG_DATA_HOME` and `XDG_STATE_HOME` to the layout's host paths
+/// (converted lossily to strings) and `PATH` to `DEFAULT_PATH`.
+fn build_env(layout: &TestLayout) -> BTreeMap<String, String> {
+    let as_text = |path: &Path| path.to_string_lossy().to_string();
+    BTreeMap::from([
+        ("HOME".to_string(), as_text(&layout.home)),
+        ("XDG_DATA_HOME".to_string(), as_text(&layout.xdg_data_home)),
+        ("XDG_STATE_HOME".to_string(), as_text(&layout.xdg_state_home)),
+        ("PATH".to_string(), DEFAULT_PATH.to_string()),
+    ])
+}
+
+/// Returns the host paths to bind-mount into the container: just `root` itself.
+fn build_mounts(root: &Path) -> Vec<PathBuf> {
+    vec![root.to_path_buf()]
+}
+
+/// Starts a detached container running the server and returns its base URL.
+///
+/// The container is named `container_id`, publishes `CONTAINER_PORT` on an
+/// ephemeral 127.0.0.1 port, bind-mounts every path in `mounts` at the same
+/// path inside the container, and receives every entry of `env`. Once docker
+/// reports the mapped host port, the function blocks until the health endpoint
+/// answers and then returns `http://127.0.0.1:<port>`.
+///
+/// Panics if the container cannot be started, the port cannot be resolved, or
+/// the health wait fails. If that happens after the container was started, it
+/// is force-removed first so a failed start does not leave it running.
+fn run_container(
+    container_id: &str,
+    image: &str,
+    mounts: &[PathBuf],
+    env: &BTreeMap<String, String>,
+) -> String {
+    /// Removes the named container if dropped while the thread is panicking.
+    struct RemoveOnPanic<'a>(&'a str);
+
+    impl Drop for RemoveOnPanic<'_> {
+        fn drop(&mut self) {
+            if thread::panicking() {
+                let _ = Command::new(docker_bin())
+                    .args(["rm", "-f", self.0])
+                    .output();
+            }
+        }
+    }
+
+    let mut args = vec![
+        "run".to_string(),
+        "-d".to_string(),
+        "--rm".to_string(),
+        "--name".to_string(),
+        container_id.to_string(),
+        "-p".to_string(),
+        format!("127.0.0.1::{CONTAINER_PORT}"),
+    ];
+
+    if cfg!(target_os = "linux") {
+        args.push("--add-host".to_string());
+        args.push("host.docker.internal:host-gateway".to_string());
+    }
+
+    for mount in mounts {
+        args.push("-v".to_string());
+        args.push(format!("{}:{}", mount.display(), mount.display()));
+    }
+
+    for (key, value) in env {
+        args.push("-e".to_string());
+        args.push(format!("{key}={value}"));
+    }
+
+    args.push(image.to_string());
+    args.push("server".to_string());
+    args.push("--host".to_string());
+    args.push("0.0.0.0".to_string());
+    args.push("--port".to_string());
+    args.push(CONTAINER_PORT.to_string());
+    args.push("--no-token".to_string());
+
+    let output = Command::new(docker_bin())
+        .args(&args)
+        .output()
+        .expect("start docker test container");
+    if !output.status.success() {
+        panic!(
+            "failed to start docker test container: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+
+    // The container now exists but the caller does not own it yet, so any
+    // panic below must clean it up here.
+    let _cleanup = RemoveOnPanic(container_id);
+
+    let port_output = Command::new(docker_bin())
+        .args(["port", container_id, &format!("{CONTAINER_PORT}/tcp")])
+        .output()
+        .expect("resolve mapped docker port");
+    if !port_output.status.success() {
+        panic!(
+            "failed to resolve docker test port: {}",
+            String::from_utf8_lossy(&port_output.stderr)
+        );
+    }
+
+    // `docker port` prints `<host>:<port>`; the text after the last ':' is the port.
+    let mapping = String::from_utf8(port_output.stdout)
+        .expect("docker port utf8")
+        .trim()
+        .to_string();
+    let host_port = mapping.rsplit(':').next().expect("mapped host port").trim();
+    let base_url = format!("http://127.0.0.1:{host_port}");
+    wait_for_health(&base_url);
+    base_url
+}
+
+/// Blocks until the server at `base_url` answers its health probe.
+///
+/// Polls every 200 ms and panics once more than 60 seconds have passed without
+/// a successful probe. Elapsed time is measured with the monotonic clock, so
+/// adjustments to the system time cannot shorten or extend the deadline.
+fn wait_for_health(base_url: &str) {
+    const STARTUP_TIMEOUT: Duration = Duration::from_secs(60);
+    const POLL_INTERVAL: Duration = Duration::from_millis(200);
+
+    let started = std::time::Instant::now();
+    while !probe_health(base_url) {
+        if started.elapsed() > STARTUP_TIMEOUT {
+            panic!("timed out waiting for common-software docker test server");
+        }
+        thread::sleep(POLL_INTERVAL);
+    }
+}
+
+/// Sends one `GET /v1/health` over a raw TCP connection and reports success.
+///
+/// Returns `true` only when the response starts with an HTTP/1.1 or HTTP/1.0
+/// `200` status line. Connection, write and read failures all yield `false`.
+/// Reads and writes use a 2 second timeout.
+fn probe_health(base_url: &str) -> bool {
+    let address = base_url.strip_prefix("http://").unwrap_or(base_url);
+    let Ok(mut stream) = TcpStream::connect(address) else {
+        return false;
+    };
+
+    // Failing to set a timeout is tolerated; the probe just proceeds without it.
+    let io_timeout = Some(Duration::from_secs(2));
+    let _ = stream.set_read_timeout(io_timeout);
+    let _ = stream.set_write_timeout(io_timeout);
+
+    // `Connection: close` makes the server end the stream, so reading to EOF terminates.
+    let request =
+        format!("GET /v1/health HTTP/1.1\r\nHost: {address}\r\nConnection: close\r\n\r\n");
+    let mut response = String::new();
+    let exchanged = stream.write_all(request.as_bytes()).is_ok()
+        && stream.read_to_string(&mut response).is_ok();
+
+    exchanged
+        && ["HTTP/1.1 200", "HTTP/1.0 200"]
+            .iter()
+            .any(|status_line| response.starts_with(*status_line))
+}
+
+/// Builds a container name from the process id, the current time and a counter.
+///
+/// The time component is milliseconds since the Unix epoch, or 0 if the system
+/// clock reads earlier than the epoch. The counter increases on every call.
+fn unique_container_id() -> String {
+    let millis = SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map_or(0, |since_epoch| since_epoch.as_millis());
+    let counter = CONTAINER_COUNTER.fetch_add(1, Ordering::Relaxed);
+    let pid = std::process::id();
+    format!("sandbox-agent-common-sw-{pid}-{millis}-{counter}")
+}
+
+/// Returns the canonical path three directories above this crate's manifest directory.
+///
+/// Panics if that path cannot be canonicalized.
+fn repo_root() -> PathBuf {
+    let relative = Path::new(env!("CARGO_MANIFEST_DIR")).join("../../..");
+    std::fs::canonicalize(relative).expect("repo root")
+}
+
+/// Returns the docker executable to use, resolving it once and caching the result.
+///
+/// Candidates are tried in order and the first one that exists wins:
+/// the `SANDBOX_AGENT_TEST_DOCKER_BIN` environment variable, then three
+/// well-known install locations. If none exists, the bare name `docker` is
+/// returned.
+fn docker_bin() -> &'static Path {
+    let resolve = || {
+        let from_env = std::env::var_os("SANDBOX_AGENT_TEST_DOCKER_BIN").map(PathBuf::from);
+        let well_known = [
+            "/usr/local/bin/docker",
+            "/opt/homebrew/bin/docker",
+            "/usr/bin/docker",
+        ]
+        .into_iter()
+        .map(PathBuf::from);
+
+        from_env
+            .into_iter()
+            .chain(well_known)
+            .find(|candidate| candidate.exists())
+            .unwrap_or_else(|| PathBuf::from("docker"))
+    };
+
+    DOCKER_BIN.get_or_init(resolve).as_path()
+}