mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-15 07:04:48 +00:00
feat: desktop computer-use APIs with neko-based streaming
Add desktop runtime management (Xvfb, openbox, dbus), screen capture, mouse/keyboard input, and video streaming via neko binary extracted from the m1k1o/neko container. Includes Docker test rig, TypeScript SDK desktop support, and inspector Desktop tab. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
3895e34bdb
commit
33821d8660
66 changed files with 13190 additions and 1135 deletions
257
sdks/react/src/DesktopViewer.tsx
Normal file
257
sdks/react/src/DesktopViewer.tsx
Normal file
|
|
@ -0,0 +1,257 @@
|
|||
"use client";
|
||||
|
||||
import type { CSSProperties, MouseEvent, WheelEvent } from "react";
|
||||
import { useEffect, useRef, useState } from "react";
|
||||
import type { DesktopMouseButton, DesktopStreamErrorStatus, DesktopStreamReadyStatus, SandboxAgent } from "sandbox-agent";
|
||||
|
||||
/** Connection phases the viewer tracks and colours in its status bar. */
type ConnectionState = "connecting" | "ready" | "closed" | "error";

/** The three `SandboxAgent` methods the viewer calls; any object providing them can be passed as `client`. */
export type DesktopViewerClient = Pick<SandboxAgent, "startDesktopStream" | "stopDesktopStream" | "connectDesktopStream">;

/** Props accepted by `DesktopViewer`. */
export interface DesktopViewerProps {
  /** Client used to start, connect to and stop the desktop stream. */
  client: DesktopViewerClient;
  /** Class name applied to the outermost element. */
  className?: string;
  /** Inline style merged over the built-in shell style. */
  style?: CSSProperties;
  /** Inline style merged over the built-in frame image style. */
  imageStyle?: CSSProperties;
  /** Height of the viewport element (the component defaults it to 480). */
  height?: number | string;
  /** Called with the ready status when the stream session reports it is ready. */
  onConnect?: (status: DesktopStreamReadyStatus) => void;
  /** Called when the stream session closes. */
  onDisconnect?: () => void;
  /** Called with a stream error status or an `Error` when starting or streaming fails. */
  onError?: (error: DesktopStreamErrorStatus | Error) => void;
}
|
||||
|
||||
/** Outer card: column layout with a rounded, bordered, softly shadowed frame. */
const shellStyle: CSSProperties = {
  display: "flex",
  flexDirection: "column",
  overflow: "hidden",
  border: "1px solid rgba(15, 23, 42, 0.14)",
  borderRadius: 14,
  background: "linear-gradient(180deg, rgba(248, 250, 252, 0.96) 0%, rgba(226, 232, 240, 0.92) 100%)",
  boxShadow: "0 20px 40px rgba(15, 23, 42, 0.08)",
};

/** Header row: status message on the left, resolution hint on the right. */
const statusBarStyle: CSSProperties = {
  display: "flex",
  alignItems: "center",
  justifyContent: "space-between",
  gap: 12,
  padding: "10px 14px",
  borderBottom: "1px solid rgba(15, 23, 42, 0.08)",
  background: "rgba(255, 255, 255, 0.78)",
  color: "#0f172a",
  fontSize: 12,
  lineHeight: 1.4,
};

/** Dark backdrop that centres the streamed frame and clips any overflow. */
const viewportStyle: CSSProperties = {
  position: "relative",
  display: "flex",
  alignItems: "center",
  justifyContent: "center",
  overflow: "hidden",
  background: "radial-gradient(circle at top, rgba(14, 165, 233, 0.18), transparent 45%), linear-gradient(180deg, #0f172a 0%, #111827 100%)",
};

/** Frame image: fills the viewport while keeping its aspect ratio (`contain`). */
const imageBaseStyle: CSSProperties = {
  display: "block",
  width: "100%",
  height: "100%",
  objectFit: "contain",
  userSelect: "none",
};

/** De-emphasised text used for the resolution hint. */
const hintStyle: CSSProperties = {
  opacity: 0.66,
};
|
||||
|
||||
/**
 * Picks the status-bar text colour for a connection state.
 *
 * @param state - Current connection state.
 * @returns Green for "ready", red for "error", amber for "closed", and slate
 *   grey for anything else (i.e. "connecting").
 */
const getStatusColor = (state: ConnectionState): string => {
  if (state === "ready") {
    return "#15803d";
  }
  if (state === "error") {
    return "#b91c1c";
  }
  if (state === "closed") {
    return "#b45309";
  }
  return "#475569";
};
|
||||
|
||||
/**
 * Interactive viewer for a sandbox desktop stream.
 *
 * On mount (and whenever `client` changes) it starts the stream, opens a
 * stream session, renders each received frame as a JPEG `<img>`, and forwards
 * mouse, wheel and keyboard input from the viewport to the session. On
 * cleanup it closes the session and asks the client to stop the stream.
 *
 * `onConnect`, `onDisconnect` and `onError` are read through a ref, so passing
 * new function identities on every render does NOT restart the stream.
 */
export const DesktopViewer = ({ client, className, style, imageStyle, height = 480, onConnect, onDisconnect, onError }: DesktopViewerProps) => {
  const wrapperRef = useRef<HTMLDivElement | null>(null);
  const imageRef = useRef<HTMLImageElement | null>(null);
  const sessionRef = useRef<ReturnType<DesktopViewerClient["connectDesktopStream"]> | null>(null);
  // Latest callback props; kept out of the stream effect's dependency list.
  const callbacksRef = useRef({ onConnect, onDisconnect, onError });
  const [connectionState, setConnectionState] = useState<ConnectionState>("connecting");
  const [statusMessage, setStatusMessage] = useState("Starting desktop stream...");
  const [frameUrl, setFrameUrl] = useState<string | null>(null);
  const [resolution, setResolution] = useState<{ width: number; height: number } | null>(null);

  // Declared before the stream effect so the ref is already fresh when that effect runs.
  useEffect(() => {
    callbacksRef.current = { onConnect, onDisconnect, onError };
  });

  useEffect(() => {
    let cancelled = false;
    // Single owner of the current frame's object URL (no revokes inside state updaters).
    let lastObjectUrl: string | null = null;
    let session: ReturnType<DesktopViewerClient["connectDesktopStream"]> | null = null;

    setConnectionState("connecting");
    setStatusMessage("Starting desktop stream...");
    setResolution(null);

    const connect = async () => {
      try {
        await client.startDesktopStream();
        if (cancelled) {
          return;
        }

        session = client.connectDesktopStream();
        sessionRef.current = session;

        session.onReady((status) => {
          if (cancelled) {
            return;
          }
          setConnectionState("ready");
          setStatusMessage("Desktop stream connected.");
          setResolution({ width: status.width, height: status.height });
          callbacksRef.current.onConnect?.(status);
        });

        session.onFrame((frame) => {
          if (cancelled) {
            return;
          }
          const nextUrl = URL.createObjectURL(new Blob([frame.slice().buffer], { type: "image/jpeg" }));
          const previousUrl = lastObjectUrl;
          lastObjectUrl = nextUrl;
          setFrameUrl(nextUrl);
          if (previousUrl) {
            URL.revokeObjectURL(previousUrl);
          }
        });

        session.onError((error) => {
          if (cancelled) {
            return;
          }
          setConnectionState("error");
          // Both union members (`Error` and the error status frame) carry `message`.
          setStatusMessage(error.message);
          callbacksRef.current.onError?.(error);
        });

        session.onClose(() => {
          if (cancelled) {
            return;
          }
          // Keep an earlier error visible instead of overwriting it with "closed".
          setConnectionState((current) => (current === "error" ? current : "closed"));
          setStatusMessage((current) => (current === "Desktop stream connected." ? "Desktop stream disconnected." : current));
          callbacksRef.current.onDisconnect?.();
        });
      } catch (error) {
        if (cancelled) {
          return;
        }
        const nextError = error instanceof Error ? error : new Error("Failed to initialize desktop stream.");
        setConnectionState("error");
        setStatusMessage(nextError.message);
        callbacksRef.current.onError?.(nextError);
      }
    };

    void connect();

    return () => {
      cancelled = true;
      session?.close();
      sessionRef.current = null;
      // Best effort: a failure to stop the stream during teardown is deliberately ignored.
      void client.stopDesktopStream().catch(() => undefined);
      setFrameUrl(null);
      if (lastObjectUrl) {
        URL.revokeObjectURL(lastObjectUrl);
      }
    };
  }, [client]);

  // React registers its wheel listeners as passive, so preventDefault() inside
  // `onWheel` is ignored. A native non-passive listener stops the page from
  // scrolling while the wheel is used over the viewport.
  useEffect(() => {
    const wrapper = wrapperRef.current;
    if (!wrapper) {
      return;
    }
    const blockPageScroll = (event: Event) => {
      event.preventDefault();
    };
    wrapper.addEventListener("wheel", blockPageScroll, { passive: false });
    return () => {
      wrapper.removeEventListener("wheel", blockPageScroll);
    };
  }, []);

  /**
   * Converts viewport client coordinates into desktop pixel coordinates,
   * clamped to the desktop bounds. Returns null until the resolution is known
   * or while the element has no size.
   */
  const scalePoint = (clientX: number, clientY: number) => {
    const wrapper = wrapperRef.current;
    if (!wrapper || !resolution) {
      return null;
    }
    const image = imageRef.current;
    const box = (image ?? wrapper).getBoundingClientRect();
    if (box.width === 0 || box.height === 0) {
      return null;
    }

    let left = box.left;
    let top = box.top;
    let paintedWidth = box.width;
    let paintedHeight = box.height;

    // With `object-fit: contain` the frame is letterboxed inside the element
    // box, so map against the painted area rather than the whole box.
    // NOTE(review): assumes the default centred `object-position`; a custom
    // `imageStyle.objectPosition` would need to be accounted for here.
    if (image && getComputedStyle(image).objectFit === "contain") {
      const sourceWidth = image.naturalWidth || resolution.width;
      const sourceHeight = image.naturalHeight || resolution.height;
      const fit = Math.min(box.width / sourceWidth, box.height / sourceHeight);
      if (Number.isFinite(fit) && fit > 0) {
        paintedWidth = sourceWidth * fit;
        paintedHeight = sourceHeight * fit;
        left += (box.width - paintedWidth) / 2;
        top += (box.height - paintedHeight) / 2;
      }
    }

    const x = Math.max(0, Math.min(resolution.width, ((clientX - left) / paintedWidth) * resolution.width));
    const y = Math.max(0, Math.min(resolution.height, ((clientY - top) / paintedHeight) * resolution.height));
    return {
      x: Math.round(x),
      y: Math.round(y),
    };
  };

  /** Maps a DOM mouse button index to the desktop button name (anything but 1 or 2 is "left"). */
  const buttonFromMouseEvent = (event: MouseEvent<HTMLDivElement>): DesktopMouseButton => {
    switch (event.button) {
      case 1:
        return "middle";
      case 2:
        return "right";
      default:
        return "left";
    }
  };

  /** Runs `callback` with the live session, or does nothing when there is none. */
  const withSession = (callback: (session: NonNullable<ReturnType<DesktopViewerClient["connectDesktopStream"]>>) => void) => {
    const session = sessionRef.current;
    if (session) {
      callback(session);
    }
  };

  return (
    <div className={className} style={{ ...shellStyle, ...style }}>
      <div style={statusBarStyle}>
        <span style={{ color: getStatusColor(connectionState) }}>{statusMessage}</span>
        <span style={hintStyle}>{resolution ? `${resolution.width}×${resolution.height}` : "Awaiting frames"}</span>
      </div>
      <div
        ref={wrapperRef}
        role="button"
        tabIndex={0}
        style={{ ...viewportStyle, height }}
        onContextMenu={(event) => {
          // The right button is forwarded to the desktop; keep the browser menu out of the way.
          event.preventDefault();
        }}
        onMouseMove={(event) => {
          const point = scalePoint(event.clientX, event.clientY);
          if (!point) {
            return;
          }
          withSession((session) => session.moveMouse(point.x, point.y));
        }}
        onMouseDown={(event) => {
          event.preventDefault();
          // preventDefault() on mousedown also suppresses focus; focus explicitly
          // so the key handlers below receive events after a click.
          event.currentTarget.focus({ preventScroll: true });
          const point = scalePoint(event.clientX, event.clientY);
          withSession((session) => session.mouseDown(buttonFromMouseEvent(event), point?.x, point?.y));
        }}
        onMouseUp={(event) => {
          const point = scalePoint(event.clientX, event.clientY);
          withSession((session) => session.mouseUp(buttonFromMouseEvent(event), point?.x, point?.y));
        }}
        onWheel={(event: WheelEvent<HTMLDivElement>) => {
          // Default scrolling is cancelled by the native non-passive listener above.
          const point = scalePoint(event.clientX, event.clientY);
          if (!point) {
            return;
          }
          withSession((session) => session.scroll(point.x, point.y, Math.round(event.deltaX), Math.round(event.deltaY)));
        }}
        onKeyDown={(event) => {
          withSession((session) => session.keyDown(event.key));
        }}
        onKeyUp={(event) => {
          withSession((session) => session.keyUp(event.key));
        }}
      >
        {frameUrl ? <img ref={imageRef} alt="Desktop stream" draggable={false} src={frameUrl} style={{ ...imageBaseStyle, ...imageStyle }} /> : null}
      </div>
    </div>
  );
};
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
export { AgentConversation } from "./AgentConversation.tsx";
|
||||
export { AgentTranscript } from "./AgentTranscript.tsx";
|
||||
export { ChatComposer } from "./ChatComposer.tsx";
|
||||
export { DesktopViewer } from "./DesktopViewer.tsx";
|
||||
export { ProcessTerminal } from "./ProcessTerminal.tsx";
|
||||
export { useTranscriptVirtualizer } from "./useTranscriptVirtualizer.ts";
|
||||
|
||||
|
|
@ -23,6 +24,11 @@ export type {
|
|||
ChatComposerProps,
|
||||
} from "./ChatComposer.tsx";
|
||||
|
||||
export type {
|
||||
DesktopViewerClient,
|
||||
DesktopViewerProps,
|
||||
} from "./DesktopViewer.tsx";
|
||||
|
||||
export type {
|
||||
ProcessTerminalClient,
|
||||
ProcessTerminalProps,
|
||||
|
|
|
|||
|
|
@ -23,12 +23,35 @@ import {
|
|||
type SetSessionModeRequest,
|
||||
} from "acp-http-client";
|
||||
import type { SandboxProvider } from "./providers/types.ts";
|
||||
import { DesktopStreamSession, type DesktopStreamConnectOptions } from "./desktop-stream.ts";
|
||||
import {
|
||||
type AcpServerListResponse,
|
||||
type AgentInfo,
|
||||
type AgentInstallRequest,
|
||||
type AgentInstallResponse,
|
||||
type AgentListResponse,
|
||||
type DesktopActionResponse,
|
||||
type DesktopDisplayInfoResponse,
|
||||
type DesktopKeyboardDownRequest,
|
||||
type DesktopKeyboardPressRequest,
|
||||
type DesktopKeyboardTypeRequest,
|
||||
type DesktopMouseClickRequest,
|
||||
type DesktopMouseDownRequest,
|
||||
type DesktopMouseDragRequest,
|
||||
type DesktopMouseMoveRequest,
|
||||
type DesktopMousePositionResponse,
|
||||
type DesktopMouseScrollRequest,
|
||||
type DesktopMouseUpRequest,
|
||||
type DesktopKeyboardUpRequest,
|
||||
type DesktopRecordingInfo,
|
||||
type DesktopRecordingListResponse,
|
||||
type DesktopRecordingStartRequest,
|
||||
type DesktopRegionScreenshotQuery,
|
||||
type DesktopScreenshotQuery,
|
||||
type DesktopStartRequest,
|
||||
type DesktopStatusResponse,
|
||||
type DesktopStreamStatusResponse,
|
||||
type DesktopWindowListResponse,
|
||||
type FsActionResponse,
|
||||
type FsDeleteQuery,
|
||||
type FsEntriesQuery,
|
||||
|
|
@ -53,7 +76,9 @@ import {
|
|||
type ProcessInfo,
|
||||
type ProcessInputRequest,
|
||||
type ProcessInputResponse,
|
||||
type ProcessListQuery,
|
||||
type ProcessListResponse,
|
||||
type ProcessOwner,
|
||||
type ProcessLogEntry,
|
||||
type ProcessLogsQuery,
|
||||
type ProcessLogsResponse,
|
||||
|
|
@ -201,6 +226,7 @@ export interface ProcessTerminalConnectOptions extends ProcessTerminalWebSocketU
|
|||
}
|
||||
|
||||
export type ProcessTerminalSessionOptions = ProcessTerminalConnectOptions;
|
||||
export type DesktopStreamSessionOptions = DesktopStreamConnectOptions;
|
||||
|
||||
export class SandboxAgentError extends Error {
|
||||
readonly status: number;
|
||||
|
|
@ -1533,6 +1559,148 @@ export class SandboxAgent {
|
|||
return this.requestHealth();
|
||||
}
|
||||
|
||||
/**
 * Sends `POST {API_PREFIX}/desktop/start` with `request` as the JSON body.
 *
 * @param request - Start options; defaults to an empty object.
 * @returns The desktop status returned by the server.
 */
async startDesktop(request: DesktopStartRequest = {}): Promise<DesktopStatusResponse> {
  const endpoint = `${API_PREFIX}/desktop/start`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `POST {API_PREFIX}/desktop/stop` and returns the resulting desktop status. */
async stopDesktop(): Promise<DesktopStatusResponse> {
  const endpoint = `${API_PREFIX}/desktop/stop`;
  return this.requestJson("POST", endpoint);
}
|
||||
|
||||
/** Fetches the desktop status via `GET {API_PREFIX}/desktop/status`. */
async getDesktopStatus(): Promise<DesktopStatusResponse> {
  const endpoint = `${API_PREFIX}/desktop/status`;
  return this.requestJson("GET", endpoint);
}
|
||||
|
||||
/** Fetches display information via `GET {API_PREFIX}/desktop/display/info`. */
async getDesktopDisplayInfo(): Promise<DesktopDisplayInfoResponse> {
  const endpoint = `${API_PREFIX}/desktop/display/info`;
  return this.requestJson("GET", endpoint);
}
|
||||
|
||||
/**
 * Requests `GET {API_PREFIX}/desktop/screenshot` (accepting `image/*`) and
 * returns the response body as raw bytes.
 *
 * @param query - Optional screenshot query parameters; defaults to none.
 */
async takeDesktopScreenshot(query: DesktopScreenshotQuery = {}): Promise<Uint8Array> {
  const endpoint = `${API_PREFIX}/desktop/screenshot`;
  const response = await this.requestRaw("GET", endpoint, { query, accept: "image/*" });
  return new Uint8Array(await response.arrayBuffer());
}
|
||||
|
||||
/**
 * Requests `GET {API_PREFIX}/desktop/screenshot/region` (accepting `image/*`)
 * and returns the response body as raw bytes.
 *
 * @param query - Region screenshot query parameters (required).
 */
async takeDesktopRegionScreenshot(query: DesktopRegionScreenshotQuery): Promise<Uint8Array> {
  const endpoint = `${API_PREFIX}/desktop/screenshot/region`;
  const response = await this.requestRaw("GET", endpoint, { query, accept: "image/*" });
  return new Uint8Array(await response.arrayBuffer());
}
|
||||
|
||||
/** Fetches the mouse position via `GET {API_PREFIX}/desktop/mouse/position`. */
async getDesktopMousePosition(): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/position`;
  return this.requestJson("GET", endpoint);
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/move` and returns the reported mouse position. */
async moveDesktopMouse(request: DesktopMouseMoveRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/move`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/click` and returns the reported mouse position. */
async clickDesktop(request: DesktopMouseClickRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/click`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/down` and returns the reported mouse position. */
async mouseDownDesktop(request: DesktopMouseDownRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/down`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/up` and returns the reported mouse position. */
async mouseUpDesktop(request: DesktopMouseUpRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/up`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/drag` and returns the reported mouse position. */
async dragDesktopMouse(request: DesktopMouseDragRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/drag`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/mouse/scroll` and returns the reported mouse position. */
async scrollDesktop(request: DesktopMouseScrollRequest): Promise<DesktopMousePositionResponse> {
  const endpoint = `${API_PREFIX}/desktop/mouse/scroll`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/keyboard/type` and returns the action response. */
async typeDesktopText(request: DesktopKeyboardTypeRequest): Promise<DesktopActionResponse> {
  const endpoint = `${API_PREFIX}/desktop/keyboard/type`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/keyboard/press` and returns the action response. */
async pressDesktopKey(request: DesktopKeyboardPressRequest): Promise<DesktopActionResponse> {
  const endpoint = `${API_PREFIX}/desktop/keyboard/press`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/keyboard/down` and returns the action response. */
async keyDownDesktop(request: DesktopKeyboardDownRequest): Promise<DesktopActionResponse> {
  const endpoint = `${API_PREFIX}/desktop/keyboard/down`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `request` to `POST {API_PREFIX}/desktop/keyboard/up` and returns the action response. */
async keyUpDesktop(request: DesktopKeyboardUpRequest): Promise<DesktopActionResponse> {
  const endpoint = `${API_PREFIX}/desktop/keyboard/up`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Fetches the window list via `GET {API_PREFIX}/desktop/windows`. */
async listDesktopWindows(): Promise<DesktopWindowListResponse> {
  const endpoint = `${API_PREFIX}/desktop/windows`;
  return this.requestJson("GET", endpoint);
}
|
||||
|
||||
/**
 * Sends `POST {API_PREFIX}/desktop/recording/start` with `request` as the JSON body.
 *
 * @param request - Recording options; defaults to an empty object.
 * @returns The recording info returned by the server.
 */
async startDesktopRecording(request: DesktopRecordingStartRequest = {}): Promise<DesktopRecordingInfo> {
  const endpoint = `${API_PREFIX}/desktop/recording/start`;
  return this.requestJson("POST", endpoint, { body: request });
}
|
||||
|
||||
/** Sends `POST {API_PREFIX}/desktop/recording/stop` and returns the recording info from the response. */
async stopDesktopRecording(): Promise<DesktopRecordingInfo> {
  const endpoint = `${API_PREFIX}/desktop/recording/stop`;
  return this.requestJson("POST", endpoint);
}
|
||||
|
||||
/** Fetches the recording list via `GET {API_PREFIX}/desktop/recordings`. */
async listDesktopRecordings(): Promise<DesktopRecordingListResponse> {
  const endpoint = `${API_PREFIX}/desktop/recordings`;
  return this.requestJson("GET", endpoint);
}
|
||||
|
||||
/**
 * Fetches one recording's info via `GET {API_PREFIX}/desktop/recordings/{id}`.
 *
 * @param id - Recording identifier; URI-encoded before it is placed in the path.
 */
async getDesktopRecording(id: string): Promise<DesktopRecordingInfo> {
  const recordingId = encodeURIComponent(id);
  return this.requestJson("GET", `${API_PREFIX}/desktop/recordings/${recordingId}`);
}
|
||||
|
||||
/**
 * Requests `GET {API_PREFIX}/desktop/recordings/{id}/download` (accepting
 * `video/mp4`) and returns the response body as raw bytes.
 *
 * @param id - Recording identifier; URI-encoded before it is placed in the path.
 */
async downloadDesktopRecording(id: string): Promise<Uint8Array> {
  const recordingId = encodeURIComponent(id);
  const response = await this.requestRaw("GET", `${API_PREFIX}/desktop/recordings/${recordingId}/download`, {
    accept: "video/mp4",
  });
  return new Uint8Array(await response.arrayBuffer());
}
|
||||
|
||||
/**
 * Sends `DELETE {API_PREFIX}/desktop/recordings/{id}`; the response body is ignored.
 *
 * @param id - Recording identifier; URI-encoded before it is placed in the path.
 */
async deleteDesktopRecording(id: string): Promise<void> {
  const recordingId = encodeURIComponent(id);
  await this.requestRaw("DELETE", `${API_PREFIX}/desktop/recordings/${recordingId}`);
}
|
||||
|
||||
/** Sends `POST {API_PREFIX}/desktop/stream/start` and returns the stream status from the response. */
async startDesktopStream(): Promise<DesktopStreamStatusResponse> {
  const endpoint = `${API_PREFIX}/desktop/stream/start`;
  return this.requestJson("POST", endpoint);
}
|
||||
|
||||
/** Sends `POST {API_PREFIX}/desktop/stream/stop` and returns the stream status from the response. */
async stopDesktopStream(): Promise<DesktopStreamStatusResponse> {
  const endpoint = `${API_PREFIX}/desktop/stream/stop`;
  return this.requestJson("POST", endpoint);
}
|
||||
|
||||
async listAgents(options?: AgentQueryOptions): Promise<AgentListResponse> {
|
||||
return this.requestJson("GET", `${API_PREFIX}/agents`, {
|
||||
query: toAgentQuery(options),
|
||||
|
|
@ -1665,8 +1833,10 @@ export class SandboxAgent {
|
|||
});
|
||||
}
|
||||
|
||||
async listProcesses(): Promise<ProcessListResponse> {
|
||||
return this.requestJson("GET", `${API_PREFIX}/processes`);
|
||||
/**
 * Fetches the process list via `GET {API_PREFIX}/processes`.
 *
 * @param query - Optional query parameters forwarded with the request.
 */
async listProcesses(query?: ProcessListQuery): Promise<ProcessListResponse> {
  const endpoint = `${API_PREFIX}/processes`;
  return this.requestJson("GET", endpoint, { query });
}
|
||||
|
||||
async getProcess(id: string): Promise<ProcessInfo> {
|
||||
|
|
@ -1754,6 +1924,32 @@ export class SandboxAgent {
|
|||
return new ProcessTerminalSession(this.connectProcessTerminalWebSocket(id, options));
|
||||
}
|
||||
|
||||
/**
 * Builds the WebSocket URL for `{API_PREFIX}/desktop/stream/ws`.
 *
 * The access token is passed as the `access_token` query parameter:
 * `options.accessToken` when provided, otherwise this client's own token.
 */
buildDesktopStreamWebSocketUrl(options: ProcessTerminalWebSocketUrlOptions = {}): string {
  const accessToken = options.accessToken ?? this.token;
  const httpUrl = this.buildUrl(`${API_PREFIX}/desktop/stream/ws`, { access_token: accessToken });
  return toWebSocketUrl(httpUrl);
}
|
||||
|
||||
/**
 * Opens a raw WebSocket to the desktop stream endpoint.
 *
 * @param options - Optional access token, WebSocket constructor and subprotocols.
 *   When no constructor is supplied, `globalThis.WebSocket` is used.
 * @throws Error when neither a supplied nor a global WebSocket constructor exists.
 */
connectDesktopStreamWebSocket(options: DesktopStreamConnectOptions = {}): WebSocket {
  const SocketImpl = options.WebSocket ?? globalThis.WebSocket;
  if (!SocketImpl) {
    throw new Error("WebSocket API is not available; provide a WebSocket implementation.");
  }

  const url = this.buildDesktopStreamWebSocketUrl({ accessToken: options.accessToken });
  return new SocketImpl(url, options.protocols);
}
|
||||
|
||||
/** Opens the desktop stream WebSocket with `options` and wraps it in a `DesktopStreamSession`. */
connectDesktopStream(options: DesktopStreamSessionOptions = {}): DesktopStreamSession {
  const socket = this.connectDesktopStreamWebSocket(options);
  return new DesktopStreamSession(socket);
}
|
||||
|
||||
private async getLiveConnection(agent: string): Promise<LiveAcpConnection> {
|
||||
await this.awaitHealthy();
|
||||
|
||||
|
|
|
|||
236
sdks/typescript/src/desktop-stream.ts
Normal file
236
sdks/typescript/src/desktop-stream.ts
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
import type { DesktopMouseButton } from "./types.ts";
|
||||
|
||||
// Numeric `WebSocket.readyState` values (CONNECTING / OPEN / CLOSED) that the
// session compares the socket state against.
const WS_READY_STATE_CONNECTING = 0;
const WS_READY_STATE_OPEN = 1;
const WS_READY_STATE_CLOSED = 3;
|
||||
|
||||
/** Text control frame announcing that the stream is ready, with its dimensions. */
export interface DesktopStreamReadyStatus {
  type: "ready";
  width: number;
  height: number;
}

/** Text control frame carrying an error message. */
export interface DesktopStreamErrorStatus {
  type: "error";
  message: string;
}

/** Any control frame the session accepts as a text message. */
export type DesktopStreamStatusMessage = DesktopStreamReadyStatus | DesktopStreamErrorStatus;

/** Options for opening a desktop stream connection. */
export interface DesktopStreamConnectOptions {
  /** Access token to use for this connection. */
  accessToken?: string;
  /** WebSocket constructor to use in place of the global one. */
  WebSocket?: typeof WebSocket;
  /** Subprotocol(s) passed to the WebSocket constructor. */
  protocols?: string | string[];
}

/** Input and control messages the session serialises to JSON and sends over the socket. */
type DesktopStreamClientFrame =
  // Pointer move to a position.
  | {
      type: "moveMouse";
      x: number;
      y: number;
    }
  // Button press / release; position and button are optional.
  | {
      type: "mouseDown" | "mouseUp";
      x?: number;
      y?: number;
      button?: DesktopMouseButton;
    }
  // Scroll at a position with optional per-axis deltas.
  | {
      type: "scroll";
      x: number;
      y: number;
      deltaX?: number;
      deltaY?: number;
    }
  // Key press / release.
  | {
      type: "keyDown" | "keyUp";
      key: string;
    }
  // Sent once by `close()` before the socket itself is closed.
  | {
      type: "close";
    };
|
||||
|
||||
/**
 * Event-style wrapper around a desktop stream WebSocket.
 *
 * Text messages are parsed as status frames and dispatched to ready/error
 * listeners; every other message is decoded to bytes and dispatched to frame
 * listeners. Input methods serialise a frame to JSON and send it, silently
 * doing nothing unless the socket is open.
 */
export class DesktopStreamSession {
  /** Underlying socket; the constructor sets its `binaryType` to "arraybuffer". */
  readonly socket: WebSocket;
  /** Resolves when the socket emits "close". */
  readonly closed: Promise<void>;

  private readonly readyListeners = new Set<(status: DesktopStreamReadyStatus) => void>();
  private readonly frameListeners = new Set<(frame: Uint8Array) => void>();
  private readonly errorListeners = new Set<(error: DesktopStreamErrorStatus | Error) => void>();
  private readonly closeListeners = new Set<() => void>();

  // Ensures the JSON "close" frame is sent at most once.
  private closeSignalSent = false;
  private closedResolve!: () => void;

  constructor(socket: WebSocket) {
    this.socket = socket;
    socket.binaryType = "arraybuffer";
    this.closed = new Promise<void>((settle) => {
      this.closedResolve = settle;
    });

    socket.addEventListener("message", ({ data }) => {
      void this.handleMessage(data);
    });
    socket.addEventListener("error", () => {
      this.emitError(new Error("Desktop stream websocket connection failed."));
    });
    socket.addEventListener("close", () => {
      this.closedResolve();
      this.closeListeners.forEach((notify) => notify());
    });
  }

  /** Registers a listener for "ready" status frames; returns an unsubscribe function. */
  onReady(listener: (status: DesktopStreamReadyStatus) => void): () => void {
    const registry = this.readyListeners;
    registry.add(listener);
    return () => void registry.delete(listener);
  }

  /** Registers a listener for decoded binary frames; returns an unsubscribe function. */
  onFrame(listener: (frame: Uint8Array) => void): () => void {
    const registry = this.frameListeners;
    registry.add(listener);
    return () => void registry.delete(listener);
  }

  /** Registers a listener for error frames and socket/decoding errors; returns an unsubscribe function. */
  onError(listener: (error: DesktopStreamErrorStatus | Error) => void): () => void {
    const registry = this.errorListeners;
    registry.add(listener);
    return () => void registry.delete(listener);
  }

  /** Registers a listener for the socket "close" event; returns an unsubscribe function. */
  onClose(listener: () => void): () => void {
    const registry = this.closeListeners;
    registry.add(listener);
    return () => void registry.delete(listener);
  }

  /** Sends a "moveMouse" frame. */
  moveMouse(x: number, y: number): void {
    this.sendFrame({ type: "moveMouse", x, y });
  }

  /** Sends a "mouseDown" frame; all arguments are optional. */
  mouseDown(button?: DesktopMouseButton, x?: number, y?: number): void {
    this.sendFrame({ type: "mouseDown", button, x, y });
  }

  /** Sends a "mouseUp" frame; all arguments are optional. */
  mouseUp(button?: DesktopMouseButton, x?: number, y?: number): void {
    this.sendFrame({ type: "mouseUp", button, x, y });
  }

  /** Sends a "scroll" frame at the given position. */
  scroll(x: number, y: number, deltaX?: number, deltaY?: number): void {
    this.sendFrame({ type: "scroll", x, y, deltaX, deltaY });
  }

  /** Sends a "keyDown" frame. */
  keyDown(key: string): void {
    this.sendFrame({ type: "keyDown", key });
  }

  /** Sends a "keyUp" frame. */
  keyUp(key: string): void {
    this.sendFrame({ type: "keyUp", key });
  }

  /**
   * Closes the session.
   *
   * While the socket is still connecting, the close is deferred until it
   * opens. On an open socket a "close" frame is sent once, then the socket is
   * closed. An already-closed socket is left alone.
   */
  close(): void {
    const { socket } = this;
    switch (socket.readyState) {
      case WS_READY_STATE_CONNECTING:
        socket.addEventListener("open", () => this.close(), { once: true });
        return;
      case WS_READY_STATE_OPEN:
        if (!this.closeSignalSent) {
          this.closeSignalSent = true;
          this.sendFrame({ type: "close" });
        }
        socket.close();
        return;
      case WS_READY_STATE_CLOSED:
        return;
      default:
        socket.close();
    }
  }

  /** Dispatches one incoming message; anything thrown along the way is reported via the error listeners. */
  private async handleMessage(data: unknown): Promise<void> {
    try {
      if (typeof data !== "string") {
        const bytes = await decodeBinaryFrame(data);
        this.frameListeners.forEach((deliver) => deliver(bytes));
        return;
      }

      const status = parseStatusFrame(data);
      if (status === null) {
        this.emitError(new Error("Received invalid desktop stream control frame."));
        return;
      }

      if (status.type === "ready") {
        this.readyListeners.forEach((deliver) => deliver(status));
        return;
      }

      this.emitError(status);
    } catch (failure) {
      this.emitError(failure instanceof Error ? failure : new Error(String(failure)));
    }
  }

  /** Serialises `frame` to JSON and sends it; dropped unless the socket is open. */
  private sendFrame(frame: DesktopStreamClientFrame): void {
    if (this.socket.readyState === WS_READY_STATE_OPEN) {
      this.socket.send(JSON.stringify(frame));
    }
  }

  /** Passes `error` to every registered error listener. */
  private emitError(error: DesktopStreamErrorStatus | Error): void {
    this.errorListeners.forEach((report) => report(error));
  }
}
|
||||
|
||||
/**
 * Parses a text control frame received from the desktop stream.
 *
 * @param payload - Raw text message.
 * @returns A "ready" status (numeric `width` and `height`) or an "error"
 *   status (string `message`). Returns `null` for anything else, including
 *   text that is not valid JSON and JSON that is not an object, so callers get
 *   one uniform "invalid frame" result instead of a thrown
 *   SyntaxError/TypeError.
 */
function parseStatusFrame(payload: string): DesktopStreamStatusMessage | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    return null;
  }

  // JSON.parse can legitimately yield null or a primitive; neither is a frame.
  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }
  const value = parsed as Record<string, unknown>;

  if (value.type === "ready" && typeof value.width === "number" && typeof value.height === "number") {
    return {
      type: "ready",
      width: value.width,
      height: value.height,
    };
  }

  if (value.type === "error" && typeof value.message === "string") {
    return {
      type: "error",
      message: value.message,
    };
  }

  return null;
}
|
||||
|
||||
/**
 * Normalises a binary WebSocket payload to a `Uint8Array`.
 *
 * Accepts an `ArrayBuffer`, any typed-array/`DataView` view (wrapped without
 * copying, honouring its offset and length), or a `Blob` when the runtime
 * provides one.
 *
 * @throws Error for any other payload type.
 */
async function decodeBinaryFrame(data: unknown): Promise<Uint8Array> {
  if (data instanceof ArrayBuffer) {
    return new Uint8Array(data);
  }

  if (ArrayBuffer.isView(data)) {
    const { buffer, byteOffset, byteLength } = data;
    return new Uint8Array(buffer, byteOffset, byteLength);
  }

  const blobAvailable = typeof Blob !== "undefined";
  if (blobAvailable && data instanceof Blob) {
    const contents = await data.arrayBuffer();
    return new Uint8Array(contents);
  }

  throw new Error("Unsupported desktop stream binary frame type.");
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -14,10 +14,18 @@ export {
|
|||
export { AcpRpcError } from "acp-http-client";
|
||||
|
||||
export { buildInspectorUrl } from "./inspector.ts";
|
||||
export { DesktopStreamSession } from "./desktop-stream.ts";
|
||||
export type {
|
||||
DesktopStreamConnectOptions,
|
||||
DesktopStreamErrorStatus,
|
||||
DesktopStreamReadyStatus,
|
||||
DesktopStreamStatusMessage,
|
||||
} from "./desktop-stream.ts";
|
||||
|
||||
export type {
|
||||
SandboxAgentHealthWaitOptions,
|
||||
AgentQueryOptions,
|
||||
DesktopStreamSessionOptions,
|
||||
ProcessLogFollowQuery,
|
||||
ProcessLogListener,
|
||||
ProcessLogSubscription,
|
||||
|
|
@ -50,6 +58,37 @@ export type {
|
|||
AgentInstallRequest,
|
||||
AgentInstallResponse,
|
||||
AgentListResponse,
|
||||
DesktopActionResponse,
|
||||
DesktopDisplayInfoResponse,
|
||||
DesktopErrorInfo,
|
||||
DesktopKeyboardDownRequest,
|
||||
DesktopKeyboardUpRequest,
|
||||
DesktopKeyModifiers,
|
||||
DesktopKeyboardPressRequest,
|
||||
DesktopKeyboardTypeRequest,
|
||||
DesktopMouseButton,
|
||||
DesktopMouseClickRequest,
|
||||
DesktopMouseDownRequest,
|
||||
DesktopMouseDragRequest,
|
||||
DesktopMouseMoveRequest,
|
||||
DesktopMousePositionResponse,
|
||||
DesktopMouseScrollRequest,
|
||||
DesktopMouseUpRequest,
|
||||
DesktopProcessInfo,
|
||||
DesktopRecordingInfo,
|
||||
DesktopRecordingListResponse,
|
||||
DesktopRecordingStartRequest,
|
||||
DesktopRecordingStatus,
|
||||
DesktopRegionScreenshotQuery,
|
||||
DesktopResolution,
|
||||
DesktopScreenshotFormat,
|
||||
DesktopScreenshotQuery,
|
||||
DesktopStartRequest,
|
||||
DesktopState,
|
||||
DesktopStatusResponse,
|
||||
DesktopStreamStatusResponse,
|
||||
DesktopWindowInfo,
|
||||
DesktopWindowListResponse,
|
||||
FsActionResponse,
|
||||
FsDeleteQuery,
|
||||
FsEntriesQuery,
|
||||
|
|
@ -74,10 +113,12 @@ export type {
|
|||
ProcessInfo,
|
||||
ProcessInputRequest,
|
||||
ProcessInputResponse,
|
||||
ProcessListQuery,
|
||||
ProcessListResponse,
|
||||
ProcessLogEntry,
|
||||
ProcessLogsQuery,
|
||||
ProcessLogsResponse,
|
||||
ProcessOwner,
|
||||
ProcessLogsStream,
|
||||
ProcessRunRequest,
|
||||
ProcessRunResponse,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,38 @@ import type { components, operations } from "./generated/openapi.ts";
|
|||
export type ProblemDetails = components["schemas"]["ProblemDetails"];
|
||||
|
||||
export type HealthResponse = JsonResponse<operations["get_v1_health"], 200>;
|
||||
export type DesktopState = components["schemas"]["DesktopState"];
|
||||
export type DesktopResolution = components["schemas"]["DesktopResolution"];
|
||||
export type DesktopErrorInfo = components["schemas"]["DesktopErrorInfo"];
|
||||
export type DesktopProcessInfo = components["schemas"]["DesktopProcessInfo"];
|
||||
export type DesktopStatusResponse = JsonResponse<operations["get_v1_desktop_status"], 200>;
|
||||
export type DesktopStartRequest = JsonRequestBody<operations["post_v1_desktop_start"]>;
|
||||
export type DesktopScreenshotFormat = components["schemas"]["DesktopScreenshotFormat"];
|
||||
export type DesktopScreenshotQuery =
|
||||
QueryParams<operations["get_v1_desktop_screenshot"]> extends never ? Record<string, never> : QueryParams<operations["get_v1_desktop_screenshot"]>;
|
||||
export type DesktopRegionScreenshotQuery = QueryParams<operations["get_v1_desktop_screenshot_region"]>;
|
||||
export type DesktopMousePositionResponse = JsonResponse<operations["get_v1_desktop_mouse_position"], 200>;
|
||||
export type DesktopMouseButton = components["schemas"]["DesktopMouseButton"];
|
||||
export type DesktopMouseMoveRequest = JsonRequestBody<operations["post_v1_desktop_mouse_move"]>;
|
||||
export type DesktopMouseClickRequest = JsonRequestBody<operations["post_v1_desktop_mouse_click"]>;
|
||||
export type DesktopMouseDownRequest = JsonRequestBody<operations["post_v1_desktop_mouse_down"]>;
|
||||
export type DesktopMouseUpRequest = JsonRequestBody<operations["post_v1_desktop_mouse_up"]>;
|
||||
export type DesktopMouseDragRequest = JsonRequestBody<operations["post_v1_desktop_mouse_drag"]>;
|
||||
export type DesktopMouseScrollRequest = JsonRequestBody<operations["post_v1_desktop_mouse_scroll"]>;
|
||||
export type DesktopKeyboardTypeRequest = JsonRequestBody<operations["post_v1_desktop_keyboard_type"]>;
|
||||
export type DesktopKeyModifiers = components["schemas"]["DesktopKeyModifiers"];
|
||||
export type DesktopKeyboardPressRequest = JsonRequestBody<operations["post_v1_desktop_keyboard_press"]>;
|
||||
export type DesktopKeyboardDownRequest = JsonRequestBody<operations["post_v1_desktop_keyboard_down"]>;
|
||||
export type DesktopKeyboardUpRequest = JsonRequestBody<operations["post_v1_desktop_keyboard_up"]>;
|
||||
export type DesktopActionResponse = JsonResponse<operations["post_v1_desktop_keyboard_type"], 200>;
|
||||
export type DesktopDisplayInfoResponse = JsonResponse<operations["get_v1_desktop_display_info"], 200>;
|
||||
export type DesktopWindowInfo = components["schemas"]["DesktopWindowInfo"];
|
||||
export type DesktopWindowListResponse = JsonResponse<operations["get_v1_desktop_windows"], 200>;
|
||||
export type DesktopRecordingStartRequest = JsonRequestBody<operations["post_v1_desktop_recording_start"]>;
|
||||
export type DesktopRecordingStatus = components["schemas"]["DesktopRecordingStatus"];
|
||||
export type DesktopRecordingInfo = JsonResponse<operations["post_v1_desktop_recording_start"], 200>;
|
||||
export type DesktopRecordingListResponse = JsonResponse<operations["get_v1_desktop_recordings"], 200>;
|
||||
export type DesktopStreamStatusResponse = JsonResponse<operations["post_v1_desktop_stream_start"], 200>;
|
||||
export type AgentListResponse = JsonResponse<operations["get_v1_agents"], 200>;
|
||||
export type AgentInfo = components["schemas"]["AgentInfo"];
|
||||
export type AgentQuery = QueryParams<operations["get_v1_agents"]>;
|
||||
|
|
@ -37,11 +69,13 @@ export type ProcessCreateRequest = JsonRequestBody<operations["post_v1_processes
|
|||
export type ProcessInfo = components["schemas"]["ProcessInfo"];
|
||||
export type ProcessInputRequest = JsonRequestBody<operations["post_v1_process_input"]>;
|
||||
export type ProcessInputResponse = JsonResponse<operations["post_v1_process_input"], 200>;
|
||||
export type ProcessListQuery = QueryParams<operations["get_v1_processes"]>;
|
||||
export type ProcessListResponse = JsonResponse<operations["get_v1_processes"], 200>;
|
||||
export type ProcessLogEntry = components["schemas"]["ProcessLogEntry"];
|
||||
export type ProcessLogsQuery = QueryParams<operations["get_v1_process_logs"]>;
|
||||
export type ProcessLogsResponse = JsonResponse<operations["get_v1_process_logs"], 200>;
|
||||
export type ProcessLogsStream = components["schemas"]["ProcessLogsStream"];
|
||||
export type ProcessOwner = components["schemas"]["ProcessOwner"];
|
||||
export type ProcessRunRequest = JsonRequestBody<operations["post_v1_processes_run"]>;
|
||||
export type ProcessRunResponse = JsonResponse<operations["post_v1_processes_run"], 200>;
|
||||
export type ProcessSignalQuery = QueryParams<operations["post_v1_process_stop"]>;
|
||||
|
|
|
|||
244
sdks/typescript/tests/helpers/docker.ts
Normal file
244
sdks/typescript/tests/helpers/docker.ts
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
import { execFileSync } from "node:child_process";
|
||||
import { mkdtempSync, mkdirSync, rmSync } from "node:fs";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// --- Locations -------------------------------------------------------------

// Directory containing this helper module (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));

// Repository root, resolved four directory levels above this helper.
const REPO_ROOT = resolve(__dirname, "../../../..");

// --- Container defaults ----------------------------------------------------

// Port the sandbox-agent server is told to listen on inside the container.
const CONTAINER_PORT = 3000;

// Image tag used when SANDBOX_AGENT_TEST_IMAGE is not set.
const DEFAULT_IMAGE_TAG = "sandbox-agent-test:dev";

// PATH handed to the container when the caller contributes no extra entries.
const DEFAULT_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";

// Individual system PATH entries; these are never bind-mounted from the host.
const STANDARD_PATHS = new Set(["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]);

// --- Module state ----------------------------------------------------------

// Image tag memoized by ensureImage() after the first call.
let cachedImage: string | undefined;

// Monotonic counter mixed into generated container names.
let containerCounter = 0;
|
||||
|
||||
/**
 * Handle to a sandbox-agent server running inside a Docker container,
 * as returned by `startDockerSandboxAgent`.
 */
export type DockerSandboxAgentHandle = {
  /** HTTP base URL of the agent, reachable on the host's loopback interface. */
  baseUrl: string;
  /** Auth token for the agent; the helper starts the server with `--no-token`, so this is empty. */
  token: string;
  /** Force-removes the container. Removal failures are ignored. */
  dispose: () => Promise<void>;
};
|
||||
|
||||
/** Options accepted by `startDockerSandboxAgent`. */
export type DockerSandboxAgentOptions = {
  /**
   * Extra environment variables for the container. `PATH` is handled
   * according to `pathMode`; every other key is applied on top of the
   * layout-derived defaults.
   */
  env?: Record<string, string>;
  /**
   * How `env.PATH` is applied: `"merge"` (the default) prepends layout-local
   * entries to the default PATH, `"replace"` uses `env.PATH` verbatim.
   */
  pathMode?: "merge" | "replace";
  /** Maximum time to wait for the health endpoint, in milliseconds (default 30000). */
  timeoutMs?: number;
};
|
||||
|
||||
/**
 * Host directories backing one test run. Everything lives under `rootDir`,
 * which is created by `createDockerTestLayout`.
 */
type TestLayout = {
  /** Unique temporary directory that contains every other path in the layout. */
  rootDir: string;
  /** Directory exported to the container as HOME and USERPROFILE. */
  homeDir: string;
  /** Directory exported as XDG_DATA_HOME. */
  xdgDataHome: string;
  /** Directory exported as XDG_STATE_HOME. */
  xdgStateHome: string;
  /** Directory exported as APPDATA. */
  appDataDir: string;
  /** Directory exported as LOCALAPPDATA. */
  localAppDataDir: string;
  /** `sandbox-agent/bin` directory inside `xdgDataHome`. */
  installDir: string;
};
|
||||
|
||||
/**
 * Creates a fresh directory tree for one Docker-backed test run.
 *
 * A unique `docker-test-*` directory is created under `<repo>/.context`, and
 * the home, XDG, and app-data subdirectories are created inside it.
 *
 * @returns The absolute paths of every directory in the new layout.
 */
export function createDockerTestLayout(): TestLayout {
  // mkdtempSync needs the parent directory to exist before it can add the suffix.
  const tempPrefix = join(REPO_ROOT, ".context", "docker-test-");
  mkdirSync(resolve(REPO_ROOT, ".context"), { recursive: true });
  const rootDir = mkdtempSync(tempPrefix);

  const xdgDataHome = join(rootDir, "xdg-data");
  const layout: TestLayout = {
    rootDir,
    homeDir: join(rootDir, "home"),
    xdgDataHome,
    xdgStateHome: join(rootDir, "xdg-state"),
    appDataDir: join(rootDir, "appdata", "Roaming"),
    localAppDataDir: join(rootDir, "appdata", "Local"),
    installDir: join(xdgDataHome, "sandbox-agent", "bin"),
  };

  const subdirectories = [layout.homeDir, layout.xdgDataHome, layout.xdgStateHome, layout.appDataDir, layout.localAppDataDir, layout.installDir];
  subdirectories.forEach((directory) => {
    mkdirSync(directory, { recursive: true });
  });

  return layout;
}
|
||||
|
||||
/**
 * Deletes the directory tree created by `createDockerTestLayout`.
 *
 * If the plain removal fails and the platform exposes uid/gid, a throwaway
 * root container re-owns the tree to the current user and the removal is
 * retried once.
 *
 * @param layout - Layout whose `rootDir` should be removed.
 * @throws The original removal error when the fallback is unavailable or also fails.
 */
export function disposeDockerTestLayout(layout: TestLayout): void {
  const removeTree = (): void => {
    rmSync(layout.rootDir, { recursive: true, force: true });
  };

  try {
    removeTree();
  } catch (error) {
    if (typeof process.getuid === "function" && typeof process.getgid === "function") {
      const owner = `${process.getuid()}:${process.getgid()}`;
      try {
        execFileSync(
          "docker",
          [
            "run",
            "--rm",
            "--user",
            "0:0",
            "--entrypoint",
            "sh",
            "-v",
            `${layout.rootDir}:${layout.rootDir}`,
            ensureImage(),
            "-c",
            // Owner and path are passed as positional parameters ($1, $2) rather
            // than spliced into the script, so a path containing quotes or other
            // shell metacharacters cannot break or alter the command.
            'chown -R "$1" "$2"',
            "sh",
            owner,
            layout.rootDir,
          ],
          { stdio: "pipe" },
        );
        removeTree();
        return;
      } catch {
        // Best-effort fallback: the original removal error below is the one worth reporting.
      }
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Starts a sandbox-agent server in a detached Docker container and waits
 * until its health endpoint responds.
 *
 * The container publishes the agent port on a random loopback port, runs as
 * the current uid/gid when the platform exposes them, and bind-mounts the
 * layout directories at identical paths inside the container.
 *
 * @param layout - Host directory layout to expose to the container.
 * @param options - Extra environment, PATH handling, and health timeout.
 * @returns A handle with the agent's base URL and a disposer for the container.
 * @throws If the container cannot be started, the port mapping cannot be read,
 *   or the agent does not become healthy in time. The container is removed
 *   before the error is rethrown.
 */
export async function startDockerSandboxAgent(layout: TestLayout, options: DockerSandboxAgentOptions = {}): Promise<DockerSandboxAgentHandle> {
  const image = ensureImage();
  const containerId = uniqueContainerId();
  const env = buildEnv(layout, options.env ?? {}, options.pathMode ?? "merge");
  const mounts = buildMounts(layout.rootDir, env);

  // An empty host port in "127.0.0.1::<port>" lets Docker pick a free one.
  const runArgs = ["run", "-d", "--rm", "--name", containerId, "-p", `127.0.0.1::${CONTAINER_PORT}`];

  if (typeof process.getuid === "function" && typeof process.getgid === "function") {
    runArgs.push("--user", `${process.getuid()}:${process.getgid()}`);
  }

  if (process.platform === "linux") {
    runArgs.push("--add-host", "host.docker.internal:host-gateway");
  }

  runArgs.push(...mounts.flatMap((mount) => ["-v", `${mount}:${mount}`]));
  runArgs.push(...Object.entries(env).flatMap(([key, value]) => ["-e", `${key}=${value}`]));
  runArgs.push(image, "server", "--host", "0.0.0.0", "--port", String(CONTAINER_PORT), "--no-token");

  // Shared by the disposer and the failure path; removal errors are ignored.
  const removeContainer = (): void => {
    try {
      execFileSync("docker", ["rm", "-f", containerId], { stdio: "pipe" });
    } catch {}
  };

  execFileSync("docker", runArgs, { stdio: "pipe" });

  try {
    const mapping = execFileSync("docker", ["port", containerId, `${CONTAINER_PORT}/tcp`], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
    }).trim();

    // The host port is whatever follows the last ':' in the reported mapping.
    const hostPort = mapping.slice(mapping.lastIndexOf(":") + 1).trim();
    if (!hostPort) {
      throw new Error(`missing mapped host port in ${mapping}`);
    }

    const baseUrl = `http://127.0.0.1:${hostPort}`;
    await waitForHealth(baseUrl, options.timeoutMs ?? 30_000);

    return {
      baseUrl,
      token: "",
      dispose: async () => {
        removeContainer();
      },
    };
  } catch (error) {
    removeContainer();
    throw error;
  }
}
|
||||
|
||||
/**
 * Builds the test image once per process and returns its tag.
 *
 * The tag comes from `SANDBOX_AGENT_TEST_IMAGE` when that variable is set to
 * a non-blank value, otherwise `DEFAULT_IMAGE_TAG`. The image is built from
 * `docker/test-agent/Dockerfile` with the repository root as build context.
 *
 * @returns The image tag to pass to `docker run`.
 * @throws If `docker build` fails. Nothing is cached in that case, so the
 *   next call retries the build.
 */
function ensureImage(): string {
  if (cachedImage) {
    return cachedImage;
  }

  // A blank override is treated as unset: an empty tag is not a valid image name.
  const image = process.env.SANDBOX_AGENT_TEST_IMAGE?.trim() || DEFAULT_IMAGE_TAG;

  execFileSync("docker", ["build", "--tag", image, "--file", resolve(REPO_ROOT, "docker/test-agent/Dockerfile"), REPO_ROOT], {
    cwd: REPO_ROOT,
    stdio: ["ignore", "ignore", "pipe"],
  });

  // Cache only after a successful build so a failure cannot be mistaken for a ready image.
  cachedImage = image;
  return image;
}
|
||||
|
||||
/**
 * Builds the environment passed to the container.
 *
 * Starts from layout-derived defaults (HOME, USERPROFILE, XDG_*, APPDATA,
 * LOCALAPPDATA, PATH) and then applies `extraEnv`:
 * - `PATH` in `"replace"` mode is taken verbatim from `extraEnv.PATH`
 *   (empty string when absent).
 * - `PATH` in `"merge"` mode keeps only the absolute entries located inside
 *   `layout.rootDir` and prepends them to the default PATH.
 * - Every other key overrides the default after passing through
 *   `rewriteLocalhostUrl`.
 *
 * @param layout - Host directory layout supplying the default values.
 * @param extraEnv - Caller-provided variables.
 * @param pathMode - How `extraEnv.PATH` is applied.
 * @returns The final environment map.
 */
function buildEnv(layout: TestLayout, extraEnv: Record<string, string>, pathMode: "merge" | "replace"): Record<string, string> {
  const env: Record<string, string> = {
    HOME: layout.homeDir,
    USERPROFILE: layout.homeDir,
    XDG_DATA_HOME: layout.xdgDataHome,
    XDG_STATE_HOME: layout.xdgStateHome,
    APPDATA: layout.appDataDir,
    LOCALAPPDATA: layout.localAppDataDir,
    PATH: DEFAULT_PATH,
  };

  if (pathMode === "replace") {
    env.PATH = extraEnv.PATH ?? "";
  } else {
    // Match on a directory boundary so a sibling such as "<rootDir>-other/bin"
    // is not mistaken for a path inside the layout.
    const rootPrefix = layout.rootDir.endsWith("/") ? layout.rootDir : `${layout.rootDir}/`;
    const customPathEntries = new Set<string>();
    for (const entry of (extraEnv.PATH ?? "").split(":")) {
      if (!entry.startsWith("/")) {
        continue; // skips empty and relative entries
      }
      if (entry === layout.rootDir || entry.startsWith(rootPrefix)) {
        customPathEntries.add(entry);
      }
    }
    if (customPathEntries.size > 0) {
      env.PATH = `${Array.from(customPathEntries).join(":")}:${DEFAULT_PATH}`;
    }
  }

  for (const [key, value] of Object.entries(extraEnv)) {
    if (key === "PATH") {
      continue; // already handled above
    }
    env[key] = rewriteLocalhostUrl(key, value);
  }

  return env;
}
|
||||
|
||||
/**
 * Lists the host directories to bind-mount into the container.
 *
 * The result always starts with `rootDir`, followed by the absolute values of
 * the well-known directory variables and then any absolute PATH entry that is
 * not a standard system path. Duplicates are dropped, first occurrence wins.
 *
 * @param rootDir - Root of the test layout.
 * @param env - Final container environment.
 * @returns Unique absolute host paths in insertion order.
 */
function buildMounts(rootDir: string, env: Record<string, string>): string[] {
  const directoryKeys = ["HOME", "USERPROFILE", "XDG_DATA_HOME", "XDG_STATE_HOME", "APPDATA", "LOCALAPPDATA", "SANDBOX_AGENT_DESKTOP_FAKE_STATE_DIR"];

  const fromVariables = directoryKeys.map((key) => env[key]).filter((value) => value?.startsWith("/"));
  const fromPath = (env.PATH ?? "").split(":").filter((entry) => entry.startsWith("/") && !STANDARD_PATHS.has(entry));

  return Array.from(new Set<string>([rootDir, ...fromVariables, ...fromPath]));
}
|
||||
|
||||
/**
 * Polls `GET <baseUrl>/v1/health` until it returns a successful status.
 *
 * Each request is aborted once the overall deadline passes, so a connection
 * that never answers cannot keep the wait alive beyond `timeoutMs`.
 *
 * @param baseUrl - Base URL of the agent, without a trailing slash.
 * @param timeoutMs - Overall time budget in milliseconds.
 * @throws Error when no successful response arrives within `timeoutMs`.
 */
async function waitForHealth(baseUrl: string, timeoutMs: number): Promise<void> {
  const pollIntervalMs = 200;
  const deadline = Date.now() + timeoutMs;

  for (let remaining = timeoutMs; remaining > 0; remaining = deadline - Date.now()) {
    const controller = new AbortController();
    const abortTimer = setTimeout(() => controller.abort(), remaining);
    try {
      const response = await fetch(`${baseUrl}/v1/health`, { signal: controller.signal });
      if (response.ok) {
        return;
      }
    } catch {
      // Connection refused or aborted: the server is not ready yet, keep polling.
    } finally {
      clearTimeout(abortTimer);
    }

    // Never sleep past the deadline.
    const pauseMs = Math.min(pollIntervalMs, Math.max(0, deadline - Date.now()));
    await new Promise<void>((done) => setTimeout(done, pauseMs));
  }

  throw new Error(`timed out waiting for sandbox-agent health at ${baseUrl}`);
}
|
||||
|
||||
/**
 * Generates a container name for this test process.
 *
 * The name combines the process id, the current time in base 36, and a
 * per-module counter in base 36, which is incremented on every call.
 *
 * @returns A name of the form `sandbox-agent-ts-<pid>-<time>-<counter>`.
 */
function uniqueContainerId(): string {
  containerCounter += 1;
  const segments = ["sandbox-agent-ts", String(process.pid), Date.now().toString(36), containerCounter.toString(36)];
  return segments.join("-");
}
|
||||
|
||||
/**
 * Redirects loopback HTTP URLs to the Docker host for URL-like variables.
 *
 * For keys ending in `_URL` or `_URI`, every `http://127.0.0.1` or
 * `http://localhost` origin in the value is replaced with
 * `http://host.docker.internal`. The host must be followed by a non-hostname
 * character or the end of the string, so hosts such as
 * `localhost.example.com` or `127.0.0.10` are left untouched.
 *
 * @param key - Environment variable name.
 * @param value - Environment variable value.
 * @returns The rewritten value, or `value` unchanged for other keys.
 */
function rewriteLocalhostUrl(key: string, value: string): string {
  if (!key.endsWith("_URL") && !key.endsWith("_URI")) {
    return value;
  }
  // (?![\w.-]) rejects matches that are only a prefix of a longer host name.
  return value.replace(/http:\/\/(?:127\.0\.0\.1|localhost)(?![\w.-])/g, "http://host.docker.internal");
}
|
||||
|
|
@ -1,9 +1,6 @@
|
|||
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
||||
import { existsSync } from "node:fs";
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { dirname, resolve } from "node:path";
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { tmpdir } from "node:os";
|
||||
import {
|
||||
InMemorySessionPersistDriver,
|
||||
|
|
@ -14,36 +11,11 @@ import {
|
|||
type SessionPersistDriver,
|
||||
type SessionRecord,
|
||||
} from "../src/index.ts";
|
||||
import { spawnSandboxAgent, isNodeRuntime, type SandboxAgentSpawnHandle } from "../src/spawn.ts";
|
||||
import { isNodeRuntime } from "../src/spawn.ts";
|
||||
import { createDockerTestLayout, disposeDockerTestLayout, startDockerSandboxAgent, type DockerSandboxAgentHandle } from "./helpers/docker.ts";
|
||||
import { prepareMockAgentDataHome } from "./helpers/mock-agent.ts";
|
||||
import WebSocket from "ws";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
function findBinary(): string | null {
|
||||
if (process.env.SANDBOX_AGENT_BIN) {
|
||||
return process.env.SANDBOX_AGENT_BIN;
|
||||
}
|
||||
|
||||
const cargoPaths = [resolve(__dirname, "../../../target/debug/sandbox-agent"), resolve(__dirname, "../../../target/release/sandbox-agent")];
|
||||
|
||||
for (const p of cargoPaths) {
|
||||
if (existsSync(p)) {
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const BINARY_PATH = findBinary();
|
||||
if (!BINARY_PATH) {
|
||||
throw new Error("sandbox-agent binary not found. Build it (cargo build -p sandbox-agent) or set SANDBOX_AGENT_BIN.");
|
||||
}
|
||||
if (!process.env.SANDBOX_AGENT_BIN) {
|
||||
process.env.SANDBOX_AGENT_BIN = BINARY_PATH;
|
||||
}
|
||||
|
||||
/**
 * Resolves after the given delay.
 *
 * @param ms - Delay in milliseconds.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
|
@ -110,6 +82,15 @@ async function waitForAsync<T>(fn: () => Promise<T | undefined | null>, timeoutM
|
|||
throw new Error("timed out waiting for condition");
|
||||
}
|
||||
|
||||
/**
 * Settles with `promise`, or rejects if it takes longer than `timeoutMs`.
 *
 * The timer is cleared as soon as either side settles, so a fast promise does
 * not leave a pending timer behind.
 *
 * @param promise - Operation to guard.
 * @param label - Name of the operation, used in the timeout message.
 * @param timeoutMs - Time budget in milliseconds (default 15000).
 * @returns The value `promise` resolves to.
 * @throws Error `"<label> timed out after <timeoutMs>ms"` on timeout, or
 *   whatever `promise` rejects with.
 */
async function withTimeout<T>(promise: Promise<T>, label: string, timeoutMs = 15_000): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_resolve, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`${label} timed out after ${timeoutMs}ms`));
    }, timeoutMs);
  });

  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
function buildTarArchive(entries: Array<{ name: string; content: string }>): Uint8Array {
|
||||
const blocks: Buffer[] = [];
|
||||
|
||||
|
|
@ -174,34 +155,77 @@ function decodeProcessLogData(data: string, encoding: string): string {
|
|||
|
||||
function nodeCommand(source: string): { command: string; args: string[] } {
|
||||
return {
|
||||
command: process.execPath,
|
||||
command: "node",
|
||||
args: ["-e", source],
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Re-issues an intercepted request against `baseUrl`.
 *
 * Method, headers, and abort signal are copied from `outgoing`. For methods
 * other than GET and HEAD the body stream is forwarded with `duplex: "half"`.
 * Only the path and query of `parsed` are kept; the origin comes from `baseUrl`.
 *
 * @param defaultFetch - Fetch implementation that performs the real request.
 * @param baseUrl - Origin of the real server.
 * @param outgoing - Request produced by the SDK.
 * @param parsed - Parsed URL of `outgoing`.
 * @returns The response from `defaultFetch`.
 */
function forwardRequest(defaultFetch: typeof fetch, baseUrl: string, outgoing: Request, parsed: URL): Promise<Response> {
  const carriesBody = !(outgoing.method === "GET" || outgoing.method === "HEAD");

  const init: RequestInit & { duplex?: "half" } = {
    method: outgoing.method,
    headers: new Headers(outgoing.headers),
    signal: outgoing.signal,
    ...(carriesBody ? { body: outgoing.body, duplex: "half" as const } : {}),
  };

  const target = new URL(parsed.pathname + parsed.search, baseUrl);
  return defaultFetch(target, init);
}
|
||||
|
||||
/**
 * Opens an xterm window on the given display and waits until it can be activated.
 *
 * The xterm runs `sleep 60` under the title "Sandbox Desktop Test". A shell
 * probe using xdotool is retried every 200 ms for up to 10 s until it finds
 * the visible window and activates it.
 *
 * @param sdk - Connected SDK client used to create and run processes.
 * @param display - X display identifier exported as DISPLAY.
 * @returns The id of the xterm process, for the caller to kill and delete.
 * @throws If the window cannot be activated within the wait budget.
 */
async function launchDesktopFocusWindow(sdk: SandboxAgent, display: string): Promise<string> {
  // Exits 3 while the window is not visible yet; otherwise exits with xdotool's status.
  const activateScript =
    'wid="$(xdotool search --onlyvisible --name \'Sandbox Desktop Test\' 2>/dev/null | head -n 1 || true)"; if [ -z "$wid" ]; then exit 3; fi; xdotool windowactivate "$wid"';

  const { id: windowProcessId } = await sdk.createProcess({
    command: "xterm",
    args: ["-geometry", "80x24+40+40", "-title", "Sandbox Desktop Test", "-e", "sh", "-lc", "sleep 60"],
    env: { DISPLAY: display },
  });

  const tryActivate = async () => {
    const { exitCode } = await sdk.runProcess({
      command: "sh",
      args: ["-lc", activateScript],
      env: { DISPLAY: display },
      timeoutMs: 5_000,
    });
    if (exitCode !== 0) {
      return undefined; // not ready yet: tells the poller to retry
    }
    return true;
  };

  await waitForAsync(tryActivate, 10_000, 200);

  return windowProcessId;
}
|
||||
|
||||
describe("Integration: TypeScript SDK flat session API", () => {
|
||||
let handle: SandboxAgentSpawnHandle;
|
||||
let handle: DockerSandboxAgentHandle;
|
||||
let baseUrl: string;
|
||||
let token: string;
|
||||
let dataHome: string;
|
||||
let layout: ReturnType<typeof createDockerTestLayout>;
|
||||
|
||||
beforeAll(async () => {
|
||||
dataHome = mkdtempSync(join(tmpdir(), "sdk-integration-"));
|
||||
const agentEnv = prepareMockAgentDataHome(dataHome);
|
||||
beforeEach(async () => {
|
||||
layout = createDockerTestLayout();
|
||||
prepareMockAgentDataHome(layout.xdgDataHome);
|
||||
|
||||
handle = await spawnSandboxAgent({
|
||||
enabled: true,
|
||||
log: "silent",
|
||||
handle = await startDockerSandboxAgent(layout, {
|
||||
timeoutMs: 30000,
|
||||
env: agentEnv,
|
||||
});
|
||||
baseUrl = handle.baseUrl;
|
||||
token = handle.token;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await handle.dispose();
|
||||
rmSync(dataHome, { recursive: true, force: true });
|
||||
afterEach(async () => {
|
||||
await handle?.dispose?.();
|
||||
if (layout) {
|
||||
disposeDockerTestLayout(layout);
|
||||
}
|
||||
});
|
||||
|
||||
it("detects Node.js runtime", () => {
|
||||
|
|
@ -280,11 +304,12 @@ describe("Integration: TypeScript SDK flat session API", () => {
|
|||
token,
|
||||
});
|
||||
|
||||
const directory = mkdtempSync(join(tmpdir(), "sdk-fs-"));
|
||||
const directory = join(layout.rootDir, "fs-test");
|
||||
const nestedDir = join(directory, "nested");
|
||||
const filePath = join(directory, "notes.txt");
|
||||
const movedPath = join(directory, "notes-moved.txt");
|
||||
const uploadDir = join(directory, "uploaded");
|
||||
mkdirSync(directory, { recursive: true });
|
||||
|
||||
try {
|
||||
const listedAgents = await sdk.listAgents({ config: true, noCache: true });
|
||||
|
|
@ -341,25 +366,30 @@ describe("Integration: TypeScript SDK flat session API", () => {
|
|||
const parsed = new URL(outgoing.url);
|
||||
seenPaths.push(parsed.pathname);
|
||||
|
||||
const forwardedUrl = new URL(`${parsed.pathname}${parsed.search}`, baseUrl);
|
||||
const forwarded = new Request(forwardedUrl.toString(), outgoing);
|
||||
return defaultFetch(forwarded);
|
||||
return forwardRequest(defaultFetch, baseUrl, outgoing, parsed);
|
||||
};
|
||||
|
||||
const sdk = await SandboxAgent.connect({
|
||||
token,
|
||||
fetch: customFetch,
|
||||
});
|
||||
let sessionId: string | undefined;
|
||||
|
||||
await sdk.getHealth();
|
||||
const session = await sdk.createSession({ agent: "mock" });
|
||||
const prompt = await session.prompt([{ type: "text", text: "custom fetch integration test" }]);
|
||||
expect(prompt.stopReason).toBe("end_turn");
|
||||
try {
|
||||
await withTimeout(sdk.getHealth(), "custom fetch getHealth");
|
||||
const session = await withTimeout(sdk.createSession({ agent: "mock" }), "custom fetch createSession");
|
||||
sessionId = session.id;
|
||||
expect(session.agent).toBe("mock");
|
||||
await withTimeout(sdk.destroySession(session.id), "custom fetch destroySession");
|
||||
|
||||
expect(seenPaths).toContain("/v1/health");
|
||||
expect(seenPaths.some((path) => path.startsWith("/v1/acp/"))).toBe(true);
|
||||
|
||||
await sdk.dispose();
|
||||
expect(seenPaths).toContain("/v1/health");
|
||||
expect(seenPaths.some((path) => path.startsWith("/v1/acp/"))).toBe(true);
|
||||
} finally {
|
||||
if (sessionId) {
|
||||
await sdk.destroySession(sessionId).catch(() => {});
|
||||
}
|
||||
await withTimeout(sdk.dispose(), "custom fetch dispose");
|
||||
}
|
||||
}, 60_000);
|
||||
|
||||
it("requires baseUrl when fetch is not provided", async () => {
|
||||
|
|
@ -386,9 +416,7 @@ describe("Integration: TypeScript SDK flat session API", () => {
|
|||
}
|
||||
}
|
||||
|
||||
const forwardedUrl = new URL(`${parsed.pathname}${parsed.search}`, baseUrl);
|
||||
const forwarded = new Request(forwardedUrl.toString(), outgoing);
|
||||
return defaultFetch(forwarded);
|
||||
return forwardRequest(defaultFetch, baseUrl, outgoing, parsed);
|
||||
};
|
||||
|
||||
const sdk = await SandboxAgent.connect({
|
||||
|
|
@ -710,7 +738,9 @@ describe("Integration: TypeScript SDK flat session API", () => {
|
|||
token,
|
||||
});
|
||||
|
||||
const directory = mkdtempSync(join(tmpdir(), "sdk-config-"));
|
||||
const directory = join(layout.rootDir, "config-test");
|
||||
|
||||
mkdirSync(directory, { recursive: true });
|
||||
|
||||
const mcpConfig = {
|
||||
type: "local" as const,
|
||||
|
|
@ -957,4 +987,98 @@ describe("Integration: TypeScript SDK flat session API", () => {
|
|||
await sdk.dispose();
|
||||
}
|
||||
});
|
||||
|
||||
it("covers desktop status, screenshot, display, mouse, and keyboard helpers", async () => {
|
||||
const sdk = await SandboxAgent.connect({
|
||||
baseUrl,
|
||||
token,
|
||||
});
|
||||
let focusWindowProcessId: string | undefined;
|
||||
|
||||
try {
|
||||
const initialStatus = await sdk.getDesktopStatus();
|
||||
expect(initialStatus.state).toBe("inactive");
|
||||
|
||||
const started = await sdk.startDesktop({
|
||||
width: 1440,
|
||||
height: 900,
|
||||
dpi: 96,
|
||||
});
|
||||
expect(started.state).toBe("active");
|
||||
expect(started.display?.startsWith(":")).toBe(true);
|
||||
expect(started.missingDependencies).toEqual([]);
|
||||
|
||||
const displayInfo = await sdk.getDesktopDisplayInfo();
|
||||
expect(displayInfo.display).toBe(started.display);
|
||||
expect(displayInfo.resolution.width).toBe(1440);
|
||||
expect(displayInfo.resolution.height).toBe(900);
|
||||
|
||||
const screenshot = await sdk.takeDesktopScreenshot();
|
||||
expect(Buffer.from(screenshot.subarray(0, 8)).equals(Buffer.from("\x89PNG\r\n\x1a\n", "binary"))).toBe(true);
|
||||
|
||||
const region = await sdk.takeDesktopRegionScreenshot({
|
||||
x: 10,
|
||||
y: 20,
|
||||
width: 40,
|
||||
height: 50,
|
||||
});
|
||||
expect(Buffer.from(region.subarray(0, 8)).equals(Buffer.from("\x89PNG\r\n\x1a\n", "binary"))).toBe(true);
|
||||
|
||||
const moved = await sdk.moveDesktopMouse({ x: 40, y: 50 });
|
||||
expect(moved.x).toBe(40);
|
||||
expect(moved.y).toBe(50);
|
||||
|
||||
const dragged = await sdk.dragDesktopMouse({
|
||||
startX: 40,
|
||||
startY: 50,
|
||||
endX: 80,
|
||||
endY: 90,
|
||||
button: "left",
|
||||
});
|
||||
expect(dragged.x).toBe(80);
|
||||
expect(dragged.y).toBe(90);
|
||||
|
||||
const clicked = await sdk.clickDesktop({
|
||||
x: 80,
|
||||
y: 90,
|
||||
button: "left",
|
||||
clickCount: 1,
|
||||
});
|
||||
expect(clicked.x).toBe(80);
|
||||
expect(clicked.y).toBe(90);
|
||||
|
||||
const scrolled = await sdk.scrollDesktop({
|
||||
x: 80,
|
||||
y: 90,
|
||||
deltaY: -2,
|
||||
});
|
||||
expect(scrolled.x).toBe(80);
|
||||
expect(scrolled.y).toBe(90);
|
||||
|
||||
const position = await sdk.getDesktopMousePosition();
|
||||
expect(position.x).toBe(80);
|
||||
expect(position.y).toBe(90);
|
||||
|
||||
focusWindowProcessId = await launchDesktopFocusWindow(sdk, started.display!);
|
||||
|
||||
const typed = await sdk.typeDesktopText({
|
||||
text: "hello desktop",
|
||||
delayMs: 5,
|
||||
});
|
||||
expect(typed.ok).toBe(true);
|
||||
|
||||
const pressed = await sdk.pressDesktopKey({ key: "ctrl+l" });
|
||||
expect(pressed.ok).toBe(true);
|
||||
|
||||
const stopped = await sdk.stopDesktop();
|
||||
expect(stopped.state).toBe("inactive");
|
||||
} finally {
|
||||
if (focusWindowProcessId) {
|
||||
await sdk.killProcess(focusWindowProcessId, { waitMs: 5_000 }).catch(() => {});
|
||||
await sdk.deleteProcess(focusWindowProcessId).catch(() => {});
|
||||
}
|
||||
await sdk.stopDesktop().catch(() => {});
|
||||
await sdk.dispose();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ export default defineConfig({
|
|||
test: {
|
||||
include: ["tests/**/*.test.ts"],
|
||||
testTimeout: 30000,
|
||||
teardownTimeout: 10000,
|
||||
pool: "forks",
|
||||
hookTimeout: 120000,
|
||||
},
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue