mirror of
https://github.com/harivansh-afk/sandbox-agent.git
synced 2026-04-19 11:03:48 +00:00
feat: replace neko with native GStreamer WebRTC for desktop streaming
Replace the neko binary dependency with a native GStreamer pipeline (ximagesrc -> vp8enc -> webrtcbin) for desktop video streaming. This removes the external neko process and integrates screen capture directly via gstreamer-rs crate bindings behind a `desktop-gstreamer` feature flag.

Key changes:
- Add desktop_gstreamer.rs with GStreamer WebRTC pipeline management
- Rewrite signaling protocol (ready/offer/answer/candidate over WS)
- Add leaky queues and videorate for low-latency streaming
- Rewrite ICE candidates to 127.0.0.1 for Docker connectivity
- Constrain UDP port range (30000-30100) via libnice agent
- Update TypeScript SDK desktop-stream.ts for new signaling
- Update inspector DesktopTab with WebRTC Live View
- Update Dockerfiles to install GStreamer dev packages

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
e638148345
commit
944ad1ba55
22 changed files with 1848 additions and 1170 deletions
|
|
@ -23,10 +23,7 @@ import {
|
|||
type SetSessionModeRequest,
|
||||
} from "acp-http-client";
|
||||
import type { SandboxAgentSpawnHandle, SandboxAgentSpawnOptions } from "./spawn.ts";
|
||||
import {
|
||||
DesktopStreamSession,
|
||||
type DesktopStreamConnectOptions,
|
||||
} from "./desktop-stream.ts";
|
||||
import { DesktopStreamSession, type DesktopStreamConnectOptions } from "./desktop-stream.ts";
|
||||
import {
|
||||
type AcpServerListResponse,
|
||||
type AgentInfo,
|
||||
|
|
@ -1530,9 +1527,7 @@ export class SandboxAgent {
|
|||
return this.requestJson("GET", `${API_PREFIX}/desktop/windows`);
|
||||
}
|
||||
|
||||
async startDesktopRecording(
|
||||
request: DesktopRecordingStartRequest = {},
|
||||
): Promise<DesktopRecordingInfo> {
|
||||
async startDesktopRecording(request: DesktopRecordingStartRequest = {}): Promise<DesktopRecordingInfo> {
|
||||
return this.requestJson("POST", `${API_PREFIX}/desktop/recording/start`, {
|
||||
body: request,
|
||||
});
|
||||
|
|
@ -1551,13 +1546,9 @@ export class SandboxAgent {
|
|||
}
|
||||
|
||||
async downloadDesktopRecording(id: string): Promise<Uint8Array> {
|
||||
const response = await this.requestRaw(
|
||||
"GET",
|
||||
`${API_PREFIX}/desktop/recordings/${encodeURIComponent(id)}/download`,
|
||||
{
|
||||
accept: "video/mp4",
|
||||
},
|
||||
);
|
||||
const response = await this.requestRaw("GET", `${API_PREFIX}/desktop/recordings/${encodeURIComponent(id)}/download`, {
|
||||
accept: "video/mp4",
|
||||
});
|
||||
const buffer = await response.arrayBuffer();
|
||||
return new Uint8Array(buffer);
|
||||
}
|
||||
|
|
@ -1799,7 +1790,7 @@ export class SandboxAgent {
|
|||
|
||||
buildDesktopStreamWebSocketUrl(options: ProcessTerminalWebSocketUrlOptions = {}): string {
|
||||
return toWebSocketUrl(
|
||||
this.buildUrl(`${API_PREFIX}/desktop/stream/ws`, {
|
||||
this.buildUrl(`${API_PREFIX}/desktop/stream/signaling`, {
|
||||
access_token: options.accessToken ?? this.token,
|
||||
}),
|
||||
);
|
||||
|
|
@ -1820,7 +1811,7 @@ export class SandboxAgent {
|
|||
}
|
||||
|
||||
connectDesktopStream(options: DesktopStreamSessionOptions = {}): DesktopStreamSession {
|
||||
return new DesktopStreamSession(this.connectDesktopStreamWebSocket(options));
|
||||
return new DesktopStreamSession(this.connectDesktopStreamWebSocket(options), options);
|
||||
}
|
||||
|
||||
private async getLiveConnection(agent: string): Promise<LiveAcpConnection> {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
import type { DesktopMouseButton } from "./types.ts";
|
||||
|
||||
const WS_READY_STATE_CONNECTING = 0;
|
||||
const WS_READY_STATE_OPEN = 1;
|
||||
const WS_READY_STATE_CLOSED = 3;
|
||||
|
||||
|
|
@ -21,63 +20,140 @@ export interface DesktopStreamConnectOptions {
|
|||
accessToken?: string;
|
||||
WebSocket?: typeof WebSocket;
|
||||
protocols?: string | string[];
|
||||
RTCPeerConnection?: typeof RTCPeerConnection;
|
||||
rtcConfig?: RTCConfiguration;
|
||||
}
|
||||
|
||||
type DesktopStreamClientFrame =
|
||||
| {
|
||||
type: "moveMouse";
|
||||
x: number;
|
||||
y: number;
|
||||
}
|
||||
| {
|
||||
type: "mouseDown" | "mouseUp";
|
||||
x?: number;
|
||||
y?: number;
|
||||
button?: DesktopMouseButton;
|
||||
}
|
||||
| {
|
||||
type: "scroll";
|
||||
x: number;
|
||||
y: number;
|
||||
deltaX?: number;
|
||||
deltaY?: number;
|
||||
}
|
||||
| {
|
||||
type: "keyDown" | "keyUp";
|
||||
key: string;
|
||||
}
|
||||
| {
|
||||
type: "close";
|
||||
};
|
||||
/**
|
||||
* Data channel binary input protocol (Big Endian).
|
||||
*
|
||||
* Byte 0: opcode
|
||||
* 0x01 = mouse_move (bytes 1-2: u16 BE x, bytes 3-4: u16 BE y)
|
||||
* 0x02 = mouse_down (byte 1: u8 button)
|
||||
* 0x03 = mouse_up (byte 1: u8 button)
|
||||
* 0x04 = mouse_scroll (bytes 1-2: i16 BE dx, bytes 3-4: i16 BE dy)
|
||||
* 0x05 = key_down (bytes 1-4: u32 BE keysym)
|
||||
* 0x06 = key_up (bytes 1-4: u32 BE keysym)
|
||||
*/
|
||||
const OP_MOUSE_MOVE = 0x01;
|
||||
const OP_MOUSE_DOWN = 0x02;
|
||||
const OP_MOUSE_UP = 0x03;
|
||||
const OP_MOUSE_SCROLL = 0x04;
|
||||
const OP_KEY_DOWN = 0x05;
|
||||
const OP_KEY_UP = 0x06;
|
||||
|
||||
/**
 * Convert an SDK mouse button name into the button number written into the
 * data-channel mouse_down / mouse_up messages.
 *
 * @param button - Button name; may be omitted.
 * @returns 2 for "middle", 3 for "right", and 1 for anything else (including
 *   an omitted button).
 */
function mouseButtonToX11(button?: DesktopMouseButton): number {
  if (button === "middle") {
    return 2;
  }
  if (button === "right") {
    return 3;
  }
  // Every other value, and the omitted case, falls back to button 1.
  return 1;
}
|
||||
|
||||
/**
 * Keysyms for named (non-character) keys, keyed by the names accepted by
 * `keyToX11Keysym`. Both short names ("Left") and DOM-style names
 * ("ArrowLeft", "ShiftLeft") are listed.
 *
 * A Map is used instead of an object literal so that names such as
 * "constructor" or "toString" cannot resolve to inherited Object.prototype
 * members.
 */
const NAMED_X11_KEYSYMS: ReadonlyMap<string, number> = new Map<string, number>([
  ["Backspace", 0xff08],
  ["Tab", 0xff09],
  ["Return", 0xff0d],
  ["Enter", 0xff0d],
  ["Escape", 0xff1b],
  ["Delete", 0xffff],
  ["Home", 0xff50],
  ["Left", 0xff51],
  ["ArrowLeft", 0xff51],
  ["Up", 0xff52],
  ["ArrowUp", 0xff52],
  ["Right", 0xff53],
  ["ArrowRight", 0xff53],
  ["Down", 0xff54],
  ["ArrowDown", 0xff54],
  ["PageUp", 0xff55],
  ["PageDown", 0xff56],
  ["End", 0xff57],
  ["Insert", 0xff63],
  ["F1", 0xffbe],
  ["F2", 0xffbf],
  ["F3", 0xffc0],
  ["F4", 0xffc1],
  ["F5", 0xffc2],
  ["F6", 0xffc3],
  ["F7", 0xffc4],
  ["F8", 0xffc5],
  ["F9", 0xffc6],
  ["F10", 0xffc7],
  ["F11", 0xffc8],
  ["F12", 0xffc9],
  ["Shift", 0xffe1],
  ["ShiftLeft", 0xffe1],
  ["ShiftRight", 0xffe2],
  ["Control", 0xffe3],
  ["ControlLeft", 0xffe3],
  ["ControlRight", 0xffe4],
  ["Alt", 0xffe9],
  ["AltLeft", 0xffe9],
  ["AltRight", 0xffea],
  ["Meta", 0xffeb],
  ["MetaLeft", 0xffeb],
  ["MetaRight", 0xffec],
  ["CapsLock", 0xffe5],
  ["NumLock", 0xff7f],
  ["ScrollLock", 0xff14],
  [" ", 0x0020],
  ["Space", 0x0020],
]);

/**
 * Translate a key name or a single character into an X11 keysym.
 *
 * - A string holding exactly one Unicode code point is treated as a
 *   character: printable ASCII (U+0020..U+007E) and Latin-1 (U+00A0..U+00FF)
 *   map to the code point itself; every other code point maps to
 *   `0x01000000 + codePoint`.
 * - Any other string is looked up in the named-key table.
 *
 * @param key - A single character or a key name such as "Enter" or "ArrowLeft".
 * @returns The keysym, or 0 when the key is not recognised.
 */
function keyToX11Keysym(key: string): number {
  const codePoint = key.codePointAt(0);

  // One code point is one UTF-16 unit, or two when it lies outside the BMP.
  const isSingleCharacter = codePoint !== undefined && key.length === (codePoint > 0xffff ? 2 : 1);
  if (codePoint !== undefined && isSingleCharacter) {
    const isPrintableAscii = codePoint >= 0x20 && codePoint <= 0x7e;
    const isLatin1 = codePoint >= 0xa0 && codePoint <= 0xff;
    // Latin-1 keysyms are numerically equal to their code points; only
    // characters beyond that range use the 0x01000000 Unicode offset.
    if (isPrintableAscii || isLatin1) {
      return codePoint;
    }
    return 0x01000000 + codePoint;
  }

  return NAMED_X11_KEYSYMS.get(key) ?? 0;
}
|
||||
|
||||
export class DesktopStreamSession {
|
||||
readonly socket: WebSocket;
|
||||
readonly closed: Promise<void>;
|
||||
|
||||
private pc: RTCPeerConnection | null = null;
|
||||
private dataChannel: RTCDataChannel | null = null;
|
||||
private mediaStream: MediaStream | null = null;
|
||||
private connected = false;
|
||||
private pendingCandidates: Record<string, unknown>[] = [];
|
||||
private cachedReadyStatus: DesktopStreamReadyStatus | null = null;
|
||||
|
||||
private readonly readyListeners = new Set<(status: DesktopStreamReadyStatus) => void>();
|
||||
private readonly frameListeners = new Set<(frame: Uint8Array) => void>();
|
||||
private readonly trackListeners = new Set<(stream: MediaStream) => void>();
|
||||
private readonly connectListeners = new Set<() => void>();
|
||||
private readonly disconnectListeners = new Set<() => void>();
|
||||
private readonly errorListeners = new Set<(error: DesktopStreamErrorStatus | Error) => void>();
|
||||
private readonly closeListeners = new Set<() => void>();
|
||||
|
||||
private closeSignalSent = false;
|
||||
private closedResolve!: () => void;
|
||||
private readonly PeerConnection: typeof RTCPeerConnection;
|
||||
private readonly rtcConfig: RTCConfiguration;
|
||||
|
||||
constructor(socket: WebSocket) {
|
||||
constructor(socket: WebSocket, options: DesktopStreamConnectOptions = {}) {
|
||||
this.socket = socket;
|
||||
this.socket.binaryType = "arraybuffer";
|
||||
this.PeerConnection = options.RTCPeerConnection ?? globalThis.RTCPeerConnection;
|
||||
this.rtcConfig = options.rtcConfig ?? {};
|
||||
|
||||
this.closed = new Promise<void>((resolve) => {
|
||||
this.closedResolve = resolve;
|
||||
});
|
||||
|
||||
this.socket.addEventListener("message", (event) => {
|
||||
void this.handleMessage(event.data);
|
||||
this.handleMessage(event.data as string);
|
||||
});
|
||||
this.socket.addEventListener("error", () => {
|
||||
this.emitError(new Error("Desktop stream websocket connection failed."));
|
||||
this.emitError(new Error("Desktop stream signaling connection failed."));
|
||||
});
|
||||
this.socket.addEventListener("close", () => {
|
||||
this.teardownPeerConnection();
|
||||
this.closedResolve();
|
||||
for (const listener of this.closeListeners) {
|
||||
for (const listener of this.disconnectListeners) {
|
||||
listener();
|
||||
}
|
||||
});
|
||||
|
|
@ -85,15 +161,35 @@ export class DesktopStreamSession {
|
|||
|
||||
onReady(listener: (status: DesktopStreamReadyStatus) => void): () => void {
|
||||
this.readyListeners.add(listener);
|
||||
if (this.cachedReadyStatus) {
|
||||
listener(this.cachedReadyStatus);
|
||||
}
|
||||
return () => {
|
||||
this.readyListeners.delete(listener);
|
||||
};
|
||||
}
|
||||
|
||||
onFrame(listener: (frame: Uint8Array) => void): () => void {
|
||||
this.frameListeners.add(listener);
|
||||
onTrack(listener: (stream: MediaStream) => void): () => void {
|
||||
this.trackListeners.add(listener);
|
||||
if (this.mediaStream) {
|
||||
listener(this.mediaStream);
|
||||
}
|
||||
return () => {
|
||||
this.frameListeners.delete(listener);
|
||||
this.trackListeners.delete(listener);
|
||||
};
|
||||
}
|
||||
|
||||
onConnect(listener: () => void): () => void {
|
||||
this.connectListeners.add(listener);
|
||||
return () => {
|
||||
this.connectListeners.delete(listener);
|
||||
};
|
||||
}
|
||||
|
||||
onDisconnect(listener: () => void): () => void {
|
||||
this.disconnectListeners.add(listener);
|
||||
return () => {
|
||||
this.disconnectListeners.delete(listener);
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -104,97 +200,313 @@ export class DesktopStreamSession {
|
|||
};
|
||||
}
|
||||
|
||||
/** @deprecated Use onDisconnect instead. */
|
||||
onClose(listener: () => void): () => void {
|
||||
this.closeListeners.add(listener);
|
||||
return () => {
|
||||
this.closeListeners.delete(listener);
|
||||
};
|
||||
return this.onDisconnect(listener);
|
||||
}
|
||||
|
||||
/** @deprecated No longer emits JPEG frames. Use onTrack for WebRTC media. */
|
||||
onFrame(_listener: (frame: Uint8Array) => void): () => void {
|
||||
return () => {};
|
||||
}
|
||||
|
||||
getMediaStream(): MediaStream | null {
|
||||
return this.mediaStream;
|
||||
}
|
||||
|
||||
moveMouse(x: number, y: number): void {
|
||||
this.sendFrame({ type: "moveMouse", x, y });
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(5);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_MOUSE_MOVE);
|
||||
view.setUint16(1, x, false);
|
||||
view.setUint16(3, y, false);
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("moveMouse", { x, y });
|
||||
}
|
||||
}
|
||||
|
||||
mouseDown(button?: DesktopMouseButton, x?: number, y?: number): void {
|
||||
this.sendFrame({ type: "mouseDown", button, x, y });
|
||||
if (x != null && y != null) {
|
||||
this.moveMouse(x, y);
|
||||
}
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(2);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_MOUSE_DOWN);
|
||||
view.setUint8(1, mouseButtonToX11(button));
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("mouseDown", { button, x, y });
|
||||
}
|
||||
}
|
||||
|
||||
mouseUp(button?: DesktopMouseButton, x?: number, y?: number): void {
|
||||
this.sendFrame({ type: "mouseUp", button, x, y });
|
||||
if (x != null && y != null) {
|
||||
this.moveMouse(x, y);
|
||||
}
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(2);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_MOUSE_UP);
|
||||
view.setUint8(1, mouseButtonToX11(button));
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("mouseUp", { button, x, y });
|
||||
}
|
||||
}
|
||||
|
||||
scroll(x: number, y: number, deltaX?: number, deltaY?: number): void {
|
||||
this.sendFrame({ type: "scroll", x, y, deltaX, deltaY });
|
||||
this.moveMouse(x, y);
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(5);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_MOUSE_SCROLL);
|
||||
view.setInt16(1, deltaX ?? 0, false);
|
||||
view.setInt16(3, deltaY ?? 0, false);
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("scroll", { x, y, deltaX, deltaY });
|
||||
}
|
||||
}
|
||||
|
||||
keyDown(key: string): void {
|
||||
this.sendFrame({ type: "keyDown", key });
|
||||
const keysym = keyToX11Keysym(key);
|
||||
if (keysym === 0) return;
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(5);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_KEY_DOWN);
|
||||
view.setUint32(1, keysym, false);
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("keyDown", { key });
|
||||
}
|
||||
}
|
||||
|
||||
keyUp(key: string): void {
|
||||
this.sendFrame({ type: "keyUp", key });
|
||||
const keysym = keyToX11Keysym(key);
|
||||
if (keysym === 0) return;
|
||||
if (this.dataChannel?.readyState === "open") {
|
||||
const buf = new ArrayBuffer(5);
|
||||
const view = new DataView(buf);
|
||||
view.setUint8(0, OP_KEY_UP);
|
||||
view.setUint32(1, keysym, false);
|
||||
this.dataChannel.send(buf);
|
||||
} else {
|
||||
this.sendSignaling("keyUp", { key });
|
||||
}
|
||||
}
|
||||
|
||||
close(): void {
|
||||
if (this.socket.readyState === WS_READY_STATE_CONNECTING) {
|
||||
this.socket.addEventListener(
|
||||
"open",
|
||||
() => {
|
||||
this.close();
|
||||
},
|
||||
{ once: true },
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.socket.readyState === WS_READY_STATE_OPEN) {
|
||||
if (!this.closeSignalSent) {
|
||||
this.closeSignalSent = true;
|
||||
this.sendFrame({ type: "close" });
|
||||
}
|
||||
this.socket.close();
|
||||
return;
|
||||
}
|
||||
|
||||
this.teardownPeerConnection();
|
||||
if (this.socket.readyState !== WS_READY_STATE_CLOSED) {
|
||||
this.socket.close();
|
||||
}
|
||||
}
|
||||
|
||||
private async handleMessage(data: unknown): Promise<void> {
|
||||
private handleMessage(data: string): void {
|
||||
let msg: Record<string, unknown>;
|
||||
try {
|
||||
if (typeof data === "string") {
|
||||
const frame = parseStatusFrame(data);
|
||||
if (!frame) {
|
||||
this.emitError(new Error("Received invalid desktop stream control frame."));
|
||||
return;
|
||||
}
|
||||
msg = JSON.parse(data) as Record<string, unknown>;
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
if (frame.type === "ready") {
|
||||
for (const listener of this.readyListeners) {
|
||||
listener(frame);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const type = (msg.type as string) ?? "";
|
||||
|
||||
this.emitError(frame);
|
||||
return;
|
||||
switch (type) {
|
||||
case "ready": {
|
||||
const status: DesktopStreamReadyStatus = {
|
||||
type: "ready",
|
||||
width: Number(msg.width) || 0,
|
||||
height: Number(msg.height) || 0,
|
||||
};
|
||||
this.cachedReadyStatus = status;
|
||||
for (const listener of this.readyListeners) {
|
||||
listener(status);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const bytes = await decodeBinaryFrame(data);
|
||||
for (const listener of this.frameListeners) {
|
||||
listener(bytes);
|
||||
case "offer": {
|
||||
if (msg.sdp) {
|
||||
void this.handleOffer(msg.sdp as string);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "candidate": {
|
||||
void this.handleCandidate(msg as unknown as RTCIceCandidateInit);
|
||||
break;
|
||||
}
|
||||
|
||||
case "error": {
|
||||
const errorStatus: DesktopStreamErrorStatus = {
|
||||
type: "error",
|
||||
message: (msg.message as string) ?? "Unknown error",
|
||||
};
|
||||
this.emitError(errorStatus);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private async handleOffer(sdp: string): Promise<void> {
|
||||
try {
|
||||
const config: RTCConfiguration = {
|
||||
...this.rtcConfig,
|
||||
iceServers: this.rtcConfig.iceServers ?? [{ urls: "stun:stun.l.google.com:19302" }],
|
||||
};
|
||||
const pc = new this.PeerConnection(config);
|
||||
this.pc = pc;
|
||||
|
||||
pc.ontrack = (event) => {
|
||||
const stream = event.streams[0] ?? new MediaStream([event.track]);
|
||||
this.mediaStream = stream;
|
||||
for (const listener of this.trackListeners) {
|
||||
listener(stream);
|
||||
}
|
||||
};
|
||||
|
||||
pc.onicecandidate = (event) => {
|
||||
if (event.candidate) {
|
||||
this.sendJson({
|
||||
type: "candidate",
|
||||
candidate: event.candidate.candidate,
|
||||
sdpMLineIndex: event.candidate.sdpMLineIndex,
|
||||
sdpMid: event.candidate.sdpMid,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
pc.onconnectionstatechange = () => {
|
||||
switch (pc.connectionState) {
|
||||
case "connected":
|
||||
if (!this.connected) {
|
||||
this.connected = true;
|
||||
for (const listener of this.connectListeners) {
|
||||
listener();
|
||||
}
|
||||
}
|
||||
break;
|
||||
case "closed":
|
||||
case "failed":
|
||||
this.emitError(new Error(`WebRTC connection ${pc.connectionState}.`));
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
pc.oniceconnectionstatechange = () => {
|
||||
switch (pc.iceConnectionState) {
|
||||
case "connected":
|
||||
if (!this.connected) {
|
||||
this.connected = true;
|
||||
for (const listener of this.connectListeners) {
|
||||
listener();
|
||||
}
|
||||
}
|
||||
break;
|
||||
case "closed":
|
||||
case "failed":
|
||||
this.emitError(new Error(`WebRTC ICE ${pc.iceConnectionState}.`));
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
// Server creates the data channel; client receives it.
|
||||
pc.ondatachannel = (event) => {
|
||||
this.dataChannel = event.channel;
|
||||
this.dataChannel.binaryType = "arraybuffer";
|
||||
this.dataChannel.onerror = () => {
|
||||
this.emitError(new Error("WebRTC data channel error."));
|
||||
};
|
||||
this.dataChannel.onclose = () => {
|
||||
this.dataChannel = null;
|
||||
};
|
||||
};
|
||||
|
||||
await pc.setRemoteDescription({ type: "offer", sdp });
|
||||
|
||||
// Flush any ICE candidates that arrived before the PC was ready.
|
||||
for (const pending of this.pendingCandidates) {
|
||||
try {
|
||||
await pc.addIceCandidate(pending as unknown as RTCIceCandidateInit);
|
||||
} catch {
|
||||
// ignore stale candidates
|
||||
}
|
||||
}
|
||||
this.pendingCandidates = [];
|
||||
|
||||
const answer = await pc.createAnswer();
|
||||
await pc.setLocalDescription(answer);
|
||||
|
||||
this.sendJson({ type: "answer", sdp: answer.sdp });
|
||||
} catch (error) {
|
||||
this.emitError(error instanceof Error ? error : new Error(String(error)));
|
||||
}
|
||||
}
|
||||
|
||||
private sendFrame(frame: DesktopStreamClientFrame): void {
|
||||
if (this.socket.readyState !== WS_READY_STATE_OPEN) {
|
||||
private async handleCandidate(candidate: RTCIceCandidateInit): Promise<void> {
|
||||
if (!this.pc) {
|
||||
this.pendingCandidates.push(candidate as unknown as Record<string, unknown>);
|
||||
return;
|
||||
}
|
||||
this.socket.send(JSON.stringify(frame));
|
||||
try {
|
||||
await this.pc.addIceCandidate(candidate);
|
||||
} catch (error) {
|
||||
this.emitError(error instanceof Error ? error : new Error(String(error)));
|
||||
}
|
||||
}
|
||||
|
||||
/** Send a JSON message to the server. */
|
||||
private sendJson(msg: Record<string, unknown>): void {
|
||||
if (this.socket.readyState !== WS_READY_STATE_OPEN) return;
|
||||
this.socket.send(JSON.stringify(msg));
|
||||
}
|
||||
|
||||
/** Send a typed input message over the signaling WebSocket as fallback. */
|
||||
private sendSignaling(type: string, data: Record<string, unknown>): void {
|
||||
this.sendJson({ type, ...data });
|
||||
}
|
||||
|
||||
/** Tear down the peer connection, nullifying handlers first to prevent stale
|
||||
* callbacks. */
|
||||
private teardownPeerConnection(): void {
|
||||
if (this.dataChannel) {
|
||||
this.dataChannel.onerror = null;
|
||||
this.dataChannel.onmessage = null;
|
||||
this.dataChannel.onopen = null;
|
||||
this.dataChannel.onclose = null;
|
||||
try {
|
||||
this.dataChannel.close();
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
this.dataChannel = null;
|
||||
}
|
||||
if (this.pc) {
|
||||
this.pc.onicecandidate = null;
|
||||
this.pc.onicecandidateerror = null;
|
||||
this.pc.onconnectionstatechange = null;
|
||||
this.pc.oniceconnectionstatechange = null;
|
||||
this.pc.onsignalingstatechange = null;
|
||||
this.pc.onnegotiationneeded = null;
|
||||
this.pc.ontrack = null;
|
||||
this.pc.ondatachannel = null;
|
||||
try {
|
||||
this.pc.close();
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
this.pc = null;
|
||||
}
|
||||
this.mediaStream = null;
|
||||
this.connected = false;
|
||||
}
|
||||
|
||||
private emitError(error: DesktopStreamErrorStatus | Error): void {
|
||||
|
|
@ -203,34 +515,3 @@ export class DesktopStreamSession {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse a text websocket payload into a desktop stream status message.
 *
 * Recognised shapes:
 * - `{ type: "ready", width: number, height: number }`
 * - `{ type: "error", message: string }`
 *
 * Only the fields listed above are copied into the result; any extra
 * properties on the payload are dropped.
 *
 * @param payload - Raw text frame received from the socket.
 * @returns The parsed status message, or `null` when the payload is not valid
 *   JSON, is not a JSON object, or does not match either shape.
 */
function parseStatusFrame(payload: string): DesktopStreamStatusMessage | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(payload);
  } catch {
    // Malformed JSON is just another kind of invalid frame.
    return null;
  }

  // JSON.parse can legally yield null or a primitive; reading `.type` from
  // null would throw, so reject anything that is not an object up front.
  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }
  const value = parsed as Record<string, unknown>;

  if (value.type === "ready" && typeof value.width === "number" && typeof value.height === "number") {
    return {
      type: "ready",
      width: value.width,
      height: value.height,
    };
  }

  if (value.type === "error" && typeof value.message === "string") {
    return {
      type: "error",
      message: value.message,
    };
  }

  return null;
}
|
||||
|
||||
/**
 * Normalise a binary websocket payload to a `Uint8Array`.
 *
 * Accepts an `ArrayBuffer`, any `ArrayBuffer` view (wrapped without copying,
 * honouring the view's offset and length), or a `Blob` when the runtime
 * provides one.
 *
 * @param data - Payload taken from a websocket message event.
 * @returns The payload bytes.
 * @throws Error when `data` is none of the supported binary types.
 */
async function decodeBinaryFrame(data: unknown): Promise<Uint8Array> {
  // Blob is not defined in every runtime, so probe for it before instanceof.
  const blobSupported = typeof Blob !== "undefined";

  if (data instanceof ArrayBuffer) {
    return new Uint8Array(data);
  } else if (ArrayBuffer.isView(data)) {
    const { buffer, byteOffset, byteLength } = data;
    return new Uint8Array(buffer, byteOffset, byteLength);
  } else if (blobSupported && data instanceof Blob) {
    const contents = await data.arrayBuffer();
    return new Uint8Array(contents);
  }

  throw new Error("Unsupported desktop stream binary frame type.");
}
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@
|
|||
* Do not make direct changes to the file.
|
||||
*/
|
||||
|
||||
|
||||
export interface paths {
|
||||
"/v1/acp": {
|
||||
get: operations["get_v1_acp_servers"];
|
||||
|
|
@ -225,9 +224,10 @@ export interface paths {
|
|||
};
|
||||
"/v1/desktop/stream/ws": {
|
||||
/**
|
||||
* Open a desktop websocket streaming session.
|
||||
* @description Upgrades the connection to a websocket that streams JPEG desktop frames and
|
||||
* accepts mouse and keyboard control frames.
|
||||
* Open a desktop WebRTC signaling session.
|
||||
* @description Upgrades the connection to a WebSocket used for WebRTC signaling between
|
||||
* the browser client and the desktop streaming process. Also accepts mouse
|
||||
* and keyboard input frames as a fallback transport.
|
||||
*/
|
||||
get: operations["get_v1_desktop_stream_ws"];
|
||||
};
|
||||
|
|
@ -633,7 +633,23 @@ export interface components {
|
|||
windows: components["schemas"]["DesktopWindowInfo"][];
|
||||
};
|
||||
/** @enum {string} */
|
||||
ErrorType: "invalid_request" | "conflict" | "unsupported_agent" | "agent_not_installed" | "install_failed" | "agent_process_exited" | "token_invalid" | "permission_denied" | "not_acceptable" | "unsupported_media_type" | "not_found" | "session_not_found" | "session_already_exists" | "mode_not_supported" | "stream_error" | "timeout";
|
||||
ErrorType:
|
||||
| "invalid_request"
|
||||
| "conflict"
|
||||
| "unsupported_agent"
|
||||
| "agent_not_installed"
|
||||
| "install_failed"
|
||||
| "agent_process_exited"
|
||||
| "token_invalid"
|
||||
| "permission_denied"
|
||||
| "not_acceptable"
|
||||
| "unsupported_media_type"
|
||||
| "not_found"
|
||||
| "session_not_found"
|
||||
| "session_already_exists"
|
||||
| "mode_not_supported"
|
||||
| "stream_error"
|
||||
| "timeout";
|
||||
FsActionResponse: {
|
||||
path: string;
|
||||
};
|
||||
|
|
@ -692,35 +708,37 @@ export interface components {
|
|||
directory: string;
|
||||
mcpName: string;
|
||||
};
|
||||
McpServerConfig: ({
|
||||
args?: string[];
|
||||
command: string;
|
||||
cwd?: string | null;
|
||||
enabled?: boolean | null;
|
||||
env?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
/** Format: int64 */
|
||||
timeoutMs?: number | null;
|
||||
/** @enum {string} */
|
||||
type: "local";
|
||||
}) | ({
|
||||
bearerTokenEnvVar?: string | null;
|
||||
enabled?: boolean | null;
|
||||
envHeaders?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
headers?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
oauth?: Record<string, unknown> | null | null;
|
||||
/** Format: int64 */
|
||||
timeoutMs?: number | null;
|
||||
transport?: string | null;
|
||||
/** @enum {string} */
|
||||
type: "remote";
|
||||
url: string;
|
||||
});
|
||||
McpServerConfig:
|
||||
| {
|
||||
args?: string[];
|
||||
command: string;
|
||||
cwd?: string | null;
|
||||
enabled?: boolean | null;
|
||||
env?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
/** Format: int64 */
|
||||
timeoutMs?: number | null;
|
||||
/** @enum {string} */
|
||||
type: "local";
|
||||
}
|
||||
| {
|
||||
bearerTokenEnvVar?: string | null;
|
||||
enabled?: boolean | null;
|
||||
envHeaders?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
headers?: {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
oauth?: Record<string, unknown> | null | null;
|
||||
/** Format: int64 */
|
||||
timeoutMs?: number | null;
|
||||
transport?: string | null;
|
||||
/** @enum {string} */
|
||||
type: "remote";
|
||||
url: string;
|
||||
};
|
||||
ProblemDetails: {
|
||||
detail?: string | null;
|
||||
instance?: string | null;
|
||||
|
|
@ -880,7 +898,6 @@ export type $defs = Record<string, never>;
|
|||
export type external = Record<string, never>;
|
||||
|
||||
export interface operations {
|
||||
|
||||
get_v1_acp_servers: {
|
||||
responses: {
|
||||
/** @description Active ACP server instances */
|
||||
|
|
@ -2002,9 +2019,10 @@ export interface operations {
|
|||
};
|
||||
};
|
||||
/**
|
||||
* Open a desktop websocket streaming session.
|
||||
* @description Upgrades the connection to a websocket that streams JPEG desktop frames and
|
||||
* accepts mouse and keyboard control frames.
|
||||
* Open a desktop WebRTC signaling session.
|
||||
* @description Upgrades the connection to a WebSocket used for WebRTC signaling between
|
||||
* the browser client and the desktop streaming process. Also accepts mouse
|
||||
* and keyboard input frames as a fallback transport.
|
||||
*/
|
||||
get_v1_desktop_stream_ws: {
|
||||
parameters: {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue